This commit is contained in:
Yuvi9587
2025-09-07 04:56:08 -07:00
parent 24880b5042
commit 7217bfdb39
10 changed files with 1023 additions and 143 deletions

View File

@@ -0,0 +1,72 @@
# src/core/Hentai2read_client.py
import re
import os
import json
import requests
import cloudscraper
from bs4 import BeautifulSoup
def fetch_hentai2read_data(url, logger, session):
"""
Scrapes a SINGLE Hentai2Read chapter page using a provided session.
"""
logger(f"Attempting to fetch chapter data from: {url}")
try:
response = session.get(url, timeout=30)
response.raise_for_status()
page_content_text = response.text
soup = BeautifulSoup(page_content_text, 'html.parser')
album_title = ""
title_tags = soup.select('span[itemprop="name"]')
if title_tags:
album_title = title_tags[-1].text.strip()
if not album_title:
title_tag = soup.select_one('h1.title')
if title_tag:
album_title = title_tag.text.strip()
if not album_title:
logger("❌ Could not find album title on page.")
return None, None
image_urls = []
try:
start_index = page_content_text.index("'images' : ") + len("'images' : ")
end_index = page_content_text.index(",\n", start_index)
images_json_str = page_content_text[start_index:end_index]
image_paths = json.loads(images_json_str)
image_urls = ["https://hentaicdn.com/hentai" + part for part in image_paths]
except (ValueError, json.JSONDecodeError):
logger("❌ Could not find or parse image JSON data for this chapter.")
return None, None
if not image_urls:
logger("❌ No image URLs found for this chapter.")
return None, None
logger(f" Found {len(image_urls)} images for album '{album_title}'.")
files_to_download = []
for i, img_url in enumerate(image_urls):
page_num = i + 1
extension = os.path.splitext(img_url)[1].split('?')[0]
if not extension: extension = ".jpg"
filename = f"{page_num:03d}{extension}"
files_to_download.append({'url': img_url, 'filename': filename})
return album_title, files_to_download
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
logger(f" Chapter not found (404 Error). This likely marks the end of the series.")
else:
logger(f"❌ An HTTP error occurred: {e}")
return None, None
except Exception as e:
logger(f"❌ An unexpected error occurred while fetching data: {e}")
return None, None

View File

@@ -848,6 +848,8 @@ class PostProcessorWorker:
'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title,
'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post,
'forced_filename_override': filename_to_save_in_main_path,
'service': self.service,
'user_id': self.user_id
}
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details
finally: