6 Commits

Author SHA1 Message Date
Yuvi63771
257111d462 Update main_window.py 2025-11-02 09:40:25 +05:30
Yuvi63771
9563ce82db Commit 2025-11-01 10:41:00 +05:30
Yuvi63771
169ded3fd8 Commit 2025-10-30 08:05:45 +05:30
Yuvi63771
7e8e8a59e2 commit 2025-10-26 12:08:48 +05:30
Yuvi63771
0acd433920 commit 2025-10-25 08:19:06 +05:30
Yuvi63771
cef4211d7b Commit 2025-10-20 13:37:27 +05:30
8 changed files with 872 additions and 114 deletions

View File

@@ -1,5 +1,3 @@
# src/core/Hentai2read_client.py
import re import re
import os import os
import time import time
@@ -65,12 +63,37 @@ def run_hentai2read_download(start_url, output_dir, progress_callback, overall_p
def _get_series_metadata(start_url, progress_callback, scraper): def _get_series_metadata(start_url, progress_callback, scraper):
""" """
Scrapes the main series page to get the Artist Name, Series Title, and chapter list. Scrapes the main series page to get the Artist Name, Series Title, and chapter list.
Includes a retry mechanism for the initial connection.
""" """
try: max_retries = 4 # Total number of attempts (1 initial + 3 retries)
response = scraper.get(start_url, timeout=30) last_exception = None
response.raise_for_status() soup = None
soup = BeautifulSoup(response.text, 'html.parser')
for attempt in range(max_retries):
try:
if attempt > 0:
progress_callback(f" [Hentai2Read] ⚠️ Retrying connection (Attempt {attempt + 1}/{max_retries})...")
response = scraper.get(start_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
# If successful, clear exception and break the loop
last_exception = None
break
except Exception as e:
last_exception = e
progress_callback(f" [Hentai2Read] ⚠️ Connection attempt {attempt + 1} failed: {e}")
if attempt < max_retries - 1:
time.sleep(2 * (attempt + 1)) # Wait 2s, 4s, 6s
continue # Try again
if last_exception:
progress_callback(f" [Hentai2Read] ❌ Error getting series metadata after {max_retries} attempts: {last_exception}")
return "Unknown Series", []
try:
series_title = "Unknown Series" series_title = "Unknown Series"
artist_name = None artist_name = None
metadata_list = soup.select_one("ul.list.list-simple-mini") metadata_list = soup.select_one("ul.list.list-simple-mini")
@@ -107,10 +130,9 @@ def _get_series_metadata(start_url, progress_callback, scraper):
return top_level_folder_name, chapters_to_process return top_level_folder_name, chapters_to_process
except Exception as e: except Exception as e:
progress_callback(f" [Hentai2Read] ❌ Error getting series metadata: {e}") progress_callback(f" [Hentai2Read] ❌ Error parsing metadata after successful connection: {e}")
return "Unknown Series", [] return "Unknown Series", []
### NEW: This function contains the pipeline logic ###
def _process_and_download_chapter(chapter_url, save_path, scraper, progress_callback, check_pause_func): def _process_and_download_chapter(chapter_url, save_path, scraper, progress_callback, check_pause_func):
""" """
Uses a producer-consumer pattern to download a chapter. Uses a producer-consumer pattern to download a chapter.
@@ -120,12 +142,10 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
task_queue = queue.Queue() task_queue = queue.Queue()
num_download_threads = 8 num_download_threads = 8
# These will be updated by the worker threads
download_stats = {'downloaded': 0, 'skipped': 0} download_stats = {'downloaded': 0, 'skipped': 0}
def downloader_worker(): def downloader_worker():
"""The function that each download thread will run.""" """The function that each download thread will run."""
# Create a unique session for each thread to avoid conflicts
worker_scraper = cloudscraper.create_scraper() worker_scraper = cloudscraper.create_scraper()
while True: while True:
try: try:
@@ -153,12 +173,10 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
finally: finally:
task_queue.task_done() task_queue.task_done()
# --- Start the downloader threads ---
executor = ThreadPoolExecutor(max_workers=num_download_threads, thread_name_prefix='H2R_Downloader') executor = ThreadPoolExecutor(max_workers=num_download_threads, thread_name_prefix='H2R_Downloader')
for _ in range(num_download_threads): for _ in range(num_download_threads):
executor.submit(downloader_worker) executor.submit(downloader_worker)
# --- Main thread acts as the scraper (producer) ---
page_number = 1 page_number = 1
while True: while True:
if check_pause_func(): break if check_pause_func(): break
@@ -168,12 +186,25 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
page_url_to_check = f"{chapter_url}{page_number}/" page_url_to_check = f"{chapter_url}{page_number}/"
try: try:
response = scraper.get(page_url_to_check, timeout=30) page_response = None
if response.history or response.status_code != 200: page_last_exception = None
for page_attempt in range(3): # 3 attempts for sub-pages
try:
page_response = scraper.get(page_url_to_check, timeout=30)
page_last_exception = None
break
except Exception as e:
page_last_exception = e
time.sleep(1) # Short delay for page scraping retries
if page_last_exception:
raise page_last_exception # Give up after 3 tries
if page_response.history or page_response.status_code != 200:
progress_callback(f" [Hentai2Read] End of chapter detected on page {page_number}.") progress_callback(f" [Hentai2Read] End of chapter detected on page {page_number}.")
break break
soup = BeautifulSoup(response.text, 'html.parser') soup = BeautifulSoup(page_response.text, 'html.parser')
img_tag = soup.select_one("img#arf-reader") img_tag = soup.select_one("img#arf-reader")
img_src = img_tag.get("src") if img_tag else None img_src = img_tag.get("src") if img_tag else None
@@ -181,12 +212,11 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
progress_callback(f" [Hentai2Read] End of chapter detected (Placeholder image on page {page_number}).") progress_callback(f" [Hentai2Read] End of chapter detected (Placeholder image on page {page_number}).")
break break
normalized_img_src = urljoin(response.url, img_src) normalized_img_src = urljoin(page_response.url, img_src)
ext = os.path.splitext(normalized_img_src.split('/')[-1])[-1] or ".jpg" ext = os.path.splitext(normalized_img_src.split('/')[-1])[-1] or ".jpg"
filename = f"{page_number:03d}{ext}" filename = f"{page_number:03d}{ext}"
filepath = os.path.join(save_path, filename) filepath = os.path.join(save_path, filename)
# Put the download task into the queue for a worker to pick up
task_queue.put((filepath, normalized_img_src)) task_queue.put((filepath, normalized_img_src))
page_number += 1 page_number += 1
@@ -195,12 +225,9 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
progress_callback(f" [Hentai2Read] ❌ Error while scraping page {page_number}: {e}") progress_callback(f" [Hentai2Read] ❌ Error while scraping page {page_number}: {e}")
break break
# --- Shutdown sequence ---
# Tell all worker threads to exit by sending the sentinel value
for _ in range(num_download_threads): for _ in range(num_download_threads):
task_queue.put(None) task_queue.put(None)
# Wait for all download tasks to be completed
executor.shutdown(wait=True) executor.shutdown(wait=True)
progress_callback(f" Found and processed {page_number - 1} images for this chapter.") progress_callback(f" Found and processed {page_number - 1} images for this chapter.")

View File

@@ -69,15 +69,28 @@ def fetch_fap_nation_data(album_url, logger_func):
if direct_links_found: if direct_links_found:
logger_func(f" [Fap-Nation] Found {len(direct_links_found)} direct media link(s). Selecting the best quality...") logger_func(f" [Fap-Nation] Found {len(direct_links_found)} direct media link(s). Selecting the best quality...")
best_link = direct_links_found[0] best_link = None
for link in direct_links_found: # Define qualities from highest to lowest
if '1080p' in link.lower(): qualities_to_check = ['1080p', '720p', '480p', '360p']
best_link = link
break # Find the best quality link by iterating through preferred qualities
for quality in qualities_to_check:
for link in direct_links_found:
if quality in link.lower():
best_link = link
logger_func(f" [Fap-Nation] Found '{quality}' link: {best_link}")
break # Found the best link for this quality level
if best_link:
break # Found the highest quality available
# Fallback if no quality string was found in any link
if not best_link:
best_link = direct_links_found[0]
logger_func(f" [Fap-Nation] ⚠️ No quality tags (1080p, 720p, etc.) found in links. Defaulting to first link: {best_link}")
final_url = best_link final_url = best_link
link_type = 'direct' link_type = 'direct'
logger_func(f" [Fap-Nation] Identified direct media link: {final_url}") logger_func(f" [Fap-Nation] Identified direct media link: {final_url}")
# If after all checks, we still have no URL, then fail. # If after all checks, we still have no URL, then fail.
if not final_url: if not final_url:
logger_func(" [Fap-Nation] ❌ Stage 1 Failed: Could not find any HLS stream or direct link.") logger_func(" [Fap-Nation] ❌ Stage 1 Failed: Could not find any HLS stream or direct link.")

View File

@@ -52,7 +52,7 @@ from ..utils.file_utils import (
from ..utils.network_utils import prepare_cookies_for_request, get_link_platform from ..utils.network_utils import prepare_cookies_for_request, get_link_platform
from ..utils.text_utils import ( from ..utils.text_utils import (
is_title_match_for_character, is_filename_match_for_character, strip_html_tags, is_title_match_for_character, is_filename_match_for_character, strip_html_tags,
extract_folder_name_from_title, # This was the function causing the error extract_folder_name_from_title,
match_folders_from_title, match_folders_from_filename_enhanced match_folders_from_title, match_folders_from_filename_enhanced
) )
from ..config.constants import * from ..config.constants import *
@@ -1810,6 +1810,31 @@ class PostProcessorWorker:
if not all_files_from_post_api: if not all_files_from_post_api:
self.logger(f" No files found to download for post {post_id}.") self.logger(f" No files found to download for post {post_id}.")
if not self.extract_links_only and should_create_post_subfolder:
path_to_check_for_emptiness = determined_post_save_path_for_history
try:
if os.path.isdir(path_to_check_for_emptiness):
dir_contents = os.listdir(path_to_check_for_emptiness)
# Check if the directory is empty OR only contains our ID file
is_effectively_empty = True
if dir_contents:
if not all(f.startswith('.postid_') for f in dir_contents):
is_effectively_empty = False
if is_effectively_empty:
self.logger(f" 🗑️ Removing empty post-specific subfolder (post had no files): '{path_to_check_for_emptiness}'")
if dir_contents:
for id_file in dir_contents:
if id_file.startswith('.postid_'):
try:
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
except OSError as e_rm_id:
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
os.rmdir(path_to_check_for_emptiness)
except OSError as e_rmdir:
self.logger(f" ⚠️ Could not remove effectively empty subfolder (no files) '{path_to_check_for_emptiness}': {e_rmdir}")
# --- END NEW CLEANUP LOGIC ---
history_data_for_no_files_post = { history_data_for_no_files_post = {
'post_title': post_title, 'post_title': post_title,
'post_id': post_id, 'post_id': post_id,
@@ -2052,9 +2077,27 @@ class PostProcessorWorker:
if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0: if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
path_to_check_for_emptiness = determined_post_save_path_for_history path_to_check_for_emptiness = determined_post_save_path_for_history
try: try:
if os.path.isdir(path_to_check_for_emptiness) and not os.listdir(path_to_check_for_emptiness): if os.path.isdir(path_to_check_for_emptiness):
self.logger(f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness}'") dir_contents = os.listdir(path_to_check_for_emptiness)
os.rmdir(path_to_check_for_emptiness) # Check if the directory is empty OR only contains our ID file
is_effectively_empty = True
if dir_contents:
# If there are files, check if ALL of them are .postid files
if not all(f.startswith('.postid_') for f in dir_contents):
is_effectively_empty = False
if is_effectively_empty:
self.logger(f" 🗑️ Removing empty post-specific subfolder (no files downloaded): '{path_to_check_for_emptiness}'")
# We must first remove the ID file(s) before removing the dir
if dir_contents:
for id_file in dir_contents:
if id_file.startswith('.postid_'):
try:
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
except OSError as e_rm_id:
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
os.rmdir(path_to_check_for_emptiness) # Now the rmdir should work
except OSError as e_rmdir: except OSError as e_rmdir:
self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}") self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
@@ -2066,11 +2109,29 @@ class PostProcessorWorker:
if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0: if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
path_to_check_for_emptiness = determined_post_save_path_for_history path_to_check_for_emptiness = determined_post_save_path_for_history
try: try:
if os.path.isdir(path_to_check_for_emptiness) and not os.listdir(path_to_check_for_emptiness): if os.path.isdir(path_to_check_for_emptiness):
self.logger(f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness}'") dir_contents = os.listdir(path_to_check_for_emptiness)
os.rmdir(path_to_check_for_emptiness) # Check if the directory is empty OR only contains our ID file
is_effectively_empty = True
if dir_contents:
# If there are files, check if ALL of them are .postid files
if not all(f.startswith('.postid_') for f in dir_contents):
is_effectively_empty = False
if is_effectively_empty:
self.logger(f" 🗑️ Removing empty post-specific subfolder (no files downloaded): '{path_to_check_for_emptiness}'")
# We must first remove the ID file(s) before removing the dir
if dir_contents:
for id_file in dir_contents:
if id_file.startswith('.postid_'):
try:
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
except OSError as e_rm_id:
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
os.rmdir(path_to_check_for_emptiness) # Now the rmdir should work
except OSError as e_rmdir: except OSError as e_rmdir:
self.logger(f" ⚠️ Could not remove potentially empty subfolder '{path_to_check_for_emptiness}': {e_rmdir}") self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
self._emit_signal('worker_finished', result_tuple) self._emit_signal('worker_finished', result_tuple)
return result_tuple return result_tuple

View File

@@ -2,32 +2,38 @@ import re
import requests import requests
from urllib.parse import urlparse from urllib.parse import urlparse
# Utility Imports
from ...utils.network_utils import prepare_cookies_for_request from ...utils.network_utils import prepare_cookies_for_request
from ...utils.file_utils import clean_folder_name from ...utils.file_utils import clean_folder_name
# Downloader Thread Imports (Alphabetical Order Recommended)
from .allcomic_downloader_thread import AllcomicDownloadThread from .allcomic_downloader_thread import AllcomicDownloadThread
from .booru_downloader_thread import BooruDownloadThread from .booru_downloader_thread import BooruDownloadThread
from .bunkr_downloader_thread import BunkrDownloadThread from .bunkr_downloader_thread import BunkrDownloadThread
from .discord_downloader_thread import DiscordDownloadThread from .discord_downloader_thread import DiscordDownloadThread # Official Discord
from .drive_downloader_thread import DriveDownloadThread from .drive_downloader_thread import DriveDownloadThread
from .erome_downloader_thread import EromeDownloadThread from .erome_downloader_thread import EromeDownloadThread
from .external_link_downloader_thread import ExternalLinkDownloadThread from .external_link_downloader_thread import ExternalLinkDownloadThread
from .fap_nation_downloader_thread import FapNationDownloadThread from .fap_nation_downloader_thread import FapNationDownloadThread
from .hentai2read_downloader_thread import Hentai2readDownloadThread from .hentai2read_downloader_thread import Hentai2readDownloadThread
from .kemono_discord_downloader_thread import KemonoDiscordDownloadThread
from .mangadex_downloader_thread import MangaDexDownloadThread from .mangadex_downloader_thread import MangaDexDownloadThread
from .nhentai_downloader_thread import NhentaiDownloadThread from .nhentai_downloader_thread import NhentaiDownloadThread
from .pixeldrain_downloader_thread import PixeldrainDownloadThread from .pixeldrain_downloader_thread import PixeldrainDownloadThread
from .rule34video_downloader_thread import Rule34VideoDownloadThread
from .saint2_downloader_thread import Saint2DownloadThread from .saint2_downloader_thread import Saint2DownloadThread
from .simp_city_downloader_thread import SimpCityDownloadThread from .simp_city_downloader_thread import SimpCityDownloadThread
from .toonily_downloader_thread import ToonilyDownloadThread from .toonily_downloader_thread import ToonilyDownloadThread
from .rule34video_downloader_thread import Rule34VideoDownloadThread
def create_downloader_thread(main_app, api_url, service, id1, id2, effective_output_dir_for_run): def create_downloader_thread(main_app, api_url, service, id1, id2, effective_output_dir_for_run):
""" """
Factory function to create and configure the correct QThread for a given URL. Factory function to create and configure the correct QThread for a given URL.
Returns a configured QThread instance or None if no special handler is found. Returns a configured QThread instance, a specific error string ("COOKIE_ERROR", "FETCH_ERROR"),
or None if no special handler is found (indicating fallback to generic BackendDownloadThread).
""" """
# Handler for Booru sites (Danbooru, Gelbooru) # Handler for Booru sites (Danbooru, Gelbooru)
if service in ['danbooru', 'gelbooru']: if service in ['danbooru', 'gelbooru']:
api_key = main_app.api_key_input.text().strip() api_key = main_app.api_key_input.text().strip()
@@ -37,7 +43,7 @@ def create_downloader_thread(main_app, api_url, service, id1, id2, effective_out
api_key=api_key, user_id=user_id, parent=main_app api_key=api_key, user_id=user_id, parent=main_app
) )
# Handler for cloud storage sites (Mega, GDrive, etc.) # Handler for cloud storage sites (Mega, GDrive, Dropbox, GoFile)
platform = None platform = None
if 'mega.nz' in api_url or 'mega.io' in api_url: platform = 'mega' if 'mega.nz' in api_url or 'mega.io' in api_url: platform = 'mega'
elif 'drive.google.com' in api_url: platform = 'gdrive' elif 'drive.google.com' in api_url: platform = 'gdrive'
@@ -47,7 +53,8 @@ def create_downloader_thread(main_app, api_url, service, id1, id2, effective_out
use_post_subfolder = main_app.use_subfolder_per_post_checkbox.isChecked() use_post_subfolder = main_app.use_subfolder_per_post_checkbox.isChecked()
return DriveDownloadThread( return DriveDownloadThread(
api_url, effective_output_dir_for_run, platform, use_post_subfolder, api_url, effective_output_dir_for_run, platform, use_post_subfolder,
main_app.cancellation_event, main_app.pause_event, main_app.log_signal.emit main_app.cancellation_event, main_app.pause_event, main_app.log_signal.emit,
parent=main_app # Pass parent for consistency
) )
# Handler for Erome # Handler for Erome
@@ -59,75 +66,118 @@ def create_downloader_thread(main_app, api_url, service, id1, id2, effective_out
return MangaDexDownloadThread(api_url, effective_output_dir_for_run, main_app) return MangaDexDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Saint2 # Handler for Saint2
is_saint2_url = 'saint2.su' in api_url or 'saint2.pk' in api_url is_saint2_url = service == 'saint2' or 'saint2.su' in api_url or 'saint2.pk' in api_url # Add more domains if needed
if is_saint2_url and api_url.strip().lower() != 'saint2.su': # Exclude batch mode trigger if is_saint2_url and api_url.strip().lower() != 'saint2.su': # Exclude batch mode trigger if using URL input
return Saint2DownloadThread(api_url, effective_output_dir_for_run, main_app) return Saint2DownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for SimpCity # Handler for SimpCity
if service == 'simpcity': if service == 'simpcity':
cookies = prepare_cookies_for_request( cookies = prepare_cookies_for_request(
use_cookie_flag=True, cookie_text_input=main_app.cookie_text_input.text(), use_cookie_flag=True, # SimpCity requires cookies
selected_cookie_file_path=main_app.selected_cookie_filepath, cookie_text_input=main_app.simpcity_cookie_text_input.text(), # Use dedicated input
app_base_dir=main_app.app_base_dir, logger_func=main_app.log_signal.emit, selected_cookie_file_path=main_app.selected_cookie_filepath, # Use shared selection
target_domain='simpcity.cr' app_base_dir=main_app.app_base_dir,
logger_func=main_app.log_signal.emit,
target_domain='simpcity.cr' # Specific domain
) )
if not cookies: if not cookies:
# The main app will handle the error dialog main_app.log_signal.emit("❌ SimpCity requires valid cookies. Please provide them.")
return "COOKIE_ERROR" return "COOKIE_ERROR" # Sentinel value for cookie failure
return SimpCityDownloadThread(api_url, id2, effective_output_dir_for_run, cookies, main_app) return SimpCityDownloadThread(api_url, id2, effective_output_dir_for_run, cookies, main_app)
# Handler for Rule34Video
if service == 'rule34video': if service == 'rule34video':
main_app.log_signal.emit(" Rule34Video.com URL detected. Starting dedicated downloader.") main_app.log_signal.emit(" Rule34Video.com URL detected. Starting dedicated downloader.")
# id1 contains the video_id from extract_post_info return Rule34VideoDownloadThread(api_url, effective_output_dir_for_run, main_app) # id1 (video_id) is used inside the thread
return Rule34VideoDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for official Discord URLs # HANDLER FOR KEMONO DISCORD (Place BEFORE official Discord)
if 'discord.com' in api_url and service == 'discord': elif service == 'discord' and any(domain in api_url for domain in ['kemono.cr', 'kemono.su', 'kemono.party']):
token = main_app.remove_from_filename_input.text().strip() main_app.log_signal.emit(" Kemono Discord URL detected. Starting dedicated downloader.")
limit_text = main_app.discord_message_limit_input.text().strip() cookies = prepare_cookies_for_request(
message_limit = int(limit_text) if limit_text else None use_cookie_flag=main_app.use_cookie_checkbox.isChecked(), # Respect UI setting
mode = 'pdf' if main_app.discord_download_scope == 'messages' else 'files' cookie_text_input=main_app.cookie_text_input.text(),
return DiscordDownloadThread( selected_cookie_file_path=main_app.selected_cookie_filepath,
mode=mode, session=requests.Session(), token=token, output_dir=effective_output_dir_for_run, app_base_dir=main_app.app_base_dir,
server_id=id1, channel_id=id2, url=api_url, app_base_dir=main_app.app_base_dir, logger_func=main_app.log_signal.emit,
limit=message_limit, parent=main_app target_domain='kemono.cr' # Primary Kemono domain, adjust if needed
)
# KemonoDiscordDownloadThread expects parent for events
return KemonoDiscordDownloadThread(
server_id=id1,
channel_id=id2,
output_dir=effective_output_dir_for_run,
cookies_dict=cookies,
parent=main_app
) )
# Handler for Allcomic/Allporncomic # Handler for official Discord URLs
if 'allcomic.com' in api_url or 'allporncomic.com' in api_url: elif service == 'discord' and 'discord.com' in api_url:
main_app.log_signal.emit(" Official Discord URL detected. Starting dedicated downloader.")
token = main_app.remove_from_filename_input.text().strip() # Token is in the "Remove Words" field for Discord
if not token:
main_app.log_signal.emit("❌ Official Discord requires an Authorization Token in the 'Remove Words' field.")
return None # Or a specific error sentinel
limit_text = main_app.discord_message_limit_input.text().strip()
message_limit = int(limit_text) if limit_text.isdigit() else None
mode = main_app.discord_download_scope # Should be 'pdf' or 'files'
return DiscordDownloadThread(
mode=mode,
session=requests.Session(), # Create a session for this thread
token=token,
output_dir=effective_output_dir_for_run,
server_id=id1,
channel_id=id2,
url=api_url,
app_base_dir=main_app.app_base_dir,
limit=message_limit,
parent=main_app # Pass main_app for events/signals
)
# Check specific domains or rely on service name if extract_post_info provides it
if service == 'allcomic' or 'allcomic.com' in api_url or 'allporncomic.com' in api_url:
return AllcomicDownloadThread(api_url, effective_output_dir_for_run, main_app) return AllcomicDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Hentai2Read # Handler for Hentai2Read
if 'hentai2read.com' in api_url: if service == 'hentai2read' or 'hentai2read.com' in api_url:
return Hentai2readDownloadThread(api_url, effective_output_dir_for_run, main_app) return Hentai2readDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Fap-Nation # Handler for Fap-Nation
if 'fap-nation.com' in api_url or 'fap-nation.org' in api_url: if service == 'fap-nation' or 'fap-nation.com' in api_url or 'fap-nation.org' in api_url:
use_post_subfolder = main_app.use_subfolder_per_post_checkbox.isChecked() use_post_subfolder = main_app.use_subfolder_per_post_checkbox.isChecked()
# Ensure signals are passed correctly if needed by the thread
return FapNationDownloadThread( return FapNationDownloadThread(
api_url, effective_output_dir_for_run, use_post_subfolder, api_url, effective_output_dir_for_run, use_post_subfolder,
main_app.pause_event, main_app.cancellation_event, main_app.actual_gui_signals, main_app main_app.pause_event, main_app.cancellation_event, main_app.actual_gui_signals, main_app
) )
# Handler for Pixeldrain # Handler for Pixeldrain
if 'pixeldrain.com' in api_url: if service == 'pixeldrain' or 'pixeldrain.com' in api_url:
return PixeldrainDownloadThread(api_url, effective_output_dir_for_run, main_app) return PixeldrainDownloadThread(api_url, effective_output_dir_for_run, main_app) # URL contains the ID
# Handler for nHentai # Handler for nHentai
if service == 'nhentai': if service == 'nhentai':
from ...core.nhentai_client import fetch_nhentai_gallery from ...core.nhentai_client import fetch_nhentai_gallery
main_app.log_signal.emit(f" nHentai gallery ID {id1} detected. Fetching gallery data...")
gallery_data = fetch_nhentai_gallery(id1, main_app.log_signal.emit) gallery_data = fetch_nhentai_gallery(id1, main_app.log_signal.emit)
if not gallery_data: if not gallery_data:
main_app.log_signal.emit(f"❌ Failed to fetch nHentai gallery data for ID {id1}.")
return "FETCH_ERROR" # Sentinel value for fetch failure return "FETCH_ERROR" # Sentinel value for fetch failure
return NhentaiDownloadThread(gallery_data, effective_output_dir_for_run, main_app) return NhentaiDownloadThread(gallery_data, effective_output_dir_for_run, main_app)
# Handler for Toonily # Handler for Toonily
if 'toonily.com' in api_url: if service == 'toonily' or 'toonily.com' in api_url:
return ToonilyDownloadThread(api_url, effective_output_dir_for_run, main_app) return ToonilyDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Bunkr # Handler for Bunkr
if service == 'bunkr': if service == 'bunkr':
# id1 contains the full URL or album ID from extract_post_info
return BunkrDownloadThread(id1, effective_output_dir_for_run, main_app) return BunkrDownloadThread(id1, effective_output_dir_for_run, main_app)
# If no special handler matched, return None # --- Fallback ---
# If no specific handler matched based on service name or URL pattern, return None.
# This signals main_window.py to use the generic BackendDownloadThread/PostProcessorWorker
# which uses the standard Kemono/Coomer post API.
main_app.log_signal.emit(f" No specialized downloader found for service '{service}' and URL '{api_url[:50]}...'. Using generic downloader.")
return None return None

View File

@@ -0,0 +1,549 @@
# kemono_discord_downloader_thread.py
import os
import time
import uuid
import threading
import cloudscraper
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from PyQt5.QtCore import QThread, pyqtSignal
# --- Assuming these files are in the correct relative path ---
# Adjust imports if your project structure is different
try:
from ...core.discord_client import fetch_server_channels, fetch_channel_messages
from ...utils.file_utils import clean_filename
except ImportError as e:
# Basic fallback logging if signals aren't ready
print(f"ERROR: Failed to import required modules for Kemono Discord thread: {e}")
# Re-raise to prevent the thread from being created incorrectly
raise
# Custom exception for clean cancellation/pausing
class InterruptedError(Exception):
"""Custom exception for handling cancellations/pausing gracefully within download loops."""
pass
class KemonoDiscordDownloadThread(QThread):
"""
A dedicated QThread for downloading files from Kemono Discord server/channel pages,
using the Kemono API via discord_client and multithreading for file downloads.
Includes a single retry attempt after a 15-second delay for specific errors.
"""
# --- Signals ---
progress_signal = pyqtSignal(str) # General log messages
progress_label_signal = pyqtSignal(str) # Update main progress label (e.g., "Fetching messages...")
file_progress_signal = pyqtSignal(str, object) # Update file progress bar (filename, (downloaded_bytes, total_bytes | None))
permanent_file_failed_signal = pyqtSignal(list) # To report failures to main window
finished_signal = pyqtSignal(int, int, bool, list) # (downloaded_count, skipped_count, was_cancelled, [])
def __init__(self, server_id, channel_id, output_dir, cookies_dict, parent):
"""
Initializes the Kemono Discord downloader thread.
Args:
server_id (str): The Discord server ID from Kemono.
channel_id (str | None): The specific Discord channel ID from Kemono, if provided.
output_dir (str): The base directory to save downloaded files.
cookies_dict (dict | None): Cookies to use for requests.
parent (QWidget): The parent widget (main_app) to access events/settings.
"""
super().__init__(parent)
self.server_id = server_id
self.target_channel_id = channel_id # The specific channel from URL, if any
self.output_dir = output_dir
self.cookies_dict = cookies_dict
self.parent_app = parent # Access main app's events and settings
# --- Shared Events & Internal State ---
self.cancellation_event = getattr(parent, 'cancellation_event', threading.Event())
self.pause_event = getattr(parent, 'pause_event', threading.Event())
self._is_cancelled_internal = False # Internal flag for quick breaking
# --- Thread-Safe Counters ---
self.download_count = 0
self.skip_count = 0
self.count_lock = threading.Lock()
# --- List to Store Failure Details ---
self.permanently_failed_details = []
# --- Multithreading Configuration ---
self.num_file_threads = 1 # Default
try:
use_mt = getattr(self.parent_app, 'use_multithreading_checkbox', None)
thread_input = getattr(self.parent_app, 'thread_count_input', None)
if use_mt and use_mt.isChecked() and thread_input:
thread_count_ui = int(thread_input.text().strip())
# Apply a reasonable cap specific to this downloader type (adjust as needed)
self.num_file_threads = max(1, min(thread_count_ui, 20)) # Cap at 20 file threads
except (ValueError, AttributeError, TypeError):
try: self.progress_signal.emit("⚠️ Warning: Could not read thread count setting, defaulting to 1.")
except: pass
self.num_file_threads = 1 # Fallback on error getting setting
# --- Network Client ---
try:
self.scraper = cloudscraper.create_scraper(browser={'browser': 'firefox', 'platform': 'windows', 'mobile': False})
except Exception as e:
try: self.progress_signal.emit(f"❌ ERROR: Failed to initialize cloudscraper: {e}")
except: pass
self.scraper = None
# --- Control Methods (cancel, pause, resume - same as before) ---
def cancel(self):
self._is_cancelled_internal = True
self.cancellation_event.set()
try: self.progress_signal.emit(" Cancellation requested for Kemono Discord download.")
except: pass
def pause(self):
if not self.pause_event.is_set():
self.pause_event.set()
try: self.progress_signal.emit(" Pausing Kemono Discord download...")
except: pass
def resume(self):
if self.pause_event.is_set():
self.pause_event.clear()
try: self.progress_signal.emit(" Resuming Kemono Discord download...")
except: pass
# --- Helper: Check Cancellation/Pause (same as before) ---
def _check_events(self):
if self._is_cancelled_internal or self.cancellation_event.is_set():
if not self._is_cancelled_internal:
self._is_cancelled_internal = True
try: self.progress_signal.emit(" Cancellation detected by Kemono Discord thread check.")
except: pass
return True # Cancelled
was_paused = False
while self.pause_event.is_set():
if not was_paused:
try: self.progress_signal.emit(" Kemono Discord operation paused...")
except: pass
was_paused = True
if self.cancellation_event.is_set():
self._is_cancelled_internal = True
try: self.progress_signal.emit(" Cancellation detected while paused.")
except: pass
return True
time.sleep(0.5)
return False
# --- REVISED Helper: Download Single File with ONE Retry ---
def _download_single_kemono_file(self, file_info):
"""
Downloads a single file, handles collisions after download,
and automatically retries ONCE after 15s for specific network errors.
Returns:
tuple: (bool_success, dict_error_details_or_None)
"""
# --- Constants for Retry Logic ---
MAX_ATTEMPTS = 2 # 1 initial attempt + 1 retry
RETRY_DELAY_SECONDS = 15
# --- Extract info ---
channel_dir = file_info['channel_dir']
original_filename = file_info['original_filename']
file_url = file_info['file_url']
channel_id = file_info['channel_id']
post_title = file_info.get('post_title', f"Message in channel {channel_id}")
original_post_id_for_log = file_info.get('message_id', 'N/A')
base_kemono_domain = "kemono.cr"
if not self.scraper:
try: self.progress_signal.emit(f" ❌ Cannot download '{original_filename}': Cloudscraper not initialized.")
except: pass
failure_details = { 'file_info': {'url': file_url, 'name': original_filename}, 'post_title': post_title, 'original_post_id_for_log': original_post_id_for_log, 'target_folder_path': channel_dir, 'error': 'Cloudscraper not initialized', 'service': 'discord', 'user_id': self.server_id }
return False, failure_details
if self._check_events(): return False, None # Interrupted before start
# --- Determine filenames ---
cleaned_original_filename = clean_filename(original_filename)
intended_final_filename = cleaned_original_filename
unique_suffix = uuid.uuid4().hex[:8]
temp_filename = f"{intended_final_filename}.{unique_suffix}.part"
temp_filepath = os.path.join(channel_dir, temp_filename)
# --- Download Attempt Loop ---
download_successful = False
last_exception = None
should_retry = False # Flag to indicate if the first attempt failed with a retryable error
for attempt in range(1, MAX_ATTEMPTS + 1):
response = None
try:
# --- Pre-attempt checks ---
if self._check_events(): raise InterruptedError("Cancelled/Paused before attempt")
if attempt == 2 and should_retry: # Only delay *before* the retry
try: self.progress_signal.emit(f" ⏳ Retrying '{original_filename}' (Attempt {attempt}/{MAX_ATTEMPTS}) after {RETRY_DELAY_SECONDS}s...")
except: pass
for _ in range(RETRY_DELAY_SECONDS):
if self._check_events(): raise InterruptedError("Cancelled/Paused during retry delay")
time.sleep(1)
# If it's attempt 2 but should_retry is False, it means the first error was non-retryable, so skip
elif attempt == 2 and not should_retry:
break # Exit loop, failure already determined
# --- Log attempt ---
log_prefix = f" ⬇️ Downloading:" if attempt == 1 else f" 🔄 Retrying:"
try: self.progress_signal.emit(f"{log_prefix} '{original_filename}' (Attempt {attempt}/{MAX_ATTEMPTS})...")
except: pass
if attempt == 1:
try: self.file_progress_signal.emit(original_filename, (0, 0))
except: pass
# --- Perform Download ---
headers = { 'User-Agent': 'Mozilla/5.0 ...', 'Referer': f'https://{base_kemono_domain}/discord/channel/{channel_id}'} # Shortened for brevity
response = self.scraper.get(file_url, headers=headers, cookies=self.cookies_dict, stream=True, timeout=(15, 120))
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
downloaded_size = 0
last_progress_emit_time = time.time()
with open(temp_filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=1024*1024):
if self._check_events(): raise InterruptedError("Cancelled/Paused during chunk writing")
if chunk:
f.write(chunk)
downloaded_size += len(chunk)
current_time = time.time()
if total_size > 0 and (current_time - last_progress_emit_time > 0.5 or downloaded_size == total_size):
try: self.file_progress_signal.emit(original_filename, (downloaded_size, total_size))
except: pass
last_progress_emit_time = current_time
elif total_size == 0 and (current_time - last_progress_emit_time > 0.5):
try: self.file_progress_signal.emit(original_filename, (downloaded_size, 0))
except: pass
last_progress_emit_time = current_time
response.close()
# --- Verification ---
if self._check_events(): raise InterruptedError("Cancelled/Paused after download completion")
if total_size > 0 and downloaded_size != total_size:
try: self.progress_signal.emit(f" ⚠️ Size mismatch on attempt {attempt} for '{original_filename}'. Expected {total_size}, got {downloaded_size}.")
except: pass
last_exception = IOError(f"Size mismatch: Expected {total_size}, got {downloaded_size}")
if os.path.exists(temp_filepath):
try: os.remove(temp_filepath)
except OSError: pass
should_retry = (attempt == 1) # Only retry if it was the first attempt
continue # Try again if attempt 1, otherwise loop finishes
else:
download_successful = True
break # Success!
# --- Error Handling within Loop ---
except InterruptedError as e:
last_exception = e
should_retry = False # Don't retry if interrupted
break
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError, cloudscraper.exceptions.CloudflareException) as e:
last_exception = e
try: self.progress_signal.emit(f" ❌ Network/Cloudflare error on attempt {attempt} for '{original_filename}': {e}")
except: pass
should_retry = (attempt == 1) # Retry only if first attempt
except requests.exceptions.RequestException as e:
status_code = getattr(e.response, 'status_code', None)
if status_code and 500 <= status_code <= 599: # Retry on 5xx
last_exception = e
try: self.progress_signal.emit(f" ❌ Server error ({status_code}) on attempt {attempt} for '{original_filename}'. Will retry...")
except: pass
should_retry = (attempt == 1) # Retry only if first attempt
else: # Don't retry on 4xx or other request errors
last_exception = e
try: self.progress_signal.emit(f" ❌ Non-retryable HTTP error for '{original_filename}': {e}")
except: pass
should_retry = False
break
except OSError as e:
last_exception = e
try: self.progress_signal.emit(f" ❌ OS error during download attempt {attempt} for '{original_filename}': {e}")
except: pass
should_retry = False
break
except Exception as e:
last_exception = e
try: self.progress_signal.emit(f" ❌ Unexpected error on attempt {attempt} for '{original_filename}': {e}")
except: pass
should_retry = False
break
finally:
if response:
try: response.close()
except Exception: pass
# --- End Download Attempt Loop ---
try: self.file_progress_signal.emit(original_filename, None) # Clear progress
except: pass
# --- Post-Download Processing ---
if download_successful:
# --- Rename Logic ---
final_filename_to_use = intended_final_filename
final_filepath_on_disk = os.path.join(channel_dir, final_filename_to_use)
counter = 1
base_name, extension = os.path.splitext(intended_final_filename)
while os.path.exists(final_filepath_on_disk):
final_filename_to_use = f"{base_name} ({counter}){extension}"
final_filepath_on_disk = os.path.join(channel_dir, final_filename_to_use)
counter += 1
if final_filename_to_use != intended_final_filename:
try: self.progress_signal.emit(f" -> Name conflict for '{intended_final_filename}'. Renaming to '{final_filename_to_use}'.")
except: pass
try:
os.rename(temp_filepath, final_filepath_on_disk)
try: self.progress_signal.emit(f" ✅ Saved: '{final_filename_to_use}'")
except: pass
return True, None # SUCCESS
except OSError as e:
try: self.progress_signal.emit(f" ❌ OS error renaming temp file to '{final_filename_to_use}': {e}")
except: pass
if os.path.exists(temp_filepath):
try: os.remove(temp_filepath)
except OSError: pass
# ---> RETURN FAILURE TUPLE (Rename Failed) <---
failure_details = { 'file_info': {'url': file_url, 'name': original_filename}, 'post_title': post_title, 'original_post_id_for_log': original_post_id_for_log, 'target_folder_path': channel_dir, 'intended_filename': intended_final_filename, 'error': f"Rename failed: {e}", 'service': 'discord', 'user_id': self.server_id }
return False, failure_details
else:
# Download failed or was interrupted
if not isinstance(last_exception, InterruptedError):
try: self.progress_signal.emit(f" ❌ FAILED to download '{original_filename}' after {MAX_ATTEMPTS} attempts. Last error: {last_exception}")
except: pass
if os.path.exists(temp_filepath):
try: os.remove(temp_filepath)
except OSError as e_rem:
try: self.progress_signal.emit(f" (Failed to remove temp file '{temp_filename}': {e_rem})")
except: pass
# ---> RETURN FAILURE TUPLE (Download Failed/Interrupted) <---
# Only generate details if it wasn't interrupted by user
failure_details = None
if not isinstance(last_exception, InterruptedError):
failure_details = {
'file_info': {'url': file_url, 'name': original_filename},
'post_title': post_title, 'original_post_id_for_log': original_post_id_for_log,
'target_folder_path': channel_dir, 'intended_filename': intended_final_filename,
'error': f"Failed after {MAX_ATTEMPTS} attempts: {last_exception}",
'service': 'discord', 'user_id': self.server_id,
'forced_filename_override': intended_final_filename,
'file_index_in_post': file_info.get('file_index', 0),
'num_files_in_this_post': file_info.get('num_files', 1)
}
return False, failure_details # Return None details if interrupted
# --- Main Thread Execution ---
def run(self):
"""Main execution logic: Fetches channels/messages and dispatches file downloads."""
self.download_count = 0
self.skip_count = 0
self._is_cancelled_internal = False
self.permanently_failed_details = [] # Reset failed list
if not self.scraper:
try: self.progress_signal.emit("❌ Aborting Kemono Discord download: Cloudscraper failed to initialize.")
except: pass
self.finished_signal.emit(0, 0, False, [])
return
try:
# --- Log Start ---
try:
self.progress_signal.emit("=" * 40)
self.progress_signal.emit(f"🚀 Starting Kemono Discord download for server: {self.server_id}")
self.progress_signal.emit(f" Using {self.num_file_threads} thread(s) for file downloads.")
except: pass
# --- Channel Fetching (same as before) ---
channels_to_process = []
# ... (logic to populate channels_to_process using fetch_server_channels or target_channel_id) ...
if self.target_channel_id:
channels_to_process.append({'id': self.target_channel_id, 'name': self.target_channel_id})
try: self.progress_signal.emit(f" Targeting specific channel: {self.target_channel_id}")
except: pass
else:
try: self.progress_label_signal.emit("Fetching server channels via Kemono API...")
except: pass
channels_data = fetch_server_channels(self.server_id, logger=self.progress_signal.emit, cookies_dict=self.cookies_dict)
if self._check_events(): return
if channels_data is not None:
channels_to_process = channels_data
try: self.progress_signal.emit(f" Found {len(channels_to_process)} channels.")
except: pass
else:
try: self.progress_signal.emit(f" ❌ Failed to fetch channels for server {self.server_id} via Kemono API.")
except: pass
return
# --- Process Each Channel ---
for channel in channels_to_process:
if self._check_events(): break
channel_id = channel['id']
channel_name = clean_filename(channel.get('name', channel_id))
channel_dir = os.path.join(self.output_dir, channel_name)
try:
os.makedirs(channel_dir, exist_ok=True)
except OSError as e:
try: self.progress_signal.emit(f" ❌ Failed to create directory for channel '{channel_name}': {e}. Skipping channel.")
except: pass
continue
try:
self.progress_signal.emit(f"\n--- Processing Channel: #{channel_name} ({channel_id}) ---")
self.progress_label_signal.emit(f"Fetching messages for #{channel_name}...")
except: pass
# --- Collect File Download Tasks ---
file_tasks = []
message_generator = fetch_channel_messages(
channel_id, logger=self.progress_signal.emit,
cancellation_event=self.cancellation_event, pause_event=self.pause_event,
cookies_dict=self.cookies_dict
)
try:
message_index = 0
for message_batch in message_generator:
if self._check_events(): break
for message in message_batch:
message_id = message.get('id', f'msg_{message_index}')
post_title_context = (message.get('content') or f"Message {message_id}")[:50] + "..."
attachments = message.get('attachments', [])
file_index_in_message = 0
num_files_in_message = len(attachments)
for attachment in attachments:
if self._check_events(): raise InterruptedError
file_path = attachment.get('path')
original_filename = attachment.get('name')
if file_path and original_filename:
base_kemono_domain = "kemono.cr"
if not file_path.startswith('/'): file_path = '/' + file_path
file_url = f"https://{base_kemono_domain}/data{file_path}"
file_tasks.append({
'channel_dir': channel_dir, 'original_filename': original_filename,
'file_url': file_url, 'channel_id': channel_id,
'message_id': message_id, 'post_title': post_title_context,
'file_index': file_index_in_message, 'num_files': num_files_in_message
})
file_index_in_message += 1
message_index += 1
if self._check_events(): raise InterruptedError
if self._check_events(): raise InterruptedError
except InterruptedError:
try: self.progress_signal.emit(" Interrupted while collecting file tasks.")
except: pass
break # Exit channel processing
except Exception as e_msg:
try: self.progress_signal.emit(f" ❌ Error fetching messages for channel {channel_name}: {e_msg}")
except: pass
continue # Continue to next channel
if self._check_events(): break
if not file_tasks:
try: self.progress_signal.emit(" No downloadable file attachments found in this channel's messages.")
except: pass
continue
try:
self.progress_signal.emit(f" Found {len(file_tasks)} potential file attachments. Starting downloads...")
self.progress_label_signal.emit(f"Downloading {len(file_tasks)} files for #{channel_name}...")
except: pass
# --- Execute Downloads Concurrently ---
files_processed_in_channel = 0
with ThreadPoolExecutor(max_workers=self.num_file_threads, thread_name_prefix=f"KDC_{channel_id[:4]}_") as executor:
futures = {executor.submit(self._download_single_kemono_file, task): task for task in file_tasks}
try:
for future in as_completed(futures):
files_processed_in_channel += 1
task_info = futures[future]
try:
success, details = future.result() # Unpack result
with self.count_lock:
if success:
self.download_count += 1
else:
self.skip_count += 1
if details: # Append details if the download permanently failed
self.permanently_failed_details.append(details)
except Exception as e_future:
filename = task_info.get('original_filename', 'unknown file')
try: self.progress_signal.emit(f" ❌ System error processing download future for '{filename}': {e_future}")
except: pass
with self.count_lock:
self.skip_count += 1
# Append details on system failure
failure_details = { 'file_info': {'url': task_info.get('file_url'), 'name': filename}, 'post_title': task_info.get('post_title', 'N/A'), 'original_post_id_for_log': task_info.get('message_id', 'N/A'), 'target_folder_path': task_info.get('channel_dir'), 'error': f"Future execution error: {e_future}", 'service': 'discord', 'user_id': self.server_id, 'forced_filename_override': clean_filename(filename), 'file_index_in_post': task_info.get('file_index', 0), 'num_files_in_this_post': task_info.get('num_files', 1) }
self.permanently_failed_details.append(failure_details)
try: self.progress_label_signal.emit(f"#{channel_name}: {files_processed_in_channel}/{len(file_tasks)} files processed")
except: pass
if self._check_events():
try: self.progress_signal.emit(" Cancelling remaining file downloads for this channel...")
except: pass
executor.shutdown(wait=False, cancel_futures=True)
break # Exit as_completed loop
except InterruptedError:
try: self.progress_signal.emit(" Download processing loop interrupted.")
except: pass
executor.shutdown(wait=False, cancel_futures=True)
if self._check_events(): break # Check between channels
# --- End Channel Loop ---
except Exception as e:
# Catch unexpected errors in the main run logic
try:
self.progress_signal.emit(f"❌ Unexpected critical error in Kemono Discord thread run loop: {e}")
import traceback
self.progress_signal.emit(traceback.format_exc())
except: pass # Avoid errors if signals fail at the very end
finally:
# --- Final Cleanup and Signal ---
try:
try: self.progress_signal.emit("=" * 40)
except: pass
cancelled = self._is_cancelled_internal or self.cancellation_event.is_set()
# --- EMIT FAILED FILES SIGNAL ---
if self.permanently_failed_details:
try:
self.progress_signal.emit(f" Reporting {len(self.permanently_failed_details)} permanently failed files...")
self.permanent_file_failed_signal.emit(list(self.permanently_failed_details)) # Emit a copy
except Exception as e_emit_fail:
print(f"ERROR emitting permanent_file_failed_signal: {e_emit_fail}")
# Log final status
try:
if cancelled and not self._is_cancelled_internal:
self.progress_signal.emit(" Kemono Discord download cancelled externally.")
elif self._is_cancelled_internal:
self.progress_signal.emit(" Kemono Discord download finished due to cancellation.")
else:
self.progress_signal.emit("✅ Kemono Discord download process finished.")
except: pass
# Clear file progress
try: self.file_progress_signal.emit("", None)
except: pass
# Get final counts safely
with self.count_lock:
final_download_count = self.download_count
final_skip_count = self.skip_count
# Emit finished signal
self.finished_signal.emit(final_download_count, final_skip_count, cancelled, [])
except Exception as e_final:
# Log final signal emission error if possible
print(f"ERROR in KemonoDiscordDownloadThread finally block: {e_final}")

View File

@@ -104,6 +104,7 @@ from .classes.drive_downloader_thread import DriveDownloadThread
from .classes.external_link_downloader_thread import ExternalLinkDownloadThread from .classes.external_link_downloader_thread import ExternalLinkDownloadThread
from .classes.nhentai_downloader_thread import NhentaiDownloadThread from .classes.nhentai_downloader_thread import NhentaiDownloadThread
from .classes.downloader_factory import create_downloader_thread from .classes.downloader_factory import create_downloader_thread
from .classes.kemono_discord_downloader_thread import KemonoDiscordDownloadThread
_ff_ver = (datetime.date.today().toordinal() - 735506) // 28 _ff_ver = (datetime.date.today().toordinal() - 735506) // 28
USERAGENT_FIREFOX = (f"Mozilla/5.0 (Windows NT 10.0; Win64; x64; " USERAGENT_FIREFOX = (f"Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
@@ -333,16 +334,14 @@ class DownloaderApp (QWidget ):
self.download_location_label_widget = None self.download_location_label_widget = None
self.remove_from_filename_label_widget = None self.remove_from_filename_label_widget = None
self.skip_words_label_widget = None self.skip_words_label_widget = None
self.setWindowTitle("Kemono Downloader v7.5.1") self.setWindowTitle("Kemono Downloader v7.5.2")
setup_ui(self) setup_ui(self)
self._connect_signals() self._connect_signals()
if hasattr(self, 'character_input'): if hasattr(self, 'character_input'):
self.character_input.setToolTip(self._tr("character_input_tooltip", "Enter character names (comma-separated)...")) self.character_input.setToolTip(self._tr("character_input_tooltip", "Enter character names (comma-separated)..."))
self.log_signal.emit(f" Manga filename style loaded: '{self.manga_filename_style}'") self.log_signal.emit(f" filename style loaded: '{self.manga_filename_style}'")
self.log_signal.emit(f" Skip words scope loaded: '{self.skip_words_scope}'") self.log_signal.emit(f" Skip words scope loaded: '{self.skip_words_scope}'")
self.log_signal.emit(f" Character filter scope set to default: '{self.char_filter_scope}'") self.log_signal.emit(f" Character filter scope set to default: '{self.char_filter_scope}'")
self.log_signal.emit(f" Multi-part download defaults to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}")
self.log_signal.emit(f" Scan post content for images defaults to: {'Enabled' if self.scan_content_images_setting else 'Disabled'}")
self.log_signal.emit(f" Application language loaded: '{self.current_selected_language.upper()}' (UI may not reflect this yet).") self.log_signal.emit(f" Application language loaded: '{self.current_selected_language.upper()}' (UI may not reflect this yet).")
self._retranslate_main_ui() self._retranslate_main_ui()
self._load_persistent_history() self._load_persistent_history()
@@ -494,6 +493,8 @@ class DownloaderApp (QWidget ):
def _connect_specialized_thread_signals(self, thread): def _connect_specialized_thread_signals(self, thread):
"""Connects common signals for specialized downloader threads.""" """Connects common signals for specialized downloader threads."""
is_kemono_discord = isinstance(thread, KemonoDiscordDownloadThread)
if hasattr(thread, 'progress_signal'): if hasattr(thread, 'progress_signal'):
thread.progress_signal.connect(self.handle_main_log) thread.progress_signal.connect(self.handle_main_log)
if hasattr(thread, 'file_progress_signal'): if hasattr(thread, 'file_progress_signal'):
@@ -508,6 +509,10 @@ class DownloaderApp (QWidget ):
if hasattr(thread, 'progress_label_signal'): # For Discord thread if hasattr(thread, 'progress_label_signal'): # For Discord thread
thread.progress_label_signal.connect(self.progress_label.setText) thread.progress_label_signal.connect(self.progress_label.setText)
if is_kemono_discord and hasattr(thread, 'permanent_file_failed_signal'):
thread.permanent_file_failed_signal.connect(self._handle_permanent_file_failure_from_thread)
print("DEBUG: Connected permanent_file_failed_signal for KemonoDiscordDownloadThread.") # Debug print
def _apply_theme_and_restart_prompt(self): def _apply_theme_and_restart_prompt(self):
"""Applies the theme and prompts the user to restart.""" """Applies the theme and prompts the user to restart."""
if self.current_theme == "dark": if self.current_theme == "dark":
@@ -824,14 +829,11 @@ class DownloaderApp (QWidget ):
self.download_btn.setEnabled(False) self.download_btn.setEnabled(False)
self.pause_btn.setEnabled(False) self.pause_btn.setEnabled(False)
else: else:
# --- START MODIFICATION ---
# Check if we are about to download fetched posts and update text accordingly
if self.is_ready_to_download_fetched: if self.is_ready_to_download_fetched:
num_posts = len(self.fetched_posts_for_download) num_posts = len(self.fetched_posts_for_download)
self.download_btn.setText(f"⬇️ Start Download ({num_posts} Posts)") self.download_btn.setText(f"⬇️ Start Download ({num_posts} Posts)")
self.download_btn.setEnabled(True) # Keep it enabled for the user to click self.download_btn.setEnabled(True) # Keep it enabled for the user to click
else: else:
# Original logic for an active download in other scenarios
self.download_btn.setText(self._tr("start_download_button_text", "⬇️ Start Download")) self.download_btn.setText(self._tr("start_download_button_text", "⬇️ Start Download"))
self.download_btn.setEnabled(False) self.download_btn.setEnabled(False)
@@ -919,11 +921,9 @@ class DownloaderApp (QWidget ):
args_template = self.last_start_download_args args_template = self.last_start_download_args
# Update both the character filter list and the domain override in the arguments
args_template['filter_character_list'] = parsed_filters args_template['filter_character_list'] = parsed_filters
args_template['domain_override'] = domain_override args_template['domain_override'] = domain_override
# Manually set the UI to a "downloading" state for reliability
self.set_ui_enabled(False) self.set_ui_enabled(False)
self.download_btn.setText("⬇️ Downloading...") self.download_btn.setText("⬇️ Downloading...")
self.download_btn.setEnabled(False) self.download_btn.setEnabled(False)
@@ -931,7 +931,6 @@ class DownloaderApp (QWidget ):
self.cancel_btn.setEnabled(True) self.cancel_btn.setEnabled(True)
self.cancel_btn.setText("❌ Cancel & Reset UI") self.cancel_btn.setText("❌ Cancel & Reset UI")
try: try:
# Ensure signals are connected to the correct actions for this state
self.cancel_btn.clicked.disconnect() self.cancel_btn.clicked.disconnect()
self.pause_btn.clicked.disconnect() self.pause_btn.clicked.disconnect()
except TypeError: except TypeError:
@@ -3184,8 +3183,7 @@ class DownloaderApp (QWidget ):
self .update_custom_folder_visibility () self .update_custom_folder_visibility ()
self .update_page_range_enabled_state () self .update_page_range_enabled_state ()
if self .manga_mode_checkbox : if self .manga_mode_checkbox :
self .manga_mode_checkbox .setChecked (False ) pass
self .manga_mode_checkbox .setEnabled (False )
if hasattr (self ,'use_cookie_checkbox'): if hasattr (self ,'use_cookie_checkbox'):
self .use_cookie_checkbox .setChecked (True ) self .use_cookie_checkbox .setChecked (True )
self .use_cookie_checkbox .setEnabled (False ) self .use_cookie_checkbox .setEnabled (False )
@@ -3247,8 +3245,7 @@ class DownloaderApp (QWidget ):
is_single_post = True is_single_post = True
# --- MODIFIED: Added check for is_discord_url --- # --- MODIFIED: Added check for is_discord_url ---
can_enable_manga_checkbox = (is_creator_feed or is_single_post) and not is_favorite_mode_on and not is_discord_url can_enable_manga_checkbox = ((is_creator_feed or is_single_post) or is_favorite_mode_on) and not is_discord_url
if self .manga_mode_checkbox : if self .manga_mode_checkbox :
self .manga_mode_checkbox .setEnabled (can_enable_manga_checkbox) self .manga_mode_checkbox .setEnabled (can_enable_manga_checkbox)
if not can_enable_manga_checkbox and self .manga_mode_checkbox .isChecked (): if not can_enable_manga_checkbox and self .manga_mode_checkbox .isChecked ():
@@ -5547,6 +5544,7 @@ class DownloaderApp (QWidget ):
'known_names':list (KNOWN_NAMES ), 'known_names':list (KNOWN_NAMES ),
'emitter':self .worker_to_gui_queue , 'emitter':self .worker_to_gui_queue ,
'unwanted_keywords':{'spicy','hd','nsfw','4k','preview','teaser','clip'}, 'unwanted_keywords':{'spicy','hd','nsfw','4k','preview','teaser','clip'},
'creator_name_cache': self.creator_name_cache,
'domain_override': domain_override_command, 'domain_override': domain_override_command,
'sfp_threshold': sfp_threshold_command, 'sfp_threshold': sfp_threshold_command,
'handle_unknown_mode': handle_unknown_command, 'handle_unknown_mode': handle_unknown_command,
@@ -5619,13 +5617,11 @@ class DownloaderApp (QWidget ):
api_domain = parsed_api_url.netloc if parsed_api_url.netloc else self._get_domain_for_service(service) api_domain = parsed_api_url.netloc if parsed_api_url.netloc else self._get_domain_for_service(service)
post_page_url = f"https://{api_domain}/{service}/user/{user_id}/post/{post_id}" post_page_url = f"https://{api_domain}/{service}/user/{user_id}/post/{post_id}"
# --- NEW LOGIC: Differentiate between loaded files and live session errors ---
# Initialize variables before the conditional blocks # Initialize variables before the conditional blocks
target_folder_path_for_download = None target_folder_path_for_download = None
filename_override_for_download = None filename_override_for_download = None
if job_details.get('is_loaded_from_txt'): if job_details.get('is_loaded_from_txt'):
# --- BEHAVIOR FOR LOADED FILES: Recalculate everything from current UI settings ---
self.log_signal.emit(f" Retrying loaded file. Recalculating path and name from current UI settings...") self.log_signal.emit(f" Retrying loaded file. Recalculating path and name from current UI settings...")
# 1. Get all current settings and job data # 1. Get all current settings and job data
@@ -6217,7 +6213,7 @@ class DownloaderApp (QWidget ):
'manga_date_prefix': self.manga_date_prefix_input.text().strip(), 'manga_date_prefix': self.manga_date_prefix_input.text().strip(),
'manga_date_file_counter_ref': None, 'manga_date_file_counter_ref': None,
'scan_content_for_images': self.scan_content_images_checkbox.isChecked(), 'scan_content_for_images': self.scan_content_images_checkbox.isChecked(),
'creator_name_cache': self.creator_name_cache,
'creator_download_folder_ignore_words': creator_folder_ignore_words_for_run, 'creator_download_folder_ignore_words': creator_folder_ignore_words_for_run,
'num_file_threads_for_worker': effective_num_file_threads_per_worker, 'num_file_threads_for_worker': effective_num_file_threads_per_worker,
'multipart_scope': 'files', 'multipart_scope': 'files',
@@ -6318,10 +6314,8 @@ class DownloaderApp (QWidget ):
if hasattr(self, 'link_input'): if hasattr(self, 'link_input'):
self.last_link_input_text_for_queue_sync = self.link_input.text() self.last_link_input_text_for_queue_sync = self.link_input.text()
# --- START: MODIFIED LOGIC ---
# Manually trigger the UI update now that the queue is populated and the dialog is closed. # Manually trigger the UI update now that the queue is populated and the dialog is closed.
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False) self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
# --- END: MODIFIED LOGIC ---
def _load_saved_cookie_settings(self): def _load_saved_cookie_settings(self):
"""Loads and applies saved cookie settings on startup.""" """Loads and applies saved cookie settings on startup."""

View File

@@ -26,6 +26,16 @@ KNOWN_TXT_MATCH_CLEANUP_PATTERNS = [
r'\bPreview\b', r'\bPreview\b',
] ]
# --- START NEW CODE ---
# Regular expression to detect CJK characters
# Covers Hiragana, Katakana, Half/Full width forms, CJK Unified Ideographs, Hangul Syllables, etc.
cjk_pattern = re.compile(r'[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uffef\u4e00-\u9fff\uac00-\ud7af]')
def contains_cjk(text):
"""Checks if the text contains any CJK characters."""
return bool(cjk_pattern.search(text))
# --- END NEW CODE ---
# --- Text Matching and Manipulation Utilities --- # --- Text Matching and Manipulation Utilities ---
def is_title_match_for_character(post_title, character_name_filter): def is_title_match_for_character(post_title, character_name_filter):
@@ -120,6 +130,7 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
""" """
Matches folder names from a title based on a list of known name objects. Matches folder names from a title based on a list of known name objects.
Each name object is a dict: {'name': 'PrimaryName', 'aliases': ['alias1', ...]} Each name object is a dict: {'name': 'PrimaryName', 'aliases': ['alias1', ...]}
MODIFIED: Uses substring matching for CJK aliases, word boundary for others.
Args: Args:
title (str): The post title to check. title (str): The post title to check.
@@ -137,6 +148,7 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
for pat_str in KNOWN_TXT_MATCH_CLEANUP_PATTERNS: for pat_str in KNOWN_TXT_MATCH_CLEANUP_PATTERNS:
cleaned_title = re.sub(pat_str, ' ', cleaned_title, flags=re.IGNORECASE) cleaned_title = re.sub(pat_str, ' ', cleaned_title, flags=re.IGNORECASE)
cleaned_title = re.sub(r'\s+', ' ', cleaned_title).strip() cleaned_title = re.sub(r'\s+', ' ', cleaned_title).strip()
# Store both original case cleaned title and lower case for different matching
title_lower = cleaned_title.lower() title_lower = cleaned_title.lower()
matched_cleaned_names = set() matched_cleaned_names = set()
@@ -150,17 +162,41 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
if not primary_folder_name or not aliases: if not primary_folder_name or not aliases:
continue continue
for alias in aliases: # <<< START MODIFICATION >>>
alias_lower = alias.lower() cleaned_primary_name = clean_folder_name(primary_folder_name)
if not alias_lower: continue if not cleaned_primary_name or cleaned_primary_name.lower() in unwanted_keywords:
continue # Skip this entry entirely if its primary name is unwanted or empty
# Use word boundaries for accurate matching match_found_for_this_object = False
pattern = r'\b' + re.escape(alias_lower) + r'\b' for alias in aliases:
if re.search(pattern, title_lower): if not alias: continue
cleaned_primary_name = clean_folder_name(primary_folder_name) alias_lower = alias.lower()
if cleaned_primary_name.lower() not in unwanted_keywords:
# Check if the alias contains CJK characters
if contains_cjk(alias):
# Use simple substring matching for CJK
if alias_lower in title_lower:
matched_cleaned_names.add(cleaned_primary_name) matched_cleaned_names.add(cleaned_primary_name)
break # Move to the next name object once a match is found for this one match_found_for_this_object = True
break # Move to the next name object
else:
# Use original word boundary matching for non-CJK
try:
# Compile pattern for efficiency if used repeatedly, though here it changes each loop
pattern = r'\b' + re.escape(alias_lower) + r'\b'
if re.search(pattern, title_lower):
matched_cleaned_names.add(cleaned_primary_name)
match_found_for_this_object = True
break # Move to the next name object
except re.error as e:
# Log error if the alias creates an invalid regex (unlikely with escape)
print(f"Regex error for alias '{alias}': {e}") # Or use proper logging
continue
# This outer break logic remains the same (though slightly redundant with inner breaks)
if match_found_for_this_object:
pass # Already added and broke inner loop
# <<< END MODIFICATION >>>
return sorted(list(matched_cleaned_names)) return sorted(list(matched_cleaned_names))
@@ -169,6 +205,8 @@ def match_folders_from_filename_enhanced(filename, names_to_match, unwanted_keyw
""" """
Matches folder names from a filename, prioritizing longer and more specific aliases. Matches folder names from a filename, prioritizing longer and more specific aliases.
It returns immediately after finding the first (longest) match. It returns immediately after finding the first (longest) match.
MODIFIED: Prioritizes boundary-aware matches for Latin characters,
falls back to substring search for CJK compatibility.
Args: Args:
filename (str): The filename to check. filename (str): The filename to check.
@@ -194,17 +232,43 @@ def match_folders_from_filename_enhanced(filename, names_to_match, unwanted_keyw
continue continue
for alias in name_obj.get("aliases", []): for alias in name_obj.get("aliases", []):
if alias.lower(): if alias: # Check if alias is not None and not an empty string
alias_map_to_primary.append((alias.lower(), cleaned_primary_name)) alias_lower_val = alias.lower()
if alias_lower_val: # Check again after lowercasing
alias_map_to_primary.append((alias_lower_val, cleaned_primary_name))
# Sort by alias length, descending, to match longer aliases first # Sort by alias length, descending, to match longer aliases first
alias_map_to_primary.sort(key=lambda x: len(x[0]), reverse=True) alias_map_to_primary.sort(key=lambda x: len(x[0]), reverse=True)
# <<< MODIFICATION: Return the FIRST match found, which will be the longest >>> # Return the FIRST match found, which will be the longest
for alias_lower, primary_name_for_alias in alias_map_to_primary: for alias_lower, primary_name_for_alias in alias_map_to_primary:
if alias_lower in filename_lower: try:
# Found the longest possible alias that is a substring. Return immediately. # 1. Attempt boundary-aware match first (good for English/Latin)
return [primary_name_for_alias] # Matches alias if it's at the start/end or surrounded by common separators
# We use word boundaries (\b) and also check for common non-word separators like +_-
pattern = r'(?:^|[\s_+-])' + re.escape(alias_lower) + r'(?:[\s_+-]|$)'
if re.search(pattern, filename_lower):
# Found a precise, boundary-aware match. This is the best case.
return [primary_name_for_alias]
# 2. Fallback: Simple substring check (for CJK or other cases)
# This executes ONLY if the boundary match above failed.
# We check if the alias contains CJK OR if the filename does.
# This avoids applying the simple 'in' check for Latin-only aliases in Latin-only filenames.
elif (contains_cjk(alias_lower) or contains_cjk(filename_lower)) and alias_lower in filename_lower:
# This is the fallback for CJK compatibility.
return [primary_name_for_alias]
# If alias is "ul" and filename is "sin+título":
# 1. re.search(r'(?:^|[\s_+-])ul(?:[\s_+-]|$)', "sin+título") -> Fails (good)
# 2. contains_cjk("ul") -> False
# 3. contains_cjk("sin+título") -> False
# 4. No match is found for "ul". (correct)
except re.error as e:
print(f"Regex error matching alias '{alias_lower}' in filename '{filename_lower}': {e}")
continue # Skip this alias if regex fails
# If the loop finishes without any matches, return an empty list. # If the loop finishes without any matches, return an empty list.
return [] return []