6 Commits

Author SHA1 Message Date
Yuvi63771
257111d462 Update main_window.py 2025-11-02 09:40:25 +05:30
Yuvi63771
9563ce82db Commit 2025-11-01 10:41:00 +05:30
Yuvi63771
169ded3fd8 Commit 2025-10-30 08:05:45 +05:30
Yuvi63771
7e8e8a59e2 commit 2025-10-26 12:08:48 +05:30
Yuvi63771
0acd433920 commit 2025-10-25 08:19:06 +05:30
Yuvi63771
cef4211d7b Commit 2025-10-20 13:37:27 +05:30
8 changed files with 872 additions and 114 deletions

View File

@@ -1,8 +1,6 @@
# src/core/Hentai2read_client.py
import re
import os
import time
import time
import cloudscraper
from bs4 import BeautifulSoup
from urllib.parse import urljoin
@@ -65,12 +63,37 @@ def run_hentai2read_download(start_url, output_dir, progress_callback, overall_p
def _get_series_metadata(start_url, progress_callback, scraper):
"""
Scrapes the main series page to get the Artist Name, Series Title, and chapter list.
Includes a retry mechanism for the initial connection.
"""
try:
response = scraper.get(start_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
max_retries = 4 # Total number of attempts (1 initial + 3 retries)
last_exception = None
soup = None
for attempt in range(max_retries):
try:
if attempt > 0:
progress_callback(f" [Hentai2Read] ⚠️ Retrying connection (Attempt {attempt + 1}/{max_retries})...")
response = scraper.get(start_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
# If successful, clear exception and break the loop
last_exception = None
break
except Exception as e:
last_exception = e
progress_callback(f" [Hentai2Read] ⚠️ Connection attempt {attempt + 1} failed: {e}")
if attempt < max_retries - 1:
time.sleep(2 * (attempt + 1)) # Wait 2s, 4s, 6s
continue # Try again
if last_exception:
progress_callback(f" [Hentai2Read] ❌ Error getting series metadata after {max_retries} attempts: {last_exception}")
return "Unknown Series", []
try:
series_title = "Unknown Series"
artist_name = None
metadata_list = soup.select_one("ul.list.list-simple-mini")
@@ -107,10 +130,9 @@ def _get_series_metadata(start_url, progress_callback, scraper):
return top_level_folder_name, chapters_to_process
except Exception as e:
progress_callback(f" [Hentai2Read] ❌ Error getting series metadata: {e}")
progress_callback(f" [Hentai2Read] ❌ Error parsing metadata after successful connection: {e}")
return "Unknown Series", []
### NEW: This function contains the pipeline logic ###
def _process_and_download_chapter(chapter_url, save_path, scraper, progress_callback, check_pause_func):
"""
Uses a producer-consumer pattern to download a chapter.
@@ -120,12 +142,10 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
task_queue = queue.Queue()
num_download_threads = 8
# These will be updated by the worker threads
download_stats = {'downloaded': 0, 'skipped': 0}
def downloader_worker():
"""The function that each download thread will run."""
# Create a unique session for each thread to avoid conflicts
worker_scraper = cloudscraper.create_scraper()
while True:
try:
@@ -153,12 +173,10 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
finally:
task_queue.task_done()
# --- Start the downloader threads ---
executor = ThreadPoolExecutor(max_workers=num_download_threads, thread_name_prefix='H2R_Downloader')
for _ in range(num_download_threads):
executor.submit(downloader_worker)
# --- Main thread acts as the scraper (producer) ---
page_number = 1
while True:
if check_pause_func(): break
@@ -168,12 +186,25 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
page_url_to_check = f"{chapter_url}{page_number}/"
try:
response = scraper.get(page_url_to_check, timeout=30)
if response.history or response.status_code != 200:
page_response = None
page_last_exception = None
for page_attempt in range(3): # 3 attempts for sub-pages
try:
page_response = scraper.get(page_url_to_check, timeout=30)
page_last_exception = None
break
except Exception as e:
page_last_exception = e
time.sleep(1) # Short delay for page scraping retries
if page_last_exception:
raise page_last_exception # Give up after 3 tries
if page_response.history or page_response.status_code != 200:
progress_callback(f" [Hentai2Read] End of chapter detected on page {page_number}.")
break
soup = BeautifulSoup(response.text, 'html.parser')
soup = BeautifulSoup(page_response.text, 'html.parser')
img_tag = soup.select_one("img#arf-reader")
img_src = img_tag.get("src") if img_tag else None
@@ -181,12 +212,11 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
progress_callback(f" [Hentai2Read] End of chapter detected (Placeholder image on page {page_number}).")
break
normalized_img_src = urljoin(response.url, img_src)
normalized_img_src = urljoin(page_response.url, img_src)
ext = os.path.splitext(normalized_img_src.split('/')[-1])[-1] or ".jpg"
filename = f"{page_number:03d}{ext}"
filepath = os.path.join(save_path, filename)
# Put the download task into the queue for a worker to pick up
task_queue.put((filepath, normalized_img_src))
page_number += 1
@@ -195,12 +225,9 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
progress_callback(f" [Hentai2Read] ❌ Error while scraping page {page_number}: {e}")
break
# --- Shutdown sequence ---
# Tell all worker threads to exit by sending the sentinel value
for _ in range(num_download_threads):
task_queue.put(None)
# Wait for all download tasks to be completed
executor.shutdown(wait=True)
progress_callback(f" Found and processed {page_number - 1} images for this chapter.")

View File

@@ -69,15 +69,28 @@ def fetch_fap_nation_data(album_url, logger_func):
if direct_links_found:
logger_func(f" [Fap-Nation] Found {len(direct_links_found)} direct media link(s). Selecting the best quality...")
best_link = direct_links_found[0]
for link in direct_links_found:
if '1080p' in link.lower():
best_link = link
break
best_link = None
# Define qualities from highest to lowest
qualities_to_check = ['1080p', '720p', '480p', '360p']
# Find the best quality link by iterating through preferred qualities
for quality in qualities_to_check:
for link in direct_links_found:
if quality in link.lower():
best_link = link
logger_func(f" [Fap-Nation] Found '{quality}' link: {best_link}")
break # Found the best link for this quality level
if best_link:
break # Found the highest quality available
# Fallback if no quality string was found in any link
if not best_link:
best_link = direct_links_found[0]
logger_func(f" [Fap-Nation] ⚠️ No quality tags (1080p, 720p, etc.) found in links. Defaulting to first link: {best_link}")
final_url = best_link
link_type = 'direct'
logger_func(f" [Fap-Nation] Identified direct media link: {final_url}")
# If after all checks, we still have no URL, then fail.
if not final_url:
logger_func(" [Fap-Nation] ❌ Stage 1 Failed: Could not find any HLS stream or direct link.")

View File

@@ -52,7 +52,7 @@ from ..utils.file_utils import (
from ..utils.network_utils import prepare_cookies_for_request, get_link_platform
from ..utils.text_utils import (
is_title_match_for_character, is_filename_match_for_character, strip_html_tags,
extract_folder_name_from_title, # This was the function causing the error
extract_folder_name_from_title,
match_folders_from_title, match_folders_from_filename_enhanced
)
from ..config.constants import *
@@ -1810,6 +1810,31 @@ class PostProcessorWorker:
if not all_files_from_post_api:
self.logger(f" No files found to download for post {post_id}.")
if not self.extract_links_only and should_create_post_subfolder:
path_to_check_for_emptiness = determined_post_save_path_for_history
try:
if os.path.isdir(path_to_check_for_emptiness):
dir_contents = os.listdir(path_to_check_for_emptiness)
# Check if the directory is empty OR only contains our ID file
is_effectively_empty = True
if dir_contents:
if not all(f.startswith('.postid_') for f in dir_contents):
is_effectively_empty = False
if is_effectively_empty:
self.logger(f" 🗑️ Removing empty post-specific subfolder (post had no files): '{path_to_check_for_emptiness}'")
if dir_contents:
for id_file in dir_contents:
if id_file.startswith('.postid_'):
try:
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
except OSError as e_rm_id:
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
os.rmdir(path_to_check_for_emptiness)
except OSError as e_rmdir:
self.logger(f" ⚠️ Could not remove effectively empty subfolder (no files) '{path_to_check_for_emptiness}': {e_rmdir}")
# --- END NEW CLEANUP LOGIC ---
history_data_for_no_files_post = {
'post_title': post_title,
'post_id': post_id,
@@ -1823,7 +1848,7 @@ class PostProcessorWorker:
result_tuple = (0, 0, [], [], [], history_data_for_no_files_post, None)
self._emit_signal('worker_finished', result_tuple)
return result_tuple
files_to_download_info_list = []
processed_original_filenames_in_this_post = set()
if self.keep_in_post_duplicates:
@@ -2052,9 +2077,27 @@ class PostProcessorWorker:
if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
path_to_check_for_emptiness = determined_post_save_path_for_history
try:
if os.path.isdir(path_to_check_for_emptiness) and not os.listdir(path_to_check_for_emptiness):
self.logger(f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness}'")
os.rmdir(path_to_check_for_emptiness)
if os.path.isdir(path_to_check_for_emptiness):
dir_contents = os.listdir(path_to_check_for_emptiness)
# Check if the directory is empty OR only contains our ID file
is_effectively_empty = True
if dir_contents:
# If there are files, check if ALL of them are .postid files
if not all(f.startswith('.postid_') for f in dir_contents):
is_effectively_empty = False
if is_effectively_empty:
self.logger(f" 🗑️ Removing empty post-specific subfolder (no files downloaded): '{path_to_check_for_emptiness}'")
# We must first remove the ID file(s) before removing the dir
if dir_contents:
for id_file in dir_contents:
if id_file.startswith('.postid_'):
try:
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
except OSError as e_rm_id:
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
os.rmdir(path_to_check_for_emptiness) # Now the rmdir should work
except OSError as e_rmdir:
self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
@@ -2066,11 +2109,29 @@ class PostProcessorWorker:
if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
path_to_check_for_emptiness = determined_post_save_path_for_history
try:
if os.path.isdir(path_to_check_for_emptiness) and not os.listdir(path_to_check_for_emptiness):
self.logger(f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness}'")
os.rmdir(path_to_check_for_emptiness)
if os.path.isdir(path_to_check_for_emptiness):
dir_contents = os.listdir(path_to_check_for_emptiness)
# Check if the directory is empty OR only contains our ID file
is_effectively_empty = True
if dir_contents:
# If there are files, check if ALL of them are .postid files
if not all(f.startswith('.postid_') for f in dir_contents):
is_effectively_empty = False
if is_effectively_empty:
self.logger(f" 🗑️ Removing empty post-specific subfolder (no files downloaded): '{path_to_check_for_emptiness}'")
# We must first remove the ID file(s) before removing the dir
if dir_contents:
for id_file in dir_contents:
if id_file.startswith('.postid_'):
try:
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
except OSError as e_rm_id:
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
os.rmdir(path_to_check_for_emptiness) # Now the rmdir should work
except OSError as e_rmdir:
self.logger(f" ⚠️ Could not remove potentially empty subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
self._emit_signal('worker_finished', result_tuple)
return result_tuple

View File

@@ -2,32 +2,38 @@ import re
import requests
from urllib.parse import urlparse
# Utility Imports
from ...utils.network_utils import prepare_cookies_for_request
from ...utils.file_utils import clean_folder_name
from ...utils.file_utils import clean_folder_name
# Downloader Thread Imports (Alphabetical Order Recommended)
from .allcomic_downloader_thread import AllcomicDownloadThread
from .booru_downloader_thread import BooruDownloadThread
from .bunkr_downloader_thread import BunkrDownloadThread
from .discord_downloader_thread import DiscordDownloadThread
from .discord_downloader_thread import DiscordDownloadThread # Official Discord
from .drive_downloader_thread import DriveDownloadThread
from .erome_downloader_thread import EromeDownloadThread
from .external_link_downloader_thread import ExternalLinkDownloadThread
from .fap_nation_downloader_thread import FapNationDownloadThread
from .hentai2read_downloader_thread import Hentai2readDownloadThread
from .kemono_discord_downloader_thread import KemonoDiscordDownloadThread
from .mangadex_downloader_thread import MangaDexDownloadThread
from .nhentai_downloader_thread import NhentaiDownloadThread
from .pixeldrain_downloader_thread import PixeldrainDownloadThread
from .rule34video_downloader_thread import Rule34VideoDownloadThread
from .saint2_downloader_thread import Saint2DownloadThread
from .simp_city_downloader_thread import SimpCityDownloadThread
from .toonily_downloader_thread import ToonilyDownloadThread
from .rule34video_downloader_thread import Rule34VideoDownloadThread
def create_downloader_thread(main_app, api_url, service, id1, id2, effective_output_dir_for_run):
"""
Factory function to create and configure the correct QThread for a given URL.
Returns a configured QThread instance or None if no special handler is found.
Returns a configured QThread instance, a specific error string ("COOKIE_ERROR", "FETCH_ERROR"),
or None if no special handler is found (indicating fallback to generic BackendDownloadThread).
"""
# Handler for Booru sites (Danbooru, Gelbooru)
if service in ['danbooru', 'gelbooru']:
api_key = main_app.api_key_input.text().strip()
@@ -37,7 +43,7 @@ def create_downloader_thread(main_app, api_url, service, id1, id2, effective_out
api_key=api_key, user_id=user_id, parent=main_app
)
# Handler for cloud storage sites (Mega, GDrive, etc.)
# Handler for cloud storage sites (Mega, GDrive, Dropbox, GoFile)
platform = None
if 'mega.nz' in api_url or 'mega.io' in api_url: platform = 'mega'
elif 'drive.google.com' in api_url: platform = 'gdrive'
@@ -47,7 +53,8 @@ def create_downloader_thread(main_app, api_url, service, id1, id2, effective_out
use_post_subfolder = main_app.use_subfolder_per_post_checkbox.isChecked()
return DriveDownloadThread(
api_url, effective_output_dir_for_run, platform, use_post_subfolder,
main_app.cancellation_event, main_app.pause_event, main_app.log_signal.emit
main_app.cancellation_event, main_app.pause_event, main_app.log_signal.emit,
parent=main_app # Pass parent for consistency
)
# Handler for Erome
@@ -59,75 +66,118 @@ def create_downloader_thread(main_app, api_url, service, id1, id2, effective_out
return MangaDexDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Saint2
is_saint2_url = 'saint2.su' in api_url or 'saint2.pk' in api_url
if is_saint2_url and api_url.strip().lower() != 'saint2.su': # Exclude batch mode trigger
is_saint2_url = service == 'saint2' or 'saint2.su' in api_url or 'saint2.pk' in api_url # Add more domains if needed
if is_saint2_url and api_url.strip().lower() != 'saint2.su': # Exclude batch mode trigger if using URL input
return Saint2DownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for SimpCity
if service == 'simpcity':
cookies = prepare_cookies_for_request(
use_cookie_flag=True, cookie_text_input=main_app.cookie_text_input.text(),
selected_cookie_file_path=main_app.selected_cookie_filepath,
app_base_dir=main_app.app_base_dir, logger_func=main_app.log_signal.emit,
target_domain='simpcity.cr'
use_cookie_flag=True, # SimpCity requires cookies
cookie_text_input=main_app.simpcity_cookie_text_input.text(), # Use dedicated input
selected_cookie_file_path=main_app.selected_cookie_filepath, # Use shared selection
app_base_dir=main_app.app_base_dir,
logger_func=main_app.log_signal.emit,
target_domain='simpcity.cr' # Specific domain
)
if not cookies:
# The main app will handle the error dialog
return "COOKIE_ERROR"
main_app.log_signal.emit("❌ SimpCity requires valid cookies. Please provide them.")
return "COOKIE_ERROR" # Sentinel value for cookie failure
return SimpCityDownloadThread(api_url, id2, effective_output_dir_for_run, cookies, main_app)
# Handler for Rule34Video
if service == 'rule34video':
main_app.log_signal.emit(" Rule34Video.com URL detected. Starting dedicated downloader.")
# id1 contains the video_id from extract_post_info
return Rule34VideoDownloadThread(api_url, effective_output_dir_for_run, main_app)
return Rule34VideoDownloadThread(api_url, effective_output_dir_for_run, main_app) # id1 (video_id) is used inside the thread
# Handler for official Discord URLs
if 'discord.com' in api_url and service == 'discord':
token = main_app.remove_from_filename_input.text().strip()
limit_text = main_app.discord_message_limit_input.text().strip()
message_limit = int(limit_text) if limit_text else None
mode = 'pdf' if main_app.discord_download_scope == 'messages' else 'files'
return DiscordDownloadThread(
mode=mode, session=requests.Session(), token=token, output_dir=effective_output_dir_for_run,
server_id=id1, channel_id=id2, url=api_url, app_base_dir=main_app.app_base_dir,
limit=message_limit, parent=main_app
# HANDLER FOR KEMONO DISCORD (Place BEFORE official Discord)
elif service == 'discord' and any(domain in api_url for domain in ['kemono.cr', 'kemono.su', 'kemono.party']):
main_app.log_signal.emit(" Kemono Discord URL detected. Starting dedicated downloader.")
cookies = prepare_cookies_for_request(
use_cookie_flag=main_app.use_cookie_checkbox.isChecked(), # Respect UI setting
cookie_text_input=main_app.cookie_text_input.text(),
selected_cookie_file_path=main_app.selected_cookie_filepath,
app_base_dir=main_app.app_base_dir,
logger_func=main_app.log_signal.emit,
target_domain='kemono.cr' # Primary Kemono domain, adjust if needed
)
# KemonoDiscordDownloadThread expects parent for events
return KemonoDiscordDownloadThread(
server_id=id1,
channel_id=id2,
output_dir=effective_output_dir_for_run,
cookies_dict=cookies,
parent=main_app
)
# Handler for Allcomic/Allporncomic
if 'allcomic.com' in api_url or 'allporncomic.com' in api_url:
# Handler for official Discord URLs
elif service == 'discord' and 'discord.com' in api_url:
main_app.log_signal.emit(" Official Discord URL detected. Starting dedicated downloader.")
token = main_app.remove_from_filename_input.text().strip() # Token is in the "Remove Words" field for Discord
if not token:
main_app.log_signal.emit("❌ Official Discord requires an Authorization Token in the 'Remove Words' field.")
return None # Or a specific error sentinel
limit_text = main_app.discord_message_limit_input.text().strip()
message_limit = int(limit_text) if limit_text.isdigit() else None
mode = main_app.discord_download_scope # Should be 'pdf' or 'files'
return DiscordDownloadThread(
mode=mode,
session=requests.Session(), # Create a session for this thread
token=token,
output_dir=effective_output_dir_for_run,
server_id=id1,
channel_id=id2,
url=api_url,
app_base_dir=main_app.app_base_dir,
limit=message_limit,
parent=main_app # Pass main_app for events/signals
)
# Check specific domains or rely on service name if extract_post_info provides it
if service == 'allcomic' or 'allcomic.com' in api_url or 'allporncomic.com' in api_url:
return AllcomicDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Hentai2Read
if 'hentai2read.com' in api_url:
if service == 'hentai2read' or 'hentai2read.com' in api_url:
return Hentai2readDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Fap-Nation
if 'fap-nation.com' in api_url or 'fap-nation.org' in api_url:
if service == 'fap-nation' or 'fap-nation.com' in api_url or 'fap-nation.org' in api_url:
use_post_subfolder = main_app.use_subfolder_per_post_checkbox.isChecked()
# Ensure signals are passed correctly if needed by the thread
return FapNationDownloadThread(
api_url, effective_output_dir_for_run, use_post_subfolder,
main_app.pause_event, main_app.cancellation_event, main_app.actual_gui_signals, main_app
)
# Handler for Pixeldrain
if 'pixeldrain.com' in api_url:
return PixeldrainDownloadThread(api_url, effective_output_dir_for_run, main_app)
if service == 'pixeldrain' or 'pixeldrain.com' in api_url:
return PixeldrainDownloadThread(api_url, effective_output_dir_for_run, main_app) # URL contains the ID
# Handler for nHentai
if service == 'nhentai':
from ...core.nhentai_client import fetch_nhentai_gallery
main_app.log_signal.emit(f" nHentai gallery ID {id1} detected. Fetching gallery data...")
gallery_data = fetch_nhentai_gallery(id1, main_app.log_signal.emit)
if not gallery_data:
main_app.log_signal.emit(f"❌ Failed to fetch nHentai gallery data for ID {id1}.")
return "FETCH_ERROR" # Sentinel value for fetch failure
return NhentaiDownloadThread(gallery_data, effective_output_dir_for_run, main_app)
# Handler for Toonily
if 'toonily.com' in api_url:
if service == 'toonily' or 'toonily.com' in api_url:
return ToonilyDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Bunkr
if service == 'bunkr':
# id1 contains the full URL or album ID from extract_post_info
return BunkrDownloadThread(id1, effective_output_dir_for_run, main_app)
# If no special handler matched, return None
# --- Fallback ---
# If no specific handler matched based on service name or URL pattern, return None.
# This signals main_window.py to use the generic BackendDownloadThread/PostProcessorWorker
# which uses the standard Kemono/Coomer post API.
main_app.log_signal.emit(f" No specialized downloader found for service '{service}' and URL '{api_url[:50]}...'. Using generic downloader.")
return None

View File

@@ -0,0 +1,549 @@
# kemono_discord_downloader_thread.py
import os
import time
import uuid
import threading
import cloudscraper
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from PyQt5.QtCore import QThread, pyqtSignal
# --- Assuming these files are in the correct relative path ---
# Adjust imports if your project structure is different
try:
from ...core.discord_client import fetch_server_channels, fetch_channel_messages
from ...utils.file_utils import clean_filename
except ImportError as e:
# Basic fallback logging if signals aren't ready
print(f"ERROR: Failed to import required modules for Kemono Discord thread: {e}")
# Re-raise to prevent the thread from being created incorrectly
raise
# Custom exception for clean cancellation/pausing
class InterruptedError(Exception):
    """
    Raised inside download loops to unwind cleanly when the user cancels or pauses.

    NOTE: within this module the name shadows Python's built-in
    ``InterruptedError`` (an ``OSError`` subclass); this class derives
    directly from ``Exception`` and is unrelated to the built-in.
    """
class KemonoDiscordDownloadThread(QThread):
"""
A dedicated QThread for downloading files from Kemono Discord server/channel pages,
using the Kemono API via discord_client and multithreading for file downloads.
Includes a single retry attempt after a 15-second delay for specific errors.
"""
# --- Signals ---
progress_signal = pyqtSignal(str) # General log messages
progress_label_signal = pyqtSignal(str) # Update main progress label (e.g., "Fetching messages...")
file_progress_signal = pyqtSignal(str, object) # Update file progress bar (filename, (downloaded_bytes, total_bytes | None))
permanent_file_failed_signal = pyqtSignal(list) # To report failures to main window
finished_signal = pyqtSignal(int, int, bool, list) # (downloaded_count, skipped_count, was_cancelled, [])
def __init__(self, server_id, channel_id, output_dir, cookies_dict, parent):
    """
    Initializes the Kemono Discord downloader thread.

    Args:
        server_id (str): The Discord server ID from Kemono.
        channel_id (str | None): The specific Discord channel ID from Kemono, if provided.
        output_dir (str): The base directory to save downloaded files.
        cookies_dict (dict | None): Cookies to use for requests.
        parent (QWidget): The parent widget (main_app) to access events/settings.

    Side effects:
        Reads the multithreading checkbox and thread-count input from
        ``parent`` (when present) to pick ``num_file_threads`` (clamped to
        1..20), and creates a cloudscraper session. If the scraper cannot be
        created, ``self.scraper`` is left as ``None``.
    """
    super().__init__(parent)
    self.server_id = server_id
    self.target_channel_id = channel_id  # The specific channel from URL, if any
    self.output_dir = output_dir
    self.cookies_dict = cookies_dict
    self.parent_app = parent  # Access main app's events and settings

    # --- Shared Events & Internal State ---
    # Reuse the parent's events when it exposes them; otherwise fall back to
    # private events so cancel()/pause() still work.
    self.cancellation_event = getattr(parent, 'cancellation_event', threading.Event())
    self.pause_event = getattr(parent, 'pause_event', threading.Event())
    self._is_cancelled_internal = False  # Internal flag for quick breaking

    # --- Thread-Safe Counters ---
    self.download_count = 0
    self.skip_count = 0
    self.count_lock = threading.Lock()

    # --- List to Store Failure Details ---
    self.permanently_failed_details = []

    # --- Multithreading Configuration ---
    self.num_file_threads = 1  # Default
    try:
        use_mt = getattr(self.parent_app, 'use_multithreading_checkbox', None)
        thread_input = getattr(self.parent_app, 'thread_count_input', None)
        if use_mt and use_mt.isChecked() and thread_input:
            thread_count_ui = int(thread_input.text().strip())
            # Clamp to a sane range for this downloader: at least 1, at most 20.
            self.num_file_threads = max(1, min(thread_count_ui, 20))
    except (ValueError, AttributeError, TypeError):
        # Logging is best-effort: a failed emit must not abort construction,
        # but KeyboardInterrupt/SystemExit are allowed to propagate.
        try:
            self.progress_signal.emit("⚠️ Warning: Could not read thread count setting, defaulting to 1.")
        except Exception:
            pass
        self.num_file_threads = 1  # Fallback on error getting setting

    # --- Network Client ---
    try:
        self.scraper = cloudscraper.create_scraper(
            browser={'browser': 'firefox', 'platform': 'windows', 'mobile': False}
        )
    except Exception as e:
        try:
            self.progress_signal.emit(f"❌ ERROR: Failed to initialize cloudscraper: {e}")
        except Exception:
            pass
        # Marks the client as unavailable without failing construction.
        self.scraper = None
# --- Control Methods (cancel, pause, resume) ---
def cancel(self):
    """
    Request cancellation of the download.

    Sets the internal cancelled flag and the shared cancellation event, then
    logs the request. Logging is best-effort: an ordinary exception raised by
    the emit is ignored, but KeyboardInterrupt/SystemExit are not swallowed.
    """
    self._is_cancelled_internal = True
    self.cancellation_event.set()
    try:
        self.progress_signal.emit(" Cancellation requested for Kemono Discord download.")
    except Exception:
        pass
def pause(self):
    """
    Pause the download by setting the shared pause event.

    Does nothing when the pause event is already set, so repeated calls do not
    log again. Logging is best-effort: an ordinary exception raised by the
    emit is ignored, but KeyboardInterrupt/SystemExit are not swallowed.
    """
    if self.pause_event.is_set():
        return
    self.pause_event.set()
    try:
        self.progress_signal.emit(" Pausing Kemono Discord download...")
    except Exception:
        pass
def resume(self):
    """
    Resume a paused download by clearing the shared pause event.

    Does nothing when the pause event is not set. Logging is best-effort: an
    ordinary exception raised by the emit is ignored, but
    KeyboardInterrupt/SystemExit are not swallowed.
    """
    if not self.pause_event.is_set():
        return
    self.pause_event.clear()
    try:
        self.progress_signal.emit(" Resuming Kemono Discord download...")
    except Exception:
        pass
# --- Helper: Check Cancellation/Pause ---
def _check_events(self):
if self._is_cancelled_internal or self.cancellation_event.is_set():
if not self._is_cancelled_internal:
self._is_cancelled_internal = True
try: self.progress_signal.emit(" Cancellation detected by Kemono Discord thread check.")
except: pass
return True # Cancelled
was_paused = False
while self.pause_event.is_set():
if not was_paused:
try: self.progress_signal.emit(" Kemono Discord operation paused...")
except: pass
was_paused = True
if self.cancellation_event.is_set():
self._is_cancelled_internal = True
try: self.progress_signal.emit(" Cancellation detected while paused.")
except: pass
return True
time.sleep(0.5)
return False
# --- Helper: Download Single File with ONE Retry ---
def _download_single_kemono_file(self, file_info):
"""
Downloads a single file, handles collisions after download,
and automatically retries ONCE after 15s for specific network errors.
Returns:
tuple: (bool_success, dict_error_details_or_None)
"""
# --- Constants for Retry Logic ---
MAX_ATTEMPTS = 2 # 1 initial attempt + 1 retry
RETRY_DELAY_SECONDS = 15
# --- Extract info ---
channel_dir = file_info['channel_dir']
original_filename = file_info['original_filename']
file_url = file_info['file_url']
channel_id = file_info['channel_id']
post_title = file_info.get('post_title', f"Message in channel {channel_id}")
original_post_id_for_log = file_info.get('message_id', 'N/A')
base_kemono_domain = "kemono.cr"
if not self.scraper:
try: self.progress_signal.emit(f" ❌ Cannot download '{original_filename}': Cloudscraper not initialized.")
except: pass
failure_details = { 'file_info': {'url': file_url, 'name': original_filename}, 'post_title': post_title, 'original_post_id_for_log': original_post_id_for_log, 'target_folder_path': channel_dir, 'error': 'Cloudscraper not initialized', 'service': 'discord', 'user_id': self.server_id }
return False, failure_details
if self._check_events(): return False, None # Interrupted before start
# --- Determine filenames ---
cleaned_original_filename = clean_filename(original_filename)
intended_final_filename = cleaned_original_filename
unique_suffix = uuid.uuid4().hex[:8]
temp_filename = f"{intended_final_filename}.{unique_suffix}.part"
temp_filepath = os.path.join(channel_dir, temp_filename)
# --- Download Attempt Loop ---
download_successful = False
last_exception = None
should_retry = False # Flag to indicate if the first attempt failed with a retryable error
for attempt in range(1, MAX_ATTEMPTS + 1):
response = None
try:
# --- Pre-attempt checks ---
if self._check_events(): raise InterruptedError("Cancelled/Paused before attempt")
if attempt == 2 and should_retry: # Only delay *before* the retry
try: self.progress_signal.emit(f" ⏳ Retrying '{original_filename}' (Attempt {attempt}/{MAX_ATTEMPTS}) after {RETRY_DELAY_SECONDS}s...")
except: pass
for _ in range(RETRY_DELAY_SECONDS):
if self._check_events(): raise InterruptedError("Cancelled/Paused during retry delay")
time.sleep(1)
# If it's attempt 2 but should_retry is False, it means the first error was non-retryable, so skip
elif attempt == 2 and not should_retry:
break # Exit loop, failure already determined
# --- Log attempt ---
log_prefix = f" ⬇️ Downloading:" if attempt == 1 else f" 🔄 Retrying:"
try: self.progress_signal.emit(f"{log_prefix} '{original_filename}' (Attempt {attempt}/{MAX_ATTEMPTS})...")
except: pass
if attempt == 1:
try: self.file_progress_signal.emit(original_filename, (0, 0))
except: pass
# --- Perform Download ---
headers = { 'User-Agent': 'Mozilla/5.0 ...', 'Referer': f'https://{base_kemono_domain}/discord/channel/{channel_id}'} # Shortened for brevity
response = self.scraper.get(file_url, headers=headers, cookies=self.cookies_dict, stream=True, timeout=(15, 120))
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
downloaded_size = 0
last_progress_emit_time = time.time()
with open(temp_filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=1024*1024):
if self._check_events(): raise InterruptedError("Cancelled/Paused during chunk writing")
if chunk:
f.write(chunk)
downloaded_size += len(chunk)
current_time = time.time()
if total_size > 0 and (current_time - last_progress_emit_time > 0.5 or downloaded_size == total_size):
try: self.file_progress_signal.emit(original_filename, (downloaded_size, total_size))
except: pass
last_progress_emit_time = current_time
elif total_size == 0 and (current_time - last_progress_emit_time > 0.5):
try: self.file_progress_signal.emit(original_filename, (downloaded_size, 0))
except: pass
last_progress_emit_time = current_time
response.close()
# --- Verification ---
if self._check_events(): raise InterruptedError("Cancelled/Paused after download completion")
if total_size > 0 and downloaded_size != total_size:
try: self.progress_signal.emit(f" ⚠️ Size mismatch on attempt {attempt} for '{original_filename}'. Expected {total_size}, got {downloaded_size}.")
except: pass
last_exception = IOError(f"Size mismatch: Expected {total_size}, got {downloaded_size}")
if os.path.exists(temp_filepath):
try: os.remove(temp_filepath)
except OSError: pass
should_retry = (attempt == 1) # Only retry if it was the first attempt
continue # Try again if attempt 1, otherwise loop finishes
else:
download_successful = True
break # Success!
# --- Error Handling within Loop ---
except InterruptedError as e:
last_exception = e
should_retry = False # Don't retry if interrupted
break
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError, cloudscraper.exceptions.CloudflareException) as e:
last_exception = e
try: self.progress_signal.emit(f" ❌ Network/Cloudflare error on attempt {attempt} for '{original_filename}': {e}")
except: pass
should_retry = (attempt == 1) # Retry only if first attempt
except requests.exceptions.RequestException as e:
status_code = getattr(e.response, 'status_code', None)
if status_code and 500 <= status_code <= 599: # Retry on 5xx
last_exception = e
try: self.progress_signal.emit(f" ❌ Server error ({status_code}) on attempt {attempt} for '{original_filename}'. Will retry...")
except: pass
should_retry = (attempt == 1) # Retry only if first attempt
else: # Don't retry on 4xx or other request errors
last_exception = e
try: self.progress_signal.emit(f" ❌ Non-retryable HTTP error for '{original_filename}': {e}")
except: pass
should_retry = False
break
except OSError as e:
last_exception = e
try: self.progress_signal.emit(f" ❌ OS error during download attempt {attempt} for '{original_filename}': {e}")
except: pass
should_retry = False
break
except Exception as e:
last_exception = e
try: self.progress_signal.emit(f" ❌ Unexpected error on attempt {attempt} for '{original_filename}': {e}")
except: pass
should_retry = False
break
finally:
if response:
try: response.close()
except Exception: pass
# --- End Download Attempt Loop ---
try: self.file_progress_signal.emit(original_filename, None) # Clear progress
except: pass
# --- Post-Download Processing ---
if download_successful:
# --- Rename Logic ---
final_filename_to_use = intended_final_filename
final_filepath_on_disk = os.path.join(channel_dir, final_filename_to_use)
counter = 1
base_name, extension = os.path.splitext(intended_final_filename)
while os.path.exists(final_filepath_on_disk):
final_filename_to_use = f"{base_name} ({counter}){extension}"
final_filepath_on_disk = os.path.join(channel_dir, final_filename_to_use)
counter += 1
if final_filename_to_use != intended_final_filename:
try: self.progress_signal.emit(f" -> Name conflict for '{intended_final_filename}'. Renaming to '{final_filename_to_use}'.")
except: pass
try:
os.rename(temp_filepath, final_filepath_on_disk)
try: self.progress_signal.emit(f" ✅ Saved: '{final_filename_to_use}'")
except: pass
return True, None # SUCCESS
except OSError as e:
try: self.progress_signal.emit(f" ❌ OS error renaming temp file to '{final_filename_to_use}': {e}")
except: pass
if os.path.exists(temp_filepath):
try: os.remove(temp_filepath)
except OSError: pass
# ---> RETURN FAILURE TUPLE (Rename Failed) <---
failure_details = { 'file_info': {'url': file_url, 'name': original_filename}, 'post_title': post_title, 'original_post_id_for_log': original_post_id_for_log, 'target_folder_path': channel_dir, 'intended_filename': intended_final_filename, 'error': f"Rename failed: {e}", 'service': 'discord', 'user_id': self.server_id }
return False, failure_details
else:
# Download failed or was interrupted
if not isinstance(last_exception, InterruptedError):
try: self.progress_signal.emit(f" ❌ FAILED to download '{original_filename}' after {MAX_ATTEMPTS} attempts. Last error: {last_exception}")
except: pass
if os.path.exists(temp_filepath):
try: os.remove(temp_filepath)
except OSError as e_rem:
try: self.progress_signal.emit(f" (Failed to remove temp file '{temp_filename}': {e_rem})")
except: pass
# ---> RETURN FAILURE TUPLE (Download Failed/Interrupted) <---
# Only generate details if it wasn't interrupted by user
failure_details = None
if not isinstance(last_exception, InterruptedError):
failure_details = {
'file_info': {'url': file_url, 'name': original_filename},
'post_title': post_title, 'original_post_id_for_log': original_post_id_for_log,
'target_folder_path': channel_dir, 'intended_filename': intended_final_filename,
'error': f"Failed after {MAX_ATTEMPTS} attempts: {last_exception}",
'service': 'discord', 'user_id': self.server_id,
'forced_filename_override': intended_final_filename,
'file_index_in_post': file_info.get('file_index', 0),
'num_files_in_this_post': file_info.get('num_files', 1)
}
return False, failure_details # Return None details if interrupted
# --- Main Thread Execution ---
def run(self):
"""
Main execution logic: fetches channels/messages and dispatches file downloads.

Flow, as implemented below:
1. Reset the counters and the failure list; abort early (emitting
finished_signal(0, 0, False, [])) when no scraper is available.
2. Build the channel list: either the single target_channel_id or the
result of fetch_server_channels() for server_id.
3. For each channel: create its folder under output_dir, collect one
download task per attachment yielded by fetch_channel_messages(), then
run the tasks through a ThreadPoolExecutor with num_file_threads workers.
4. In all cases the finally block emits permanent_file_failed_signal (when
there are failures) and finished_signal(download_count, skip_count,
cancelled, []).
"""
self.download_count = 0
self.skip_count = 0
self._is_cancelled_internal = False
self.permanently_failed_details = [] # Reset failed list
if not self.scraper:
try: self.progress_signal.emit("❌ Aborting Kemono Discord download: Cloudscraper failed to initialize.")
except: pass
self.finished_signal.emit(0, 0, False, [])
return
try:
# --- Log Start ---
try:
self.progress_signal.emit("=" * 40)
self.progress_signal.emit(f"🚀 Starting Kemono Discord download for server: {self.server_id}")
self.progress_signal.emit(f" Using {self.num_file_threads} thread(s) for file downloads.")
except: pass
# --- Channel Fetching (same as before) ---
channels_to_process = []
# ... (logic to populate channels_to_process using fetch_server_channels or target_channel_id) ...
if self.target_channel_id:
# A targeted channel has no known display name here, so its id doubles as the name.
channels_to_process.append({'id': self.target_channel_id, 'name': self.target_channel_id})
try: self.progress_signal.emit(f" Targeting specific channel: {self.target_channel_id}")
except: pass
else:
try: self.progress_label_signal.emit("Fetching server channels via Kemono API...")
except: pass
channels_data = fetch_server_channels(self.server_id, logger=self.progress_signal.emit, cookies_dict=self.cookies_dict)
# Early returns in this section still pass through the finally block below,
# which emits finished_signal.
if self._check_events(): return
if channels_data is not None:
channels_to_process = channels_data
try: self.progress_signal.emit(f" Found {len(channels_to_process)} channels.")
except: pass
else:
try: self.progress_signal.emit(f" ❌ Failed to fetch channels for server {self.server_id} via Kemono API.")
except: pass
return
# --- Process Each Channel ---
for channel in channels_to_process:
if self._check_events(): break
channel_id = channel['id']
# Falls back to the channel id only when the 'name' key is absent.
channel_name = clean_filename(channel.get('name', channel_id))
channel_dir = os.path.join(self.output_dir, channel_name)
try:
os.makedirs(channel_dir, exist_ok=True)
except OSError as e:
try: self.progress_signal.emit(f" ❌ Failed to create directory for channel '{channel_name}': {e}. Skipping channel.")
except: pass
continue
try:
self.progress_signal.emit(f"\n--- Processing Channel: #{channel_name} ({channel_id}) ---")
self.progress_label_signal.emit(f"Fetching messages for #{channel_name}...")
except: pass
# --- Collect File Download Tasks ---
# All tasks for the channel are collected first; downloads start only afterwards.
file_tasks = []
message_generator = fetch_channel_messages(
channel_id, logger=self.progress_signal.emit,
cancellation_event=self.cancellation_event, pause_event=self.pause_event,
cookies_dict=self.cookies_dict
)
try:
message_index = 0
for message_batch in message_generator:
if self._check_events(): break
for message in message_batch:
# message_index only serves as a fallback id for messages without an 'id'.
message_id = message.get('id', f'msg_{message_index}')
# First 50 characters of the content (or a placeholder), always followed by "...".
post_title_context = (message.get('content') or f"Message {message_id}")[:50] + "..."
attachments = message.get('attachments', [])
file_index_in_message = 0
num_files_in_message = len(attachments)
for attachment in attachments:
if self._check_events(): raise InterruptedError
file_path = attachment.get('path')
original_filename = attachment.get('name')
# Attachments missing either a path or a name are not queued.
if file_path and original_filename:
base_kemono_domain = "kemono.cr"
if not file_path.startswith('/'): file_path = '/' + file_path
file_url = f"https://{base_kemono_domain}/data{file_path}"
file_tasks.append({
'channel_dir': channel_dir, 'original_filename': original_filename,
'file_url': file_url, 'channel_id': channel_id,
'message_id': message_id, 'post_title': post_title_context,
'file_index': file_index_in_message, 'num_files': num_files_in_message
})
file_index_in_message += 1
message_index += 1
if self._check_events(): raise InterruptedError
if self._check_events(): raise InterruptedError
except InterruptedError:
try: self.progress_signal.emit(" Interrupted while collecting file tasks.")
except: pass
break # Exit channel processing
except Exception as e_msg:
try: self.progress_signal.emit(f" ❌ Error fetching messages for channel {channel_name}: {e_msg}")
except: pass
continue # Continue to next channel
if self._check_events(): break
if not file_tasks:
try: self.progress_signal.emit(" No downloadable file attachments found in this channel's messages.")
except: pass
continue
try:
self.progress_signal.emit(f" Found {len(file_tasks)} potential file attachments. Starting downloads...")
self.progress_label_signal.emit(f"Downloading {len(file_tasks)} files for #{channel_name}...")
except: pass
# --- Execute Downloads Concurrently ---
# Counts completed futures regardless of outcome; used only for the progress label.
files_processed_in_channel = 0
with ThreadPoolExecutor(max_workers=self.num_file_threads, thread_name_prefix=f"KDC_{channel_id[:4]}_") as executor:
# Map each future back to its task so a crashed future can still be reported.
futures = {executor.submit(self._download_single_kemono_file, task): task for task in file_tasks}
try:
for future in as_completed(futures):
files_processed_in_channel += 1
task_info = futures[future]
try:
success, details = future.result() # Unpack result
with self.count_lock:
if success:
self.download_count += 1
else:
# Interrupted downloads come back with details=None: they are counted
# as skipped but not added to the permanent-failure list.
self.skip_count += 1
if details: # Append details if the download permanently failed
self.permanently_failed_details.append(details)
except Exception as e_future:
filename = task_info.get('original_filename', 'unknown file')
try: self.progress_signal.emit(f" ❌ System error processing download future for '{filename}': {e_future}")
except: pass
with self.count_lock:
self.skip_count += 1
# Append details on system failure
failure_details = { 'file_info': {'url': task_info.get('file_url'), 'name': filename}, 'post_title': task_info.get('post_title', 'N/A'), 'original_post_id_for_log': task_info.get('message_id', 'N/A'), 'target_folder_path': task_info.get('channel_dir'), 'error': f"Future execution error: {e_future}", 'service': 'discord', 'user_id': self.server_id, 'forced_filename_override': clean_filename(filename), 'file_index_in_post': task_info.get('file_index', 0), 'num_files_in_this_post': task_info.get('num_files', 1) }
self.permanently_failed_details.append(failure_details)
try: self.progress_label_signal.emit(f"#{channel_name}: {files_processed_in_channel}/{len(file_tasks)} files processed")
except: pass
if self._check_events():
try: self.progress_signal.emit(" Cancelling remaining file downloads for this channel...")
except: pass
# cancel_futures=True drops tasks that have not started yet; tasks already
# running are expected to stop through their own _check_events() calls.
executor.shutdown(wait=False, cancel_futures=True)
break # Exit as_completed loop
except InterruptedError:
try: self.progress_signal.emit(" Download processing loop interrupted.")
except: pass
executor.shutdown(wait=False, cancel_futures=True)
if self._check_events(): break # Check between channels
# --- End Channel Loop ---
except Exception as e:
# Catch unexpected errors in the main run logic
try:
self.progress_signal.emit(f"❌ Unexpected critical error in Kemono Discord thread run loop: {e}")
import traceback
self.progress_signal.emit(traceback.format_exc())
except: pass # Avoid errors if signals fail at the very end
finally:
# --- Final Cleanup and Signal ---
try:
try: self.progress_signal.emit("=" * 40)
except: pass
cancelled = self._is_cancelled_internal or self.cancellation_event.is_set()
# --- EMIT FAILED FILES SIGNAL ---
if self.permanently_failed_details:
try:
self.progress_signal.emit(f" Reporting {len(self.permanently_failed_details)} permanently failed files...")
self.permanent_file_failed_signal.emit(list(self.permanently_failed_details)) # Emit a copy
except Exception as e_emit_fail:
print(f"ERROR emitting permanent_file_failed_signal: {e_emit_fail}")
# Log final status
try:
if cancelled and not self._is_cancelled_internal:
self.progress_signal.emit(" Kemono Discord download cancelled externally.")
elif self._is_cancelled_internal:
self.progress_signal.emit(" Kemono Discord download finished due to cancellation.")
else:
self.progress_signal.emit("✅ Kemono Discord download process finished.")
except: pass
# Clear file progress
try: self.file_progress_signal.emit("", None)
except: pass
# Get final counts safely
with self.count_lock:
final_download_count = self.download_count
final_skip_count = self.skip_count
# Emit finished signal
# The failure list travels via permanent_file_failed_signal above; the last
# argument here is always an empty list.
self.finished_signal.emit(final_download_count, final_skip_count, cancelled, [])
except Exception as e_final:
# Log final signal emission error if possible
print(f"ERROR in KemonoDiscordDownloadThread finally block: {e_final}")

View File

@@ -104,6 +104,7 @@ from .classes.drive_downloader_thread import DriveDownloadThread
from .classes.external_link_downloader_thread import ExternalLinkDownloadThread
from .classes.nhentai_downloader_thread import NhentaiDownloadThread
from .classes.downloader_factory import create_downloader_thread
from .classes.kemono_discord_downloader_thread import KemonoDiscordDownloadThread
_ff_ver = (datetime.date.today().toordinal() - 735506) // 28
USERAGENT_FIREFOX = (f"Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
@@ -333,16 +334,14 @@ class DownloaderApp (QWidget ):
self.download_location_label_widget = None
self.remove_from_filename_label_widget = None
self.skip_words_label_widget = None
self.setWindowTitle("Kemono Downloader v7.5.1")
self.setWindowTitle("Kemono Downloader v7.5.2")
setup_ui(self)
self._connect_signals()
if hasattr(self, 'character_input'):
self.character_input.setToolTip(self._tr("character_input_tooltip", "Enter character names (comma-separated)..."))
self.log_signal.emit(f" Manga filename style loaded: '{self.manga_filename_style}'")
self.log_signal.emit(f" filename style loaded: '{self.manga_filename_style}'")
self.log_signal.emit(f" Skip words scope loaded: '{self.skip_words_scope}'")
self.log_signal.emit(f" Character filter scope set to default: '{self.char_filter_scope}'")
self.log_signal.emit(f" Multi-part download defaults to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}")
self.log_signal.emit(f" Scan post content for images defaults to: {'Enabled' if self.scan_content_images_setting else 'Disabled'}")
self.log_signal.emit(f" Application language loaded: '{self.current_selected_language.upper()}' (UI may not reflect this yet).")
self._retranslate_main_ui()
self._load_persistent_history()
@@ -494,6 +493,8 @@ class DownloaderApp (QWidget ):
def _connect_specialized_thread_signals(self, thread):
"""Connects common signals for specialized downloader threads."""
is_kemono_discord = isinstance(thread, KemonoDiscordDownloadThread)
if hasattr(thread, 'progress_signal'):
thread.progress_signal.connect(self.handle_main_log)
if hasattr(thread, 'file_progress_signal'):
@@ -508,6 +509,10 @@ class DownloaderApp (QWidget ):
if hasattr(thread, 'progress_label_signal'): # For Discord thread
thread.progress_label_signal.connect(self.progress_label.setText)
if is_kemono_discord and hasattr(thread, 'permanent_file_failed_signal'):
thread.permanent_file_failed_signal.connect(self._handle_permanent_file_failure_from_thread)
print("DEBUG: Connected permanent_file_failed_signal for KemonoDiscordDownloadThread.") # Debug print
def _apply_theme_and_restart_prompt(self):
"""Applies the theme and prompts the user to restart."""
if self.current_theme == "dark":
@@ -824,14 +829,11 @@ class DownloaderApp (QWidget ):
self.download_btn.setEnabled(False)
self.pause_btn.setEnabled(False)
else:
# --- START MODIFICATION ---
# Check if we are about to download fetched posts and update text accordingly
if self.is_ready_to_download_fetched:
num_posts = len(self.fetched_posts_for_download)
self.download_btn.setText(f"⬇️ Start Download ({num_posts} Posts)")
self.download_btn.setEnabled(True) # Keep it enabled for the user to click
else:
# Original logic for an active download in other scenarios
self.download_btn.setText(self._tr("start_download_button_text", "⬇️ Start Download"))
self.download_btn.setEnabled(False)
@@ -919,11 +921,9 @@ class DownloaderApp (QWidget ):
args_template = self.last_start_download_args
# Update both the character filter list and the domain override in the arguments
args_template['filter_character_list'] = parsed_filters
args_template['domain_override'] = domain_override
# Manually set the UI to a "downloading" state for reliability
self.set_ui_enabled(False)
self.download_btn.setText("⬇️ Downloading...")
self.download_btn.setEnabled(False)
@@ -931,7 +931,6 @@ class DownloaderApp (QWidget ):
self.cancel_btn.setEnabled(True)
self.cancel_btn.setText("❌ Cancel & Reset UI")
try:
# Ensure signals are connected to the correct actions for this state
self.cancel_btn.clicked.disconnect()
self.pause_btn.clicked.disconnect()
except TypeError:
@@ -3184,8 +3183,7 @@ class DownloaderApp (QWidget ):
self .update_custom_folder_visibility ()
self .update_page_range_enabled_state ()
if self .manga_mode_checkbox :
self .manga_mode_checkbox .setChecked (False )
self .manga_mode_checkbox .setEnabled (False )
pass
if hasattr (self ,'use_cookie_checkbox'):
self .use_cookie_checkbox .setChecked (True )
self .use_cookie_checkbox .setEnabled (False )
@@ -3247,8 +3245,7 @@ class DownloaderApp (QWidget ):
is_single_post = True
# --- MODIFIED: Added check for is_discord_url ---
can_enable_manga_checkbox = (is_creator_feed or is_single_post) and not is_favorite_mode_on and not is_discord_url
can_enable_manga_checkbox = ((is_creator_feed or is_single_post) or is_favorite_mode_on) and not is_discord_url
if self .manga_mode_checkbox :
self .manga_mode_checkbox .setEnabled (can_enable_manga_checkbox)
if not can_enable_manga_checkbox and self .manga_mode_checkbox .isChecked ():
@@ -5547,6 +5544,7 @@ class DownloaderApp (QWidget ):
'known_names':list (KNOWN_NAMES ),
'emitter':self .worker_to_gui_queue ,
'unwanted_keywords':{'spicy','hd','nsfw','4k','preview','teaser','clip'},
'creator_name_cache': self.creator_name_cache,
'domain_override': domain_override_command,
'sfp_threshold': sfp_threshold_command,
'handle_unknown_mode': handle_unknown_command,
@@ -5619,13 +5617,11 @@ class DownloaderApp (QWidget ):
api_domain = parsed_api_url.netloc if parsed_api_url.netloc else self._get_domain_for_service(service)
post_page_url = f"https://{api_domain}/{service}/user/{user_id}/post/{post_id}"
# --- NEW LOGIC: Differentiate between loaded files and live session errors ---
# Initialize variables before the conditional blocks
target_folder_path_for_download = None
filename_override_for_download = None
if job_details.get('is_loaded_from_txt'):
# --- BEHAVIOR FOR LOADED FILES: Recalculate everything from current UI settings ---
self.log_signal.emit(f" Retrying loaded file. Recalculating path and name from current UI settings...")
# 1. Get all current settings and job data
@@ -6217,7 +6213,7 @@ class DownloaderApp (QWidget ):
'manga_date_prefix': self.manga_date_prefix_input.text().strip(),
'manga_date_file_counter_ref': None,
'scan_content_for_images': self.scan_content_images_checkbox.isChecked(),
'creator_name_cache': self.creator_name_cache,
'creator_download_folder_ignore_words': creator_folder_ignore_words_for_run,
'num_file_threads_for_worker': effective_num_file_threads_per_worker,
'multipart_scope': 'files',
@@ -6318,10 +6314,8 @@ class DownloaderApp (QWidget ):
if hasattr(self, 'link_input'):
self.last_link_input_text_for_queue_sync = self.link_input.text()
# --- START: MODIFIED LOGIC ---
# Manually trigger the UI update now that the queue is populated and the dialog is closed.
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
# --- END: MODIFIED LOGIC ---
def _load_saved_cookie_settings(self):
"""Loads and applies saved cookie settings on startup."""

View File

@@ -26,6 +26,16 @@ KNOWN_TXT_MATCH_CLEANUP_PATTERNS = [
r'\bPreview\b',
]
# --- START NEW CODE ---
# Regular expression to detect CJK characters
# Covers Hiragana, Katakana, Half/Full width forms, CJK Unified Ideographs, Hangul Syllables, etc.
cjk_pattern = re.compile(
    r'[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uffef\u4e00-\u9fff\uac00-\ud7af]'
)


def contains_cjk(text):
    """Return True when *text* has at least one character matched by ``cjk_pattern``."""
    first_hit = cjk_pattern.search(text)
    return first_hit is not None
# --- END NEW CODE ---
# --- Text Matching and Manipulation Utilities ---
def is_title_match_for_character(post_title, character_name_filter):
@@ -42,7 +52,7 @@ def is_title_match_for_character(post_title, character_name_filter):
"""
if not post_title or not character_name_filter:
return False
# Use word boundaries (\b) to match whole words only
pattern = r"(?i)\b" + re.escape(str(character_name_filter).strip()) + r"\b"
return bool(re.search(pattern, post_title))
@@ -62,7 +72,7 @@ def is_filename_match_for_character(filename, character_name_filter):
"""
if not filename or not character_name_filter:
return False
return str(character_name_filter).strip().lower() in filename.lower()
@@ -101,16 +111,16 @@ def extract_folder_name_from_title(title, unwanted_keywords):
"""
if not title:
return 'Uncategorized'
title_lower = title.lower()
# Find all whole words in the title
tokens = re.findall(r'\b[\w\-]+\b', title_lower)
for token in tokens:
clean_token = clean_folder_name(token)
if clean_token and clean_token.lower() not in unwanted_keywords:
return clean_token
# Fallback to cleaning the full title if no single significant word is found
cleaned_full_title = clean_folder_name(title)
return cleaned_full_title if cleaned_full_title else 'Uncategorized'
@@ -120,6 +130,7 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
"""
Matches folder names from a title based on a list of known name objects.
Each name object is a dict: {'name': 'PrimaryName', 'aliases': ['alias1', ...]}
MODIFIED: Uses substring matching for CJK aliases, word boundary for others.
Args:
title (str): The post title to check.
@@ -137,10 +148,11 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
for pat_str in KNOWN_TXT_MATCH_CLEANUP_PATTERNS:
cleaned_title = re.sub(pat_str, ' ', cleaned_title, flags=re.IGNORECASE)
cleaned_title = re.sub(r'\s+', ' ', cleaned_title).strip()
# Store both original case cleaned title and lower case for different matching
title_lower = cleaned_title.lower()
matched_cleaned_names = set()
# Sort by name length descending to match longer names first (e.g., "Cloud Strife" before "Cloud")
sorted_name_objects = sorted(names_to_match, key=lambda x: len(x.get("name", "")), reverse=True)
@@ -149,19 +161,43 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
aliases = name_obj.get("aliases", [])
if not primary_folder_name or not aliases:
continue
# <<< START MODIFICATION >>>
cleaned_primary_name = clean_folder_name(primary_folder_name)
if not cleaned_primary_name or cleaned_primary_name.lower() in unwanted_keywords:
continue # Skip this entry entirely if its primary name is unwanted or empty
match_found_for_this_object = False
for alias in aliases:
if not alias: continue
alias_lower = alias.lower()
if not alias_lower: continue
# Use word boundaries for accurate matching
pattern = r'\b' + re.escape(alias_lower) + r'\b'
if re.search(pattern, title_lower):
cleaned_primary_name = clean_folder_name(primary_folder_name)
if cleaned_primary_name.lower() not in unwanted_keywords:
# Check if the alias contains CJK characters
if contains_cjk(alias):
# Use simple substring matching for CJK
if alias_lower in title_lower:
matched_cleaned_names.add(cleaned_primary_name)
break # Move to the next name object once a match is found for this one
match_found_for_this_object = True
break # Move to the next name object
else:
# Use original word boundary matching for non-CJK
try:
# Compile pattern for efficiency if used repeatedly, though here it changes each loop
pattern = r'\b' + re.escape(alias_lower) + r'\b'
if re.search(pattern, title_lower):
matched_cleaned_names.add(cleaned_primary_name)
match_found_for_this_object = True
break # Move to the next name object
except re.error as e:
# Log error if the alias creates an invalid regex (unlikely with escape)
print(f"Regex error for alias '{alias}': {e}") # Or use proper logging
continue
# This outer break logic remains the same (though slightly redundant with inner breaks)
if match_found_for_this_object:
pass # Already added and broke inner loop
# <<< END MODIFICATION >>>
return sorted(list(matched_cleaned_names))
@@ -169,6 +205,8 @@ def match_folders_from_filename_enhanced(filename, names_to_match, unwanted_keyw
"""
Matches folder names from a filename, prioritizing longer and more specific aliases.
It returns immediately after finding the first (longest) match.
MODIFIED: Prioritizes boundary-aware matches for Latin characters,
falls back to substring search for CJK compatibility.
Args:
filename (str): The filename to check.
@@ -188,23 +226,49 @@ def match_folders_from_filename_enhanced(filename, names_to_match, unwanted_keyw
for name_obj in names_to_match:
primary_name = name_obj.get("name")
if not primary_name: continue
cleaned_primary_name = clean_folder_name(primary_name)
if not cleaned_primary_name or cleaned_primary_name.lower() in unwanted_keywords:
continue
for alias in name_obj.get("aliases", []):
if alias.lower():
alias_map_to_primary.append((alias.lower(), cleaned_primary_name))
if alias: # Check if alias is not None and not an empty string
alias_lower_val = alias.lower()
if alias_lower_val: # Check again after lowercasing
alias_map_to_primary.append((alias_lower_val, cleaned_primary_name))
# Sort by alias length, descending, to match longer aliases first
alias_map_to_primary.sort(key=lambda x: len(x[0]), reverse=True)
# <<< MODIFICATION: Return the FIRST match found, which will be the longest >>>
# Return the FIRST match found, which will be the longest
for alias_lower, primary_name_for_alias in alias_map_to_primary:
if alias_lower in filename_lower:
# Found the longest possible alias that is a substring. Return immediately.
return [primary_name_for_alias]
try:
# 1. Attempt boundary-aware match first (good for English/Latin)
# Matches alias if it's at the start/end or surrounded by common separators
# The pattern does not use \b; instead it requires an explicit separator (whitespace, _, + or -) or the start/end of the filename on each side of the alias
pattern = r'(?:^|[\s_+-])' + re.escape(alias_lower) + r'(?:[\s_+-]|$)'
if re.search(pattern, filename_lower):
# Found a precise, boundary-aware match. This is the best case.
return [primary_name_for_alias]
# 2. Fallback: Simple substring check (for CJK or other cases)
# This executes ONLY if the boundary match above failed.
# We check if the alias contains CJK OR if the filename does.
# This avoids applying the simple 'in' check for Latin-only aliases in Latin-only filenames.
elif (contains_cjk(alias_lower) or contains_cjk(filename_lower)) and alias_lower in filename_lower:
# This is the fallback for CJK compatibility.
return [primary_name_for_alias]
# If alias is "ul" and filename is "sin+título":
# 1. re.search(r'(?:^|[\s_+-])ul(?:[\s_+-]|$)', "sin+título") -> Fails (good)
# 2. contains_cjk("ul") -> False
# 3. contains_cjk("sin+título") -> False
# 4. No match is found for "ul". (correct)
except re.error as e:
print(f"Regex error matching alias '{alias_lower}' in filename '{filename_lower}': {e}")
continue # Skip this alias if regex fails
# If the loop finishes without any matches, return an empty list.
return []
return []