From aec44f178233aca65d4564ed9b7ca3e82c7fef1b Mon Sep 17 00:00:00 2001 From: Yuvi9587 <114073886+Yuvi9587@users.noreply.github.com> Date: Sat, 10 May 2025 11:07:27 +0530 Subject: [PATCH] Commit --- Known.txt | 206 +++- downloader_utils.py | 1071 +++++++++-------- main.py | 2694 +++++++++++++++++++++++-------------------- tour.py | 248 ++-- 4 files changed, 2299 insertions(+), 1920 deletions(-) diff --git a/Known.txt b/Known.txt index 6301234..6c758ac 100644 --- a/Known.txt +++ b/Known.txt @@ -1,9 +1,197 @@ -Back to Hell -Fade -Jett -Psylocke -Viper -clove -neon -reyna -sage +Ada +Aeris +Alina +Amara +Anya +Aria +Artemis +Ashe +Astrid +Asuka +Athena +Azura +Belladonna +Bianca +C.C. +Calla +Camilla +Cassia +Celeste +Chika +Clara +Delilah +Dia +Diana +Eira +Elara +Eli +Elise +Elma +Ember +Erza +Esme +Evelyn +Evie +Fiora +Freya +Gasai +Greta +Hanayo +Hancock +Haruhi +Hatsume +Hawkeye +Hinata +Holo +Homura +Ichigo +Illya +Inara +Ino +Isla +Isolde +Ivy +Jeanne +Jinx +Jiro +Juniper +Juvia +Kaelin +Kagome +Kagura +Kaida +Kairi +Kali +Kana +Kanao +Kanna +Kiera +Kikyo +Kirari +Korra +Kotori +Kurisu +Kushina +Kyoko +Lan Fan +Leona +Levy +Lilith +Liora +Lira +Lisanna +Lucia +Lucoa +Lucy +Luna +Lust +Lyra +Madoka +Maia +Makima +Makise +Makomo +Mami +Mari +Marin +Mary +Mavis +Mayuri +Medusa +Mei +Merlin +Mikasa +Milly +Mina +Mion +Mira +Mirabel +Misato +Mitsuri +Momo +Morgana +Nadia +Nami +Naomi +Nelliel +Nerissa +Neve +Nezuko +Noelle +Nova +Nozomi +Nunnally +Nyx +Ochaco +Odette +Ophelia +Orihime +Orla +Perona +Phoebe +Raven +Rei +Reyna +Rhea +Rika +Rin +Rin Tohsaka +Rinoa +Ritsuko +Riza +Robin +Rosalie +Rowan +Ruby +Rukia +Rumi +Saber +Sable +Sakura +Sakura Matou +Sango +Sansa +Satoko +Sayaka +Scáthach +Selene +Seline +Serena +Shinobu +Shion +Shirley +Sierra +Skye +Sophie +Soraya +Sylvia +Talia +Tamayo +Tamsin +Tashigi +Tatiana +Temari +Thalia +Tifa +Toga +Tohru +Tsunade +Umi +Valeria +Viola +Violet +Vivi +Wendy +Winry +Wynne +Yara +Yazawa +Yoruichi +Yoshiko +Yuki Nagato +Yumeko +Yuna +Yuno +Zara +Zelda +Zero Two diff --git a/downloader_utils.py b/downloader_utils.py index 7b050ed..72c7831 100644 --- a/downloader_utils.py +++ b/downloader_utils.py @@ -3,12 +3,12 @@ import time import requests import re import threading -import queue +import queue # Not directly used for link queue, but kept for historical reasons import hashlib import http.client import traceback from concurrent.futures import ThreadPoolExecutor, Future, CancelledError, as_completed -import html # Import the html module for unescaping +import html from PyQt5.QtCore import QObject, pyqtSignal, QThread, QMutex, QMutexLocker from urllib.parse import urlparse @@ -21,9 +21,17 @@ except ImportError: from io import BytesIO +# Constants for filename styles, mirroring main.py for clarity if used directly here +STYLE_POST_TITLE = "post_title" +STYLE_ORIGINAL_NAME = "original_name" -fastapi_app = None # Placeholder, not used in this script -KNOWN_NAMES = [] # Global list, populated by main.py +# Constants for skip_words_scope, mirroring main.py +SKIP_SCOPE_FILES = "files" +SKIP_SCOPE_POSTS = "posts" +SKIP_SCOPE_BOTH = "both" + +fastapi_app = None +KNOWN_NAMES = [] IMAGE_EXTENSIONS = { '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp', @@ -33,13 +41,15 @@ VIDEO_EXTENSIONS = { '.mp4', '.mov', '.mkv', '.webm', '.avi', '.wmv', '.flv', '.mpeg', '.mpg', '.m4v', '.3gp', '.ogv', '.ts', '.vob' } +# ADDED: Archive Extensions +ARCHIVE_EXTENSIONS = { + '.zip', '.rar', '.7z', '.tar', '.gz', '.bz2' # Added more common archive types +} def is_title_match_for_character(post_title, character_name_filter): """Checks if a post title contains a specific character name (case-insensitive, whole word).""" if not post_title or not character_name_filter: return False - # Ensure character_name_filter is treated as a whole word, avoid partial matches within larger words. - # Regex: \b matches word boundary. re.escape handles special characters in filter. pattern = r"(?i)\b" + re.escape(character_name_filter) + r"\b" return bool(re.search(pattern, post_title)) @@ -47,17 +57,14 @@ def is_filename_match_for_character(filename, character_name_filter): """Checks if a filename contains a specific character name (case-insensitive, substring).""" if not filename or not character_name_filter: return False - # For filenames, substring matching is often more practical. return character_name_filter.lower() in filename.lower() def clean_folder_name(name): """Cleans a string to be suitable for a folder name.""" if not isinstance(name, str): name = str(name) - # Remove characters that are generally problematic in folder names across OS - cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name) # Allow letters, numbers, whitespace, hyphens, underscores, periods, parentheses - cleaned = cleaned.strip() # Remove leading/trailing whitespace - # Replace sequences of whitespace with a single underscore + cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name) + cleaned = cleaned.strip() cleaned = re.sub(r'\s+', '_', cleaned) return cleaned if cleaned else "untitled_folder" @@ -65,10 +72,8 @@ def clean_folder_name(name): def clean_filename(name): """Cleans a string to be suitable for a file name.""" if not isinstance(name, str): name = str(name) - # Remove characters that are generally problematic in file names across OS - cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name) # Allow letters, numbers, whitespace, hyphens, underscores, periods, parentheses - cleaned = cleaned.strip() # Remove leading/trailing whitespace - # Replace sequences of whitespace with a single underscore + cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name) + cleaned = cleaned.strip() cleaned = re.sub(r'\s+', '_', cleaned) return cleaned if cleaned else "untitled_file" @@ -77,13 +82,11 @@ def extract_folder_name_from_title(title, unwanted_keywords): """Extracts a potential folder name from a title, avoiding unwanted keywords.""" if not title: return 'Uncategorized' title_lower = title.lower() - # Try to find a meaningful token not in unwanted_keywords - tokens = re.findall(r'\b[\w\-]+\b', title_lower) # Find words + tokens = re.findall(r'\b[\w\-]+\b', title_lower) for token in tokens: - clean_token = clean_folder_name(token) # Clean the token itself - if clean_token and clean_token.lower() not in unwanted_keywords: # Check against lowercased unwanted keywords + clean_token = clean_folder_name(token) + if clean_token and clean_token.lower() not in unwanted_keywords: return clean_token - # Fallback to cleaned full title if no suitable token found cleaned_full_title = clean_folder_name(title) return cleaned_full_title if cleaned_full_title else 'Uncategorized' @@ -96,20 +99,17 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords): if not title or not names_to_match: return [] title_lower = title.lower() matched_cleaned_names = set() - # Sort names by length (descending) to match longer names first (e.g., "Spider-Man" before "Spider") sorted_names_to_match = sorted(names_to_match, key=len, reverse=True) for name in sorted_names_to_match: name_lower = name.lower() - if not name_lower: continue # Skip empty names + if not name_lower: continue - # Use word boundary regex to ensure whole word matching pattern = r'\b' + re.escape(name_lower) + r'\b' if re.search(pattern, title_lower): - # Clean the original casing 'name' for folder creation, then lowercase for unwanted keyword check cleaned_name_for_folder = clean_folder_name(name) - if cleaned_name_for_folder.lower() not in unwanted_keywords: # Check against lowercased unwanted keywords - matched_cleaned_names.add(cleaned_name_for_folder) # Add the cleaned name with original casing preserved as much as possible + if cleaned_name_for_folder.lower() not in unwanted_keywords: + matched_cleaned_names.add(cleaned_name_for_folder) return sorted(list(matched_cleaned_names)) @@ -138,6 +138,13 @@ def is_rar(filename): if not filename: return False return filename.lower().endswith('.rar') +# ADDED: Generic is_archive function +def is_archive(filename): + """Checks if the filename has a common archive extension.""" + if not filename: return False + _, ext = os.path.splitext(filename) + return ext.lower() in ARCHIVE_EXTENSIONS + def is_post_url(url): """Checks if the URL likely points to a specific post.""" @@ -152,16 +159,13 @@ def extract_post_info(url_string): try: parsed_url = urlparse(url_string.strip()) domain = parsed_url.netloc.lower() - # Check if the domain is one of the known Kemono or Coomer domains is_kemono = any(d in domain for d in ['kemono.su', 'kemono.party']) is_coomer = any(d in domain for d in ['coomer.su', 'coomer.party']) - if not (is_kemono or is_coomer): return None, None, None # Not a recognized service + if not (is_kemono or is_coomer): return None, None, None path_parts = [part for part in parsed_url.path.strip('/').split('/') if part] - # Standard URL structure: /{service}/user/{user_id}/post/{post_id} - # Or creator page: /{service}/user/{user_id} if len(path_parts) >= 3 and path_parts[1].lower() == 'user': service = path_parts[0] user_id = path_parts[2] @@ -169,8 +173,6 @@ def extract_post_info(url_string): post_id = path_parts[4] return service, user_id, post_id - # API URL structure: /api/v1/{service}/user/{user_id}/post/{post_id} - # Or API creator page: /api/v1/{service}/user/{user_id} if len(path_parts) >= 5 and path_parts[0].lower() == 'api' and \ path_parts[1].lower() == 'v1' and path_parts[3].lower() == 'user': service = path_parts[2] @@ -180,41 +182,35 @@ def extract_post_info(url_string): return service, user_id, post_id except Exception as e: - # Log or handle unexpected errors during URL parsing if necessary print(f"Debug: Exception during extract_post_info for URL '{url_string}': {e}") - return None, None, None # Return None for all if parsing fails or structure is not matched + return None, None, None def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None): """Fetches a single page of posts from the API.""" if cancellation_event and cancellation_event.is_set(): logger(" Fetch cancelled before request.") - raise RuntimeError("Fetch operation cancelled by user.") # Raise error to stop pagination + raise RuntimeError("Fetch operation cancelled by user.") paginated_url = f'{api_url_base}?o={offset}' - logger(f" Fetching: {paginated_url}") + logger(f" Fetching: {paginated_url} (Page approx. {offset // 50 + 1})") try: - response = requests.get(paginated_url, headers=headers, timeout=(10, 60)) # connect_timeout, read_timeout - response.raise_for_status() # Raises HTTPError for bad responses (4XX or 5XX) - # It's good practice to check content type before parsing JSON + response = requests.get(paginated_url, headers=headers, timeout=(10, 60)) + response.raise_for_status() if 'application/json' not in response.headers.get('Content-Type', '').lower(): logger(f"⚠️ Unexpected content type from API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}") - return [] # Return empty list or raise error if JSON is strictly expected + return [] # Return empty list on unexpected content type return response.json() except requests.exceptions.Timeout: - # Log specific timeout and re-raise or handle as a specific error raise RuntimeError(f"Timeout fetching offset {offset} from {paginated_url}") except requests.exceptions.RequestException as e: - # General request exception (includes HTTPError, ConnectionError, etc.) err_msg = f"Error fetching offset {offset} from {paginated_url}: {e}" if e.response is not None: err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})" raise RuntimeError(err_msg) - except ValueError as e: # JSONDecodeError inherits from ValueError - # Handle cases where response is not valid JSON + except ValueError as e: # JSONDecodeError is a subclass of ValueError raise RuntimeError(f"Error decoding JSON from offset {offset} ({paginated_url}): {e}. Response text: {response.text[:200]}") except Exception as e: - # Catch any other unexpected errors raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}") @@ -223,8 +219,9 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non Generator function to fetch post data from Kemono/Coomer API. Handles pagination and yields batches of posts. In Manga Mode, fetches all posts first, then yields them in reverse order (oldest first). + If target_post_id is specified, it will paginate until that post is found or all pages are exhausted. """ - headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'} # Standard headers + headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'} service, user_id, target_post_id = extract_post_info(api_url_input) if cancellation_event and cancellation_event.is_set(): @@ -235,25 +232,21 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non logger(f"❌ Invalid URL or could not extract service/user: {api_url_input}") return - # Page range is ignored for single post URLs if target_post_id and (start_page or end_page): - logger("⚠️ Page range (start/end page) is ignored when a specific post URL is provided.") - start_page = end_page = None + logger("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).") + start_page = end_page = None # Ensure no page limits when searching for a specific post - # Manga mode is only applicable for creator feeds (not single posts) is_creator_feed_for_manga = manga_mode and not target_post_id parsed_input = urlparse(api_url_input) api_domain = parsed_input.netloc - # Ensure we use a valid API domain, default to kemono.su if unrecognized if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']): logger(f"⚠️ Unrecognized domain '{api_domain}'. Defaulting to kemono.su for API calls.") - api_domain = "kemono.su" # Or "coomer.party" if that's preferred default + api_domain = "kemono.su" api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}" page_size = 50 # Kemono API typically returns 50 posts per page - # --- Manga Mode: Fetch all posts first, then reverse --- if is_creator_feed_for_manga: logger(" Manga Mode: Fetching all posts to reverse order (oldest posts processed first)...") all_posts_for_manga_mode = [] @@ -264,33 +257,32 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non break try: posts_batch_manga = fetch_posts_paginated(api_base_url, headers, current_offset_manga, logger, cancellation_event) - if not isinstance(posts_batch_manga, list): # API should always return a list + if not isinstance(posts_batch_manga, list): logger(f"❌ API Error (Manga Mode): Expected list of posts, got {type(posts_batch_manga)}.") break - if not posts_batch_manga: # Empty list means no more posts + if not posts_batch_manga: logger("✅ Reached end of posts (Manga Mode fetch all).") break all_posts_for_manga_mode.extend(posts_batch_manga) - current_offset_manga += len(posts_batch_manga) # API doesn't use page_size in offset, but number of posts - time.sleep(0.6) # Be respectful to the API - except RuntimeError as e: # Catch errors from fetch_posts_paginated + current_offset_manga += len(posts_batch_manga) # Use actual length + time.sleep(0.6) + except RuntimeError as e: if "cancelled by user" in str(e).lower(): logger(f"ℹ️ Manga mode pagination stopped due to cancellation: {e}") else: logger(f"❌ {e}\n Aborting manga mode pagination.") - break # Stop fetching on error + break # Stop on runtime error except Exception as e: # Catch any other unexpected errors logger(f"❌ Unexpected error during manga mode fetch: {e}") traceback.print_exc() - break + break # Stop on other errors - if cancellation_event and cancellation_event.is_set(): return # Early exit if cancelled + if cancellation_event and cancellation_event.is_set(): return if all_posts_for_manga_mode: logger(f" Manga Mode: Fetched {len(all_posts_for_manga_mode)} total posts. Reversing order...") - all_posts_for_manga_mode.reverse() # Oldest posts first + all_posts_for_manga_mode.reverse() # Oldest first - # Yield in batches of page_size for i in range(0, len(all_posts_for_manga_mode), page_size): if cancellation_event and cancellation_event.is_set(): logger(" Manga mode post yielding cancelled.") @@ -298,85 +290,89 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non yield all_posts_for_manga_mode[i:i + page_size] else: logger(" Manga Mode: No posts found to process.") - return # End of Manga Mode logic + return # End of manga mode logic - # --- Normal Mode or Single Post Mode --- + # --- Regular pagination (Creator feed or Single Post search) --- current_page_num = 1 current_offset = 0 - processed_target_post_flag = False # For single post URLs + processed_target_post_flag = False - if start_page and start_page > 1: - current_offset = (start_page - 1) * page_size # Calculate offset for starting page + if start_page and start_page > 1 and not target_post_id: # Only apply start_page if not searching for a specific post + current_offset = (start_page - 1) * page_size current_page_num = start_page logger(f" Starting from page {current_page_num} (calculated offset {current_offset}).") - while True: # Pagination loop + while True: if cancellation_event and cancellation_event.is_set(): logger(" Post fetching loop cancelled.") break - if end_page and current_page_num > end_page: - logger(f"✅ Reached specified end page ({end_page}). Stopping.") + + if target_post_id and processed_target_post_flag: # If target post was found and yielded in a previous iteration + # logger(f"✅ Target post {target_post_id} was processed. Stopping pagination.") # Logged when found break - if target_post_id and processed_target_post_flag: # If single post was found and processed - logger(f"✅ Target post {target_post_id} has been processed.") + + # For creator feeds (not target_post_id mode), check end_page limit + if not target_post_id and end_page and current_page_num > end_page: + logger(f"✅ Reached specified end page ({end_page}) for creator feed. Stopping.") break try: posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event) if not isinstance(posts_batch, list): - logger(f"❌ API Error: Expected list of posts, got {type(posts_batch)} at page {current_page_num}.") + logger(f"❌ API Error: Expected list of posts, got {type(posts_batch)} at page {current_page_num} (offset {current_offset}).") break - except RuntimeError as e: # Catch errors from fetch_posts_paginated + except RuntimeError as e: if "cancelled by user" in str(e).lower(): logger(f"ℹ️ Pagination stopped due to cancellation: {e}") else: - logger(f"❌ {e}\n Aborting pagination at page {current_page_num}.") - break + logger(f"❌ {e}\n Aborting pagination at page {current_page_num} (offset {current_offset}).") + break # Stop on runtime error except Exception as e: # Catch any other unexpected errors - logger(f"❌ Unexpected error fetching page {current_page_num}: {e}") + logger(f"❌ Unexpected error fetching page {current_page_num} (offset {current_offset}): {e}") traceback.print_exc() - break + break # Stop on other errors - if not posts_batch: # No more posts - if current_page_num == (start_page or 1) and not target_post_id : # No posts on first page of a creator feed - logger("😕 No posts found on the first page checked.") - elif not target_post_id: # End of creator feed - logger("✅ Reached end of posts (no more content).") - break # Exit pagination loop + if not posts_batch: # API returned an empty list, meaning no more posts + if target_post_id and not processed_target_post_flag: + logger(f"❌ Target post {target_post_id} not found after checking all available pages (API returned no more posts at offset {current_offset}).") + elif not target_post_id: # Normal creator feed end + if current_page_num == (start_page or 1): # Check if it was the first page attempted + logger(f"😕 No posts found on the first page checked (page {current_page_num}, offset {current_offset}).") + else: + logger(f"✅ Reached end of posts (no more content from API at offset {current_offset}).") + break # Exit while loop - if target_post_id: # Processing a single post URL + if target_post_id and not processed_target_post_flag: matching_post = next((p for p in posts_batch if str(p.get('id')) == str(target_post_id)), None) if matching_post: - logger(f"🎯 Found target post {target_post_id}.") - yield [matching_post] # Yield as a list containing one item - processed_target_post_flag = True # Mark as processed - else: - # This case should ideally not happen if the post ID is valid and API is consistent. - # If the API returns posts in pages, a specific post ID might not be on the first page if offset isn't 0. - # However, for a direct post URL, we expect it or an error. - logger(f"❌ Target post {target_post_id} not found in the batch from offset {current_offset}. This may indicate the post URL is incorrect or the API behavior is unexpected.") - break # Stop if target post not found where expected - else: # Processing a creator feed (not a single post) - yield posts_batch # Yield the batch of posts + logger(f"🎯 Found target post {target_post_id} on page {current_page_num} (offset {current_offset}).") + yield [matching_post] # Yield only the matching post as a list + processed_target_post_flag = True + # Loop will break at the top in the next iteration due to processed_target_post_flag + # If not found in this batch, the loop continues to the next page. + # Logger message for "not found in batch" is removed here to avoid spam if post is on a later page. + elif not target_post_id: # Processing a creator feed (no specific target post) + yield posts_batch - if not (target_post_id and processed_target_post_flag): # If not a single post that was just processed - if not posts_batch : break # Should be redundant due to check above, but safe - current_offset += len(posts_batch) # Kemono API uses item offset, not page offset - current_page_num += 1 - time.sleep(0.6) # Be respectful to the API - else: # Single post was processed, exit loop + if processed_target_post_flag: # If we just found and yielded the target post, stop. break + + # Increment page and offset for the next iteration + current_offset += len(posts_batch) # Use actual length of batch for offset + current_page_num += 1 + time.sleep(0.6) # Keep the delay - # Final check if a specific target post was requested but not found + # Final check after the loop, specifically if a target post was being searched for but not found if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()): - logger(f"❌ Target post {target_post_id} could not be found after checking relevant pages.") + # This log might be redundant if the one inside "if not posts_batch:" already covered it, + # but it serves as a final confirmation if the loop exited for other reasons before exhausting pages. + logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).") def get_link_platform(url): """Attempts to identify the platform of an external link from its domain.""" try: domain = urlparse(url).netloc.lower() - # Specific known platforms (add more as needed) if 'drive.google.com' in domain: return 'google drive' if 'mega.nz' in domain or 'mega.io' in domain: return 'mega' if 'dropbox.com' in domain: return 'dropbox' @@ -385,32 +381,26 @@ def get_link_platform(url): if 'twitter.com' in domain or 'x.com' in domain: return 'twitter/x' if 'discord.gg' in domain or 'discord.com/invite' in domain: return 'discord invite' if 'pixiv.net' in domain: return 'pixiv' - if 'kemono.su' in domain or 'kemono.party' in domain: return 'kemono' # Explicitly identify kemono - if 'coomer.su' in domain or 'coomer.party' in domain: return 'coomer' # Explicitly identify coomer + if 'kemono.su' in domain or 'kemono.party' in domain: return 'kemono' + if 'coomer.su' in domain or 'coomer.party' in domain: return 'coomer' - # Generic extraction for other domains (e.g., 'example' from 'www.example.com') parts = domain.split('.') if len(parts) >= 2: - # Return the second-to-last part for common structures (e.g., 'google' from google.com) - # Avoid returning generic TLDs like 'com', 'org', 'net' as the platform - # Handle cases like 'google.co.uk' -> 'google' if parts[-2] not in ['com', 'org', 'net', 'gov', 'edu', 'co'] or len(parts) == 2: return parts[-2] elif len(parts) >= 3 and parts[-3] not in ['com', 'org', 'net', 'gov', 'edu', 'co']: return parts[-3] - else: # Fallback to full domain if unsure or very short domain + else: return domain - return 'external' # Default if domain parsing fails or is too simple (e.g., 'localhost') - except Exception: return 'unknown' # Error case + return 'external' + except Exception: return 'unknown' class PostProcessorSignals(QObject): """Defines signals used by PostProcessorWorker to communicate with the GUI thread.""" - progress_signal = pyqtSignal(str) # Generic log messages - file_download_status_signal = pyqtSignal(bool) # True if a file download starts, False if ends/fails - # Signal carries post_title, link_text, link_url, platform - external_link_signal = pyqtSignal(str, str, str, str) - # Signal carries filename, downloaded_bytes, total_bytes for progress bar + progress_signal = pyqtSignal(str) + file_download_status_signal = pyqtSignal(bool) + external_link_signal = pyqtSignal(str, str, str, str) file_progress_signal = pyqtSignal(str, int, int) @@ -423,46 +413,49 @@ class PostProcessorWorker: compress_images, download_thumbnails, service, user_id, api_url_input, cancellation_event, signals, downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock, - skip_words_list=None, show_external_links=False, + skip_words_list=None, + skip_words_scope=SKIP_SCOPE_FILES, # New parameter with default + show_external_links=False, extract_links_only=False, num_file_threads=4, skip_current_file_flag=None, - manga_mode_active=False + manga_mode_active=False, + manga_filename_style=STYLE_POST_TITLE ): self.post = post_data self.download_root = download_root self.known_names = known_names self.filter_character_list = filter_character_list if filter_character_list else [] self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else set() - self.filter_mode = filter_mode # 'image', 'video', or 'all' + self.filter_mode = filter_mode self.skip_zip = skip_zip self.skip_rar = skip_rar self.use_subfolders = use_subfolders self.use_post_subfolders = use_post_subfolders - self.target_post_id_from_initial_url = target_post_id_from_initial_url # ID from initial URL if it was a post URL - self.custom_folder_name = custom_folder_name # For single post downloads + self.target_post_id_from_initial_url = target_post_id_from_initial_url + self.custom_folder_name = custom_folder_name self.compress_images = compress_images self.download_thumbnails = download_thumbnails self.service = service self.user_id = user_id - self.api_url_input = api_url_input # The original URL input by the user + self.api_url_input = api_url_input self.cancellation_event = cancellation_event - self.signals = signals # For emitting progress, logs, etc. - self.skip_current_file_flag = skip_current_file_flag # Event to skip current file download + self.signals = signals + self.skip_current_file_flag = skip_current_file_flag - # Sets and locks for tracking downloaded files/hashes across threads/workers self.downloaded_files = downloaded_files if downloaded_files is not None else set() self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set() self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock() self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock() self.skip_words_list = skip_words_list if skip_words_list is not None else [] - self.show_external_links = show_external_links # Whether to extract and log external links - self.extract_links_only = extract_links_only # If true, only extracts links, no downloads - self.num_file_threads = num_file_threads # Threads for downloading files within this post + self.skip_words_scope = skip_words_scope # Store the new scope + self.show_external_links = show_external_links + self.extract_links_only = extract_links_only + self.num_file_threads = num_file_threads - self.manga_mode_active = manga_mode_active # True if manga mode is on + self.manga_mode_active = manga_mode_active + self.manga_filename_style = manga_filename_style - # Disable compression if Pillow is not available if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found.") self.compress_images = False @@ -471,7 +464,7 @@ class PostProcessorWorker: """Emits a log message via the progress_signal if available.""" if self.signals and hasattr(self.signals, 'progress_signal'): self.signals.progress_signal.emit(message) - else: # Fallback if signals are not connected (e.g., testing) + else: print(f"(Worker Log - No Signal): {message}") def check_cancel(self): @@ -479,436 +472,428 @@ class PostProcessorWorker: return self.cancellation_event.is_set() def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event, - post_title="", file_index_in_post=0): # Added post_title here - """Downloads a single file, handles retries, compression, and hash checking.""" - if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1 # Downloaded, Skipped + post_title="", file_index_in_post=0, num_files_in_this_post=1): + """ + Downloads a single file, handles retries, compression, and hash checking. + Returns: + (int, int, str, bool): (downloaded_count, skipped_count, final_filename_saved, was_original_name_kept_flag) + """ + was_original_name_kept_flag = False + final_filename_saved_for_return = "" + + + if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False file_url = file_info.get('url') - # Use '_original_name_for_log' if available (set in process()), otherwise 'name' api_original_filename = file_info.get('_original_name_for_log', file_info.get('name')) if not file_url or not api_original_filename: self.logger(f"⚠️ Skipping file from post {original_post_id_for_log}: Missing URL or original filename. Info: {str(file_info)[:100]}") - return 0, 1 + return 0, 1, api_original_filename or "", False - # --- Skip Check 1: Skip Words (Always based on Filename) --- - if self.skip_words_list: - content_to_check_for_skip_words = api_original_filename.lower() # ALWAYS use filename for skip words - log_source_for_skip_words = f"Filename '{api_original_filename}'" - + final_filename_saved_for_return = api_original_filename + + # Apply skip_words_list based on skip_words_scope (for files) + if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_FILES or self.skip_words_scope == SKIP_SCOPE_BOTH): + filename_to_check_for_skip_words = api_original_filename.lower() for skip_word in self.skip_words_list: - if skip_word.lower() in content_to_check_for_skip_words: - self.logger(f" -> Skip File (Keyword Match): {log_source_for_skip_words} contains '{skip_word}'.") - return 0, 1 + if skip_word.lower() in filename_to_check_for_skip_words: + self.logger(f" -> Skip File (Keyword in Original Name '{skip_word}'): '{api_original_filename}'. Scope: {self.skip_words_scope}") + return 0, 1, api_original_filename, False - # --- Character Filter (Global Gate) --- - # If character filters are active, the item (post for manga, file for normal) must match. if self.filter_character_list: matches_any_character_filter = False - if self.manga_mode_active: - # Manga Mode: Character filter applies to POST TITLE + if self.manga_mode_active: # In manga mode, character filter applies to post title primarily if any(is_title_match_for_character(post_title, char_filter) for char_filter in self.filter_character_list): matches_any_character_filter = True - if not matches_any_character_filter: - # This log might be redundant if the post-level check in process() already skipped it, - # but it's a safeguard if a file somehow reaches here without its post title matching. - self.logger(f" -> Skip File (Manga Mode - Post Title No Char Match): Title '{post_title[:30]}' doesn't match active character filters for this file.") - return 0, 1 - else: # Normal mode: Character filter applies to FILENAME + # Fallback: if title doesn't match, but filename does, still consider it a match for manga mode if desired + # For now, let's stick to title match for manga post filtering, file name match for file filtering. + # If you want manga mode character filter to also check filenames, uncomment below: + # if not matches_any_character_filter and any(is_filename_match_for_character(api_original_filename, char_filter) for char_filter in self.filter_character_list): + # matches_any_character_filter = True + else: # Normal mode, character filter applies to filename if any(is_filename_match_for_character(api_original_filename, char_filter) for char_filter in self.filter_character_list): matches_any_character_filter = True - if not matches_any_character_filter: - self.logger(f" -> Skip File (Normal Mode - Filename No Char Match): '{api_original_filename}' doesn't match active character filters.") - return 0, 1 + + if not matches_any_character_filter: # If no character filter matched (based on mode) + self.logger(f" -> Skip File (No Char Match): '{api_original_filename}' (Post: '{post_title[:30]}...') doesn't match character filters.") + return 0, 1, api_original_filename, False - # --- Filename Generation (Manga Mode vs Normal Mode) --- - _, original_ext = os.path.splitext(api_original_filename) - if original_ext and not original_ext.startswith('.'): original_ext = '.' + original_ext - elif not original_ext: # Try to derive extension if missing - _, temp_ext = os.path.splitext(clean_filename(api_original_filename)) # Clean first - if temp_ext and not temp_ext.startswith('.'): original_ext = '.' + temp_ext - elif temp_ext: original_ext = temp_ext - else: original_ext = '' # No extension found + original_filename_cleaned_base, original_ext = os.path.splitext(clean_filename(api_original_filename)) + if not original_ext.startswith('.'): original_ext = '.' + original_ext if original_ext else '' filename_to_save = "" if self.manga_mode_active: - # Manga mode renaming logic (uses post_title and sequence) - if post_title and post_title.strip(): - cleaned_post_title_full = clean_filename(post_title.strip()) # Clean the post title for filename use - original_filename_base, _ = os.path.splitext(api_original_filename) # Get base of original API filename + if self.manga_filename_style == STYLE_ORIGINAL_NAME: + filename_to_save = clean_filename(api_original_filename) + was_original_name_kept_flag = True # Original name is kept by definition here + elif self.manga_filename_style == STYLE_POST_TITLE: + if post_title and post_title.strip(): + cleaned_post_title_base = clean_filename(post_title.strip()) + if num_files_in_this_post > 1: # Multi-file post + if file_index_in_post == 0: # First file of multi-file post + filename_to_save = f"{cleaned_post_title_base}{original_ext}" + was_original_name_kept_flag = False + else: # Subsequent files of multi-file post + filename_to_save = clean_filename(api_original_filename) # Keep original for subsequent + was_original_name_kept_flag = True + else: # Single file post in manga mode + filename_to_save = f"{cleaned_post_title_base}{original_ext}" + was_original_name_kept_flag = False + else: # Manga mode, post title style, but post_title is missing + filename_to_save = clean_filename(api_original_filename) + was_original_name_kept_flag = False # Not truly "kept original" in the spirit of the style choice + self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save}'.") + else: # Unknown manga style + self.logger(f"⚠️ Manga mode: Unknown filename style '{self.manga_filename_style}'. Defaulting to original filename for '{api_original_filename}'.") + filename_to_save = clean_filename(api_original_filename) + was_original_name_kept_flag = False # Or True, depending on interpretation. Let's say False as it's a fallback. - # Try to extract a sequence number from the original filename - extracted_sequence_from_original = "" - # Simple number at the end: e.g., "image_01", "pic123" - simple_end_match = re.search(r'(\d+)$', original_filename_base) - if simple_end_match: - extracted_sequence_from_original = simple_end_match.group(1).zfill(2) # Pad with zero if needed - else: - # More complex patterns like "page 01", "ch-2", "ep_003" - complex_match = re.search(r'(?:[ _.\-/]|^)(?:p|page|ch|chapter|ep|episode|v|vol|volume|no|num|number|pt|part)[ _.\-]*(\d+)', original_filename_base, re.IGNORECASE) - if complex_match: - extracted_sequence_from_original = complex_match.group(1).zfill(2) # Pad - - # Base for new filename from post title, removing existing page/chapter numbers from title - cleaned_title_base = re.sub( - r'[|\[\]()]*[ _.\-]*(?:page|p|ch|chapter|ep|episode|v|vol|volume|no|num|number|pt|part)s?[ _.\-]*\d+([ _.\-]+\d+)?([ _.\-]*(?:END|FIN))?$', - '', - cleaned_post_title_full, - flags=re.IGNORECASE - ).strip() - if not cleaned_title_base: # Fallback if regex strips everything - cleaned_title_base = cleaned_post_title_full - cleaned_title_base = cleaned_title_base.rstrip(' _.-') # Clean trailing separators - - if extracted_sequence_from_original: - filename_to_save = f"{cleaned_title_base} - {extracted_sequence_from_original}{original_ext}" - else: - # Fallback to file index in post if no sequence found in original filename - fallback_sequence = str(file_index_in_post + 1).zfill(2) # Pad with zero - filename_to_save = f"{cleaned_title_base} - {fallback_sequence}{original_ext}" - - # Handle potential filename collisions by appending a counter + # Collision handling for manga mode filenames + if filename_to_save: counter = 1 base_name_coll, ext_coll = os.path.splitext(filename_to_save) temp_filename_for_collision_check = filename_to_save + # Ensure unique filename in target folder while os.path.exists(os.path.join(target_folder_path, temp_filename_for_collision_check)): - temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}" + # If it's the first file of a multi-file post using post_title style, append _N + if self.manga_filename_style == STYLE_POST_TITLE and file_index_in_post == 0 and num_files_in_this_post > 1: + temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}" + # If it's original name style, or subsequent file, or single file post, append _N to its base + else: + temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}" counter += 1 if temp_filename_for_collision_check != filename_to_save: - # self.logger(f" Manga Mode: Collision detected. Adjusted filename to '{temp_filename_for_collision_check}'") filename_to_save = temp_filename_for_collision_check - else: # Manga mode but post_title is missing (should be rare) - filename_to_save = clean_filename(api_original_filename) # Fallback to cleaned original - self.logger(f"⚠️ Manga mode: Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save}'.") - else: # Normal mode + else: # Fallback if filename_to_save ended up empty + filename_to_save = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}" + self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save}'.") + was_original_name_kept_flag = False + + else: # Not Manga Mode filename_to_save = clean_filename(api_original_filename) + was_original_name_kept_flag = False # Not manga mode, so this flag isn't relevant in the same way + # Collision handling for non-manga mode + counter = 1 + base_name_coll, ext_coll = os.path.splitext(filename_to_save) + temp_filename_for_collision_check = filename_to_save + while os.path.exists(os.path.join(target_folder_path, temp_filename_for_collision_check)): + temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}" + counter += 1 + if temp_filename_for_collision_check != filename_to_save: + filename_to_save = temp_filename_for_collision_check - final_filename_for_sets_and_saving = filename_to_save # This is the name used for saving and duplicate checks + final_filename_for_sets_and_saving = filename_to_save + final_filename_saved_for_return = final_filename_for_sets_and_saving - # --- File Type Filtering (applies to both modes, based on original filename) --- - if not self.download_thumbnails: # Thumbnail mode bypasses these filters - is_img_type = is_image(api_original_filename) # Check original type + if not self.download_thumbnails: + # Determine file type based on the original API filename + is_img_type = is_image(api_original_filename) is_vid_type = is_video(api_original_filename) - is_zip_type = is_zip(api_original_filename) - is_rar_type = is_rar(api_original_filename) + # Use the generic is_archive function + is_archive_type = is_archive(api_original_filename) - if self.filter_mode == 'image' and not is_img_type: - self.logger(f" -> Filter Skip: '{api_original_filename}' (Not Image).") - return 0,1 - if self.filter_mode == 'video' and not is_vid_type: - self.logger(f" -> Filter Skip: '{api_original_filename}' (Not Video).") - return 0,1 - if self.skip_zip and is_zip_type: + + # ===== MODIFICATION START ===== + if self.filter_mode == 'archive': + if not is_archive_type: # If in 'archive' mode and the file is NOT an archive + self.logger(f" -> Filter Skip (Archive Mode): '{api_original_filename}' (Not an Archive).") + return 0, 1, api_original_filename, False + # If it IS an archive, it will proceed. + # self.skip_zip and self.skip_rar are False in this mode (set in main.py), so they won't cause a skip. + elif self.filter_mode == 'image': + if not is_img_type: + self.logger(f" -> Filter Skip: '{api_original_filename}' (Not Image).") + return 0, 1, api_original_filename, False + elif self.filter_mode == 'video': + if not is_vid_type: + self.logger(f" -> Filter Skip: '{api_original_filename}' (Not Video).") + return 0, 1, api_original_filename, False + # No specific 'elif self.filter_mode == 'all':' is needed here, as 'all' implies no primary type filtering. + # The self.skip_zip / self.skip_rar checks below will handle user preference for skipping archives in 'all' mode. + + # These skip checks are now primarily for 'all' mode or if filter_mode is something else. + # In 'archive' mode, self.skip_zip and self.skip_rar will be False. + if self.skip_zip and is_zip(api_original_filename): # Use specific is_zip for the skip_zip flag self.logger(f" -> Pref Skip: '{api_original_filename}' (ZIP).") - return 0,1 - if self.skip_rar and is_rar_type: + return 0, 1, api_original_filename, False + if self.skip_rar and is_rar(api_original_filename): # Use specific is_rar for the skip_rar flag self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).") - return 0,1 + return 0, 1, api_original_filename, False + # ===== MODIFICATION END ===== - target_folder_basename = os.path.basename(target_folder_path) # For logging + target_folder_basename = os.path.basename(target_folder_path) current_save_path = os.path.join(target_folder_path, final_filename_for_sets_and_saving) - # --- Duplicate Checks (Path, Global Filename, Hash) --- if os.path.exists(current_save_path) and os.path.getsize(current_save_path) > 0: self.logger(f" -> Exists (Path): '{final_filename_for_sets_and_saving}' in '{target_folder_basename}'.") - with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving) # Add to global set - return 0, 1 - + with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving) # Add final name + return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag + with self.downloaded_files_lock: if final_filename_for_sets_and_saving in self.downloaded_files: - self.logger(f" -> Global Skip (Filename): '{final_filename_for_sets_and_saving}' already recorded as downloaded this session.") - return 0, 1 + self.logger(f" -> Global Skip (Filename): '{final_filename_for_sets_and_saving}' already recorded this session.") + return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag - # --- Download Loop with Retries --- max_retries = 3 - retry_delay = 5 # seconds - downloaded_size_bytes = 0 + retry_delay = 5 + downloaded_size_bytes = 0 calculated_file_hash = None - file_content_bytes = None # BytesIO to hold downloaded content - total_size_bytes = 0 # From Content-Length header, set on first attempt + file_content_bytes = None + total_size_bytes = 0 # Initialize total_size_bytes for this download attempt download_successful_flag = False - for attempt_num in range(max_retries + 1): # max_retries means max_retries + 1 attempts total + for attempt_num in range(max_retries + 1): if self.check_cancel() or (skip_event and skip_event.is_set()): - break # Exit retry loop if cancelled + break try: if attempt_num > 0: self.logger(f" Retrying '{api_original_filename}' (Attempt {attempt_num}/{max_retries})...") time.sleep(retry_delay * (2**(attempt_num - 1))) # Exponential backoff if self.signals and hasattr(self.signals, 'file_download_status_signal'): - self.signals.file_download_status_signal.emit(True) # Signal download start + self.signals.file_download_status_signal.emit(True) # Indicate download attempt start - response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True) # connect_timeout, read_timeout + response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True) # Generous timeout response.raise_for_status() # Check for HTTP errors current_total_size_bytes_from_headers = int(response.headers.get('Content-Length', 0)) - if attempt_num == 0: # First attempt, log initial size + if attempt_num == 0: # Only set total_size_bytes on the first attempt from headers total_size_bytes = current_total_size_bytes_from_headers size_str = f"{total_size_bytes / (1024 * 1024):.2f} MB" if total_size_bytes > 0 else "unknown size" self.logger(f"⬇️ Downloading: '{api_original_filename}' (Size: {size_str}) [Saving as: '{final_filename_for_sets_and_saving}']") - # Use the size from the current attempt for progress reporting - current_attempt_total_size = current_total_size_bytes_from_headers + current_attempt_total_size = total_size_bytes # Use the initial total_size for progress calculation - file_content_buffer = BytesIO() # Buffer for this attempt's content + file_content_buffer = BytesIO() current_attempt_downloaded_bytes = 0 md5_hasher = hashlib.md5() last_progress_time = time.time() for chunk in response.iter_content(chunk_size=1 * 1024 * 1024): # 1MB chunks if self.check_cancel() or (skip_event and skip_event.is_set()): - break # Stop reading chunks if cancelled + break if chunk: file_content_buffer.write(chunk) md5_hasher.update(chunk) current_attempt_downloaded_bytes += len(chunk) - # Emit progress signal periodically if time.time() - last_progress_time > 1 and current_attempt_total_size > 0 and \ self.signals and hasattr(self.signals, 'file_progress_signal'): self.signals.file_progress_signal.emit( api_original_filename, # Show original name in progress current_attempt_downloaded_bytes, - current_attempt_total_size + current_attempt_total_size ) last_progress_time = time.time() if self.check_cancel() or (skip_event and skip_event.is_set()): if file_content_buffer: file_content_buffer.close() - break # Break from retry loop if cancelled during chunk iteration - - # Check if download was successful for this attempt - if current_attempt_downloaded_bytes > 0: # Successfully downloaded some data + break # Exit retry loop if cancelled + + # After loop, check if download was successful for this attempt + if current_attempt_downloaded_bytes > 0 or (current_attempt_total_size == 0 and response.status_code == 200): # Successfully downloaded something or it's a valid 0-byte file calculated_file_hash = md5_hasher.hexdigest() downloaded_size_bytes = current_attempt_downloaded_bytes - if file_content_bytes: file_content_bytes.close() # Close previous attempt's buffer - file_content_bytes = file_content_buffer # Keep this attempt's content - file_content_bytes.seek(0) # Reset pointer for reading + if file_content_bytes: file_content_bytes.close() # Close previous buffer if any + file_content_bytes = file_content_buffer # Assign the new buffer + file_content_bytes.seek(0) # Rewind for reading download_successful_flag = True - break # Exit retry loop on success - elif current_attempt_total_size == 0 and response.status_code == 200: # Handle 0-byte files - self.logger(f" Note: '{api_original_filename}' is a 0-byte file according to server.") - calculated_file_hash = md5_hasher.hexdigest() # Hash of empty content - downloaded_size_bytes = 0 - if file_content_bytes: file_content_bytes.close() - file_content_bytes = file_content_buffer # Keep empty buffer - file_content_bytes.seek(0) - download_successful_flag = True - break # Exit retry loop - else: # No data or failed attempt (e.g. connection dropped before any data) - if file_content_buffer: file_content_buffer.close() # Discard this attempt's buffer + break # Successful download, exit retry loop + else: # No bytes downloaded, and not a 0-byte file case + if file_content_buffer: file_content_buffer.close() + # Continue to next retry if not max retries except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e: self.logger(f" ❌ Download Error (Retryable): {api_original_filename}. Error: {e}") if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close() - except requests.exceptions.RequestException as e: # Non-retryable (like 404) + # Continue to next retry if not max retries + except requests.exceptions.RequestException as e: # Non-retryable HTTP errors self.logger(f" ❌ Download Error (Non-Retryable): {api_original_filename}. Error: {e}") if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close() - break # Break from retry loop + break # Exit retry loop except Exception as e: # Other unexpected errors self.logger(f" ❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}") if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close() - break # Break from retry loop + break # Exit retry loop finally: if self.signals and hasattr(self.signals, 'file_download_status_signal'): - self.signals.file_download_status_signal.emit(False) # Signal download end/attempt end - # End of retry loop - - # Emit final progress update (e.g., 100% or 0/0 if failed) + self.signals.file_download_status_signal.emit(False) # Indicate download attempt end + + # Final progress update after all retries or success if self.signals and hasattr(self.signals, 'file_progress_signal'): - # Use total_size_bytes from the first successful header read for consistency in total final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes self.signals.file_progress_signal.emit(api_original_filename, downloaded_size_bytes, final_total_for_progress) - if self.check_cancel() or (skip_event and skip_event.is_set()): self.logger(f" ⚠️ Download interrupted for {api_original_filename}.") if file_content_bytes: file_content_bytes.close() - return 0, 1 # Skipped due to interruption + return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag if not download_successful_flag: self.logger(f"❌ Download failed for '{api_original_filename}' after {max_retries + 1} attempts.") - if file_content_bytes: file_content_bytes.close() - return 0, 1 # Skipped due to download failure + if file_content_bytes: file_content_bytes.close() + return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag - # --- Hash Check (post-download), Compression, Saving --- + # Check hash against already downloaded files (session-based) with self.downloaded_file_hashes_lock: if calculated_file_hash in self.downloaded_file_hashes: self.logger(f" -> Content Skip (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...) already downloaded this session.") - with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving) # Still mark filename as "processed" + with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving) # Add final name if file_content_bytes: file_content_bytes.close() - return 0, 1 # Skipped due to hash duplicate + return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag - bytes_to_write = file_content_bytes # This is the BytesIO from the successful download - final_filename_after_processing = final_filename_for_sets_and_saving # May change if compressed - current_save_path_final = current_save_path # May change if filename changes due to compression + bytes_to_write = file_content_bytes # This is the BytesIO object with downloaded content + final_filename_after_processing = final_filename_for_sets_and_saving + current_save_path_final = current_save_path # Path with potentially collided name - is_img_for_compress_check = is_image(api_original_filename) # Check original type for compression eligibility - if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024): # Compress if > 1.5MB + is_img_for_compress_check = is_image(api_original_filename) # Check original name for image type + if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024): # 1.5MB threshold self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...") try: - # Ensure bytes_to_write is at the beginning for Pillow - bytes_to_write.seek(0) + bytes_to_write.seek(0) # Ensure buffer is at the beginning with Image.open(bytes_to_write) as img_obj: - # Handle palette mode images and convert to RGB/RGBA for WebP - if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA') + # Handle palette mode images by converting to RGBA/RGB + if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA') elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB') compressed_bytes_io = BytesIO() - img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4) # method 4 is a good balance + img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4) # method=4 is a good balance compressed_size = compressed_bytes_io.getbuffer().nbytes - # Only use compressed if significantly smaller (e.g., >10% reduction) - if compressed_size < downloaded_size_bytes * 0.9: + if compressed_size < downloaded_size_bytes * 0.9: # Only save if significantly smaller (e.g., 10% reduction) self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.") - bytes_to_write.close() # Close original downloaded content stream - bytes_to_write = compressed_bytes_io # Use compressed content stream - bytes_to_write.seek(0) # Reset pointer for writing + bytes_to_write.close() # Close original downloaded buffer + bytes_to_write = compressed_bytes_io # Switch to compressed buffer + bytes_to_write.seek(0) # Rewind compressed buffer base_name_orig, _ = os.path.splitext(final_filename_for_sets_and_saving) - final_filename_after_processing = base_name_orig + '.webp' # Change extension - current_save_path_final = os.path.join(target_folder_path, final_filename_after_processing) + final_filename_after_processing = base_name_orig + '.webp' + current_save_path_final = os.path.join(target_folder_path, final_filename_after_processing) # Update save path self.logger(f" Updated filename (compressed): {final_filename_after_processing}") else: - self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0) # Reset pointer if not using compressed + self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0) # Rewind original if not using compressed except Exception as comp_e: - self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); bytes_to_write.seek(0) # Reset pointer + self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); bytes_to_write.seek(0) # Rewind original - # Check for existence again if filename changed due to compression + final_filename_saved_for_return = final_filename_after_processing # This is the name that will be saved + + # Final check if the (potentially new, e.g. .webp) filename already exists if final_filename_after_processing != final_filename_for_sets_and_saving and \ os.path.exists(current_save_path_final) and os.path.getsize(current_save_path_final) > 0: self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{target_folder_basename}'.") with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing) bytes_to_write.close() - return 0, 1 + return 0, 1, final_filename_after_processing, was_original_name_kept_flag - # --- Save the file --- try: - os.makedirs(os.path.dirname(current_save_path_final), exist_ok=True) # Ensure directory exists + os.makedirs(os.path.dirname(current_save_path_final), exist_ok=True) with open(current_save_path_final, 'wb') as f_out: - f_out.write(bytes_to_write.getvalue()) # Write content + f_out.write(bytes_to_write.getvalue()) - # Add to downloaded sets upon successful save with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash) - with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing) + with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing) # Add final name self.logger(f"✅ Saved: '{final_filename_after_processing}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{target_folder_basename}'") - time.sleep(0.05) # Small delay, can be removed if not needed - return 1, 0 # Downloaded, Skipped + time.sleep(0.05) # Small delay + return 1, 0, final_filename_after_processing, was_original_name_kept_flag except Exception as save_err: self.logger(f"❌ Save Fail for '{final_filename_after_processing}': {save_err}") - if os.path.exists(current_save_path_final): # Attempt to remove partial file + if os.path.exists(current_save_path_final): # Attempt to clean up partial file try: os.remove(current_save_path_final); except OSError: self.logger(f" -> Failed to remove partially saved file: {current_save_path_final}") - return 0, 1 # Skipped due to save error + return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag # Return the name it attempted to save as finally: - if bytes_to_write: bytes_to_write.close() # Ensure stream is closed + if bytes_to_write: bytes_to_write.close() def process(self): """Main processing logic for a single post.""" - if self.check_cancel(): return 0, 0 # Downloaded, Skipped - + if self.check_cancel(): return 0, 0, [] + + kept_original_filenames_for_log = [] total_downloaded_this_post = 0 total_skipped_this_post = 0 - # Prepare headers for file downloads - parsed_api_url = urlparse(self.api_url_input) # Use the original input URL for referer base + parsed_api_url = urlparse(self.api_url_input) referer_url = f"https://{parsed_api_url.netloc}/" headers = {'User-Agent': 'Mozilla/5.0', 'Referer': referer_url, 'Accept': '*/*'} - # Regex for finding links in HTML content link_pattern = re.compile(r"""]*>(.*?)""", re.IGNORECASE | re.DOTALL) - # Extract post details post_data = self.post post_title = post_data.get('title', '') or 'untitled_post' post_id = post_data.get('id', 'unknown_id') - post_main_file_info = post_data.get('file') # Main file object for the post - post_attachments = post_data.get('attachments', []) # List of attachment objects - post_content_html = post_data.get('content', '') # HTML content of the post + post_main_file_info = post_data.get('file') # This is a dict if present + post_attachments = post_data.get('attachments', []) # This is a list of dicts + post_content_html = post_data.get('content', '') - # Log post processing start self.logger(f"\n--- Processing Post {post_id} ('{post_title[:50]}...') (Thread: {threading.current_thread().name}) ---") - num_potential_files = len(post_attachments or []) + (1 if post_main_file_info and post_main_file_info.get('path') else 0) + num_potential_files_in_post = len(post_attachments or []) + (1 if post_main_file_info and post_main_file_info.get('path') else 0) - # --- Post-Level Skip Word Check (REMOVED for Manga Mode based on Title) --- - # Skip words are now ALWAYS checked at the file level based on FILENAME in _download_single_file. - # The old Manga Mode post-level skip based on title is removed. + # Apply skip_words_list based on skip_words_scope (for posts) + if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH): + post_title_lower = post_title.lower() + for skip_word in self.skip_words_list: + if skip_word.lower() in post_title_lower: + self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'. Scope: {self.skip_words_scope}") + return 0, num_potential_files_in_post, [] # Skip all files in this post - # --- Post-Level Character Filter Check (Only for Manga Mode, based on Title) --- - # If Manga Mode is active and character filters are set, the post title MUST match one of them. - # This acts as a gate for processing files from this post in Manga Mode. + # Character filter for Manga Mode (applies to post title) if not self.extract_links_only and self.manga_mode_active and self.filter_character_list: if not any(is_title_match_for_character(post_title, char_name) for char_name in self.filter_character_list): self.logger(f" -> Skip Post (Manga Mode - Title No Char Match): Title '{post_title[:50]}' doesn't match active character filters.") - return 0, num_potential_files # Skip all files in this post + return 0, num_potential_files_in_post, [] - # Validate attachments structure - if not isinstance(post_attachments, list): + if not isinstance(post_attachments, list): # Basic sanity check self.logger(f"⚠️ Corrupt attachment data for post {post_id} (expected list, got {type(post_attachments)}). Skipping attachments.") post_attachments = [] - # --- Determine Base Save Folders --- - potential_base_save_folders = [] # List of base folder names (not full paths yet) - if not self.extract_links_only: # Folder logic only applies if not just extracting links + potential_base_save_folders = [] + if not self.extract_links_only: if self.use_subfolders: - if self.filter_character_list: # User specified character names for folders - if self.manga_mode_active: - # Manga Mode: Only consider character folders if post title matches that character - for char_filter_name in self.filter_character_list: - if is_title_match_for_character(post_title, char_filter_name): - cleaned_folder = clean_folder_name(char_filter_name) - if cleaned_folder: potential_base_save_folders.append(cleaned_folder) - # If in manga mode and title didn't match any char filter, this list will be empty. - # The post-level skip above should have already caught this. - else: # Normal Mode: Create folders for all specified character filters - for char_filter_name in self.filter_character_list: + # If character filters are active and it's manga mode, folder name comes from character filter matching post title + if self.filter_character_list and self.manga_mode_active: + for char_filter_name in self.filter_character_list: + if is_title_match_for_character(post_title, char_filter_name): cleaned_folder = clean_folder_name(char_filter_name) if cleaned_folder: potential_base_save_folders.append(cleaned_folder) - - if potential_base_save_folders: - self.logger(f" Folder Target(s) (from Character Filter list): {', '.join(potential_base_save_folders)}") - elif self.filter_character_list: - self.logger(f" Note: Post {post_id} title did not match character filters for folder assignment (Manga Mode) or no valid char folders.") - else: # No character filter list from UI, derive folders from title using known_names + # If not manga mode with character filter, or if manga mode didn't find a match, try known names / title + if not potential_base_save_folders: derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords) if derived_folders: potential_base_save_folders.extend(derived_folders) self.logger(f" Folder Target(s) (Derived from Title & Known Names): {', '.join(derived_folders)}") - else: # Fallback if no known_names match + else: fallback_folder = extract_folder_name_from_title(post_title, self.unwanted_keywords) potential_base_save_folders.append(fallback_folder) self.logger(f" Folder Target (Fallback from Title): {fallback_folder}") - if not potential_base_save_folders: # If still no folders, use a generic one based on post title or default + if not potential_base_save_folders: # Absolute fallback potential_base_save_folders.append(clean_folder_name(post_title if post_title else "untitled_creator_content")) self.logger(f" Folder Target (Final Fallback): {potential_base_save_folders[0]}") + else: # Not using subfolders, save to root + potential_base_save_folders = [""] - else: # Not using subfolders, all files go to download_root - potential_base_save_folders = [""] # Represents the root download directory - - # --- Post-Level Skip Words in Folder Name --- - # This applies if subfolders are used and a folder name itself contains a skip word. + # Skip post if folder name contains skip words (only if subfolders are used) if not self.extract_links_only and self.use_subfolders and self.skip_words_list: for folder_name_to_check in potential_base_save_folders: - if not folder_name_to_check: continue # Skip root "" + if not folder_name_to_check: continue # Skip if base folder is root if any(skip_word.lower() in folder_name_to_check.lower() for skip_word in self.skip_words_list): matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in folder_name_to_check.lower()), "unknown_skip_word") self.logger(f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check}' contains '{matched_skip}'.") - return 0, num_potential_files + return 0, num_potential_files_in_post, [] - # --- Extract and Log External Links --- + # External Link Extraction if (self.show_external_links or self.extract_links_only) and post_content_html: try: - unique_links_data = {} # Store unique URLs and their text + unique_links_data = {} for match in link_pattern.finditer(post_content_html): link_url = match.group(1).strip() - link_inner_text = match.group(2) # Raw inner HTML of the tag + link_inner_text = match.group(2) if not any(ext in link_url.lower() for ext in ['.css', '.js', '.ico', '.xml', '.svg']) \ and not link_url.startswith('javascript:') \ @@ -935,97 +920,87 @@ class PostProcessorWorker: if self.extract_links_only: self.logger(f" Extract Links Only mode: Finished processing post {post_id} for links.") - return 0, 0 + return 0, 0, [] # No files downloaded or skipped in this mode for this counter - # --- Prepare List of Files to Download --- - files_to_download_info_list = [] - api_file_domain = urlparse(self.api_url_input).netloc - if not api_file_domain: + # --- Prepare list of files to download from this post --- + all_files_from_post_api = [] + api_file_domain = urlparse(self.api_url_input).netloc # Use domain from input URL + if not api_file_domain or not any(d in api_file_domain.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']): + # Fallback if input URL isn't a direct site URL (e.g. API URL was passed, though less common for user input) api_file_domain = "kemono.su" if "kemono" in self.service.lower() else "coomer.party" - if self.download_thumbnails: - self.logger(f" Thumbnail-only mode for Post {post_id}.") - if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'): - if is_image(post_main_file_info.get('name')): - file_path = post_main_file_info['path'].lstrip('/') - original_api_name = post_main_file_info.get('name') or os.path.basename(file_path) - if original_api_name: - files_to_download_info_list.append({ - 'url': f"https://{api_file_domain}{file_path}" if file_path.startswith('/') else f"https://{api_file_domain}/data/{file_path}", - 'name': original_api_name, - '_original_name_for_log': original_api_name, - '_is_thumbnail': True - }) - for att_info in post_attachments: - if isinstance(att_info, dict) and att_info.get('path') and is_image(att_info.get('name')): - att_path = att_info['path'].lstrip('/') - original_api_att_name = att_info.get('name') or os.path.basename(att_path) - if original_api_att_name: - files_to_download_info_list.append({ - 'url': f"https://{api_file_domain}{att_path}" if att_path.startswith('/') else f"https://{api_file_domain}/data/{att_path}", - 'name': original_api_att_name, - '_original_name_for_log': original_api_att_name, - '_is_thumbnail': True - }) - if not files_to_download_info_list: - self.logger(f" -> No image thumbnails found for post {post_id} in thumbnail-only mode.") - return 0, 0 - else: # Normal download mode - if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'): - file_path = post_main_file_info['path'].lstrip('/') - original_api_name = post_main_file_info.get('name') or os.path.basename(file_path) - if original_api_name: - files_to_download_info_list.append({ - 'url': f"https://{api_file_domain}{file_path}" if file_path.startswith('/') else f"https://{api_file_domain}/data/{file_path}", - 'name': original_api_name, - '_original_name_for_log': original_api_name, - '_is_thumbnail': False + if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'): + file_path = post_main_file_info['path'].lstrip('/') + original_api_name = post_main_file_info.get('name') or os.path.basename(file_path) + if original_api_name: + all_files_from_post_api.append({ + 'url': f"https://{api_file_domain}{file_path}" if file_path.startswith('/') else f"https://{api_file_domain}/data/{file_path}", + 'name': original_api_name, # This 'name' might be used for initial filtering if _original_name_for_log isn't set + '_original_name_for_log': original_api_name, # Store the true original for logging/type checks + '_is_thumbnail': self.download_thumbnails and is_image(original_api_name) + }) + else: self.logger(f" ⚠️ Skipping main file for post {post_id}: Missing name (Path: {file_path})") + + for idx, att_info in enumerate(post_attachments): + if isinstance(att_info, dict) and att_info.get('path'): + att_path = att_info['path'].lstrip('/') + original_api_att_name = att_info.get('name') or os.path.basename(att_path) + if original_api_att_name: + all_files_from_post_api.append({ + 'url': f"https://{api_file_domain}{att_path}" if att_path.startswith('/') else f"https://{api_file_domain}/data/{att_path}", + 'name': original_api_att_name, + '_original_name_for_log': original_api_att_name, + '_is_thumbnail': self.download_thumbnails and is_image(original_api_att_name) }) - else: self.logger(f" ⚠️ Skipping main file for post {post_id}: Missing name (Path: {file_path})") - for idx, att_info in enumerate(post_attachments): - if isinstance(att_info, dict) and att_info.get('path'): - att_path = att_info['path'].lstrip('/') - original_api_att_name = att_info.get('name') or os.path.basename(att_path) - if original_api_att_name: - files_to_download_info_list.append({ - 'url': f"https://{api_file_domain}{att_path}" if att_path.startswith('/') else f"https://{api_file_domain}/data/{att_path}", - 'name': original_api_att_name, - '_original_name_for_log': original_api_att_name, - '_is_thumbnail': False - }) - else: self.logger(f" ⚠️ Skipping attachment {idx+1} for post {post_id}: Missing name (Path: {att_path})") - else: self.logger(f" ⚠️ Skipping invalid attachment {idx+1} for post {post_id}: {str(att_info)[:100]}") + else: self.logger(f" ⚠️ Skipping attachment {idx+1} for post {post_id}: Missing name (Path: {att_path})") + else: self.logger(f" ⚠️ Skipping invalid attachment {idx+1} for post {post_id}: {str(att_info)[:100]}") - if not files_to_download_info_list: + if self.download_thumbnails: # Filter non-images if in thumbnail mode + all_files_from_post_api = [finfo for finfo in all_files_from_post_api if finfo['_is_thumbnail']] + if not all_files_from_post_api: + self.logger(f" -> No image thumbnails found for post {post_id} in thumbnail-only mode.") + return 0, 0, [] + + + if not all_files_from_post_api: self.logger(f" No files found to download for post {post_id}.") - return 0, 0 + return 0, 0, [] + + # --- Filter out duplicates based on original API filename WITHIN THIS POST --- + files_to_download_info_list = [] + processed_original_filenames_in_this_post = set() + for file_info in all_files_from_post_api: + current_api_original_filename = file_info.get('_original_name_for_log') + if current_api_original_filename in processed_original_filenames_in_this_post: + self.logger(f" -> Skip Duplicate Original Name (within post {post_id}): '{current_api_original_filename}' already processed/listed for this post.") + total_skipped_this_post += 1 + else: + files_to_download_info_list.append(file_info) + if current_api_original_filename: + processed_original_filenames_in_this_post.add(current_api_original_filename) + + if not files_to_download_info_list: + self.logger(f" All files for post {post_id} were duplicate original names or skipped earlier.") + return 0, total_skipped_this_post, [] + + + num_files_in_this_post_for_naming = len(files_to_download_info_list) + self.logger(f" Identified {num_files_in_this_post_for_naming} unique original file(s) for potential download from post {post_id}.") - self.logger(f" Identified {len(files_to_download_info_list)} file(s) for potential download from post {post_id}.") - # --- File Download Loop (using ThreadPoolExecutor for individual files) --- with ThreadPoolExecutor(max_workers=self.num_file_threads, thread_name_prefix=f'P{post_id}File_') as file_pool: futures_list = [] for file_idx, file_info_to_dl in enumerate(files_to_download_info_list): if self.check_cancel(): break - actual_target_full_paths_for_this_file = [] + actual_target_full_paths_for_this_file = [] if self.use_subfolders: - if self.filter_character_list: + # If character filters are active and NOT manga mode, folder name comes from char filter matching filename + if self.filter_character_list and not self.manga_mode_active: for char_name_from_filter_list in self.filter_character_list: - assign_to_this_char_folder = False - if self.manga_mode_active: - # Manga Mode: Folder assignment is based on post_title matching char_name_from_filter_list - # This check is somewhat redundant if the post-level title check passed, - # but ensures files from this post go into the matched character's folder. - if is_title_match_for_character(post_title, char_name_from_filter_list): - assign_to_this_char_folder = True - else: # Normal mode - if is_filename_match_for_character(file_info_to_dl.get('_original_name_for_log'), char_name_from_filter_list): - assign_to_this_char_folder = True - - if assign_to_this_char_folder: + if is_filename_match_for_character(file_info_to_dl.get('_original_name_for_log'), char_name_from_filter_list): base_char_folder_path = os.path.join(self.download_root, clean_folder_name(char_name_from_filter_list)) if self.use_post_subfolders: cleaned_title_for_subfolder = clean_folder_name(post_title) @@ -1033,9 +1008,8 @@ class PostProcessorWorker: actual_target_full_paths_for_this_file.append(os.path.join(base_char_folder_path, post_specific_subfolder_name)) else: actual_target_full_paths_for_this_file.append(base_char_folder_path) - - else: - for base_folder_name in potential_base_save_folders: + else: # Manga mode with char filter (already handled for potential_base_save_folders) OR no char filter OR char filter didn't match filename in normal mode + for base_folder_name in potential_base_save_folders: # These were determined earlier base_folder_path = os.path.join(self.download_root, base_folder_name) if self.use_post_subfolders: cleaned_title_for_subfolder = clean_folder_name(post_title) @@ -1043,98 +1017,109 @@ class PostProcessorWorker: actual_target_full_paths_for_this_file.append(os.path.join(base_folder_path, post_specific_subfolder_name)) else: actual_target_full_paths_for_this_file.append(base_folder_path) - else: + else: # Not using subfolders at all actual_target_full_paths_for_this_file = [self.download_root] - if self.target_post_id_from_initial_url and self.custom_folder_name: + # Override with custom folder name if it's a single post download and custom name is provided + if self.target_post_id_from_initial_url and self.custom_folder_name: # custom_folder_name is already cleaned custom_full_path = os.path.join(self.download_root, self.custom_folder_name) actual_target_full_paths_for_this_file = [custom_full_path] - # self.logger(f" Using custom folder for single post: {custom_full_path}") # Logged once is enough - + # Fallback if no specific target paths were determined (e.g. char filter normal mode no match) if not actual_target_full_paths_for_this_file: - self.logger(f" -> File Skip (No Target Folder): '{file_info_to_dl.get('_original_name_for_log')}' for post '{post_title[:30]}'. No character folder match or other path error.") - total_skipped_this_post +=1 - continue + default_target_for_non_match = self.download_root + if self.use_subfolders: # Should use one of the potential_base_save_folders if subfolders enabled + gen_folder_name = potential_base_save_folders[0] if potential_base_save_folders and potential_base_save_folders[0] else clean_folder_name(post_title) + default_target_for_non_match = os.path.join(self.download_root, gen_folder_name) + if self.use_post_subfolders: + cleaned_title_for_subfolder = clean_folder_name(post_title) + post_specific_subfolder_name = f"{post_id}_{cleaned_title_for_subfolder}" if cleaned_title_for_subfolder else f"{post_id}_untitled" + default_target_for_non_match = os.path.join(default_target_for_non_match, post_specific_subfolder_name) + actual_target_full_paths_for_this_file = [default_target_for_non_match] - for target_path in set(actual_target_full_paths_for_this_file): + for target_path in set(actual_target_full_paths_for_this_file): # Use set to avoid duplicate downloads to same path if self.check_cancel(): break futures_list.append(file_pool.submit( self._download_single_file, file_info_to_dl, - target_path, + target_path, headers, - post_id, - self.skip_current_file_flag, - post_title, - file_idx + post_id, + self.skip_current_file_flag, + post_title, # Pass post_title for manga naming + file_idx, + num_files_in_this_post_for_naming )) - if self.check_cancel(): break + if self.check_cancel(): break for future in as_completed(futures_list): - if self.check_cancel(): - for f_to_cancel in futures_list: + if self.check_cancel(): + for f_to_cancel in futures_list: # Attempt to cancel pending futures if not f_to_cancel.done(): f_to_cancel.cancel() - break + break try: - dl_count, skip_count = future.result() + dl_count, skip_count, actual_filename_saved, original_kept_flag = future.result() total_downloaded_this_post += dl_count total_skipped_this_post += skip_count - except CancelledError: - total_skipped_this_post += 1 + if original_kept_flag and dl_count > 0 and actual_filename_saved: # Ensure filename is not empty + kept_original_filenames_for_log.append(actual_filename_saved) + except CancelledError: + self.logger(f" File download task for post {post_id} was cancelled.") + total_skipped_this_post += 1 # Assume one file per cancelled future except Exception as exc_f: self.logger(f"❌ File download task for post {post_id} resulted in error: {exc_f}") - total_skipped_this_post += 1 + total_skipped_this_post += 1 # Assume one file failed + # Clear file progress after all files for this post are done or cancelled if self.signals and hasattr(self.signals, 'file_progress_signal'): - self.signals.file_progress_signal.emit("", 0, 0) + self.signals.file_progress_signal.emit("", 0, 0) if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled."); else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}") - return total_downloaded_this_post, total_skipped_this_post + return total_downloaded_this_post, total_skipped_this_post, kept_original_filenames_for_log class DownloadThread(QThread): """ - Manages the overall download process. + Manages the overall download process. Fetches posts using download_from_api and then processes each post using PostProcessorWorker. - This class is typically used when the GUI needs a separate thread for the entire download operation - (e.g., when not using the multi-threaded PostFetcher model from the main app). """ - progress_signal = pyqtSignal(str) # For general log messages - add_character_prompt_signal = pyqtSignal(str) # To ask user to add character to known list - file_download_status_signal = pyqtSignal(bool) # True when a file download starts, False when it ends - finished_signal = pyqtSignal(int, int, bool) # (total_downloaded, total_skipped, was_cancelled) - external_link_signal = pyqtSignal(str, str, str, str) # (post_title, link_text, link_url, platform) - file_progress_signal = pyqtSignal(str, int, int) # (filename, downloaded_bytes, total_bytes) + progress_signal = pyqtSignal(str) + add_character_prompt_signal = pyqtSignal(str) # For main app to show prompt + file_download_status_signal = pyqtSignal(bool) # True when a file dl starts, False when ends/fails + finished_signal = pyqtSignal(int, int, bool, list) # dl_count, skip_count, was_cancelled, kept_original_names + external_link_signal = pyqtSignal(str, str, str, str) # post_title, link_text, link_url, platform + file_progress_signal = pyqtSignal(str, int, int) # filename, downloaded_bytes, total_bytes + def __init__(self, api_url_input, output_dir, known_names_copy, - cancellation_event, # threading.Event() + cancellation_event, # This is a threading.Event from the main app filter_character_list=None, filter_mode='all', skip_zip=True, skip_rar=True, use_subfolders=True, use_post_subfolders=False, custom_folder_name=None, compress_images=False, download_thumbnails=False, service=None, user_id=None, downloaded_files=None, downloaded_file_hashes=None, downloaded_files_lock=None, downloaded_file_hashes_lock=None, skip_words_list=None, + skip_words_scope=SKIP_SCOPE_FILES, show_external_links=False, extract_links_only=False, - num_file_threads_for_worker=1, # Threads per PostProcessorWorker instance - skip_current_file_flag=None, # threading.Event() to skip one file + num_file_threads_for_worker=1, # For PostProcessorWorker's internal pool + skip_current_file_flag=None, # This is a threading.Event start_page=None, end_page=None, - target_post_id_from_initial_url=None, # If the input URL was a specific post + target_post_id_from_initial_url=None, # The specific post ID if single post URL manga_mode_active=False, - unwanted_keywords=None # Set of keywords to avoid in auto-generated folder names + unwanted_keywords=None, + manga_filename_style=STYLE_POST_TITLE ): super().__init__() - # --- Store all passed arguments as instance attributes --- self.api_url_input = api_url_input self.output_dir = output_dir - self.known_names = list(known_names_copy) # Use a copy - self.cancellation_event = cancellation_event - self.skip_current_file_flag = skip_current_file_flag - self.initial_target_post_id = target_post_id_from_initial_url + self.known_names = list(known_names_copy) # Make a copy + self.cancellation_event = cancellation_event # Use the shared event + self.skip_current_file_flag = skip_current_file_flag # Use the shared event + self.initial_target_post_id = target_post_id_from_initial_url # Store the original target self.filter_character_list = filter_character_list if filter_character_list else [] self.filter_mode = filter_mode self.skip_zip = skip_zip @@ -1147,14 +1132,14 @@ class DownloadThread(QThread): self.service = service self.user_id = user_id self.skip_words_list = skip_words_list if skip_words_list is not None else [] - # Shared sets and locks for tracking downloads across potential multiple workers (if this thread spawns them) - self.downloaded_files = downloaded_files if downloaded_files is not None else set() - self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock() - self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set() - self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock() + self.skip_words_scope = skip_words_scope + self.downloaded_files = downloaded_files # Should be the shared set from main app + self.downloaded_files_lock = downloaded_files_lock # Shared lock + self.downloaded_file_hashes = downloaded_file_hashes # Shared set + self.downloaded_file_hashes_lock = downloaded_file_hashes_lock # Shared lock - self._add_character_response = None # For handling synchronous prompt results - self.prompt_mutex = QMutex() # Mutex for _add_character_response + self._add_character_response = None # For sync prompt result + self.prompt_mutex = QMutex() # For sync prompt result self.show_external_links = show_external_links self.extract_links_only = extract_links_only @@ -1163,10 +1148,10 @@ class DownloadThread(QThread): self.end_page = end_page self.manga_mode_active = manga_mode_active self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else \ - {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'} # Default unwanted keywords + {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'} + self.manga_filename_style = manga_filename_style - # Disable compression if Pillow is not available - if self.compress_images and Image is None: + if self.compress_images and Image is None: # Check Pillow again self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") self.compress_images = False @@ -1176,123 +1161,133 @@ class DownloadThread(QThread): def isInterruptionRequested(self): """Checks if Qt interruption or manual cancellation event is set.""" - return super().isInterruptionRequested() or self.cancellation_event.is_set() + # QThread's interruption is different from threading.Event + # We primarily use the threading.Event (self.cancellation_event) + return self.cancellation_event.is_set() or super().isInterruptionRequested() + def skip_file(self): """Sets the flag to skip the currently processing file (if any).""" + # This method is called from the main thread via the GUI button. + # It needs to signal the PostProcessorWorker's skip_event if one is active. + # However, the DownloadThread itself doesn't directly manage the skip_event for individual files. + # The skip_current_file_flag is passed to PostProcessorWorker. if self.isRunning() and self.skip_current_file_flag: self.logger("⏭️ Skip requested for current file (single-thread mode).") - self.skip_current_file_flag.set() # Signal the PostProcessorWorker - else: self.logger("ℹ️ Skip file: No download active or skip flag not available.") + self.skip_current_file_flag.set() # Signal the event + else: self.logger("ℹ️ Skip file: No download active or skip flag not available for current context.") + def run(self): """Main execution logic for the download thread.""" grand_total_downloaded_files = 0 grand_total_skipped_files = 0 + grand_list_of_kept_original_filenames = [] was_process_cancelled = False - # Create a signals object for PostProcessorWorker instances - # This allows PostProcessorWorker to emit signals that this DownloadThread can connect to. + # Create a PostProcessorSignals instance for this thread's workers worker_signals_obj = PostProcessorSignals() try: - # Connect signals from the worker_signals_obj to this thread's signals - # This effectively forwards signals from PostProcessorWorker up to the GUI + # Connect signals from this worker_signals_obj to the DownloadThread's own signals worker_signals_obj.progress_signal.connect(self.progress_signal) worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal) worker_signals_obj.file_progress_signal.connect(self.file_progress_signal) worker_signals_obj.external_link_signal.connect(self.external_link_signal) self.logger(" Starting post fetch (single-threaded download process)...") - # Get the generator for fetching posts post_generator = download_from_api( self.api_url_input, - logger=self.logger, # Pass this thread's logger + logger=self.logger, start_page=self.start_page, end_page=self.end_page, manga_mode=self.manga_mode_active, - cancellation_event=self.cancellation_event # Pass cancellation event + cancellation_event=self.cancellation_event # Pass the shared event ) - for posts_batch_data in post_generator: # Iterate through batches of posts + for posts_batch_data in post_generator: # download_from_api yields batches if self.isInterruptionRequested(): was_process_cancelled = True; break - for individual_post_data in posts_batch_data: # Iterate through posts in a batch + for individual_post_data in posts_batch_data: # Iterate through posts in the batch if self.isInterruptionRequested(): was_process_cancelled = True; break - # Create a PostProcessorWorker for each post + # Create and run PostProcessorWorker for each post + # The PostProcessorWorker will use its own ThreadPoolExecutor for files if num_file_threads_for_worker > 1 post_processing_worker = PostProcessorWorker( post_data=individual_post_data, download_root=self.output_dir, - known_names=self.known_names, # Pass copy + known_names=self.known_names, # Pass the copy filter_character_list=self.filter_character_list, unwanted_keywords=self.unwanted_keywords, filter_mode=self.filter_mode, skip_zip=self.skip_zip, skip_rar=self.skip_rar, use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders, - target_post_id_from_initial_url=self.initial_target_post_id, + target_post_id_from_initial_url=self.initial_target_post_id, # Pass the original target custom_folder_name=self.custom_folder_name, compress_images=self.compress_images, download_thumbnails=self.download_thumbnails, service=self.service, user_id=self.user_id, - api_url_input=self.api_url_input, - cancellation_event=self.cancellation_event, - signals=worker_signals_obj, # Pass the shared signals object - downloaded_files=self.downloaded_files, # Pass shared sets and locks - downloaded_file_hashes=self.downloaded_file_hashes, - downloaded_files_lock=self.downloaded_files_lock, - downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, + api_url_input=self.api_url_input, # Pass the original input URL + cancellation_event=self.cancellation_event, # Pass the shared event + signals=worker_signals_obj, # Pass the signals object for this thread + downloaded_files=self.downloaded_files, # Pass shared set + downloaded_file_hashes=self.downloaded_file_hashes, # Pass shared set + downloaded_files_lock=self.downloaded_files_lock, # Pass shared lock + downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, # Pass shared lock skip_words_list=self.skip_words_list, + skip_words_scope=self.skip_words_scope, show_external_links=self.show_external_links, extract_links_only=self.extract_links_only, - num_file_threads=self.num_file_threads_for_worker, - skip_current_file_flag=self.skip_current_file_flag, - manga_mode_active=self.manga_mode_active + num_file_threads=self.num_file_threads_for_worker, # Threads for files within this post + skip_current_file_flag=self.skip_current_file_flag, # Pass the shared event + manga_mode_active=self.manga_mode_active, + manga_filename_style=self.manga_filename_style ) try: - # Process the post (this will block until the worker is done with this post) - dl_count, skip_count = post_processing_worker.process() + # The process method of PostProcessorWorker handles its internal file downloads + dl_count, skip_count, kept_originals_this_post = post_processing_worker.process() grand_total_downloaded_files += dl_count grand_total_skipped_files += skip_count + if kept_originals_this_post: # This is a list + grand_list_of_kept_original_filenames.extend(kept_originals_this_post) except Exception as proc_err: post_id_for_err = individual_post_data.get('id', 'N/A') self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}") traceback.print_exc() - # Estimate skipped files for this post if worker failed catastrophically + # Estimate skipped files for this post if worker crashes num_potential_files_est = len(individual_post_data.get('attachments', [])) + \ (1 if individual_post_data.get('file') else 0) grand_total_skipped_files += num_potential_files_est - # Clear the skip_current_file_flag if it was set and processed if self.skip_current_file_flag and self.skip_current_file_flag.is_set(): - self.skip_current_file_flag.clear() + self.skip_current_file_flag.clear() # Reset for the next file/post self.logger(" Skip current file flag was processed and cleared by DownloadThread.") - self.msleep(10) # Small delay to allow GUI to update, if needed - if was_process_cancelled: break # Break from batch loop if cancelled + self.msleep(10) # Small delay between processing posts in single-thread mode + if was_process_cancelled: break # Break from outer loop (batches) - if not was_process_cancelled: self.logger("✅ All posts processed or end of content reached.") + if not was_process_cancelled and not self.isInterruptionRequested(): # Check again after loops + self.logger("✅ All posts processed or end of content reached by DownloadThread.") except Exception as main_thread_err: self.logger(f"\n❌ Critical error within DownloadThread run loop: {main_thread_err}") traceback.print_exc() - # Ensure was_process_cancelled reflects the state if error wasn't due to user cancellation - if not self.isInterruptionRequested(): was_process_cancelled = False # Error, not user cancel + # Don't assume cancelled if an unexpected error occurs, let was_process_cancelled reflect actual interruption + if not self.isInterruptionRequested(): was_process_cancelled = False finally: - # Clean up: Disconnect signals to avoid issues if the thread is somehow reused or objects persist + # Disconnect signals try: if worker_signals_obj: # Check if it was initialized worker_signals_obj.progress_signal.disconnect(self.progress_signal) worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal) worker_signals_obj.external_link_signal.disconnect(self.external_link_signal) worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal) - except (TypeError, RuntimeError) as e: # Catch if signals were already disconnected or other issues + except (TypeError, RuntimeError) as e: #TypeError if not connected, RuntimeError if object deleted self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}") - # Emit the finished signal with totals and cancellation status - self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, was_process_cancelled) + # Emit finished signal with final counts and status + self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames) def receive_add_character_result(self, result): """Slot to receive the result from a character add prompt shown in the main thread.""" - with QMutexLocker(self.prompt_mutex): # Ensure thread-safe access + # This is called by a signal from the main thread + with QMutexLocker(self.prompt_mutex): self._add_character_response = result - self.logger(f" (DownloadThread) Received character prompt response: {'Yes (added/confirmed)' if result else 'No (declined/failed)'}") - # This response might be used by logic within the thread if it was waiting for it, - # though typically prompts are handled by the main GUI thread. + self.logger(f" (DownloadThread) Received character prompt response: {'Yes (added/confirmed)' if result else 'No (declined/failed)'}") \ No newline at end of file diff --git a/main.py b/main.py index 4627a5a..87c23b1 100644 --- a/main.py +++ b/main.py @@ -20,10 +20,11 @@ from PyQt5.QtGui import ( from PyQt5.QtWidgets import ( QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton, QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, - QRadioButton, QButtonGroup, QCheckBox, QSplitter, QSizePolicy, QDialog + QRadioButton, QButtonGroup, QCheckBox, QSplitter, QSizePolicy, QDialog, + QFrame, + QAbstractButton ) -# Ensure QTimer is imported -from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer +from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer, QSettings from urllib.parse import urlparse try: @@ -36,7 +37,6 @@ from io import BytesIO # --- Import from downloader_utils --- try: print("Attempting to import from downloader_utils...") - # Assuming downloader_utils_link_text is the correct version from downloader_utils import ( KNOWN_NAMES, clean_folder_name, @@ -44,37 +44,45 @@ try: download_from_api, PostProcessorSignals, PostProcessorWorker, - DownloadThread as BackendDownloadThread + DownloadThread as BackendDownloadThread, # Renamed to avoid conflict + SKIP_SCOPE_FILES, + SKIP_SCOPE_POSTS, + SKIP_SCOPE_BOTH ) print("Successfully imported names from downloader_utils.") except ImportError as e: print(f"--- IMPORT ERROR ---") print(f"Failed to import from 'downloader_utils.py': {e}") - # ... (rest of error handling as in your original file) ... + # Define fallbacks if import fails, so the app might still run with limited functionality or show an error. KNOWN_NAMES = [] - PostProcessorSignals = QObject - PostProcessorWorker = object - BackendDownloadThread = QThread - def clean_folder_name(n): return str(n) # Fallback - def extract_post_info(u): return None, None, None - def download_from_api(*a, **k): yield [] + PostProcessorSignals = QObject # Fallback to base QObject + PostProcessorWorker = object # Fallback to base object + BackendDownloadThread = QThread # Fallback to base QThread + def clean_folder_name(n): return str(n) # Simple fallback + def extract_post_info(u): return None, None, None # Fallback + def download_from_api(*a, **k): yield [] # Fallback generator + SKIP_SCOPE_FILES = "files" + SKIP_SCOPE_POSTS = "posts" + SKIP_SCOPE_BOTH = "both" + # Potentially show a critical error to the user here if downloader_utils is essential + # For now, printing to console is the primary error indication. except Exception as e: print(f"--- UNEXPECTED IMPORT ERROR ---") print(f"An unexpected error occurred during import: {e}") traceback.print_exc() print(f"-----------------------------", file=sys.stderr) - sys.exit(1) + sys.exit(1) # Exit if a critical, unexpected error occurs during import # --- End Import --- # --- Import Tour Dialog --- try: - from tour import TourDialog + from tour import TourDialog # Assuming tour.py exists in the same directory print("Successfully imported TourDialog from tour.py.") except ImportError as e: print(f"--- TOUR IMPORT ERROR ---") print(f"Failed to import TourDialog from 'tour.py': {e}") print("Tour functionality will be unavailable.") - TourDialog = None # Fallback if tour.py is missing + TourDialog = None # Fallback if tour.py is not found except Exception as e: print(f"--- UNEXPECTED TOUR IMPORT ERROR ---") print(f"An unexpected error occurred during tour import: {e}") @@ -84,527 +92,594 @@ except Exception as e: # --- Constants for Thread Limits --- -MAX_THREADS = 200 # Absolute maximum allowed by the input validator -RECOMMENDED_MAX_THREADS = 50 # Threshold for showing the informational warning +MAX_THREADS = 200 # Max post workers for creator feeds +RECOMMENDED_MAX_THREADS = 50 # Recommended max post workers +MAX_FILE_THREADS_PER_POST_OR_WORKER = 10 # Max file download threads for single post or per creator feed worker # --- END --- -# --- ADDED: Prefix for HTML messages in main log --- -HTML_PREFIX = "" # Used to identify HTML lines for insertHtml -# --- END ADDED --- +HTML_PREFIX = "" # Prefix to indicate a log message is HTML + +# --- QSettings Constants --- +CONFIG_ORGANIZATION_NAME = "KemonoDownloader" # Company/Organization Name for settings +CONFIG_APP_NAME_MAIN = "ApplicationSettings" # Application Name for settings +MANGA_FILENAME_STYLE_KEY = "mangaFilenameStyleV1" # Key for storing manga filename style +STYLE_POST_TITLE = "post_title" # Constant for post title filename style +STYLE_ORIGINAL_NAME = "original_name" # Constant for original filename style +SKIP_WORDS_SCOPE_KEY = "skipWordsScopeV1" # Key for storing skip words scope +# --- END QSettings --- + class DownloaderApp(QWidget): - character_prompt_response_signal = pyqtSignal(bool) - log_signal = pyqtSignal(str) - add_character_prompt_signal = pyqtSignal(str) - overall_progress_signal = pyqtSignal(int, int) - finished_signal = pyqtSignal(int, int, bool) - # Signal now carries link_text (ensure this matches downloader_utils) - external_link_signal = pyqtSignal(str, str, str, str) # post_title, link_text, link_url, platform - file_progress_signal = pyqtSignal(str, int, int) + # Signals for cross-thread communication and UI updates + character_prompt_response_signal = pyqtSignal(bool) # Signal for character prompt response + log_signal = pyqtSignal(str) # Signal for logging messages to the UI + add_character_prompt_signal = pyqtSignal(str) # Signal to prompt adding a character + overall_progress_signal = pyqtSignal(int, int) # Signal for overall download progress (total, processed) + finished_signal = pyqtSignal(int, int, bool, list) # Signal when download finishes (dl_count, skip_count, cancelled, kept_original_names) + external_link_signal = pyqtSignal(str, str, str, str) # Signal for found external links (post_title, link_text, url, platform) + file_progress_signal = pyqtSignal(str, int, int) # Signal for individual file download progress (filename, downloaded_bytes, total_bytes) def __init__(self): super().__init__() - self.config_file = "Known.txt" - self.download_thread = None - self.thread_pool = None - self.cancellation_event = threading.Event() - self.active_futures = [] - self.total_posts_to_process = 0 - self.processed_posts_count = 0 - self.download_counter = 0 - self.skip_counter = 0 - self.worker_signals = PostProcessorSignals() # Instance of signals for multi-thread workers + # Initialize QSettings for storing application settings persistently + self.settings = QSettings(CONFIG_ORGANIZATION_NAME, CONFIG_APP_NAME_MAIN) + self.config_file = "Known.txt" # File to store known character/show names + + # Download process related attributes + self.download_thread = None # Holds the single download thread instance + self.thread_pool = None # Holds the ThreadPoolExecutor for multi-threaded downloads + self.cancellation_event = threading.Event() # Event to signal cancellation to threads + self.active_futures = [] # List of active Future objects from the thread pool + self.total_posts_to_process = 0 # Total posts identified for the current download + self.processed_posts_count = 0 # Number of posts processed so far + self.download_counter = 0 # Total files downloaded in the current session/run + self.skip_counter = 0 # Total files skipped in the current session/run + + # Signals object for PostProcessorWorker instances + self.worker_signals = PostProcessorSignals() + # Mutex and response attribute for synchronous character add prompt self.prompt_mutex = QMutex() self._add_character_response = None - self.downloaded_files = set() - self.downloaded_files_lock = threading.Lock() - self.downloaded_file_hashes = set() - self.downloaded_file_hashes_lock = threading.Lock() - # self.external_links = [] # This list seems unused now - self.show_external_links = False - # --- For sequential delayed link display --- - self.external_link_queue = deque() - self._is_processing_external_link_queue = False - self._current_link_post_title = None # Track title for grouping - self.extracted_links_cache = [] # Store all links when in "Only Links" mode - # --- END --- + # Sets to keep track of downloaded files/hashes to avoid re-downloads in the same session + self.downloaded_files = set() # Set of downloaded filenames (final saved names) + self.downloaded_files_lock = threading.Lock() # Lock for accessing downloaded_files set + self.downloaded_file_hashes = set() # Set of MD5 hashes of downloaded files + self.downloaded_file_hashes_lock = threading.Lock() # Lock for accessing downloaded_file_hashes set - # --- For Log Verbosity --- - self.basic_log_mode = False # Start with full log (basic_log_mode is False) - self.log_verbosity_button = None - # --- END --- + # External links related attributes + self.show_external_links = False # Flag to control display of external links log + self.external_link_queue = deque() # Queue for processing external links with delays + self._is_processing_external_link_queue = False # Flag to prevent concurrent processing of the link queue + self._current_link_post_title = None # Tracks current post title for grouping links in "Only Links" mode + self.extracted_links_cache = [] # Cache of all extracted links for "Only Links" mode display and export - self.main_log_output = None - self.external_log_output = None - self.log_splitter = None # This is the VERTICAL splitter for logs - self.main_splitter = None # This will be the main HORIZONTAL splitter - self.reset_button = None - self.progress_log_label = None # To change title + # UI and Logging related attributes + self.basic_log_mode = False # Flag for toggling basic/full log verbosity + self.log_verbosity_button = None # Button to toggle log verbosity + self.manga_rename_toggle_button = None # Button to toggle manga filename style - # --- For Link Search --- - self.link_search_input = None - self.link_search_button = None - # --- END --- + self.main_log_output = None # QTextEdit for main progress log + self.external_log_output = None # QTextEdit for external links log + self.log_splitter = None # QSplitter for main and external logs + self.main_splitter = None # Main QSplitter for left (controls) and right (logs) panels + self.reset_button = None # Button to reset application state + self.progress_log_label = None # Label above the main log area - # --- For Export Links --- - self.export_links_button = None - # --- END --- + self.link_search_input = None # QLineEdit for searching in extracted links + self.link_search_button = None # QPushButton to trigger link search/filter + self.export_links_button = None # QPushButton to export extracted links - self.manga_mode_checkbox = None - self.radio_only_links = None # Define radio button attribute + self.manga_mode_checkbox = None # QCheckBox for enabling Manga/Comic mode + self.radio_only_links = None # QRadioButton for "Only Links" filter mode + self.radio_only_archives = None # QRadioButton for "Only Archives" filter mode + + self.skip_scope_toggle_button = None # Button to cycle skip words scope - self.load_known_names_from_util() - self.setWindowTitle("Kemono Downloader v2.9 (Manga Mode - No Skip Button)") - self.setGeometry(150, 150, 1050, 820) # Initial size - self.setStyleSheet(self.get_dark_theme()) - self.init_ui() - self._connect_signals() + # List to store filenames that kept their original names (for manga mode logging) + self.all_kept_original_filenames = [] + + # Load persistent settings or use defaults + self.manga_filename_style = self.settings.value(MANGA_FILENAME_STYLE_KEY, STYLE_POST_TITLE, type=str) + self.skip_words_scope = self.settings.value(SKIP_WORDS_SCOPE_KEY, SKIP_SCOPE_FILES, type=str) + + + self.load_known_names_from_util() # Load known names from config file + self.setWindowTitle("Kemono Downloader v3.1.0") # Update version number + self.setGeometry(150, 150, 1050, 820) # Set initial window size and position + self.setStyleSheet(self.get_dark_theme()) # Apply a dark theme stylesheet + self.init_ui() # Initialize the user interface elements + self._connect_signals() # Connect signals to their respective slots + + # Initial log messages self.log_signal.emit("ℹ️ Local API server functionality has been removed.") self.log_signal.emit("ℹ️ 'Skip Current File' button has been removed.") - self.character_input.setToolTip("Enter one or more character names, separated by commas (e.g., yor, makima)") + if hasattr(self, 'character_input'): # Set tooltip for character input if it exists + self.character_input.setToolTip("Enter one or more character names, separated by commas (e.g., yor, makima)") + self.log_signal.emit(f"ℹ️ Manga filename style loaded: '{self.manga_filename_style}'") + self.log_signal.emit(f"ℹ️ Skip words scope loaded: '{self.skip_words_scope}'") def _connect_signals(self): - # Signals from the worker_signals object (used by PostProcessorWorker in multi-threaded mode) + """Connects various signals from UI elements and worker threads to their handler methods.""" + # Worker signals (from PostProcessorWorker via PostProcessorSignals) if hasattr(self.worker_signals, 'progress_signal'): self.worker_signals.progress_signal.connect(self.handle_main_log) if hasattr(self.worker_signals, 'file_progress_signal'): self.worker_signals.file_progress_signal.connect(self.update_file_progress_display) - # Connect the external_link_signal from worker_signals to the queue handler if hasattr(self.worker_signals, 'external_link_signal'): self.worker_signals.external_link_signal.connect(self.handle_external_link_signal) - # App's own signals (some of which might be emitted by DownloadThread which then connects to these handlers) + # Internal app signals self.log_signal.connect(self.handle_main_log) self.add_character_prompt_signal.connect(self.prompt_add_character) self.character_prompt_response_signal.connect(self.receive_add_character_result) self.overall_progress_signal.connect(self.update_progress_display) self.finished_signal.connect(self.download_finished) - # Connect the app's external_link_signal also to the queue handler - self.external_link_signal.connect(self.handle_external_link_signal) - self.file_progress_signal.connect(self.update_file_progress_display) + self.external_link_signal.connect(self.handle_external_link_signal) # Also connect direct app signal + self.file_progress_signal.connect(self.update_file_progress_display) # Also connect direct app signal + # UI element signals + if hasattr(self, 'character_search_input'): self.character_search_input.textChanged.connect(self.filter_character_list) + if hasattr(self, 'external_links_checkbox'): self.external_links_checkbox.toggled.connect(self.update_external_links_setting) + if hasattr(self, 'thread_count_input'): self.thread_count_input.textChanged.connect(self.update_multithreading_label) + if hasattr(self, 'use_subfolder_per_post_checkbox'): self.use_subfolder_per_post_checkbox.toggled.connect(self.update_ui_for_subfolders) + if hasattr(self, 'use_multithreading_checkbox'): self.use_multithreading_checkbox.toggled.connect(self._handle_multithreading_toggle) - self.character_search_input.textChanged.connect(self.filter_character_list) - self.external_links_checkbox.toggled.connect(self.update_external_links_setting) - self.thread_count_input.textChanged.connect(self.update_multithreading_label) - self.use_subfolder_per_post_checkbox.toggled.connect(self.update_ui_for_subfolders) + # Radio button group for file filters + if hasattr(self, 'radio_group') and self.radio_group: + # Connect only once to the buttonToggled signal of the QButtonGroup + self.radio_group.buttonToggled.connect(self._handle_filter_mode_change) - # --- MODIFIED: Connect multithreading checkbox toggle --- - self.use_multithreading_checkbox.toggled.connect(self._handle_multithreading_toggle) - # --- END MODIFIED --- + # Button clicks + if self.reset_button: self.reset_button.clicked.connect(self.reset_application_state) + if self.log_verbosity_button: self.log_verbosity_button.clicked.connect(self.toggle_log_verbosity) - # --- MODIFIED: Connect radio group toggle --- - if self.radio_group: - self.radio_group.buttonToggled.connect(self._handle_filter_mode_change) # Use buttonToggled for group signal - # --- END MODIFIED --- - - if self.reset_button: - self.reset_button.clicked.connect(self.reset_application_state) - - # Connect log verbosity button if it exists - if self.log_verbosity_button: - self.log_verbosity_button.clicked.connect(self.toggle_log_verbosity) - - # --- ADDED: Connect link search elements --- - if self.link_search_button: - self.link_search_button.clicked.connect(self._filter_links_log) + # Link search UI signals (for "Only Links" mode) + if self.link_search_button: self.link_search_button.clicked.connect(self._filter_links_log) if self.link_search_input: - self.link_search_input.returnPressed.connect(self._filter_links_log) - self.link_search_input.textChanged.connect(self._filter_links_log) # Real-time filtering - # --- END ADDED --- + self.link_search_input.returnPressed.connect(self._filter_links_log) # Filter on Enter + self.link_search_input.textChanged.connect(self._filter_links_log) # Live filtering as text changes + if self.export_links_button: self.export_links_button.clicked.connect(self._export_links_to_file) - # --- ADDED: Connect export links button --- - if self.export_links_button: - self.export_links_button.clicked.connect(self._export_links_to_file) - # --- END ADDED --- + # Manga mode UI signals + if self.manga_mode_checkbox: self.manga_mode_checkbox.toggled.connect(self.update_ui_for_manga_mode) + if self.manga_rename_toggle_button: self.manga_rename_toggle_button.clicked.connect(self._toggle_manga_filename_style) - if self.manga_mode_checkbox: - self.manga_mode_checkbox.toggled.connect(self.update_ui_for_manga_mode) - self.link_input.textChanged.connect(lambda: self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)) + # URL input text change (affects manga mode UI and page range) + if hasattr(self, 'link_input'): + self.link_input.textChanged.connect(lambda: self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)) + + # Skip words scope toggle button + if self.skip_scope_toggle_button: + self.skip_scope_toggle_button.clicked.connect(self._cycle_skip_scope) - # --- load_known_names_from_util, save_known_names, closeEvent --- - # These methods remain unchanged from your original file def load_known_names_from_util(self): - global KNOWN_NAMES + """Loads known character/show names from the config file into the global KNOWN_NAMES list.""" + global KNOWN_NAMES # Access the global list (potentially shared with downloader_utils) if os.path.exists(self.config_file): try: with open(self.config_file, 'r', encoding='utf-8') as f: raw_names = [line.strip() for line in f] - # Filter out empty strings before setting KNOWN_NAMES - KNOWN_NAMES[:] = sorted(list(set(filter(None, raw_names)))) + # Update KNOWN_NAMES in-place to ensure shared references (like in downloader_utils) are updated + KNOWN_NAMES[:] = sorted(list(set(filter(None, raw_names)))) # Unique, sorted, non-empty names log_msg = f"ℹ️ Loaded {len(KNOWN_NAMES)} known names from {self.config_file}" except Exception as e: log_msg = f"❌ Error loading config '{self.config_file}': {e}" QMessageBox.warning(self, "Config Load Error", f"Could not load list from {self.config_file}:\n{e}") - KNOWN_NAMES[:] = [] + KNOWN_NAMES[:] = [] # Reset to empty if loading fails else: log_msg = f"ℹ️ Config file '{self.config_file}' not found. Starting empty." - KNOWN_NAMES[:] = [] + KNOWN_NAMES[:] = [] # Ensure it's empty if file doesn't exist - self.log_signal.emit(log_msg) - - if hasattr(self, 'character_list'): # Ensure character_list widget exists + if hasattr(self, 'log_signal'): self.log_signal.emit(log_msg) # Log loading status + + # Update the QListWidget in the UI with the loaded names + if hasattr(self, 'character_list'): self.character_list.clear() self.character_list.addItems(KNOWN_NAMES) - def save_known_names(self): - global KNOWN_NAMES + """Saves the current list of known names to the config file.""" + global KNOWN_NAMES # Access the global (potentially shared) list try: - # Ensure KNOWN_NAMES contains unique, non-empty, sorted strings + # Ensure KNOWN_NAMES itself is updated to the unique sorted list before saving unique_sorted_names = sorted(list(set(filter(None, KNOWN_NAMES)))) - KNOWN_NAMES[:] = unique_sorted_names # Update global list in place + KNOWN_NAMES[:] = unique_sorted_names # Modify in-place with open(self.config_file, 'w', encoding='utf-8') as f: for name in unique_sorted_names: f.write(name + '\n') - self.log_signal.emit(f"💾 Saved {len(unique_sorted_names)} known names to {self.config_file}") + if hasattr(self, 'log_signal'): self.log_signal.emit(f"💾 Saved {len(unique_sorted_names)} known names to {self.config_file}") except Exception as e: log_msg = f"❌ Error saving config '{self.config_file}': {e}" - self.log_signal.emit(log_msg) + if hasattr(self, 'log_signal'): self.log_signal.emit(log_msg) QMessageBox.warning(self, "Config Save Error", f"Could not save list to {self.config_file}:\n{e}") def closeEvent(self, event): + """Handles the application close event. Saves settings and manages active downloads.""" + # Save known names and other persistent settings self.save_known_names() - should_exit = True - is_downloading = (self.download_thread and self.download_thread.isRunning()) or \ - (self.thread_pool is not None and any(not f.done() for f in self.active_futures if f is not None)) + self.settings.setValue(MANGA_FILENAME_STYLE_KEY, self.manga_filename_style) + self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope) + self.settings.sync() # Ensure settings are written to disk + should_exit = True + is_downloading = self._is_download_active() # Check if any download is currently active if is_downloading: + # Confirm with the user if they want to exit while a download is in progress reply = QMessageBox.question(self, "Confirm Exit", "Download in progress. Are you sure you want to exit and cancel?", - QMessageBox.Yes | QMessageBox.No, QMessageBox.No) + QMessageBox.Yes | QMessageBox.No, QMessageBox.No) # Default to No if reply == QMessageBox.Yes: self.log_signal.emit("⚠️ Cancelling active download due to application exit...") - self.cancel_download() # Signal cancellation - # --- MODIFICATION START: Wait for threads to finish --- + self.cancel_download() # Signal cancellation to active threads/pool self.log_signal.emit(" Waiting briefly for threads to acknowledge cancellation...") - # Wait for the single thread if it exists + + # Wait for threads to finish, with a timeout if self.download_thread and self.download_thread.isRunning(): - self.download_thread.wait(3000) # Wait up to 3 seconds + self.download_thread.wait(3000) # Wait up to 3 seconds for single thread if self.download_thread.isRunning(): self.log_signal.emit(" ⚠️ Single download thread did not terminate gracefully.") - # Wait for the thread pool if it exists if self.thread_pool: - # Shutdown was already initiated by cancel_download, just wait here - # Use wait=True here for cleaner exit + # Shutdown with cancel_futures=True. The wait=True here might block, + # but cancel_download should have already signaled futures. self.thread_pool.shutdown(wait=True, cancel_futures=True) self.log_signal.emit(" Thread pool shutdown complete.") - self.thread_pool = None # Clear reference - # --- MODIFICATION END --- + self.thread_pool = None # Clear the reference else: - should_exit = False + should_exit = False # User chose not to exit self.log_signal.emit("ℹ️ Application exit cancelled.") - event.ignore() - return + event.ignore() # Ignore the close event + return # Don't proceed to exit if should_exit: self.log_signal.emit("ℹ️ Application closing.") - # Ensure thread pool is None if already shut down above + # Ensure any remaining pool is shut down if not already handled if self.thread_pool: self.log_signal.emit(" Final thread pool check: Shutting down...") - self.cancellation_event.set() - self.thread_pool.shutdown(wait=True, cancel_futures=True) + self.cancellation_event.set() # Ensure cancellation event is set + self.thread_pool.shutdown(wait=True, cancel_futures=True) # Wait for shutdown self.thread_pool = None self.log_signal.emit("👋 Exiting application.") - event.accept() + event.accept() # Accept the close event + def init_ui(self): - # --- MODIFIED: Use QSplitter for main layout --- + """Initializes all UI elements and layouts.""" + # Main layout splitter (divides window into left controls panel and right logs panel) self.main_splitter = QSplitter(Qt.Horizontal) + left_panel_widget = QWidget() # Container widget for the left panel + right_panel_widget = QWidget() # Container widget for the right panel + left_layout = QVBoxLayout(left_panel_widget) # Main vertical layout for the left panel + right_layout = QVBoxLayout(right_panel_widget) # Main vertical layout for the right panel + left_layout.setContentsMargins(10, 10, 10, 10) # Add some padding around left panel contents + right_layout.setContentsMargins(10, 10, 10, 10) # Add padding around right panel contents - # Create container widgets for left and right panels - left_panel_widget = QWidget() - right_panel_widget = QWidget() + # --- Left Panel (Controls) --- - # Setup layouts for the panels - left_layout = QVBoxLayout(left_panel_widget) # Apply layout to widget - right_layout = QVBoxLayout(right_panel_widget) # Apply layout to widget - left_layout.setContentsMargins(10, 10, 10, 10) # Add some margins - right_layout.setContentsMargins(10, 10, 10, 10) - - # --- Populate Left Panel (Controls) --- - # (All the QLineEdit, QCheckBox, QPushButton, etc. setup code goes here, adding to left_layout) - # URL and Page Range Input - url_page_layout = QHBoxLayout() - url_page_layout.setContentsMargins(0,0,0,0) + # URL and Page Range Input Section + url_page_layout = QHBoxLayout() # Horizontal layout for URL and page range inputs + url_page_layout.setContentsMargins(0,0,0,0) # No internal margins for this specific QHBoxLayout url_page_layout.addWidget(QLabel("🔗 Kemono Creator/Post URL:")) self.link_input = QLineEdit() self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765") - self.link_input.textChanged.connect(self.update_custom_folder_visibility) - # self.link_input.setFixedWidth(int(self.width() * 0.45)) # Remove fixed width for splitter - url_page_layout.addWidget(self.link_input, 1) # Give it stretch factor + self.link_input.textChanged.connect(self.update_custom_folder_visibility) # Connect to update custom folder UI + url_page_layout.addWidget(self.link_input, 1) # Allow URL input to stretch + # Page range inputs (Start and End) self.page_range_label = QLabel("Page Range:") - self.page_range_label.setStyleSheet("font-weight: bold; padding-left: 10px;") + self.page_range_label.setStyleSheet("font-weight: bold; padding-left: 10px;") # Style for emphasis self.start_page_input = QLineEdit() self.start_page_input.setPlaceholderText("Start") - self.start_page_input.setFixedWidth(50) - self.start_page_input.setValidator(QIntValidator(1, 99999)) # Min 1 - self.to_label = QLabel("to") + self.start_page_input.setFixedWidth(50) # Fixed width for small input + self.start_page_input.setValidator(QIntValidator(1, 99999)) # Allow only positive integers + self.to_label = QLabel("to") # Simple "to" label between inputs self.end_page_input = QLineEdit() self.end_page_input.setPlaceholderText("End") self.end_page_input.setFixedWidth(50) - self.end_page_input.setValidator(QIntValidator(1, 99999)) # Min 1 + self.end_page_input.setValidator(QIntValidator(1, 99999)) + # Add page range widgets to the horizontal layout url_page_layout.addWidget(self.page_range_label) url_page_layout.addWidget(self.start_page_input) url_page_layout.addWidget(self.to_label) url_page_layout.addWidget(self.end_page_input) - # url_page_layout.addStretch(1) # No need for stretch with splitter - left_layout.addLayout(url_page_layout) + left_layout.addLayout(url_page_layout) # Add URL/Page layout to the main left layout - # Download Directory Input + # Download Directory Input Section left_layout.addWidget(QLabel("📁 Download Location:")) self.dir_input = QLineEdit() self.dir_input.setPlaceholderText("Select folder where downloads will be saved") - self.dir_button = QPushButton("Browse...") + self.dir_button = QPushButton("Browse...") # Button to open file dialog self.dir_button.clicked.connect(self.browse_directory) - dir_layout = QHBoxLayout() - dir_layout.addWidget(self.dir_input, 1) # Input takes more space + dir_layout = QHBoxLayout() # Horizontal layout for directory input and browse button + dir_layout.addWidget(self.dir_input, 1) # Allow directory input to stretch dir_layout.addWidget(self.dir_button) left_layout.addLayout(dir_layout) - # Custom Folder Name (for single post) - self.custom_folder_widget = QWidget() # Use a widget to hide/show group - custom_folder_layout = QVBoxLayout(self.custom_folder_widget) - custom_folder_layout.setContentsMargins(0, 5, 0, 0) # No top margin if hidden - self.custom_folder_label = QLabel("🗄️ Custom Folder Name (Single Post Only):") - self.custom_folder_input = QLineEdit() - self.custom_folder_input.setPlaceholderText("Optional: Save this post to specific folder") - custom_folder_layout.addWidget(self.custom_folder_label) - custom_folder_layout.addWidget(self.custom_folder_input) - self.custom_folder_widget.setVisible(False) # Initially hidden - left_layout.addWidget(self.custom_folder_widget) - # Character Filter Input - self.character_filter_widget = QWidget() - character_filter_layout = QVBoxLayout(self.character_filter_widget) - character_filter_layout.setContentsMargins(0,5,0,0) + # Container for Character Filter and Custom Folder (to manage visibility together) + self.filters_and_custom_folder_container_widget = QWidget() + filters_and_custom_folder_layout = QHBoxLayout(self.filters_and_custom_folder_container_widget) + filters_and_custom_folder_layout.setContentsMargins(0, 5, 0, 0) # Top margin, no others + filters_and_custom_folder_layout.setSpacing(10) # Spacing between filter and custom folder + + # Character Filter (will be added to the container) + self.character_filter_widget = QWidget() # Dedicated widget for character filter + character_filter_v_layout = QVBoxLayout(self.character_filter_widget) + character_filter_v_layout.setContentsMargins(0,0,0,0) # No internal margins for this VBox + character_filter_v_layout.setSpacing(2) # Minimal spacing between label and input self.character_label = QLabel("🎯 Filter by Character(s) (comma-separated):") self.character_input = QLineEdit() self.character_input.setPlaceholderText("e.g., yor, Tifa, Reyna") - character_filter_layout.addWidget(self.character_label) - character_filter_layout.addWidget(self.character_input) - self.character_filter_widget.setVisible(True) # Visible by default - left_layout.addWidget(self.character_filter_widget) + character_filter_v_layout.addWidget(self.character_label) + character_filter_v_layout.addWidget(self.character_input) - # Skip Words Input - left_layout.addWidget(QLabel("🚫 Skip Posts/Files with Words (comma-separated):")) + # Custom Folder Name (will be added to the container) + self.custom_folder_widget = QWidget() # Dedicated widget for custom folder input + custom_folder_v_layout = QVBoxLayout(self.custom_folder_widget) + custom_folder_v_layout.setContentsMargins(0,0,0,0) # No internal margins + custom_folder_v_layout.setSpacing(2) + self.custom_folder_label = QLabel("🗄️ Custom Folder Name (Single Post Only):") + self.custom_folder_input = QLineEdit() + self.custom_folder_input.setPlaceholderText("Optional: Save this post to specific folder") + custom_folder_v_layout.addWidget(self.custom_folder_label) + custom_folder_v_layout.addWidget(self.custom_folder_input) + self.custom_folder_widget.setVisible(False) # Initially hidden, shown based on URL and settings + + # Add character filter and custom folder widgets to their container layout + filters_and_custom_folder_layout.addWidget(self.character_filter_widget, 1) # Allow stretch + filters_and_custom_folder_layout.addWidget(self.custom_folder_widget, 1) # Allow stretch + + # Add the container widget to the main left layout + left_layout.addWidget(self.filters_and_custom_folder_container_widget) + + + # Skip Words Input Section + left_layout.addWidget(QLabel("🚫 Skip with Words (comma-separated):")) + skip_input_and_button_layout = QHBoxLayout() # Horizontal layout for skip words input and scope button + skip_input_and_button_layout.setContentsMargins(0, 0, 0, 0) + skip_input_and_button_layout.setSpacing(10) self.skip_words_input = QLineEdit() self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview") - left_layout.addWidget(self.skip_words_input) + skip_input_and_button_layout.addWidget(self.skip_words_input, 3) # Give more space to input + self.skip_scope_toggle_button = QPushButton() # Text set by _update_skip_scope_button_text + self._update_skip_scope_button_text() # Set initial text based on loaded/default scope + self.skip_scope_toggle_button.setToolTip("Click to cycle skip scope (Files -> Posts -> Both)") + self.skip_scope_toggle_button.setStyleSheet("padding: 6px 10px;") # Ensure consistent padding + self.skip_scope_toggle_button.setMinimumWidth(100) # Ensure button is wide enough for text + skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 1) # Add scope button + left_layout.addLayout(skip_input_and_button_layout) - # --- MODIFIED: File Type Filter Radio Buttons --- - file_filter_layout = QVBoxLayout() # Group label and radio buttons - file_filter_layout.setContentsMargins(0,0,0,0) # Compact - file_filter_layout.addWidget(QLabel("Filter Files:")) - radio_button_layout = QHBoxLayout() - radio_button_layout.setSpacing(10) - self.radio_group = QButtonGroup(self) # Ensures one selection + + # File Filter Radio Buttons Section + file_filter_layout = QVBoxLayout() # Vertical layout for the file filter section + file_filter_layout.setContentsMargins(0,10,0,0) # Add some top margin for separation + file_filter_layout.addWidget(QLabel("Filter Files:")) # Section label + radio_button_layout = QHBoxLayout() # Horizontal layout for the radio buttons themselves + radio_button_layout.setSpacing(10) # Adjusted spacing between radio buttons + self.radio_group = QButtonGroup(self) # Group to ensure only one radio button is selected + # Define radio buttons self.radio_all = QRadioButton("All") self.radio_images = QRadioButton("Images/GIFs") self.radio_videos = QRadioButton("Videos") - self.radio_only_links = QRadioButton("🔗 Only Links") # New button - self.radio_all.setChecked(True) + self.radio_only_archives = QRadioButton("📦 Only Archives") # New radio button for archives + self.radio_only_links = QRadioButton("🔗 Only Links") + self.radio_all.setChecked(True) # Default selection + # Add buttons to the group self.radio_group.addButton(self.radio_all) self.radio_group.addButton(self.radio_images) self.radio_group.addButton(self.radio_videos) - self.radio_group.addButton(self.radio_only_links) # Add to group + self.radio_group.addButton(self.radio_only_archives) # Add new button to group + self.radio_group.addButton(self.radio_only_links) + # Add buttons to the horizontal layout radio_button_layout.addWidget(self.radio_all) radio_button_layout.addWidget(self.radio_images) radio_button_layout.addWidget(self.radio_videos) - radio_button_layout.addWidget(self.radio_only_links) # Add to layout - radio_button_layout.addStretch(1) # Pushes buttons to left - file_filter_layout.addLayout(radio_button_layout) - left_layout.addLayout(file_filter_layout) - # --- END MODIFIED --- + radio_button_layout.addWidget(self.radio_only_archives) # Add new button to layout + radio_button_layout.addWidget(self.radio_only_links) + radio_button_layout.addStretch(1) # Push buttons to the left, filling remaining space + file_filter_layout.addLayout(radio_button_layout) # Add radio button layout to section layout + left_layout.addLayout(file_filter_layout) # Add section layout to main left layout - # Checkboxes Group - checkboxes_group_layout = QVBoxLayout() + # Checkboxes Group Section (for various download options) + checkboxes_group_layout = QVBoxLayout() # Vertical layout for checkbox groups checkboxes_group_layout.setSpacing(10) # Spacing between rows of checkboxes - - row1_layout = QHBoxLayout() # First row of checkboxes + + # Row 1 of Checkboxes (Skip ZIP/RAR, Thumbnails, Compress) + row1_layout = QHBoxLayout() # Horizontal layout for the first row of checkboxes row1_layout.setSpacing(10) self.skip_zip_checkbox = QCheckBox("Skip .zip") - self.skip_zip_checkbox.setChecked(True) + self.skip_zip_checkbox.setChecked(True) # Default to skipping ZIPs row1_layout.addWidget(self.skip_zip_checkbox) self.skip_rar_checkbox = QCheckBox("Skip .rar") - self.skip_rar_checkbox.setChecked(True) + self.skip_rar_checkbox.setChecked(True) # Default to skipping RARs row1_layout.addWidget(self.skip_rar_checkbox) self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only") - self.download_thumbnails_checkbox.setChecked(False) + self.download_thumbnails_checkbox.setChecked(False) # Default to not downloading only thumbnails self.download_thumbnails_checkbox.setToolTip("Thumbnail download functionality is currently limited without the API.") row1_layout.addWidget(self.download_thumbnails_checkbox) self.compress_images_checkbox = QCheckBox("Compress Large Images (to WebP)") - self.compress_images_checkbox.setChecked(False) + self.compress_images_checkbox.setChecked(False) # Default to not compressing images self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).") row1_layout.addWidget(self.compress_images_checkbox) - row1_layout.addStretch(1) # Pushes checkboxes to left - checkboxes_group_layout.addLayout(row1_layout) + row1_layout.addStretch(1) # Push checkboxes to the left + checkboxes_group_layout.addLayout(row1_layout) # Add row to the group layout - # Advanced Settings Section - advanced_settings_label = QLabel("⚙️ Advanced Settings:") + # Advanced Settings Label and Checkboxes + advanced_settings_label = QLabel("⚙️ Advanced Settings:") # Label for advanced settings section checkboxes_group_layout.addWidget(advanced_settings_label) - - advanced_row1_layout = QHBoxLayout() # Subfolders options + + # Advanced Row 1 (Subfolders) + advanced_row1_layout = QHBoxLayout() # Horizontal layout for first row of advanced checkboxes advanced_row1_layout.setSpacing(10) self.use_subfolders_checkbox = QCheckBox("Separate Folders by Name/Title") - self.use_subfolders_checkbox.setChecked(True) - self.use_subfolders_checkbox.toggled.connect(self.update_ui_for_subfolders) + self.use_subfolders_checkbox.setChecked(True) # Default to using subfolders + self.use_subfolders_checkbox.toggled.connect(self.update_ui_for_subfolders) # Connect to update UI advanced_row1_layout.addWidget(self.use_subfolders_checkbox) self.use_subfolder_per_post_checkbox = QCheckBox("Subfolder per Post") - self.use_subfolder_per_post_checkbox.setChecked(False) + self.use_subfolder_per_post_checkbox.setChecked(False) # Default to not using subfolder per post self.use_subfolder_per_post_checkbox.setToolTip("Creates a subfolder for each post inside the character/title folder.") - self.use_subfolder_per_post_checkbox.toggled.connect(self.update_ui_for_subfolders) # Also update UI + self.use_subfolder_per_post_checkbox.toggled.connect(self.update_ui_for_subfolders) # Connect to update UI advanced_row1_layout.addWidget(self.use_subfolder_per_post_checkbox) - advanced_row1_layout.addStretch(1) + advanced_row1_layout.addStretch(1) # Push to left checkboxes_group_layout.addLayout(advanced_row1_layout) - advanced_row2_layout = QHBoxLayout() # Multithreading, External Links, Manga Mode + # Advanced Row 2 (Multithreading, External Links, Manga Mode) + advanced_row2_layout = QHBoxLayout() # Horizontal layout for second row of advanced checkboxes advanced_row2_layout.setSpacing(10) - multithreading_layout = QHBoxLayout() # Group multithreading checkbox and input - multithreading_layout.setContentsMargins(0,0,0,0) + + # Multithreading specific layout (checkbox, label, input) + multithreading_layout = QHBoxLayout() + multithreading_layout.setContentsMargins(0,0,0,0) # No internal margins for this group self.use_multithreading_checkbox = QCheckBox("Use Multithreading") - self.use_multithreading_checkbox.setChecked(True) - self.use_multithreading_checkbox.setToolTip("Speeds up downloads for full creator pages.\nSingle post URLs always use one thread.") + self.use_multithreading_checkbox.setChecked(True) # Default to using multithreading + self.use_multithreading_checkbox.setToolTip( # Updated tooltip explaining thread count usage + "Enables concurrent operations. See 'Threads' input for details." + ) multithreading_layout.addWidget(self.use_multithreading_checkbox) - self.thread_count_label = QLabel("Threads:") + self.thread_count_label = QLabel("Threads:") # Label for thread count input multithreading_layout.addWidget(self.thread_count_label) - self.thread_count_input = QLineEdit() - self.thread_count_input.setFixedWidth(40) - self.thread_count_input.setText("4") - # --- MODIFIED: Updated tooltip to remove recommendation --- - self.thread_count_input.setToolTip(f"Number of threads (max: {MAX_THREADS}).") - # --- END MODIFIED --- - self.thread_count_input.setValidator(QIntValidator(1, MAX_THREADS)) # Use constant + self.thread_count_input = QLineEdit() # Input for number of threads + self.thread_count_input.setFixedWidth(40) # Small fixed width + self.thread_count_input.setText("4") # Default thread count + self.thread_count_input.setToolTip( # Updated tooltip explaining thread usage contexts + f"Number of concurrent operations.\n" + f"- Single Post: Concurrent file downloads (1-{MAX_FILE_THREADS_PER_POST_OR_WORKER} recommended).\n" + f"- Creator Feed: Concurrent post processing (1-{MAX_THREADS}).\n" + f" File downloads per post worker also use this value (1-{MAX_FILE_THREADS_PER_POST_OR_WORKER} recommended)." + ) + self.thread_count_input.setValidator(QIntValidator(1, MAX_THREADS)) # Validate input (1 to MAX_THREADS) multithreading_layout.addWidget(self.thread_count_input) - advanced_row2_layout.addLayout(multithreading_layout) + advanced_row2_layout.addLayout(multithreading_layout) # Add multithreading group to advanced row 2 + # External Links Checkbox self.external_links_checkbox = QCheckBox("Show External Links in Log") - self.external_links_checkbox.setChecked(False) + self.external_links_checkbox.setChecked(False) # Default to not showing external links log separately advanced_row2_layout.addWidget(self.external_links_checkbox) + # Manga Mode Checkbox self.manga_mode_checkbox = QCheckBox("Manga/Comic Mode") - self.manga_mode_checkbox.setToolTip("Process newest posts first, rename files based on post title (for creator feeds only).") - self.manga_mode_checkbox.setChecked(False) + self.manga_mode_checkbox.setToolTip("Downloads posts from oldest to newest and renames files based on post title (for creator feeds only).") + self.manga_mode_checkbox.setChecked(False) # Default to manga mode off advanced_row2_layout.addWidget(self.manga_mode_checkbox) - advanced_row2_layout.addStretch(1) - checkboxes_group_layout.addLayout(advanced_row2_layout) + advanced_row2_layout.addStretch(1) # Push to left + checkboxes_group_layout.addLayout(advanced_row2_layout) # Add advanced row 2 to group layout + left_layout.addLayout(checkboxes_group_layout) # Add checkbox group layout to main left layout - left_layout.addLayout(checkboxes_group_layout) - # Download and Cancel Buttons - btn_layout = QHBoxLayout() + # Download and Cancel Buttons Section + btn_layout = QHBoxLayout() # Horizontal layout for main action buttons btn_layout.setSpacing(10) self.download_btn = QPushButton("⬇️ Start Download") - self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;") # Make it prominent - self.download_btn.clicked.connect(self.start_download) + self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;") # Make download button prominent + self.download_btn.clicked.connect(self.start_download) # Connect to start download logic self.cancel_btn = QPushButton("❌ Cancel") - self.cancel_btn.setEnabled(False) # Initially disabled - self.cancel_btn.clicked.connect(self.cancel_download) + self.cancel_btn.setEnabled(False) # Initially disabled, enabled when download is active + self.cancel_btn.clicked.connect(self.cancel_download) # Connect to cancel download logic btn_layout.addWidget(self.download_btn) btn_layout.addWidget(self.cancel_btn) - left_layout.addLayout(btn_layout) - left_layout.addSpacing(10) # Some space before known characters list + left_layout.addLayout(btn_layout) # Add button layout to main left layout + left_layout.addSpacing(10) # Add some space after buttons - # Known Characters/Shows List Management - known_chars_label_layout = QHBoxLayout() + # Known Characters/Shows List Section + known_chars_label_layout = QHBoxLayout() # Layout for label and search input for known characters known_chars_label_layout.setSpacing(10) self.known_chars_label = QLabel("🎭 Known Shows/Characters (for Folder Names):") - self.character_search_input = QLineEdit() + self.character_search_input = QLineEdit() # Input to filter the character list self.character_search_input.setPlaceholderText("Search characters...") - known_chars_label_layout.addWidget(self.known_chars_label, 1) # Label takes more space + known_chars_label_layout.addWidget(self.known_chars_label, 1) # Allow label to take space known_chars_label_layout.addWidget(self.character_search_input) left_layout.addLayout(known_chars_label_layout) - self.character_list = QListWidget() - self.character_list.setSelectionMode(QListWidget.ExtendedSelection) # Allow multi-select for delete - left_layout.addWidget(self.character_list, 1) # Takes remaining vertical space + self.character_list = QListWidget() # List to display known characters + self.character_list.setSelectionMode(QListWidget.ExtendedSelection) # Allow multiple selections for deletion + left_layout.addWidget(self.character_list, 1) # Allow list to stretch vertically - char_manage_layout = QHBoxLayout() # Add/Delete character buttons + # Character Management Buttons Section (Add/Delete) + char_manage_layout = QHBoxLayout() # Layout for adding/deleting characters from the list char_manage_layout.setSpacing(10) - self.new_char_input = QLineEdit() + self.new_char_input = QLineEdit() # Input for new character name self.new_char_input.setPlaceholderText("Add new show/character name") - self.add_char_button = QPushButton("➕ Add") - self.delete_char_button = QPushButton("🗑️ Delete Selected") - self.add_char_button.clicked.connect(self.add_new_character) - self.new_char_input.returnPressed.connect(self.add_char_button.click) # Add on Enter - self.delete_char_button.clicked.connect(self.delete_selected_character) - char_manage_layout.addWidget(self.new_char_input, 2) # Input field wider + self.add_char_button = QPushButton("➕ Add") # Button to add new character + self.delete_char_button = QPushButton("🗑️ Delete Selected") # Button to delete selected characters + self.add_char_button.clicked.connect(self.add_new_character) # Connect add button + self.new_char_input.returnPressed.connect(self.add_char_button.click) # Allow adding on Enter key press + self.delete_char_button.clicked.connect(self.delete_selected_character) # Connect delete button + char_manage_layout.addWidget(self.new_char_input, 2) # Give more space to input field char_manage_layout.addWidget(self.add_char_button, 1) char_manage_layout.addWidget(self.delete_char_button, 1) - left_layout.addLayout(char_manage_layout) - left_layout.addStretch(0) # Prevent vertical stretching of controls + left_layout.addLayout(char_manage_layout) # Add management buttons layout to main left layout + left_layout.addStretch(0) # Prevent excessive stretching at the bottom of left panel - # --- Populate Right Panel (Logs) --- - log_title_layout = QHBoxLayout() - self.progress_log_label = QLabel("📜 Progress Log:") # Store label reference + # --- Right Panel (Logs) --- + log_title_layout = QHBoxLayout() # Layout for log title and utility buttons (verbosity, reset) + self.progress_log_label = QLabel("📜 Progress Log:") # Main label for the log area log_title_layout.addWidget(self.progress_log_label) - log_title_layout.addStretch(1) + log_title_layout.addStretch(1) # Push utility buttons to the right - # --- ADDED: Link Search Bar --- + # Link Search Input and Button (initially hidden, for "Only Links" mode) self.link_search_input = QLineEdit() self.link_search_input.setPlaceholderText("Search Links...") - self.link_search_input.setVisible(False) # Initially hidden - self.link_search_input.setFixedWidth(150) # Adjust width + self.link_search_input.setVisible(False) # Hidden by default + self.link_search_input.setFixedWidth(150) log_title_layout.addWidget(self.link_search_input) - - self.link_search_button = QPushButton("🔍") + self.link_search_button = QPushButton("🔍") # Search icon button self.link_search_button.setToolTip("Filter displayed links") - self.link_search_button.setVisible(False) # Initially hidden + self.link_search_button.setVisible(False) # Hidden by default self.link_search_button.setFixedWidth(30) - self.link_search_button.setStyleSheet("padding: 4px 4px;") + self.link_search_button.setStyleSheet("padding: 4px 4px;") # Compact padding log_title_layout.addWidget(self.link_search_button) - # --- END ADDED --- - # --- ADDED: Log Verbosity Button --- - self.log_verbosity_button = QPushButton("Show Basic Log") # Default text + # Manga Rename Toggle Button (initially hidden, for Manga Mode) + self.manga_rename_toggle_button = QPushButton() # Text set by _update_manga_filename_style_button_text + self.manga_rename_toggle_button.setVisible(False) # Hidden by default + self.manga_rename_toggle_button.setFixedWidth(140) # Adjusted width for text + self.manga_rename_toggle_button.setStyleSheet("padding: 4px 8px;") + self._update_manga_filename_style_button_text() # Set initial text based on loaded style + log_title_layout.addWidget(self.manga_rename_toggle_button) + + # Log Verbosity Toggle Button + self.log_verbosity_button = QPushButton("Show Basic Log") # Button to toggle log detail self.log_verbosity_button.setToolTip("Toggle between full and basic log details.") - self.log_verbosity_button.setFixedWidth(110) # Adjust width as needed + self.log_verbosity_button.setFixedWidth(110) # Fixed width self.log_verbosity_button.setStyleSheet("padding: 4px 8px;") log_title_layout.addWidget(self.log_verbosity_button) - # --- END ADDED --- - self.reset_button = QPushButton("🔄 Reset") + # Reset Button + self.reset_button = QPushButton("🔄 Reset") # Button to reset application state self.reset_button.setToolTip("Reset all inputs and logs to default state (only when idle).") self.reset_button.setFixedWidth(80) - self.reset_button.setStyleSheet("padding: 4px 8px;") # Smaller padding + self.reset_button.setStyleSheet("padding: 4px 8px;") log_title_layout.addWidget(self.reset_button) - right_layout.addLayout(log_title_layout) + right_layout.addLayout(log_title_layout) # Add log title/utility layout to main right layout - self.log_splitter = QSplitter(Qt.Vertical) # Keep the vertical splitter for logs - self.main_log_output = QTextEdit() - self.main_log_output.setReadOnly(True) - # self.main_log_output.setMinimumWidth(450) # Remove minimum width - self.main_log_output.setLineWrapMode(QTextEdit.NoWrap) # Disable line wrapping + # Log Output Areas (Splitter for Main and External Logs) + self.log_splitter = QSplitter(Qt.Vertical) # Vertical splitter for two log areas + self.main_log_output = QTextEdit() # Main log display + self.main_log_output.setReadOnly(True) # Make it read-only + self.main_log_output.setLineWrapMode(QTextEdit.NoWrap) # No wrap for better log readability self.main_log_output.setStyleSheet(""" - QTextEdit { - background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; - color: #F0F0F0; border-radius: 4px; font-family: Consolas, Courier New, monospace; font-size: 9.5pt; - }""") - self.external_log_output = QTextEdit() + QTextEdit { background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; + color: #F0F0F0; border-radius: 4px; font-family: Consolas, Courier New, monospace; font-size: 9.5pt; }""") + self.external_log_output = QTextEdit() # External links log display self.external_log_output.setReadOnly(True) - # self.external_log_output.setMinimumWidth(450) # Remove minimum width - self.external_log_output.setLineWrapMode(QTextEdit.NoWrap) # Disable line wrapping + self.external_log_output.setLineWrapMode(QTextEdit.NoWrap) self.external_log_output.setStyleSheet(""" - QTextEdit { - background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; - color: #F0F0F0; border-radius: 4px; font-family: Consolas, Courier New, monospace; font-size: 9.5pt; - }""") - self.external_log_output.hide() # Initially hidden - self.log_splitter.addWidget(self.main_log_output) - self.log_splitter.addWidget(self.external_log_output) + QTextEdit { background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; + color: #F0F0F0; border-radius: 4px; font-family: Consolas, Courier New, monospace; font-size: 9.5pt; }""") + self.external_log_output.hide() # Initially hidden, shown when "Show External Links" is checked + self.log_splitter.addWidget(self.main_log_output) # Add main log to splitter + self.log_splitter.addWidget(self.external_log_output) # Add external log to splitter self.log_splitter.setSizes([self.height(), 0]) # Main log takes all space initially - right_layout.addWidget(self.log_splitter, 1) # Log splitter takes available vertical space + right_layout.addWidget(self.log_splitter, 1) # Allow splitter to stretch vertically - # --- ADDED: Export Links Button --- - export_button_layout = QHBoxLayout() - export_button_layout.addStretch(1) # Push button to the right + # Export Links Button (initially hidden, for "Only Links" mode) + export_button_layout = QHBoxLayout() # Layout to push button to the right + export_button_layout.addStretch(1) # Push to right self.export_links_button = QPushButton("Export Links") self.export_links_button.setToolTip("Export all extracted links to a .txt file.") self.export_links_button.setFixedWidth(100) @@ -612,53 +687,51 @@ class DownloaderApp(QWidget): self.export_links_button.setEnabled(False) # Initially disabled self.export_links_button.setVisible(False) # Initially hidden export_button_layout.addWidget(self.export_links_button) - right_layout.addLayout(export_button_layout) # Add to bottom of right panel - # --- END ADDED --- + right_layout.addLayout(export_button_layout) - self.progress_label = QLabel("Progress: Idle") + + # Progress Labels (Overall and Individual File) + self.progress_label = QLabel("Progress: Idle") # Label for overall download progress self.progress_label.setStyleSheet("padding-top: 5px; font-style: italic;") right_layout.addWidget(self.progress_label) - - self.file_progress_label = QLabel("") # For individual file progress - self.file_progress_label.setWordWrap(True) # Enable word wrapping for the status label + self.file_progress_label = QLabel("") # Label for individual file download progress + self.file_progress_label.setWordWrap(True) # Allow text to wrap if long self.file_progress_label.setStyleSheet("padding-top: 2px; font-style: italic; color: #A0A0A0;") right_layout.addWidget(self.file_progress_label) - # --- Add panels to the main horizontal splitter --- + + # Add left and right panels to the main splitter self.main_splitter.addWidget(left_panel_widget) self.main_splitter.addWidget(right_panel_widget) - - # --- Set initial sizes for the splitter --- - # Calculate initial sizes (e.g., left 30%, right 70%) - initial_width = self.width() # Use the initial window width - left_width = int(initial_width * 0.30) + # Set initial splitter sizes (e.g., 35% for left controls, 65% for right logs) + initial_width = self.width() + left_width = int(initial_width * 0.35) right_width = initial_width - left_width self.main_splitter.setSizes([left_width, right_width]) - # --- Set the main splitter as the central layout --- - # Need a top-level layout to hold the splitter - top_level_layout = QHBoxLayout(self) # Apply layout directly to the main widget (self) - top_level_layout.setContentsMargins(0,0,0,0) # No margins for the main layout - top_level_layout.addWidget(self.main_splitter) - # self.setLayout(top_level_layout) # Already set above + # Set main layout for the window + top_level_layout = QHBoxLayout(self) # Top-level layout for the main window + top_level_layout.setContentsMargins(0,0,0,0) # No margins for the top-level layout itself + top_level_layout.addWidget(self.main_splitter) # Add the main splitter to the window's layout - # --- End Layout Modification --- - - # Initial UI state updates + # Initial UI state updates based on defaults and loaded settings self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked()) - self.update_custom_folder_visibility() self.update_external_links_setting(self.external_links_checkbox.isChecked()) self.update_multithreading_label(self.thread_count_input.text()) - self.update_page_range_enabled_state() - if self.manga_mode_checkbox: # Ensure it exists before accessing - self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked()) - self.link_input.textChanged.connect(self.update_page_range_enabled_state) # Connect after init - self.load_known_names_from_util() # Load names into the list widget - self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) # Set initial state - self._handle_filter_mode_change(self.radio_group.checkedButton(), True) # Set initial filter mode UI state + self.update_page_range_enabled_state() # Call after link_input is created + if self.manga_mode_checkbox: # Ensure checkbox exists before accessing + self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked()) # Call after manga_mode_checkbox created + if hasattr(self, 'link_input'): self.link_input.textChanged.connect(self.update_page_range_enabled_state) # Connect page range update + self.load_known_names_from_util() # Load known names into the list widget + self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) # Set initial state of thread count input + if hasattr(self, 'radio_group') and self.radio_group.checkedButton(): # Ensure radio group and a checked button exist + self._handle_filter_mode_change(self.radio_group.checkedButton(), True) # Set initial UI based on default radio selection + self._update_manga_filename_style_button_text() # Set initial text for manga rename button + self._update_skip_scope_button_text() # Set initial text for skip scope button def get_dark_theme(self): + """Returns a string containing CSS for a dark theme.""" return """ QWidget { background-color: #2E2E2E; color: #E0E0E0; font-family: Segoe UI, Arial, sans-serif; font-size: 10pt; } QLineEdit, QListWidget { background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; color: #F0F0F0; border-radius: 4px; } @@ -673,188 +746,180 @@ class DownloaderApp(QWidget): QListWidget { alternate-background-color: #353535; border: 1px solid #5A5A5A; } QListWidget::item:selected { background-color: #007ACC; color: #FFFFFF; } QToolTip { background-color: #4A4A4A; color: #F0F0F0; border: 1px solid #6A6A6A; padding: 4px; border-radius: 3px; } - QSplitter::handle { background-color: #5A5A5A; width: 5px; /* Make handle slightly wider */ } + QSplitter::handle { background-color: #5A5A5A; /* Thicker handle for easier grabbing */ } QSplitter::handle:horizontal { width: 5px; } QSplitter::handle:vertical { height: 5px; } - """ # Added styling for splitter handle + /* Style for QFrame used as a separator or container if needed */ + QFrame[frameShape="4"], QFrame[frameShape="5"] { /* HLine, VLine */ + border: 1px solid #4A4A4A; /* Darker line for subtle separation */ + border-radius: 3px; + } + """ def browse_directory(self): + """Opens a dialog to select the download directory.""" + # Get current directory from input if valid, otherwise use home directory or last used current_dir = self.dir_input.text() if os.path.isdir(self.dir_input.text()) else "" folder = QFileDialog.getExistingDirectory(self, "Select Download Folder", current_dir) - if folder: - self.dir_input.setText(folder) + if folder: # If a folder was selected + self.dir_input.setText(folder) # Update the directory input field def handle_main_log(self, message): - # --- MODIFIED: Check for HTML_PREFIX --- - is_html_message = message.startswith(HTML_PREFIX) + """Appends a message to the main log output area, handling HTML and basic log mode.""" + is_html_message = message.startswith(HTML_PREFIX) # Check if message is flagged as HTML + display_message = message + use_html = False if is_html_message: - # If it's HTML, strip the prefix and use insertHtml - display_message = message[len(HTML_PREFIX):] + display_message = message[len(HTML_PREFIX):] # Remove HTML prefix use_html = True - elif self.basic_log_mode: # Apply basic filtering only if NOT HTML - # Define keywords/prefixes for messages to ALWAYS show in basic mode + elif self.basic_log_mode: # If basic log mode is active, filter messages + # Keywords that indicate a message should be shown in basic mode basic_keywords = [ - '🚀 starting download', '🏁 download finished', '🏁 download cancelled', - '❌', '⚠️', '✅ all posts processed', '✅ reached end of posts', - 'summary:', 'progress:', '[fetcher]', # Show fetcher logs for context - 'critical error', 'import error', 'error', 'fail', 'timeout', - 'unsupported url', 'invalid url', 'no posts found', 'could not create directory', - 'missing dependency', 'high thread count', 'manga mode filter warning', - 'duplicate name', 'potential name conflict', 'invalid filter name', - 'no valid character filters' + '🚀 starting download', '🏁 download finished', '🏁 download cancelled', # Start/End messages + '❌', '⚠️', '✅ all posts processed', '✅ reached end of posts', # Errors, Warnings, Key Milestones + 'summary:', 'progress:', '[fetcher]', # Summaries, Progress, Fetcher logs + 'critical error', 'import error', 'error', 'fail', 'timeout', # Specific error types + 'unsupported url', 'invalid url', 'no posts found', 'could not create directory', # Common operational issues + 'missing dependency', 'high thread count', 'manga mode filter warning', # Configuration/Setup warnings + 'duplicate name', 'potential name conflict', 'invalid filter name', # Known list issues + 'no valid character filters' # Filter issues ] - message_lower = message.lower() + message_lower = message.lower() # For case-insensitive keyword check if not any(keyword in message_lower for keyword in basic_keywords): + # Allow specific success messages even in basic mode if they are not too verbose if not message.strip().startswith("✅ Saved:") and \ not message.strip().startswith("✅ Added") and \ not message.strip().startswith("✅ Application reset complete"): - return # Skip appending less important messages in basic mode - display_message = message # Use original message if it passes basic filter - use_html = False - else: # Full log mode and not HTML - display_message = message - use_html = False - # --- END MODIFIED --- - + return # Skip message if not matching keywords and not an allowed specific success message + try: - # Ensure message is a string and replace null characters that can crash QTextEdit + # Sanitize null characters that can crash QTextEdit safe_message = str(display_message).replace('\x00', '[NULL]') if use_html: - self.main_log_output.insertHtml(safe_message) # Use insertHtml for formatted titles + self.main_log_output.insertHtml(safe_message) # Insert as HTML else: - self.main_log_output.append(safe_message) # Use append for plain text - # Auto-scroll if near the bottom + self.main_log_output.append(safe_message) # Append as plain text + + # Auto-scroll if the scrollbar is near the bottom scrollbar = self.main_log_output.verticalScrollBar() if scrollbar.value() >= scrollbar.maximum() - 30: # Threshold for auto-scroll - scrollbar.setValue(scrollbar.maximum()) + scrollbar.setValue(scrollbar.maximum()) # Scroll to the bottom except Exception as e: - # Fallback logging if GUI logging fails + # Fallback print if GUI logging fails for some reason print(f"GUI Main Log Error: {e}\nOriginal Message: {message}") - # --- ADDED: Helper to check download state --- + def _is_download_active(self): - """Checks if a download thread or pool is currently active.""" + """Checks if any download process (single or multi-threaded for posts) is currently active.""" single_thread_active = self.download_thread and self.download_thread.isRunning() - # Check if pool exists AND has any futures that are not done + # Check if thread_pool exists and has any non-done futures pool_active = self.thread_pool is not None and any(not f.done() for f in self.active_futures if f is not None) return single_thread_active or pool_active - # --- END ADDED --- - # --- ADDED: New system for handling external links with sequential CONDIITONAL delay --- - # MODIFIED: Slot now takes link_text as the second argument + def handle_external_link_signal(self, post_title, link_text, link_url, platform): - """Receives link signals, adds them to a queue, and triggers processing.""" + """Handles external links found by worker threads by adding them to a queue for processing.""" link_data = (post_title, link_text, link_url, platform) - self.external_link_queue.append(link_data) - # --- ADDED: Cache link if in "Only Links" mode --- + self.external_link_queue.append(link_data) # Add to queue if self.radio_only_links and self.radio_only_links.isChecked(): - self.extracted_links_cache.append(link_data) - # --- END ADDED --- - self._try_process_next_external_link() + self.extracted_links_cache.append(link_data) # Also add to cache for "Only Links" mode display + self._try_process_next_external_link() # Attempt to process immediately or schedule def _try_process_next_external_link(self): - """Processes the next link from the queue if not already processing.""" + """Processes the next external link from the queue with appropriate delays to avoid flooding the UI.""" if self._is_processing_external_link_queue or not self.external_link_queue: - return # Don't process if busy or queue empty - - # Determine if we should display based on mode and checkbox state - is_only_links_mode = self.radio_only_links and self.radio_only_links.isChecked() - should_display_in_external = self.show_external_links and not is_only_links_mode - - # Only proceed if displaying in *either* log is currently possible/enabled - if not (is_only_links_mode or should_display_in_external): - # If neither log is active/visible for this link, still need to allow queue processing - self._is_processing_external_link_queue = False - if self.external_link_queue: - QTimer.singleShot(0, self._try_process_next_external_link) + # Already processing or queue is empty, so return return - self._is_processing_external_link_queue = True + # Determine if links should be displayed in the external log or main log (for "Only Links" mode) + is_only_links_mode = self.radio_only_links and self.radio_only_links.isChecked() + should_display_in_external_log = self.show_external_links and not is_only_links_mode - link_data = self.external_link_queue.popleft() + if not (is_only_links_mode or should_display_in_external_log): + # Neither "Only Links" mode nor "Show External Links" is active for displaying this link now. + # It's queued, but we don't need to display it immediately. + self._is_processing_external_link_queue = False # Ensure flag is reset + if self.external_link_queue: # If there are still items, try again later (e.g., if settings change) + QTimer.singleShot(0, self._try_process_next_external_link) # Check again soon + return - # --- MODIFIED: Schedule the display AND the next step based on mode --- + self._is_processing_external_link_queue = True # Set flag that we are processing one + link_data = self.external_link_queue.popleft() # Get the next link from the queue + + # Apply different delays based on context to manage UI updates if is_only_links_mode: - # Schedule with fixed 0.4s delay for "Only Links" mode - delay_ms = 80 # 0.08 seconds + # Shorter delay for "Only Links" mode as it's the primary output + delay_ms = 80 # milliseconds QTimer.singleShot(delay_ms, lambda data=link_data: self._display_and_schedule_next(data)) - elif self._is_download_active(): - # Schedule with random delay for other modes during download - delay_ms = random.randint(4000, 8000) + elif self._is_download_active(): # If a download is active, use a longer, randomized delay + delay_ms = random.randint(4000, 8000) # 4-8 seconds QTimer.singleShot(delay_ms, lambda data=link_data: self._display_and_schedule_next(data)) - else: - # No download active in other modes, process immediately + else: # No download active, process with minimal delay QTimer.singleShot(0, lambda data=link_data: self._display_and_schedule_next(data)) - # --- END MODIFIED --- - # --- NEW Method --- + def _display_and_schedule_next(self, link_data): - """Displays the link in the correct log and schedules the check for the next link.""" - post_title, link_text, link_url, platform = link_data # Unpack all data + """Displays a single external link and schedules the processing of the next one from the queue.""" + post_title, link_text, link_url, platform = link_data is_only_links_mode = self.radio_only_links and self.radio_only_links.isChecked() - # Format the link text part + # Format link for display (truncate long link text) max_link_text_len = 35 display_text = link_text[:max_link_text_len].strip() + "..." if len(link_text) > max_link_text_len else link_text formatted_link_info = f"{display_text} - {link_url} - {platform}" - separator = "-" * 45 + separator = "-" * 45 # Separator for visual grouping by post in "Only Links" mode if is_only_links_mode: - # Check if the post title has changed - if post_title != self._current_link_post_title: - # Emit separator and new title (formatted as HTML) - self.log_signal.emit(HTML_PREFIX + "
" + separator + "
") - # Use HTML for bold blue title - title_html = f'{post_title}
' - self.log_signal.emit(HTML_PREFIX + title_html) - self._current_link_post_title = post_title # Update current title + # In "Only Links" mode, display in the main log + if post_title != self._current_link_post_title: # If it's a new post title + self.log_signal.emit(HTML_PREFIX + "
" + separator + "
") # Add separator and space using HTML + title_html = f'{post_title}
' # Make post title prominent + self.log_signal.emit(HTML_PREFIX + title_html) # Emit title as HTML + self._current_link_post_title = post_title # Update current title tracker + self.log_signal.emit(formatted_link_info) # Emit the link info as plain text + elif self.show_external_links: # If "Show External Links" is checked (and not "Only Links" mode) + # Display in the dedicated external links log + self._append_to_external_log(formatted_link_info, separator) # Pass separator for consistency if needed - # Emit the link info as plain text (handle_main_log will append it) - self.log_signal.emit(formatted_link_info) - - elif self.show_external_links: - # Append directly to external log (plain text) - self._append_to_external_log(formatted_link_info, separator) - - # Allow the next link to be processed + # Reset flag and try to process the next link in the queue self._is_processing_external_link_queue = False - self._try_process_next_external_link() # Check queue again - # --- END NEW Method --- + self._try_process_next_external_link() + - # --- RENAMED and MODIFIED: Appends ONLY to external log --- def _append_to_external_log(self, formatted_link_text, separator): - """Appends a single formatted link to the external_log_output widget.""" - # Visibility check is done before calling this now + """Appends a formatted link to the external log output if it's visible.""" if not (self.external_log_output and self.external_log_output.isVisible()): - return + return # Don't append if log area is hidden try: - self.external_log_output.append(separator) + # Append the formatted link text self.external_log_output.append(formatted_link_text) - self.external_log_output.append("") # Add a blank line for spacing + self.external_log_output.append("") # Add a blank line for spacing between links - # Auto-scroll + # Auto-scroll if near the bottom scrollbar = self.external_log_output.verticalScrollBar() - if scrollbar.value() >= scrollbar.maximum() - 50: # Adjust threshold if needed - scrollbar.setValue(scrollbar.maximum()) + if scrollbar.value() >= scrollbar.maximum() - 50: # Threshold for auto-scroll + scrollbar.setValue(scrollbar.maximum()) # Scroll to bottom except Exception as e: - # Log errors related to external log to the main log - self.log_signal.emit(f"GUI External Log Append Error: {e}\nOriginal Message: {formatted_link_text}") + # Fallback if GUI logging fails + self.log_signal.emit(f"GUI External Log Append Error: {e}\nOriginal Message: {formatted_link_text}") # Log to main log as fallback print(f"GUI External Log Error (Append): {e}\nOriginal Message: {formatted_link_text}") - # --- END MODIFIED --- def update_file_progress_display(self, filename, downloaded_bytes, total_bytes): + """Updates the label showing individual file download progress.""" if not filename and total_bytes == 0 and downloaded_bytes == 0: # Clear signal - self.file_progress_label.setText("") + self.file_progress_label.setText("") # Clear the progress label return - # MODIFIED: Truncate filename more aggressively (e.g., max 25 chars) - max_filename_len = 25 - display_filename = filename[:max_filename_len-3].strip() + "..." if len(filename) > max_filename_len else filename + max_filename_len = 25 # Max length for filename part of the string for display + display_filename = filename + if len(filename) > max_filename_len: # Truncate if too long + display_filename = filename[:max_filename_len-3].strip() + "..." - if total_bytes > 0: + # Format progress text + if total_bytes > 0: # If total size is known downloaded_mb = downloaded_bytes / (1024 * 1024) total_mb = total_bytes / (1024 * 1024) progress_text = f"Downloading '{display_filename}' ({downloaded_mb:.1f}MB / {total_mb:.1f}MB)" @@ -862,161 +927,180 @@ class DownloaderApp(QWidget): downloaded_mb = downloaded_bytes / (1024 * 1024) progress_text = f"Downloading '{display_filename}' ({downloaded_mb:.1f}MB)" - # Check if the resulting text might still be too long (heuristic) - # This is a basic check, might need refinement based on typical log width - if len(progress_text) > 75: # Example threshold, adjust as needed - # If still too long, truncate the display_filename even more - display_filename = filename[:15].strip() + "..." if len(filename) > 18 else display_filename - if total_bytes > 0: - progress_text = f"DL '{display_filename}' ({downloaded_mb:.1f}/{total_mb:.1f}MB)" - else: - progress_text = f"DL '{display_filename}' ({downloaded_mb:.1f}MB)" + # Further shorten if the whole string is too long for the UI label + if len(progress_text) > 75: # Heuristic length limit for the label + # Shorter truncate for filename if the whole string is still too long + display_filename = filename[:15].strip() + "..." if len(filename) > 18 else display_filename + if total_bytes > 0: progress_text = f"DL '{display_filename}' ({downloaded_mb:.1f}/{total_mb:.1f}MB)" + else: progress_text = f"DL '{display_filename}' ({downloaded_mb:.1f}MB)" - self.file_progress_label.setText(progress_text) + self.file_progress_label.setText(progress_text) # Update the label text def update_external_links_setting(self, checked): - # This function is now primarily controlled by _handle_filter_mode_change - # when the "Only Links" mode is NOT selected. + """Handles changes to the 'Show External Links in Log' checkbox, updating UI visibility.""" is_only_links_mode = self.radio_only_links and self.radio_only_links.isChecked() - if is_only_links_mode: - # In "Only Links" mode, the external log is always hidden. - if self.external_log_output: self.external_log_output.hide() - if self.log_splitter: self.log_splitter.setSizes([self.height(), 0]) - return + is_only_archives_mode = self.radio_only_archives and self.radio_only_archives.isChecked() # Check new mode - # Proceed only if NOT in "Only Links" mode - self.show_external_links = checked + # External links log is not shown for "Only Links" or "Only Archives" mode, regardless of checkbox state + if is_only_links_mode or is_only_archives_mode: + if self.external_log_output: self.external_log_output.hide() # Hide external log + if self.log_splitter: self.log_splitter.setSizes([self.height(), 0]) # Main log takes all space + # self.show_external_links should ideally be false if these modes are active, + # and the checkbox should be disabled by _handle_filter_mode_change. + return # Exit early, no further action needed for these modes + + self.show_external_links = checked # Update the internal flag based on checkbox state if checked: + # Show the external log area if self.external_log_output: self.external_log_output.show() - # Adjust splitter, give both logs some space - if self.log_splitter: self.log_splitter.setSizes([self.height() // 2, self.height() // 2]) - if self.main_log_output: self.main_log_output.setMinimumHeight(50) # Ensure it doesn't disappear - if self.external_log_output: self.external_log_output.setMinimumHeight(50) - self.log_signal.emit("\n" + "="*40 + "\n🔗 External Links Log Enabled\n" + "="*40) - if self.external_log_output: - self.external_log_output.clear() # Clear previous content - self.external_log_output.append("🔗 External Links Found:") # Header - # Try processing queue if log becomes visible - self._try_process_next_external_link() + if self.log_splitter: self.log_splitter.setSizes([self.height() // 2, self.height() // 2]) # Split space between logs + if self.main_log_output: self.main_log_output.setMinimumHeight(50) # Ensure some min height for main log + if self.external_log_output: self.external_log_output.setMinimumHeight(50) # Ensure min height for external log + self.log_signal.emit("\n" + "="*40 + "\n🔗 External Links Log Enabled\n" + "="*40) # Log change + if self.external_log_output: # Clear and add title if showing external log + self.external_log_output.clear() + self.external_log_output.append("🔗 External Links Found:") + self._try_process_next_external_link() # Process any queued links now that log is visible else: + # Hide the external log area if self.external_log_output: self.external_log_output.hide() - # Adjust splitter if self.log_splitter: self.log_splitter.setSizes([self.height(), 0]) # Main log takes all space if self.main_log_output: self.main_log_output.setMinimumHeight(0) # Reset min height - if self.external_log_output: self.external_log_output.setMinimumHeight(0) - if self.external_log_output: self.external_log_output.clear() # Clear content when hidden - self.log_signal.emit("\n" + "="*40 + "\n🔗 External Links Log Disabled\n" + "="*40) + if self.external_log_output: self.external_log_output.setMinimumHeight(0) # Reset min height + if self.external_log_output: self.external_log_output.clear() # Clear content when hiding + self.log_signal.emit("\n" + "="*40 + "\n🔗 External Links Log Disabled\n" + "="*40) # Log change + - # --- ADDED: Handler for filter mode radio buttons --- def _handle_filter_mode_change(self, button, checked): - # button can be None during initial setup sometimes - if not button or not checked: + """Handles changes in the file filter radio buttons, updating UI accordingly.""" + if not button or not checked: # Only act on the button that was toggled to 'checked' return - filter_mode_text = button.text() + filter_mode_text = button.text() # Get text of the selected radio button is_only_links = (filter_mode_text == "🔗 Only Links") + is_only_archives = (filter_mode_text == "📦 Only Archives") # Check for "Only Archives" mode - # --- MODIFIED: Enable/disable widgets based on mode --- - file_options_enabled = not is_only_links - widgets_to_disable_in_links_mode = [ - self.dir_input, self.dir_button, # Download Location - self.skip_zip_checkbox, self.skip_rar_checkbox, - self.download_thumbnails_checkbox, self.compress_images_checkbox, - self.use_subfolders_checkbox, self.use_subfolder_per_post_checkbox, - self.character_filter_widget, # Includes label and input - self.skip_words_input, - self.custom_folder_widget # Includes label and input - ] - # --- END MODIFIED --- - for widget in widgets_to_disable_in_links_mode: - if widget: widget.setEnabled(file_options_enabled) - - # --- ADDED: Show/hide link search bar and export button --- + # --- Visibility of Link-Specific UI (Search, Export) --- if self.link_search_input: self.link_search_input.setVisible(is_only_links) if self.link_search_button: self.link_search_button.setVisible(is_only_links) if self.export_links_button: self.export_links_button.setVisible(is_only_links) - self.export_links_button.setEnabled(is_only_links and bool(self.extracted_links_cache)) # Enable if cache has items - if not is_only_links and self.link_search_input: self.link_search_input.clear() # Clear search when hiding - # --- END ADDED --- + # Enable export button only if in links mode and there are cached links + self.export_links_button.setEnabled(is_only_links and bool(self.extracted_links_cache)) + if not is_only_links and self.link_search_input: self.link_search_input.clear() # Clear search if not in links mode - # Specific handling for "Only Links" mode vs others - if is_only_links: - self.progress_log_label.setText("📜 Extracted Links Log:") # Change title - # Ensure external log is hidden and main log takes full vertical space - if self.external_log_output: self.external_log_output.hide() - if self.log_splitter: self.log_splitter.setSizes([self.height(), 0]) - if self.main_log_output: self.main_log_output.setMinimumHeight(0) - if self.external_log_output: self.external_log_output.setMinimumHeight(0) - # Clear logs for the new mode - if self.main_log_output: self.main_log_output.clear() - if self.external_log_output: self.external_log_output.clear() - # External links checkbox is irrelevant in this mode, keep it enabled but ignored - if self.external_links_checkbox: self.external_links_checkbox.setEnabled(True) - self.log_signal.emit("="*20 + " Mode changed to: Only Links " + "="*20) - # Start processing links immediately for the main log display - self._filter_links_log() # Display initially filtered (all) links - self._try_process_next_external_link() # Start paced display + # --- Enable/Disable State of General Download-Related Widgets --- + # File download mode is active if NOT "Only Links" mode + file_download_mode_active = not is_only_links - else: # Other modes (All, Images, Videos) - self.progress_log_label.setText("📜 Progress Log:") # Restore title - if self.external_links_checkbox: - self.external_links_checkbox.setEnabled(True) # Ensure checkbox is enabled - # Restore log visibility based on checkbox state - self.update_external_links_setting(self.external_links_checkbox.isChecked()) - # Re-enable potentially disabled subfolder options if needed - self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked()) - self.log_signal.emit(f"="*20 + f" Mode changed to: {filter_mode_text} " + "="*20) + # Widgets generally active for file downloads (All, Images, Videos, Archives) + if self.dir_input: self.dir_input.setEnabled(file_download_mode_active) + if self.dir_button: self.dir_button.setEnabled(file_download_mode_active) + if self.use_subfolders_checkbox: self.use_subfolders_checkbox.setEnabled(file_download_mode_active) + # Skip words input and scope button are relevant if downloading files + if self.skip_words_input: self.skip_words_input.setEnabled(file_download_mode_active) + if self.skip_scope_toggle_button: self.skip_scope_toggle_button.setEnabled(file_download_mode_active) + + # --- Skip Archive Checkboxes Logic --- + # Enabled if NOT "Only Links" AND NOT "Only Archives" + # Unchecked and disabled if "Only Archives" mode is selected + if self.skip_zip_checkbox: + can_skip_zip = not is_only_links and not is_only_archives + self.skip_zip_checkbox.setEnabled(can_skip_zip) + if is_only_archives: + self.skip_zip_checkbox.setChecked(False) # Ensure unchecked in "Only Archives" mode + + if self.skip_rar_checkbox: + can_skip_rar = not is_only_links and not is_only_archives + self.skip_rar_checkbox.setEnabled(can_skip_rar) + if is_only_archives: + self.skip_rar_checkbox.setChecked(False) # Ensure unchecked in "Only Archives" mode + + # --- Other File Processing Checkboxes (Thumbnails, Compression) --- + # Enabled if NOT "Only Links" AND NOT "Only Archives" + other_file_proc_enabled = not is_only_links and not is_only_archives + if self.download_thumbnails_checkbox: self.download_thumbnails_checkbox.setEnabled(other_file_proc_enabled) + if self.compress_images_checkbox: self.compress_images_checkbox.setEnabled(other_file_proc_enabled) + + # --- External Links Checkbox Logic --- + # Enabled if NOT "Only Links" AND NOT "Only Archives" + if self.external_links_checkbox: + can_show_external_log_option = not is_only_links and not is_only_archives + self.external_links_checkbox.setEnabled(can_show_external_log_option) + if not can_show_external_log_option: # If disabled due to current mode + self.external_links_checkbox.setChecked(False) # Uncheck it + + + # --- Log Area and Specific Mode UI Updates --- + if is_only_links: # "Only Links" mode specific UI + self.progress_log_label.setText("📜 Extracted Links Log:") # Change log label + if self.external_log_output: self.external_log_output.hide() # Hide separate external log area + if self.log_splitter: self.log_splitter.setSizes([self.height(), 0]) # Main log takes all space + if self.main_log_output: self.main_log_output.clear(); self.main_log_output.setMinimumHeight(0) # Clear main log + if self.external_log_output: self.external_log_output.clear(); self.external_log_output.setMinimumHeight(0) # Clear external log + self.log_signal.emit("="*20 + " Mode changed to: Only Links " + "="*20) # Log mode change + self._filter_links_log() # Refresh link log display based on current cache and search + self._try_process_next_external_link() # Process any queued links for this mode + elif is_only_archives: # "Only Archives" mode specific UI + self.progress_log_label.setText("📜 Progress Log (Archives Only):") # Change log label + if self.external_log_output: self.external_log_output.hide() # Hide external links log for archives mode + if self.log_splitter: self.log_splitter.setSizes([self.height(), 0]) # Main log takes all space + if self.main_log_output: self.main_log_output.clear() # Clear main log for new mode + self.log_signal.emit("="*20 + " Mode changed to: Only Archives " + "="*20) # Log mode change + else: # All, Images, Videos modes + self.progress_log_label.setText("📜 Progress Log:") # Default log label + # For these modes, the external links log visibility depends on its checkbox state + self.update_external_links_setting(self.external_links_checkbox.isChecked() if self.external_links_checkbox else False) + self.log_signal.emit(f"="*20 + f" Mode changed to: {filter_mode_text} " + "="*20) # Log mode change + + # --- Common UI Updates based on current states (called after mode-specific changes) --- + # Update subfolder related UI (character filter, per-post subfolder checkbox, custom folder input) + if self.use_subfolders_checkbox: # Ensure it exists + self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked()) + + # Update visibility of custom folder input (depends on single post URL and subfolder settings) + self.update_custom_folder_visibility() - # --- END ADDED --- - # --- ADDED: Method to filter links in "Only Links" mode --- def _filter_links_log(self): - """Filters and displays links from the cache in the main log.""" - if not (self.radio_only_links and self.radio_only_links.isChecked()): - return # Only filter when in "Only Links" mode + """Filters and displays links in the main log when 'Only Links' mode is active, based on search input.""" + if not (self.radio_only_links and self.radio_only_links.isChecked()): return # Only run in "Only Links" mode - search_term = self.link_search_input.text().lower().strip() - self.main_log_output.clear() # Clear current display - - current_title_for_display = None # Track title for grouping in this filtered view - separator = "-" * 45 + search_term = self.link_search_input.text().lower().strip() if self.link_search_input else "" + self.main_log_output.clear() # Clear previous content from the main log + current_title_for_display = None # To group links by post title in the display + separator = "-" * 45 # Visual separator between post sections + # Iterate through the cached extracted links for post_title, link_text, link_url, platform in self.extracted_links_cache: - # Check if the search term matches any part of the link info + # Check if any part of the link data matches the search term (case-insensitive) matches_search = ( - not search_term or + not search_term or # Show all if no search term is provided search_term in link_text.lower() or search_term in link_url.lower() or search_term in platform.lower() ) - - if matches_search: - # Check if the post title has changed - if post_title != current_title_for_display: - # Append separator and new title (formatted as HTML) - self.main_log_output.insertHtml("
" + separator + "
") - title_html = f'{post_title}
' - self.main_log_output.insertHtml(title_html) - current_title_for_display = post_title # Update current title - - # Format and append the link info as plain text - max_link_text_len = 35 + if matches_search: # If the link matches the search criteria + if post_title != current_title_for_display: # If it's a new post section + self.main_log_output.insertHtml("
" + separator + "
") # Add separator and space using HTML + title_html = f'{post_title}
' # Format post title + self.main_log_output.insertHtml(title_html) # Insert title as HTML + current_title_for_display = post_title # Update current title tracker + + # Format and display the link information + max_link_text_len = 35 # Truncate long link text for display display_text = link_text[:max_link_text_len].strip() + "..." if len(link_text) > max_link_text_len else link_text formatted_link_info = f"{display_text} - {link_url} - {platform}" - self.main_log_output.append(formatted_link_info) + self.main_log_output.append(formatted_link_info) # Append link info as plain text - # Add a final blank line if any links were displayed - if self.main_log_output.toPlainText().strip(): - self.main_log_output.append("") + if self.main_log_output.toPlainText().strip(): # Add a final newline if content was added + self.main_log_output.append("") + self.main_log_output.verticalScrollBar().setValue(0) # Scroll to top of the log - # Scroll to top after filtering - self.main_log_output.verticalScrollBar().setValue(0) - # --- END ADDED --- - # --- ADDED: Method to export links --- def _export_links_to_file(self): + """Exports extracted links to a text file when in 'Only Links' mode.""" if not (self.radio_only_links and self.radio_only_links.isChecked()): QMessageBox.information(self, "Export Links", "Link export is only available in 'Only Links' mode.") return @@ -1024,984 +1108,1101 @@ class DownloaderApp(QWidget): QMessageBox.information(self, "Export Links", "No links have been extracted yet.") return + # Suggest a default filename for the export default_filename = "extracted_links.txt" filepath, _ = QFileDialog.getSaveFileName(self, "Save Links", default_filename, "Text Files (*.txt);;All Files (*)") - if filepath: + if filepath: # If a filepath was chosen try: with open(filepath, 'w', encoding='utf-8') as f: - current_title_for_export = None - separator = "-" * 60 + "\n" # For file output - + current_title_for_export = None # To group links by post title in the file + separator = "-" * 60 + "\n" # Separator for file content for post_title, link_text, link_url, platform in self.extracted_links_cache: - if post_title != current_title_for_export: - if current_title_for_export is not None: # Add separator before new title, except for the first one + if post_title != current_title_for_export: # If it's a new post section + if current_title_for_export is not None: # Add separator before new post section (if not the first) f.write("\n" + separator + "\n") - f.write(f"Post Title: {post_title}\n\n") - current_title_for_export = post_title - + f.write(f"Post Title: {post_title}\n\n") # Write post title + current_title_for_export = post_title # Update current title tracker + # Write link details f.write(f" {link_text} - {link_url} - {platform}\n") - self.log_signal.emit(f"✅ Links successfully exported to: {filepath}") QMessageBox.information(self, "Export Successful", f"Links exported to:\n{filepath}") except Exception as e: self.log_signal.emit(f"❌ Error exporting links: {e}") QMessageBox.critical(self, "Export Error", f"Could not export links: {e}") - # --- END ADDED --- def get_filter_mode(self): - # This method returns the simplified filter mode string for the backend + """Determines the backend filter mode ('all', 'image', 'video', 'archive') based on radio button selection.""" if self.radio_only_links and self.radio_only_links.isChecked(): - # When "Only Links" is checked, the backend doesn't filter by file type, - # but it does need a 'filter_mode'. 'all' is a safe default. - # The actual link extraction is controlled by the 'extract_links_only' flag. - return 'all' - elif self.radio_images.isChecked(): return 'image' - elif self.radio_videos.isChecked(): return 'video' - return 'all' # Default for "All" radio or if somehow no radio is checked. + # Backend expects 'all' for link extraction, even if UI says "Only Links", + # as the worker will then be told to extract_links_only. + return 'all' + elif self.radio_images.isChecked(): + return 'image' + elif self.radio_videos.isChecked(): + return 'video' + elif self.radio_only_archives and self.radio_only_archives.isChecked(): # Check for "Only Archives" mode + return 'archive' + elif self.radio_all.isChecked(): # Explicitly check for 'All' if others aren't matched + return 'all' + return 'all' # Default if somehow no button is checked (should not happen with QButtonGroup) + + + def get_skip_words_scope(self): + """Returns the current scope for skip words (files, posts, or both) from the internal attribute.""" + return self.skip_words_scope + + + def _update_skip_scope_button_text(self): + """Updates the text of the skip scope toggle button based on the current self.skip_words_scope.""" + if self.skip_scope_toggle_button: # Ensure button exists + if self.skip_words_scope == SKIP_SCOPE_FILES: + self.skip_scope_toggle_button.setText("Scope: Files") + elif self.skip_words_scope == SKIP_SCOPE_POSTS: + self.skip_scope_toggle_button.setText("Scope: Posts") + elif self.skip_words_scope == SKIP_SCOPE_BOTH: + self.skip_scope_toggle_button.setText("Scope: Both") + else: # Should not happen if logic is correct + self.skip_scope_toggle_button.setText("Scope: Unknown") + + + def _cycle_skip_scope(self): + """Cycles through the available skip word scopes (Files -> Posts -> Both -> Files) and updates UI and settings.""" + if self.skip_words_scope == SKIP_SCOPE_FILES: + self.skip_words_scope = SKIP_SCOPE_POSTS + elif self.skip_words_scope == SKIP_SCOPE_POSTS: + self.skip_words_scope = SKIP_SCOPE_BOTH + elif self.skip_words_scope == SKIP_SCOPE_BOTH: + self.skip_words_scope = SKIP_SCOPE_FILES + else: # Default to files if current state is unknown (should not occur) + self.skip_words_scope = SKIP_SCOPE_FILES + + self._update_skip_scope_button_text() # Update button text to reflect new scope + self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope) # Save the new scope to settings + self.log_signal.emit(f"ℹ️ Skip words scope changed to: '{self.skip_words_scope}'") # Log the change + def add_new_character(self): - global KNOWN_NAMES, clean_folder_name # Ensure clean_folder_name is accessible - name_to_add = self.new_char_input.text().strip() - if not name_to_add: - QMessageBox.warning(self, "Input Error", "Name cannot be empty.") - return False # Indicate failure + """Adds a new character/show name to the known list, with validation and conflict checks.""" + global KNOWN_NAMES, clean_folder_name # Ensure we use the potentially shared KNOWN_NAMES and utility function + name_to_add = self.new_char_input.text().strip() # Get name from input and strip whitespace + if not name_to_add: # Check for empty input + QMessageBox.warning(self, "Input Error", "Name cannot be empty."); return False # Indicate failure - name_lower = name_to_add.lower() + name_lower = name_to_add.lower() # For case-insensitive comparisons + # Check for exact duplicates (case-insensitive) + if any(existing.lower() == name_lower for existing in KNOWN_NAMES): + QMessageBox.warning(self, "Duplicate Name", f"The name '{name_to_add}' (case-insensitive) already exists."); return False - # 1. Exact Duplicate Check (case-insensitive) - is_exact_duplicate = any(existing.lower() == name_lower for existing in KNOWN_NAMES) - if is_exact_duplicate: - QMessageBox.warning(self, "Duplicate Name", f"The name '{name_to_add}' (case-insensitive) already exists.") - return False - - # 2. Similarity Check (substring, case-insensitive) - similar_names_details = [] # Store tuples of (new_name, existing_name) + # Check for potential conflicts (substrings or superstrings) + similar_names_details = [] for existing_name in KNOWN_NAMES: existing_name_lower = existing_name.lower() - # Avoid self-comparison if somehow name_lower was already in a different case - if name_lower != existing_name_lower: - if name_lower in existing_name_lower or existing_name_lower in name_lower: - similar_names_details.append((name_to_add, existing_name)) + # Check if new name is in existing OR existing is in new name (but not identical) + if name_lower != existing_name_lower and (name_lower in existing_name_lower or existing_name_lower in name_lower): + similar_names_details.append((name_to_add, existing_name)) # Store pair for message - if similar_names_details: + if similar_names_details: # If potential conflicts found first_similar_new, first_similar_existing = similar_names_details[0] + # Determine which name is shorter for the example message to illustrate potential grouping issue + shorter, longer = sorted([first_similar_new, first_similar_existing], key=len) - # Determine shorter and longer for the example message - shorter_name_for_msg, longer_name_for_msg = sorted( - [first_similar_new, first_similar_existing], key=len - ) - + # Warn user about potential conflict and ask for confirmation msg_box = QMessageBox(self) msg_box.setIcon(QMessageBox.Warning) msg_box.setWindowTitle("Potential Name Conflict") msg_box.setText( f"The name '{first_similar_new}' is very similar to an existing name: '{first_similar_existing}'.\n\n" - f"For example, if a post title primarily matches the shorter name ('{shorter_name_for_msg}'), " - f"files might be saved under a folder for '{clean_folder_name(shorter_name_for_msg)}', " - f"even if the longer name ('{longer_name_for_msg}') was also relevant or intended for a more specific folder.\n" - "This could lead to files being grouped into less specific or overly broad folders than desired.\n\n" + f"This could lead to files being grouped into less specific folders (e.g., under '{clean_folder_name(shorter)}' instead of a more specific '{clean_folder_name(longer)}').\n\n" "Do you want to change the name you are adding, or proceed anyway?" ) - change_button = msg_box.addButton("Change Name", QMessageBox.RejectRole) - proceed_button = msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole) + change_button = msg_box.addButton("Change Name", QMessageBox.RejectRole) # Option to change + proceed_button = msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole) # Option to proceed msg_box.setDefaultButton(proceed_button) # Default to proceed - msg_box.setEscapeButton(change_button) # Escape cancels/rejects - + msg_box.setEscapeButton(change_button) # Escape cancels/changes msg_box.exec_() - if msg_box.clickedButton() == change_button: - self.log_signal.emit(f"ℹ️ User chose to change the name '{first_similar_new}' due to similarity with '{first_similar_existing}'.") - return False # Don't add, user will change input and click "Add" again - # If proceed_button is clicked (or dialog is closed and proceed is default) - self.log_signal.emit(f"⚠️ User chose to proceed with adding '{first_similar_new}' despite similarity with '{first_similar_existing}'.") - # Fall through to add the name + if msg_box.clickedButton() == change_button: # If user chose to change + self.log_signal.emit(f"ℹ️ User chose to change '{first_similar_new}' due to similarity with '{first_similar_existing}'.") + return False # Indicate user chose to change, so don't add this one - # If no exact duplicate, and (no similar names OR user chose to proceed with similar name) + # If user chose to proceed, log it + self.log_signal.emit(f"⚠️ User proceeded with adding '{first_similar_new}' despite similarity with '{first_similar_existing}'.") + + # If no conflict or user chose to proceed, add the name to KNOWN_NAMES KNOWN_NAMES.append(name_to_add) - KNOWN_NAMES.sort(key=str.lower) # Keep the list sorted (case-insensitive for sorting) + KNOWN_NAMES.sort(key=str.lower) # Keep the list sorted case-insensitively + + # Update UI list (QListWidget) self.character_list.clear() self.character_list.addItems(KNOWN_NAMES) - self.filter_character_list(self.character_search_input.text()) # Re-apply filter + self.filter_character_list(self.character_search_input.text()) # Re-apply search filter if any + self.log_signal.emit(f"✅ Added '{name_to_add}' to known names list.") - self.new_char_input.clear() - self.save_known_names() # Save to file + self.new_char_input.clear() # Clear input field after adding + self.save_known_names() # Persist changes to the config file return True # Indicate success def delete_selected_character(self): - global KNOWN_NAMES - selected_items = self.character_list.selectedItems() - if not selected_items: - QMessageBox.warning(self, "Selection Error", "Please select one or more names to delete.") - return + """Deletes selected character/show names from the known list and UI.""" + global KNOWN_NAMES # Ensure we use the potentially shared KNOWN_NAMES + selected_items = self.character_list.selectedItems() # Get selected items from QListWidget + if not selected_items: # If no items selected + QMessageBox.warning(self, "Selection Error", "Please select one or more names to delete."); return - names_to_remove = {item.text() for item in selected_items} + names_to_remove = {item.text() for item in selected_items} # Get unique names to remove + # Confirm deletion with the user confirm = QMessageBox.question(self, "Confirm Deletion", f"Are you sure you want to delete {len(names_to_remove)} name(s)?", - QMessageBox.Yes | QMessageBox.No, QMessageBox.No) - + QMessageBox.Yes | QMessageBox.No, QMessageBox.No) # Default to No if confirm == QMessageBox.Yes: original_count = len(KNOWN_NAMES) - # Filter out names to remove - KNOWN_NAMES = [n for n in KNOWN_NAMES if n not in names_to_remove] + # Filter out the names to remove from KNOWN_NAMES (modify in-place) + KNOWN_NAMES[:] = [n for n in KNOWN_NAMES if n not in names_to_remove] removed_count = original_count - len(KNOWN_NAMES) - if removed_count > 0: + if removed_count > 0: # If names were actually removed self.log_signal.emit(f"🗑️ Removed {removed_count} name(s).") - self.character_list.clear() # Update UI + # Update UI list + self.character_list.clear() self.character_list.addItems(KNOWN_NAMES) - self.filter_character_list(self.character_search_input.text()) # Re-apply filter - self.save_known_names() # Save changes - else: - self.log_signal.emit("ℹ️ No names were removed (they might not have been in the list or already deleted).") + self.filter_character_list(self.character_search_input.text()) # Re-apply search filter + self.save_known_names() # Persist changes to config file + else: # Should not happen if items were selected, but good to handle + self.log_signal.emit("ℹ️ No names were removed (they might not have been in the list).") def update_custom_folder_visibility(self, url_text=None): - if url_text is None: url_text = self.link_input.text() # Get current text if not passed - _, _, post_id = extract_post_info(url_text.strip()) - # Show if it's a post URL AND subfolders are generally enabled - should_show = bool(post_id) and self.use_subfolders_checkbox.isChecked() - # --- MODIFIED: Also hide if in "Only Links" mode --- + """Shows or hides the custom folder input based on URL type (single post) and subfolder settings.""" + if url_text is None: # If called without arg (e.g., from other UI changes that affect this) + url_text = self.link_input.text() # Get current URL from input + + _, _, post_id = extract_post_info(url_text.strip()) # Check if it's a single post URL + + is_single_post_url = bool(post_id) # True if a post ID was extracted + # Subfolders must be generally enabled for custom folder to be relevant + subfolders_enabled = self.use_subfolders_checkbox.isChecked() if self.use_subfolders_checkbox else False + + # Custom folder input is NOT relevant if in "Only Links" or "Only Archives" mode, + # as these modes might not use folder structures in the same way or at all. + not_only_links_or_archives_mode = not ( + (self.radio_only_links and self.radio_only_links.isChecked()) or + (self.radio_only_archives and self.radio_only_archives.isChecked()) + ) + + # Show custom folder input if all conditions are met: + # 1. It's a single post URL. + # 2. "Separate Folders by Name/Title" (main subfolder option) is checked. + # 3. It's NOT "Only Links" or "Only Archives" mode. + should_show_custom_folder = is_single_post_url and subfolders_enabled and not_only_links_or_archives_mode + + if self.custom_folder_widget: # Ensure custom folder widget exists + self.custom_folder_widget.setVisible(should_show_custom_folder) # Set visibility + + # If the custom folder input is hidden, clear its content + if not (self.custom_folder_widget and self.custom_folder_widget.isVisible()): + if self.custom_folder_input: self.custom_folder_input.clear() + + + def update_ui_for_subfolders(self, checked): + """Updates UI elements related to subfolder settings (character filter, per-post subfolder checkbox).""" + # "Only Links" and "Only Archives" modes generally don't use character-based subfolders or per-post subfolders. is_only_links = self.radio_only_links and self.radio_only_links.isChecked() - self.custom_folder_widget.setVisible(should_show and not is_only_links) - # --- END MODIFIED --- - if not self.custom_folder_widget.isVisible(): self.custom_folder_input.clear() # Clear if hidden + is_only_archives = self.radio_only_archives and self.radio_only_archives.isChecked() - def update_ui_for_subfolders(self, checked): - # Character filter input visibility depends on subfolder usage - is_only_links = self.radio_only_links and self.radio_only_links.isChecked() - self.character_filter_widget.setVisible(checked and not is_only_links) # Hide if only links - if not checked: self.character_input.clear() # Clear filter if hiding + # Character filter and per-post subfolder options are relevant if: + # 1. The main "Separate Folders by Name/Title" (passed as 'checked' arg) is ON. + # 2. It's NOT "Only Links" mode AND NOT "Only Archives" mode. + enable_char_and_post_subfolder_options = checked and not is_only_links and not is_only_archives - self.update_custom_folder_visibility() # Custom folder also depends on this + # Character filter widget visibility + if self.character_filter_widget: # Ensure widget exists + self.character_filter_widget.setVisible(enable_char_and_post_subfolder_options) + if not self.character_filter_widget.isVisible() and self.character_input: + self.character_input.clear() # Clear character input if hidden + + # "Subfolder per Post" checkbox enabled state + if self.use_subfolder_per_post_checkbox: # Ensure checkbox exists + self.use_subfolder_per_post_checkbox.setEnabled(enable_char_and_post_subfolder_options) + if not enable_char_and_post_subfolder_options: # If disabled by current conditions + self.use_subfolder_per_post_checkbox.setChecked(False) # Also uncheck it + + # Update custom folder visibility, as it depends on subfolder settings too + self.update_custom_folder_visibility() - # "Subfolder per Post" is only enabled if "Separate Folders" is also checked - self.use_subfolder_per_post_checkbox.setEnabled(checked and not is_only_links) # Disable if only links - if not checked or is_only_links: self.use_subfolder_per_post_checkbox.setChecked(False) # Uncheck if parent is disabled or only links def update_page_range_enabled_state(self): - url_text = self.link_input.text().strip() - service, user_id, post_id = extract_post_info(url_text) - # Page range is for creator feeds (no post_id) - is_creator_feed = service is not None and user_id is not None and post_id is None + """Enables/disables page range inputs based on URL type (creator feed vs single post) and Manga Mode.""" + url_text = self.link_input.text().strip() if self.link_input else "" + _, _, post_id = extract_post_info(url_text) # Check if it's a single post URL + is_creator_feed = not post_id if url_text else False # True if URL is present and not a post URL + # Manga mode overrides page range (downloads all posts, sorted oldest first) manga_mode_active = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False - # Enable page range if it's a creator feed AND manga mode is NOT active + + # Enable page range if it's a creator feed AND Manga Mode is OFF enable_page_range = is_creator_feed and not manga_mode_active + # Enable/disable page range UI elements for widget in [self.page_range_label, self.start_page_input, self.to_label, self.end_page_input]: if widget: widget.setEnabled(enable_page_range) - if not enable_page_range: # Clear inputs if disabled - self.start_page_input.clear() - self.end_page_input.clear() - def update_ui_for_manga_mode(self, checked): - url_text = self.link_input.text().strip() - _, _, post_id = extract_post_info(url_text) - is_creator_feed = not post_id if url_text else False # Manga mode only for creator feeds + # If page range is disabled, clear the input fields + if not enable_page_range: + if self.start_page_input: self.start_page_input.clear() + if self.end_page_input: self.end_page_input.clear() + + def _update_manga_filename_style_button_text(self): + """Updates the text and tooltip of the manga filename style toggle button based on current style.""" + if self.manga_rename_toggle_button: # Ensure button exists + if self.manga_filename_style == STYLE_POST_TITLE: + self.manga_rename_toggle_button.setText("Name: Post Title") + self.manga_rename_toggle_button.setToolTip( + "Manga files: First file named by post title. Subsequent files in same post keep original names.\n" + "Click to change to original file names for all files." + ) + elif self.manga_filename_style == STYLE_ORIGINAL_NAME: + self.manga_rename_toggle_button.setText("Name: Original File") + self.manga_rename_toggle_button.setToolTip( + "Manga files will keep their original names as provided by the site (e.g., 001.jpg, page_01.png).\n" + "Click to change to post title based naming for the first file." + ) + else: # Fallback for unknown style (should not happen) + self.manga_rename_toggle_button.setText("Name: Unknown Style") + self.manga_rename_toggle_button.setToolTip("Manga filename style is in an unknown state.") + + + def _toggle_manga_filename_style(self): + """Toggles the manga filename style between 'post_title' and 'original_name', updates UI and settings.""" + current_style = self.manga_filename_style + new_style = "" + + if current_style == STYLE_POST_TITLE: # If current is Post Title, switch to Original Name + new_style = STYLE_ORIGINAL_NAME + # Optional: Warn user if they switch away from the recommended style for manga + reply = QMessageBox.information(self, "Manga Filename Preference", + "Using 'Name: Post Title' (first file by title, others original) is recommended for Manga Mode.\n\n" + "Using 'Name: Original File' for all files might lead to less organized downloads if original names are inconsistent or non-sequential.\n\n" + "Proceed with using 'Name: Original File' for all files?", + QMessageBox.Yes | QMessageBox.No, QMessageBox.No) # Default to No + if reply == QMessageBox.No: # If user cancels the change + self.log_signal.emit("ℹ️ Manga filename style change to 'Original File' cancelled by user.") + return # Don't change if user cancels + elif current_style == STYLE_ORIGINAL_NAME: # If current is Original Name, switch to Post Title + new_style = STYLE_POST_TITLE + else: # If current style is unknown (e.g., corrupted setting), reset to default + self.log_signal.emit(f"⚠️ Unknown current manga filename style: {current_style}. Resetting to default ('{STYLE_POST_TITLE}').") + new_style = STYLE_POST_TITLE + + self.manga_filename_style = new_style # Update internal attribute + self.settings.setValue(MANGA_FILENAME_STYLE_KEY, self.manga_filename_style) # Save new style to settings + self.settings.sync() # Ensure setting is written to disk + self._update_manga_filename_style_button_text() # Update button UI text and tooltip + self.log_signal.emit(f"ℹ️ Manga filename style changed to: '{self.manga_filename_style}'") # Log the change + + + def update_ui_for_manga_mode(self, checked): # 'checked' is the state of the manga_mode_checkbox + """Updates UI elements based on Manga Mode state (checkbox state and URL type).""" + url_text = self.link_input.text().strip() if self.link_input else "" + _, _, post_id = extract_post_info(url_text) # Check if it's a single post URL + + # Manga mode is only applicable to creator feeds (not single posts) + is_creator_feed = not post_id if url_text else False + + # Enable/disable the Manga Mode checkbox itself based on whether it's a creator feed if self.manga_mode_checkbox: # Ensure checkbox exists - self.manga_mode_checkbox.setEnabled(is_creator_feed) # Only enable for creator feeds - if not is_creator_feed and self.manga_mode_checkbox.isChecked(): - self.manga_mode_checkbox.setChecked(False) # Uncheck if URL changes to non-creator feed + self.manga_mode_checkbox.setEnabled(is_creator_feed) + if not is_creator_feed and self.manga_mode_checkbox.isChecked(): # If URL changes to single post, uncheck manga mode + self.manga_mode_checkbox.setChecked(False) + # 'checked' variable (passed in) might now be stale, so re-evaluate based on checkbox's current state + checked = self.manga_mode_checkbox.isChecked() - # If manga mode is active (checked and enabled), disable page range - if is_creator_feed and self.manga_mode_checkbox and self.manga_mode_checkbox.isChecked(): - self.page_range_label.setEnabled(False) - self.start_page_input.setEnabled(False); self.start_page_input.clear() - self.to_label.setEnabled(False) - self.end_page_input.setEnabled(False); self.end_page_input.clear() - else: # Otherwise, let update_page_range_enabled_state handle it + # Manga mode is effectively ON if the checkbox is checked AND it's a creator feed + manga_mode_effectively_on = is_creator_feed and checked # Use the potentially updated 'checked' value + + # Show/hide the manga filename style toggle button + if self.manga_rename_toggle_button: # Ensure button exists + self.manga_rename_toggle_button.setVisible(manga_mode_effectively_on) + + # If manga mode is on, page range is disabled (as it downloads all posts, sorted) + if manga_mode_effectively_on: + if self.page_range_label: self.page_range_label.setEnabled(False) + if self.start_page_input: self.start_page_input.setEnabled(False); self.start_page_input.clear() + if self.to_label: self.to_label.setEnabled(False) + if self.end_page_input: self.end_page_input.setEnabled(False); self.end_page_input.clear() + else: # If manga mode is off (or not applicable), re-evaluate page range normally self.update_page_range_enabled_state() def filter_character_list(self, search_text): - search_text_lower = search_text.lower() - for i in range(self.character_list.count()): + """Filters the QListWidget of known characters based on the provided search text.""" + search_text_lower = search_text.lower() # For case-insensitive search + for i in range(self.character_list.count()): # Iterate through all items in the list item = self.character_list.item(i) + # Hide item if search text is not in item text (case-insensitive) item.setHidden(search_text_lower not in item.text().lower()) - def update_multithreading_label(self, text): - # This method only updates the checkbox label text - # The actual enabling/disabling is handled by _handle_multithreading_toggle - if self.use_multithreading_checkbox.isChecked(): - try: - num_threads = int(text) - if num_threads > 0 : - self.use_multithreading_checkbox.setText(f"Use Multithreading ({num_threads} Threads)") - else: - self.use_multithreading_checkbox.setText("Use Multithreading (Invalid: >0)") - except ValueError: - self.use_multithreading_checkbox.setText("Use Multithreading (Invalid Input)") - else: - self.use_multithreading_checkbox.setText("Use Multithreading (1 Thread)") # Show 1 thread when disabled - # --- ADDED: Handler for multithreading checkbox toggle --- - def _handle_multithreading_toggle(self, checked): - """Handles enabling/disabling the thread count input.""" - if not checked: - # Unchecked: Set to 1 and disable - self.thread_count_input.setText("1") - self.thread_count_input.setEnabled(False) - self.thread_count_label.setEnabled(False) + def update_multithreading_label(self, text): # 'text' is the current text of thread_count_input + """Updates the multithreading checkbox text to show the current thread count if enabled.""" + if self.use_multithreading_checkbox.isChecked(): # If multithreading is enabled + try: + num_threads_val = int(text) # Convert input text to integer + if num_threads_val > 0 : self.use_multithreading_checkbox.setText(f"Use Multithreading ({num_threads_val} Threads)") + else: self.use_multithreading_checkbox.setText("Use Multithreading (Invalid: >0)") # Should be caught by validator + except ValueError: # If text is not a valid integer + self.use_multithreading_checkbox.setText("Use Multithreading (Invalid Input)") + else: # If multithreading is unchecked, it implies 1 thread (main thread operation) self.use_multithreading_checkbox.setText("Use Multithreading (1 Thread)") - else: - # Checked: Enable and update label based on current value - self.thread_count_input.setEnabled(True) - self.thread_count_label.setEnabled(True) + + + def _handle_multithreading_toggle(self, checked): # 'checked' is the state of use_multithreading_checkbox + """Enables/disables the thread count input based on the multithreading checkbox state.""" + if not checked: # Multithreading disabled (checkbox unchecked) + self.thread_count_input.setEnabled(False) # Disable thread count input + self.thread_count_label.setEnabled(False) # Disable thread count label + # Update checkbox text to reflect single-threaded operation + self.use_multithreading_checkbox.setText("Use Multithreading (1 Thread)") + else: # Multithreading enabled (checkbox checked) + self.thread_count_input.setEnabled(True) # Enable thread count input + self.thread_count_label.setEnabled(True) # Enable thread count label + # Update checkbox text based on current value in thread_count_input self.update_multithreading_label(self.thread_count_input.text()) - # --- END ADDED --- def update_progress_display(self, total_posts, processed_posts): - if total_posts > 0: + """Updates the overall progress label in the UI.""" + if total_posts > 0: # If total number of posts is known progress_percent = (processed_posts / total_posts) * 100 self.progress_label.setText(f"Progress: {processed_posts} / {total_posts} posts ({progress_percent:.1f}%)") - elif processed_posts > 0 : # If total_posts is unknown (e.g., single post) + elif processed_posts > 0 : # If total is unknown but some posts are processed (e.g., single post mode) self.progress_label.setText(f"Progress: Processing post {processed_posts}...") - else: # Initial state or no posts + else: # Initial state or no posts found yet self.progress_label.setText("Progress: Starting...") - - if total_posts > 0 or processed_posts > 0 : self.file_progress_label.setText("") # Clear file progress + + # Clear individual file progress when overall progress updates (unless it's a clear signal for file progress) + if total_posts > 0 or processed_posts > 0 : + self.file_progress_label.setText("") # Clear individual file progress label def start_download(self): - global KNOWN_NAMES, BackendDownloadThread, PostProcessorWorker, extract_post_info, clean_folder_name - - if (self.download_thread and self.download_thread.isRunning()) or self.thread_pool: - QMessageBox.warning(self, "Busy", "A download is already running.") - return + """Initiates the download process based on current UI settings and validations.""" + # Ensure access to global/utility functions and classes from downloader_utils + global KNOWN_NAMES, BackendDownloadThread, PostProcessorWorker, extract_post_info, clean_folder_name, MAX_FILE_THREADS_PER_POST_OR_WORKER + + if self._is_download_active(): # Prevent multiple concurrent downloads from starting + QMessageBox.warning(self, "Busy", "A download is already running."); return + # --- Gather all settings from UI --- api_url = self.link_input.text().strip() output_dir = self.dir_input.text().strip() - skip_zip = self.skip_zip_checkbox.isChecked() - skip_rar = self.skip_rar_checkbox.isChecked() + use_subfolders = self.use_subfolders_checkbox.isChecked() + # Per-post subfolders only make sense if main subfolders are also enabled use_post_subfolders = self.use_subfolder_per_post_checkbox.isChecked() and use_subfolders compress_images = self.compress_images_checkbox.isChecked() download_thumbnails = self.download_thumbnails_checkbox.isChecked() - use_multithreading = self.use_multithreading_checkbox.isChecked() - raw_skip_words = self.skip_words_input.text().strip() + + use_multithreading_enabled_by_checkbox = self.use_multithreading_checkbox.isChecked() + try: # Get and validate thread count from GUI + num_threads_from_gui = int(self.thread_count_input.text().strip()) + if num_threads_from_gui < 1: num_threads_from_gui = 1 # Ensure at least 1 thread + except ValueError: # If input is not a valid integer + QMessageBox.critical(self, "Thread Count Error", "Invalid number of threads. Please enter a positive number.") + self.set_ui_enabled(True) # Re-enable UI if error occurs before download starts + return + + raw_skip_words = self.skip_words_input.text().strip() # Get raw skip words string + # Parse skip words into a list of lowercase, stripped words skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()] - + current_skip_words_scope = self.get_skip_words_scope() # Get current scope for skip words manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False - + + # Determine filter mode and if only links are being extracted extract_links_only = (self.radio_only_links and self.radio_only_links.isChecked()) - - # --- MODIFICATION FOR FILTER MODE --- - # Get the simplified filter mode for the backend (e.g., 'image', 'video', 'all') - backend_filter_mode = self.get_filter_mode() - # Get the user-facing text of the selected radio button for logging purposes + backend_filter_mode = self.get_filter_mode() # This will be 'archive' if that radio button is selected + # Get text of the selected filter radio button for logging purposes user_selected_filter_text = self.radio_group.checkedButton().text() if self.radio_group.checkedButton() else "All" - # --- END MODIFICATION FOR FILTER MODE --- + # Determine effective skip_zip and skip_rar based on the selected filter mode + # If "Only Archives" mode is selected, we want to download archives, so skip flags must be False. + if backend_filter_mode == 'archive': + effective_skip_zip = False + effective_skip_rar = False + else: # For other modes (All, Images, Videos, Only Links), respect the checkbox states + effective_skip_zip = self.skip_zip_checkbox.isChecked() + effective_skip_rar = self.skip_rar_checkbox.isChecked() - if not api_url: - QMessageBox.critical(self, "Input Error", "URL is required."); return + # --- Validations --- + if not api_url: QMessageBox.critical(self, "Input Error", "URL is required."); return + # Output directory is required unless only extracting links if not extract_links_only and not output_dir: QMessageBox.critical(self, "Input Error", "Download Directory is required when not in 'Only Links' mode."); return - - service, user_id, post_id_from_url = extract_post_info(api_url) - if not service or not user_id: + + service, user_id, post_id_from_url = extract_post_info(api_url) # Extract info from URL + if not service or not user_id: # Basic URL validation (must have service and user ID) QMessageBox.critical(self, "Input Error", "Invalid or unsupported URL format."); return + # Create output directory if it doesn't exist (and not in links-only mode) if not extract_links_only and not os.path.isdir(output_dir): reply = QMessageBox.question(self, "Create Directory?", f"The directory '{output_dir}' does not exist.\nCreate it now?", - QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) + QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) # Default to Yes if reply == QMessageBox.Yes: - try: - os.makedirs(output_dir, exist_ok=True) - self.log_signal.emit(f"ℹ️ Created directory: {output_dir}") - except Exception as e: - QMessageBox.critical(self, "Directory Error", f"Could not create directory: {e}"); return - else: - self.log_signal.emit("❌ Download cancelled: Output directory does not exist and was not created.") - return + try: os.makedirs(output_dir, exist_ok=True); self.log_signal.emit(f"ℹ️ Created directory: {output_dir}") + except Exception as e: QMessageBox.critical(self, "Directory Error", f"Could not create directory: {e}"); return + else: self.log_signal.emit("❌ Download cancelled: Output directory does not exist and was not created."); return - if compress_images and Image is None: + # Check for Pillow library if image compression is enabled + if compress_images and Image is None: # Image is None if Pillow import failed QMessageBox.warning(self, "Missing Dependency", "Pillow library (for image compression) not found. Compression will be disabled.") - compress_images = False - self.compress_images_checkbox.setChecked(False) + compress_images = False; self.compress_images_checkbox.setChecked(False) # Update UI and flag + # Manga mode is only applicable for creator feeds (not single posts) manga_mode = manga_mode_is_checked and not post_id_from_url - num_threads_str = self.thread_count_input.text().strip() - num_threads = 1 - if use_multithreading: - try: - num_threads_requested = int(num_threads_str) - if num_threads_requested > MAX_THREADS: - warning_message = ( - f"You have requested {num_threads_requested} threads, which is above the maximum limit of {MAX_THREADS}.\n\n" - f"High thread counts can lead to instability or rate-limiting.\n\n" - f"The thread count will be automatically capped at {MAX_THREADS} for this download." - ) - QMessageBox.warning(self, "High Thread Count Warning", warning_message) - self.log_signal.emit(f"⚠️ High thread count requested ({num_threads_requested}). Capping at {MAX_THREADS}.") - num_threads = MAX_THREADS - self.thread_count_input.setText(str(num_threads)) - elif num_threads_requested > RECOMMENDED_MAX_THREADS: - QMessageBox.information(self, "High Thread Count Note", - f"Using {num_threads_requested} threads (above {RECOMMENDED_MAX_THREADS}) may increase resource usage and risk rate-limiting from the site.\n\nProceeding with caution.") - self.log_signal.emit(f"ℹ️ Using high thread count: {num_threads_requested}.") - num_threads = num_threads_requested - elif num_threads_requested < 1: - self.log_signal.emit(f"⚠️ Invalid thread count ({num_threads_requested}). Using 1 thread.") - num_threads = 1 - self.thread_count_input.setText(str(num_threads)) - else: - num_threads = num_threads_requested - except ValueError: - QMessageBox.critical(self, "Thread Count Error", "Invalid number of threads. Please enter a numeric value."); return - else: - num_threads = 1 - + # Page range validation (only if not manga mode and it's a creator feed) start_page_str, end_page_str = self.start_page_input.text().strip(), self.end_page_input.text().strip() - start_page, end_page = None, None - is_creator_feed = bool(not post_id_from_url) - - if is_creator_feed and not manga_mode: - try: + start_page, end_page = None, None # Initialize to None + is_creator_feed = bool(not post_id_from_url) # True if URL is present and not a single post URL + if is_creator_feed and not manga_mode: # Page range applies only to creator feeds not in manga mode + try: # Validate page range inputs if start_page_str: start_page = int(start_page_str) if end_page_str: end_page = int(end_page_str) if start_page is not None and start_page <= 0: raise ValueError("Start page must be positive.") if end_page is not None and end_page <= 0: raise ValueError("End page must be positive.") - if start_page and end_page and start_page > end_page: - raise ValueError("Start page cannot be greater than end page.") - except ValueError as e: - QMessageBox.critical(self, "Page Range Error", f"Invalid page range: {e}"); return - elif manga_mode: - start_page, end_page = None, None - - self.external_link_queue.clear() - self.extracted_links_cache = [] - self._is_processing_external_link_queue = False - self._current_link_post_title = None + if start_page and end_page and start_page > end_page: raise ValueError("Start page cannot be greater than end page.") + except ValueError as e: QMessageBox.critical(self, "Page Range Error", f"Invalid page range: {e}"); return + elif manga_mode: # In manga mode, ignore page range inputs (downloads all) + start_page, end_page = None, None + # --- Reset state for new download --- + self.external_link_queue.clear(); self.extracted_links_cache = []; self._is_processing_external_link_queue = False; self._current_link_post_title = None + self.all_kept_original_filenames = [] # Reset list of filenames that kept their original names + # Character filter validation and prompt (if subfolders enabled and not links only mode) raw_character_filters_text = self.character_input.text().strip() - parsed_character_list = None - if raw_character_filters_text: - temp_list = [name.strip() for name in raw_character_filters_text.split(',') if name.strip()] - if temp_list: parsed_character_list = temp_list + # Parse character filters from comma-separated string + parsed_character_list = [name.strip() for name in raw_character_filters_text.split(',') if name.strip()] if raw_character_filters_text else None + filter_character_list_to_pass = None # This will be passed to the backend download logic - filter_character_list_to_pass = None + # Validate character filters if subfolders are used, it's a creator feed, and not extracting only links if use_subfolders and parsed_character_list and not post_id_from_url and not extract_links_only: self.log_signal.emit(f"ℹ️ Validating character filters for subfolder naming: {', '.join(parsed_character_list)}") - valid_filters_for_backend = [] - user_cancelled_validation = False + valid_filters_for_backend = [] # List of filters confirmed to be valid + user_cancelled_validation = False # Flag if user cancels during validation for char_name in parsed_character_list: - cleaned_name_test = clean_folder_name(char_name) - if not cleaned_name_test: + cleaned_name_test = clean_folder_name(char_name) # Test if name is valid for a folder name + if not cleaned_name_test: # If cleaning results in empty or invalid name QMessageBox.warning(self, "Invalid Filter Name", f"Filter name '{char_name}' is invalid for a folder and will be skipped.") - self.log_signal.emit(f"⚠️ Skipping invalid filter for folder: '{char_name}'") - continue + self.log_signal.emit(f"⚠️ Skipping invalid filter for folder: '{char_name}'"); continue + # Check if name is in known list (Known.txt), prompt to add if not if char_name.lower() not in {kn.lower() for kn in KNOWN_NAMES}: reply = QMessageBox.question(self, "Add Filter Name to Known List?", - f"The character filter '{char_name}' is not in your known names list (used for folder suggestions).\nAdd it now?", - QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes) - if reply == QMessageBox.Yes: - self.new_char_input.setText(char_name) - if self.add_new_character(): + f"Filter '{char_name}' is not in known names list.\nAdd it now?", + QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes) + if reply == QMessageBox.Yes: # User wants to add + self.new_char_input.setText(char_name) # Pre-fill input for user convenience + if self.add_new_character(): # Try to add it (this calls save_known_names) self.log_signal.emit(f"✅ Added '{char_name}' to known names via filter prompt.") - valid_filters_for_backend.append(char_name) - else: - self.log_signal.emit(f"⚠️ Failed to add '{char_name}' via filter prompt (or user opted out). It will still be used for filtering this session if valid.") - if cleaned_name_test: valid_filters_for_backend.append(char_name) - elif reply == QMessageBox.Cancel: - self.log_signal.emit(f"❌ Download cancelled by user during filter validation for '{char_name}'.") - user_cancelled_validation = True; break - else: - self.log_signal.emit(f"ℹ️ Proceeding with filter '{char_name}' for matching without adding to known list.") - if cleaned_name_test: valid_filters_for_backend.append(char_name) - else: - if cleaned_name_test: valid_filters_for_backend.append(char_name) + valid_filters_for_backend.append(char_name) # Add to list to pass if successful + else: # Add failed (e.g., user cancelled sub-prompt or conflict during add_new_character) + self.log_signal.emit(f"⚠️ Failed to add '{char_name}' via filter prompt (might have been a conflict or cancellation).") + # Still add if it was a valid folder name, even if not added to known list, for this run + if cleaned_name_test: valid_filters_for_backend.append(char_name) + elif reply == QMessageBox.Cancel: # User cancelled the whole download process + self.log_signal.emit(f"❌ Download cancelled during filter validation for '{char_name}'."); user_cancelled_validation = True; break + else: # User chose No (don't add to known list, but proceed with filter for this run) + self.log_signal.emit(f"ℹ️ Proceeding with filter '{char_name}' without adding to known list.") + if cleaned_name_test: valid_filters_for_backend.append(char_name) # Add if valid folder name + else: # Already in known list + if cleaned_name_test: valid_filters_for_backend.append(char_name) # Add if valid folder name - if user_cancelled_validation: return + if user_cancelled_validation: return # Stop if user cancelled during prompt - if valid_filters_for_backend: + if valid_filters_for_backend: # If there are valid filters after validation filter_character_list_to_pass = valid_filters_for_backend self.log_signal.emit(f" Using validated character filters for subfolders: {', '.join(filter_character_list_to_pass)}") - else: - self.log_signal.emit("⚠️ No valid character filters remaining after validation for subfolder naming.") - elif parsed_character_list : + else: # If no valid filters remain + self.log_signal.emit("⚠️ No valid character filters remaining for subfolder naming (after validation).") + elif parsed_character_list : # If not using subfolders or it's a single post, still pass the list for other filtering purposes (e.g., file content filtering) filter_character_list_to_pass = parsed_character_list - self.log_signal.emit(f"ℹ️ Character filters provided: {', '.join(filter_character_list_to_pass)} (Subfolder creation rules may differ).") + self.log_signal.emit(f"ℹ️ Character filters provided: {', '.join(filter_character_list_to_pass)} (Subfolder rules may differ or not apply).") + + # Manga mode warning if no character filter is provided (as filter is used for naming/folder) if manga_mode and not filter_character_list_to_pass and not extract_links_only: msg_box = QMessageBox(self) msg_box.setIcon(QMessageBox.Warning) msg_box.setWindowTitle("Manga Mode Filter Warning") msg_box.setText( - "Manga Mode is enabled, but the 'Filter by Character(s)' field is empty.\n\n" - "For best results (correct file naming and grouping), please enter the exact Manga/Series title " - "(as used by the creator on the site) into the filter field.\n\n" - "Do you want to proceed without a filter (file names might be generic) or cancel?" + "Manga Mode is enabled, but 'Filter by Character(s)' is empty.\n\n" + "For best results (correct file naming and folder organization if subfolders are on), " + "please enter the Manga/Series title into the filter field.\n\n" + "Proceed without a filter (names might be generic, folder might be less specific)?" ) proceed_button = msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole) cancel_button = msg_box.addButton("Cancel Download", QMessageBox.RejectRole) - msg_box.exec_() - - if msg_box.clickedButton() == cancel_button: - self.log_signal.emit("❌ Download cancelled by user due to Manga Mode filter warning.") - return - else: + if msg_box.clickedButton() == cancel_button: # If user cancels + self.log_signal.emit("❌ Download cancelled due to Manga Mode filter warning."); return + else: # User proceeds self.log_signal.emit("⚠️ Proceeding with Manga Mode without a specific title filter.") - custom_folder_name_cleaned = None - if use_subfolders and post_id_from_url and self.custom_folder_widget.isVisible() and not extract_links_only: - raw_custom_name = self.custom_folder_input.text().strip() - if raw_custom_name: - cleaned_custom = clean_folder_name(raw_custom_name) - if cleaned_custom: custom_folder_name_cleaned = cleaned_custom - else: self.log_signal.emit(f"⚠️ Invalid custom folder name ignored: '{raw_custom_name}'") + # Custom folder name for single post downloads + custom_folder_name_cleaned = None # Initialize + # Check if custom folder input is relevant and visible + if use_subfolders and post_id_from_url and self.custom_folder_widget and self.custom_folder_widget.isVisible() and not extract_links_only: + raw_custom_name = self.custom_folder_input.text().strip() # Get raw custom folder name + if raw_custom_name: # If a name was provided + cleaned_custom = clean_folder_name(raw_custom_name) # Clean it for folder usage + if cleaned_custom: custom_folder_name_cleaned = cleaned_custom # Use if valid + else: self.log_signal.emit(f"⚠️ Invalid custom folder name ignored: '{raw_custom_name}' (resulted in empty string after cleaning).") - self.main_log_output.clear() - if extract_links_only: - self.main_log_output.append("🔗 Extracting Links...") - if self.external_log_output: self.external_log_output.clear() - elif self.show_external_links: - self.external_log_output.clear() + + # --- Clear logs and reset progress counters --- + self.main_log_output.clear() # Clear main log + if extract_links_only: self.main_log_output.append("🔗 Extracting Links..."); # Initial message for links mode + elif backend_filter_mode == 'archive': self.main_log_output.append("📦 Downloading Archives Only...") # Log for new archive mode + + if self.external_log_output: self.external_log_output.clear() # Clear external log + # Show external log title only if it's relevant for the current mode and setting + if self.show_external_links and not extract_links_only and backend_filter_mode != 'archive': self.external_log_output.append("🔗 External Links Found:") - self.file_progress_label.setText("") - self.cancellation_event.clear() - self.active_futures = [] - self.total_posts_to_process = self.processed_posts_count = self.download_counter = self.skip_counter = 0 - self.progress_label.setText("Progress: Initializing...") + + self.file_progress_label.setText(""); self.cancellation_event.clear(); self.active_futures = [] # Reset progress and cancellation + self.total_posts_to_process = self.processed_posts_count = self.download_counter = self.skip_counter = 0 # Reset counters + self.progress_label.setText("Progress: Initializing...") # Initial progress message - log_messages = [ - "="*40, f"🚀 Starting {'Link Extraction' if extract_links_only else 'Download'} @ {time.strftime('%Y-%m-%d %H:%M:%S')}", - f" URL: {api_url}", - ] - if not extract_links_only: - log_messages.append(f" Save Location: {output_dir}") + # Determine effective number of threads for posts and files based on settings + effective_num_post_workers = 1 # Default for single post or non-multithreaded creator feed + effective_num_file_threads_per_worker = 1 # Default number of file download threads per worker + + if post_id_from_url: # Single post URL + if use_multithreading_enabled_by_checkbox: # Use GUI thread count for file downloads for this single post + effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER)) + else: # Creator feed URL + if use_multithreading_enabled_by_checkbox: # If multithreading is enabled for creator feed + effective_num_post_workers = max(1, min(num_threads_from_gui, MAX_THREADS)) # For concurrent post processing + # The same GUI thread count is also used as the *max* for files per worker, capped appropriately + effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER)) - log_messages.append(f" Mode: {'Single Post' if post_id_from_url else 'Creator Feed'}") - if is_creator_feed: - if manga_mode: - log_messages.append(" Page Range: All (Manga Mode - Oldest Posts Processed First)") - else: - pr_log = "All" - if start_page or end_page: - pr_log = f"{f'From {start_page} ' if start_page else ''}{'to ' if start_page and end_page else ''}{f'{end_page}' if end_page else (f'Up to {end_page}' if end_page else (f'From {start_page}' if start_page else 'Specific Range'))}".strip() - log_messages.append(f" Page Range: {pr_log if pr_log else 'All'}") + # --- Log initial download parameters to the main log --- + log_messages = ["="*40, f"🚀 Starting {'Link Extraction' if extract_links_only else ('Archive Download' if backend_filter_mode == 'archive' else 'Download')} @ {time.strftime('%Y-%m-%d %H:%M:%S')}", f" URL: {api_url}"] + if not extract_links_only: log_messages.append(f" Save Location: {output_dir}") + + if post_id_from_url: # Logging for Single Post download + log_messages.append(f" Mode: Single Post") + log_messages.append(f" ↳ File Downloads: Up to {effective_num_file_threads_per_worker} concurrent file(s)") + else: # Logging for Creator Feed download + log_messages.append(f" Mode: Creator Feed") + log_messages.append(f" Post Processing: {'Multi-threaded (' + str(effective_num_post_workers) + ' workers)' if effective_num_post_workers > 1 else 'Single-threaded (1 worker)'}") + log_messages.append(f" ↳ File Downloads per Worker: Up to {effective_num_file_threads_per_worker} concurrent file(s)") + if is_creator_feed: # Only log page range for creator feeds + if manga_mode: log_messages.append(" Page Range: All (Manga Mode - Oldest Posts Processed First)") + else: # Construct a readable page range string for logging + pr_log = "All" # Default if no pages specified + if start_page or end_page: + pr_log = f"{f'From {start_page} ' if start_page else ''}{'to ' if start_page and end_page else ''}{f'{end_page}' if end_page else (f'Up to {end_page}' if end_page else (f'From {start_page}' if start_page else 'Specific Range'))}".strip() + log_messages.append(f" Page Range: {pr_log if pr_log else 'All'}") - if not extract_links_only: + + if not extract_links_only: # Settings relevant to file downloading log_messages.append(f" Subfolders: {'Enabled' if use_subfolders else 'Disabled'}") - if use_subfolders: + if use_subfolders: # Log subfolder naming details if custom_folder_name_cleaned: log_messages.append(f" Custom Folder (Post): '{custom_folder_name_cleaned}'") elif filter_character_list_to_pass and not post_id_from_url: log_messages.append(f" Character Filters for Folders: {', '.join(filter_character_list_to_pass)}") else: log_messages.append(f" Folder Naming: Automatic (based on title/known names)") log_messages.append(f" Subfolder per Post: {'Enabled' if use_post_subfolders else 'Disabled'}") log_messages.extend([ - # --- MODIFIED LOGGING FOR FILTER MODE --- f" File Type Filter: {user_selected_filter_text} (Backend processing as: {backend_filter_mode})", - # --- END MODIFIED LOGGING --- - f" Skip Archives: {'.zip' if skip_zip else ''}{', ' if skip_zip and skip_rar else ''}{'.rar' if skip_rar else ''}{'None' if not (skip_zip or skip_rar) else ''}", + f" Skip Archives: {'.zip' if effective_skip_zip else ''}{', ' if effective_skip_zip and effective_skip_rar else ''}{'.rar' if effective_skip_rar else ''}{'None (Archive Mode)' if backend_filter_mode == 'archive' else ('None' if not (effective_skip_zip or effective_skip_rar) else '')}", # Clarify for archive mode f" Skip Words (posts/files): {', '.join(skip_words_list) if skip_words_list else 'None'}", + f" Skip Words Scope: {current_skip_words_scope.capitalize()}", f" Compress Images: {'Enabled' if compress_images else 'Disabled'}", - f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}", + f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}" ]) - else: - log_messages.append(f" Mode: Extracting Links Only") # This handles the "Only Links" case + else: # Link extraction mode logging + log_messages.append(f" Mode: Extracting Links Only") - log_messages.append(f" Show External Links: {'Enabled' if self.show_external_links else 'Disabled'}") - if manga_mode: log_messages.append(f" Manga Mode (File Renaming by Post Title): Enabled") + # Log external links setting (relevant unless in "Only Links" or "Only Archives" mode where it's forced off) + log_messages.append(f" Show External Links: {'Enabled' if self.show_external_links and not extract_links_only and backend_filter_mode != 'archive' else 'Disabled'}") + + if manga_mode: # Manga mode specific logs + log_messages.append(f" Manga Mode (File Renaming by Post Title): Enabled") + log_messages.append(f" ↳ Manga Filename Style: {'Post Title Based' if self.manga_filename_style == STYLE_POST_TITLE else 'Original File Name'}") - should_use_multithreading = use_multithreading and not post_id_from_url - log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading else 'Single-threaded (posts)'}") - if should_use_multithreading: log_messages.append(f" Number of Post Worker Threads: {num_threads}") - log_messages.append("="*40) - for msg in log_messages: self.log_signal.emit(msg) + # Determine if multithreading for posts is actually used for logging + # It's used if checkbox is checked AND it's a creator feed (not single post) + should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url + log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading_for_posts else 'Single-threaded (posts)'}") + if should_use_multithreading_for_posts: # Log number of post workers only if actually using them + log_messages.append(f" Number of Post Worker Threads: {effective_num_post_workers}") + log_messages.append("="*40) # End of parameter logging + for msg in log_messages: self.log_signal.emit(msg) # Emit all log messages - self.set_ui_enabled(False) - - unwanted_keywords_for_folders = {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'} + # --- Disable UI and prepare for download --- + self.set_ui_enabled(False) # Disable UI elements during download + unwanted_keywords_for_folders = {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'} # Example set of keywords to avoid in folder names + + # --- Prepare arguments dictionary for backend thread/worker --- + # This template holds all possible arguments that might be needed by either single or multi-threaded download logic args_template = { 'api_url_input': api_url, - 'download_root': output_dir, - 'output_dir': output_dir, - 'known_names': list(KNOWN_NAMES), - 'known_names_copy': list(KNOWN_NAMES), + 'download_root': output_dir, # Used by PostProcessorWorker if it creates folders + 'output_dir': output_dir, # Passed to DownloadThread for consistency (though it might use download_root) + 'known_names': list(KNOWN_NAMES), # Pass a copy of the current known names + 'known_names_copy': list(KNOWN_NAMES), # Legacy, ensure it's there if used by older parts of backend 'filter_character_list': filter_character_list_to_pass, - # --- MODIFIED: Pass the correct backend_filter_mode --- - 'filter_mode': backend_filter_mode, - # --- END MODIFICATION --- - 'skip_zip': skip_zip, 'skip_rar': skip_rar, - 'use_subfolders': use_subfolders, 'use_post_subfolders': use_post_subfolders, - 'compress_images': compress_images, 'download_thumbnails': download_thumbnails, - 'service': service, 'user_id': user_id, - 'downloaded_files': self.downloaded_files, - 'downloaded_files_lock': self.downloaded_files_lock, - 'downloaded_file_hashes': self.downloaded_file_hashes, - 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, + 'filter_mode': backend_filter_mode, # 'all', 'image', 'video', or 'archive' + 'skip_zip': effective_skip_zip, # Use the determined effective value based on mode + 'skip_rar': effective_skip_rar, # Use the determined effective value based on mode + 'use_subfolders': use_subfolders, + 'use_post_subfolders': use_post_subfolders, + 'compress_images': compress_images, + 'download_thumbnails': download_thumbnails, + 'service': service, # Extracted from URL + 'user_id': user_id, # Extracted from URL + 'downloaded_files': self.downloaded_files, # Pass shared set for session-based skip + 'downloaded_files_lock': self.downloaded_files_lock, # Pass shared lock + 'downloaded_file_hashes': self.downloaded_file_hashes, # Pass shared set for hash-based skip + 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, # Pass shared lock 'skip_words_list': skip_words_list, - 'show_external_links': self.show_external_links, - 'extract_links_only': extract_links_only, - 'start_page': start_page, - 'end_page': end_page, - 'target_post_id_from_initial_url': post_id_from_url, - 'custom_folder_name': custom_folder_name_cleaned, - 'manga_mode_active': manga_mode, - 'unwanted_keywords': unwanted_keywords_for_folders, - 'cancellation_event': self.cancellation_event, - 'signals': self.worker_signals, + 'skip_words_scope': current_skip_words_scope, + 'show_external_links': self.show_external_links, # For worker to know if it should emit external_link_signal + 'extract_links_only': extract_links_only, # For worker to know if it should only extract links + 'start_page': start_page, # Validated start page + 'end_page': end_page, # Validated end page + 'target_post_id_from_initial_url': post_id_from_url, # The specific post ID if a single post URL was given + 'custom_folder_name': custom_folder_name_cleaned, # Cleaned custom folder name for single post + 'manga_mode_active': manga_mode, # Flag for manga mode + 'unwanted_keywords': unwanted_keywords_for_folders, # For folder naming logic in worker + 'cancellation_event': self.cancellation_event, # Shared cancellation event for all threads/workers + 'signals': self.worker_signals, # Signals object for PostProcessorWorker instances to communicate back to GUI + 'manga_filename_style': self.manga_filename_style, # Current manga filename style + # Pass the effective number of file threads for the worker/post processor to use internally + 'num_file_threads_for_worker': effective_num_file_threads_per_worker } - + # --- Start download (single-threaded for posts or multi-threaded for posts) --- try: - if should_use_multithreading: - self.log_signal.emit(f" Initializing multi-threaded {'link extraction' if extract_links_only else 'download'} with {num_threads} post workers...") - self.start_multi_threaded_download(num_post_workers=num_threads, **args_template) - else: + if should_use_multithreading_for_posts: # Multi-threaded for posts (creator feed with multithreading enabled) + self.log_signal.emit(f" Initializing multi-threaded {'link extraction' if extract_links_only else 'download'} with {effective_num_post_workers} post workers...") + self.start_multi_threaded_download(num_post_workers=effective_num_post_workers, **args_template) + else: # Single-threaded for posts (either single post URL or creator feed with multithreading off) self.log_signal.emit(f" Initializing single-threaded {'link extraction' if extract_links_only else 'download'}...") + # Define keys expected by BackendDownloadThread constructor for clarity and to avoid passing unexpected args dt_expected_keys = [ 'api_url_input', 'output_dir', 'known_names_copy', 'cancellation_event', 'filter_character_list', 'filter_mode', 'skip_zip', 'skip_rar', 'use_subfolders', 'use_post_subfolders', 'custom_folder_name', 'compress_images', 'download_thumbnails', 'service', 'user_id', - 'downloaded_files', 'downloaded_file_hashes', 'downloaded_files_lock', - 'downloaded_file_hashes_lock', 'skip_words_list', 'show_external_links', - 'extract_links_only', - 'num_file_threads_for_worker', 'skip_current_file_flag', + 'downloaded_files', 'downloaded_file_hashes', + 'downloaded_files_lock', 'downloaded_file_hashes_lock', + 'skip_words_list', 'skip_words_scope', 'show_external_links', 'extract_links_only', + 'num_file_threads_for_worker', # This is for the PostProcessorWorker that BackendDownloadThread might create + 'skip_current_file_flag', # Event for skipping a single file (if feature existed) 'start_page', 'end_page', 'target_post_id_from_initial_url', - 'manga_mode_active', 'unwanted_keywords' + 'manga_mode_active', 'unwanted_keywords', 'manga_filename_style' ] - args_template['num_file_threads_for_worker'] = 1 - args_template['skip_current_file_flag'] = None - - single_thread_args = {} - for key in dt_expected_keys: - if key in args_template: - single_thread_args[key] = args_template[key] - - self.start_single_threaded_download(**single_thread_args) - - except Exception as e: - self.log_signal.emit(f"❌ CRITICAL ERROR preparing {'link extraction' if extract_links_only else 'download'}: {e}\n{traceback.format_exc()}") + # For single threaded (post) download, the 'num_file_threads_for_worker' from args_template + # will be used by the PostProcessorWorker if it needs to download multiple files for that single post. + args_template['skip_current_file_flag'] = None # Ensure this is explicitly set (or passed if it were a feature) + # Filter args_template to only include keys expected by BackendDownloadThread constructor + single_thread_args = {key: args_template[key] for key in dt_expected_keys if key in args_template} + self.start_single_threaded_download(**single_thread_args) # Start the single download thread + except Exception as e: # Catch any errors during the preparation/start of download + self.log_signal.emit(f"❌ CRITICAL ERROR preparing download: {e}\n{traceback.format_exc()}") QMessageBox.critical(self, "Start Error", f"Failed to start process:\n{e}") - self.download_finished(0,0,False) + self.download_finished(0,0,False, []) # Ensure UI is re-enabled and state is reset def start_single_threaded_download(self, **kwargs): - global BackendDownloadThread + """Starts the download process in a single QThread (BackendDownloadThread). + This thread handles post fetching and then processes each post sequentially (though file downloads within a post can be multi-threaded by PostProcessorWorker). + """ + global BackendDownloadThread # The class imported from downloader_utils try: - self.download_thread = BackendDownloadThread(**kwargs) + self.download_thread = BackendDownloadThread(**kwargs) # Instantiate with all necessary arguments + # Connect signals from the backend thread to GUI handler methods + if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.connect(self.handle_main_log) + if hasattr(self.download_thread, 'add_character_prompt_signal'): self.download_thread.add_character_prompt_signal.connect(self.add_character_prompt_signal) + if hasattr(self.download_thread, 'finished_signal'): self.download_thread.finished_signal.connect(self.download_finished) + # For character prompt response flowing back from GUI to the backend thread + if hasattr(self.download_thread, 'receive_add_character_result'): self.character_prompt_response_signal.connect(self.download_thread.receive_add_character_result) + if hasattr(self.download_thread, 'external_link_signal'): self.download_thread.external_link_signal.connect(self.handle_external_link_signal) + if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.connect(self.update_file_progress_display) - if hasattr(self.download_thread, 'progress_signal'): - self.download_thread.progress_signal.connect(self.handle_main_log) - if hasattr(self.download_thread, 'add_character_prompt_signal'): - self.download_thread.add_character_prompt_signal.connect(self.add_character_prompt_signal) - if hasattr(self.download_thread, 'finished_signal'): - self.download_thread.finished_signal.connect(self.finished_signal) - if hasattr(self.download_thread, 'receive_add_character_result'): - self.character_prompt_response_signal.connect(self.download_thread.receive_add_character_result) - if hasattr(self.download_thread, 'external_link_signal'): - self.download_thread.external_link_signal.connect(self.handle_external_link_signal) - if hasattr(self.download_thread, 'file_progress_signal'): - self.download_thread.file_progress_signal.connect(self.update_file_progress_display) - - self.download_thread.start() + self.download_thread.start() # Start the QThread self.log_signal.emit("✅ Single download thread (for posts) started.") - except Exception as e: + except Exception as e: # Catch errors during thread instantiation or start self.log_signal.emit(f"❌ CRITICAL ERROR starting single-thread: {e}\n{traceback.format_exc()}") QMessageBox.critical(self, "Thread Start Error", f"Failed to start download process: {e}") - self.download_finished(0,0,False) + self.download_finished(0,0,False, []) # Ensure UI is re-enabled and state is reset def start_multi_threaded_download(self, num_post_workers, **kwargs): - global PostProcessorWorker - self.thread_pool = ThreadPoolExecutor(max_workers=num_post_workers, thread_name_prefix='PostWorker_') - self.active_futures = [] - self.processed_posts_count = 0 - self.total_posts_to_process = 0 - self.download_counter = 0 - self.skip_counter = 0 + """Starts the download process using a ThreadPoolExecutor for fetching and processing posts concurrently.""" + global PostProcessorWorker # The worker class from downloader_utils + # Ensure thread pool is created if it doesn't exist or was previously shut down + if self.thread_pool is None: + self.thread_pool = ThreadPoolExecutor(max_workers=num_post_workers, thread_name_prefix='PostWorker_') + + self.active_futures = [] # Reset list of active futures for this download run + # Reset progress counters for this run + self.processed_posts_count = 0; self.total_posts_to_process = 0; self.download_counter = 0; self.skip_counter = 0 + self.all_kept_original_filenames = [] # Reset list of kept original filenames for this run + # 'num_file_threads_for_worker' is already in kwargs from the main start_download logic. + # This will be passed to each PostProcessorWorker instance created by _fetch_and_queue_posts. + + # Start a separate Python thread (not QThread) to fetch post data and submit tasks to the pool. + # This prevents the GUI from freezing during the initial API calls to get all post data, + # especially for large creator feeds. fetcher_thread = threading.Thread( - target=self._fetch_and_queue_posts, - args=(kwargs['api_url_input'], kwargs, num_post_workers), - daemon=True, name="PostFetcher" + target=self._fetch_and_queue_posts, # Method to run in the new thread + args=(kwargs['api_url_input'], kwargs, num_post_workers), # Pass API URL, base args, and worker count + daemon=True, # Daemon thread will exit when the main application exits + name="PostFetcher" # Name for the thread (useful for debugging) ) - fetcher_thread.start() + fetcher_thread.start() # Start the fetcher thread self.log_signal.emit(f"✅ Post fetcher thread started. {num_post_workers} post worker threads initializing...") def _fetch_and_queue_posts(self, api_url_input_for_fetcher, worker_args_template, num_post_workers): - global PostProcessorWorker, download_from_api - all_posts_data = [] - fetch_error_occurred = False + """ + (This method runs in a separate Python thread, not the main GUI thread) + Fetches all post data using download_from_api and submits each post as a task to the ThreadPoolExecutor. + """ + global PostProcessorWorker, download_from_api # Ensure access to these from downloader_utils + all_posts_data = [] # List to store all fetched post data + fetch_error_occurred = False # Flag to track if an error occurs during fetching + manga_mode_active_for_fetch = worker_args_template.get('manga_mode_active', False) # Get manga mode status - manga_mode_active_for_fetch = worker_args_template.get('manga_mode_active', False) + # Ensure signals object is available for workers (it's created in DownloaderApp.__init__) signals_for_worker = worker_args_template.get('signals') - if not signals_for_worker: - self.log_signal.emit("❌ CRITICAL ERROR: Signals object missing for worker in _fetch_and_queue_posts.") - self.finished_signal.emit(0,0,True) + if not signals_for_worker: # This should not happen if setup is correct + self.log_signal.emit("❌ CRITICAL ERROR: Signals object missing for worker in _fetch_and_queue_posts."); + self.finished_signal.emit(0,0,True, []); # Signal failure to GUI return - try: - self.log_signal.emit(" Fetching post data from API...") - post_generator = download_from_api( + try: # Fetch post data from API + self.log_signal.emit(" Fetching post data from API (this may take a moment for large feeds)...") + post_generator = download_from_api( # Call the API fetching function from downloader_utils api_url_input_for_fetcher, - logger=lambda msg: self.log_signal.emit(f"[Fetcher] {msg}"), + logger=lambda msg: self.log_signal.emit(f"[Fetcher] {msg}"), # Prefix fetcher logs for clarity start_page=worker_args_template.get('start_page'), end_page=worker_args_template.get('end_page'), - manga_mode=manga_mode_active_for_fetch, - cancellation_event=self.cancellation_event + manga_mode=manga_mode_active_for_fetch, # Pass manga mode for correct fetching order + cancellation_event=self.cancellation_event # Pass shared cancellation event ) - for posts_batch in post_generator: - if self.cancellation_event.is_set(): - fetch_error_occurred = True; self.log_signal.emit(" Post fetching cancelled by user."); break - if isinstance(posts_batch, list): - all_posts_data.extend(posts_batch) - self.total_posts_to_process = len(all_posts_data) - if self.total_posts_to_process > 0 and self.total_posts_to_process % 100 == 0 : - self.log_signal.emit(f" Fetched {self.total_posts_to_process} posts so far...") - else: - fetch_error_occurred = True - self.log_signal.emit(f"❌ API fetcher returned non-list type: {type(posts_batch)}"); break - if not fetch_error_occurred and not self.cancellation_event.is_set(): + for posts_batch in post_generator: # download_from_api yields batches of posts + if self.cancellation_event.is_set(): # Check for cancellation + fetch_error_occurred = True; self.log_signal.emit(" Post fetching cancelled by user."); break + if isinstance(posts_batch, list): # Ensure API returned a list + all_posts_data.extend(posts_batch) # Add fetched posts to the list + self.total_posts_to_process = len(all_posts_data) # Update total post count + # Log progress periodically for very large feeds to show activity + if self.total_posts_to_process > 0 and self.total_posts_to_process % 100 == 0 : # e.g., log every 100 posts + self.log_signal.emit(f" Fetched {self.total_posts_to_process} posts so far...") + else: # Should not happen if download_from_api is implemented correctly + fetch_error_occurred = True; self.log_signal.emit(f"❌ API fetcher returned non-list type: {type(posts_batch)}"); break + + if not fetch_error_occurred and not self.cancellation_event.is_set(): # If fetching completed without error/cancellation self.log_signal.emit(f"✅ Post fetching complete. Total posts to process: {self.total_posts_to_process}") - except TypeError as te: - self.log_signal.emit(f"❌ TypeError calling download_from_api: {te}") - self.log_signal.emit(" Check if 'downloader_utils.py' has the correct 'download_from_api' signature (including 'manga_mode' and 'cancellation_event').") - self.log_signal.emit(traceback.format_exc(limit=2)) - fetch_error_occurred = True - except RuntimeError as re: - self.log_signal.emit(f"ℹ️ Post fetching runtime error (likely cancellation): {re}") - fetch_error_occurred = True - except Exception as e: - self.log_signal.emit(f"❌ Error during post fetching: {e}\n{traceback.format_exc(limit=2)}") - fetch_error_occurred = True + + except TypeError as te: # Error in calling download_from_api (e.g., wrong arguments) + self.log_signal.emit(f"❌ TypeError calling download_from_api: {te}\n Check 'downloader_utils.py' signature.\n{traceback.format_exc(limit=2)}"); fetch_error_occurred = True + except RuntimeError as re_err: # Typically from cancellation within fetch_posts_paginated or API errors + self.log_signal.emit(f"ℹ️ Post fetching runtime error (likely cancellation or API issue): {re_err}"); fetch_error_occurred = True + except Exception as e: # Other unexpected errors during fetching + self.log_signal.emit(f"❌ Error during post fetching: {e}\n{traceback.format_exc(limit=2)}"); fetch_error_occurred = True + if self.cancellation_event.is_set() or fetch_error_occurred: - self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set()) - if self.thread_pool: - self.thread_pool.shutdown(wait=False, cancel_futures=True); self.thread_pool = None + # If fetching was cancelled or failed, signal completion to GUI and clean up thread pool + self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set(), self.all_kept_original_filenames) + if self.thread_pool: self.thread_pool.shutdown(wait=False, cancel_futures=True); self.thread_pool = None # Don't wait if already cancelling return - if self.total_posts_to_process == 0: - self.log_signal.emit("😕 No posts found or fetched to process.") - self.finished_signal.emit(0,0,False); return + if self.total_posts_to_process == 0: # No posts found or fetched + self.log_signal.emit("😕 No posts found or fetched to process."); + self.finished_signal.emit(0,0,False, []); # Signal completion with zero counts + return + # --- Submit fetched posts to the thread pool for processing --- self.log_signal.emit(f" Submitting {self.total_posts_to_process} post processing tasks to thread pool...") - self.processed_posts_count = 0 - self.overall_progress_signal.emit(self.total_posts_to_process, 0) + self.processed_posts_count = 0 # Reset counter for this run + self.overall_progress_signal.emit(self.total_posts_to_process, 0) # Update GUI progress bar/label + + # 'num_file_threads_for_worker' should be in worker_args_template from start_download, + # this is the number of file download threads each PostProcessorWorker will use. + num_file_dl_threads_for_each_worker = worker_args_template.get('num_file_threads_for_worker', 1) - num_file_dl_threads = 4 + # Define keys expected by PostProcessorWorker constructor for clarity and safety when preparing arguments ppw_expected_keys = [ - 'post_data', 'download_root', 'known_names', 'filter_character_list', - 'unwanted_keywords', 'filter_mode', 'skip_zip', 'skip_rar', - 'use_subfolders', 'use_post_subfolders', 'target_post_id_from_initial_url', - 'custom_folder_name', 'compress_images', 'download_thumbnails', 'service', - 'user_id', 'api_url_input', 'cancellation_event', 'signals', - 'downloaded_files', 'downloaded_file_hashes', 'downloaded_files_lock', - 'downloaded_file_hashes_lock', 'skip_words_list', 'show_external_links', - 'extract_links_only', 'num_file_threads', 'skip_current_file_flag', - 'manga_mode_active' + 'post_data', 'download_root', 'known_names', 'filter_character_list', 'unwanted_keywords', + 'filter_mode', 'skip_zip', 'skip_rar', 'use_subfolders', 'use_post_subfolders', + 'target_post_id_from_initial_url', 'custom_folder_name', 'compress_images', + 'download_thumbnails', 'service', 'user_id', 'api_url_input', + 'cancellation_event', 'signals', 'downloaded_files', 'downloaded_file_hashes', + 'downloaded_files_lock', 'downloaded_file_hashes_lock', + 'skip_words_list', 'skip_words_scope', 'show_external_links', 'extract_links_only', + 'num_file_threads', # This will be num_file_dl_threads_for_each_worker for the worker's internal pool + 'skip_current_file_flag', # Event for skipping a single file within a worker (if feature existed) + 'manga_mode_active', 'manga_filename_style' ] + # Keys that are optional for PostProcessorWorker or have defaults defined there ppw_optional_keys_with_defaults = { - 'skip_words_list', 'show_external_links', 'extract_links_only', - 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active' + 'skip_words_list', 'skip_words_scope', 'show_external_links', 'extract_links_only', + 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style' + # Note: 'unwanted_keywords' also has a default in the worker if not provided in args } - for post_data_item in all_posts_data: - if self.cancellation_event.is_set(): break - if not isinstance(post_data_item, dict): - self.log_signal.emit(f"⚠️ Skipping invalid post data item (not a dict): {type(post_data_item)}") - self.processed_posts_count += 1 + for post_data_item in all_posts_data: # Iterate through each fetched post data + if self.cancellation_event.is_set(): break # Stop submitting new tasks if cancellation is requested + if not isinstance(post_data_item, dict): # Sanity check on post data type + self.log_signal.emit(f"⚠️ Skipping invalid post data item (not a dict): {type(post_data_item)}"); + self.processed_posts_count += 1; # Count as processed to not hang progress if this happens continue - worker_init_args = {} - missing_keys = [] - for key in ppw_expected_keys: - if key == 'post_data': worker_init_args[key] = post_data_item - elif key == 'num_file_threads': worker_init_args[key] = num_file_dl_threads - elif key == 'signals': worker_init_args[key] = signals_for_worker - elif key in worker_args_template: worker_init_args[key] = worker_args_template[key] - elif key in ppw_optional_keys_with_defaults: pass - else: missing_keys.append(key) + # Prepare arguments for this specific PostProcessorWorker instance + worker_init_args = {}; missing_keys = [] # To store args for worker and track any missing ones + for key in ppw_expected_keys: # Iterate through expected keys for the worker + if key == 'post_data': worker_init_args[key] = post_data_item # Set the current post's data + elif key == 'num_file_threads': worker_init_args[key] = num_file_dl_threads_for_each_worker # Set file threads for this worker + elif key == 'signals': worker_init_args[key] = signals_for_worker # Use the shared signals object for this batch of workers + elif key in worker_args_template: worker_init_args[key] = worker_args_template[key] # Get from template if available + elif key in ppw_optional_keys_with_defaults: pass # Worker has a default, so no need to pass if not in template + else: missing_keys.append(key) # Should not happen if ppw_expected_keys is correct and covers all mandatory args + if missing_keys: # If any mandatory arguments are missing + self.log_signal.emit(f"❌ CRITICAL ERROR: Missing keys for PostProcessorWorker: {', '.join(missing_keys)}"); + self.cancellation_event.set(); break # Stop everything if critical args are missing - if missing_keys: - self.log_signal.emit(f"❌ CRITICAL ERROR: Missing expected keys for PostProcessorWorker: {', '.join(missing_keys)}") - self.cancellation_event.set() - break + try: # Submit the worker task to the thread pool + worker_instance = PostProcessorWorker(**worker_init_args) # Create worker instance + if self.thread_pool: # Ensure pool still exists and is active + future = self.thread_pool.submit(worker_instance.process) # Submit the worker's process method as a task + future.add_done_callback(self._handle_future_result) # Add callback for when this task finishes + self.active_futures.append(future) # Keep track of the submitted future + else: # Pool was shut down or never created (should not happen if logic is correct) + self.log_signal.emit("⚠️ Thread pool not available. Cannot submit more tasks."); break + except TypeError as te: self.log_signal.emit(f"❌ TypeError creating PostProcessorWorker: {te}\n Passed Args: [{', '.join(sorted(worker_init_args.keys()))}]\n{traceback.format_exc(limit=5)}"); self.cancellation_event.set(); break + except RuntimeError: self.log_signal.emit("⚠️ Runtime error submitting task (pool likely shutting down)."); break + except Exception as e: self.log_signal.emit(f"❌ Error submitting post {post_data_item.get('id','N/A')} to worker: {e}"); break - try: - worker_instance = PostProcessorWorker(**worker_init_args) - if self.thread_pool: - future = self.thread_pool.submit(worker_instance.process) - future.add_done_callback(self._handle_future_result) - self.active_futures.append(future) - else: - self.log_signal.emit("⚠️ Thread pool not available. Cannot submit more tasks.") - break - except TypeError as te: - self.log_signal.emit(f"❌ TypeError creating PostProcessorWorker: {te}") - passed_keys_str = ", ".join(sorted(worker_init_args.keys())) - self.log_signal.emit(f" Passed Args: [{passed_keys_str}]") - self.log_signal.emit(traceback.format_exc(limit=5)) - self.cancellation_event.set(); break - except RuntimeError: - self.log_signal.emit("⚠️ Runtime error submitting task (pool likely shutting down)."); break - except Exception as e: - self.log_signal.emit(f"❌ Error submitting post {post_data_item.get('id','N/A')} to worker: {e}"); break - - if not self.cancellation_event.is_set(): - self.log_signal.emit(f" {len(self.active_futures)} post processing tasks submitted to pool.") + if not self.cancellation_event.is_set(): self.log_signal.emit(f" {len(self.active_futures)} post processing tasks submitted to pool.") else: - self.finished_signal.emit(self.download_counter, self.skip_counter, True) - if self.thread_pool: - self.thread_pool.shutdown(wait=False, cancel_futures=True); self.thread_pool = None - + self.finished_signal.emit(self.download_counter, self.skip_counter, True, self.all_kept_original_filenames) + if self.thread_pool: self.thread_pool.shutdown(wait=False, cancel_futures=True); self.thread_pool = None def _handle_future_result(self, future: Future): self.processed_posts_count += 1 - downloaded_files_from_future = 0 - skipped_files_from_future = 0 + downloaded_files_from_future, skipped_files_from_future = 0, 0 + kept_originals_from_future = [] try: - if future.cancelled(): - self.log_signal.emit(" A post processing task was cancelled.") - elif future.exception(): - worker_exception = future.exception() - self.log_signal.emit(f"❌ Post processing worker error: {worker_exception}") - else: # Success - downloaded_files_from_future, skipped_files_from_future = future.result() + if future.cancelled(): self.log_signal.emit(" A post processing task was cancelled.") + elif future.exception(): self.log_signal.emit(f"❌ Post processing worker error: {future.exception()}") + else: + downloaded_files_from_future, skipped_files_from_future, kept_originals_from_future = future.result() with self.downloaded_files_lock: - self.download_counter += downloaded_files_from_future - self.skip_counter += skipped_files_from_future + self.download_counter += downloaded_files_from_future + self.skip_counter += skipped_files_from_future + + if kept_originals_from_future: + self.all_kept_original_filenames.extend(kept_originals_from_future) self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count) - - except Exception as e: - self.log_signal.emit(f"❌ Error in _handle_future_result callback: {e}\n{traceback.format_exc(limit=2)}") + except Exception as e: self.log_signal.emit(f"❌ Error in _handle_future_result callback: {e}\n{traceback.format_exc(limit=2)}") if self.total_posts_to_process > 0 and self.processed_posts_count >= self.total_posts_to_process: - all_done = all(f.done() for f in self.active_futures) - if all_done: + if all(f.done() for f in self.active_futures): QApplication.processEvents() self.log_signal.emit("🏁 All submitted post tasks have completed or failed.") - self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set()) - + self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set(), self.all_kept_original_filenames) def set_ui_enabled(self, enabled): - widgets_to_toggle = [ - self.download_btn, self.link_input, - self.radio_all, self.radio_images, self.radio_videos, self.radio_only_links, - self.skip_zip_checkbox, self.skip_rar_checkbox, - self.use_subfolders_checkbox, self.compress_images_checkbox, - self.download_thumbnails_checkbox, self.use_multithreading_checkbox, - self.skip_words_input, self.character_search_input, self.new_char_input, - self.add_char_button, self.delete_char_button, - self.start_page_input, self.end_page_input, self.page_range_label, self.to_label, - self.character_input, self.custom_folder_input, self.custom_folder_label, - self.reset_button, - self.manga_mode_checkbox + widgets_to_toggle = [ self.download_btn, self.link_input, self.radio_all, self.radio_images, self.radio_videos, self.radio_only_links, + self.skip_zip_checkbox, self.skip_rar_checkbox, self.use_subfolders_checkbox, self.compress_images_checkbox, + self.download_thumbnails_checkbox, self.use_multithreading_checkbox, self.skip_words_input, self.character_search_input, + self.new_char_input, self.add_char_button, self.delete_char_button, self.start_page_input, self.end_page_input, + self.page_range_label, self.to_label, self.character_input, self.custom_folder_input, self.custom_folder_label, + self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, + self.skip_scope_toggle_button # Ensure the new button is in this list ] + for widget in widgets_to_toggle: - if widget: - widget.setEnabled(enabled) + if widget: widget.setEnabled(enabled) + + if enabled: + # When re-enabling UI, ensure skip scope button is correctly enabled/disabled by _handle_filter_mode_change + self._handle_filter_mode_change(self.radio_group.checkedButton(), True) + # else: # When disabling, the loop above handles the skip_scope_toggle_button if self.external_links_checkbox: is_only_links = self.radio_only_links and self.radio_only_links.isChecked() - self.external_links_checkbox.setEnabled(not is_only_links) - if self.log_verbosity_button: - self.log_verbosity_button.setEnabled(True) + self.external_links_checkbox.setEnabled(enabled and not is_only_links) + if self.log_verbosity_button: self.log_verbosity_button.setEnabled(True) multithreading_currently_on = self.use_multithreading_checkbox.isChecked() self.thread_count_input.setEnabled(enabled and multithreading_currently_on) self.thread_count_label.setEnabled(enabled and multithreading_currently_on) - subfolders_currently_on = self.use_subfolders_checkbox.isChecked() self.use_subfolder_per_post_checkbox.setEnabled(enabled and subfolders_currently_on) self.cancel_btn.setEnabled(not enabled) if enabled: - self._handle_filter_mode_change(self.radio_group.checkedButton(), True) + # _handle_filter_mode_change is already called above, which should handle the button's enabled state self._handle_multithreading_toggle(multithreading_currently_on) - + self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False) def cancel_download(self): - if not self.cancel_btn.isEnabled() and not self.cancellation_event.is_set(): - self.log_signal.emit("ℹ️ No active download to cancel or already cancelling.") - return + if not self.cancel_btn.isEnabled() and not self.cancellation_event.is_set(): self.log_signal.emit("ℹ️ No active download to cancel or already cancelling."); return + self.log_signal.emit("⚠️ Requesting cancellation of download process..."); self.cancellation_event.set() + if self.download_thread and self.download_thread.isRunning(): self.download_thread.requestInterruption(); self.log_signal.emit(" Signaled single download thread to interrupt.") + if self.thread_pool: self.log_signal.emit(" Initiating immediate shutdown and cancellation of worker pool tasks..."); self.thread_pool.shutdown(wait=False, cancel_futures=True) + self.external_link_queue.clear(); self._is_processing_external_link_queue = False; self._current_link_post_title = None + self.cancel_btn.setEnabled(False); self.progress_label.setText("Progress: Cancelling..."); self.file_progress_label.setText("") - self.log_signal.emit("⚠️ Requesting cancellation of download process...") - self.cancellation_event.set() - - if self.download_thread and self.download_thread.isRunning(): - self.download_thread.requestInterruption() - self.log_signal.emit(" Signaled single download thread to interrupt.") - - if self.thread_pool: - self.log_signal.emit(" Initiating immediate shutdown and cancellation of worker pool tasks...") - self.thread_pool.shutdown(wait=False, cancel_futures=True) - - self.external_link_queue.clear() - self._is_processing_external_link_queue = False - self._current_link_post_title = None - - self.cancel_btn.setEnabled(False) - self.progress_label.setText("Progress: Cancelling...") - self.file_progress_label.setText("") + def download_finished(self, total_downloaded, total_skipped, cancelled_by_user, kept_original_names_list=None): + if kept_original_names_list is None: + kept_original_names_list = self.all_kept_original_filenames if hasattr(self, 'all_kept_original_filenames') else [] + if kept_original_names_list is None: + kept_original_names_list = [] - def download_finished(self, total_downloaded, total_skipped, cancelled_by_user): status_message = "Cancelled by user" if cancelled_by_user else "Finished" - self.log_signal.emit("="*40 + f"\n🏁 Download {status_message}!\n Summary: Downloaded Files={total_downloaded}, Skipped Files={total_skipped}\n" + "="*40) - self.progress_label.setText(f"{status_message}: {total_downloaded} downloaded, {total_skipped} skipped.") - self.file_progress_label.setText("") - if not cancelled_by_user: - self._try_process_next_external_link() + summary_log = "="*40 + summary_log += f"\n🏁 Download {status_message}!\n Summary: Downloaded Files={total_downloaded}, Skipped Files={total_skipped}\n" + summary_log += "="*40 + self.log_signal.emit(summary_log) + + if kept_original_names_list: + intro_msg = ( + HTML_PREFIX + + "

ℹ️ The following files from multi-file manga posts " + "(after the first file) kept their original names:

" + ) + self.log_signal.emit(intro_msg) + + html_list_items = "" + + self.log_signal.emit(HTML_PREFIX + html_list_items) + self.log_signal.emit("="*40) + + + self.progress_label.setText(f"{status_message}: {total_downloaded} downloaded, {total_skipped} skipped."); self.file_progress_label.setText("") + if not cancelled_by_user: self._try_process_next_external_link() if self.download_thread: try: if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.disconnect(self.handle_main_log) if hasattr(self.download_thread, 'add_character_prompt_signal'): self.download_thread.add_character_prompt_signal.disconnect(self.add_character_prompt_signal) - if hasattr(self.download_thread, 'finished_signal'): self.download_thread.finished_signal.disconnect(self.finished_signal) + if hasattr(self.download_thread, 'finished_signal'): self.download_thread.finished_signal.disconnect(self.download_finished) if hasattr(self.download_thread, 'receive_add_character_result'): self.character_prompt_response_signal.disconnect(self.download_thread.receive_add_character_result) if hasattr(self.download_thread, 'external_link_signal'): self.download_thread.external_link_signal.disconnect(self.handle_external_link_signal) if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.disconnect(self.update_file_progress_display) - except (TypeError, RuntimeError) as e: - self.log_signal.emit(f"ℹ️ Note during single-thread signal disconnection: {e}") + except (TypeError, RuntimeError) as e: self.log_signal.emit(f"ℹ️ Note during single-thread signal disconnection: {e}") self.download_thread = None - - if self.thread_pool: - self.log_signal.emit(" Ensuring worker thread pool is shut down...") - self.thread_pool.shutdown(wait=True, cancel_futures=True) - self.thread_pool = None + if self.thread_pool: self.log_signal.emit(" Ensuring worker thread pool is shut down..."); self.thread_pool.shutdown(wait=True, cancel_futures=True); self.thread_pool = None self.active_futures = [] - - self.set_ui_enabled(True) - self.cancel_btn.setEnabled(False) + self.set_ui_enabled(True); self.cancel_btn.setEnabled(False) def toggle_log_verbosity(self): self.basic_log_mode = not self.basic_log_mode - if self.basic_log_mode: - self.log_verbosity_button.setText("Show Full Log") - self.log_signal.emit("="*20 + " Basic Log Mode Enabled " + "="*20) - else: - self.log_verbosity_button.setText("Show Basic Log") - self.log_signal.emit("="*20 + " Full Log Mode Enabled " + "="*20) + if self.basic_log_mode: self.log_verbosity_button.setText("Show Full Log"); self.log_signal.emit("="*20 + " Basic Log Mode Enabled " + "="*20) + else: self.log_verbosity_button.setText("Show Basic Log"); self.log_signal.emit("="*20 + " Full Log Mode Enabled " + "="*20) def reset_application_state(self): - is_running = (self.download_thread and self.download_thread.isRunning()) or \ - (self.thread_pool is not None and any(not f.done() for f in self.active_futures if f is not None)) - if is_running: - QMessageBox.warning(self, "Reset Error", "Cannot reset while a download is in progress. Please cancel the download first.") - return + if self._is_download_active(): QMessageBox.warning(self, "Reset Error", "Cannot reset while a download is in progress. Please cancel first."); return + self.log_signal.emit("🔄 Resetting application state to defaults..."); self._reset_ui_to_defaults() + self.main_log_output.clear(); self.external_log_output.clear() + if self.show_external_links and not (self.radio_only_links and self.radio_only_links.isChecked()): self.external_log_output.append("🔗 External Links Found:") + self.external_link_queue.clear(); self.extracted_links_cache = []; self._is_processing_external_link_queue = False; self._current_link_post_title = None + self.progress_label.setText("Progress: Idle"); self.file_progress_label.setText("") - self.log_signal.emit("🔄 Resetting application state to defaults...") - self._reset_ui_to_defaults() - self.main_log_output.clear() - self.external_log_output.clear() - if self.show_external_links: - self.external_log_output.append("🔗 External Links Found:") + with self.downloaded_files_lock: count = len(self.downloaded_files); self.downloaded_files.clear(); + if count > 0: self.log_signal.emit(f" Cleared {count} downloaded filename(s) from session memory.") + with self.downloaded_file_hashes_lock: count = len(self.downloaded_file_hashes); self.downloaded_file_hashes.clear(); + if count > 0: self.log_signal.emit(f" Cleared {count} downloaded file hash(es) from session memory.") - self.external_link_queue.clear() - self.extracted_links_cache = [] - self._is_processing_external_link_queue = False - self._current_link_post_title = None + self.total_posts_to_process = 0; self.processed_posts_count = 0; self.download_counter = 0; self.skip_counter = 0 + self.all_kept_original_filenames = [] + self.cancellation_event.clear(); self.basic_log_mode = False + if self.log_verbosity_button: self.log_verbosity_button.setText("Show Basic Log") - self.progress_label.setText("Progress: Idle") - self.file_progress_label.setText("") + self.manga_filename_style = STYLE_POST_TITLE + self.settings.setValue(MANGA_FILENAME_STYLE_KEY, self.manga_filename_style) + + self.skip_words_scope = SKIP_SCOPE_FILES # Reset to default "Files" + self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope) + self._update_skip_scope_button_text() # Update button text - with self.downloaded_files_lock: - count = len(self.downloaded_files) - self.downloaded_files.clear() - if count > 0: self.log_signal.emit(f" Cleared {count} downloaded filename(s) from session memory.") - with self.downloaded_file_hashes_lock: - count = len(self.downloaded_file_hashes) - self.downloaded_file_hashes.clear() - if count > 0: self.log_signal.emit(f" Cleared {count} downloaded file hash(es) from session memory.") - - self.total_posts_to_process = 0 - self.processed_posts_count = 0 - self.download_counter = 0 - self.skip_counter = 0 - - self.cancellation_event.clear() - - self.basic_log_mode = False - if self.log_verbosity_button: - self.log_verbosity_button.setText("Show Basic Log") + self.settings.sync() + self._update_manga_filename_style_button_text() + self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False) self.log_signal.emit("✅ Application reset complete.") - def _reset_ui_to_defaults(self): - self.link_input.clear() - self.dir_input.clear() - self.custom_folder_input.clear() - self.character_input.clear() - self.skip_words_input.clear() - self.start_page_input.clear() - self.end_page_input.clear() - self.new_char_input.clear() - self.character_search_input.clear() - self.thread_count_input.setText("4") - - self.radio_all.setChecked(True) - self.skip_zip_checkbox.setChecked(True) - self.skip_rar_checkbox.setChecked(True) - self.download_thumbnails_checkbox.setChecked(False) - self.compress_images_checkbox.setChecked(False) - self.use_subfolders_checkbox.setChecked(True) - self.use_subfolder_per_post_checkbox.setChecked(False) - self.use_multithreading_checkbox.setChecked(True) + self.link_input.clear(); self.dir_input.clear(); self.custom_folder_input.clear(); self.character_input.clear(); + self.skip_words_input.clear(); self.start_page_input.clear(); self.end_page_input.clear(); self.new_char_input.clear(); + self.character_search_input.clear(); self.thread_count_input.setText("4"); self.radio_all.setChecked(True); + self.skip_zip_checkbox.setChecked(True); self.skip_rar_checkbox.setChecked(True); self.download_thumbnails_checkbox.setChecked(False); + self.compress_images_checkbox.setChecked(False); self.use_subfolders_checkbox.setChecked(True); + self.use_subfolder_per_post_checkbox.setChecked(False); self.use_multithreading_checkbox.setChecked(True); self.external_links_checkbox.setChecked(False) - if self.manga_mode_checkbox: - self.manga_mode_checkbox.setChecked(False) + if self.manga_mode_checkbox: self.manga_mode_checkbox.setChecked(False) + + self.skip_words_scope = SKIP_SCOPE_FILES # Reset scope variable + self._update_skip_scope_button_text() # Update button text + self._handle_filter_mode_change(self.radio_all, True) self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) - self.filter_character_list("") - self.download_btn.setEnabled(True) - self.cancel_btn.setEnabled(False) + self.download_btn.setEnabled(True); self.cancel_btn.setEnabled(False) if self.reset_button: self.reset_button.setEnabled(True) if self.log_verbosity_button: self.log_verbosity_button.setText("Show Basic Log") + self._update_manga_filename_style_button_text() + self.update_ui_for_manga_mode(False) def prompt_add_character(self, character_name): global KNOWN_NAMES - reply = QMessageBox.question(self, "Add Filter Name to Known List?", - f"The name '{character_name}' was encountered or used as a filter.\nIt's not in your known names list (used for folder suggestions).\nAdd it now?", - QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) + reply = QMessageBox.question(self, "Add Filter Name to Known List?", f"The name '{character_name}' was encountered or used as a filter.\nIt's not in your known names list (used for folder suggestions).\nAdd it now?", QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) result = (reply == QMessageBox.Yes) if result: self.new_char_input.setText(character_name) - if self.add_new_character(): - self.log_signal.emit(f"✅ Added '{character_name}' to known names via background prompt.") - else: - result = False - self.log_signal.emit(f"ℹ️ Adding '{character_name}' via background prompt was declined or failed (e.g., similarity warning, duplicate).") + if self.add_new_character(): self.log_signal.emit(f"✅ Added '{character_name}' to known names via background prompt.") + else: result = False; self.log_signal.emit(f"ℹ️ Adding '{character_name}' via background prompt was declined or failed.") self.character_prompt_response_signal.emit(result) def receive_add_character_result(self, result): - with QMutexLocker(self.prompt_mutex): - self._add_character_response = result + with QMutexLocker(self.prompt_mutex): self._add_character_response = result self.log_signal.emit(f" Main thread received character prompt response: {'Action resulted in addition/confirmation' if result else 'Action resulted in no addition/declined'}") @@ -2009,40 +2210,27 @@ if __name__ == '__main__': import traceback try: qt_app = QApplication(sys.argv) - if getattr(sys, 'frozen', False): - base_dir = sys._MEIPASS - else: - base_dir = os.path.dirname(os.path.abspath(__file__)) - + if getattr(sys, 'frozen', False): base_dir = sys._MEIPASS + else: base_dir = os.path.dirname(os.path.abspath(__file__)) icon_path = os.path.join(base_dir, 'Kemono.ico') - if os.path.exists(icon_path): - qt_app.setWindowIcon(QIcon(icon_path)) - else: - print(f"Warning: Application icon 'Kemono.ico' not found at {icon_path}") + if os.path.exists(icon_path): qt_app.setWindowIcon(QIcon(icon_path)) + else: print(f"Warning: Application icon 'Kemono.ico' not found at {icon_path}") downloader_app_instance = DownloaderApp() downloader_app_instance.show() - # --- ADDED: Show Tour Dialog if needed --- - if TourDialog: # Check if TourDialog was imported successfully + if TourDialog: tour_result = TourDialog.run_tour_if_needed(downloader_app_instance) - if tour_result == QDialog.Accepted: - print("Tour completed by user.") - elif tour_result == QDialog.Rejected: - # This means tour was skipped OR already shown. - # You can use TourDialog.settings.value(TourDialog.TOUR_SHOWN_KEY) - # to differentiate if needed, but run_tour_if_needed handles the "show once" logic. - print("Tour skipped or was already shown.") - # --- END ADDED --- + if tour_result == QDialog.Accepted: print("Tour completed by user.") + elif tour_result == QDialog.Rejected: print("Tour skipped or was already shown.") exit_code = qt_app.exec_() print(f"Application finished with exit code: {exit_code}") sys.exit(exit_code) - except SystemExit: - pass # Allow clean exit + except SystemExit: pass except Exception as e: print("--- CRITICAL APPLICATION ERROR ---") print(f"An unhandled exception occurred: {e}") traceback.print_exc() print("--- END CRITICAL ERROR ---") - sys.exit(1) \ No newline at end of file + sys.exit(1) diff --git a/tour.py b/tour.py index df0877d..b73a1c5 100644 --- a/tour.py +++ b/tour.py @@ -11,22 +11,24 @@ class TourStepWidget(QWidget): def __init__(self, title_text, content_text, parent=None): super().__init__(parent) layout = QVBoxLayout(self) - layout.setContentsMargins(20, 20, 20, 20) # Padding around content - layout.setSpacing(15) # Spacing between title and content + layout.setContentsMargins(20, 20, 20, 20) + layout.setSpacing(10) # Adjusted spacing between title and content for bullet points title_label = QLabel(title_text) title_label.setAlignment(Qt.AlignCenter) - title_label.setStyleSheet("font-size: 18px; font-weight: bold; color: #E0E0E0; padding-bottom: 10px;") + # Increased padding-bottom for more space below title + title_label.setStyleSheet("font-size: 18px; font-weight: bold; color: #E0E0E0; padding-bottom: 15px;") content_label = QLabel(content_text) content_label.setWordWrap(True) - content_label.setAlignment(Qt.AlignLeft) # Align text to the left for readability - content_label.setTextFormat(Qt.RichText) - content_label.setStyleSheet("font-size: 12px; color: #C8C8C8; line-height: 1.6;") + content_label.setAlignment(Qt.AlignLeft) + content_label.setTextFormat(Qt.RichText) + # Adjusted line-height for bullet point readability + content_label.setStyleSheet("font-size: 11pt; color: #C8C8C8; line-height: 1.8;") layout.addWidget(title_label) layout.addWidget(content_label) - layout.addStretch(1) + layout.addStretch(1) class TourDialog(QDialog): """ @@ -34,12 +36,12 @@ class TourDialog(QDialog): Includes a "Never show again" checkbox. Uses QSettings to remember this preference. """ - tour_finished_normally = pyqtSignal() - tour_skipped = pyqtSignal() + tour_finished_normally = pyqtSignal() + tour_skipped = pyqtSignal() - CONFIG_ORGANIZATION_NAME = "KemonoDownloader" - CONFIG_APP_NAME_TOUR = "ApplicationTour" - TOUR_SHOWN_KEY = "neverShowTourAgainV2" + CONFIG_ORGANIZATION_NAME = "KemonoDownloader" + CONFIG_APP_NAME_TOUR = "ApplicationTour" + TOUR_SHOWN_KEY = "neverShowTourAgainV3" # Updated key for new tour content def __init__(self, parent=None): super().__init__(parent) @@ -48,19 +50,20 @@ class TourDialog(QDialog): self.setWindowTitle("Welcome to Kemono Downloader!") self.setModal(True) - self.setMinimumSize(520, 450) + # Set fixed square size, smaller than main window + self.setFixedSize(600, 620) # Slightly adjusted for potentially more text self.setStyleSheet(""" QDialog { background-color: #2E2E2E; border: 1px solid #5A5A5A; } QLabel { - color: #E0E0E0; + color: #E0E0E0; } QCheckBox { color: #C0C0C0; font-size: 10pt; - spacing: 5px; + spacing: 5px; } QCheckBox::indicator { width: 13px; @@ -83,117 +86,129 @@ class TourDialog(QDialog): } """) self._init_ui() - self._center_on_screen() # Call method to center the dialog + self._center_on_screen() def _center_on_screen(self): """Centers the dialog on the screen.""" try: - # Get the geometry of the screen screen_geometry = QDesktopWidget().screenGeometry() - # Get the geometry of the dialog dialog_geometry = self.frameGeometry() - - # Calculate the center point for the dialog center_point = screen_geometry.center() dialog_geometry.moveCenter(center_point) - - # Move the top-left point of the dialog to the calculated position self.move(dialog_geometry.topLeft()) - print(f"[Tour] Dialog centered at: {dialog_geometry.topLeft()}") except Exception as e: print(f"[Tour] Error centering dialog: {e}") def _init_ui(self): main_layout = QVBoxLayout(self) - main_layout.setContentsMargins(0, 0, 0, 0) + main_layout.setContentsMargins(0, 0, 0, 0) main_layout.setSpacing(0) self.stacked_widget = QStackedWidget() - main_layout.addWidget(self.stacked_widget, 1) + main_layout.addWidget(self.stacked_widget, 1) - # --- Define Tour Steps --- + # --- Define Tour Steps with Updated Content --- step1_content = ( - "Hello! This quick tour will walk you through the main features of the Kemono Downloader. " - "Our goal is to help you easily download content from Kemono and Coomer.

" - " • Use the Next and Back buttons to navigate.
" - " • Click Skip Tour to close this guide at any time.
" - " • Check 'Never show this tour again' if you don't want to see this on future startups." + "Hello! This quick tour will walk you through the main features of the Kemono Downloader." + "" ) self.step1 = TourStepWidget("👋 Welcome!", step1_content) step2_content = ( - "Let's start with the basics for downloading:

" - " • 🔗 Kemono Creator/Post URL:
" + "Let's start with the basics for downloading:" + "" ) - self.step2 = TourStepWidget("① Getting Started: URLs & Location", step2_content) + self.step2 = TourStepWidget("① Getting Started", step2_content) step3_content = ( - "Refine what you download with these filters:

" - " • 🎯 Filter by Character(s):
" - " Enter character names, separated by commas (e.g., Tifa, Aerith). " - "If 'Separate Folders by Name/Title' is on, this helps sort files into folders. " - "In Manga Mode, this filters posts by matching the post title. In Normal Mode, it filters individual files by their filename.

" - " • 🚫 Skip Posts/Files with Words:
" - " Enter words, separated by commas (e.g., WIP, sketch). " - "Files or posts containing these words in their name (or post title if 'Separate Folders' is off and not Manga Mode) will be skipped.

" - " • Filter Files (Radio Buttons):
" - " - All: Download all file types.
" - " - Images/GIFs: Only download common image formats and GIFs.
" - " - Videos: Only download common video formats.
" - " - 🔗 Only Links: Don't download files; instead, extract and display any external links found in post descriptions (like Mega, Google Drive links). The log area will show these links." + "Refine what you download with these filters:" + "" ) - self.step3 = TourStepWidget("② Filtering Your Downloads", step3_content) + self.step3 = TourStepWidget("② Filtering Downloads", step3_content) step4_content = ( - "More options to customize your downloads:

" - " • Skip .zip / Skip .rar:
" - " Check these to avoid downloading .zip or .rar archive files.

" - " • Download Thumbnails Only:
" - " If checked, only downloads the small preview images (thumbnails) instead of full-sized files. Useful for a quick overview.

" - " • Compress Large Images:
" - " If you have the 'Pillow' library installed, this will try to convert very large images (over 1.5MB) to a smaller WebP format to save space. If WebP isn't smaller, the original is kept.

" - " • 🗄️ Custom Folder Name (Single Post Only):
" - " When downloading a single post URL and using subfolders, you can type a specific name here for that post's folder." + "More options to customize your downloads:" + "" ) - self.step4 = TourStepWidget("③ Fine-Tuning: Archives & Images", step4_content) - + self.step4 = TourStepWidget("③ Fine-Tuning Downloads", step4_content) + step5_content = ( - "Organize your downloads and manage performance:

" - " • ⚙️ Separate Folders by Name/Title:
" - " If checked, the downloader tries to create subfolders based on character names (if you used the Character Filter) or by deriving a name from the post title using your 'Known Shows/Characters' list.

" - " • Subfolder per Post:
" - " Only active if 'Separate Folders' is on. Creates an additional subfolder for each individual post inside the character/title folder, named like 'PostID_PostTitle'.

" - " • 🚀 Use Multithreading (Threads):
" - " For creator pages, this can speed up downloads by processing multiple posts at once. For single post URLs, it always uses one thread. Be cautious with very high thread counts.

" - " • 📖 Manga/Comic Mode (Creator URLs only):
" - " Downloads posts from oldest to newest. It also renames files based on the post title and an extracted or generated sequence number (e.g., MangaTitle - 01.jpg, MangaTitle - 02.jpg). Best used with a character filter matching the series title for correct naming.

" - " • 🎭 Known Shows/Characters:
" - " Add names here (e.g., a game title, a character's full name). When 'Separate Folders' is on and no character filter is used, the app looks for these known names in post titles to create appropriate folders." + "Organize your downloads and manage performance:" + "" ) self.step5 = TourStepWidget("④ Organization & Performance", step5_content) step6_content = ( - "Monitoring and Controls:

" - " • 📜 Progress Log / Extracted Links Log:
" - " This area shows detailed messages about the download process or lists extracted links if 'Only Links' mode is active.

" - " • Show External Links in Log (Checkbox):
" - " If checked (and not in 'Only Links' mode), a second log panel appears to show external links found in post descriptions.

" - " • Show Basic/Full Log (Button):
" - " Toggles the main log between showing all messages (Full) or only important ones (Basic).

" - " • 🔄 Reset (Button):
" - " Clears all input fields and logs to their default state. Only works when no download is active.

" - " • ⬇️ Start Download / ❌ Cancel (Buttons):
" - " Start begins the process. Cancel stops an ongoing download." - "

You're ready to start downloading! Click 'Finish'." + "Monitoring and Controls:" + "" + "
You're all set! Click 'Finish' to close the tour and start using the downloader." ) self.step6 = TourStepWidget("⑤ Logs & Final Controls", step6_content) @@ -202,12 +217,12 @@ class TourDialog(QDialog): for step_widget in self.tour_steps: self.stacked_widget.addWidget(step_widget) - bottom_controls_layout = QVBoxLayout() - bottom_controls_layout.setContentsMargins(15, 10, 15, 15) + bottom_controls_layout = QVBoxLayout() + bottom_controls_layout.setContentsMargins(15, 10, 15, 15) # Adjusted margins bottom_controls_layout.setSpacing(10) self.never_show_again_checkbox = QCheckBox("Never show this tour again") - bottom_controls_layout.addWidget(self.never_show_again_checkbox, 0, Qt.AlignLeft) + bottom_controls_layout.addWidget(self.never_show_again_checkbox, 0, Qt.AlignLeft) buttons_layout = QHBoxLayout() buttons_layout.setSpacing(10) @@ -227,29 +242,28 @@ class TourDialog(QDialog): buttons_layout.addStretch(1) buttons_layout.addWidget(self.back_button) buttons_layout.addWidget(self.next_button) - - bottom_controls_layout.addLayout(buttons_layout) - main_layout.addLayout(bottom_controls_layout) + + bottom_controls_layout.addLayout(buttons_layout) + main_layout.addLayout(bottom_controls_layout) self._update_button_states() def _handle_exit_actions(self): if self.never_show_again_checkbox.isChecked(): self.settings.setValue(self.TOUR_SHOWN_KEY, True) - self.settings.sync() - print(f"[Tour] '{self.TOUR_SHOWN_KEY}' setting updated to True.") - else: - print(f"[Tour] '{self.TOUR_SHOWN_KEY}' setting not set to True (checkbox was unchecked on exit).") + self.settings.sync() + # else: + # print(f"[Tour] '{self.TOUR_SHOWN_KEY}' setting not set to True (checkbox was unchecked on exit).") def _next_step_action(self): if self.current_step < len(self.tour_steps) - 1: self.current_step += 1 self.stacked_widget.setCurrentIndex(self.current_step) - else: + else: self._handle_exit_actions() self.tour_finished_normally.emit() - self.accept() + self.accept() self._update_button_states() def _previous_step(self): @@ -261,7 +275,7 @@ class TourDialog(QDialog): def _skip_tour_action(self): self._handle_exit_actions() self.tour_skipped.emit() - self.reject() + self.reject() def _update_button_states(self): if self.current_step == len(self.tour_steps) - 1: @@ -272,45 +286,39 @@ class TourDialog(QDialog): @staticmethod def run_tour_if_needed(parent_app_window): - print("[Tour] Attempting to run tour (run_tour_if_needed called)...") try: settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR) - never_show_again = settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool) - print(f"[Tour] Current '{TourDialog.TOUR_SHOWN_KEY}' setting is: {never_show_again}") + never_show_again = settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool) if never_show_again: - print("[Tour] Skipping tour because 'Never show again' was previously selected.") - return QDialog.Rejected + return QDialog.Rejected - print("[Tour] 'Never show again' is False. Proceeding to create and show tour dialog.") - tour_dialog = TourDialog(parent_app_window) # _center_on_screen is called in __init__ - print("[Tour] TourDialog instance created successfully.") - - result = tour_dialog.exec_() - print(f"[Tour] Tour dialog exec_() finished. Result code: {result} (Accepted={QDialog.Accepted}, Rejected={QDialog.Rejected})") + tour_dialog = TourDialog(parent_app_window) + result = tour_dialog.exec_() return result except Exception as e: print(f"[Tour] CRITICAL ERROR in run_tour_if_needed: {e}") - traceback.print_exc() - return QDialog.Rejected + traceback.print_exc() + return QDialog.Rejected if __name__ == '__main__': app = QApplication(sys.argv) + + # --- For testing: force the tour to show by resetting the flag --- # print("[Tour Test] Resetting 'Never show again' flag for testing purposes.") # test_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR) - # print(f"[Tour Test] Before reset, '{TourDialog.TOUR_SHOWN_KEY}' is: {test_settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)}") - # test_settings.setValue(TourDialog.TOUR_SHOWN_KEY, False) - # test_settings.sync() - # print(f"[Tour Test] After reset, '{TourDialog.TOUR_SHOWN_KEY}' is: {test_settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)}") + # test_settings.setValue(TourDialog.TOUR_SHOWN_KEY, False) # Set to False to force tour + # test_settings.sync() + # --- End testing block --- print("[Tour Test] Running tour standalone...") - result = TourDialog.run_tour_if_needed(None) + result = TourDialog.run_tour_if_needed(None) if result == QDialog.Accepted: print("[Tour Test] Tour dialog was accepted (Finished).") elif result == QDialog.Rejected: print("[Tour Test] Tour dialog was rejected (Skipped or previously set to 'Never show again').") - + final_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR) print(f"[Tour Test] Final state of '{TourDialog.TOUR_SHOWN_KEY}' in settings: {final_settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)}")