diff --git a/Known.txt b/Known.txt index c35c647..e69de29 100644 --- a/Known.txt +++ b/Known.txt @@ -1 +0,0 @@ -Hinata diff --git a/downloader_utils.py b/downloader_utils.py index fb0a6a2..e14bed3 100644 --- a/downloader_utils.py +++ b/downloader_utils.py @@ -5,7 +5,10 @@ import re import threading import queue import hashlib -from concurrent.futures import ThreadPoolExecutor, Future, CancelledError +import http.client +import traceback +from concurrent.futures import ThreadPoolExecutor, Future, CancelledError, as_completed +import html # Import the html module for unescaping from PyQt5.QtCore import QObject, pyqtSignal, QThread, QMutex, QMutexLocker from urllib.parse import urlparse @@ -15,843 +18,1063 @@ except ImportError: print("ERROR: Pillow library not found. Please install it: pip install Pillow") Image = None + from io import BytesIO -fastapi_app = None -KNOWN_NAMES = [] + +fastapi_app = None # Placeholder, not used in this script +KNOWN_NAMES = [] # Global list, populated by main.py + + +def is_title_match_for_character(post_title, character_name_filter): + """Checks if a post title contains a specific character name (case-insensitive, whole word).""" + if not post_title: + return False + if not character_name_filter: # If no filter, it's considered a match (or handle as per broader logic) + return True + + # Regex to match whole word, case insensitive + pattern = r"(?i)\b" + re.escape(character_name_filter) + r"\b" + + if re.search(pattern, post_title): + return True + return False + def clean_folder_name(name): - if not isinstance(name, str): name = str(name) - cleaned = re.sub(r'[^\w\s\-\_]', '', name) + """Cleans a string to be suitable for a folder name.""" + if not isinstance(name, str): name = str(name) # Ensure string + # Remove invalid characters, replace spaces with underscores + cleaned = re.sub(r'[^\w\s\-\_]', '', name) # Allow alphanumeric, whitespace, hyphen, underscore return cleaned.strip().replace(' ', '_') + def clean_filename(name): - if 
not isinstance(name, str): name = str(name) - cleaned = re.sub(r'[^\w\s\-\_\.]', '', name) - return cleaned.strip().replace(' ', '_') + """Cleans a string to be suitable for a filename, preserving extension.""" + if not isinstance(name, str): name = str(name) # Ensure string + # Remove invalid characters, replace spaces with underscores + # Allow alphanumeric, whitespace, hyphen, underscore, and period (for extension) + cleaned = re.sub(r'[^\w\s\-\_\.]', '', name) + return cleaned.strip().replace(' ', '_') + def extract_folder_name_from_title(title, unwanted_keywords): + """Extracts a potential folder name from a title, avoiding common unwanted keywords.""" if not title: return 'Uncategorized' title_lower = title.lower() - tokens = title_lower.split() + # Tokenize by words, prefer longer, more specific tokens if possible + tokens = re.findall(r'\b[\w\-]+\b', title_lower) # Find alphanumeric words with hyphens for token in tokens: - clean_token = clean_folder_name(token) + clean_token = clean_folder_name(token) # Clean the token itself if clean_token and clean_token not in unwanted_keywords: - return clean_token - return 'Uncategorized' + return clean_token # Return the first suitable token + # If no suitable token found, use the cleaned full title (or fallback) + cleaned_full_title = clean_folder_name(title) + return cleaned_full_title if cleaned_full_title else 'Uncategorized' -def match_folders_from_title(title, known_names, unwanted_keywords): - if not title: return [] - cleaned_title = clean_folder_name(title.lower()) + +def match_folders_from_title(title, names_to_match, unwanted_keywords): + """Matches known names (characters/shows) in a title to suggest folder names.""" + if not title or not names_to_match: return [] + title_lower = title.lower() matched_cleaned_names = set() + # Sort by length to match longer names first (e.g., "Luffy Gear 5" before "Luffy") + sorted_names_to_match = sorted(names_to_match, key=len, reverse=True) + + for name in 
sorted_names_to_match: + name_lower = name.lower() + if not name_lower: continue # Skip empty names + + pattern = r'\b' + re.escape(name_lower) + r'\b' # Whole word match + if re.search(pattern, title_lower): + cleaned_name = clean_folder_name(name).lower() # Clean the original matched name + if cleaned_name and cleaned_name not in unwanted_keywords: + matched_cleaned_names.add(cleaned_name) + return sorted(list(matched_cleaned_names)) - for name in known_names: - cleaned_name_for_match = clean_folder_name(name.lower()) - if not cleaned_name_for_match: continue - if cleaned_name_for_match in cleaned_title: - if cleaned_name_for_match not in unwanted_keywords: - matched_cleaned_names.add(cleaned_name_for_match) - return list(matched_cleaned_names) def is_image(filename): + """Checks if a filename likely represents an image.""" if not filename: return False return filename.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.gif')) + def is_video(filename): + """Checks if a filename likely represents a video.""" if not filename: return False return filename.lower().endswith(('.mp4', '.mov', '.mkv', '.webm', '.avi', '.wmv')) + def is_zip(filename): + """Checks if a filename likely represents a ZIP archive.""" if not filename: return False return filename.lower().endswith('.zip') + def is_rar(filename): + """Checks if a filename likely represents a RAR archive.""" if not filename: return False return filename.lower().endswith('.rar') + def is_post_url(url): + """Checks if a URL likely points to a specific post.""" if not isinstance(url, str): return False return '/post/' in urlparse(url).path + def extract_post_info(url_string): + """Extracts service, user ID, and post ID from a Kemono/Coomer URL.""" service, user_id, post_id = None, None, None - if not isinstance(url_string, str) or not url_string.strip(): - return None, None, None + if not isinstance(url_string, str) or not url_string.strip(): return None, None, None try: parsed_url = urlparse(url_string.strip()) 
domain = parsed_url.netloc.lower() + is_kemono = any(d in domain for d in ['kemono.su', 'kemono.party']) + is_coomer = any(d in domain for d in ['coomer.su', 'coomer.party']) + + if not (is_kemono or is_coomer): return None, None, None # Not a supported domain + path_parts = [part for part in parsed_url.path.strip('/').split('/') if part] - is_kemono = 'kemono.su' in domain or 'kemono.party' in domain - is_coomer = 'coomer.su' in domain or 'coomer.party' in domain - if not (is_kemono or is_coomer): - return None, None, None + + # Standard URL format: //user//post/ + # Or creator feed: //user/ if len(path_parts) >= 3 and path_parts[1].lower() == 'user': service = path_parts[0] user_id = path_parts[2] if len(path_parts) >= 5 and path_parts[3].lower() == 'post': post_id = path_parts[4] return service, user_id, post_id - if len(path_parts) >= 5 and path_parts[0].lower() == 'api' and path_parts[1].lower() == 'v1' and path_parts[3].lower() == 'user': + + # API URL format: /api/v1//user//post/ + # Or creator feed: /api/v1//user/ + if len(path_parts) >= 5 and path_parts[0].lower() == 'api' and \ + path_parts[1].lower() == 'v1' and path_parts[3].lower() == 'user': service = path_parts[2] user_id = path_parts[4] if len(path_parts) >= 7 and path_parts[5].lower() == 'post': - post_id = path_parts[6] + post_id = path_parts[6] return service, user_id, post_id - except ValueError: - print(f"Debug: ValueError parsing URL '{url_string}'") - return None, None, None + except Exception as e: + # Log error if needed, but return None, None, None for graceful failure print(f"Debug: Exception during extract_post_info for URL '{url_string}': {e}") - return None, None, None return None, None, None -def fetch_posts_paginated(api_url_base, headers, offset, logger): + +def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None): + """Fetches a single page of posts from the API. 
Checks cancellation_event if provided.""" + if cancellation_event and cancellation_event.is_set(): + logger(" Fetch cancelled before request.") + raise RuntimeError("Fetch operation cancelled by user.") # Or return empty list + paginated_url = f'{api_url_base}?o={offset}' logger(f" Fetching: {paginated_url}") try: - response = requests.get(paginated_url, headers=headers, timeout=45) - response.raise_for_status() - if 'application/json' not in response.headers.get('Content-Type', ''): - raise RuntimeError(f"Unexpected content type received: {response.headers.get('Content-Type')}. Body: {response.text[:200]}") + response = requests.get(paginated_url, headers=headers, timeout=(10, 60)) # connect timeout, read timeout + response.raise_for_status() # Raise HTTPError for bad responses (4XX or 5XX) + if 'application/json' not in response.headers.get('Content-Type', '').lower(): + logger(f"⚠️ Unexpected content type from API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}") + return [] # Return empty list on unexpected content type return response.json() except requests.exceptions.Timeout: - raise RuntimeError(f"Timeout fetching page offset {offset}") + raise RuntimeError(f"Timeout fetching offset {offset} from {paginated_url}") except requests.exceptions.RequestException as e: - err_msg = f"Error fetching page offset {offset}: {e}" + err_msg = f"Error fetching offset {offset} from {paginated_url}: {e}" if e.response is not None: err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})" raise RuntimeError(err_msg) - except ValueError as e: - raise RuntimeError(f"Error decoding JSON response for offset {offset}: {e}. Body: {response.text[:200]}") - except Exception as e: - raise RuntimeError(f"Unexpected error processing page offset {offset}: {e}") + except ValueError as e: # JSONDecodeError inherits from ValueError + raise RuntimeError(f"Error decoding JSON from offset {offset} ({paginated_url}): {e}. 
Response text: {response.text[:200]}") + except Exception as e: # Catch any other unexpected errors + raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}") -def download_from_api(api_url_input, logger=print): - headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'} + +def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False, cancellation_event=None): + """ + Generator function to fetch posts from Kemono/Coomer API. + Handles pagination and specific post fetching. + In manga_mode, it fetches all posts and yields them in reversed order (oldest first). + Checks cancellation_event if provided. + """ + headers = {'User-Agent': 'Mozilla/5.0'} # Basic user agent service, user_id, target_post_id = extract_post_info(api_url_input) - if not service or not user_id: - logger(f"❌ Invalid or unrecognized URL: {api_url_input}. Cannot fetch.") + if cancellation_event and cancellation_event.is_set(): + logger(" Download_from_api cancelled at start.") return - parsed_input = urlparse(api_url_input) - api_domain = parsed_input.netloc if ('kemono.su' in parsed_input.netloc.lower() or 'coomer.su' in parsed_input.netloc.lower() or 'kemono.party' in parsed_input.netloc.lower() or 'coomer.party' in parsed_input.netloc.lower()) else "kemono.su" - api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}" + if not service or not user_id: + logger(f"❌ Invalid URL or could not extract service/user: {api_url_input}") + return # Stop generation if URL is invalid - offset = 0 - page = 1 - processed_target_post = False + if target_post_id and (start_page or end_page): + logger("⚠️ Page range (start/end page) is ignored when a specific post URL is provided.") + start_page = end_page = None # Reset page range for single post URL + + is_creator_feed_for_manga = manga_mode and not target_post_id + + parsed_input = 
urlparse(api_url_input) + api_domain = parsed_input.netloc + # Ensure we use a valid API domain, defaulting if necessary + if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']): + logger(f"⚠️ Unrecognized domain '{api_domain}'. Defaulting to kemono.su for API calls.") + api_domain = "kemono.su" + + api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}" + page_size = 50 # API returns 50 posts per page + + if is_creator_feed_for_manga: + logger(" Manga Mode: Fetching all posts to reverse order (oldest posts processed first)...") + all_posts_for_manga_mode = [] + current_offset_manga = 0 + while True: # Loop to fetch all pages + if cancellation_event and cancellation_event.is_set(): + logger(" Manga mode post fetching cancelled.") + break + try: + posts_batch_manga = fetch_posts_paginated(api_base_url, headers, current_offset_manga, logger, cancellation_event) + if not isinstance(posts_batch_manga, list): + logger(f"❌ API Error (Manga Mode): Expected list of posts, got {type(posts_batch_manga)}.") + break + if not posts_batch_manga: # No more posts + logger("✅ Reached end of posts (Manga Mode fetch all).") + break + all_posts_for_manga_mode.extend(posts_batch_manga) + current_offset_manga += len(posts_batch_manga) + time.sleep(0.6) + except RuntimeError as e: + if "cancelled by user" in str(e).lower(): # Check if it was our cancellation + logger(f"ℹ️ Manga mode pagination stopped due to cancellation: {e}") + else: + logger(f"❌ {e}\n Aborting manga mode pagination.") + break + except Exception as e: + logger(f"❌ Unexpected error during manga mode fetch: {e}") + traceback.print_exc() + break + + if cancellation_event and cancellation_event.is_set(): return + + if all_posts_for_manga_mode: + logger(f" Manga Mode: Fetched {len(all_posts_for_manga_mode)} total posts. 
Reversing order...") + all_posts_for_manga_mode.reverse() + + for i in range(0, len(all_posts_for_manga_mode), page_size): + if cancellation_event and cancellation_event.is_set(): + logger(" Manga mode post yielding cancelled.") + break + yield all_posts_for_manga_mode[i:i + page_size] + else: + logger(" Manga Mode: No posts found to process.") + return + + current_page_num = 1 + current_offset = 0 + processed_target_post_flag = False + + if start_page and start_page > 1: + current_offset = (start_page - 1) * page_size + current_page_num = start_page + logger(f" Starting from page {current_page_num} (calculated offset {current_offset}).") while True: - if target_post_id and processed_target_post: - logger(f"✅ Target post {target_post_id} found and processed. Stopping.") + if cancellation_event and cancellation_event.is_set(): + logger(" Post fetching loop cancelled.") + break + if end_page and current_page_num > end_page: + logger(f"✅ Reached specified end page ({end_page}). Stopping.") + break + if target_post_id and processed_target_post_flag: + logger(f"✅ Target post {target_post_id} has been processed.") break - logger(f"\n🔄 Fetching page {page} (offset {offset}) for user {user_id} on {api_domain}...") try: - posts_batch = fetch_posts_paginated(api_base_url, headers, offset, logger) + posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event) if not isinstance(posts_batch, list): - logger(f"❌ API Error: Expected a list of posts, got {type(posts_batch)}. 
Response: {str(posts_batch)[:200]}") - break + logger(f"❌ API Error: Expected list of posts, got {type(posts_batch)} at page {current_page_num}.") + break except RuntimeError as e: - logger(f"❌ {e}") - logger(" Aborting pagination due to error.") + if "cancelled by user" in str(e).lower(): + logger(f"ℹ️ Pagination stopped due to cancellation: {e}") + else: + logger(f"❌ {e}\n Aborting pagination at page {current_page_num}.") break except Exception as e: - logger(f"❌ Unexpected error during fetch loop: {e}") - break - - if not posts_batch: - if page == 1 and not target_post_id: - logger("😕 No posts found for this creator.") - elif not target_post_id: - logger("✅ Reached end of posts.") + logger(f"❌ Unexpected error fetching page {current_page_num}: {e}") + traceback.print_exc() break + + if not posts_batch: + if current_page_num == (start_page or 1) and not target_post_id : + logger("😕 No posts found on the first page checked.") + elif not target_post_id: + logger("✅ Reached end of posts (no more content).") + break - logger(f"📦 Found {len(posts_batch)} posts on page {page}.") - - if target_post_id: - matching_post = next((post for post in posts_batch if str(post.get('id')) == str(target_post_id)), None) - + if target_post_id: + matching_post = next((p for p in posts_batch if str(p.get('id')) == str(target_post_id)), None) if matching_post: - logger(f"🎯 Found target post {target_post_id} on page {page}.") - yield [matching_post] - processed_target_post = True + logger(f"🎯 Found target post {target_post_id}.") + yield [matching_post] + processed_target_post_flag = True else: - logger(f" Target post {target_post_id} not found on this page.") - pass - else: + logger(f"❌ Target post {target_post_id} not found in the batch from offset {current_offset}. 
This may indicate the post URL is incorrect or the API behavior is unexpected.") + break + else: yield posts_batch - if not (target_post_id and processed_target_post): - page_size = 50 - offset += page_size - page += 1 - time.sleep(0.6) - if target_post_id and not processed_target_post: - logger(f"❌ Target post ID {target_post_id} was not found for this creator.") + + if not (target_post_id and processed_target_post_flag): + if not posts_batch : break + current_offset += len(posts_batch) + current_page_num += 1 + time.sleep(0.6) + else: + break + + if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()): + logger(f"❌ Target post {target_post_id} could not be found after checking relevant pages.") + + +def get_link_platform(url): + """Attempts to identify the platform of an external link.""" + try: + domain = urlparse(url).netloc.lower() + # Specific known platforms + if 'drive.google.com' in domain: return 'google drive' + if 'mega.nz' in domain or 'mega.io' in domain: return 'mega' + if 'dropbox.com' in domain: return 'dropbox' + if 'patreon.com' in domain: return 'patreon' + if 'instagram.com' in domain: return 'instagram' + if 'twitter.com' in domain or 'x.com' in domain: return 'twitter/x' + if 'discord.gg' in domain or 'discord.com/invite' in domain: return 'discord invite' + if 'pixiv.net' in domain: return 'pixiv' + if 'kemono.su' in domain or 'kemono.party' in domain: return 'kemono' # Explicitly identify kemono + if 'coomer.su' in domain or 'coomer.party' in domain: return 'coomer' # Explicitly identify coomer + + # Generic extraction for other domains + parts = domain.split('.') + if len(parts) >= 2: + # Return the second-to-last part for common structures (e.g., 'google' from google.com) + # Avoid returning generic TLDs like 'com', 'org', 'net' as the platform + if parts[-2] not in ['com', 'org', 'net', 'gov', 'edu', 'co'] or len(parts) == 2: + return parts[-2] + elif len(parts) >= 3: # Handle 
cases like 'google.co.uk' -> 'google' + return parts[-3] + else: # Fallback to full domain if unsure + return domain + return 'external' # Default if domain parsing fails + except Exception: return 'unknown' # Error case + class PostProcessorSignals(QObject): - progress_signal = pyqtSignal(str) - file_download_status_signal = pyqtSignal(bool) + """Defines signals used by PostProcessorWorker to communicate with the GUI thread.""" + progress_signal = pyqtSignal(str) + file_download_status_signal = pyqtSignal(bool) + # MODIFIED: Added link_text argument + external_link_signal = pyqtSignal(str, str, str, str) # post_title, link_text, link_url, platform + file_progress_signal = pyqtSignal(str, int, int) + class PostProcessorWorker: - def __init__(self, post_data, download_root, known_names, filter_character, + """Processes a single post: determines save paths, downloads files, handles compression.""" + def __init__(self, post_data, download_root, known_names, + filter_character_list, unwanted_keywords, filter_mode, skip_zip, skip_rar, - use_subfolders, target_post_id_from_initial_url, custom_folder_name, + use_subfolders, use_post_subfolders, target_post_id_from_initial_url, custom_folder_name, compress_images, download_thumbnails, service, user_id, api_url_input, cancellation_event, signals, downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock, - skip_words_list=None): + skip_words_list=None, show_external_links=False, + extract_links_only=False, num_file_threads=4, skip_current_file_flag=None, + manga_mode_active=False + ): self.post = post_data self.download_root = download_root self.known_names = known_names - self.filter_character = filter_character + self.filter_character_list = filter_character_list if filter_character_list else [] self.unwanted_keywords = unwanted_keywords self.filter_mode = filter_mode self.skip_zip = skip_zip self.skip_rar = skip_rar self.use_subfolders = use_subfolders + self.use_post_subfolders = 
use_post_subfolders self.target_post_id_from_initial_url = target_post_id_from_initial_url self.custom_folder_name = custom_folder_name self.compress_images = compress_images self.download_thumbnails = download_thumbnails self.service = service self.user_id = user_id - self.api_url_input = api_url_input + self.api_url_input = api_url_input self.cancellation_event = cancellation_event - self.signals = signals - self.skip_current_file_flag = threading.Event() - self.is_downloading_file = False - self.current_download_path = None - self.downloaded_files = downloaded_files - self.downloaded_file_hashes = downloaded_file_hashes - self.downloaded_files_lock = downloaded_files_lock - self.downloaded_file_hashes_lock = downloaded_file_hashes_lock + self.signals = signals + self.skip_current_file_flag = skip_current_file_flag + + self.downloaded_files = downloaded_files if downloaded_files is not None else set() + self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set() + self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock() + self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock() + self.skip_words_list = skip_words_list if skip_words_list is not None else [] + self.show_external_links = show_external_links + self.extract_links_only = extract_links_only + self.num_file_threads = num_file_threads + + self.manga_mode_active = manga_mode_active + if self.compress_images and Image is None: - self.logger("⚠️ Image compression enabled, but Pillow library is not loaded. 
Disabling compression.") + self.logger("⚠️ Image compression disabled: Pillow library not found.") self.compress_images = False def logger(self, message): + """Emits a log message via the progress_signal if available.""" if self.signals and hasattr(self.signals, 'progress_signal'): self.signals.progress_signal.emit(message) - else: - print(f"(Worker Log): {message}") + else: print(f"(Worker Log - No Signal): {message}") def check_cancel(self): - is_cancelled = self.cancellation_event.is_set() - return is_cancelled + """Checks if cancellation has been requested.""" + return self.cancellation_event.is_set() - def skip_file(self): - pass + def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event, post_title="", file_index_in_post=0): + """Downloads a single file, handles retries, compression, and hash checking.""" + if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1 - def process(self): - if self.check_cancel(): return 0, 0 + file_url = file_info.get('url') + api_original_filename = file_info.get('_original_name_for_log', file_info.get('name')) - total_downloaded_post = 0 - total_skipped_post = 0 - headers = {'User-Agent': 'Mozilla/5.0', 'Referer': f'https://{urlparse(self.api_url_input).netloc}/'} - url_pattern = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+') - LARGE_THUMBNAIL_THRESHOLD = 1 * 1024 * 1024 - - post = self.post - api_title = post.get('title', '') - title = api_title if api_title else 'untitled_post' - post_id = post.get('id', 'unknown_id') - post_file_info = post.get('file') - attachments = post.get('attachments', []) - post_content = post.get('content', '') - is_target_post = (self.target_post_id_from_initial_url is not None) and (str(post_id) == str(self.target_post_id_from_initial_url)) - - self.logger(f"\n--- Processing Post {post_id} ('{title[:50]}...') (Thread: {threading.current_thread().name}) ---") - if self.skip_words_list: - title_lower = title.lower() - for 
skip_word in self.skip_words_list: - if skip_word.lower() in title_lower: - self.logger(f" -> Skip Post (Title): Post {post_id} title ('{title[:30]}...') contains skip word '{skip_word}'. Skipping entire post.") - return 0, 1 - - - if not isinstance(attachments, list): - self.logger(f"⚠️ Corrupt attachment data for post {post_id}. Skipping attachments.") - attachments = [] - valid_folder_paths = [] - folder_decision_reason = "" - api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su" - if is_target_post and self.custom_folder_name and self.use_subfolders: - folder_path_full = os.path.join(self.download_root, self.custom_folder_name) - valid_folder_paths = [folder_path_full] - folder_decision_reason = f"Using custom folder for target post: '{self.custom_folder_name}'" - if not valid_folder_paths and self.use_subfolders: - folder_names_for_post = [] - if self.filter_character: - clean_char_filter = clean_folder_name(self.filter_character.lower()) - matched_names_in_title = match_folders_from_title(title, self.known_names, self.unwanted_keywords) - - if clean_char_filter and clean_char_filter in matched_names_in_title: - folder_names_for_post = [clean_char_filter] - folder_decision_reason = f"Character filter '{self.filter_character}' matched title. Using folder '{clean_char_filter}'." 
- else: - self.logger(f" -> Filter Skip Post {post_id}: Character filter '{self.filter_character}' not found in title matches ({matched_names_in_title}).") - return 0, 1 - else: - matched_folders = match_folders_from_title(title, self.known_names, self.unwanted_keywords) - if matched_folders: - folder_names_for_post = matched_folders - folder_decision_reason = f"Found known name(s) in title: {matched_folders}" - else: - extracted_folder = extract_folder_name_from_title(title, self.unwanted_keywords) - folder_names_for_post = [extracted_folder] - folder_decision_reason = f"No known names in title. Using derived folder: '{extracted_folder}'" - for folder_name in folder_names_for_post: - folder_path_full = os.path.join(self.download_root, folder_name) - valid_folder_paths.append(folder_path_full) - if not valid_folder_paths: - valid_folder_paths = [self.download_root] - if not folder_decision_reason: - folder_decision_reason = "Subfolders disabled or no specific folder determined. Using root download directory." - - - self.logger(f" Folder Decision: {folder_decision_reason}") - if not valid_folder_paths: - self.logger(f" ERROR: No valid folder paths determined for post {post_id}. Skipping.") - return 0, 1 - if post_content: - try: - found_links = re.findall(r'href=["\'](https?://[^"\']+)["\']', post_content) - if found_links: - self.logger(f"🔗 Links found in post content:") - unique_links = sorted(list(set(found_links))) - for link in unique_links[:10]: - if not any(x in link for x in ['.css', '.js', 'javascript:']): - self.logger(f" - {link}") - if len(unique_links) > 10: - self.logger(f" - ... 
({len(unique_links) - 10} more links not shown)") - except Exception as e: - self.logger(f"⚠️ Error parsing content for links in post {post_id}: {e}") - files_to_process_for_download = [] - api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su" - - if self.download_thumbnails: - self.logger(f" Mode: Attempting to download thumbnail...") - self.logger(" Thumbnail download via API is disabled as the local API is not used.") - self.logger(f" -> Skipping Post {post_id}: Thumbnail download requested but API is disabled.") + if not file_url or not api_original_filename: + self.logger(f"⚠️ Skipping file from post {original_post_id_for_log}: Missing URL or original filename. Info: {str(file_info)[:100]}") return 0, 1 - else: - self.logger(f" Mode: Downloading post file/attachments.") - if post_file_info and isinstance(post_file_info, dict) and post_file_info.get('path'): - main_file_path = post_file_info['path'].lstrip('/') - main_file_name = post_file_info.get('name') or os.path.basename(main_file_path) - if main_file_name: - file_url = f"https://{api_domain}/data/{main_file_path}" - files_to_process_for_download.append({ - 'url': file_url, 'name': main_file_name, - '_is_thumbnail': False, '_source': 'post_file' - }) + # --- Check skip words on ORIGINAL filename FIRST --- + if self.skip_words_list: + name_to_check_lower = api_original_filename.lower() + # Simple check if any skip word is a substring + # For more precise matching (e.g., whole words), adjust this logic + if any(skip_word.lower() in name_to_check_lower for skip_word in self.skip_words_list): + matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in name_to_check_lower), "unknown_skip_word") + self.logger(f" -> Skip File (Keyword 
on Original Name): '{api_original_filename}' contains '{matched_skip}'.") + return 0, 1 + # --- End skip word check --- + + _, original_ext = os.path.splitext(api_original_filename) + if original_ext and not original_ext.startswith('.'): original_ext = '.' + original_ext + elif not original_ext: + _, temp_ext = os.path.splitext(clean_filename(api_original_filename)) + if temp_ext and not temp_ext.startswith('.'): original_ext = '.' + temp_ext + elif temp_ext: original_ext = temp_ext + else: original_ext = '' + + filename_to_save = "" + + if self.manga_mode_active: + if post_title and post_title.strip(): + cleaned_post_title_full = clean_filename(post_title.strip()) + original_filename_base, _ = os.path.splitext(api_original_filename) + + extracted_sequence_from_original = "" + simple_end_match = re.search(r'(\d+)$', original_filename_base) + if simple_end_match: + extracted_sequence_from_original = simple_end_match.group(1) else: - self.logger(f" ⚠️ Skipping main post file: Missing filename (Path: {main_file_path})") - attachment_counter = 0 - for idx, attachment in enumerate(attachments): - if isinstance(attachment, dict) and attachment.get('path'): - attach_path = attachment['path'].lstrip('/') - attach_name = attachment.get('name') or os.path.basename(attach_path) - if attach_name: - base, ext = os.path.splitext(clean_filename(attach_name)) - final_attach_name = f"{post_id}_{attachment_counter}{ext}" - if base and base != f"{post_id}_{attachment_counter}": - final_attach_name = f"{post_id}_{attachment_counter}_{base}{ext}" + complex_match = re.search(r'(?:[ _.\-/]|^)(?:p|page|ch|chapter|ep|episode|v|vol|volume|no|num|number|pt|part)[ _.\-]*(\d+)', original_filename_base, re.IGNORECASE) + if complex_match: + extracted_sequence_from_original = complex_match.group(1) + + cleaned_title_base = re.sub( + r'[|\[\]()]*[ _.\-]*(?:page|p|ch|chapter|ep|episode|v|vol|volume|no|num|number|pt|part)s?[ _.\-]*\d+([ _.\-]+\d+)?$', + '', + cleaned_post_title_full, + 
flags=re.IGNORECASE + ).strip() + if not cleaned_title_base: + cleaned_title_base = cleaned_post_title_full + cleaned_title_base = cleaned_title_base.rstrip(' _.-') - attach_url = f"https://{api_domain}/data/{attach_path}" - files_to_process_for_download.append({ - 'url': attach_url, 'name': final_attach_name, - '_is_thumbnail': False, '_source': f'attachment_{idx+1}', - '_original_name_for_log': attach_name - }) - attachment_counter += 1 - - else: - self.logger(f" ⚠️ Skipping attachment {idx+1}: Missing filename (Path: {attach_path})") + if extracted_sequence_from_original: + filename_to_save = f"{cleaned_title_base} {extracted_sequence_from_original}{original_ext}" + self.logger(f" Manga Mode (Seq from Original): Renaming '{api_original_filename}' to '{filename_to_save}'") else: - self.logger(f" ⚠️ Skipping invalid attachment entry {idx+1}: {str(attachment)[:100]}") + fallback_sequence = str(file_index_in_post + 1) + filename_to_save = f"{cleaned_title_base} {fallback_sequence}{original_ext}" + self.logger(f" Manga Mode (No Seq in Original): Using cleaned title + file index '{fallback_sequence}'. Renaming '{api_original_filename}' to '{filename_to_save}'") + + counter = 1 + base_name_coll, ext_coll = os.path.splitext(filename_to_save) + temp_filename_for_collision_check = filename_to_save + while os.path.exists(os.path.join(target_folder_path, temp_filename_for_collision_check)): + temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}" + counter += 1 + if temp_filename_for_collision_check != filename_to_save: + filename_to_save = temp_filename_for_collision_check + self.logger(f" Manga Mode: Collision detected. Adjusted filename to '{filename_to_save}'") + else: + filename_to_save = clean_filename(api_original_filename) + self.logger(f"⚠️ Manga mode: Post title missing. 
Using cleaned original filename '{filename_to_save}'.") + else: + filename_to_save = clean_filename(api_original_filename) - if not files_to_process_for_download: - self.logger(f" No files found to download for post {post_id}.") - return 0, 0 + final_filename_for_sets_and_saving = filename_to_save - self.logger(f" Files identified for download: {len(files_to_process_for_download)}") - post_download_count = 0 - post_skip_count = 0 - local_processed_filenames = set() - local_filenames_lock = threading.Lock() + if not self.download_thumbnails: + is_img_type = is_image(api_original_filename) + is_vid_type = is_video(api_original_filename) + is_zip_type = is_zip(api_original_filename) + is_rar_type = is_rar(api_original_filename) + if self.filter_mode == 'image' and not is_img_type: self.logger(f" -> Filter Skip: '{api_original_filename}' (Not Image)"); return 0,1 + if self.filter_mode == 'video' and not is_vid_type: self.logger(f" -> Filter Skip: '{api_original_filename}' (Not Video)"); return 0,1 + if self.skip_zip and is_zip_type: self.logger(f" -> Pref Skip: '{api_original_filename}' (ZIP)"); return 0,1 + if self.skip_rar and is_rar_type: self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR)"); return 0,1 + target_folder_basename = os.path.basename(target_folder_path) + current_save_path = os.path.join(target_folder_path, final_filename_for_sets_and_saving) - for file_info in files_to_process_for_download: - if self.check_cancel(): break - if self.skip_current_file_flag.is_set(): - original_name_for_log = file_info.get('_original_name_for_log', file_info.get('name', 'unknown_file')) - self.logger(f"⏭️ File skip requested: {original_name_for_log}") - post_skip_count += 1 - self.skip_current_file_flag.clear() - continue + if os.path.exists(current_save_path) and os.path.getsize(current_save_path) > 0: + self.logger(f" -> Exists (Path): '{final_filename_for_sets_and_saving}' in '{target_folder_basename}'.") + with self.downloaded_files_lock: 
self.downloaded_files.add(final_filename_for_sets_and_saving) + return 0, 1 + + with self.downloaded_files_lock: + if final_filename_for_sets_and_saving in self.downloaded_files: + self.logger(f" -> Global Skip (Filename): '{final_filename_for_sets_and_saving}' already recorded.") + return 0, 1 - file_url = file_info.get('url') - original_filename = file_info.get('name') - is_thumbnail = file_info.get('_is_thumbnail', False) - original_name_for_log = file_info.get('_original_name_for_log', original_filename) + max_retries = 3; retry_delay = 5; downloaded_size_bytes = 0 + calculated_file_hash = None; file_content_bytes = None; total_size_bytes = 0 + download_successful_flag = False + log_name_during_dl = f"{api_original_filename} (as {final_filename_for_sets_and_saving})" - if not file_url or not original_filename: - self.logger(f"⚠️ Skipping file entry due to missing URL or name: {str(file_info)[:100]}") - post_skip_count += 1 - continue + for attempt_num in range(max_retries + 1): + if self.check_cancel() or (skip_event and skip_event.is_set()): break + try: + if attempt_num > 0: + self.logger(f" Retrying '{log_name_during_dl}' (Attempt {attempt_num}/{max_retries})..."); + time.sleep(retry_delay * (2**(attempt_num-1))) - cleaned_save_filename = clean_filename(original_filename) - if self.skip_words_list: - filename_lower = cleaned_save_filename.lower() - file_skipped_by_word = False - for skip_word in self.skip_words_list: - if skip_word.lower() in filename_lower: - self.logger(f" -> Skip File (Filename): File '{original_name_for_log}' contains skip word '{skip_word}'.") - post_skip_count += 1 - file_skipped_by_word = True - break - if file_skipped_by_word: - continue - if not self.download_thumbnails: - file_skipped_by_filter = False - is_img = is_image(cleaned_save_filename) - is_vid = is_video(cleaned_save_filename) - is_zip_file = is_zip(cleaned_save_filename) - is_rar_file = is_rar(cleaned_save_filename) + if self.signals: 
self.signals.file_download_status_signal.emit(True) - if self.filter_mode == 'image' and not is_img: - self.logger(f" -> Filter Skip: '{original_name_for_log}' (Not image/gif)") - file_skipped_by_filter = True - elif self.filter_mode == 'video' and not is_vid: - self.logger(f" -> Filter Skip: '{original_name_for_log}' (Not video)") - file_skipped_by_filter = True - elif self.skip_zip and is_zip_file: - self.logger(f" -> Pref Skip: '{original_name_for_log}' (Zip)") - file_skipped_by_filter = True - elif self.skip_rar and is_rar_file: - self.logger(f" -> Pref Skip: '{original_name_for_log}' (RAR)") - file_skipped_by_filter = True + response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True) + response.raise_for_status() + total_size_bytes = int(response.headers.get('Content-Length', 0)) + file_content_bytes = BytesIO(); downloaded_size_bytes = 0; md5_hasher = hashlib.md5() + last_progress_time = time.time() - if file_skipped_by_filter: - post_skip_count += 1 - continue - file_downloaded_or_exists = False - for folder_path in valid_folder_paths: - if self.check_cancel(): break - try: - os.makedirs(folder_path, exist_ok=True) - except OSError as e: - self.logger(f"❌ Error ensuring directory exists {folder_path}: {e}. Skipping path.") - continue - except Exception as e: - self.logger(f"❌ Unexpected error creating dir {folder_path}: {e}. 
Skipping path.") - continue + for chunk in response.iter_content(chunk_size=1 * 1024 * 1024): # 1MB chunks + if self.check_cancel() or (skip_event and skip_event.is_set()): break # Check cancellation inside loop + if chunk: + file_content_bytes.write(chunk); md5_hasher.update(chunk); downloaded_size_bytes += len(chunk) + if time.time() - last_progress_time > 1 and total_size_bytes > 0 and self.signals: + self.signals.file_progress_signal.emit(log_name_during_dl, downloaded_size_bytes, total_size_bytes) + last_progress_time = time.time() + + if self.check_cancel() or (skip_event and skip_event.is_set()): break - save_path = os.path.join(folder_path, cleaned_save_filename) - folder_basename = os.path.basename(folder_path) - with local_filenames_lock: - if os.path.exists(save_path) and os.path.getsize(save_path) > 0: - self.logger(f" -> Exists Skip: '{original_name_for_log}' in '{folder_basename}'") - post_skip_count += 1 - file_downloaded_or_exists = True - with self.downloaded_files_lock: - self.downloaded_files.add(cleaned_save_filename) - break - elif cleaned_save_filename in local_processed_filenames: - self.logger(f" -> Local Skip: '{original_name_for_log}' in '{folder_basename}' (already processed in this post)") - post_skip_count += 1 - file_downloaded_or_exists = True - with self.downloaded_files_lock: - self.downloaded_files.add(cleaned_save_filename) - break - with self.downloaded_files_lock: - if cleaned_save_filename in self.downloaded_files: - self.logger(f" -> Global Filename Skip: '{original_name_for_log}' in '{folder_basename}' (filename already downloaded globally)") - post_skip_count += 1 - file_downloaded_or_exists = True - break - try: - self.logger(f"⬇️ Downloading '{original_name_for_log}' to '{folder_basename}'...") - self.current_download_path = save_path - self.is_downloading_file = True - self.signals.file_download_status_signal.emit(True) - response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True) - 
response.raise_for_status() - file_content_bytes = BytesIO() - downloaded_size = 0 - chunk_count = 0 - md5_hash = hashlib.md5() - - for chunk in response.iter_content(chunk_size=32 * 1024): - if self.check_cancel(): break - if self.skip_current_file_flag.is_set(): break - - if chunk: - file_content_bytes.write(chunk) - md5_hash.update(chunk) - downloaded_size += len(chunk) - chunk_count += 1 - if self.check_cancel() or self.skip_current_file_flag.is_set(): - self.logger(f" ⚠️ Download interrupted {'(cancelled)' if self.cancellation_event.is_set() else '(skipped)'} for {original_name_for_log}.") - if self.skip_current_file_flag.is_set(): - post_skip_count += 1 - self.skip_current_file_flag.clear() - break - final_save_path = save_path - current_filename_for_log = cleaned_save_filename - file_content_bytes.seek(0) - - if downloaded_size == 0 and chunk_count > 0: - self.logger(f"⚠️ Warning: Downloaded 0 bytes despite receiving chunks for {original_name_for_log}. Skipping save.") - post_skip_count += 1 - break - - if downloaded_size > 0: - calculated_hash = md5_hash.hexdigest() - with self.downloaded_file_hashes_lock: - if calculated_hash in self.downloaded_file_hashes: - self.logger(f" -> Content Skip: '{original_name_for_log}' (Hash: {calculated_hash}) already downloaded.") - post_skip_count += 1 - file_downloaded_or_exists = True - with self.downloaded_files_lock: - self.downloaded_files.add(cleaned_save_filename) - with local_filenames_lock: - local_processed_filenames.add(cleaned_save_filename) - break - else: - pass - - - if not file_downloaded_or_exists: - final_bytes_to_save = file_content_bytes - is_img_for_compress = is_image(cleaned_save_filename) - if is_img_for_compress and not is_thumbnail and self.compress_images and Image and downloaded_size > 1500 * 1024: - self.logger(f" Compressing large image ({downloaded_size / 1024:.2f} KB)...") - try: - with Image.open(file_content_bytes) as img: - original_format = img.format - if img.mode == 'P': img = 
img.convert('RGBA') - elif img.mode not in ['RGB', 'RGBA', 'L']: img = img.convert('RGB') - - compressed_bytes = BytesIO() - img.save(compressed_bytes, format='WebP', quality=75, method=4) - compressed_size = compressed_bytes.getbuffer().nbytes - if compressed_size < downloaded_size * 0.90: - self.logger(f" Compression success: {compressed_size / 1024:.2f} KB (WebP Q75)") - compressed_bytes.seek(0) - final_bytes_to_save = compressed_bytes - base, _ = os.path.splitext(cleaned_save_filename) - current_filename_for_log = base + '.webp' - final_save_path = os.path.join(folder_path, current_filename_for_log) - self.logger(f" Updated filename: {current_filename_for_log}") - else: - self.logger(f" Compression skipped: WebP not significantly smaller ({compressed_size / 1024:.2f} KB).") - file_content_bytes.seek(0) - final_bytes_to_save = file_content_bytes - - except Exception as comp_e: - self.logger(f"❌ Image compression failed for {original_name_for_log}: {comp_e}. Saving original.") - file_content_bytes.seek(0) - final_bytes_to_save = file_content_bytes - final_save_path = save_path - - elif is_img_for_compress and not is_thumbnail and self.compress_images: - self.logger(f" Skipping compression: Image size ({downloaded_size / 1024:.2f} KB) below threshold.") - file_content_bytes.seek(0) - final_bytes_to_save = file_content_bytes - - elif is_thumbnail and downloaded_size > LARGE_THUMBNAIL_THRESHOLD: - self.logger(f"⚠️ Downloaded thumbnail '{current_filename_for_log}' ({downloaded_size / 1024:.2f} KB) is large.") - file_content_bytes.seek(0) - final_bytes_to_save = file_content_bytes - else: - file_content_bytes.seek(0) - final_bytes_to_save = file_content_bytes - save_file = False - with self.downloaded_files_lock: - with local_filenames_lock: - if os.path.exists(final_save_path) and os.path.getsize(final_save_path) > 0: - self.logger(f" -> Exists Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}'") - post_skip_count += 1 - file_downloaded_or_exists = 
True - elif current_filename_for_log in self.downloaded_files: - self.logger(f" -> Global Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}' (already downloaded globally)") - post_skip_count += 1 - file_downloaded_or_exists = True - elif current_filename_for_log in local_processed_filenames: - self.logger(f" -> Local Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}' (already processed in this post)") - post_skip_count += 1 - file_downloaded_or_exists = True - else: - save_file = True - - - if save_file: - try: - with open(final_save_path, 'wb') as f: - while True: - chunk = final_bytes_to_save.read(64 * 1024) - if not chunk: break - f.write(chunk) - with self.downloaded_file_hashes_lock: - self.downloaded_file_hashes.add(calculated_hash) - with self.downloaded_files_lock: - self.downloaded_files.add(current_filename_for_log) - with local_filenames_lock: - local_processed_filenames.add(current_filename_for_log) - - post_download_count += 1 - file_downloaded_or_exists = True - self.logger(f"✅ Saved: '{current_filename_for_log}' ({downloaded_size / 1024:.1f} KB, Hash: {calculated_hash[:8]}...) in '{folder_basename}'") - time.sleep(0.05) - - except IOError as io_err: - self.logger(f"❌ Save Fail: '{current_filename_for_log}' to '{folder_basename}'. Error: {io_err}") - post_skip_count += 1 - if os.path.exists(final_save_path): - try: os.remove(final_save_path) - except OSError: pass - break - except Exception as save_err: - self.logger(f"❌ Unexpected Save Error: '{current_filename_for_log}' in '{folder_basename}'. Error: {save_err}") - post_skip_count += 1 - if os.path.exists(final_save_path): - try: os.remove(final_save_path) - except OSError: pass - break - final_bytes_to_save.close() - if file_content_bytes is not final_bytes_to_save: - file_content_bytes.close() - if file_downloaded_or_exists: - break - except requests.exceptions.RequestException as e: - self.logger(f"❌ Download Fail: {original_name_for_log}. 
Error: {e}") - post_skip_count += 1 + if downloaded_size_bytes > 0: + calculated_file_hash = md5_hasher.hexdigest() + download_successful_flag = True + break + elif total_size_bytes == 0 and response.status_code == 200 : + self.logger(f" Note: '{log_name_during_dl}' is a 0-byte file according to server.") + calculated_file_hash = md5_hasher.hexdigest() + download_successful_flag = True break - except IOError as e: - self.logger(f"❌ File I/O Error: {original_name_for_log} in '{folder_basename}'. Error: {e}") - post_skip_count += 1 - break - except Exception as e: - self.logger(f"❌ Unexpected Error during download/save for {original_name_for_log}: {e}") - import traceback - self.logger(f" Traceback: {traceback.format_exc(limit=2)}") - post_skip_count += 1 - break + + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e: + self.logger(f" ❌ Download Error (Retryable): {log_name_during_dl}. Error: {e}") + except requests.exceptions.RequestException as e: + self.logger(f" ❌ Download Error (Non-Retryable): {log_name_during_dl}. 
Error: {e}"); break + except Exception as e: + self.logger(f" ❌ Unexpected Download Error: {log_name_during_dl}: {e}\n{traceback.format_exc(limit=2)}"); break + finally: + if self.signals: self.signals.file_download_status_signal.emit(False) - finally: - self.is_downloading_file = False - self.current_download_path = None - self.signals.file_download_status_signal.emit(False) - if self.check_cancel(): break - if self.skip_current_file_flag.is_set(): - self.skip_current_file_flag.clear() - if not file_downloaded_or_exists: - pass - if self.check_cancel(): - self.logger(f" Post {post_id} processing cancelled.") - return post_download_count, post_skip_count + if self.signals and total_size_bytes > 0 : + self.signals.file_progress_signal.emit(log_name_during_dl, downloaded_size_bytes, total_size_bytes) + + if self.check_cancel() or (skip_event and skip_event.is_set()): + self.logger(f" ⚠️ Download interrupted for {log_name_during_dl}.") + if file_content_bytes: file_content_bytes.close() + return 0, 1 + + if not download_successful_flag: + self.logger(f"❌ Download failed for '{log_name_during_dl}' after {max_retries} retries.") + if file_content_bytes: file_content_bytes.close() + return 0, 1 + + with self.downloaded_file_hashes_lock: + if calculated_file_hash in self.downloaded_file_hashes: + self.logger(f" -> Content Skip (Hash): '{log_name_during_dl}' (Hash: {calculated_file_hash[:8]}...).") + with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving) + if file_content_bytes: file_content_bytes.close() + return 0, 1 + + bytes_to_write = file_content_bytes; bytes_to_write.seek(0) + final_filename_after_processing = final_filename_for_sets_and_saving + current_save_path_final = current_save_path + + is_img_for_compress_check = is_image(api_original_filename) + if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024): + self.logger(f" Compressing '{api_original_filename}' 
({downloaded_size_bytes / (1024*1024):.2f} MB)...") + try: + with Image.open(bytes_to_write) as img_obj: + if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA') + elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB') + + compressed_bytes_io = BytesIO() + img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4) + compressed_size = compressed_bytes_io.getbuffer().nbytes + + if compressed_size < downloaded_size_bytes * 0.9: + self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.") + bytes_to_write.close() + bytes_to_write = compressed_bytes_io; bytes_to_write.seek(0) + + base_name_orig, _ = os.path.splitext(final_filename_for_sets_and_saving) + final_filename_after_processing = base_name_orig + '.webp' + current_save_path_final = os.path.join(target_folder_path, final_filename_after_processing) + self.logger(f" Updated filename (compressed): {final_filename_after_processing}") + else: + self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0) + except Exception as comp_e: + self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. 
Saving original."); bytes_to_write.seek(0) + + if final_filename_after_processing != final_filename_for_sets_and_saving and \ + os.path.exists(current_save_path_final) and os.path.getsize(current_save_path_final) > 0: + self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{target_folder_basename}'.") + with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing) + bytes_to_write.close() + return 0, 1 + + try: + os.makedirs(os.path.dirname(current_save_path_final), exist_ok=True) + with open(current_save_path_final, 'wb') as f_out: + f_out.write(bytes_to_write.getvalue()) + + with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash) + with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing) + + self.logger(f"✅ Saved: '{final_filename_after_processing}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{target_folder_basename}'") + time.sleep(0.05) + return 1, 0 + except Exception as save_err: + self.logger(f"❌ Save Fail for '{final_filename_after_processing}': {save_err}") + if os.path.exists(current_save_path_final): + try: os.remove(current_save_path_final); + except OSError: self.logger(f" -> Failed to remove partially saved file: {current_save_path_final}") + return 0, 1 + finally: + if bytes_to_write: bytes_to_write.close() - self.logger(f" Post {post_id} Summary: Downloaded={post_download_count}, Skipped={post_skip_count}") - return post_download_count, post_skip_count + def process(self): + """Main processing logic for a single post.""" + if self.check_cancel(): return 0, 0 + + total_downloaded_this_post = 0 + total_skipped_this_post = 0 + parsed_api_url = urlparse(self.api_url_input) + referer_url = f"https://{parsed_api_url.netloc}/" + headers = {'User-Agent': 'Mozilla/5.0', 'Referer': referer_url} + + # Regex to capture URL (group 1) and link text (group 2) + link_pattern = 
re.compile(r"""<a\s[^>]*? # Opening <a tag; any characters except > (non-greedy)
+ href=["'](https?://[^"']+)["'] # Capture href URL in group 1
+ [^>]*? # Any characters except > (non-greedy)
+ > # Closing > of opening tag
+ (.*?) # Capture link text in group 2 (non-greedy)
+ </a> # Closing anchor tag
+ """, re.IGNORECASE | re.VERBOSE | re.DOTALL)
+
+ post_data = self.post
+ post_title = post_data.get('title', '') or 'untitled_post'
+ post_id = post_data.get('id', 'unknown_id')
+ post_main_file_info = post_data.get('file')
+ post_attachments = post_data.get('attachments', [])
+ post_content_html = post_data.get('content', '')
+
+ is_target_post_by_id = (self.target_post_id_from_initial_url is not None) and \
+ (str(post_id) == str(self.target_post_id_from_initial_url))
+
+ self.logger(f"\n--- Processing Post {post_id} ('{post_title[:50]}...') (Thread: {threading.current_thread().name}) ---")
+
+ # --- Skip Check 1: Post Title ---
+ if self.skip_words_list:
+ title_lower = post_title.lower()
+ if any(skip_word.lower() in title_lower for skip_word in self.skip_words_list):
+ matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in title_lower), "unknown_skip_word")
+ self.logger(f" -> Skip Post (Title Keyword): Title contains '{matched_skip}'.")
+ # Estimate skipped files count (main file + attachments)
+ num_potential_files = len(post_attachments) + (1 if post_main_file_info else 0)
+ return 0, num_potential_files
+
+ # --- Skip Check 2: Character Filter (Only if subfolders enabled and not a target post) ---
+ if self.filter_character_list and not is_target_post_by_id and self.use_subfolders:
+ matched_by_char_filter = any(is_title_match_for_character(post_title, char_filter) for char_filter in self.filter_character_list)
+ if not matched_by_char_filter:
+ self.logger(f" -> Filter Skip Post: Title ('{post_title[:50]}...') doesn't match character filters.")
+ num_potential_files = len(post_attachments) + (1 if post_main_file_info else 0)
+ return 0, num_potential_files
+
+ if not 
isinstance(post_attachments, list): + self.logger(f"⚠️ Corrupt attachment data for post {post_id} (expected list, got {type(post_attachments)}). Skipping attachments.") + post_attachments = [] + + # --- Determine Potential Save Folders --- + base_save_folders = [] + if self.use_subfolders: + if is_target_post_by_id and self.custom_folder_name: + base_save_folders = [self.custom_folder_name] + self.logger(f" Folder: Using custom folder for target post: '{self.custom_folder_name}'") + elif self.filter_character_list: + matched_chars = [clean_folder_name(cf.lower()) for cf in self.filter_character_list if is_title_match_for_character(post_title, cf)] + if matched_chars: + base_save_folders = matched_chars + self.logger(f" Folder: Matched character filter(s): {', '.join(base_save_folders)}") + else: + # If character filter is active but no match, we already skipped the post above + # If no character filter, derive from title/known names + matched_from_title = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords) + base_save_folders = matched_from_title if matched_from_title else [extract_folder_name_from_title(post_title, self.unwanted_keywords)] + self.logger(f" Folder: No character filter match. Using derived: {', '.join(base_save_folders)}") + else: # No character filter active + matched_from_title = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords) + base_save_folders = matched_from_title if matched_from_title else [extract_folder_name_from_title(post_title, self.unwanted_keywords)] + self.logger(f" Folder: Using derived: {', '.join(base_save_folders)}") + else: # Subfolders disabled + base_save_folders = [""] + self.logger(" Folder: Subfolders disabled. 
Using root download directory.") + + if not base_save_folders: # Fallback if somehow no folders were determined + base_save_folders = [clean_folder_name(post_title) or 'untitled_post_fallback'] + + # --- Skip Check 3: Potential Folder Name(s) --- + if self.skip_words_list and self.use_subfolders: # Only check folder names if subfolders are used + skip_post_due_to_folder = False + for folder_name in base_save_folders: + if not folder_name: continue # Skip check for root folder "" + folder_name_lower = folder_name.lower() + if any(skip_word.lower() in folder_name_lower for skip_word in self.skip_words_list): + matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in folder_name_lower), "unknown_skip_word") + self.logger(f" -> Skip Post (Folder Keyword): Potential folder '{folder_name}' contains '{matched_skip}'.") + skip_post_due_to_folder = True + break # No need to check other folders for this post + if skip_post_due_to_folder: + num_potential_files = len(post_attachments) + (1 if post_main_file_info else 0) + return 0, num_potential_files + # --- End Folder Skip Check --- + + # --- External Link Processing (Can happen even if files are skipped later) --- + if (self.show_external_links or self.extract_links_only) and post_content_html: + try: + found_links_with_text = link_pattern.findall(post_content_html) + if found_links_with_text: + unique_links_data = {} + for link_url, raw_link_text in found_links_with_text: + link_url = link_url.strip() + clean_link_text = re.sub(r'<.*?>', '', raw_link_text) + clean_link_text = html.unescape(clean_link_text) + clean_link_text = clean_link_text.strip() + if not any(ext in link_url.lower() for ext in ['.css', '.js', '.ico', '.xml', '.svg']) \ + and not link_url.startswith('javascript:'): + if link_url not in unique_links_data and clean_link_text: + unique_links_data[link_url] = clean_link_text + elif link_url not in unique_links_data: + unique_links_data[link_url] = "[Link]" + links_emitted_count = 0 + 
scraped_platforms = {'kemono', 'coomer', 'patreon'} + for link_url, link_text in unique_links_data.items(): + platform = get_link_platform(link_url) + if platform not in scraped_platforms: + if self.signals and hasattr(self.signals, 'external_link_signal'): + self.signals.external_link_signal.emit(post_title, link_text, link_url, platform) + links_emitted_count +=1 + if links_emitted_count > 0: self.logger(f" 🔗 Found {links_emitted_count} potential external link(s) in post content.") + except Exception as e: self.logger(f"⚠️ Error parsing post content for links: {e}\n{traceback.format_exc(limit=2)}") + # --- End External Link Processing --- + + if self.extract_links_only: + self.logger(f" Extract Links Only mode: Skipping file download for post {post_id}.") + return 0, 0 + + # --- Determine Final Save Paths (after folder name skip check passed) --- + final_save_paths_for_post = [] + for base_folder_name in base_save_folders: + current_path = os.path.join(self.download_root, base_folder_name) + if self.use_post_subfolders and self.use_subfolders: + cleaned_title_for_subfolder = clean_folder_name(post_title) + post_specific_subfolder = f"{post_id}_{cleaned_title_for_subfolder}" if cleaned_title_for_subfolder else f"{post_id}_untitled" + final_save_paths_for_post.append(os.path.join(current_path, post_specific_subfolder)) + else: + final_save_paths_for_post.append(current_path) + + if not final_save_paths_for_post: + # This case should be less likely now with the earlier folder determination, but keep as fallback + self.logger(f" CRITICAL ERROR: No valid folder paths determined for post {post_id}. Skipping."); return 0, 1 + + # --- Prepare File List --- + files_to_download_info_list = [] + api_file_domain = parsed_api_url.netloc + + if self.download_thumbnails: + self.logger(f" Thumbnail-only mode for Post {post_id}. 
(Functionality depends on API providing clear thumbnail links).") + # Logic to find thumbnail links would go here + if not files_to_download_info_list: + self.logger(f" -> No specific thumbnail links found for post {post_id} in thumbnail-only mode.") + return 0, 0 + else: + if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'): + file_path = post_main_file_info['path'].lstrip('/') + original_api_name = post_main_file_info.get('name') or os.path.basename(file_path) + if original_api_name: + files_to_download_info_list.append({ + 'url': f"https://{api_file_domain}{file_path}" if file_path.startswith('/') else f"https://{api_file_domain}/data/{file_path}", + 'name': original_api_name, + '_original_name_for_log': original_api_name, + '_is_thumbnail': False + }) + else: self.logger(f" ⚠️ Skipping main file for post {post_id}: Missing name (Path: {file_path})") + + for idx, att_info in enumerate(post_attachments): + if isinstance(att_info, dict) and att_info.get('path'): + att_path = att_info['path'].lstrip('/') + original_api_att_name = att_info.get('name') or os.path.basename(att_path) + if original_api_att_name: + files_to_download_info_list.append({ + 'url': f"https://{api_file_domain}{att_path}" if att_path.startswith('/') else f"https://{api_file_domain}/data/{att_path}", + 'name': original_api_att_name, + '_original_name_for_log': original_api_att_name, + '_is_thumbnail': False + }) + else: self.logger(f" ⚠️ Skipping attachment {idx+1} for post {post_id}: Missing name (Path: {att_path})") + else: self.logger(f" ⚠️ Skipping invalid attachment {idx+1} for post {post_id}: {str(att_info)[:100]}") + + if not files_to_download_info_list: + self.logger(f" No files found to download for post {post_id}.") + return 0, 0 + + self.logger(f" Identified {len(files_to_download_info_list)} file(s) for potential download from post {post_id}.") + + # --- Download Files (Skip Check 4: Original Filename happens inside 
_download_single_file) --- + with ThreadPoolExecutor(max_workers=self.num_file_threads, thread_name_prefix=f'P{post_id}File_') as file_pool: + futures_list = [] + for idx, file_info_to_dl in enumerate(files_to_download_info_list): + if self.check_cancel(): break + for save_location_path in final_save_paths_for_post: + if self.check_cancel(): break + futures_list.append(file_pool.submit( + self._download_single_file, + file_info_to_dl, + save_location_path, + headers, + post_id, + self.skip_current_file_flag, + post_title, + file_index_in_post=idx + )) + + for future in as_completed(futures_list): + if self.check_cancel(): break + try: + dl_count, skip_count = future.result() + total_downloaded_this_post += dl_count + total_skipped_this_post += skip_count + except CancelledError: + total_skipped_this_post += 1 + except Exception as exc_f: + self.logger(f"❌ File download task for post {post_id} resulted in error: {exc_f}") + total_skipped_this_post += 1 + + if self.signals and hasattr(self.signals, 'file_progress_signal'): + self.signals.file_progress_signal.emit("", 0, 0) + + if self.check_cancel(): self.logger(f" Post {post_id} processing cancelled."); + else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}") + + return total_downloaded_this_post, total_skipped_this_post + class DownloadThread(QThread): + """Manages the overall download process (primarily for single-threaded GUI mode).""" progress_signal = pyqtSignal(str) - add_character_prompt_signal = pyqtSignal(str) - file_download_status_signal = pyqtSignal(bool) - finished_signal = pyqtSignal(int, int, bool) + add_character_prompt_signal = pyqtSignal(str) + file_download_status_signal = pyqtSignal(bool) + finished_signal = pyqtSignal(int, int, bool) + # MODIFIED: Added link_text argument + external_link_signal = pyqtSignal(str, str, str, str) # post_title, link_text, link_url, platform + file_progress_signal = pyqtSignal(str, int, int) - - 
def __init__(self, api_url, output_dir, known_names_copy, - cancellation_event, single_post_id=None, - filter_character=None, filter_mode='all', skip_zip=True, skip_rar=True, - use_subfolders=True, custom_folder_name=None, compress_images=False, + def __init__(self, api_url_input, output_dir, known_names_copy, + cancellation_event, + filter_character_list=None, + filter_mode='all', skip_zip=True, skip_rar=True, + use_subfolders=True, use_post_subfolders=False, custom_folder_name=None, compress_images=False, download_thumbnails=False, service=None, user_id=None, - downloaded_files=None, downloaded_files_lock=None, - downloaded_file_hashes=None, downloaded_file_hashes_lock=None, - skip_words_list=None): + downloaded_files=None, downloaded_file_hashes=None, downloaded_files_lock=None, downloaded_file_hashes_lock=None, + skip_words_list=None, + show_external_links=False, + num_file_threads_for_worker=1, + skip_current_file_flag=None, start_page=None, end_page=None, + target_post_id_from_initial_url=None, + manga_mode_active=False, + unwanted_keywords=None + ): super().__init__() - self._init_failed = False - self.api_url_input = api_url + self.api_url_input = api_url_input self.output_dir = output_dir - self.known_names = list(known_names_copy) - self.cancellation_event = cancellation_event - self.initial_target_post_id = single_post_id - self.filter_character = filter_character + self.known_names = list(known_names_copy) + self.cancellation_event = cancellation_event + self.skip_current_file_flag = skip_current_file_flag + + self.initial_target_post_id = target_post_id_from_initial_url + + self.filter_character_list = filter_character_list if filter_character_list else [] self.filter_mode = filter_mode self.skip_zip = skip_zip self.skip_rar = skip_rar self.use_subfolders = use_subfolders + self.use_post_subfolders = use_post_subfolders self.custom_folder_name = custom_folder_name self.compress_images = compress_images self.download_thumbnails = download_thumbnails 
self.service = service self.user_id = user_id self.skip_words_list = skip_words_list if skip_words_list is not None else [] + self.downloaded_files = downloaded_files if downloaded_files is not None else set() self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock() self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set() self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock() - self.skip_current_file_flag = threading.Event() - self.is_downloading_file = False - self.current_download_path = None - self._add_character_response = None - self.prompt_mutex = QMutex() - if not self.service or not self.user_id: - log_msg = f"❌ Thread Init Error: Missing service ('{self.service}') or user ID ('{self.user_id}') for URL '{api_url}'" - print(log_msg) - try: self.progress_signal.emit(log_msg) - except RuntimeError: pass - self._init_failed = True + + self._add_character_response = None + self.prompt_mutex = QMutex() + + self.show_external_links = show_external_links + self.num_file_threads_for_worker = num_file_threads_for_worker + + self.start_page = start_page + self.end_page = end_page + + self.manga_mode_active = manga_mode_active + self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'} - def run(self): - if self._init_failed: - self.finished_signal.emit(0, 0, False) - return - - unwanted_keywords = {'spicy', 'hd', 'nsfw', '4k', 'preview'} - grand_total_downloaded = 0 - grand_total_skipped = 0 - cancelled_by_user = False - - try: - if self.use_subfolders and self.filter_character and not self.custom_folder_name: - if not self._check_and_prompt_filter_character(): - self.finished_signal.emit(0, 0, False) - return - worker_signals_adapter = PostProcessorSignals() - 
worker_signals_adapter.progress_signal.connect(self.progress_signal) - worker_signals_adapter.file_download_status_signal.connect(self.file_download_status_signal) - - post_worker = PostProcessorWorker( - post_data=None, - download_root=self.output_dir, - known_names=self.known_names, - filter_character=self.filter_character, - unwanted_keywords=unwanted_keywords, - filter_mode=self.filter_mode, - skip_zip=self.skip_zip, - skip_rar=self.skip_rar, - use_subfolders=self.use_subfolders, - target_post_id_from_initial_url=self.initial_target_post_id, - custom_folder_name=self.custom_folder_name, - compress_images=self.compress_images, - download_thumbnails=self.download_thumbnails, - service=self.service, - user_id=self.user_id, - api_url_input=self.api_url_input, - cancellation_event=self.cancellation_event, - signals=worker_signals_adapter, - downloaded_files=self.downloaded_files, - downloaded_files_lock=self.downloaded_files_lock, - downloaded_file_hashes=self.downloaded_file_hashes, - downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, - skip_words_list=self.skip_words_list, - ) - post_worker.skip_current_file_flag = self.skip_current_file_flag - self.progress_signal.emit(" Starting post fetch...") - def thread_logger(msg): - self.progress_signal.emit(msg) - - post_generator = download_from_api(self.api_url_input, logger=thread_logger) - - for posts_batch in post_generator: - if self.isInterruptionRequested(): - self.progress_signal.emit("⚠️ Download cancelled before processing batch.") - cancelled_by_user = True - break - - for post in posts_batch: - if self.isInterruptionRequested(): - self.progress_signal.emit("⚠️ Download cancelled during post processing.") - cancelled_by_user = True - break - post_worker.post = post - try: - downloaded, skipped = post_worker.process() - grand_total_downloaded += downloaded - grand_total_skipped += skipped - except Exception as proc_e: - post_id_err = post.get('id', 'N/A') if isinstance(post, dict) else 'N/A' - 
self.progress_signal.emit(f"❌ Error processing post {post_id_err}: {proc_e}") - import traceback - self.progress_signal.emit(traceback.format_exc(limit=2)) - grand_total_skipped += 1 - self.msleep(20) - - if cancelled_by_user: - break - if not cancelled_by_user: - self.progress_signal.emit("✅ Post fetching and processing complete.") - - - except Exception as e: - log_msg = f"\n❌ An critical error occurred in download thread: {e}" - self.progress_signal.emit(log_msg) - import traceback - tb_str = traceback.format_exc() - self.progress_signal.emit("--- Traceback ---") - for line in tb_str.splitlines(): - self.progress_signal.emit(" " + line) - self.progress_signal.emit("--- End Traceback ---") - cancelled_by_user = False - - finally: - self.finished_signal.emit(grand_total_downloaded, grand_total_skipped, cancelled_by_user) - - - def _check_and_prompt_filter_character(self): - clean_char_filter = clean_folder_name(self.filter_character.lower()) - known_names_lower = {name.lower() for name in self.known_names} - - if not clean_char_filter: - self.progress_signal.emit(f"❌ Filter name '{self.filter_character}' is invalid. Aborting.") - return False - - if self.filter_character.lower() not in known_names_lower: - self.progress_signal.emit(f"❓ Filter '{self.filter_character}' not found in known list.") - with QMutexLocker(self.prompt_mutex): - self._add_character_response = None - self.add_character_prompt_signal.emit(self.filter_character) - self.progress_signal.emit(" Waiting for user confirmation to add filter name...") - while self._add_character_response is None: - if self.isInterruptionRequested(): - self.progress_signal.emit("⚠️ Cancelled while waiting for user input on filter name.") - return False - self.msleep(200) - if self._add_character_response: - self.progress_signal.emit(f"✅ User confirmed adding '{self.filter_character}'. 
Continuing.") - if self.filter_character not in self.known_names: - self.known_names.append(self.filter_character) - return True - else: - self.progress_signal.emit(f"❌ User declined to add filter '{self.filter_character}'. Aborting download.") - return False - return True - - - def skip_file(self): - if self.isRunning() and self.is_downloading_file: - self.progress_signal.emit("⏭️ Skip requested for current file.") - self.skip_current_file_flag.set() - elif self.isRunning(): - self.progress_signal.emit("ℹ️ Skip requested, but no file download active.") - - - def receive_add_character_result(self, result): - with QMutexLocker(self.prompt_mutex): - self._add_character_response = result - self.progress_signal.emit(f" Received prompt response: {'Yes' if result else 'No'}") + if self.compress_images and Image is None: + self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") + self.compress_images = False + def logger(self, message): + """Emits a log message via the progress_signal.""" + self.progress_signal.emit(str(message)) def isInterruptionRequested(self): - return super().isInterruptionRequested() or self.cancellation_event.is_set() \ No newline at end of file + """Overrides QThread's interruption check to also use the cancellation_event.""" + return super().isInterruptionRequested() or self.cancellation_event.is_set() + + def skip_file(self): + """Sets the skip_current_file_flag to skip the currently downloading file.""" + if self.isRunning() and self.skip_current_file_flag: + self.logger("⏭️ Skip requested for current file (single-thread mode).") + self.skip_current_file_flag.set() + else: self.logger("ℹ️ Skip file: No download active or flag not set.") + + def run(self): + """Main execution loop for the download thread.""" + grand_total_downloaded_files = 0 + grand_total_skipped_files = 0 + was_process_cancelled = False + + worker_signals_obj = PostProcessorSignals() + try: + 
worker_signals_obj.progress_signal.connect(self.progress_signal) + worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal) + worker_signals_obj.file_progress_signal.connect(self.file_progress_signal) + # Connect the worker's external_link_signal to this thread's external_link_signal + # This ensures links found by the worker (even in single-thread mode) are emitted by this thread + worker_signals_obj.external_link_signal.connect(self.external_link_signal) + + + self.logger(" Starting post fetch (single-threaded download process)...") + post_generator = download_from_api( + self.api_url_input, + logger=self.logger, + start_page=self.start_page, + end_page=self.end_page, + manga_mode=self.manga_mode_active, + cancellation_event=self.cancellation_event # Pass cancellation event + ) + + for posts_batch_data in post_generator: + if self.isInterruptionRequested(): was_process_cancelled = True; break + for individual_post_data in posts_batch_data: + if self.isInterruptionRequested(): was_process_cancelled = True; break + + post_processing_worker = PostProcessorWorker( + post_data=individual_post_data, + download_root=self.output_dir, + known_names=self.known_names, + filter_character_list=self.filter_character_list, + unwanted_keywords=self.unwanted_keywords, + filter_mode=self.filter_mode, + skip_zip=self.skip_zip, skip_rar=self.skip_rar, + use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders, + target_post_id_from_initial_url=self.initial_target_post_id, + custom_folder_name=self.custom_folder_name, + compress_images=self.compress_images, download_thumbnails=self.download_thumbnails, + service=self.service, user_id=self.user_id, + api_url_input=self.api_url_input, + cancellation_event=self.cancellation_event, + signals=worker_signals_obj, # Pass the connected signals object + downloaded_files=self.downloaded_files, downloaded_file_hashes=self.downloaded_file_hashes, + 
downloaded_files_lock=self.downloaded_files_lock, downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, + skip_words_list=self.skip_words_list, + show_external_links=self.show_external_links, + extract_links_only=False, + num_file_threads=self.num_file_threads_for_worker, + skip_current_file_flag=self.skip_current_file_flag, + manga_mode_active=self.manga_mode_active + ) + try: + dl_count, skip_count = post_processing_worker.process() + grand_total_downloaded_files += dl_count + grand_total_skipped_files += skip_count + except Exception as proc_err: + post_id_for_err = individual_post_data.get('id', 'N/A') + self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}") + traceback.print_exc() + grand_total_skipped_files += len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0) + + if self.skip_current_file_flag and self.skip_current_file_flag.is_set(): + self.skip_current_file_flag.clear() + self.logger(" Skip current file flag was processed and cleared.") + + self.msleep(10) + if was_process_cancelled: break + + if not was_process_cancelled: self.logger("✅ All posts processed or end of content reached.") + + except Exception as main_thread_err: + self.logger(f"\n❌ Critical error within DownloadThread run loop: {main_thread_err}") + traceback.print_exc() + if not self.isInterruptionRequested(): was_process_cancelled = False + finally: + try: + if worker_signals_obj: + # Disconnect signals + worker_signals_obj.progress_signal.disconnect(self.progress_signal) + worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal) + worker_signals_obj.external_link_signal.disconnect(self.external_link_signal) + worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal) + except (TypeError, RuntimeError) as e: self.logger(f"ℹ️ Note during signal disconnection: {e}") + + self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, 
was_process_cancelled) + + def receive_add_character_result(self, result): + """Handles the response from a character add prompt (if GUI signals back to this thread).""" + with QMutexLocker(self.prompt_mutex): + self._add_character_response = result + self.logger(f" (DownloadThread) Received character prompt response: {'Yes' if result else 'No'}") + diff --git a/main.py b/main.py index 2e2b23f..4ff6f33 100644 --- a/main.py +++ b/main.py @@ -4,45 +4,78 @@ import time import requests import re import threading -import queue +import queue # Standard library queue, not directly used for the new link queue import hashlib -from concurrent.futures import ThreadPoolExecutor, Future, CancelledError +import http.client +import traceback +import random # <-- Import random for generating delays +from collections import deque # <-- Import deque for the link queue -from PyQt5.QtGui import QIcon +from concurrent.futures import ThreadPoolExecutor, CancelledError, Future + +from PyQt5.QtGui import ( + QIcon, + QIntValidator +) from PyQt5.QtWidgets import ( QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton, QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, - QRadioButton, QButtonGroup, QCheckBox + QRadioButton, QButtonGroup, QCheckBox, QSplitter, QSizePolicy ) -from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject +# Ensure QTimer is imported +from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer from urllib.parse import urlparse try: from PIL import Image except ImportError: - Image = None # Will be handled in downloader_utils + Image = None from io import BytesIO -# Import from the new utils/backend file -from downloader_utils import ( - KNOWN_NAMES, - clean_folder_name, - extract_post_info, - download_from_api, - PostProcessorSignals, - PostProcessorWorker, - DownloadThread as BackendDownloadThread # Rename to avoid conflict if any -) +# --- Import from downloader_utils --- +try: + 
print("Attempting to import from downloader_utils...") + # Assuming downloader_utils_link_text is the correct version + from downloader_utils import ( + KNOWN_NAMES, + clean_folder_name, + extract_post_info, + download_from_api, + PostProcessorSignals, + PostProcessorWorker, + DownloadThread as BackendDownloadThread + ) + print("Successfully imported names from downloader_utils.") +except ImportError as e: + print(f"--- IMPORT ERROR ---") + print(f"Failed to import from 'downloader_utils.py': {e}") + # ... (rest of error handling as in your original file) ... + KNOWN_NAMES = [] + PostProcessorSignals = QObject + PostProcessorWorker = object + BackendDownloadThread = QThread + def clean_folder_name(n): return str(n) # Fallback + def extract_post_info(u): return None, None, None + def download_from_api(*a, **k): yield [] +except Exception as e: + print(f"--- UNEXPECTED IMPORT ERROR ---") + print(f"An unexpected error occurred during import: {e}") + traceback.print_exc() + print(f"-----------------------------", file=sys.stderr) + sys.exit(1) +# --- End Import --- class DownloaderApp(QWidget): character_prompt_response_signal = pyqtSignal(bool) log_signal = pyqtSignal(str) add_character_prompt_signal = pyqtSignal(str) - file_download_status_signal = pyqtSignal(bool) overall_progress_signal = pyqtSignal(int, int) finished_signal = pyqtSignal(int, int, bool) + # Signal now carries link_text (ensure this matches downloader_utils) + external_link_signal = pyqtSignal(str, str, str, str) # post_title, link_text, link_url, platform + file_progress_signal = pyqtSignal(str, int, int) def __init__(self): @@ -56,93 +89,131 @@ class DownloaderApp(QWidget): self.processed_posts_count = 0 self.download_counter = 0 self.skip_counter = 0 - self.worker_signals = PostProcessorSignals() + self.worker_signals = PostProcessorSignals() # Instance of signals for multi-thread workers self.prompt_mutex = QMutex() self._add_character_response = None self.downloaded_files = set() 
self.downloaded_files_lock = threading.Lock() self.downloaded_file_hashes = set() self.downloaded_file_hashes_lock = threading.Lock() - self.load_known_names_from_util() # Changed to reflect it's from utils - self.setWindowTitle("Kemono Downloader v2.3 (Content Dedupe & Skip)") - self.setGeometry(150, 150, 1050, 820) + # self.external_links = [] # This list seems unused now + self.show_external_links = False + + # --- For sequential delayed link display --- + self.external_link_queue = deque() + self._is_processing_external_link_queue = False + # --- END --- + + # --- ADDED: For Log Verbosity --- + self.basic_log_mode = False # Start with full log + self.log_verbosity_button = None + # --- END ADDED --- + + self.main_log_output = None + self.external_log_output = None + self.log_splitter = None # This is the VERTICAL splitter for logs + self.main_splitter = None # This will be the main HORIZONTAL splitter + self.reset_button = None + + self.manga_mode_checkbox = None + + self.load_known_names_from_util() + self.setWindowTitle("Kemono Downloader v3.0.0") + self.setGeometry(150, 150, 1050, 820) # Initial size self.setStyleSheet(self.get_dark_theme()) self.init_ui() self._connect_signals() self.log_signal.emit("ℹ️ Local API server functionality has been removed.") + self.log_signal.emit("ℹ️ 'Skip Current File' button has been removed.") + self.character_input.setToolTip("Enter one or more character names, separated by commas (e.g., yor, makima)") def _connect_signals(self): - self.worker_signals.progress_signal.connect(self.log) - self.worker_signals.file_download_status_signal.connect(self.update_skip_button_state) - self.log_signal.connect(self.log) + # Signals from the worker_signals object (used by PostProcessorWorker in multi-threaded mode) + if hasattr(self.worker_signals, 'progress_signal'): + self.worker_signals.progress_signal.connect(self.handle_main_log) + if hasattr(self.worker_signals, 'file_progress_signal'): + 
self.worker_signals.file_progress_signal.connect(self.update_file_progress_display) + # Connect the external_link_signal from worker_signals to the queue handler + if hasattr(self.worker_signals, 'external_link_signal'): + self.worker_signals.external_link_signal.connect(self.handle_external_link_signal) + + # App's own signals (some of which might be emitted by DownloadThread which then connects to these handlers) + self.log_signal.connect(self.handle_main_log) self.add_character_prompt_signal.connect(self.prompt_add_character) self.character_prompt_response_signal.connect(self.receive_add_character_result) self.overall_progress_signal.connect(self.update_progress_display) self.finished_signal.connect(self.download_finished) + # Connect the app's external_link_signal also to the queue handler + self.external_link_signal.connect(self.handle_external_link_signal) + self.file_progress_signal.connect(self.update_file_progress_display) + + self.character_search_input.textChanged.connect(self.filter_character_list) + self.external_links_checkbox.toggled.connect(self.update_external_links_setting) + self.thread_count_input.textChanged.connect(self.update_multithreading_label) + self.use_subfolder_per_post_checkbox.toggled.connect(self.update_ui_for_subfolders) + + if self.reset_button: + self.reset_button.clicked.connect(self.reset_application_state) + + # Connect log verbosity button if it exists + if self.log_verbosity_button: + self.log_verbosity_button.clicked.connect(self.toggle_log_verbosity) + + if self.manga_mode_checkbox: + self.manga_mode_checkbox.toggled.connect(self.update_ui_for_manga_mode) + self.link_input.textChanged.connect(lambda: self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)) + + # --- load_known_names_from_util, save_known_names, closeEvent --- + # These methods remain unchanged from your original file def load_known_names_from_util(self): - # KNOWN_NAMES is now managed in downloader_utils, 
but GUI needs to populate its list - # and this method also handles initial log messages. - loaded_names = [] + global KNOWN_NAMES if os.path.exists(self.config_file): try: with open(self.config_file, 'r', encoding='utf-8') as f: raw_names = [line.strip() for line in f] - loaded_names = sorted(list(set(filter(None, raw_names)))) - log_msg = f"ℹ️ Loaded {len(loaded_names)} known names from {self.config_file}" + # Filter out empty strings before setting KNOWN_NAMES + KNOWN_NAMES[:] = sorted(list(set(filter(None, raw_names)))) + log_msg = f"ℹ️ Loaded {len(KNOWN_NAMES)} known names from {self.config_file}" except Exception as e: log_msg = f"❌ Error loading config '{self.config_file}': {e}" QMessageBox.warning(self, "Config Load Error", f"Could not load list from {self.config_file}:\n{e}") - loaded_names = [] + KNOWN_NAMES[:] = [] else: log_msg = f"ℹ️ Config file '{self.config_file}' not found. Starting empty." - loaded_names = [] + KNOWN_NAMES[:] = [] - # Update the global KNOWN_NAMES in downloader_utils - # This requires downloader_utils.KNOWN_NAMES to be mutable (it's a list) - # Or pass the list back if it were a function in utils returning the list. - # For simplicity with global-like config, directly modify. - # Ensure downloader_utils.py defines KNOWN_NAMES = [] at the top. 
- import downloader_utils - downloader_utils.KNOWN_NAMES[:] = loaded_names # Modify in place + self.log_signal.emit(log_msg) - if hasattr(self, 'log_output'): - self.log_signal.emit(log_msg) - else: - print(log_msg) - # Populate the GUI list if it exists - if hasattr(self, 'character_list'): + if hasattr(self, 'character_list'): # Ensure character_list widget exists self.character_list.clear() - self.character_list.addItems(downloader_utils.KNOWN_NAMES) + self.character_list.addItems(KNOWN_NAMES) def save_known_names(self): - # KNOWN_NAMES is from downloader_utils - import downloader_utils + global KNOWN_NAMES try: - unique_sorted_names = sorted(list(set(filter(None, downloader_utils.KNOWN_NAMES)))) + # Ensure KNOWN_NAMES contains unique, non-empty, sorted strings + unique_sorted_names = sorted(list(set(filter(None, KNOWN_NAMES)))) + KNOWN_NAMES[:] = unique_sorted_names # Update global list in place + with open(self.config_file, 'w', encoding='utf-8') as f: for name in unique_sorted_names: f.write(name + '\n') - downloader_utils.KNOWN_NAMES[:] = unique_sorted_names # Update in place - if hasattr(self, 'log_signal'): - self.log_signal.emit(f"💾 Saved {len(unique_sorted_names)} known names to {self.config_file}") - else: - print(f"Saved {len(unique_sorted_names)} names to {self.config_file}") + self.log_signal.emit(f"💾 Saved {len(unique_sorted_names)} known names to {self.config_file}") except Exception as e: log_msg = f"❌ Error saving config '{self.config_file}': {e}" - if hasattr(self, 'log_signal'): - self.log_signal.emit(log_msg) - else: - print(log_msg) + self.log_signal.emit(log_msg) QMessageBox.warning(self, "Config Save Error", f"Could not save list to {self.config_file}:\n{e}") def closeEvent(self, event): self.save_known_names() should_exit = True - is_downloading = (self.download_thread and self.download_thread.isRunning()) or (self.thread_pool is not None) + is_downloading = (self.download_thread and self.download_thread.isRunning()) or \ + 
(self.thread_pool is not None and any(not f.done() for f in self.active_futures if f is not None)) + if is_downloading: reply = QMessageBox.question(self, "Confirm Exit", @@ -150,7 +221,22 @@ class DownloaderApp(QWidget): QMessageBox.Yes | QMessageBox.No, QMessageBox.No) if reply == QMessageBox.Yes: self.log_signal.emit("⚠️ Cancelling active download due to application exit...") - self.cancel_download() + self.cancel_download() # Signal cancellation + # --- MODIFICATION START: Wait for threads to finish --- + self.log_signal.emit(" Waiting briefly for threads to acknowledge cancellation...") + # Wait for the single thread if it exists + if self.download_thread and self.download_thread.isRunning(): + self.download_thread.wait(3000) # Wait up to 3 seconds + if self.download_thread.isRunning(): + self.log_signal.emit(" ⚠️ Single download thread did not terminate gracefully.") + # Wait for the thread pool if it exists + if self.thread_pool: + # Shutdown was already initiated by cancel_download, just wait here + # Use wait=True here for cleaner exit + self.thread_pool.shutdown(wait=True, cancel_futures=True) + self.log_signal.emit(" Thread pool shutdown complete.") + self.thread_pool = None # Clear reference + # --- MODIFICATION END --- else: should_exit = False self.log_signal.emit("ℹ️ Application exit cancelled.") @@ -159,55 +245,107 @@ class DownloaderApp(QWidget): if should_exit: self.log_signal.emit("ℹ️ Application closing.") + # Ensure thread pool is None if already shut down above + if self.thread_pool: + self.log_signal.emit(" Final thread pool check: Shutting down...") + self.cancellation_event.set() + self.thread_pool.shutdown(wait=True, cancel_futures=True) + self.thread_pool = None self.log_signal.emit("👋 Exiting application.") event.accept() def init_ui(self): - main_layout = QHBoxLayout() - left_layout = QVBoxLayout() - right_layout = QVBoxLayout() - left_layout.addWidget(QLabel("🔗 Kemono Creator/Post URL:")) + # --- MODIFIED: Use QSplitter for main layout 
--- + self.main_splitter = QSplitter(Qt.Horizontal) + + # Create container widgets for left and right panels + left_panel_widget = QWidget() + right_panel_widget = QWidget() + + # Setup layouts for the panels + left_layout = QVBoxLayout(left_panel_widget) # Apply layout to widget + right_layout = QVBoxLayout(right_panel_widget) # Apply layout to widget + left_layout.setContentsMargins(10, 10, 10, 10) # Add some margins + right_layout.setContentsMargins(10, 10, 10, 10) + + # --- Populate Left Panel (Controls) --- + # (All the QLineEdit, QCheckBox, QPushButton, etc. setup code goes here, adding to left_layout) + # URL and Page Range Input + url_page_layout = QHBoxLayout() + url_page_layout.setContentsMargins(0,0,0,0) + url_page_layout.addWidget(QLabel("🔗 Kemono Creator/Post URL:")) self.link_input = QLineEdit() self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765") self.link_input.textChanged.connect(self.update_custom_folder_visibility) - left_layout.addWidget(self.link_input) + # self.link_input.setFixedWidth(int(self.width() * 0.45)) # Remove fixed width for splitter + url_page_layout.addWidget(self.link_input, 1) # Give it stretch factor + self.page_range_label = QLabel("Page Range:") + self.page_range_label.setStyleSheet("font-weight: bold; padding-left: 10px;") + self.start_page_input = QLineEdit() + self.start_page_input.setPlaceholderText("Start") + self.start_page_input.setFixedWidth(50) + self.start_page_input.setValidator(QIntValidator(1, 99999)) # Min 1 + self.to_label = QLabel("to") + self.end_page_input = QLineEdit() + self.end_page_input.setPlaceholderText("End") + self.end_page_input.setFixedWidth(50) + self.end_page_input.setValidator(QIntValidator(1, 99999)) # Min 1 + url_page_layout.addWidget(self.page_range_label) + url_page_layout.addWidget(self.start_page_input) + url_page_layout.addWidget(self.to_label) + url_page_layout.addWidget(self.end_page_input) + # url_page_layout.addStretch(1) # No need for 
stretch with splitter + left_layout.addLayout(url_page_layout) + + # Download Directory Input left_layout.addWidget(QLabel("📁 Download Location:")) self.dir_input = QLineEdit() self.dir_input.setPlaceholderText("Select folder where downloads will be saved") self.dir_button = QPushButton("Browse...") self.dir_button.clicked.connect(self.browse_directory) dir_layout = QHBoxLayout() - dir_layout.addWidget(self.dir_input, 1) + dir_layout.addWidget(self.dir_input, 1) # Input takes more space dir_layout.addWidget(self.dir_button) left_layout.addLayout(dir_layout) - self.custom_folder_widget = QWidget() + + # Custom Folder Name (for single post) + self.custom_folder_widget = QWidget() # Use a widget to hide/show group custom_folder_layout = QVBoxLayout(self.custom_folder_widget) - custom_folder_layout.setContentsMargins(0, 5, 0, 0) + custom_folder_layout.setContentsMargins(0, 5, 0, 0) # No top margin if hidden self.custom_folder_label = QLabel("🗄️ Custom Folder Name (Single Post Only):") self.custom_folder_input = QLineEdit() self.custom_folder_input.setPlaceholderText("Optional: Save this post to specific folder") custom_folder_layout.addWidget(self.custom_folder_label) custom_folder_layout.addWidget(self.custom_folder_input) - self.custom_folder_widget.setVisible(False) + self.custom_folder_widget.setVisible(False) # Initially hidden left_layout.addWidget(self.custom_folder_widget) + + # Character Filter Input self.character_filter_widget = QWidget() character_filter_layout = QVBoxLayout(self.character_filter_widget) - character_filter_layout.setContentsMargins(0, 5, 0, 0) - self.character_label = QLabel("🎯 Filter by Show/Character Name:") + character_filter_layout.setContentsMargins(0,5,0,0) + self.character_label = QLabel("🎯 Filter by Character(s) (comma-separated):") self.character_input = QLineEdit() - self.character_input.setPlaceholderText("Only download posts matching this known name in title") + self.character_input.setPlaceholderText("e.g., yor, makima, anya 
forger") character_filter_layout.addWidget(self.character_label) character_filter_layout.addWidget(self.character_input) - self.character_filter_widget.setVisible(True) + self.character_filter_widget.setVisible(True) # Visible by default left_layout.addWidget(self.character_filter_widget) + + # Skip Words Input left_layout.addWidget(QLabel("🚫 Skip Posts/Files with Words (comma-separated):")) self.skip_words_input = QLineEdit() self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview") left_layout.addWidget(self.skip_words_input) - options_layout_1 = QHBoxLayout() - options_layout_1.addWidget(QLabel("Filter Files:")) - self.radio_group = QButtonGroup(self) + + # File Type Filter Radio Buttons + file_filter_layout = QVBoxLayout() # Group label and radio buttons + file_filter_layout.setContentsMargins(0,0,0,0) # Compact + file_filter_layout.addWidget(QLabel("Filter Files:")) + radio_button_layout = QHBoxLayout() + radio_button_layout.setSpacing(10) + self.radio_group = QButtonGroup(self) # Ensures one selection self.radio_all = QRadioButton("All") self.radio_images = QRadioButton("Images/GIFs") self.radio_videos = QRadioButton("Videos") @@ -215,221 +353,474 @@ class DownloaderApp(QWidget): self.radio_group.addButton(self.radio_all) self.radio_group.addButton(self.radio_images) self.radio_group.addButton(self.radio_videos) - options_layout_1.addWidget(self.radio_all) - options_layout_1.addWidget(self.radio_images) - options_layout_1.addWidget(self.radio_videos) - options_layout_1.addStretch(1) - left_layout.addLayout(options_layout_1) - options_layout_2 = QHBoxLayout() - self.use_subfolders_checkbox = QCheckBox("Separate Folders by Name/Title") - self.use_subfolders_checkbox.setChecked(True) - self.use_subfolders_checkbox.toggled.connect(self.update_ui_for_subfolders) - options_layout_2.addWidget(self.use_subfolders_checkbox) + radio_button_layout.addWidget(self.radio_all) + radio_button_layout.addWidget(self.radio_images) + 
radio_button_layout.addWidget(self.radio_videos) + radio_button_layout.addStretch(1) # Pushes buttons to left + file_filter_layout.addLayout(radio_button_layout) + left_layout.addLayout(file_filter_layout) + # Checkboxes Group + checkboxes_group_layout = QVBoxLayout() + checkboxes_group_layout.setSpacing(10) # Spacing between rows of checkboxes + + row1_layout = QHBoxLayout() # First row of checkboxes + row1_layout.setSpacing(10) + self.skip_zip_checkbox = QCheckBox("Skip .zip") + self.skip_zip_checkbox.setChecked(True) + row1_layout.addWidget(self.skip_zip_checkbox) + self.skip_rar_checkbox = QCheckBox("Skip .rar") + self.skip_rar_checkbox.setChecked(True) + row1_layout.addWidget(self.skip_rar_checkbox) self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only") self.download_thumbnails_checkbox.setChecked(False) self.download_thumbnails_checkbox.setToolTip("Thumbnail download functionality is currently limited without the API.") - options_layout_2.addWidget(self.download_thumbnails_checkbox) - options_layout_2.addStretch(1) - left_layout.addLayout(options_layout_2) - options_layout_3 = QHBoxLayout() - self.skip_zip_checkbox = QCheckBox("Skip .zip") - self.skip_zip_checkbox.setChecked(True) - options_layout_3.addWidget(self.skip_zip_checkbox) - self.skip_rar_checkbox = QCheckBox("Skip .rar") - self.skip_rar_checkbox.setChecked(True) - options_layout_3.addWidget(self.skip_rar_checkbox) - + row1_layout.addWidget(self.download_thumbnails_checkbox) self.compress_images_checkbox = QCheckBox("Compress Large Images (to WebP)") self.compress_images_checkbox.setChecked(False) self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).") - options_layout_3.addWidget(self.compress_images_checkbox) - options_layout_3.addStretch(1) - left_layout.addLayout(options_layout_3) - options_layout_4 = QHBoxLayout() - self.use_multithreading_checkbox = QCheckBox(f"Use Multithreading ({4} Threads)") + 
row1_layout.addWidget(self.compress_images_checkbox) + row1_layout.addStretch(1) # Pushes checkboxes to left + checkboxes_group_layout.addLayout(row1_layout) + + # Advanced Settings Section + advanced_settings_label = QLabel("⚙️ Advanced Settings:") + checkboxes_group_layout.addWidget(advanced_settings_label) + + advanced_row1_layout = QHBoxLayout() # Subfolders options + advanced_row1_layout.setSpacing(10) + self.use_subfolders_checkbox = QCheckBox("Separate Folders by Name/Title") + self.use_subfolders_checkbox.setChecked(True) + self.use_subfolders_checkbox.toggled.connect(self.update_ui_for_subfolders) + advanced_row1_layout.addWidget(self.use_subfolders_checkbox) + self.use_subfolder_per_post_checkbox = QCheckBox("Subfolder per Post") + self.use_subfolder_per_post_checkbox.setChecked(False) + self.use_subfolder_per_post_checkbox.setToolTip("Creates a subfolder for each post inside the character/title folder.") + self.use_subfolder_per_post_checkbox.toggled.connect(self.update_ui_for_subfolders) # Also update UI + advanced_row1_layout.addWidget(self.use_subfolder_per_post_checkbox) + advanced_row1_layout.addStretch(1) + checkboxes_group_layout.addLayout(advanced_row1_layout) + + advanced_row2_layout = QHBoxLayout() # Multithreading, External Links, Manga Mode + advanced_row2_layout.setSpacing(10) + multithreading_layout = QHBoxLayout() # Group multithreading checkbox and input + multithreading_layout.setContentsMargins(0,0,0,0) + self.use_multithreading_checkbox = QCheckBox("Use Multithreading") self.use_multithreading_checkbox.setChecked(True) self.use_multithreading_checkbox.setToolTip("Speeds up downloads for full creator pages.\nSingle post URLs always use one thread.") - options_layout_4.addWidget(self.use_multithreading_checkbox) - options_layout_4.addStretch(1) - left_layout.addLayout(options_layout_4) + multithreading_layout.addWidget(self.use_multithreading_checkbox) + self.thread_count_label = QLabel("Threads:") + 
multithreading_layout.addWidget(self.thread_count_label) + self.thread_count_input = QLineEdit() + self.thread_count_input.setFixedWidth(40) + self.thread_count_input.setText("4") + self.thread_count_input.setToolTip("Number of threads (recommended: 4-10, max: 200).") + self.thread_count_input.setValidator(QIntValidator(1,200)) # Min 1, Max 200 + multithreading_layout.addWidget(self.thread_count_input) + advanced_row2_layout.addLayout(multithreading_layout) + + self.external_links_checkbox = QCheckBox("Show External Links in Log") + self.external_links_checkbox.setChecked(False) + advanced_row2_layout.addWidget(self.external_links_checkbox) + + self.manga_mode_checkbox = QCheckBox("Manga Mode") + self.manga_mode_checkbox.setToolTip("Process newest posts first, rename files based on post title (for creator feeds only).") + self.manga_mode_checkbox.setChecked(False) + advanced_row2_layout.addWidget(self.manga_mode_checkbox) + advanced_row2_layout.addStretch(1) + checkboxes_group_layout.addLayout(advanced_row2_layout) + + left_layout.addLayout(checkboxes_group_layout) + + # Download and Cancel Buttons btn_layout = QHBoxLayout() + btn_layout.setSpacing(10) self.download_btn = QPushButton("⬇️ Start Download") - self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;") + self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;") # Make it prominent self.download_btn.clicked.connect(self.start_download) self.cancel_btn = QPushButton("❌ Cancel") - self.cancel_btn.setEnabled(False) + self.cancel_btn.setEnabled(False) # Initially disabled self.cancel_btn.clicked.connect(self.cancel_download) - self.skip_file_btn = QPushButton("⏭️ Skip Current File") - self.skip_file_btn.setEnabled(False) - self.skip_file_btn.setToolTip("Only available in single-thread mode during file download.") - self.skip_file_btn.clicked.connect(self.skip_current_file) btn_layout.addWidget(self.download_btn) btn_layout.addWidget(self.cancel_btn) - 
btn_layout.addWidget(self.skip_file_btn) left_layout.addLayout(btn_layout) - left_layout.addSpacing(10) + left_layout.addSpacing(10) # Some space before known characters list + + # Known Characters/Shows List Management known_chars_label_layout = QHBoxLayout() + known_chars_label_layout.setSpacing(10) self.known_chars_label = QLabel("🎭 Known Shows/Characters (for Folder Names):") self.character_search_input = QLineEdit() self.character_search_input.setPlaceholderText("Search characters...") - known_chars_label_layout.addWidget(self.known_chars_label, 1) + known_chars_label_layout.addWidget(self.known_chars_label, 1) # Label takes more space known_chars_label_layout.addWidget(self.character_search_input) - left_layout.addLayout(known_chars_label_layout) - + self.character_list = QListWidget() - # KNOWN_NAMES will be populated by load_known_names_from_util - self.character_list.setSelectionMode(QListWidget.ExtendedSelection) - left_layout.addWidget(self.character_list, 1) - char_manage_layout = QHBoxLayout() + self.character_list.setSelectionMode(QListWidget.ExtendedSelection) # Allow multi-select for delete + left_layout.addWidget(self.character_list, 1) # Takes remaining vertical space + + char_manage_layout = QHBoxLayout() # Add/Delete character buttons + char_manage_layout.setSpacing(10) self.new_char_input = QLineEdit() self.new_char_input.setPlaceholderText("Add new show/character name") self.add_char_button = QPushButton("➕ Add") self.delete_char_button = QPushButton("🗑️ Delete Selected") self.add_char_button.clicked.connect(self.add_new_character) - self.new_char_input.returnPressed.connect(self.add_char_button.click) + self.new_char_input.returnPressed.connect(self.add_char_button.click) # Add on Enter self.delete_char_button.clicked.connect(self.delete_selected_character) - char_manage_layout.addWidget(self.new_char_input, 2) + char_manage_layout.addWidget(self.new_char_input, 2) # Input field wider char_manage_layout.addWidget(self.add_char_button, 1) 
char_manage_layout.addWidget(self.delete_char_button, 1) left_layout.addLayout(char_manage_layout) - right_layout.addWidget(QLabel("📜 Progress Log:")) - self.log_output = QTextEdit() - self.log_output.setReadOnly(True) - self.log_output.setMinimumWidth(450) - self.log_output.setLineWrapMode(QTextEdit.WidgetWidth) - right_layout.addWidget(self.log_output, 1) + left_layout.addStretch(0) # Prevent vertical stretching of controls + + # --- Populate Right Panel (Logs) --- + log_title_layout = QHBoxLayout() + log_title_layout.addWidget(QLabel("📜 Progress Log:")) + log_title_layout.addStretch(1) + + # --- ADDED: Log Verbosity Button --- + self.log_verbosity_button = QPushButton("Show Basic Log") + self.log_verbosity_button.setToolTip("Toggle between full and basic log details.") + self.log_verbosity_button.setFixedWidth(110) # Adjust width as needed + self.log_verbosity_button.setStyleSheet("padding: 4px 8px;") + log_title_layout.addWidget(self.log_verbosity_button) + # --- END ADDED --- + + self.reset_button = QPushButton("🔄 Reset") + self.reset_button.setToolTip("Reset all inputs and logs to default state (only when idle).") + self.reset_button.setFixedWidth(80) + self.reset_button.setStyleSheet("padding: 4px 8px;") # Smaller padding + log_title_layout.addWidget(self.reset_button) + right_layout.addLayout(log_title_layout) + + self.log_splitter = QSplitter(Qt.Vertical) # Keep the vertical splitter for logs + self.main_log_output = QTextEdit() + self.main_log_output.setReadOnly(True) + # self.main_log_output.setMinimumWidth(450) # Remove minimum width + self.main_log_output.setLineWrapMode(QTextEdit.NoWrap) # Disable line wrapping + self.main_log_output.setStyleSheet(""" + QTextEdit { + background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; + color: #F0F0F0; border-radius: 4px; font-family: Consolas, Courier New, monospace; font-size: 9.5pt; + }""") + self.external_log_output = QTextEdit() + self.external_log_output.setReadOnly(True) + # 
self.external_log_output.setMinimumWidth(450) # Remove minimum width + self.external_log_output.setLineWrapMode(QTextEdit.NoWrap) # Disable line wrapping + self.external_log_output.setStyleSheet(""" + QTextEdit { + background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; + color: #F0F0F0; border-radius: 4px; font-family: Consolas, Courier New, monospace; font-size: 9.5pt; + }""") + self.external_log_output.hide() # Initially hidden + self.log_splitter.addWidget(self.main_log_output) + self.log_splitter.addWidget(self.external_log_output) + self.log_splitter.setSizes([self.height(), 0]) # Main log takes all space initially + right_layout.addWidget(self.log_splitter, 1) # Log splitter takes available vertical space + self.progress_label = QLabel("Progress: Idle") self.progress_label.setStyleSheet("padding-top: 5px; font-style: italic;") right_layout.addWidget(self.progress_label) - main_layout.addLayout(left_layout, 5) - main_layout.addLayout(right_layout, 4) - self.setLayout(main_layout) + + self.file_progress_label = QLabel("") # For individual file progress + self.file_progress_label.setWordWrap(True) # Enable word wrapping for the status label + self.file_progress_label.setStyleSheet("padding-top: 2px; font-style: italic; color: #A0A0A0;") + right_layout.addWidget(self.file_progress_label) + + # --- Add panels to the main horizontal splitter --- + self.main_splitter.addWidget(left_panel_widget) + self.main_splitter.addWidget(right_panel_widget) + + # --- Set initial sizes for the splitter --- + # Calculate initial sizes (e.g., left 35%, right 65%) + initial_width = self.width() # Use the initial window width + left_width = int(initial_width * 0.35) + right_width = initial_width - left_width + self.main_splitter.setSizes([left_width, right_width]) + + # --- Set the main splitter as the central layout --- + # Need a top-level layout to hold the splitter + top_level_layout = QHBoxLayout(self) # Apply layout directly to the main widget (self) + 
top_level_layout.setContentsMargins(0,0,0,0) # No margins for the main layout + top_level_layout.addWidget(self.main_splitter) + # self.setLayout(top_level_layout) # Already set above + + # --- End Layout Modification --- + + # Initial UI state updates self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked()) self.update_custom_folder_visibility() + self.update_external_links_setting(self.external_links_checkbox.isChecked()) + self.update_multithreading_label(self.thread_count_input.text()) + self.update_page_range_enabled_state() + if self.manga_mode_checkbox: # Ensure it exists before accessing + self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked()) + self.link_input.textChanged.connect(self.update_page_range_enabled_state) # Connect after init + self.load_known_names_from_util() # Load names into the list widget def get_dark_theme(self): return """ - QWidget { - background-color: #2E2E2E; - color: #E0E0E0; - font-family: Segoe UI, Arial, sans-serif; - font-size: 10pt; - } - QLineEdit, QTextEdit, QListWidget { - background-color: #3C3F41; - border: 1px solid #5A5A5A; - padding: 5px; - color: #F0F0F0; - border-radius: 4px; - } - QTextEdit { - font-family: Consolas, Courier New, monospace; - font-size: 9.5pt; - } - QPushButton { - background-color: #555; - color: #F0F0F0; - border: 1px solid #6A6A6A; - padding: 6px 12px; - border-radius: 4px; - min-height: 22px; - } - QPushButton:hover { - background-color: #656565; - border: 1px solid #7A7A7A; - } - QPushButton:pressed { - background-color: #4A4A4A; - } - QPushButton:disabled { - background-color: #404040; - color: #888; - border-color: #555; - } - QLabel { - font-weight: bold; - padding-top: 4px; - padding-bottom: 2px; - color: #C0C0C0; - } - QRadioButton, QCheckBox { - spacing: 5px; - color: #E0E0E0; - padding-top: 4px; - padding-bottom: 4px; - } - QRadioButton::indicator, QCheckBox::indicator { - width: 14px; - height: 14px; - } - QListWidget { - alternate-background-color: #353535; 
- border: 1px solid #5A5A5A; - } - QListWidget::item:selected { - background-color: #007ACC; - color: #FFFFFF; - } - QToolTip { - background-color: #4A4A4A; - color: #F0F0F0; - border: 1px solid #6A6A6A; - padding: 4px; - border-radius: 3px; - } - """ + QWidget { background-color: #2E2E2E; color: #E0E0E0; font-family: Segoe UI, Arial, sans-serif; font-size: 10pt; } + QLineEdit, QListWidget { background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; color: #F0F0F0; border-radius: 4px; } + QTextEdit { background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; color: #F0F0F0; border-radius: 4px; } + QPushButton { background-color: #555; color: #F0F0F0; border: 1px solid #6A6A6A; padding: 6px 12px; border-radius: 4px; min-height: 22px; } + QPushButton:hover { background-color: #656565; border: 1px solid #7A7A7A; } + QPushButton:pressed { background-color: #4A4A4A; } + QPushButton:disabled { background-color: #404040; color: #888; border-color: #555; } + QLabel { font-weight: bold; padding-top: 4px; padding-bottom: 2px; color: #C0C0C0; } + QRadioButton, QCheckBox { spacing: 5px; color: #E0E0E0; padding-top: 4px; padding-bottom: 4px; } + QRadioButton::indicator, QCheckBox::indicator { width: 14px; height: 14px; } + QListWidget { alternate-background-color: #353535; border: 1px solid #5A5A5A; } + QListWidget::item:selected { background-color: #007ACC; color: #FFFFFF; } + QToolTip { background-color: #4A4A4A; color: #F0F0F0; border: 1px solid #6A6A6A; padding: 4px; border-radius: 3px; } + QSplitter::handle { background-color: #5A5A5A; width: 5px; /* Make handle slightly wider */ } + QSplitter::handle:horizontal { width: 5px; } + QSplitter::handle:vertical { height: 5px; } + """ # Added styling for splitter handle + def browse_directory(self): current_dir = self.dir_input.text() if os.path.isdir(self.dir_input.text()) else "" folder = QFileDialog.getExistingDirectory(self, "Select Download Folder", current_dir) if folder: self.dir_input.setText(folder) - 
def log(self, message): + def handle_main_log(self, message): + # --- ADDED: Log Verbosity Filtering --- + if self.basic_log_mode: + # Define keywords/prefixes for messages to ALWAYS show in basic mode + basic_keywords = [ + '🚀 Starting Download', '🏁 Download Finished', '🏁 Download Cancelled', + '❌', '⚠️', '✅ All posts processed', '✅ Reached end of posts', + 'Summary:', 'Progress:', '[Fetcher]', # Show fetcher logs for context + 'CRITICAL ERROR', 'IMPORT ERROR' + ] + # Check if the message contains any of the basic keywords/prefixes + if not any(keyword in message for keyword in basic_keywords): + return # Skip appending this message in basic mode + # --- END ADDED --- + try: + # Ensure message is a string and replace null characters that can crash QTextEdit safe_message = str(message).replace('\x00', '[NULL]') - self.log_output.append(safe_message) - scrollbar = self.log_output.verticalScrollBar() - if scrollbar.value() >= scrollbar.maximum() - 30: + self.main_log_output.append(safe_message) + # Auto-scroll if near the bottom + scrollbar = self.main_log_output.verticalScrollBar() + if scrollbar.value() >= scrollbar.maximum() - 30: # Threshold for auto-scroll scrollbar.setValue(scrollbar.maximum()) except Exception as e: - print(f"GUI Log Error: {e}") - print(f"Original Message: {message}") + # Fallback logging if GUI logging fails + print(f"GUI Main Log Error: {e}\nOriginal Message: {message}") + # --- ADDED: Helper to check download state --- + def _is_download_active(self): + """Checks if a download thread or pool is currently active.""" + single_thread_active = self.download_thread and self.download_thread.isRunning() + # Check if pool exists AND has any futures that are not done + pool_active = self.thread_pool is not None and any(not f.done() for f in self.active_futures if f is not None) + return single_thread_active or pool_active + # --- END ADDED --- + + # --- ADDED: New system for handling external links with sequential CONDITIONAL delay --- + # 
MODIFIED: Slot now takes link_text as the second argument + def handle_external_link_signal(self, post_title, link_text, link_url, platform): + """Receives link signals, adds them to a queue, and triggers processing.""" + # We still receive post_title for potential future use, but use link_text for display + self.external_link_queue.append((link_text, link_url, platform)) + self._try_process_next_external_link() + + def _try_process_next_external_link(self): + """Processes the next link from the queue if not already processing.""" + if self._is_processing_external_link_queue or \ + not self.external_link_queue or \ + not (self.show_external_links and self.external_log_output and self.external_log_output.isVisible()): + return + + self._is_processing_external_link_queue = True + + # MODIFIED: Get link_text from queue + link_text, link_url, platform = self.external_link_queue.popleft() + self._append_link_to_external_log(link_text, link_url, platform) # Display this link now + + # --- MODIFIED: Conditional delay --- + if self._is_download_active(): + # Schedule the end of this link's "display period" with delay + delay_ms = random.randint(4000, 8000) # Random delay of 4-8 seconds + QTimer.singleShot(delay_ms, self._finish_current_link_processing) + else: + # No download active, process next link almost immediately + QTimer.singleShot(0, self._finish_current_link_processing) + # --- END MODIFIED --- + + def _finish_current_link_processing(self): + """Called after a delay (or immediately if download finished); allows the next link in the queue to be processed.""" + self._is_processing_external_link_queue = False + self._try_process_next_external_link() # Attempt to process the next link + + # MODIFIED: Method now takes link_text instead of title for display + def _append_link_to_external_log(self, link_text, link_url, platform): + """Appends a single formatted link to the external_log_output widget.""" + if not (self.show_external_links and self.external_log_output and 
self.external_log_output.isVisible()): + return + + # Use link_text for display, truncate if necessary + max_link_text_len = 35 # Adjust as needed + display_text = link_text[:max_link_text_len].strip() + "..." if len(link_text) > max_link_text_len else link_text + + # Format the string as requested: text - url - platform + formatted_link_text = f"{display_text} - {link_url} - {platform}" + separator = "-" * 45 # Adjust length as needed + + try: + self.external_log_output.append(separator) + self.external_log_output.append(formatted_link_text) + self.external_log_output.append("") # Add a blank line for spacing + + # Auto-scroll + scrollbar = self.external_log_output.verticalScrollBar() + if scrollbar.value() >= scrollbar.maximum() - 50: # Adjust threshold if needed + scrollbar.setValue(scrollbar.maximum()) + except Exception as e: + self.log_signal.emit(f"GUI External Log Append Error: {e}\nOriginal Message: {formatted_link_text}") + print(f"GUI External Log Error (Append): {e}\nOriginal Message: {formatted_link_text}") + # --- END ADDED --- + + + def update_file_progress_display(self, filename, downloaded_bytes, total_bytes): + if not filename and total_bytes == 0 and downloaded_bytes == 0: # Clear signal + self.file_progress_label.setText("") + return + + # MODIFIED: Truncate filename more aggressively (e.g., max 25 chars) + max_filename_len = 25 + display_filename = filename[:max_filename_len-3].strip() + "..." 
if len(filename) > max_filename_len else filename + + if total_bytes > 0: + downloaded_mb = downloaded_bytes / (1024 * 1024) + total_mb = total_bytes / (1024 * 1024) + progress_text = f"Downloading '{display_filename}' ({downloaded_mb:.1f}MB / {total_mb:.1f}MB)" + else: # If total size is unknown + downloaded_mb = downloaded_bytes / (1024 * 1024) + progress_text = f"Downloading '{display_filename}' ({downloaded_mb:.1f}MB)" + + # Check if the resulting text might still be too long (heuristic) + # This is a basic check, might need refinement based on typical log width + if len(progress_text) > 75: # Example threshold, adjust as needed + # If still too long, truncate the display_filename even more + display_filename = filename[:15].strip() + "..." if len(filename) > 18 else display_filename + if total_bytes > 0: + progress_text = f"DL '{display_filename}' ({downloaded_mb:.1f}/{total_mb:.1f}MB)" + else: + progress_text = f"DL '{display_filename}' ({downloaded_mb:.1f}MB)" + + self.file_progress_label.setText(progress_text) + + + def update_external_links_setting(self, checked): + self.show_external_links = checked + if checked: + self.external_log_output.show() + # Adjust splitter, give both logs some space + # Use the VERTICAL splitter for logs here + self.log_splitter.setSizes([self.height() // 2, self.height() // 2]) + self.main_log_output.setMinimumHeight(50) # Ensure it doesn't disappear + self.external_log_output.setMinimumHeight(50) + self.log_signal.emit("\n" + "="*40 + "\n🔗 External Links Log Enabled\n" + "="*40) + self.external_log_output.clear() # Clear previous content + self.external_log_output.append("🔗 External Links Found:") # Header + # --- ADDED: Try processing queue if log becomes visible --- + self._try_process_next_external_link() + # --- END ADDED --- + else: + self.external_log_output.hide() + # Use the VERTICAL splitter for logs here + self.log_splitter.setSizes([self.height(), 0]) # Main log takes all space + 
self.main_log_output.setMinimumHeight(0) # Reset min height + self.external_log_output.setMinimumHeight(0) + self.external_log_output.clear() # Clear content when hidden + self.log_signal.emit("\n" + "="*40 + "\n🔗 External Links Log Disabled\n" + "="*40) + # Optional: Clear queue when log is hidden? + # self.external_link_queue.clear() + # self._is_processing_external_link_queue = False def get_filter_mode(self): - if self.radio_images.isChecked(): - return 'image' - elif self.radio_videos.isChecked(): - return 'video' - return 'all' + if self.radio_images.isChecked(): return 'image' + if self.radio_videos.isChecked(): return 'video' + return 'all' # Default def add_new_character(self): - import downloader_utils # Ensure we are using the list from utils + global KNOWN_NAMES, clean_folder_name # Ensure clean_folder_name is accessible name_to_add = self.new_char_input.text().strip() if not name_to_add: QMessageBox.warning(self, "Input Error", "Name cannot be empty.") - return - name_lower = name_to_add.lower() - is_duplicate = any(existing.lower() == name_lower for existing in downloader_utils.KNOWN_NAMES) + return False # Indicate failure - if not is_duplicate: - downloader_utils.KNOWN_NAMES.append(name_to_add) - downloader_utils.KNOWN_NAMES.sort(key=str.lower) - self.character_list.clear() - self.character_list.addItems(downloader_utils.KNOWN_NAMES) - self.filter_character_list(self.character_search_input.text()) - self.log_signal.emit(f"✅ Added '{name_to_add}' to known names list.") - self.new_char_input.clear() - self.save_known_names() - else: - QMessageBox.warning(self, "Duplicate Name", f"The name '{name_to_add}' (or similar) already exists in the list.") + name_lower = name_to_add.lower() + + # 1. 
Exact Duplicate Check (case-insensitive) + is_exact_duplicate = any(existing.lower() == name_lower for existing in KNOWN_NAMES) + if is_exact_duplicate: + QMessageBox.warning(self, "Duplicate Name", f"The name '{name_to_add}' (case-insensitive) already exists.") + return False + + # 2. Similarity Check (substring, case-insensitive) + similar_names_details = [] # Store tuples of (new_name, existing_name) + for existing_name in KNOWN_NAMES: + existing_name_lower = existing_name.lower() + # Avoid self-comparison if somehow name_lower was already in a different case + if name_lower != existing_name_lower: + if name_lower in existing_name_lower or existing_name_lower in name_lower: + similar_names_details.append((name_to_add, existing_name)) + + if similar_names_details: + first_similar_new, first_similar_existing = similar_names_details[0] + + # Determine shorter and longer for the example message + shorter_name_for_msg, longer_name_for_msg = sorted( + [first_similar_new, first_similar_existing], key=len + ) + + msg_box = QMessageBox(self) + msg_box.setIcon(QMessageBox.Warning) + msg_box.setWindowTitle("Potential Name Conflict") + msg_box.setText( + f"The name '{first_similar_new}' is very similar to an existing name: '{first_similar_existing}'.\n\n" + f"For example, if a post title primarily matches the shorter name ('{shorter_name_for_msg}'), " + f"files might be saved under a folder for '{clean_folder_name(shorter_name_for_msg)}', " + f"even if the longer name ('{longer_name_for_msg}') was also relevant or intended for a more specific folder.\n" + "This could lead to files being grouped into less specific or overly broad folders than desired.\n\n" + "Do you want to change the name you are adding, or proceed anyway?" 
+ ) + change_button = msg_box.addButton("Change Name", QMessageBox.RejectRole) + proceed_button = msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole) + msg_box.setDefaultButton(proceed_button) # Default to proceed + msg_box.setEscapeButton(change_button) # Escape cancels/rejects + + msg_box.exec_() + + if msg_box.clickedButton() == change_button: + self.log_signal.emit(f"ℹ️ User chose to change the name '{first_similar_new}' due to similarity with '{first_similar_existing}'.") + return False # Don't add, user will change input and click "Add" again + # If proceed_button is clicked (or dialog is closed and proceed is default) + self.log_signal.emit(f"⚠️ User chose to proceed with adding '{first_similar_new}' despite similarity with '{first_similar_existing}'.") + # Fall through to add the name + + # If no exact duplicate, and (no similar names OR user chose to proceed with similar name) + KNOWN_NAMES.append(name_to_add) + KNOWN_NAMES.sort(key=str.lower) # Keep the list sorted (case-insensitive for sorting) + self.character_list.clear() + self.character_list.addItems(KNOWN_NAMES) + self.filter_character_list(self.character_search_input.text()) # Re-apply filter + self.log_signal.emit(f"✅ Added '{name_to_add}' to known names list.") + self.new_char_input.clear() + self.save_known_names() # Save to file + return True # Indicate success def delete_selected_character(self): - import downloader_utils + global KNOWN_NAMES selected_items = self.character_list.selectedItems() if not selected_items: QMessageBox.warning(self, "Selection Error", "Please select one or more names to delete.") @@ -437,539 +828,897 @@ class DownloaderApp(QWidget): names_to_remove = {item.text() for item in selected_items} confirm = QMessageBox.question(self, "Confirm Deletion", - f"Are you sure you want to delete {len(names_to_remove)} selected name(s)?", + f"Are you sure you want to delete {len(names_to_remove)} name(s)?", QMessageBox.Yes | QMessageBox.No, QMessageBox.No) if confirm == 
QMessageBox.Yes: - original_count = len(downloader_utils.KNOWN_NAMES) - downloader_utils.KNOWN_NAMES = [n for n in downloader_utils.KNOWN_NAMES if n not in names_to_remove] - removed_count = original_count - len(downloader_utils.KNOWN_NAMES) + original_count = len(KNOWN_NAMES) + # Filter out names to remove + KNOWN_NAMES = [n for n in KNOWN_NAMES if n not in names_to_remove] + removed_count = original_count - len(KNOWN_NAMES) if removed_count > 0: - self.log_signal.emit(f"🗑️ Removed {removed_count} name(s) from the list.") - self.character_list.clear() - downloader_utils.KNOWN_NAMES.sort(key=str.lower) - self.character_list.addItems(downloader_utils.KNOWN_NAMES) - self.filter_character_list(self.character_search_input.text()) - self.save_known_names() + self.log_signal.emit(f"🗑️ Removed {removed_count} name(s).") + self.character_list.clear() # Update UI + self.character_list.addItems(KNOWN_NAMES) + self.filter_character_list(self.character_search_input.text()) # Re-apply filter + self.save_known_names() # Save changes else: - self.log_signal.emit("ℹ️ No names were removed (selection might have changed?).") + self.log_signal.emit("ℹ️ No names were removed (they might not have been in the list or already deleted).") def update_custom_folder_visibility(self, url_text=None): - if url_text is None: - url_text = self.link_input.text() - - _, _, post_id = extract_post_info(url_text.strip()) # from downloader_utils + if url_text is None: url_text = self.link_input.text() # Get current text if not passed + _, _, post_id = extract_post_info(url_text.strip()) + # Show if it's a post URL AND subfolders are generally enabled should_show = bool(post_id) and self.use_subfolders_checkbox.isChecked() - self.custom_folder_widget.setVisible(should_show) - if not should_show: - self.custom_folder_input.clear() - + if not should_show: self.custom_folder_input.clear() # Clear if hidden def update_ui_for_subfolders(self, checked): - self.character_filter_widget.setVisible(checked) - 
self.update_custom_folder_visibility() - if not checked: - self.character_input.clear() + # Character filter input visibility depends on subfolder usage + self.character_filter_widget.setVisible(checked) + if not checked: self.character_input.clear() # Clear filter if hiding + + self.update_custom_folder_visibility() # Custom folder also depends on this + + # "Subfolder per Post" is only enabled if "Separate Folders" is also checked + self.use_subfolder_per_post_checkbox.setEnabled(checked) + if not checked: self.use_subfolder_per_post_checkbox.setChecked(False) # Uncheck if parent is disabled + + def update_page_range_enabled_state(self): + url_text = self.link_input.text().strip() + service, user_id, post_id = extract_post_info(url_text) + # Page range is for creator feeds (no post_id) + is_creator_feed = service is not None and user_id is not None and post_id is None + + manga_mode_active = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False + # Enable page range if it's a creator feed AND manga mode is NOT active + enable_page_range = is_creator_feed and not manga_mode_active + + for widget in [self.page_range_label, self.start_page_input, self.to_label, self.end_page_input]: + if widget: widget.setEnabled(enable_page_range) + if not enable_page_range: # Clear inputs if disabled + self.start_page_input.clear() + self.end_page_input.clear() + + def update_ui_for_manga_mode(self, checked): + url_text = self.link_input.text().strip() + _, _, post_id = extract_post_info(url_text) + is_creator_feed = not post_id if url_text else False # Manga mode only for creator feeds + + if self.manga_mode_checkbox: # Ensure checkbox exists + self.manga_mode_checkbox.setEnabled(is_creator_feed) # Only enable for creator feeds + if not is_creator_feed and self.manga_mode_checkbox.isChecked(): + self.manga_mode_checkbox.setChecked(False) # Uncheck if URL changes to non-creator feed + + # If manga mode is active (checked and enabled), disable page range + if 
is_creator_feed and self.manga_mode_checkbox and self.manga_mode_checkbox.isChecked(): + self.page_range_label.setEnabled(False) + self.start_page_input.setEnabled(False); self.start_page_input.clear() + self.to_label.setEnabled(False) + self.end_page_input.setEnabled(False); self.end_page_input.clear() + else: # Otherwise, let update_page_range_enabled_state handle it + self.update_page_range_enabled_state() + def filter_character_list(self, search_text): - search_text = search_text.lower() + search_text_lower = search_text.lower() for i in range(self.character_list.count()): item = self.character_list.item(i) - if search_text in item.text().lower(): - item.setHidden(False) - else: - item.setHidden(True) + item.setHidden(search_text_lower not in item.text().lower()) + + def update_multithreading_label(self, text): + try: + num_threads = int(text) + if num_threads > 0 : + self.use_multithreading_checkbox.setText(f"Use Multithreading ({num_threads} Threads)") + else: # Should be caught by validator, but defensive + self.use_multithreading_checkbox.setText("Use Multithreading (Invalid: >0)") + except ValueError: # If text is not a valid integer + self.use_multithreading_checkbox.setText("Use Multithreading (Invalid Input)") def update_progress_display(self, total_posts, processed_posts): if total_posts > 0: - try: - percent = (processed_posts / total_posts) * 100 - self.progress_label.setText(f"Progress: {processed_posts} / {total_posts} posts ({percent:.1f}%)") - except ZeroDivisionError: - self.progress_label.setText(f"Progress: {processed_posts} / {total_posts} posts") - elif processed_posts > 0: + progress_percent = (processed_posts / total_posts) * 100 + self.progress_label.setText(f"Progress: {processed_posts} / {total_posts} posts ({progress_percent:.1f}%)") + elif processed_posts > 0 : # If total_posts is unknown (e.g., single post) self.progress_label.setText(f"Progress: Processing post {processed_posts}...") - else: + else: # Initial state or no posts 
self.progress_label.setText("Progress: Starting...") + if total_posts > 0 or processed_posts > 0 : self.file_progress_label.setText("") # Clear file progress + + def start_download(self): - import downloader_utils # For KNOWN_NAMES - is_running = (self.download_thread and self.download_thread.isRunning()) or (self.thread_pool is not None) - if is_running: - self.log_signal.emit("⚠️ Download already in progress.") + global KNOWN_NAMES, BackendDownloadThread, PostProcessorWorker, extract_post_info, clean_folder_name + + if (self.download_thread and self.download_thread.isRunning()) or self.thread_pool: QMessageBox.warning(self, "Busy", "A download is already running.") return + api_url = self.link_input.text().strip() output_dir = self.dir_input.text().strip() filter_mode = self.get_filter_mode() skip_zip = self.skip_zip_checkbox.isChecked() skip_rar = self.skip_rar_checkbox.isChecked() use_subfolders = self.use_subfolders_checkbox.isChecked() + use_post_subfolders = self.use_subfolder_per_post_checkbox.isChecked() and use_subfolders compress_images = self.compress_images_checkbox.isChecked() download_thumbnails = self.download_thumbnails_checkbox.isChecked() use_multithreading = self.use_multithreading_checkbox.isChecked() - num_threads = 4 raw_skip_words = self.skip_words_input.text().strip() - skip_words_list = [] - if raw_skip_words: - skip_words_list = [word.strip() for word in raw_skip_words.split(',') if word.strip()] - service, user_id, post_id_from_url = extract_post_info(api_url) # from downloader_utils + skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()] + + manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False + + + if not api_url or not output_dir: + QMessageBox.critical(self, "Input Error", "URL and Download Directory are required."); return + service, user_id, post_id_from_url = extract_post_info(api_url) + if not service or not user_id: # Basic validation of 
extracted info + QMessageBox.critical(self, "Input Error", "Invalid or unsupported URL format."); return - if not api_url: - QMessageBox.critical(self, "Input Error", "Please enter a Kemono/Coomer URL.") - return - if not service or not user_id: - QMessageBox.critical(self, "Input Error", "Invalid or unsupported URL format.\nPlease provide a valid creator page or post URL.") - self.log_signal.emit(f"❌ Invalid URL detected: {api_url}") - return - if not output_dir: - QMessageBox.critical(self, "Input Error", "Please select a download directory.") - return if not os.path.isdir(output_dir): - reply = QMessageBox.question(self, "Directory Not Found", - f"The directory '{output_dir}' does not exist.\n\nCreate it?", - QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) - if reply == QMessageBox.Yes: - try: - os.makedirs(output_dir) - self.log_signal.emit(f"ℹ️ Created download directory: {output_dir}") - except Exception as e: - QMessageBox.critical(self, "Directory Error", f"Could not create directory:\n{e}") - self.log_signal.emit(f"❌ Failed to create directory: {output_dir} - {e}") - return - else: - return - if compress_images and Image is None: # Image imported in this file - QMessageBox.warning(self, "Dependency Missing", "Image compression requires the Pillow library, but it's not installed.\nPlease run: pip install Pillow\n\nCompression will be disabled for this session.") - self.log_signal.emit("❌ Cannot compress images: Pillow library not found.") - compress_images = False - filter_character = None - if use_subfolders and self.character_filter_widget.isVisible(): - filter_character = self.character_input.text().strip() or None + reply = QMessageBox.question(self, "Create Directory?", + f"The directory '{output_dir}' does not exist.\nCreate it now?", + QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) + if reply == QMessageBox.Yes: + try: + os.makedirs(output_dir, exist_ok=True) # exist_ok=True is safer + self.log_signal.emit(f"ℹ️ Created directory: 
{output_dir}") + except Exception as e: + QMessageBox.critical(self, "Directory Error", f"Could not create directory: {e}"); return + else: + self.log_signal.emit("❌ Download cancelled: Output directory does not exist and was not created.") + return - custom_folder_name = None + if compress_images and Image is None: # Check for Pillow if compression is enabled + QMessageBox.warning(self, "Missing Dependency", "Pillow library (for image compression) not found. Compression will be disabled.") + compress_images = False # Disable it for this run + self.compress_images_checkbox.setChecked(False) # Update UI + + manga_mode = manga_mode_is_checked and not post_id_from_url # Manga mode only for creator feeds + + num_threads_str = self.thread_count_input.text().strip() + MAX_ALLOWED_THREADS = 200 + MODERATE_THREAD_WARNING_THRESHOLD = 50 + try: + num_threads = int(num_threads_str) + if not (1 <= num_threads <= MAX_ALLOWED_THREADS): # Validate thread count + QMessageBox.critical(self, "Thread Count Error", f"Number of threads must be between 1 and {MAX_ALLOWED_THREADS}.") + self.thread_count_input.setText(str(min(max(1, num_threads), MAX_ALLOWED_THREADS))) # Correct to valid range + return + if num_threads > MODERATE_THREAD_WARNING_THRESHOLD: # Warn for very high thread counts + QMessageBox.information(self, "High Thread Count Note", + f"Using {num_threads} threads (above {MODERATE_THREAD_WARNING_THRESHOLD}) may increase resource usage and risk rate-limiting from the site.\n\nProceeding with caution.") + self.log_signal.emit(f"ℹ️ Using high thread count: {num_threads}.") + except ValueError: + QMessageBox.critical(self, "Thread Count Error", "Invalid number of threads. 
Please enter a numeric value."); return + + start_page_str, end_page_str = self.start_page_input.text().strip(), self.end_page_input.text().strip() + start_page, end_page = None, None + is_creator_feed = bool(not post_id_from_url) + + if is_creator_feed and not manga_mode: # Page range only for non-manga creator feeds + try: + if start_page_str: start_page = int(start_page_str) + if end_page_str: end_page = int(end_page_str) + if start_page is not None and start_page <= 0: raise ValueError("Start page must be positive.") + if end_page is not None and end_page <= 0: raise ValueError("End page must be positive.") + if start_page and end_page and start_page > end_page: + raise ValueError("Start page cannot be greater than end page.") + except ValueError as e: + QMessageBox.critical(self, "Page Range Error", f"Invalid page range: {e}"); return + elif manga_mode: # Manga mode processes all pages (reversed in downloader_utils) + start_page, end_page = None, None + + # --- ADDED: Clear link queue before starting new download --- + self.external_link_queue.clear() + self._is_processing_external_link_queue = False + # --- END ADDED --- + + + raw_character_filters_text = self.character_input.text().strip() + parsed_character_list = None + if raw_character_filters_text: + temp_list = [name.strip() for name in raw_character_filters_text.split(',') if name.strip()] + if temp_list: parsed_character_list = temp_list + + filter_character_list_to_pass = None # This will be passed to backend + if use_subfolders and parsed_character_list and not post_id_from_url: # Validate filters if used for subfolders + self.log_signal.emit(f"ℹ️ Validating character filters for subfolder naming: {', '.join(parsed_character_list)}") + valid_filters_for_backend = [] + user_cancelled_validation = False + for char_name in parsed_character_list: + cleaned_name_test = clean_folder_name(char_name) # Test if name is valid for folder + if not cleaned_name_test: + QMessageBox.warning(self, "Invalid Filter 
Name", f"Filter name '{char_name}' is invalid for a folder and will be skipped.") + self.log_signal.emit(f"⚠️ Skipping invalid filter for folder: '{char_name}'") + continue + + # Prompt to add to known_names if not already there + if char_name.lower() not in {kn.lower() for kn in KNOWN_NAMES}: + reply = QMessageBox.question(self, "Add Filter Name to Known List?", + f"The character filter '{char_name}' is not in your known names list (used for folder suggestions).\nAdd it now?", + QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes) + if reply == QMessageBox.Yes: + self.new_char_input.setText(char_name) # Use existing add mechanism + if self.add_new_character(): # This now handles similarity checks too + self.log_signal.emit(f"✅ Added '{char_name}' to known names via filter prompt.") + valid_filters_for_backend.append(char_name) + else: # add_new_character returned False (e.g., user chose "Change Name" or it failed) + self.log_signal.emit(f"⚠️ Failed to add '{char_name}' via filter prompt (or user opted out). 
It will still be used for filtering this session if valid.") + # Still add to backend list for current session if it's a valid folder name + if cleaned_name_test: valid_filters_for_backend.append(char_name) + elif reply == QMessageBox.Cancel: + self.log_signal.emit(f"❌ Download cancelled by user during filter validation for '{char_name}'.") + user_cancelled_validation = True; break + else: # User chose No + self.log_signal.emit(f"ℹ️ Proceeding with filter '{char_name}' for matching without adding to known list.") + if cleaned_name_test: valid_filters_for_backend.append(char_name) + else: # Already in known names + if cleaned_name_test: valid_filters_for_backend.append(char_name) + + if user_cancelled_validation: return # Stop download if user cancelled + + if valid_filters_for_backend: + filter_character_list_to_pass = valid_filters_for_backend + self.log_signal.emit(f" Using validated character filters for subfolders: {', '.join(filter_character_list_to_pass)}") + else: + self.log_signal.emit("⚠️ No valid character filters remaining after validation for subfolder naming.") + elif parsed_character_list : # Filters provided, but not for subfolders (e.g. subfolders disabled) + filter_character_list_to_pass = parsed_character_list + self.log_signal.emit(f"ℹ️ Character filters provided: {', '.join(filter_character_list_to_pass)} (Subfolder creation rules may differ).") + + # --- ADDED: Manga Mode Filter Warning --- + if manga_mode and not filter_character_list_to_pass: + msg_box = QMessageBox(self) + msg_box.setIcon(QMessageBox.Warning) + msg_box.setWindowTitle("Manga Mode Filter Warning") + msg_box.setText( + "Manga Mode is enabled, but the 'Filter by Character(s)' field is empty.\n\n" + "For best results (correct file naming and grouping), please enter the exact Manga/Series title " + "(as used by the creator on the site) into the filter field.\n\n" + "Do you want to proceed without a filter (file names might be generic) or cancel?" 
+ ) + proceed_button = msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole) # YesRole/AcceptRole makes it default + cancel_button = msg_box.addButton("Cancel Download", QMessageBox.RejectRole) # NoRole/RejectRole for cancel + + msg_box.exec_() + + if msg_box.clickedButton() == cancel_button: + self.log_signal.emit("❌ Download cancelled by user due to Manga Mode filter warning.") + return # Stop the download process here + else: + self.log_signal.emit("⚠️ Proceeding with Manga Mode without a specific title filter.") + # --- END ADDED --- + + + custom_folder_name_cleaned = None if use_subfolders and post_id_from_url and self.custom_folder_widget.isVisible(): raw_custom_name = self.custom_folder_input.text().strip() if raw_custom_name: - cleaned_custom = clean_folder_name(raw_custom_name) # from downloader_utils - if cleaned_custom: - custom_folder_name = cleaned_custom - else: - QMessageBox.warning(self, "Input Warning", f"Custom folder name '{raw_custom_name}' is invalid and will be ignored.") - self.log_signal.emit(f"⚠️ Invalid custom folder name ignored: {raw_custom_name}") - if use_subfolders and filter_character and not post_id_from_url: - clean_char_filter = clean_folder_name(filter_character.lower()) # from downloader_utils - known_names_lower = {name.lower() for name in downloader_utils.KNOWN_NAMES} + cleaned_custom = clean_folder_name(raw_custom_name) + if cleaned_custom: custom_folder_name_cleaned = cleaned_custom + else: self.log_signal.emit(f"⚠️ Invalid custom folder name ignored: '{raw_custom_name}'") - if not clean_char_filter: - self.log_signal.emit(f"❌ Filter name '{filter_character}' is invalid. 
Aborting.") - QMessageBox.critical(self, "Filter Error", "The provided filter name is invalid (contains only spaces or special characters).") - return - elif filter_character.lower() not in known_names_lower: - reply = QMessageBox.question(self, "Add Filter Name?", - f"The filter name '{filter_character}' is not in your known names list.\n\nAdd it now and continue?", - QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes) - - if reply == QMessageBox.Yes: - self.new_char_input.setText(filter_character) - self.add_new_character() - if filter_character.lower() not in {name.lower() for name in downloader_utils.KNOWN_NAMES}: - self.log_signal.emit(f"⚠️ Failed to add '{filter_character}' automatically. Please add manually if needed.") - else: - self.log_signal.emit(f"✅ Added filter '{filter_character}' to list.") - elif reply == QMessageBox.No: - self.log_signal.emit(f"ℹ️ Proceeding without adding '{filter_character}'. Posts matching it might not be saved to a specific folder unless name is derived.") - else: - self.log_signal.emit("❌ Download cancelled by user during filter check.") - return - self.log_output.clear() - self.cancellation_event.clear() + # Reset UI elements for new download + self.main_log_output.clear() + if self.show_external_links: self.external_log_output.clear(); self.external_log_output.append("🔗 External Links Found:") # Changed title slightly + self.file_progress_label.setText("") + self.cancellation_event.clear() # IMPORTANT: Clear cancellation from previous run self.active_futures = [] - self.total_posts_to_process = 0 - self.processed_posts_count = 0 - self.download_counter = 0 - self.skip_counter = 0 - with self.downloaded_files_lock: - self.downloaded_files.clear() - with self.downloaded_file_hashes_lock: - self.downloaded_file_hashes.clear() - + self.total_posts_to_process = self.processed_posts_count = self.download_counter = self.skip_counter = 0 self.progress_label.setText("Progress: Initializing...") - 
self.log_signal.emit("="*40) - self.log_signal.emit(f"🚀 Starting Download Task @ {time.strftime('%Y-%m-%d %H:%M:%S')}") - self.log_signal.emit(f" URL: {api_url}") - self.log_signal.emit(f" Save Location: {output_dir}") - mode = "Single Post" if post_id_from_url else "Creator Feed" - self.log_signal.emit(f" Mode: {mode}") - self.log_signal.emit(f" Subfolders: {'Enabled' if use_subfolders else 'Disabled'}") - if use_subfolders: - if custom_folder_name: - self.log_signal.emit(f" Custom Folder (Post): '{custom_folder_name}'") - elif filter_character: - self.log_signal.emit(f" Character Filter: '{filter_character}'") - else: - self.log_signal.emit(f" Folder Naming: Automatic (Known Names > Title Extraction)") - self.log_signal.emit(f" File Type Filter: {filter_mode}") - self.log_signal.emit(f" Skip: {'.zip' if skip_zip else ''}{', ' if skip_zip and skip_rar else ''}{'.rar' if skip_rar else ''}{'None' if not (skip_zip or skip_rar) else ''}") - if skip_words_list: - self.log_signal.emit(f" Skip Words (Title/Filename): {', '.join(skip_words_list)}") - else: - self.log_signal.emit(f" Skip Words (Title/Filename): None") - self.log_signal.emit(f" Compress Images: {'Enabled' if compress_images else 'Disabled'}") - self.log_signal.emit(f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}") - should_use_multithreading = use_multithreading and not post_id_from_url - self.log_signal.emit(f" Threading: {'Multi-threaded' if should_use_multithreading else 'Single-threaded'}") - self.log_signal.emit("="*40) - self.set_ui_enabled(False) - self.cancel_btn.setEnabled(True) - try: - common_args = { - 'api_url': api_url, - 'output_dir': output_dir, - 'known_names_copy': list(downloader_utils.KNOWN_NAMES), # From downloader_utils - 'filter_character': filter_character, - 'filter_mode': filter_mode, - 'skip_zip': skip_zip, - 'skip_rar': skip_rar, - 'use_subfolders': use_subfolders, - 'compress_images': compress_images, - 'download_thumbnails': download_thumbnails, - 
'service': service, - 'user_id': user_id, - 'downloaded_files': self.downloaded_files, - 'downloaded_files_lock': self.downloaded_files_lock, - 'downloaded_file_hashes': self.downloaded_file_hashes, - 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, - 'skip_words_list': skip_words_list, - } - if should_use_multithreading: - self.log_signal.emit(" Initializing multi-threaded download...") - multi_args = common_args.copy() - multi_args['num_threads'] = num_threads - self.start_multi_threaded_download(**multi_args) + # Log download parameters + log_messages = [ + "="*40, f"🚀 Starting Download @ {time.strftime('%Y-%m-%d %H:%M:%S')}", + f" URL: {api_url}", f" Save Location: {output_dir}", + f" Mode: {'Single Post' if post_id_from_url else 'Creator Feed'}", + ] + if is_creator_feed: + if manga_mode: + log_messages.append(" Page Range: All (Manga Mode - Oldest Posts Processed First)") else: + pr_log = "All" + if start_page or end_page: # Construct page range log string + pr_log = f"{f'From {start_page} ' if start_page else ''}{'to ' if start_page and end_page else ''}{f'{end_page}' if end_page else (f'Up to {end_page}' if end_page else (f'From {start_page}' if start_page else 'Specific Range'))}".strip() + log_messages.append(f" Page Range: {pr_log if pr_log else 'All'}") + + + log_messages.append(f" Subfolders: {'Enabled' if use_subfolders else 'Disabled'}") + if use_subfolders: + if custom_folder_name_cleaned: log_messages.append(f" Custom Folder (Post): '{custom_folder_name_cleaned}'") + elif filter_character_list_to_pass and not post_id_from_url: log_messages.append(f" Character Filters for Folders: {', '.join(filter_character_list_to_pass)}") + else: log_messages.append(f" Folder Naming: Automatic (based on title/known names)") + log_messages.append(f" Subfolder per Post: {'Enabled' if use_post_subfolders else 'Disabled'}") + + log_messages.extend([ + f" File Type Filter: {filter_mode}", + f" Skip Archives: {'.zip' if skip_zip else ''}{', ' if 
skip_zip and skip_rar else ''}{'.rar' if skip_rar else ''}{'None' if not (skip_zip or skip_rar) else ''}", + f" Skip Words (posts/files): {', '.join(skip_words_list) if skip_words_list else 'None'}", + f" Compress Images: {'Enabled' if compress_images else 'Disabled'}", + f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}", + f" Show External Links: {'Enabled' if self.show_external_links else 'Disabled'}" + ]) + if manga_mode: log_messages.append(f" Manga Mode (File Renaming by Post Title): Enabled") + + should_use_multithreading = use_multithreading and not post_id_from_url # Multi-threading for creator feeds + log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading else 'Single-threaded (posts)'}") + if should_use_multithreading: log_messages.append(f" Number of Post Worker Threads: {num_threads}") + log_messages.append("="*40) + for msg in log_messages: self.log_signal.emit(msg) + + self.set_ui_enabled(False) # Disable UI during download + + unwanted_keywords_for_folders = {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'} + + # Prepare arguments for worker threads/classes + args_template = { + 'api_url_input': api_url, + 'download_root': output_dir, # For PostProcessorWorker + 'output_dir': output_dir, # For DownloadThread __init__ + 'known_names': list(KNOWN_NAMES), # Pass a copy + 'known_names_copy': list(KNOWN_NAMES), # For DownloadThread __init__ + 'filter_character_list': filter_character_list_to_pass, + 'filter_mode': filter_mode, 'skip_zip': skip_zip, 'skip_rar': skip_rar, + 'use_subfolders': use_subfolders, 'use_post_subfolders': use_post_subfolders, + 'compress_images': compress_images, 'download_thumbnails': download_thumbnails, + 'service': service, 'user_id': user_id, + 'downloaded_files': self.downloaded_files, # Shared set + 'downloaded_files_lock': self.downloaded_files_lock, # Shared lock + 'downloaded_file_hashes': self.downloaded_file_hashes, # Shared set + 
'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, # Shared lock + 'skip_words_list': skip_words_list, + 'show_external_links': self.show_external_links, + 'start_page': start_page, + 'end_page': end_page, + 'target_post_id_from_initial_url': post_id_from_url, + 'custom_folder_name': custom_folder_name_cleaned, + 'manga_mode_active': manga_mode, + 'unwanted_keywords': unwanted_keywords_for_folders, + 'cancellation_event': self.cancellation_event, # Crucial for stopping threads + 'signals': self.worker_signals, # For multi-threaded PostProcessorWorker + } + + + try: + if should_use_multithreading: + self.log_signal.emit(f" Initializing multi-threaded download with {num_threads} post workers...") + self.start_multi_threaded_download(num_post_workers=num_threads, **args_template) + else: # Single post URL or multithreading disabled self.log_signal.emit(" Initializing single-threaded download...") - single_args = common_args.copy() - single_args['custom_folder_name'] = custom_folder_name - single_args['single_post_id'] = post_id_from_url - self.start_single_threaded_download(**single_args) + # Keys expected by DownloadThread constructor + dt_expected_keys = [ + 'api_url_input', 'output_dir', 'known_names_copy', 'cancellation_event', + 'filter_character_list', 'filter_mode', 'skip_zip', 'skip_rar', + 'use_subfolders', 'use_post_subfolders', 'custom_folder_name', + 'compress_images', 'download_thumbnails', 'service', 'user_id', + 'downloaded_files', 'downloaded_file_hashes', 'downloaded_files_lock', + 'downloaded_file_hashes_lock', 'skip_words_list', 'show_external_links', + 'num_file_threads_for_worker', 'skip_current_file_flag', + 'start_page', 'end_page', 'target_post_id_from_initial_url', + 'manga_mode_active', 'unwanted_keywords' + ] + args_template['num_file_threads_for_worker'] = 1 # Single thread mode, worker uses 1 file thread + args_template['skip_current_file_flag'] = None # No skip flag initially + + single_thread_args = {} + for key in 
dt_expected_keys: + if key in args_template: + single_thread_args[key] = args_template[key] + # Missing optional keys will use defaults in DownloadThread's __init__ + + self.start_single_threaded_download(**single_thread_args) except Exception as e: - self.log_signal.emit(f"❌ CRITICAL ERROR preparing download task: {e}") - import traceback - self.log_signal.emit(traceback.format_exc()) - QMessageBox.critical(self, "Start Error", f"Failed to start download task:\n{e}") - self.download_finished(0, 0, False) + self.log_signal.emit(f"❌ CRITICAL ERROR preparing download: {e}\n{traceback.format_exc()}") + QMessageBox.critical(self, "Start Error", f"Failed to start download:\n{e}") + self.download_finished(0,0,False) # Ensure UI is re-enabled def start_single_threaded_download(self, **kwargs): + global BackendDownloadThread try: - self.download_thread = BackendDownloadThread( # Use renamed import - cancellation_event = self.cancellation_event, - **kwargs - ) + self.download_thread = BackendDownloadThread(**kwargs) # Pass all relevant args - if self.download_thread._init_failed: - QMessageBox.critical(self, "Thread Error", "Failed to initialize the download thread.\nCheck the log for details.") - self.download_finished(0, 0, False) - return - self.download_thread.progress_signal.connect(self.log_signal) - self.download_thread.add_character_prompt_signal.connect(self.add_character_prompt_signal) - self.download_thread.file_download_status_signal.connect(self.file_download_status_signal) - self.download_thread.finished_signal.connect(self.finished_signal) - self.character_prompt_response_signal.connect(self.download_thread.receive_add_character_result) + # Connect signals from the DownloadThread instance + if hasattr(self.download_thread, 'progress_signal'): + self.download_thread.progress_signal.connect(self.handle_main_log) + if hasattr(self.download_thread, 'add_character_prompt_signal'): # Though less used by DownloadThread directly + 
self.download_thread.add_character_prompt_signal.connect(self.add_character_prompt_signal) + if hasattr(self.download_thread, 'finished_signal'): + self.download_thread.finished_signal.connect(self.finished_signal) # Connect to app's finished handler + if hasattr(self.download_thread, 'receive_add_character_result'): # For two-way prompt communication + self.character_prompt_response_signal.connect(self.download_thread.receive_add_character_result) + # MODIFIED: Connect external_link_signal to the new handler + if hasattr(self.download_thread, 'external_link_signal'): + self.download_thread.external_link_signal.connect(self.handle_external_link_signal) # Connect to queue handler + if hasattr(self.download_thread, 'file_progress_signal'): + self.download_thread.file_progress_signal.connect(self.update_file_progress_display) self.download_thread.start() - self.log_signal.emit("✅ Single download thread started.") - + self.log_signal.emit("✅ Single download thread (for posts) started.") except Exception as e: - self.log_signal.emit(f"❌ CRITICAL ERROR starting single-thread task: {e}") - import traceback - self.log_signal.emit(traceback.format_exc()) - QMessageBox.critical(self, "Thread Start Error", f"Failed to start download thread:\n{e}") - self.download_finished(0, 0, False) + self.log_signal.emit(f"❌ CRITICAL ERROR starting single-thread: {e}\n{traceback.format_exc()}") + QMessageBox.critical(self, "Thread Start Error", f"Failed to start download process: {e}") + self.download_finished(0,0,False) # Cleanup - - def start_multi_threaded_download(self, **kwargs): - import downloader_utils # For KNOWN_NAMES - num_threads = kwargs['num_threads'] - self.thread_pool = ThreadPoolExecutor(max_workers=num_threads, thread_name_prefix='Downloader_') - self.active_futures = [] + def start_multi_threaded_download(self, num_post_workers, **kwargs): + global PostProcessorWorker # Ensure it's the correct worker class + self.thread_pool = 
ThreadPoolExecutor(max_workers=num_post_workers, thread_name_prefix='PostWorker_') + self.active_futures = [] # Reset list of active futures self.processed_posts_count = 0 - self.total_posts_to_process = 0 + self.total_posts_to_process = 0 # Will be updated by _fetch_and_queue_posts self.download_counter = 0 self.skip_counter = 0 - worker_args_template = kwargs.copy() - del worker_args_template['num_threads'] + + # Start a separate thread to fetch post data and submit tasks to the pool + # This prevents the GUI from freezing during the initial API calls for post lists fetcher_thread = threading.Thread( - target=self._fetch_and_queue_posts, - args=(kwargs['api_url'], worker_args_template), - daemon=True, - name="PostFetcher" + target=self._fetch_and_queue_posts, + args=(kwargs['api_url_input'], kwargs, num_post_workers), + daemon=True, name="PostFetcher" # Daemon thread will exit when app exits ) fetcher_thread.start() - self.log_signal.emit(f"✅ Post fetcher thread started. {num_threads} worker threads initializing...") + self.log_signal.emit(f"✅ Post fetcher thread started. 
{num_post_workers} post worker threads initializing...") - def _fetch_and_queue_posts(self, api_url_input, worker_args_template): - import downloader_utils # For download_from_api - all_posts = [] - fetch_error = False + def _fetch_and_queue_posts(self, api_url_input_for_fetcher, worker_args_template, num_post_workers): + global PostProcessorWorker, download_from_api # Ensure correct references + all_posts_data = [] + fetch_error_occurred = False + + manga_mode_active_for_fetch = worker_args_template.get('manga_mode_active', False) + signals_for_worker = worker_args_template.get('signals') # This is self.worker_signals + if not signals_for_worker: # Should always be present + self.log_signal.emit("❌ CRITICAL ERROR: Signals object missing for worker in _fetch_and_queue_posts.") + self.finished_signal.emit(0,0,True) # Signal failure + return + try: - self.log_signal.emit(" Starting post fetch...") - def fetcher_logger(msg): - self.log_signal.emit(f"[Fetcher] {msg}") - - post_generator = downloader_utils.download_from_api(api_url_input, logger=fetcher_logger) - + self.log_signal.emit(" Fetching post data from API...") + post_generator = download_from_api( + api_url_input_for_fetcher, + logger=lambda msg: self.log_signal.emit(f"[Fetcher] {msg}"), # Prefix fetcher logs + start_page=worker_args_template.get('start_page'), + end_page=worker_args_template.get('end_page'), + manga_mode=manga_mode_active_for_fetch, + cancellation_event=self.cancellation_event # Pass cancellation event + ) for posts_batch in post_generator: - if self.cancellation_event.is_set(): - self.log_signal.emit("⚠️ Post fetching cancelled by user.") - fetch_error = True - break + if self.cancellation_event.is_set(): # Check cancellation frequently + fetch_error_occurred = True; self.log_signal.emit(" Post fetching cancelled by user."); break if isinstance(posts_batch, list): - all_posts.extend(posts_batch) - self.total_posts_to_process = len(all_posts) - if self.total_posts_to_process % 250 == 0: - 
self.log_signal.emit(f" Fetched {self.total_posts_to_process} posts...") - else: - self.log_signal.emit(f"❌ API returned non-list batch: {type(posts_batch)}. Stopping fetch.") - fetch_error = True - break - - if not fetch_error: - self.log_signal.emit(f"✅ Finished fetching. Total posts found: {self.total_posts_to_process}") - + all_posts_data.extend(posts_batch) + self.total_posts_to_process = len(all_posts_data) # Update total + # Log progress periodically for large feeds + if self.total_posts_to_process > 0 and self.total_posts_to_process % 100 == 0 : + self.log_signal.emit(f" Fetched {self.total_posts_to_process} posts so far...") + else: # Should not happen if download_from_api is correct + fetch_error_occurred = True + self.log_signal.emit(f"❌ API fetcher returned non-list type: {type(posts_batch)}"); break + if not fetch_error_occurred and not self.cancellation_event.is_set(): + self.log_signal.emit(f"✅ Post fetching complete. Total posts to process: {self.total_posts_to_process}") + except TypeError as te: # Catch common error if downloader_utils is outdated + self.log_signal.emit(f"❌ TypeError calling download_from_api: {te}") + self.log_signal.emit(" Check if 'downloader_utils.py' has the correct 'download_from_api' signature (including 'manga_mode' and 'cancellation_event').") + self.log_signal.emit(traceback.format_exc(limit=2)) + fetch_error_occurred = True + except RuntimeError as re: # Catch cancellation from fetch_posts_paginated + self.log_signal.emit(f"ℹ️ Post fetching runtime error (likely cancellation): {re}") + fetch_error_occurred = True # Treat as an error for cleanup except Exception as e: - self.log_signal.emit(f"❌ Unexpected Error during post fetching: {e}") - import traceback - self.log_signal.emit(traceback.format_exc(limit=3)) - fetch_error = True - if self.cancellation_event.is_set() or fetch_error: + self.log_signal.emit(f"❌ Error during post fetching: {e}\n{traceback.format_exc(limit=2)}") + fetch_error_occurred = True + + if 
self.cancellation_event.is_set() or fetch_error_occurred: self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set()) - if self.thread_pool: - self.thread_pool.shutdown(wait=False, cancel_futures=True) - self.thread_pool = None + if self.thread_pool: # Ensure pool is shutdown if fetch fails or is cancelled + self.thread_pool.shutdown(wait=False, cancel_futures=True); self.thread_pool = None return - if self.total_posts_to_process == 0: - self.log_signal.emit("😕 No posts found or fetched successfully.") - self.finished_signal.emit(0, 0, False) - return - self.log_signal.emit(f" Submitting {self.total_posts_to_process} post tasks to worker pool...") - self.processed_posts_count = 0 - self.overall_progress_signal.emit(self.total_posts_to_process, 0) - common_worker_args = { - 'download_root': worker_args_template['output_dir'], - 'known_names': worker_args_template['known_names_copy'], # Already a copy from KNOWN_NAMES - 'filter_character': worker_args_template['filter_character'], - 'unwanted_keywords': {'spicy', 'hd', 'nsfw', '4k', 'preview'}, - 'filter_mode': worker_args_template['filter_mode'], - 'skip_zip': worker_args_template['skip_zip'], - 'skip_rar': worker_args_template['skip_rar'], - 'use_subfolders': worker_args_template['use_subfolders'], - 'target_post_id_from_initial_url': worker_args_template.get('single_post_id'), - 'custom_folder_name': worker_args_template.get('custom_folder_name'), - 'compress_images': worker_args_template['compress_images'], - 'download_thumbnails': worker_args_template['download_thumbnails'], - 'service': worker_args_template['service'], - 'user_id': worker_args_template['user_id'], - 'api_url_input': worker_args_template['api_url'], - 'cancellation_event': self.cancellation_event, - 'signals': self.worker_signals, - 'downloaded_files': self.downloaded_files, - 'downloaded_files_lock': self.downloaded_files_lock, - 'downloaded_file_hashes': self.downloaded_file_hashes, - 
'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, - 'skip_words_list': worker_args_template['skip_words_list'], + self.log_signal.emit("😕 No posts found or fetched to process.") + self.finished_signal.emit(0,0,False); return + + self.log_signal.emit(f" Submitting {self.total_posts_to_process} post processing tasks to thread pool...") + self.processed_posts_count = 0 # Reset for this run + self.overall_progress_signal.emit(self.total_posts_to_process, 0) # Initial progress update + + num_file_dl_threads = 4 # Default for PostProcessorWorker's internal pool + + # Define keys PostProcessorWorker expects (ensure this matches its __init__) + ppw_expected_keys = [ + 'post_data', 'download_root', 'known_names', 'filter_character_list', + 'unwanted_keywords', 'filter_mode', 'skip_zip', 'skip_rar', + 'use_subfolders', 'use_post_subfolders', 'target_post_id_from_initial_url', + 'custom_folder_name', 'compress_images', 'download_thumbnails', 'service', + 'user_id', 'api_url_input', 'cancellation_event', 'signals', + 'downloaded_files', 'downloaded_file_hashes', 'downloaded_files_lock', + 'downloaded_file_hashes_lock', 'skip_words_list', 'show_external_links', + 'extract_links_only', 'num_file_threads', 'skip_current_file_flag', + 'manga_mode_active' + ] + # Optional keys with defaults in PostProcessorWorker's __init__ + ppw_optional_keys_with_defaults = { + 'skip_words_list', 'show_external_links', 'extract_links_only', + 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active' } - for post_data in all_posts: - if self.cancellation_event.is_set(): - self.log_signal.emit("⚠️ Cancellation detected during task submission.") - break - if not isinstance(post_data, dict): - self.log_signal.emit(f"⚠️ Skipping invalid post data item (type: {type(post_data)}).") - self.processed_posts_count += 1 - self.total_posts_to_process -=1 + for post_data_item in all_posts_data: + if self.cancellation_event.is_set(): break # Check before submitting each task + if not 
isinstance(post_data_item, dict): # Basic sanity check + self.log_signal.emit(f"⚠️ Skipping invalid post data item (not a dict): {type(post_data_item)}") + self.processed_posts_count += 1 # Count as processed/skipped continue - worker = PostProcessorWorker(post_data=post_data, **common_worker_args) # PostProcessorWorker from downloader_utils - try: - if self.thread_pool: - future = self.thread_pool.submit(worker.process) - future.add_done_callback(self._handle_future_result) - self.active_futures.append(future) - else: - self.log_signal.emit("⚠️ Thread pool shutdown before submitting all tasks.") - break - except RuntimeError as e: - self.log_signal.emit(f"⚠️ Error submitting task (pool might be shutting down): {e}") - break - except Exception as e: - self.log_signal.emit(f"❌ Unexpected error submitting task: {e}") - break - submitted_count = len(self.active_futures) - self.log_signal.emit(f" {submitted_count} / {self.total_posts_to_process} tasks submitted.") + # Build args for PostProcessorWorker instance + worker_init_args = {} + missing_keys = [] + for key in ppw_expected_keys: + if key == 'post_data': worker_init_args[key] = post_data_item + elif key == 'num_file_threads': worker_init_args[key] = num_file_dl_threads + elif key == 'signals': worker_init_args[key] = signals_for_worker # Use the app's worker_signals + elif key in worker_args_template: worker_init_args[key] = worker_args_template[key] + elif key in ppw_optional_keys_with_defaults: pass # Let worker use its default + else: missing_keys.append(key) # Required key is missing + + + if missing_keys: + self.log_signal.emit(f"❌ CRITICAL ERROR: Missing expected keys for PostProcessorWorker: {', '.join(missing_keys)}") + self.cancellation_event.set() # Stop all processing + break + + try: + worker_instance = PostProcessorWorker(**worker_init_args) + if self.thread_pool: # Ensure pool is still active + future = self.thread_pool.submit(worker_instance.process) + 
future.add_done_callback(self._handle_future_result) # Handle result/exception + self.active_futures.append(future) + else: # Pool might have been shut down due to earlier error/cancellation + self.log_signal.emit("⚠️ Thread pool not available. Cannot submit more tasks.") + break + except TypeError as te: # Error creating worker (e.g. wrong args) + self.log_signal.emit(f"❌ TypeError creating PostProcessorWorker: {te}") + passed_keys_str = ", ".join(sorted(worker_init_args.keys())) + self.log_signal.emit(f" Passed Args: [{passed_keys_str}]") + self.log_signal.emit(traceback.format_exc(limit=5)) + self.cancellation_event.set(); break # Stop all + except RuntimeError: # Pool might be shutting down + self.log_signal.emit("⚠️ Runtime error submitting task (pool likely shutting down)."); break + except Exception as e: # Other errors during submission + self.log_signal.emit(f"❌ Error submitting post {post_data_item.get('id','N/A')} to worker: {e}"); break + + if not self.cancellation_event.is_set(): + self.log_signal.emit(f" {len(self.active_futures)} post processing tasks submitted to pool.") + else: # If cancelled during submission loop + self.finished_signal.emit(self.download_counter, self.skip_counter, True) + if self.thread_pool: + self.thread_pool.shutdown(wait=False, cancel_futures=True); self.thread_pool = None def _handle_future_result(self, future: Future): self.processed_posts_count += 1 - downloaded_res, skipped_res = 0, 0 - + downloaded_files_from_future = 0 + skipped_files_from_future = 0 try: if future.cancelled(): - pass + self.log_signal.emit(" A post processing task was cancelled.") + # If a task was cancelled, it implies we might want to count its potential files as skipped + # This is hard to determine without knowing the post_data it was handling. + # For simplicity, we don't add to skip_counter here unless future.result() would have. 
elif future.exception():
-             exc = future.exception()
-             self.log_signal.emit(f"❌ Error in worker thread: {exc}")
-             pass
-         else:
-             downloaded, skipped = future.result()
-             downloaded_res = downloaded
-             skipped_res = skipped
+                worker_exception = future.exception()
+                self.log_signal.emit(f"❌ Post processing worker error: {worker_exception}")
+                # Similar to cancelled, hard to know how many files were skipped due to error.
+            else: # Success
+                downloaded_files_from_future, skipped_files_from_future = future.result()
+
+            # Lock for updating shared counters
+            with self.downloaded_files_lock: # Using this lock for these counters too
+                self.download_counter += downloaded_files_from_future
+                self.skip_counter += skipped_files_from_future
 
-        with threading.Lock():
-            self.download_counter += downloaded_res
-            self.skip_counter += skipped_res
             self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count)
-        except Exception as e:
-            self.log_signal.emit(f"❌ Error in result callback handling: {e}")
-        if self.processed_posts_count >= self.total_posts_to_process and self.total_posts_to_process > 0:
+        except Exception as e: # Error in this callback itself
+            self.log_signal.emit(f"❌ Error in _handle_future_result callback: {e}\n{traceback.format_exc(limit=2)}")
+
+        # Check if all tasks are done
+        if self.total_posts_to_process > 0 and self.processed_posts_count >= self.total_posts_to_process:
+            # More robust check: ensure all submitted futures are actually done
+            all_done = all(f.done() for f in self.active_futures)
+            if all_done:
+                # NOTE(review): QApplication.processEvents() removed here — this done-callback runs on a
+                # ThreadPoolExecutor worker thread, and pumping the Qt event loop off the GUI thread is
+                # undefined behavior. The queued signal emissions below are the thread-safe way to notify the GUI.
+                self.log_signal.emit("🏁 All submitted post tasks have completed or failed.")
+                self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set())
-        if self.processed_posts_count >= self.total_posts_to_process:
-            self.log_signal.emit("🏁 All submitted tasks have completed or failed.")
-            cancelled = self.cancellation_event.is_set()
-            
self.finished_signal.emit(self.download_counter, self.skip_counter, cancelled) def set_ui_enabled(self, enabled): - self.download_btn.setEnabled(enabled) - self.link_input.setEnabled(enabled) - self.dir_input.setEnabled(enabled) - self.dir_button.setEnabled(enabled) - self.radio_all.setEnabled(enabled) - self.radio_images.setEnabled(enabled) - self.radio_videos.setEnabled(enabled) - self.skip_zip_checkbox.setEnabled(enabled) - self.skip_rar_checkbox.setEnabled(enabled) - self.use_subfolders_checkbox.setEnabled(enabled) - self.compress_images_checkbox.setEnabled(enabled) - self.download_thumbnails_checkbox.setEnabled(enabled) - self.use_multithreading_checkbox.setEnabled(enabled) - self.skip_words_input.setEnabled(enabled) - self.character_search_input.setEnabled(enabled) - self.new_char_input.setEnabled(enabled) - self.add_char_button.setEnabled(enabled) - self.delete_char_button.setEnabled(enabled) - subfolders_on = self.use_subfolders_checkbox.isChecked() - self.custom_folder_widget.setEnabled(enabled and subfolders_on) - self.character_filter_widget.setEnabled(enabled and subfolders_on) - if enabled: - self.update_ui_for_subfolders(subfolders_on) - self.update_custom_folder_visibility() - self.cancel_btn.setEnabled(not enabled) - if enabled: - self.skip_file_btn.setEnabled(False) + # List of widgets to toggle enabled state + widgets_to_toggle = [ + self.download_btn, self.link_input, self.dir_input, self.dir_button, + self.radio_all, self.radio_images, self.radio_videos, + self.skip_zip_checkbox, self.skip_rar_checkbox, + self.use_subfolders_checkbox, self.compress_images_checkbox, + self.download_thumbnails_checkbox, self.use_multithreading_checkbox, + self.skip_words_input, self.character_search_input, self.new_char_input, + self.add_char_button, self.delete_char_button, + # self.external_links_checkbox, # MODIFIED: Keep this enabled + self.start_page_input, self.end_page_input, self.page_range_label, self.to_label, + self.character_input, 
self.custom_folder_input, self.custom_folder_label, + self.reset_button, + # self.log_verbosity_button, # MODIFIED: Keep this enabled + self.manga_mode_checkbox + ] + for widget in widgets_to_toggle: + if widget: # Check if widget exists + widget.setEnabled(enabled) + + # --- ADDED: Explicitly keep these enabled --- + if self.external_links_checkbox: + self.external_links_checkbox.setEnabled(True) + if self.log_verbosity_button: + self.log_verbosity_button.setEnabled(True) + # --- END ADDED --- + + # Handle dependent widgets + subfolders_currently_on = self.use_subfolders_checkbox.isChecked() + self.use_subfolder_per_post_checkbox.setEnabled(enabled and subfolders_currently_on) + + multithreading_currently_on = self.use_multithreading_checkbox.isChecked() + self.thread_count_input.setEnabled(enabled and multithreading_currently_on) + self.thread_count_label.setEnabled(enabled and multithreading_currently_on) + + self.cancel_btn.setEnabled(not enabled) # Cancel is enabled when download is running + + if enabled: # When re-enabling UI, refresh dependent states + self.update_ui_for_subfolders(subfolders_currently_on) + self.update_custom_folder_visibility() + self.update_page_range_enabled_state() + if self.manga_mode_checkbox: + self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked()) + def cancel_download(self): - if not self.cancel_btn.isEnabled(): return + if not self.cancel_btn.isEnabled() and not self.cancellation_event.is_set(): # Avoid multiple cancel calls + self.log_signal.emit("ℹ️ No active download to cancel or already cancelling.") + return - self.log_signal.emit("⚠️ Requesting cancellation...") - self.cancellation_event.set() - self.cancel_btn.setEnabled(False) - self.progress_label.setText("Progress: Cancelling...") - if self.thread_pool and self.active_futures: - cancelled_count = 0 - for future in self.active_futures: - if future.cancel(): - cancelled_count += 1 - if cancelled_count > 0: - self.log_signal.emit(f" Attempted to cancel 
{cancelled_count} pending/running tasks.") + self.log_signal.emit("⚠️ Requesting cancellation of download process...") + self.cancellation_event.set() # Signal all threads/workers to stop + if self.download_thread and self.download_thread.isRunning(): + # For QThread, requestInterruption() is a polite request. + # The thread's run() loop must check isInterruptionRequested() or self.cancellation_event. + self.download_thread.requestInterruption() + self.log_signal.emit(" Signaled single download thread to interrupt.") - def skip_current_file(self): - if self.download_thread and self.download_thread.isRunning(): - self.download_thread.skip_file() - elif self.thread_pool: - self.log_signal.emit("ℹ️ Skipping individual files is not supported in multi-threaded mode.") - QMessageBox.information(self, "Action Not Supported", "Skipping individual files is only available in single-threaded mode.") - else: - self.log_signal.emit("ℹ️ Skip requested, but no download is active.") - - - def update_skip_button_state(self, is_downloading_active): - can_skip = (not self.download_btn.isEnabled()) and \ - (self.download_thread and self.download_thread.isRunning()) and \ - is_downloading_active - if self.thread_pool is not None: - can_skip = False - - self.skip_file_btn.setEnabled(can_skip) - - - def download_finished(self, total_downloaded, total_skipped, cancelled): - self.log_signal.emit("="*40) - status = "Cancelled" if cancelled else "Finished" - self.log_signal.emit(f"🏁 Download {status}!") - self.log_signal.emit(f" Summary: Downloaded={total_downloaded}, Skipped={total_skipped}") - self.progress_label.setText(f"{status}: {total_downloaded} downloaded, {total_skipped} skipped.") - self.log_signal.emit("="*40) - if self.download_thread: - try: - self.character_prompt_response_signal.disconnect(self.download_thread.receive_add_character_result) - except TypeError: pass - self.download_thread = None + # --- MODIFICATION START: Initiate thread pool shutdown immediately --- if 
self.thread_pool: - self.log_signal.emit(" Shutting down worker thread pool...") + self.log_signal.emit(" Initiating immediate shutdown and cancellation of worker pool tasks...") + # Start shutdown non-blockingly, attempting to cancel futures self.thread_pool.shutdown(wait=False, cancel_futures=True) + # --- MODIFICATION END --- + + # --- ADDED: Clear link queue on cancel --- + self.external_link_queue.clear() + self._is_processing_external_link_queue = False + # --- END ADDED --- + + self.cancel_btn.setEnabled(False) # Disable cancel button after initiating cancellation + self.progress_label.setText("Progress: Cancelling...") + self.file_progress_label.setText("") + # The download_finished method will be called eventually when threads finally exit. + + + def download_finished(self, total_downloaded, total_skipped, cancelled_by_user): + # This method is the final cleanup point, called by DownloadThread or _handle_future_result + status_message = "Cancelled by user" if cancelled_by_user else "Finished" + self.log_signal.emit("="*40 + f"\n🏁 Download {status_message}!\n Summary: Downloaded Files={total_downloaded}, Skipped Files={total_skipped}\n" + "="*40) + self.progress_label.setText(f"{status_message}: {total_downloaded} downloaded, {total_skipped} skipped.") + self.file_progress_label.setText("") # Clear file progress + + # --- ADDED: Attempt to process any remaining links in queue if not cancelled --- + # This will now trigger the rapid display because _is_download_active() will be false + if not cancelled_by_user: + self._try_process_next_external_link() + # --- END ADDED --- + + # Disconnect signals from single download thread if it was used + if self.download_thread: + try: + if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.disconnect(self.handle_main_log) + if hasattr(self.download_thread, 'add_character_prompt_signal'): self.download_thread.add_character_prompt_signal.disconnect(self.add_character_prompt_signal) + if 
hasattr(self.download_thread, 'finished_signal'): self.download_thread.finished_signal.disconnect(self.finished_signal) + if hasattr(self.download_thread, 'receive_add_character_result'): self.character_prompt_response_signal.disconnect(self.download_thread.receive_add_character_result) + # MODIFIED: Ensure disconnection from the correct handler + if hasattr(self.download_thread, 'external_link_signal'): self.download_thread.external_link_signal.disconnect(self.handle_external_link_signal) + if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.disconnect(self.update_file_progress_display) + except (TypeError, RuntimeError) as e: + self.log_signal.emit(f"ℹ️ Note during single-thread signal disconnection: {e}") + self.download_thread = None # Clear reference + + # Shutdown thread pool if it exists and hasn't been cleared yet + # Use wait=True here to ensure cleanup before UI re-enables + if self.thread_pool: + self.log_signal.emit(" Ensuring worker thread pool is shut down...") + # Shutdown might have been initiated by cancel_download, but wait=True ensures completion. + self.thread_pool.shutdown(wait=True, cancel_futures=True) self.thread_pool = None - self.active_futures = [] - self.cancellation_event.clear() - self.set_ui_enabled(True) + self.active_futures = [] # Clear list of futures + + # Clear cancellation event here AFTER threads have likely stopped checking it + # self.cancellation_event.clear() + # Let's clear it in start_download and reset_application_state instead for safety. 
+ + self.set_ui_enabled(True) # Re-enable UI + self.cancel_btn.setEnabled(False) # Disable cancel button + + # --- ADDED: Method to toggle log verbosity --- + def toggle_log_verbosity(self): + self.basic_log_mode = not self.basic_log_mode + if self.basic_log_mode: + self.log_verbosity_button.setText("Show Full Log") + self.log_signal.emit("="*20 + " Basic Log Mode Enabled " + "="*20) + else: + self.log_verbosity_button.setText("Show Basic Log") + self.log_signal.emit("="*20 + " Full Log Mode Enabled " + "="*20) + # --- END ADDED --- + + def reset_application_state(self): + is_running = (self.download_thread and self.download_thread.isRunning()) or \ + (self.thread_pool is not None and any(not f.done() for f in self.active_futures if f is not None)) + if is_running: + QMessageBox.warning(self, "Reset Error", "Cannot reset while a download is in progress. Please cancel the download first.") + return + + self.log_signal.emit("🔄 Resetting application state to defaults...") + self._reset_ui_to_defaults() # Reset UI elements to their initial state + self.main_log_output.clear() + self.external_log_output.clear() + if self.show_external_links: # Re-add header if shown + self.external_log_output.append("🔗 External Links Found:") + + # --- ADDED: Clear link queue on reset --- + self.external_link_queue.clear() + self._is_processing_external_link_queue = False + # --- END ADDED --- + + self.progress_label.setText("Progress: Idle") + self.file_progress_label.setText("") + + # Clear session-specific data + with self.downloaded_files_lock: + count = len(self.downloaded_files) + self.downloaded_files.clear() + if count > 0: self.log_signal.emit(f" Cleared {count} downloaded filename(s) from session memory.") + with self.downloaded_file_hashes_lock: + count = len(self.downloaded_file_hashes) + self.downloaded_file_hashes.clear() + if count > 0: self.log_signal.emit(f" Cleared {count} downloaded file hash(es) from session memory.") + + self.total_posts_to_process = 0 + 
self.processed_posts_count = 0 + self.download_counter = 0 + self.skip_counter = 0 + # self.external_links = [] # This list seems unused, keeping it commented + + self.cancellation_event.clear() # Ensure cancellation event is reset + + # --- ADDED: Reset log verbosity mode --- + self.basic_log_mode = False + if self.log_verbosity_button: + self.log_verbosity_button.setText("Show Basic Log") + # --- END ADDED --- + + self.log_signal.emit("✅ Application reset complete.") + + + def _reset_ui_to_defaults(self): + # Reset all input fields + self.link_input.clear() + self.dir_input.clear() + self.custom_folder_input.clear() + self.character_input.clear() + self.skip_words_input.clear() + self.start_page_input.clear() + self.end_page_input.clear() + self.new_char_input.clear() + self.character_search_input.clear() + self.thread_count_input.setText("4") + + # Reset radio buttons and checkboxes to defaults + self.radio_all.setChecked(True) + self.skip_zip_checkbox.setChecked(True) + self.skip_rar_checkbox.setChecked(True) + self.download_thumbnails_checkbox.setChecked(False) + self.compress_images_checkbox.setChecked(False) + self.use_subfolders_checkbox.setChecked(True) + self.use_subfolder_per_post_checkbox.setChecked(False) + self.use_multithreading_checkbox.setChecked(True) + self.external_links_checkbox.setChecked(False) + if self.manga_mode_checkbox: + self.manga_mode_checkbox.setChecked(False) + + # Explicitly call update methods that control UI element states + self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked()) + self.update_custom_folder_visibility() + self.update_page_range_enabled_state() + self.update_multithreading_label(self.thread_count_input.text()) + if self.manga_mode_checkbox: + self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked()) + self.filter_character_list("") # Clear character list filter + + # Reset button states + self.download_btn.setEnabled(True) self.cancel_btn.setEnabled(False) - 
self.skip_file_btn.setEnabled(False) + if self.reset_button: self.reset_button.setEnabled(True) + # Reset log verbosity button text + if self.log_verbosity_button: self.log_verbosity_button.setText("Show Basic Log") + def prompt_add_character(self, character_name): - import downloader_utils # For KNOWN_NAMES - reply = QMessageBox.question(self, "Add Filter Name?", - f"The filter name '{character_name}' is not in your known list.\n\nAdd it now and continue download?", + global KNOWN_NAMES + # This method is called via a signal from a worker thread. + # It interacts with the GUI, so it's correctly placed in the GUI class. + reply = QMessageBox.question(self, "Add Filter Name to Known List?", + f"The name '{character_name}' was encountered or used as a filter.\nIt's not in your known names list (used for folder suggestions).\nAdd it now?", QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) result = (reply == QMessageBox.Yes) - if result: - self.new_char_input.setText(character_name) - if character_name.lower() not in {n.lower() for n in downloader_utils.KNOWN_NAMES}: - self.add_new_character() - if character_name.lower() not in {n.lower() for n in downloader_utils.KNOWN_NAMES}: - self.log_signal.emit(f"⚠️ Failed to add '{character_name}' via prompt. Check for errors.") - result = False + self.new_char_input.setText(character_name) # Populate input for add_new_character + # Call add_new_character, which now includes similarity checks and its own QMessageBox + # The result of add_new_character (True/False) reflects if it was actually added. 
+        if result and self.add_new_character():  # guard restored: only attempt the add when the user answered Yes
+            self.log_signal.emit(f"✅ Added '{character_name}' to known names via background prompt.")
         else:
-            self.log_signal.emit(f"ℹ️ Filter name '{character_name}' was already present or added.")
+            # add_new_character handles its own logging and popups if it fails or user cancels similarity warning
+            result = False # Update result if user declined or add_new_character decided not to add
+            self.log_signal.emit(f"ℹ️ Adding '{character_name}' via background prompt was declined or failed (e.g., similarity warning, duplicate).")
+        # Send the final outcome (whether it was added or user said yes initially but then cancelled)
         self.character_prompt_response_signal.emit(result)


     def receive_add_character_result(self, result):
-        with QMutexLocker(self.prompt_mutex):
+        # This method receives the result from prompt_add_character (after it has tried to add the name)
+        # and is typically connected to the worker thread's logic to unblock it.
+        with QMutexLocker(self.prompt_mutex): # Ensure thread-safe access if worker modifies shared state based on this
             self._add_character_response = result
-        self.log_signal.emit(f"   Received prompt response: {'Yes' if result else 'No'}")
+        self.log_signal.emit(f"   Main thread received character prompt response: {'Action resulted in addition/confirmation' if result else 'Action resulted in no addition/declined'}")
+

 if __name__ == '__main__':
-    qt_app = QApplication(sys.argv)
-    icon_path = os.path.join(os.path.dirname(__file__), 'Kemono.ico')
-    if os.path.exists(icon_path):
-        qt_app.setWindowIcon(QIcon(icon_path))
+    import traceback
+    try:
+        qt_app = QApplication(sys.argv)
+        if getattr(sys, 'frozen', False):
+            base_dir = sys._MEIPASS
+        else:
+            base_dir = os.path.dirname(os.path.abspath(__file__))
+
+        icon_path = os.path.join(base_dir, 'Kemono.ico')
+        if os.path.exists(icon_path):
+            qt_app.setWindowIcon(QIcon(icon_path))
+        else:
+            print(f"Warning: Application icon 'Kemono.ico' not found at {icon_path}")
 
-    downloader = DownloaderApp()
-    
downloader.show() - exit_code = qt_app.exec_() - print(f"Application finished with exit code: {exit_code}") - sys.exit(exit_code) \ No newline at end of file + downloader_app_instance = DownloaderApp() + downloader_app_instance.show() + exit_code = qt_app.exec_() + print(f"Application finished with exit code: {exit_code}") + sys.exit(exit_code) + except SystemExit: + pass # Allow clean exit + except Exception as e: + print("--- CRITICAL APPLICATION ERROR ---") + print(f"An unhandled exception occurred: {e}") + traceback.print_exc() + print("--- END CRITICAL ERROR ---") + sys.exit(1)