diff --git a/Known.txt b/Known.txt
index e69de29..6301234 100644
--- a/Known.txt
+++ b/Known.txt
@@ -0,0 +1,9 @@
+Back to Hell
+Fade
+Jett
+Psylocke
+Viper
+clove
+neon
+reyna
+sage
diff --git a/downloader_utils.py b/downloader_utils.py
index e14bed3..7b050ed 100644
--- a/downloader_utils.py
+++ b/downloader_utils.py
@@ -25,100 +25,122 @@ from io import BytesIO
 fastapi_app = None  # Placeholder, not used in this script
 KNOWN_NAMES = []  # Global list, populated by main.py
 
+IMAGE_EXTENSIONS = {
+    '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp',
+    '.heic', '.heif', '.svg', '.ico', '.jfif', '.pjpeg', '.pjp', '.avif'
+}
+VIDEO_EXTENSIONS = {
+    '.mp4', '.mov', '.mkv', '.webm', '.avi', '.wmv', '.flv', '.mpeg',
+    '.mpg', '.m4v', '.3gp', '.ogv', '.ts', '.vob'
+}
+
 def is_title_match_for_character(post_title, character_name_filter):
     """Checks if a post title contains a specific character name (case-insensitive, whole word)."""
-    if not post_title:
+    if not post_title or not character_name_filter:
         return False
-    if not character_name_filter:  # If no filter, it's considered a match (or handle as per broader logic)
-        return True
-
-    # Regex to match whole word, case insensitive
+    # Ensure character_name_filter is treated as a whole word, avoiding partial matches inside larger words.
+    # Regex: \b matches a word boundary; re.escape handles special characters in the filter.
     pattern = r"(?i)\b" + re.escape(character_name_filter) + r"\b"
-    if re.search(pattern, post_title):
-        return True
-    return False
+    return bool(re.search(pattern, post_title))
+
+def is_filename_match_for_character(filename, character_name_filter):
+    """Checks if a filename contains a specific character name (case-insensitive, substring)."""
+    if not filename or not character_name_filter:
+        return False
+    # For filenames, substring matching is often more practical.
+    return character_name_filter.lower() in filename.lower()
 
 def clean_folder_name(name):
     """Cleans a string to be suitable for a folder name."""
-    if not isinstance(name, str): name = str(name)  # Ensure string
-    # Remove invalid characters, replace spaces with underscores
-    cleaned = re.sub(r'[^\w\s\-\_]', '', name)  # Allow alphanumeric, whitespace, hyphen, underscore
-    return cleaned.strip().replace(' ', '_')
+    if not isinstance(name, str): name = str(name)
+    # Remove characters that are generally problematic in folder names across OSes
+    cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name)  # Allow letters, numbers, whitespace, hyphens, underscores, periods, parentheses
+    cleaned = cleaned.strip()  # Remove leading/trailing whitespace
+    # Replace each run of whitespace with a single underscore
+    cleaned = re.sub(r'\s+', '_', cleaned)
+    return cleaned if cleaned else "untitled_folder"
 
 def clean_filename(name):
-    """Cleans a string to be suitable for a filename, preserving extension."""
-    if not isinstance(name, str): name = str(name)  # Ensure string
-    # Remove invalid characters, replace spaces with underscores
-    # Allow alphanumeric, whitespace, hyphen, underscore, and period (for extension)
-    cleaned = re.sub(r'[^\w\s\-\_\.]', '', name)
-    return cleaned.strip().replace(' ', '_')
+    """Cleans a string to be suitable for a file name."""
+    if not isinstance(name, str): name = str(name)
+    # Remove characters that are generally problematic in file names across OSes
+    cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name)  # Allow letters, numbers, whitespace, hyphens, underscores, periods, parentheses
+    cleaned = cleaned.strip()  # Remove leading/trailing whitespace
+    # Replace each run of whitespace with a single underscore
+    cleaned = re.sub(r'\s+', '_', cleaned)
+    return cleaned if cleaned else "untitled_file"
 
 def extract_folder_name_from_title(title, unwanted_keywords):
-    """Extracts a potential folder name from a title, avoiding common unwanted keywords."""
+    """Extracts a potential folder name from a title, avoiding unwanted keywords."""
     if not title: return 'Uncategorized'
     title_lower = title.lower()
-    # Tokenize by words, prefer longer, more specific tokens if possible
-    tokens = re.findall(r'\b[\w\-]+\b', title_lower)  # Find alphanumeric words with hyphens
+    # Try to find a meaningful token that is not in unwanted_keywords
+    tokens = re.findall(r'\b[\w\-]+\b', title_lower)  # Find words
    for token in tokens:
         clean_token = clean_folder_name(token)  # Clean the token itself
-        if clean_token and clean_token not in unwanted_keywords:
-            return clean_token  # Return the first suitable token
-    # If no suitable token found, use the cleaned full title (or fallback)
+        if clean_token and clean_token.lower() not in unwanted_keywords:  # Check against lowercased unwanted keywords
+            return clean_token
+    # Fall back to the cleaned full title if no suitable token is found
     cleaned_full_title = clean_folder_name(title)
     return cleaned_full_title if cleaned_full_title else 'Uncategorized'
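
The helpers above are small enough that their intended behavior can be pinned down with a few spot checks. A reviewer-added sanity sketch (not part of the patch; it assumes only the functions defined in this diff):

# Reviewer sketch — not part of the patch.
assert is_title_match_for_character("Jett beach set", "jett")        # whole-word, case-insensitive
assert not is_title_match_for_character("Jettison log", "jett")      # no partial-word hits
assert is_filename_match_for_character("Neon_pool_01.png", "neon")   # substring is enough for filenames
assert clean_folder_name("  Fade: part 2!  ") == "Fade_part_2"       # strip, de-punctuate, underscore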
+ """ if not title or not names_to_match: return [] title_lower = title.lower() matched_cleaned_names = set() - # Sort by length to match longer names first (e.g., "Luffy Gear 5" before "Luffy") + # Sort names by length (descending) to match longer names first (e.g., "Spider-Man" before "Spider") sorted_names_to_match = sorted(names_to_match, key=len, reverse=True) for name in sorted_names_to_match: name_lower = name.lower() if not name_lower: continue # Skip empty names - pattern = r'\b' + re.escape(name_lower) + r'\b' # Whole word match + # Use word boundary regex to ensure whole word matching + pattern = r'\b' + re.escape(name_lower) + r'\b' if re.search(pattern, title_lower): - cleaned_name = clean_folder_name(name).lower() # Clean the original matched name - if cleaned_name and cleaned_name not in unwanted_keywords: - matched_cleaned_names.add(cleaned_name) + # Clean the original casing 'name' for folder creation, then lowercase for unwanted keyword check + cleaned_name_for_folder = clean_folder_name(name) + if cleaned_name_for_folder.lower() not in unwanted_keywords: # Check against lowercased unwanted keywords + matched_cleaned_names.add(cleaned_name_for_folder) # Add the cleaned name with original casing preserved as much as possible return sorted(list(matched_cleaned_names)) def is_image(filename): - """Checks if a filename likely represents an image.""" + """Checks if the filename has a common image extension.""" if not filename: return False - return filename.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.gif')) + _, ext = os.path.splitext(filename) + return ext.lower() in IMAGE_EXTENSIONS def is_video(filename): - """Checks if a filename likely represents a video.""" + """Checks if the filename has a common video extension.""" if not filename: return False - return filename.lower().endswith(('.mp4', '.mov', '.mkv', '.webm', '.avi', '.wmv')) + _, ext = os.path.splitext(filename) + return ext.lower() in VIDEO_EXTENSIONS def is_zip(filename): - """Checks if a filename likely represents a ZIP archive.""" + """Checks if the filename ends with .zip (case-insensitive).""" if not filename: return False return filename.lower().endswith('.zip') def is_rar(filename): - """Checks if a filename likely represents a RAR archive.""" + """Checks if the filename ends with .rar (case-insensitive).""" if not filename: return False return filename.lower().endswith('.rar') def is_post_url(url): - """Checks if a URL likely points to a specific post.""" + """Checks if the URL likely points to a specific post.""" if not isinstance(url, str): return False return '/post/' in urlparse(url).path @@ -130,15 +152,16 @@ def extract_post_info(url_string): try: parsed_url = urlparse(url_string.strip()) domain = parsed_url.netloc.lower() + # Check if the domain is one of the known Kemono or Coomer domains is_kemono = any(d in domain for d in ['kemono.su', 'kemono.party']) is_coomer = any(d in domain for d in ['coomer.su', 'coomer.party']) - if not (is_kemono or is_coomer): return None, None, None # Not a supported domain + if not (is_kemono or is_coomer): return None, None, None # Not a recognized service path_parts = [part for part in parsed_url.path.strip('/').split('/') if part] - # Standard URL format: //user//post/ - # Or creator feed: //user/ + # Standard URL structure: /{service}/user/{user_id}/post/{post_id} + # Or creator page: /{service}/user/{user_id} if len(path_parts) >= 3 and path_parts[1].lower() == 'user': service = path_parts[0] user_id = path_parts[2] @@ -146,8 +169,8 @@ def 
             post_id = path_parts[4]
             return service, user_id, post_id
-        # API URL format: /api/v1/<service>/user/<user_id>/post/<post_id>
-        # Or creator feed: /api/v1/<service>/user/<user_id>
+        # API URL structure: /api/v1/{service}/user/{user_id}/post/{post_id}
+        # Or API creator page: /api/v1/{service}/user/{user_id}
         if len(path_parts) >= 5 and path_parts[0].lower() == 'api' and \
            path_parts[1].lower() == 'v1' and path_parts[3].lower() == 'user':
             service = path_parts[2]
@@ -155,49 +178,53 @@ def extract_post_info(url_string):
             if len(path_parts) >= 7 and path_parts[5].lower() == 'post':
                 post_id = path_parts[6]
                 return service, user_id, post_id
-
     except Exception as e:
-        # Log error if needed, but return None, None, None for graceful failure
+        # Log or handle unexpected errors during URL parsing if necessary
         print(f"Debug: Exception during extract_post_info for URL '{url_string}': {e}")
-    return None, None, None
+    return None, None, None  # Return None for all three if parsing fails or the structure is not matched
 
 def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None):
-    """Fetches a single page of posts from the API. Checks cancellation_event if provided."""
+    """Fetches a single page of posts from the API."""
     if cancellation_event and cancellation_event.is_set():
         logger("   Fetch cancelled before request.")
-        raise RuntimeError("Fetch operation cancelled by user.")  # Or return empty list
+        raise RuntimeError("Fetch operation cancelled by user.")  # Raise an error to stop pagination
 
     paginated_url = f'{api_url_base}?o={offset}'
     logger(f"   Fetching: {paginated_url}")
     try:
-        response = requests.get(paginated_url, headers=headers, timeout=(10, 60))  # connect timeout, read timeout
-        response.raise_for_status()  # Raise HTTPError for bad responses (4XX or 5XX)
+        response = requests.get(paginated_url, headers=headers, timeout=(10, 60))  # (connect_timeout, read_timeout)
+        response.raise_for_status()  # Raises HTTPError for bad responses (4XX or 5XX)
+        # It's good practice to check the content type before parsing JSON
        if 'application/json' not in response.headers.get('Content-Type', '').lower():
             logger(f"⚠️ Unexpected content type from API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}")
-            return []  # Return empty list on unexpected content type
+            return []  # Return an empty list, or raise an error if JSON is strictly expected
         return response.json()
     except requests.exceptions.Timeout:
+        # Log the specific timeout and re-raise, or handle it as a distinct error
         raise RuntimeError(f"Timeout fetching offset {offset} from {paginated_url}")
     except requests.exceptions.RequestException as e:
+        # General request exception (includes HTTPError, ConnectionError, etc.)
         err_msg = f"Error fetching offset {offset} from {paginated_url}: {e}"
         if e.response is not None:
             err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
         raise RuntimeError(err_msg)
     except ValueError as e:  # JSONDecodeError inherits from ValueError
+        # Handle cases where the response is not valid JSON
         raise RuntimeError(f"Error decoding JSON from offset {offset} ({paginated_url}): {e}. Response text: {response.text[:200]}")
-    except Exception as e:  # Catch any other unexpected errors
+    except Exception as e:
+        # Catch any other unexpected errors
         raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}")
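
fetch_posts_paginated is offset-based: the caller advances the offset by the number of posts actually returned, which is exactly what download_from_api does below. A minimal driver loop, as a reviewer sketch — api_base, headers, and handle are hypothetical placeholders:

# Reviewer sketch — not part of the patch.
offset = 0
while True:
    batch = fetch_posts_paginated(api_base, headers, offset, print)
    if not batch:          # an empty page means the feed is exhausted
        break
    handle(batch)          # hypothetical consumer of one page of posts
    offset += len(batch)   # advance by posts received, mirroring download_from_api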
 
 def download_from_api(api_url_input, logger=print, start_page=None, end_page=None,
                       manga_mode=False, cancellation_event=None):
     """
-    Generator function to fetch posts from Kemono/Coomer API.
-    Handles pagination and specific post fetching.
-    In manga_mode, it fetches all posts and yields them in reversed order (oldest first).
-    Checks cancellation_event if provided.
+    Generator function to fetch post data from the Kemono/Coomer API.
+    Handles pagination and yields batches of posts.
+    In Manga Mode, fetches all posts first, then yields them in reverse order (oldest first).
     """
-    headers = {'User-Agent': 'Mozilla/5.0'}  # Basic user agent
+    headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}  # Standard headers
     service, user_id, target_post_id = extract_post_info(api_url_input)
 
     if cancellation_event and cancellation_event.is_set():
@@ -206,60 +233,64 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
     if not service or not user_id:
         logger(f"❌ Invalid URL or could not extract service/user: {api_url_input}")
-        return  # Stop generation if URL is invalid
+        return
 
+    # Page range is ignored for single-post URLs
     if target_post_id and (start_page or end_page):
         logger("⚠️ Page range (start/end page) is ignored when a specific post URL is provided.")
-        start_page = end_page = None  # Reset page range for single post URL
+        start_page = end_page = None
 
+    # Manga mode is only applicable to creator feeds (not single posts)
     is_creator_feed_for_manga = manga_mode and not target_post_id
 
     parsed_input = urlparse(api_url_input)
     api_domain = parsed_input.netloc
-    # Ensure we use a valid API domain, defaulting if necessary
+    # Ensure we use a valid API domain, defaulting to kemono.su if unrecognized
     if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
         logger(f"⚠️ Unrecognized domain '{api_domain}'. Defaulting to kemono.su for API calls.")
-        api_domain = "kemono.su"
+        api_domain = "kemono.su"  # Or "coomer.party" if that is the preferred default
 
     api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}"
-    page_size = 50  # API returns 50 posts per page
+    page_size = 50  # The Kemono API typically returns 50 posts per page
 
+    # --- Manga Mode: fetch all posts first, then reverse ---
     if is_creator_feed_for_manga:
         logger("   Manga Mode: Fetching all posts to reverse order (oldest posts processed first)...")
         all_posts_for_manga_mode = []
         current_offset_manga = 0
-        while True:  # Loop to fetch all pages
+        while True:
             if cancellation_event and cancellation_event.is_set():
                 logger("   Manga mode post fetching cancelled.")
                 break
             try:
                 posts_batch_manga = fetch_posts_paginated(api_base_url, headers, current_offset_manga, logger, cancellation_event)
-                if not isinstance(posts_batch_manga, list):
+                if not isinstance(posts_batch_manga, list):  # The API should always return a list
                     logger(f"❌ API Error (Manga Mode): Expected list of posts, got {type(posts_batch_manga)}.")
                     break
-                if not posts_batch_manga:  # No more posts
+                if not posts_batch_manga:  # An empty list means no more posts
                     logger("✅ Reached end of posts (Manga Mode fetch all).")
                     break
                 all_posts_for_manga_mode.extend(posts_batch_manga)
-                current_offset_manga += len(posts_batch_manga)
-                time.sleep(0.6)
-            except RuntimeError as e:
-                if "cancelled by user" in str(e).lower():  # Check if it was our cancellation
+                current_offset_manga += len(posts_batch_manga)  # The offset advances by posts received, not by page_size
+                time.sleep(0.6)  # Be respectful to the API
+            except RuntimeError as e:  # Errors raised by fetch_posts_paginated
+                if "cancelled by user" in str(e).lower():
                     logger(f"ℹ️ Manga mode pagination stopped due to cancellation: {e}")
                 else:
                     logger(f"❌ {e}\n   Aborting manga mode pagination.")
pagination.") - break - except Exception as e: + break # Stop fetching on error + except Exception as e: # Catch any other unexpected errors logger(f"❌ Unexpected error during manga mode fetch: {e}") traceback.print_exc() break - if cancellation_event and cancellation_event.is_set(): return + if cancellation_event and cancellation_event.is_set(): return # Early exit if cancelled if all_posts_for_manga_mode: logger(f" Manga Mode: Fetched {len(all_posts_for_manga_mode)} total posts. Reversing order...") - all_posts_for_manga_mode.reverse() + all_posts_for_manga_mode.reverse() # Oldest posts first + # Yield in batches of page_size for i in range(0, len(all_posts_for_manga_mode), page_size): if cancellation_event and cancellation_event.is_set(): logger(" Manga mode post yielding cancelled.") @@ -267,25 +298,26 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non yield all_posts_for_manga_mode[i:i + page_size] else: logger(" Manga Mode: No posts found to process.") - return + return # End of Manga Mode logic + # --- Normal Mode or Single Post Mode --- current_page_num = 1 current_offset = 0 - processed_target_post_flag = False + processed_target_post_flag = False # For single post URLs if start_page and start_page > 1: - current_offset = (start_page - 1) * page_size + current_offset = (start_page - 1) * page_size # Calculate offset for starting page current_page_num = start_page logger(f" Starting from page {current_page_num} (calculated offset {current_offset}).") - while True: + while True: # Pagination loop if cancellation_event and cancellation_event.is_set(): logger(" Post fetching loop cancelled.") break if end_page and current_page_num > end_page: logger(f"✅ Reached specified end page ({end_page}). Stopping.") break - if target_post_id and processed_target_post_flag: + if target_post_id and processed_target_post_flag: # If single post was found and processed logger(f"✅ Target post {target_post_id} has been processed.") break @@ -294,53 +326,57 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non if not isinstance(posts_batch, list): logger(f"❌ API Error: Expected list of posts, got {type(posts_batch)} at page {current_page_num}.") break - except RuntimeError as e: + except RuntimeError as e: # Catch errors from fetch_posts_paginated if "cancelled by user" in str(e).lower(): logger(f"ℹ️ Pagination stopped due to cancellation: {e}") else: logger(f"❌ {e}\n Aborting pagination at page {current_page_num}.") break - except Exception as e: + except Exception as e: # Catch any other unexpected errors logger(f"❌ Unexpected error fetching page {current_page_num}: {e}") traceback.print_exc() break - - if not posts_batch: - if current_page_num == (start_page or 1) and not target_post_id : - logger("😕 No posts found on the first page checked.") - elif not target_post_id: - logger("✅ Reached end of posts (no more content).") - break - if target_post_id: + if not posts_batch: # No more posts + if current_page_num == (start_page or 1) and not target_post_id : # No posts on first page of a creator feed + logger("😕 No posts found on the first page checked.") + elif not target_post_id: # End of creator feed + logger("✅ Reached end of posts (no more content).") + break # Exit pagination loop + + if target_post_id: # Processing a single post URL matching_post = next((p for p in posts_batch if str(p.get('id')) == str(target_post_id)), None) if matching_post: logger(f"🎯 Found target post {target_post_id}.") - yield [matching_post] - 
             else:
+                # This case should ideally not happen if the post ID is valid and the API is consistent.
+                # If the API returns posts in pages, a specific post ID might not be on the first page if the offset isn't 0.
+                # However, for a direct post URL, we expect either the post or an error.
                 logger(f"❌ Target post {target_post_id} not found in the batch from offset {current_offset}. This may indicate the post URL is incorrect or the API behavior is unexpected.")
-                break
-        else:
-            yield posts_batch
+                break  # Stop if the target post is not found where expected
+        else:  # Processing a creator feed (not a single post)
+            yield posts_batch  # Yield the batch of posts
 
-        if not (target_post_id and processed_target_post_flag):
-            if not posts_batch : break
-            current_offset += len(posts_batch)
+        if not (target_post_id and processed_target_post_flag):  # If not a single post that was just processed
+            if not posts_batch : break  # Should be redundant given the check above, but safe
+            current_offset += len(posts_batch)  # The Kemono API uses an item offset, not a page offset
             current_page_num += 1
-            time.sleep(0.6)
-        else:
-            break
-
+            time.sleep(0.6)  # Be respectful to the API
+        else:  # The single post was processed; exit the loop
+            break
+
+    # Final check if a specific target post was requested but never found
     if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()):
         logger(f"❌ Target post {target_post_id} could not be found after checking relevant pages.")
 
 def get_link_platform(url):
-    """Attempts to identify the platform of an external link."""
+    """Attempts to identify the platform of an external link from its domain."""
     try:
         domain = urlparse(url).netloc.lower()
-        # Specific known platforms
+        # Specific known platforms (add more as needed)
         if 'drive.google.com' in domain: return 'google drive'
         if 'mega.nz' in domain or 'mega.io' in domain: return 'mega'
         if 'dropbox.com' in domain: return 'dropbox'
@@ -351,29 +387,31 @@ def get_link_platform(url):
         if 'pixiv.net' in domain: return 'pixiv'
         if 'kemono.su' in domain or 'kemono.party' in domain: return 'kemono'  # Explicitly identify kemono
         if 'coomer.su' in domain or 'coomer.party' in domain: return 'coomer'  # Explicitly identify coomer
-
-        # Generic extraction for other domains
+
+        # Generic extraction for other domains (e.g., 'example' from 'www.example.com')
         parts = domain.split('.')
         if len(parts) >= 2:
             # Return the second-to-last part for common structures (e.g., 'google' from google.com)
             # Avoid returning generic TLDs like 'com', 'org', 'net' as the platform
+            # Handle cases like 'google.co.uk' -> 'google'
             if parts[-2] not in ['com', 'org', 'net', 'gov', 'edu', 'co'] or len(parts) == 2:
-                return parts[-2]
-            elif len(parts) >= 3:  # Handle cases like 'google.co.uk' -> 'google'
+                return parts[-2]
+            elif len(parts) >= 3 and parts[-3] not in ['com', 'org', 'net', 'gov', 'edu', 'co']:
                 return parts[-3]
-            else:  # Fallback to full domain if unsure
-                return domain
-        return 'external'  # Default if domain parsing fails
+            else:  # Fall back to the full domain if unsure, or for very short domains
+                return domain
+        return 'external'  # Default if domain parsing fails or is too simple (e.g., 'localhost')
     except Exception:
         return 'unknown'  # Error case
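
For reference, the domain heuristics above should yield results like the following (reviewer-added expectations, not part of the patch):

# Expected get_link_platform results for representative URLs:
#   https://drive.google.com/file/d/abc  -> 'google drive'
#   https://mega.nz/folder/xyz           -> 'mega'
#   https://www.example.com/page         -> 'example'   (generic second-to-last label)
#   https://foo.co.uk/bar                -> 'foo'       (skips the 'co' pseudo-TLD)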
 
 class PostProcessorSignals(QObject):
     """Defines signals used by PostProcessorWorker to communicate with the GUI thread."""
-    progress_signal = pyqtSignal(str)
-    file_download_status_signal = pyqtSignal(bool)
-    # MODIFIED: Added link_text argument
-    external_link_signal = pyqtSignal(str, str, str, str)  # post_title, link_text, link_url, platform
-    file_progress_signal = pyqtSignal(str, int, int)
+    progress_signal = pyqtSignal(str)  # Generic log messages
+    file_download_status_signal = pyqtSignal(bool)  # True when a file download starts, False when it ends/fails
+    # Signal carries post_title, link_text, link_url, platform
+    external_link_signal = pyqtSignal(str, str, str, str)
+    # Signal carries filename, downloaded_bytes, total_bytes for the progress bar
+    file_progress_signal = pyqtSignal(str, int, int)
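
A reviewer sketch of how a GUI thread might consume these signals; the PyQt5 connect calls are standard API, but log_view, update_progress_bar, and record_external_link are hypothetical slots:

# Reviewer sketch — not part of the patch.
signals = PostProcessorSignals()
signals.progress_signal.connect(log_view.append)           # hypothetical log widget slot
signals.file_progress_signal.connect(update_progress_bar)  # receives (filename, done_bytes, total_bytes)
signals.external_link_signal.connect(record_external_link) # receives (post_title, text, url, platform)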
 
 class PostProcessorWorker:
@@ -386,42 +424,45 @@ class PostProcessorWorker:
                  api_url_input, cancellation_event, signals, downloaded_files, downloaded_file_hashes,
                  downloaded_files_lock, downloaded_file_hashes_lock,
                  skip_words_list=None, show_external_links=False,
-                 extract_links_only=False, num_file_threads=4, skip_current_file_flag=None,
+                 extract_links_only=False,
+                 num_file_threads=4, skip_current_file_flag=None,
                  manga_mode_active=False
                  ):
         self.post = post_data
         self.download_root = download_root
         self.known_names = known_names
         self.filter_character_list = filter_character_list if filter_character_list else []
-        self.unwanted_keywords = unwanted_keywords
-        self.filter_mode = filter_mode
+        self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else set()
+        self.filter_mode = filter_mode  # 'image', 'video', or 'all'
         self.skip_zip = skip_zip
         self.skip_rar = skip_rar
         self.use_subfolders = use_subfolders
         self.use_post_subfolders = use_post_subfolders
-        self.target_post_id_from_initial_url = target_post_id_from_initial_url
-        self.custom_folder_name = custom_folder_name
+        self.target_post_id_from_initial_url = target_post_id_from_initial_url  # ID from the initial URL if it was a post URL
+        self.custom_folder_name = custom_folder_name  # For single-post downloads
         self.compress_images = compress_images
         self.download_thumbnails = download_thumbnails
         self.service = service
         self.user_id = user_id
-        self.api_url_input = api_url_input
+        self.api_url_input = api_url_input  # The original URL entered by the user
         self.cancellation_event = cancellation_event
-        self.signals = signals
-        self.skip_current_file_flag = skip_current_file_flag
-
+        self.signals = signals  # For emitting progress, logs, etc.
+        self.skip_current_file_flag = skip_current_file_flag  # Event used to skip the current file download
+
+        # Sets and locks for tracking downloaded files/hashes across threads/workers
         self.downloaded_files = downloaded_files if downloaded_files is not None else set()
         self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set()
         self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock()
         self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock()
-
+
         self.skip_words_list = skip_words_list if skip_words_list is not None else []
-        self.show_external_links = show_external_links
-        self.extract_links_only = extract_links_only
-        self.num_file_threads = num_file_threads
+        self.show_external_links = show_external_links  # Whether to extract and log external links
+        self.extract_links_only = extract_links_only  # If True, only extract links; do not download
+        self.num_file_threads = num_file_threads  # Threads for downloading files within this post
 
-        self.manga_mode_active = manga_mode_active
+        self.manga_mode_active = manga_mode_active  # True when manga mode is on
 
+        # Disable compression if Pillow is not available
         if self.compress_images and Image is None:
             self.logger("⚠️ Image compression disabled: Pillow library not found.")
             self.compress_images = False
@@ -430,77 +471,103 @@ class PostProcessorWorker:
         """Emits a log message via the progress_signal if available."""
         if self.signals and hasattr(self.signals, 'progress_signal'):
             self.signals.progress_signal.emit(message)
-        else: print(f"(Worker Log - No Signal): {message}")
+        else:  # Fallback if signals are not connected (e.g., during testing)
+            print(f"(Worker Log - No Signal): {message}")
 
     def check_cancel(self):
         """Checks if cancellation has been requested."""
         return self.cancellation_event.is_set()
Info: {str(file_info)[:100]}") return 0, 1 - # --- Check skip words on ORIGINAL filename FIRST --- + # --- Skip Check 1: Skip Words (Always based on Filename) --- if self.skip_words_list: - name_to_check_lower = api_original_filename.lower() - # Simple check if any skip word is a substring - # For more precise matching (e.g., whole words), adjust this logic - if any(skip_word.lower() in name_to_check_lower for skip_word in self.skip_words_list): - matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in name_to_check_lower), "unknown_skip_word") - self.logger(f" -> Skip File (Keyword on Original Name): '{api_original_filename}' contains '{matched_skip}'.") - return 0, 1 - # --- End skip word check --- - + content_to_check_for_skip_words = api_original_filename.lower() # ALWAYS use filename for skip words + log_source_for_skip_words = f"Filename '{api_original_filename}'" + + for skip_word in self.skip_words_list: + if skip_word.lower() in content_to_check_for_skip_words: + self.logger(f" -> Skip File (Keyword Match): {log_source_for_skip_words} contains '{skip_word}'.") + return 0, 1 + + # --- Character Filter (Global Gate) --- + # If character filters are active, the item (post for manga, file for normal) must match. + if self.filter_character_list: + matches_any_character_filter = False + if self.manga_mode_active: + # Manga Mode: Character filter applies to POST TITLE + if any(is_title_match_for_character(post_title, char_filter) for char_filter in self.filter_character_list): + matches_any_character_filter = True + if not matches_any_character_filter: + # This log might be redundant if the post-level check in process() already skipped it, + # but it's a safeguard if a file somehow reaches here without its post title matching. + self.logger(f" -> Skip File (Manga Mode - Post Title No Char Match): Title '{post_title[:30]}' doesn't match active character filters for this file.") + return 0, 1 + else: # Normal mode: Character filter applies to FILENAME + if any(is_filename_match_for_character(api_original_filename, char_filter) for char_filter in self.filter_character_list): + matches_any_character_filter = True + if not matches_any_character_filter: + self.logger(f" -> Skip File (Normal Mode - Filename No Char Match): '{api_original_filename}' doesn't match active character filters.") + return 0, 1 + + # --- Filename Generation (Manga Mode vs Normal Mode) --- _, original_ext = os.path.splitext(api_original_filename) - if original_ext and not original_ext.startswith('.'): original_ext = '.' + original_ext - elif not original_ext: - _, temp_ext = os.path.splitext(clean_filename(api_original_filename)) + if original_ext and not original_ext.startswith('.'): original_ext = '.' + original_ext + elif not original_ext: # Try to derive extension if missing + _, temp_ext = os.path.splitext(clean_filename(api_original_filename)) # Clean first if temp_ext and not temp_ext.startswith('.'): original_ext = '.' 
 
+        # --- Filename Generation (Manga Mode vs Normal Mode) ---
         _, original_ext = os.path.splitext(api_original_filename)
-        if original_ext and not original_ext.startswith('.'): original_ext = '.' + original_ext
-        elif not original_ext:
-            _, temp_ext = os.path.splitext(clean_filename(api_original_filename))
+        if original_ext and not original_ext.startswith('.'): original_ext = '.' + original_ext
+        elif not original_ext:  # Try to derive the extension if it is missing
+            _, temp_ext = os.path.splitext(clean_filename(api_original_filename))  # Clean first
             if temp_ext and not temp_ext.startswith('.'): original_ext = '.' + temp_ext
             elif temp_ext: original_ext = temp_ext
-            else: original_ext = ''
-
-        filename_to_save = ""
+            else: original_ext = ''  # No extension found
 
+        filename_to_save = ""
         if self.manga_mode_active:
-            if post_title and post_title.strip():
-                cleaned_post_title_full = clean_filename(post_title.strip())
-                original_filename_base, _ = os.path.splitext(api_original_filename)
+            # Manga-mode renaming logic (uses post_title plus a sequence number)
+            if post_title and post_title.strip():
+                cleaned_post_title_full = clean_filename(post_title.strip())  # Clean the post title for filename use
+                original_filename_base, _ = os.path.splitext(api_original_filename)  # Base of the original API filename
 
+                # Try to extract a sequence number from the original filename
                 extracted_sequence_from_original = ""
+                # Simple number at the end: e.g., "image_01", "pic123"
                 simple_end_match = re.search(r'(\d+)$', original_filename_base)
                 if simple_end_match:
-                    extracted_sequence_from_original = simple_end_match.group(1)
+                    extracted_sequence_from_original = simple_end_match.group(1).zfill(2)  # Zero-pad if needed
                 else:
+                    # More complex patterns like "page 01", "ch-2", "ep_003"
                    complex_match = re.search(r'(?:[ _.\-/]|^)(?:p|page|ch|chapter|ep|episode|v|vol|volume|no|num|number|pt|part)[ _.\-]*(\d+)', original_filename_base, re.IGNORECASE)
                     if complex_match:
-                        extracted_sequence_from_original = complex_match.group(1)
+                        extracted_sequence_from_original = complex_match.group(1).zfill(2)  # Zero-pad
 
+                # Base for the new filename from the post title, removing any existing page/chapter numbers from the title
                 cleaned_title_base = re.sub(
-                    r'[|\[\]()]*[ _.\-]*(?:page|p|ch|chapter|ep|episode|v|vol|volume|no|num|number|pt|part)s?[ _.\-]*\d+([ _.\-]+\d+)?$',
+                    r'[|\[\]()]*[ _.\-]*(?:page|p|ch|chapter|ep|episode|v|vol|volume|no|num|number|pt|part)s?[ _.\-]*\d+([ _.\-]+\d+)?([ _.\-]*(?:END|FIN))?$',
                     '', cleaned_post_title_full, flags=re.IGNORECASE
                 ).strip()
-                if not cleaned_title_base:
-                    cleaned_title_base = cleaned_post_title_full
-                cleaned_title_base = cleaned_title_base.rstrip(' _.-')
-
+                if not cleaned_title_base:  # Fallback if the regex strips everything
+                    cleaned_title_base = cleaned_post_title_full
+                cleaned_title_base = cleaned_title_base.rstrip(' _.-')  # Trim trailing separators
 
                 if extracted_sequence_from_original:
-                    filename_to_save = f"{cleaned_title_base} {extracted_sequence_from_original}{original_ext}"
-                    self.logger(f"   Manga Mode (Seq from Original): Renaming '{api_original_filename}' to '{filename_to_save}'")
+                    filename_to_save = f"{cleaned_title_base} - {extracted_sequence_from_original}{original_ext}"
                 else:
-                    fallback_sequence = str(file_index_in_post + 1)
-                    filename_to_save = f"{cleaned_title_base} {fallback_sequence}{original_ext}"
-                    self.logger(f"   Manga Mode (No Seq in Original): Using cleaned title + file index '{fallback_sequence}'. Renaming '{api_original_filename}' to '{filename_to_save}'")
+                    # Fall back to the file's index within the post if no sequence is found in the original filename
+                    fallback_sequence = str(file_index_in_post + 1).zfill(2)  # Zero-padded
+                    filename_to_save = f"{cleaned_title_base} - {fallback_sequence}{original_ext}"
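
A reviewer-added walk-through of the renaming rules above, using hypothetical titles and filenames:

# Reviewer sketch — not part of the patch. Expected manga-mode renames:
#   post_title = "My Comic ch. 12", original file "page_7.jpg"
#     -> the title cleans to "My_Comic" (trailing chapter marker stripped) and the
#        sequence "7" comes from the filename, zero-padded: "My_Comic - 07.jpg"
#   post_title = "Beach Episode", original file "cover.png" (no digits), file_index_in_post = 0
#     -> falls back to the in-post index: "Beach_Episode - 01.png"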
 
+                # Handle potential filename collisions by appending a counter
                 counter = 1
                 base_name_coll, ext_coll = os.path.splitext(filename_to_save)
                 temp_filename_for_collision_check = filename_to_save
@@ -508,139 +575,207 @@ class PostProcessorWorker:
                     temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}"
                     counter += 1
                 if temp_filename_for_collision_check != filename_to_save:
+                    # self.logger(f"   Manga Mode: Collision detected. Adjusted filename to '{temp_filename_for_collision_check}'")
                     filename_to_save = temp_filename_for_collision_check
-                    self.logger(f"   Manga Mode: Collision detected. Adjusted filename to '{filename_to_save}'")
-
-            else:
-                filename_to_save = clean_filename(api_original_filename)
-                self.logger(f"⚠️ Manga mode: Post title missing. Using cleaned original filename '{filename_to_save}'.")
-        else:
+            else:  # Manga mode, but post_title is missing (should be rare)
+                filename_to_save = clean_filename(api_original_filename)  # Fall back to the cleaned original
+                self.logger(f"⚠️ Manga mode: Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save}'.")
+        else:  # Normal mode
             filename_to_save = clean_filename(api_original_filename)
 
-        final_filename_for_sets_and_saving = filename_to_save
+        final_filename_for_sets_and_saving = filename_to_save  # The name used for saving and duplicate checks
 
-        if not self.download_thumbnails:
-            is_img_type = is_image(api_original_filename)
+        # --- File Type Filtering (applies to both modes, based on the original filename) ---
+        if not self.download_thumbnails:  # Thumbnail mode bypasses these filters
+            is_img_type = is_image(api_original_filename)  # Check the original type
             is_vid_type = is_video(api_original_filename)
             is_zip_type = is_zip(api_original_filename)
             is_rar_type = is_rar(api_original_filename)
-            if self.filter_mode == 'image' and not is_img_type: self.logger(f"   -> Filter Skip: '{api_original_filename}' (Not Image)"); return 0,1
-            if self.filter_mode == 'video' and not is_vid_type: self.logger(f"   -> Filter Skip: '{api_original_filename}' (Not Video)"); return 0,1
-            if self.skip_zip and is_zip_type: self.logger(f"   -> Pref Skip: '{api_original_filename}' (ZIP)"); return 0,1
-            if self.skip_rar and is_rar_type: self.logger(f"   -> Pref Skip: '{api_original_filename}' (RAR)"); return 0,1
-        target_folder_basename = os.path.basename(target_folder_path)
+            if self.filter_mode == 'image' and not is_img_type:
+                self.logger(f"   -> Filter Skip: '{api_original_filename}' (Not Image).")
+                return 0,1
+            if self.filter_mode == 'video' and not is_vid_type:
+                self.logger(f"   -> Filter Skip: '{api_original_filename}' (Not Video).")
+                return 0,1
+            if self.skip_zip and is_zip_type:
+                self.logger(f"   -> Pref Skip: '{api_original_filename}' (ZIP).")
+                return 0,1
+            if self.skip_rar and is_rar_type:
+                self.logger(f"   -> Pref Skip: '{api_original_filename}' (RAR).")
+                return 0,1
+
+        target_folder_basename = os.path.basename(target_folder_path)  # For logging
         current_save_path = os.path.join(target_folder_path, final_filename_for_sets_and_saving)
 
+        # --- Duplicate Checks (Path, Global Filename, Hash) ---
         if os.path.exists(current_save_path) and os.path.getsize(current_save_path) > 0:
             self.logger(f"   -> Exists (Path): '{final_filename_for_sets_and_saving}' in '{target_folder_basename}'.")
-            with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving)
+            with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving)  # Record in the global set
             return 0, 1
 
         with self.downloaded_files_lock:
             if final_filename_for_sets_and_saving in self.downloaded_files:
-                self.logger(f"   -> Global Skip (Filename): '{final_filename_for_sets_and_saving}' already recorded.")
+                self.logger(f"   -> Global Skip (Filename): '{final_filename_for_sets_and_saving}' already recorded as downloaded this session.")
                 return 0, 1
 
-        max_retries = 3; retry_delay = 5; downloaded_size_bytes = 0
-        calculated_file_hash = None; file_content_bytes = None; total_size_bytes = 0
+        # --- Download Loop with Retries ---
+        max_retries = 3
+        retry_delay = 5  # seconds
+        downloaded_size_bytes = 0
+        calculated_file_hash = None
+        file_content_bytes = None  # BytesIO holding the downloaded content
+        total_size_bytes = 0  # From the Content-Length header, set on the first attempt
         download_successful_flag = False
-        log_name_during_dl = f"{api_original_filename} (as {final_filename_for_sets_and_saving})"
 
-        for attempt_num in range(max_retries + 1):
-            if self.check_cancel() or (skip_event and skip_event.is_set()): break
+        for attempt_num in range(max_retries + 1):  # max_retries means max_retries + 1 attempts in total
+            if self.check_cancel() or (skip_event and skip_event.is_set()):
+                break  # Exit the retry loop if cancelled
             try:
-                if attempt_num > 0:
-                    self.logger(f"   Retrying '{log_name_during_dl}' (Attempt {attempt_num}/{max_retries})...");
-                    time.sleep(retry_delay * (2**(attempt_num-1)))
+                if attempt_num > 0:
+                    self.logger(f"   Retrying '{api_original_filename}' (Attempt {attempt_num}/{max_retries})...")
+                    time.sleep(retry_delay * (2**(attempt_num - 1)))  # Exponential backoff
 
-                if self.signals: self.signals.file_download_status_signal.emit(True)
+                if self.signals and hasattr(self.signals, 'file_download_status_signal'):
+                    self.signals.file_download_status_signal.emit(True)  # Signal download start
 
-                response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True)
-                response.raise_for_status()
-                total_size_bytes = int(response.headers.get('Content-Length', 0))
-                file_content_bytes = BytesIO(); downloaded_size_bytes = 0; md5_hasher = hashlib.md5()
+                response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True)  # (connect_timeout, read_timeout)
+                response.raise_for_status()  # Check for HTTP errors
+
+                current_total_size_bytes_from_headers = int(response.headers.get('Content-Length', 0))
+
+                if attempt_num == 0:  # First attempt: log the initial size
+                    total_size_bytes = current_total_size_bytes_from_headers
+                    size_str = f"{total_size_bytes / (1024 * 1024):.2f} MB" if total_size_bytes > 0 else "unknown size"
+                    self.logger(f"⬇️ Downloading: '{api_original_filename}' (Size: {size_str}) [Saving as: '{final_filename_for_sets_and_saving}']")
+
+                # Use the size reported by the current attempt for progress reporting
+                current_attempt_total_size = current_total_size_bytes_from_headers
+
+                file_content_buffer = BytesIO()  # Buffer for this attempt's content
+                current_attempt_downloaded_bytes = 0
+                md5_hasher = hashlib.md5()
                 last_progress_time = time.time()
 
                 for chunk in response.iter_content(chunk_size=1 * 1024 * 1024):  # 1 MB chunks
-                    if self.check_cancel() or (skip_event and skip_event.is_set()): break  # Check cancellation inside loop
+                    if self.check_cancel() or (skip_event and skip_event.is_set()):
+                        break  # Stop reading chunks if cancelled
                     if chunk:
-                        file_content_bytes.write(chunk); md5_hasher.update(chunk); downloaded_size_bytes += len(chunk)
-                        if time.time() - last_progress_time > 1 and total_size_bytes > 0 and self.signals:
-                            self.signals.file_progress_signal.emit(log_name_during_dl, downloaded_size_bytes, total_size_bytes)
+                        file_content_buffer.write(chunk)
+                        md5_hasher.update(chunk)
+                        current_attempt_downloaded_bytes += len(chunk)
+                        # Emit the progress signal periodically
+                        if time.time() - last_progress_time > 1 and current_attempt_total_size > 0 and \
+                           self.signals and hasattr(self.signals, 'file_progress_signal'):
+                            self.signals.file_progress_signal.emit(
+                                api_original_filename,  # Show the original name in the progress display
+                                current_attempt_downloaded_bytes,
+                                current_attempt_total_size
+                            )
                             last_progress_time = time.time()
 
-                if self.check_cancel() or (skip_event and skip_event.is_set()): break
+                if self.check_cancel() or (skip_event and skip_event.is_set()):
+                    if file_content_buffer: file_content_buffer.close()
+                    break  # Break from the retry loop if cancelled during chunk iteration
 
-                if downloaded_size_bytes > 0:
+                # Check whether this attempt succeeded
+                if current_attempt_downloaded_bytes > 0:  # Some data was downloaded
                     calculated_file_hash = md5_hasher.hexdigest()
+                    downloaded_size_bytes = current_attempt_downloaded_bytes
+                    if file_content_bytes: file_content_bytes.close()  # Close the previous attempt's buffer
+                    file_content_bytes = file_content_buffer  # Keep this attempt's content
+                    file_content_bytes.seek(0)  # Reset the pointer for reading
                     download_successful_flag = True
-                    break
-                elif total_size_bytes == 0 and response.status_code == 200 :
-                    self.logger(f"   Note: '{log_name_during_dl}' is a 0-byte file according to server.")
-                    calculated_file_hash = md5_hasher.hexdigest()
+                    break  # Exit the retry loop on success
+                elif current_attempt_total_size == 0 and response.status_code == 200:  # Handle 0-byte files
+                    self.logger(f"   Note: '{api_original_filename}' is a 0-byte file according to the server.")
+                    calculated_file_hash = md5_hasher.hexdigest()  # Hash of the empty content
+                    downloaded_size_bytes = 0
+                    if file_content_bytes: file_content_bytes.close()
+                    file_content_bytes = file_content_buffer  # Keep the empty buffer
+                    file_content_bytes.seek(0)
                     download_successful_flag = True
-                    break
-
-            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
-                self.logger(f"   ❌ Download Error (Retryable): {log_name_during_dl}. Error: {e}")
-            except requests.exceptions.RequestException as e:
-                self.logger(f"   ❌ Download Error (Non-Retryable): {log_name_during_dl}. Error: {e}"); break
-            except Exception as e:
-                self.logger(f"   ❌ Unexpected Download Error: {log_name_during_dl}: {e}\n{traceback.format_exc(limit=2)}"); break
-            finally:
-                if self.signals: self.signals.file_download_status_signal.emit(False)
+                    break  # Exit the retry loop
+                else:  # No data, or the attempt failed (e.g. the connection dropped before any data arrived)
+                    if file_content_buffer: file_content_buffer.close()  # Discard this attempt's buffer
+
+            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
+                self.logger(f"   ❌ Download Error (Retryable): {api_original_filename}. Error: {e}")
+                if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
+            except requests.exceptions.RequestException as e:  # Non-retryable (e.g. a 404)
+                self.logger(f"   ❌ Download Error (Non-Retryable): {api_original_filename}. Error: {e}")
Error: {e}") + if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close() + break # Break from retry loop + except Exception as e: # Other unexpected errors + self.logger(f" ❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}") + if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close() + break # Break from retry loop + finally: + if self.signals and hasattr(self.signals, 'file_download_status_signal'): + self.signals.file_download_status_signal.emit(False) # Signal download end/attempt end + # End of retry loop + + # Emit final progress update (e.g., 100% or 0/0 if failed) + if self.signals and hasattr(self.signals, 'file_progress_signal'): + # Use total_size_bytes from the first successful header read for consistency in total + final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes + self.signals.file_progress_signal.emit(api_original_filename, downloaded_size_bytes, final_total_for_progress) - if self.signals and total_size_bytes > 0 : - self.signals.file_progress_signal.emit(log_name_during_dl, downloaded_size_bytes, total_size_bytes) if self.check_cancel() or (skip_event and skip_event.is_set()): - self.logger(f" ⚠️ Download interrupted for {log_name_during_dl}.") + self.logger(f" ⚠️ Download interrupted for {api_original_filename}.") if file_content_bytes: file_content_bytes.close() - return 0, 1 + return 0, 1 # Skipped due to interruption if not download_successful_flag: - self.logger(f"❌ Download failed for '{log_name_during_dl}' after {max_retries} retries.") - if file_content_bytes: file_content_bytes.close() - return 0, 1 + self.logger(f"❌ Download failed for '{api_original_filename}' after {max_retries + 1} attempts.") + if file_content_bytes: file_content_bytes.close() + return 0, 1 # Skipped due to download failure + # --- Hash Check (post-download), Compression, Saving --- with self.downloaded_file_hashes_lock: if calculated_file_hash in self.downloaded_file_hashes: - self.logger(f" -> Content Skip (Hash): '{log_name_during_dl}' (Hash: {calculated_file_hash[:8]}...).") - with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving) + self.logger(f" -> Content Skip (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...) 
already downloaded this session.") + with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving) # Still mark filename as "processed" if file_content_bytes: file_content_bytes.close() - return 0, 1 + return 0, 1 # Skipped due to hash duplicate - bytes_to_write = file_content_bytes; bytes_to_write.seek(0) - final_filename_after_processing = final_filename_for_sets_and_saving - current_save_path_final = current_save_path + bytes_to_write = file_content_bytes # This is the BytesIO from the successful download + final_filename_after_processing = final_filename_for_sets_and_saving # May change if compressed + current_save_path_final = current_save_path # May change if filename changes due to compression - is_img_for_compress_check = is_image(api_original_filename) - if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024): + is_img_for_compress_check = is_image(api_original_filename) # Check original type for compression eligibility + if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024): # Compress if > 1.5MB self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...") try: + # Ensure bytes_to_write is at the beginning for Pillow + bytes_to_write.seek(0) with Image.open(bytes_to_write) as img_obj: + # Handle palette mode images and convert to RGB/RGBA for WebP if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA') - elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB') - + elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB') + compressed_bytes_io = BytesIO() - img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4) + img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4) # method 4 is a good balance compressed_size = compressed_bytes_io.getbuffer().nbytes + # Only use compressed if significantly smaller (e.g., >10% reduction) if compressed_size < downloaded_size_bytes * 0.9: self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.") - bytes_to_write.close() - bytes_to_write = compressed_bytes_io; bytes_to_write.seek(0) - + bytes_to_write.close() # Close original downloaded content stream + bytes_to_write = compressed_bytes_io # Use compressed content stream + bytes_to_write.seek(0) # Reset pointer for writing + base_name_orig, _ = os.path.splitext(final_filename_for_sets_and_saving) - final_filename_after_processing = base_name_orig + '.webp' + final_filename_after_processing = base_name_orig + '.webp' # Change extension current_save_path_final = os.path.join(target_folder_path, final_filename_after_processing) self.logger(f" Updated filename (compressed): {final_filename_after_processing}") else: - self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0) + self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0) # Reset pointer if not using compressed except Exception as comp_e: - self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); bytes_to_write.seek(0) + self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. 
Saving original."); bytes_to_write.seek(0) # Reset pointer + # Check for existence again if filename changed due to compression if final_filename_after_processing != final_filename_for_sets_and_saving and \ os.path.exists(current_save_path_final) and os.path.getsize(current_save_path_final) > 0: self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{target_folder_basename}'.") @@ -648,196 +783,206 @@ class PostProcessorWorker: bytes_to_write.close() return 0, 1 + # --- Save the file --- try: - os.makedirs(os.path.dirname(current_save_path_final), exist_ok=True) + os.makedirs(os.path.dirname(current_save_path_final), exist_ok=True) # Ensure directory exists with open(current_save_path_final, 'wb') as f_out: - f_out.write(bytes_to_write.getvalue()) + f_out.write(bytes_to_write.getvalue()) # Write content + # Add to downloaded sets upon successful save with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash) with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing) self.logger(f"✅ Saved: '{final_filename_after_processing}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{target_folder_basename}'") - time.sleep(0.05) - return 1, 0 + time.sleep(0.05) # Small delay, can be removed if not needed + return 1, 0 # Downloaded, Skipped except Exception as save_err: self.logger(f"❌ Save Fail for '{final_filename_after_processing}': {save_err}") - if os.path.exists(current_save_path_final): - try: os.remove(current_save_path_final); + if os.path.exists(current_save_path_final): # Attempt to remove partial file + try: os.remove(current_save_path_final); except OSError: self.logger(f" -> Failed to remove partially saved file: {current_save_path_final}") - return 0, 1 + return 0, 1 # Skipped due to save error finally: - if bytes_to_write: bytes_to_write.close() + if bytes_to_write: bytes_to_write.close() # Ensure stream is closed def process(self): """Main processing logic for a single post.""" - if self.check_cancel(): return 0, 0 + if self.check_cancel(): return 0, 0 # Downloaded, Skipped total_downloaded_this_post = 0 total_skipped_this_post = 0 - parsed_api_url = urlparse(self.api_url_input) - referer_url = f"https://{parsed_api_url.netloc}/" - headers = {'User-Agent': 'Mozilla/5.0', 'Referer': referer_url} - # Regex to capture URL (group 1) and link text (group 2) - link_pattern = re.compile(r"""]*? # Any characters except > (non-greedy) - href=["'](https?://[^"']+)["'] # Capture href URL in group 1 - [^>]*? # Any characters except > (non-greedy) - > # Closing > of opening tag - (.*?) 
 
+        # Prepare headers for file downloads
+        parsed_api_url = urlparse(self.api_url_input)  # Use the original input URL as the referer base
+        referer_url = f"https://{parsed_api_url.netloc}/"
+        headers = {'User-Agent': 'Mozilla/5.0', 'Referer': referer_url, 'Accept': '*/*'}
 
-        post_data = self.post
-        post_title = post_data.get('title', '') or 'untitled_post'
+        # Regex for finding links in HTML content
+        link_pattern = re.compile(r"""<a\s+href=["'](https?://[^"']+)["'][^>]*>(.*?)</a>""",
+                                  re.IGNORECASE | re.DOTALL)
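
A reviewer sketch of what the reconstructed link_pattern captures; the sample string is hypothetical:

# Reviewer sketch — not part of the patch.
sample = '<a href="https://mega.nz/folder/abc" target="_blank">My <b>Mega</b> folder</a>'
m = link_pattern.search(sample)
# m.group(1) == 'https://mega.nz/folder/abc'   (the URL)
# m.group(2) == 'My <b>Mega</b> folder'        (raw inner HTML; tags are stripped later)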
 
+        # Extract post details
+        post_data = self.post
+        post_title = post_data.get('title', '') or 'untitled_post'
         post_id = post_data.get('id', 'unknown_id')
-        post_main_file_info = post_data.get('file')
-        post_attachments = post_data.get('attachments', [])
-        post_content_html = post_data.get('content', '')
-
-        is_target_post_by_id = (self.target_post_id_from_initial_url is not None) and \
-                               (str(post_id) == str(self.target_post_id_from_initial_url))
+        post_main_file_info = post_data.get('file')  # Main file object for the post
+        post_attachments = post_data.get('attachments', [])  # List of attachment objects
+        post_content_html = post_data.get('content', '')  # HTML content of the post
 
+        # Log post processing start
         self.logger(f"\n--- Processing Post {post_id} ('{post_title[:50]}...') (Thread: {threading.current_thread().name}) ---")
 
-        # --- Skip Check 1: Post Title ---
-        if self.skip_words_list:
-            title_lower = post_title.lower()
-            if any(skip_word.lower() in title_lower for skip_word in self.skip_words_list):
-                matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in title_lower), "unknown_skip_word")
-                self.logger(f"   -> Skip Post (Title Keyword): Title contains '{matched_skip}'.")
-                # Estimate skipped files count (main file + attachments)
-                num_potential_files = len(post_attachments) + (1 if post_main_file_info else 0)
-                return 0, num_potential_files
+        num_potential_files = len(post_attachments or []) + (1 if post_main_file_info and post_main_file_info.get('path') else 0)
 
-        # --- Skip Check 2: Character Filter (Only if subfolders enabled and not a target post) ---
-        if self.filter_character_list and not is_target_post_by_id and self.use_subfolders:
-            matched_by_char_filter = any(is_title_match_for_character(post_title, char_filter) for char_filter in self.filter_character_list)
-            if not matched_by_char_filter:
-                self.logger(f"   -> Filter Skip Post: Title ('{post_title[:50]}...') doesn't match character filters.")
-                num_potential_files = len(post_attachments) + (1 if post_main_file_info else 0)
-                return 0, num_potential_files
+        # --- Post-Level Skip Word Check (REMOVED for Manga Mode based on title) ---
+        # Skip words are now ALWAYS checked at the file level, against the FILENAME, in _download_single_file.
+        # The old manga-mode post-level skip based on the title is removed.
 
-        if not isinstance(post_attachments, list):
+        # --- Post-Level Character Filter Check (Manga Mode only, based on the title) ---
+        # If Manga Mode is active and character filters are set, the post title MUST match one of them.
+        # This acts as a gate for processing any files from this post in Manga Mode.
+        if not self.extract_links_only and self.manga_mode_active and self.filter_character_list:
+            if not any(is_title_match_for_character(post_title, char_name) for char_name in self.filter_character_list):
+                self.logger(f"   -> Skip Post (Manga Mode - Title No Char Match): Title '{post_title[:50]}' doesn't match active character filters.")
+                return 0, num_potential_files  # Skip all files in this post
+
+        # Validate the attachments structure
+        if not isinstance(post_attachments, list):
             self.logger(f"⚠️ Corrupt attachment data for post {post_id} (expected list, got {type(post_attachments)}). Skipping attachments.")
             post_attachments = []
 
-        # --- Determine Potential Save Folders ---
-        base_save_folders = []
-        if self.use_subfolders:
-            if is_target_post_by_id and self.custom_folder_name:
-                base_save_folders = [self.custom_folder_name]
-                self.logger(f"   Folder: Using custom folder for target post: '{self.custom_folder_name}'")
-            elif self.filter_character_list:
-                matched_chars = [clean_folder_name(cf.lower()) for cf in self.filter_character_list if is_title_match_for_character(post_title, cf)]
-                if matched_chars:
-                    base_save_folders = matched_chars
-                    self.logger(f"   Folder: Matched character filter(s): {', '.join(base_save_folders)}")
-                else:
-                    # If character filter is active but no match, we already skipped the post above
-                    # If no character filter, derive from title/known names
-                    matched_from_title = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
-                    base_save_folders = matched_from_title if matched_from_title else [extract_folder_name_from_title(post_title, self.unwanted_keywords)]
-                    self.logger(f"   Folder: No character filter match. Using derived: {', '.join(base_save_folders)}")
-            else:  # No character filter active
-                matched_from_title = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
-                base_save_folders = matched_from_title if matched_from_title else [extract_folder_name_from_title(post_title, self.unwanted_keywords)]
-                self.logger(f"   Folder: Using derived: {', '.join(base_save_folders)}")
-        else:  # Subfolders disabled
-            base_save_folders = [""]
-            self.logger("   Folder: Subfolders disabled. Using root download directory.")
+        # --- Determine Base Save Folders ---
+        potential_base_save_folders = []  # Base folder names (not full paths yet)
+        if not self.extract_links_only:  # Folder logic only applies when we are not just extracting links
+            if self.use_subfolders:
+                if self.filter_character_list:  # The user specified character names for folders
+                    if self.manga_mode_active:
+                        # Manga Mode: only consider a character folder if the post title matches that character
+                        for char_filter_name in self.filter_character_list:
+                            if is_title_match_for_character(post_title, char_filter_name):
+                                cleaned_folder = clean_folder_name(char_filter_name)
+                                if cleaned_folder: potential_base_save_folders.append(cleaned_folder)
+                        # If manga mode is on and the title didn't match any character filter, this list will be empty.
+                        # The post-level skip above should already have caught that case.
+                    else:  # Normal Mode: create folders for all specified character filters
+                        for char_filter_name in self.filter_character_list:
+                            cleaned_folder = clean_folder_name(char_filter_name)
+                            if cleaned_folder: potential_base_save_folders.append(cleaned_folder)
+
+                    if potential_base_save_folders:
+                        self.logger(f"   Folder Target(s) (from Character Filter list): {', '.join(potential_base_save_folders)}")
+                    elif self.filter_character_list:
+                        self.logger(f"   Note: Post {post_id} title did not match character filters for folder assignment (Manga Mode), or no valid character folders.")
+
+                else:  # No character filter list from the UI; derive folders from the title using known_names
+                    derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
+                    if derived_folders:
+                        potential_base_save_folders.extend(derived_folders)
+                        self.logger(f"   Folder Target(s) (Derived from Title & Known Names): {', '.join(derived_folders)}")
+                    else:  # Fallback if no known_names match
+                        fallback_folder = extract_folder_name_from_title(post_title, self.unwanted_keywords)
+                        potential_base_save_folders.append(fallback_folder)
+                        self.logger(f"   Folder Target (Fallback from Title): {fallback_folder}")
+
+                if not potential_base_save_folders:  # Still no folders: use a generic one based on the post title, or a default
+                    potential_base_save_folders.append(clean_folder_name(post_title if post_title else "untitled_creator_content"))
+                    self.logger(f"   Folder Target (Final Fallback): {potential_base_save_folders[0]}")
 
-        if not base_save_folders:  # Fallback if somehow no folders were determined
-            base_save_folders = [clean_folder_name(post_title) or 'untitled_post_fallback']
+            else:  # Not using subfolders; all files go to download_root
+                potential_base_save_folders = [""]  # Represents the root download directory
+ if not self.extract_links_only and self.use_subfolders and self.skip_words_list: + for folder_name_to_check in potential_base_save_folders: + if not folder_name_to_check: continue # Skip root "" + if any(skip_word.lower() in folder_name_to_check.lower() for skip_word in self.skip_words_list): + matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in folder_name_to_check.lower()), "unknown_skip_word") + self.logger(f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check}' contains '{matched_skip}'.") + return 0, num_potential_files - # --- External Link Processing (Can happen even if files are skipped later) --- + # --- Extract and Log External Links --- if (self.show_external_links or self.extract_links_only) and post_content_html: try: - found_links_with_text = link_pattern.findall(post_content_html) - if found_links_with_text: - unique_links_data = {} - for link_url, raw_link_text in found_links_with_text: - link_url = link_url.strip() - clean_link_text = re.sub(r'<.*?>', '', raw_link_text) - clean_link_text = html.unescape(clean_link_text) - clean_link_text = clean_link_text.strip() - if not any(ext in link_url.lower() for ext in ['.css', '.js', '.ico', '.xml', '.svg']) \ - and not link_url.startswith('javascript:'): - if link_url not in unique_links_data and clean_link_text: - unique_links_data[link_url] = clean_link_text - elif link_url not in unique_links_data: - unique_links_data[link_url] = "[Link]" - links_emitted_count = 0 - scraped_platforms = {'kemono', 'coomer', 'patreon'} - for link_url, link_text in unique_links_data.items(): - platform = get_link_platform(link_url) - if platform not in scraped_platforms: - if self.signals and hasattr(self.signals, 'external_link_signal'): - self.signals.external_link_signal.emit(post_title, link_text, link_url, platform) - links_emitted_count +=1 - if links_emitted_count > 0: self.logger(f" 🔗 Found {links_emitted_count} potential external link(s) in post content.") + unique_links_data = {} # Store unique URLs and their text + for match in link_pattern.finditer(post_content_html): + link_url = match.group(1).strip() + link_inner_text = match.group(2) # Raw inner HTML of the tag + + if not any(ext in link_url.lower() for ext in ['.css', '.js', '.ico', '.xml', '.svg']) \ + and not link_url.startswith('javascript:') \ + and link_url not in unique_links_data: + + clean_link_text = re.sub(r'<.*?>', '', link_inner_text) + clean_link_text = html.unescape(clean_link_text).strip() + + display_text = clean_link_text if clean_link_text else "[Link]" + unique_links_data[link_url] = display_text + + links_emitted_count = 0 + scraped_platforms = {'kemono', 'coomer', 'patreon'} + + for link_url, link_text in unique_links_data.items(): + platform = get_link_platform(link_url) + if platform not in scraped_platforms: + if self.signals and hasattr(self.signals, 'external_link_signal'): + self.signals.external_link_signal.emit(post_title, link_text, link_url, platform) + links_emitted_count +=1 + + if links_emitted_count > 0: self.logger(f" 🔗 Found {links_emitted_count} potential external link(s) in post content.") except Exception as e: self.logger(f"⚠️ Error parsing post content for links: {e}\n{traceback.format_exc(limit=2)}") - # --- End External Link Processing --- if self.extract_links_only: - self.logger(f" Extract Links Only mode: Skipping file download for post {post_id}.") + self.logger(f" Extract Links Only mode: Finished processing post {post_id} for links.") return 0, 0 - # --- Determine Final Save Paths (after 
folder name skip check passed) --- - final_save_paths_for_post = [] - for base_folder_name in base_save_folders: - current_path = os.path.join(self.download_root, base_folder_name) - if self.use_post_subfolders and self.use_subfolders: - cleaned_title_for_subfolder = clean_folder_name(post_title) - post_specific_subfolder = f"{post_id}_{cleaned_title_for_subfolder}" if cleaned_title_for_subfolder else f"{post_id}_untitled" - final_save_paths_for_post.append(os.path.join(current_path, post_specific_subfolder)) - else: - final_save_paths_for_post.append(current_path) - - if not final_save_paths_for_post: - # This case should be less likely now with the earlier folder determination, but keep as fallback - self.logger(f" CRITICAL ERROR: No valid folder paths determined for post {post_id}. Skipping."); return 0, 1 - - # --- Prepare File List --- + # --- Prepare List of Files to Download --- files_to_download_info_list = [] - api_file_domain = parsed_api_url.netloc + api_file_domain = urlparse(self.api_url_input).netloc + if not api_file_domain: + api_file_domain = "kemono.su" if "kemono" in self.service.lower() else "coomer.party" - if self.download_thumbnails: - self.logger(f" Thumbnail-only mode for Post {post_id}. (Functionality depends on API providing clear thumbnail links).") - # Logic to find thumbnail links would go here - if not files_to_download_info_list: - self.logger(f" -> No specific thumbnail links found for post {post_id} in thumbnail-only mode.") - return 0, 0 - else: + + if self.download_thumbnails: + self.logger(f" Thumbnail-only mode for Post {post_id}.") + if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'): + if is_image(post_main_file_info.get('name')): + file_path = post_main_file_info['path'].lstrip('/') + original_api_name = post_main_file_info.get('name') or os.path.basename(file_path) + if original_api_name: + files_to_download_info_list.append({ + 'url': f"https://{api_file_domain}{file_path}" if file_path.startswith('/') else f"https://{api_file_domain}/data/{file_path}", + 'name': original_api_name, + '_original_name_for_log': original_api_name, + '_is_thumbnail': True + }) + for att_info in post_attachments: + if isinstance(att_info, dict) and att_info.get('path') and is_image(att_info.get('name')): + att_path = att_info['path'].lstrip('/') + original_api_att_name = att_info.get('name') or os.path.basename(att_path) + if original_api_att_name: + files_to_download_info_list.append({ + 'url': f"https://{api_file_domain}{att_path}" if att_path.startswith('/') else f"https://{api_file_domain}/data/{att_path}", + 'name': original_api_att_name, + '_original_name_for_log': original_api_att_name, + '_is_thumbnail': True + }) + if not files_to_download_info_list: + self.logger(f" -> No image thumbnails found for post {post_id} in thumbnail-only mode.") + return 0, 0 + else: # Normal download mode if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'): file_path = post_main_file_info['path'].lstrip('/') - original_api_name = post_main_file_info.get('name') or os.path.basename(file_path) + original_api_name = post_main_file_info.get('name') or os.path.basename(file_path) if original_api_name: files_to_download_info_list.append({ 'url': f"https://{api_file_domain}{file_path}" if file_path.startswith('/') else f"https://{api_file_domain}/data/{file_path}", - 'name': original_api_name, - '_original_name_for_log': original_api_name, - '_is_thumbnail': False + 'name': 
original_api_name, + '_original_name_for_log': original_api_name, + '_is_thumbnail': False }) else: self.logger(f" ⚠️ Skipping main file for post {post_id}: Missing name (Path: {file_path})") - for idx, att_info in enumerate(post_attachments): if isinstance(att_info, dict) and att_info.get('path'): att_path = att_info['path'].lstrip('/') @@ -858,57 +1003,115 @@ class PostProcessorWorker: self.logger(f" Identified {len(files_to_download_info_list)} file(s) for potential download from post {post_id}.") - # --- Download Files (Skip Check 4: Original Filename happens inside _download_single_file) --- + # --- File Download Loop (using ThreadPoolExecutor for individual files) --- with ThreadPoolExecutor(max_workers=self.num_file_threads, thread_name_prefix=f'P{post_id}File_') as file_pool: futures_list = [] - for idx, file_info_to_dl in enumerate(files_to_download_info_list): + for file_idx, file_info_to_dl in enumerate(files_to_download_info_list): if self.check_cancel(): break - for save_location_path in final_save_paths_for_post: + + actual_target_full_paths_for_this_file = [] + + if self.use_subfolders: + if self.filter_character_list: + for char_name_from_filter_list in self.filter_character_list: + assign_to_this_char_folder = False + if self.manga_mode_active: + # Manga Mode: Folder assignment is based on post_title matching char_name_from_filter_list + # This check is somewhat redundant if the post-level title check passed, + # but ensures files from this post go into the matched character's folder. + if is_title_match_for_character(post_title, char_name_from_filter_list): + assign_to_this_char_folder = True + else: # Normal mode + if is_filename_match_for_character(file_info_to_dl.get('_original_name_for_log'), char_name_from_filter_list): + assign_to_this_char_folder = True + + if assign_to_this_char_folder: + base_char_folder_path = os.path.join(self.download_root, clean_folder_name(char_name_from_filter_list)) + if self.use_post_subfolders: + cleaned_title_for_subfolder = clean_folder_name(post_title) + post_specific_subfolder_name = f"{post_id}_{cleaned_title_for_subfolder}" if cleaned_title_for_subfolder else f"{post_id}_untitled" + actual_target_full_paths_for_this_file.append(os.path.join(base_char_folder_path, post_specific_subfolder_name)) + else: + actual_target_full_paths_for_this_file.append(base_char_folder_path) + + else: + for base_folder_name in potential_base_save_folders: + base_folder_path = os.path.join(self.download_root, base_folder_name) + if self.use_post_subfolders: + cleaned_title_for_subfolder = clean_folder_name(post_title) + post_specific_subfolder_name = f"{post_id}_{cleaned_title_for_subfolder}" if cleaned_title_for_subfolder else f"{post_id}_untitled" + actual_target_full_paths_for_this_file.append(os.path.join(base_folder_path, post_specific_subfolder_name)) + else: + actual_target_full_paths_for_this_file.append(base_folder_path) + else: + actual_target_full_paths_for_this_file = [self.download_root] + + if self.target_post_id_from_initial_url and self.custom_folder_name: + custom_full_path = os.path.join(self.download_root, self.custom_folder_name) + actual_target_full_paths_for_this_file = [custom_full_path] + # self.logger(f" Using custom folder for single post: {custom_full_path}") # Logged once is enough + + + if not actual_target_full_paths_for_this_file: + self.logger(f" -> File Skip (No Target Folder): '{file_info_to_dl.get('_original_name_for_log')}' for post '{post_title[:30]}'. 
No character folder match or other path error.") + total_skipped_this_post +=1 + continue + + for target_path in set(actual_target_full_paths_for_this_file): if self.check_cancel(): break futures_list.append(file_pool.submit( self._download_single_file, file_info_to_dl, - save_location_path, + target_path, headers, post_id, self.skip_current_file_flag, - post_title, - file_index_in_post=idx + post_title, + file_idx )) - - for future in as_completed(futures_list): if self.check_cancel(): break + + for future in as_completed(futures_list): + if self.check_cancel(): + for f_to_cancel in futures_list: + if not f_to_cancel.done(): + f_to_cancel.cancel() + break try: - dl_count, skip_count = future.result() + dl_count, skip_count = future.result() total_downloaded_this_post += dl_count total_skipped_this_post += skip_count except CancelledError: total_skipped_this_post += 1 - except Exception as exc_f: + except Exception as exc_f: self.logger(f"❌ File download task for post {post_id} resulted in error: {exc_f}") total_skipped_this_post += 1 - + if self.signals and hasattr(self.signals, 'file_progress_signal'): self.signals.file_progress_signal.emit("", 0, 0) - if self.check_cancel(): self.logger(f" Post {post_id} processing cancelled."); + if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled."); else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}") return total_downloaded_this_post, total_skipped_this_post class DownloadThread(QThread): - """Manages the overall download process (primarily for single-threaded GUI mode).""" - progress_signal = pyqtSignal(str) - add_character_prompt_signal = pyqtSignal(str) - file_download_status_signal = pyqtSignal(bool) - finished_signal = pyqtSignal(int, int, bool) - # MODIFIED: Added link_text argument - external_link_signal = pyqtSignal(str, str, str, str) # post_title, link_text, link_url, platform - file_progress_signal = pyqtSignal(str, int, int) + """ + Manages the overall download process. + Fetches posts using download_from_api and then processes each post using PostProcessorWorker. + This class is typically used when the GUI needs a separate thread for the entire download operation + (e.g., when not using the multi-threaded PostFetcher model from the main app). 
+ """ + progress_signal = pyqtSignal(str) # For general log messages + add_character_prompt_signal = pyqtSignal(str) # To ask user to add character to known list + file_download_status_signal = pyqtSignal(bool) # True when a file download starts, False when it ends + finished_signal = pyqtSignal(int, int, bool) # (total_downloaded, total_skipped, was_cancelled) + external_link_signal = pyqtSignal(str, str, str, str) # (post_title, link_text, link_url, platform) + file_progress_signal = pyqtSignal(str, int, int) # (filename, downloaded_bytes, total_bytes) def __init__(self, api_url_input, output_dir, known_names_copy, - cancellation_event, + cancellation_event, # threading.Event() filter_character_list=None, filter_mode='all', skip_zip=True, skip_rar=True, use_subfolders=True, use_post_subfolders=False, custom_folder_name=None, compress_images=False, @@ -916,21 +1119,22 @@ class DownloadThread(QThread): downloaded_files=None, downloaded_file_hashes=None, downloaded_files_lock=None, downloaded_file_hashes_lock=None, skip_words_list=None, show_external_links=False, - num_file_threads_for_worker=1, - skip_current_file_flag=None, start_page=None, end_page=None, - target_post_id_from_initial_url=None, + extract_links_only=False, + num_file_threads_for_worker=1, # Threads per PostProcessorWorker instance + skip_current_file_flag=None, # threading.Event() to skip one file + start_page=None, end_page=None, + target_post_id_from_initial_url=None, # If the input URL was a specific post manga_mode_active=False, - unwanted_keywords=None + unwanted_keywords=None # Set of keywords to avoid in auto-generated folder names ): super().__init__() + # --- Store all passed arguments as instance attributes --- self.api_url_input = api_url_input self.output_dir = output_dir - self.known_names = list(known_names_copy) - self.cancellation_event = cancellation_event - self.skip_current_file_flag = skip_current_file_flag - - self.initial_target_post_id = target_post_id_from_initial_url - + self.known_names = list(known_names_copy) # Use a copy + self.cancellation_event = cancellation_event + self.skip_current_file_flag = skip_current_file_flag + self.initial_target_post_id = target_post_id_from_initial_url self.filter_character_list = filter_character_list if filter_character_list else [] self.filter_mode = filter_mode self.skip_zip = skip_zip @@ -943,26 +1147,26 @@ class DownloadThread(QThread): self.service = service self.user_id = user_id self.skip_words_list = skip_words_list if skip_words_list is not None else [] - + # Shared sets and locks for tracking downloads across potential multiple workers (if this thread spawns them) self.downloaded_files = downloaded_files if downloaded_files is not None else set() self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock() self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set() self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock() - - self._add_character_response = None - self.prompt_mutex = QMutex() - + + self._add_character_response = None # For handling synchronous prompt results + self.prompt_mutex = QMutex() # Mutex for _add_character_response + self.show_external_links = show_external_links + self.extract_links_only = extract_links_only self.num_file_threads_for_worker = num_file_threads_for_worker - self.start_page = start_page self.end_page = end_page - self.manga_mode_active = 
manga_mode_active - self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'} + self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else \ + {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'} # Default unwanted keywords - - if self.compress_images and Image is None: + # Disable compression if Pillow is not available + if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") self.compress_images = False @@ -971,51 +1175,54 @@ class DownloadThread(QThread): self.progress_signal.emit(str(message)) def isInterruptionRequested(self): - """Overrides QThread's interruption check to also use the cancellation_event.""" + """Checks if Qt interruption or manual cancellation event is set.""" return super().isInterruptionRequested() or self.cancellation_event.is_set() def skip_file(self): - """Sets the skip_current_file_flag to skip the currently downloading file.""" + """Sets the flag to skip the currently processing file (if any).""" if self.isRunning() and self.skip_current_file_flag: self.logger("⏭️ Skip requested for current file (single-thread mode).") - self.skip_current_file_flag.set() - else: self.logger("ℹ️ Skip file: No download active or flag not set.") + self.skip_current_file_flag.set() # Signal the PostProcessorWorker + else: self.logger("ℹ️ Skip file: No download active or skip flag not available.") def run(self): - """Main execution loop for the download thread.""" + """Main execution logic for the download thread.""" grand_total_downloaded_files = 0 grand_total_skipped_files = 0 was_process_cancelled = False + # Create a signals object for PostProcessorWorker instances + # This allows PostProcessorWorker to emit signals that this DownloadThread can connect to. 
worker_signals_obj = PostProcessorSignals() try: + # Connect signals from the worker_signals_obj to this thread's signals + # This effectively forwards signals from PostProcessorWorker up to the GUI worker_signals_obj.progress_signal.connect(self.progress_signal) worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal) worker_signals_obj.file_progress_signal.connect(self.file_progress_signal) - # Connect the worker's external_link_signal to this thread's external_link_signal - # This ensures links found by the worker (even in single-thread mode) are emitted by this thread worker_signals_obj.external_link_signal.connect(self.external_link_signal) - self.logger(" Starting post fetch (single-threaded download process)...") + # Get the generator for fetching posts post_generator = download_from_api( self.api_url_input, - logger=self.logger, + logger=self.logger, # Pass this thread's logger start_page=self.start_page, end_page=self.end_page, manga_mode=self.manga_mode_active, cancellation_event=self.cancellation_event # Pass cancellation event ) - for posts_batch_data in post_generator: + for posts_batch_data in post_generator: # Iterate through batches of posts if self.isInterruptionRequested(): was_process_cancelled = True; break - for individual_post_data in posts_batch_data: + for individual_post_data in posts_batch_data: # Iterate through posts in a batch if self.isInterruptionRequested(): was_process_cancelled = True; break + # Create a PostProcessorWorker for each post post_processing_worker = PostProcessorWorker( post_data=individual_post_data, download_root=self.output_dir, - known_names=self.known_names, + known_names=self.known_names, # Pass copy filter_character_list=self.filter_character_list, unwanted_keywords=self.unwanted_keywords, filter_mode=self.filter_mode, @@ -1026,18 +1233,21 @@ class DownloadThread(QThread): compress_images=self.compress_images, download_thumbnails=self.download_thumbnails, service=self.service, user_id=self.user_id, api_url_input=self.api_url_input, - cancellation_event=self.cancellation_event, - signals=worker_signals_obj, # Pass the connected signals object - downloaded_files=self.downloaded_files, downloaded_file_hashes=self.downloaded_file_hashes, - downloaded_files_lock=self.downloaded_files_lock, downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, + cancellation_event=self.cancellation_event, + signals=worker_signals_obj, # Pass the shared signals object + downloaded_files=self.downloaded_files, # Pass shared sets and locks + downloaded_file_hashes=self.downloaded_file_hashes, + downloaded_files_lock=self.downloaded_files_lock, + downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, skip_words_list=self.skip_words_list, show_external_links=self.show_external_links, - extract_links_only=False, + extract_links_only=self.extract_links_only, num_file_threads=self.num_file_threads_for_worker, skip_current_file_flag=self.skip_current_file_flag, manga_mode_active=self.manga_mode_active ) try: + # Process the post (this will block until the worker is done with this post) dl_count, skip_count = post_processing_worker.process() grand_total_downloaded_files += dl_count grand_total_skipped_files += skip_count @@ -1045,36 +1255,44 @@ class DownloadThread(QThread): post_id_for_err = individual_post_data.get('id', 'N/A') self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}") traceback.print_exc() - grand_total_skipped_files += len(individual_post_data.get('attachments', [])) + (1 
if individual_post_data.get('file') else 0) + # Estimate skipped files for this post if worker failed catastrophically + num_potential_files_est = len(individual_post_data.get('attachments', [])) + \ + (1 if individual_post_data.get('file') else 0) + grand_total_skipped_files += num_potential_files_est + # Clear the skip_current_file_flag if it was set and processed if self.skip_current_file_flag and self.skip_current_file_flag.is_set(): - self.skip_current_file_flag.clear() - self.logger(" Skip current file flag was processed and cleared.") + self.skip_current_file_flag.clear() + self.logger(" Skip current file flag was processed and cleared by DownloadThread.") - self.msleep(10) - if was_process_cancelled: break + self.msleep(10) # Small delay to allow GUI to update, if needed + if was_process_cancelled: break # Break from batch loop if cancelled if not was_process_cancelled: self.logger("✅ All posts processed or end of content reached.") except Exception as main_thread_err: self.logger(f"\n❌ Critical error within DownloadThread run loop: {main_thread_err}") traceback.print_exc() - if not self.isInterruptionRequested(): was_process_cancelled = False + # Ensure was_process_cancelled reflects the state if error wasn't due to user cancellation + if not self.isInterruptionRequested(): was_process_cancelled = False # Error, not user cancel finally: + # Clean up: Disconnect signals to avoid issues if the thread is somehow reused or objects persist try: - if worker_signals_obj: - # Disconnect signals + if worker_signals_obj: # Check if it was initialized worker_signals_obj.progress_signal.disconnect(self.progress_signal) worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal) worker_signals_obj.external_link_signal.disconnect(self.external_link_signal) worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal) - except (TypeError, RuntimeError) as e: self.logger(f"ℹ️ Note during signal disconnection: {e}") - + except (TypeError, RuntimeError) as e: # Catch if signals were already disconnected or other issues + self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}") + + # Emit the finished signal with totals and cancellation status self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, was_process_cancelled) def receive_add_character_result(self, result): - """Handles the response from a character add prompt (if GUI signals back to this thread).""" - with QMutexLocker(self.prompt_mutex): + """Slot to receive the result from a character add prompt shown in the main thread.""" + with QMutexLocker(self.prompt_mutex): # Ensure thread-safe access self._add_character_response = result - self.logger(f" (DownloadThread) Received character prompt response: {'Yes' if result else 'No'}") - + self.logger(f" (DownloadThread) Received character prompt response: {'Yes (added/confirmed)' if result else 'No (declined/failed)'}") + # This response might be used by logic within the thread if it was waiting for it, + # though typically prompts are handled by the main GUI thread. 
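The anchor-tag regex near the top of the hunk above (`link_pattern`) had to be reconstructed, since the original pattern was lost to HTML stripping. A quick standalone check, with invented sample HTML, showing that its two capture groups line up with the `match.group(1)` / `match.group(2)` usage in the link-extraction loop:

```python
import re

# Group 1 = href URL, group 2 = anchor inner HTML, matching how the diff
# consumes match.group(1) and match.group(2) when collecting external links.
link_pattern = re.compile(r"""<a\s+[^>]*?href=["']([^"']+)["'][^>]*>(.*?)</a>""",
                          re.IGNORECASE | re.DOTALL)

sample_html = '<p>Backup: <a href="https://mega.nz/folder/abc123">My <b>Mega</b> stash</a></p>'
for match in link_pattern.finditer(sample_html):
    print(match.group(1))  # -> https://mega.nz/folder/abc123 (the URL)
    print(match.group(2))  # -> My <b>Mega</b> stash (inner HTML; tags are stripped later with re.sub)
```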
diff --git a/main.py b/main.py
index 864994b..4627a5a 100644
--- a/main.py
+++ b/main.py
@@ -20,10 +20,10 @@ from PyQt5.QtGui import (
 from PyQt5.QtWidgets import (
     QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton,
     QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget,
-    QRadioButton, QButtonGroup, QCheckBox, QSplitter, QSizePolicy
+    QRadioButton, QButtonGroup, QCheckBox, QSplitter, QSizePolicy, QDialog
 )
 # Ensure QTimer is imported
-from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer
+from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer
 from urllib.parse import urlparse

 try:
@@ -37,9 +37,9 @@ from io import BytesIO
 try:
     print("Attempting to import from downloader_utils...")
     # Assuming downloader_utils_link_text is the correct version
-    from downloader_utils import (
+    from downloader_utils import (
         KNOWN_NAMES,
-        clean_folder_name,
+        clean_folder_name,
         extract_post_info,
         download_from_api,
         PostProcessorSignals,
@@ -66,11 +66,32 @@ except Exception as e:
     sys.exit(1)
 # --- End Import ---

+# --- Import Tour Dialog ---
+try:
+    from tour import TourDialog
+    print("Successfully imported TourDialog from tour.py.")
+except ImportError as e:
+    print(f"--- TOUR IMPORT ERROR ---")
+    print(f"Failed to import TourDialog from 'tour.py': {e}")
+    print("Tour functionality will be unavailable.")
+    TourDialog = None  # Fallback if tour.py is missing
+except Exception as e:
+    print(f"--- UNEXPECTED TOUR IMPORT ERROR ---")
+    print(f"An unexpected error occurred during tour import: {e}")
+    traceback.print_exc()
+    TourDialog = None
+# --- End Tour Import ---
+
+
 # --- Constants for Thread Limits ---
 MAX_THREADS = 200  # Absolute maximum allowed by the input validator
 RECOMMENDED_MAX_THREADS = 50  # Threshold for showing the informational warning
 # --- END ---

+# --- ADDED: Prefix for HTML messages in main log ---
+HTML_PREFIX = "<!HTML!>"  # Used to identify HTML lines for insertHtml
+# --- END ADDED ---
+
 class DownloaderApp(QWidget):
     character_prompt_response_signal = pyqtSignal(bool)
     log_signal = pyqtSignal(str)
@@ -106,8 +127,10 @@ class DownloaderApp(QWidget):
         # --- For sequential delayed link display ---
         self.external_link_queue = deque()
         self._is_processing_external_link_queue = False
+        self._current_link_post_title = None  # Track title for grouping
+        self.extracted_links_cache = []  # Store all links when in "Only Links" mode
         # --- END ---
-
+
         # --- For Log Verbosity ---
         self.basic_log_mode = False  # Start with full log (basic_log_mode is False)
         self.log_verbosity_button = None
@@ -118,8 +141,19 @@ class DownloaderApp(QWidget):
         self.log_splitter = None  # This is the VERTICAL splitter for logs
         self.main_splitter = None  # This will be the main HORIZONTAL splitter
         self.reset_button = None
+        self.progress_log_label = None  # To change title
+
+        # --- For Link Search ---
+        self.link_search_input = None
+        self.link_search_button = None
+        # --- END ---
+
+        # --- For Export Links ---
+        self.export_links_button = None
+        # --- END ---

         self.manga_mode_checkbox = None
+        self.radio_only_links = None  # Define radio button attribute

         self.load_known_names_from_util()
         self.setWindowTitle("Kemono Downloader v2.9 (Manga Mode - No Skip Button)")
@@ -140,7 +174,7 @@ class DownloaderApp(QWidget):
         self.worker_signals.file_progress_signal.connect(self.update_file_progress_display)
         # Connect the external_link_signal from worker_signals to the queue handler
         if hasattr(self.worker_signals, 'external_link_signal'):
-
self.worker_signals.external_link_signal.connect(self.handle_external_link_signal) + self.worker_signals.external_link_signal.connect(self.handle_external_link_signal) # App's own signals (some of which might be emitted by DownloadThread which then connects to these handlers) self.log_signal.connect(self.handle_main_log) @@ -149,7 +183,7 @@ class DownloaderApp(QWidget): self.overall_progress_signal.connect(self.update_progress_display) self.finished_signal.connect(self.download_finished) # Connect the app's external_link_signal also to the queue handler - self.external_link_signal.connect(self.handle_external_link_signal) + self.external_link_signal.connect(self.handle_external_link_signal) self.file_progress_signal.connect(self.update_file_progress_display) @@ -162,13 +196,31 @@ class DownloaderApp(QWidget): self.use_multithreading_checkbox.toggled.connect(self._handle_multithreading_toggle) # --- END MODIFIED --- + # --- MODIFIED: Connect radio group toggle --- + if self.radio_group: + self.radio_group.buttonToggled.connect(self._handle_filter_mode_change) # Use buttonToggled for group signal + # --- END MODIFIED --- + if self.reset_button: self.reset_button.clicked.connect(self.reset_application_state) - + # Connect log verbosity button if it exists if self.log_verbosity_button: self.log_verbosity_button.clicked.connect(self.toggle_log_verbosity) + # --- ADDED: Connect link search elements --- + if self.link_search_button: + self.link_search_button.clicked.connect(self._filter_links_log) + if self.link_search_input: + self.link_search_input.returnPressed.connect(self._filter_links_log) + self.link_search_input.textChanged.connect(self._filter_links_log) # Real-time filtering + # --- END ADDED --- + + # --- ADDED: Connect export links button --- + if self.export_links_button: + self.export_links_button.clicked.connect(self._export_links_to_file) + # --- END ADDED --- + if self.manga_mode_checkbox: self.manga_mode_checkbox.toggled.connect(self.update_ui_for_manga_mode) self.link_input.textChanged.connect(lambda: self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)) @@ -328,14 +380,14 @@ class DownloaderApp(QWidget): custom_folder_layout.addWidget(self.custom_folder_input) self.custom_folder_widget.setVisible(False) # Initially hidden left_layout.addWidget(self.custom_folder_widget) - + # Character Filter Input self.character_filter_widget = QWidget() character_filter_layout = QVBoxLayout(self.character_filter_widget) character_filter_layout.setContentsMargins(0,5,0,0) self.character_label = QLabel("🎯 Filter by Character(s) (comma-separated):") self.character_input = QLineEdit() - self.character_input.setPlaceholderText("e.g., yor, makima, anya forger") + self.character_input.setPlaceholderText("e.g., yor, Tifa, Reyna") character_filter_layout.addWidget(self.character_label) character_filter_layout.addWidget(self.character_input) self.character_filter_widget.setVisible(True) # Visible by default @@ -347,7 +399,7 @@ class DownloaderApp(QWidget): self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview") left_layout.addWidget(self.skip_words_input) - # File Type Filter Radio Buttons + # --- MODIFIED: File Type Filter Radio Buttons --- file_filter_layout = QVBoxLayout() # Group label and radio buttons file_filter_layout.setContentsMargins(0,0,0,0) # Compact file_filter_layout.addWidget(QLabel("Filter Files:")) @@ -357,16 +409,20 @@ class DownloaderApp(QWidget): self.radio_all = QRadioButton("All") self.radio_images = 
QRadioButton("Images/GIFs") self.radio_videos = QRadioButton("Videos") + self.radio_only_links = QRadioButton("🔗 Only Links") # New button self.radio_all.setChecked(True) self.radio_group.addButton(self.radio_all) self.radio_group.addButton(self.radio_images) self.radio_group.addButton(self.radio_videos) + self.radio_group.addButton(self.radio_only_links) # Add to group radio_button_layout.addWidget(self.radio_all) radio_button_layout.addWidget(self.radio_images) radio_button_layout.addWidget(self.radio_videos) + radio_button_layout.addWidget(self.radio_only_links) # Add to layout radio_button_layout.addStretch(1) # Pushes buttons to left file_filter_layout.addLayout(radio_button_layout) left_layout.addLayout(file_filter_layout) + # --- END MODIFIED --- # Checkboxes Group checkboxes_group_layout = QVBoxLayout() @@ -390,7 +446,7 @@ class DownloaderApp(QWidget): row1_layout.addWidget(self.compress_images_checkbox) row1_layout.addStretch(1) # Pushes checkboxes to left checkboxes_group_layout.addLayout(row1_layout) - + # Advanced Settings Section advanced_settings_label = QLabel("⚙️ Advanced Settings:") checkboxes_group_layout.addWidget(advanced_settings_label) @@ -432,14 +488,14 @@ class DownloaderApp(QWidget): self.external_links_checkbox = QCheckBox("Show External Links in Log") self.external_links_checkbox.setChecked(False) advanced_row2_layout.addWidget(self.external_links_checkbox) - - self.manga_mode_checkbox = QCheckBox("Manga Mode") + + self.manga_mode_checkbox = QCheckBox("Manga/Comic Mode") self.manga_mode_checkbox.setToolTip("Process newest posts first, rename files based on post title (for creator feeds only).") self.manga_mode_checkbox.setChecked(False) advanced_row2_layout.addWidget(self.manga_mode_checkbox) advanced_row2_layout.addStretch(1) checkboxes_group_layout.addLayout(advanced_row2_layout) - + left_layout.addLayout(checkboxes_group_layout) # Download and Cancel Buttons @@ -465,7 +521,7 @@ class DownloaderApp(QWidget): known_chars_label_layout.addWidget(self.known_chars_label, 1) # Label takes more space known_chars_label_layout.addWidget(self.character_search_input) left_layout.addLayout(known_chars_label_layout) - + self.character_list = QListWidget() self.character_list.setSelectionMode(QListWidget.ExtendedSelection) # Allow multi-select for delete left_layout.addWidget(self.character_list, 1) # Takes remaining vertical space @@ -487,9 +543,25 @@ class DownloaderApp(QWidget): # --- Populate Right Panel (Logs) --- log_title_layout = QHBoxLayout() - log_title_layout.addWidget(QLabel("📜 Progress Log:")) + self.progress_log_label = QLabel("📜 Progress Log:") # Store label reference + log_title_layout.addWidget(self.progress_log_label) log_title_layout.addStretch(1) - + + # --- ADDED: Link Search Bar --- + self.link_search_input = QLineEdit() + self.link_search_input.setPlaceholderText("Search Links...") + self.link_search_input.setVisible(False) # Initially hidden + self.link_search_input.setFixedWidth(150) # Adjust width + log_title_layout.addWidget(self.link_search_input) + + self.link_search_button = QPushButton("🔍") + self.link_search_button.setToolTip("Filter displayed links") + self.link_search_button.setVisible(False) # Initially hidden + self.link_search_button.setFixedWidth(30) + self.link_search_button.setStyleSheet("padding: 4px 4px;") + log_title_layout.addWidget(self.link_search_button) + # --- END ADDED --- + # --- ADDED: Log Verbosity Button --- self.log_verbosity_button = QPushButton("Show Basic Log") # Default text 
self.log_verbosity_button.setToolTip("Toggle between full and basic log details.") @@ -497,7 +569,7 @@ class DownloaderApp(QWidget): self.log_verbosity_button.setStyleSheet("padding: 4px 8px;") log_title_layout.addWidget(self.log_verbosity_button) # --- END ADDED --- - + self.reset_button = QPushButton("🔄 Reset") self.reset_button.setToolTip("Reset all inputs and logs to default state (only when idle).") self.reset_button.setFixedWidth(80) @@ -511,8 +583,8 @@ class DownloaderApp(QWidget): # self.main_log_output.setMinimumWidth(450) # Remove minimum width self.main_log_output.setLineWrapMode(QTextEdit.NoWrap) # Disable line wrapping self.main_log_output.setStyleSheet(""" - QTextEdit { - background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; + QTextEdit { + background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; color: #F0F0F0; border-radius: 4px; font-family: Consolas, Courier New, monospace; font-size: 9.5pt; }""") self.external_log_output = QTextEdit() @@ -520,8 +592,8 @@ class DownloaderApp(QWidget): # self.external_log_output.setMinimumWidth(450) # Remove minimum width self.external_log_output.setLineWrapMode(QTextEdit.NoWrap) # Disable line wrapping self.external_log_output.setStyleSheet(""" - QTextEdit { - background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; + QTextEdit { + background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; color: #F0F0F0; border-radius: 4px; font-family: Consolas, Courier New, monospace; font-size: 9.5pt; }""") self.external_log_output.hide() # Initially hidden @@ -530,6 +602,19 @@ class DownloaderApp(QWidget): self.log_splitter.setSizes([self.height(), 0]) # Main log takes all space initially right_layout.addWidget(self.log_splitter, 1) # Log splitter takes available vertical space + # --- ADDED: Export Links Button --- + export_button_layout = QHBoxLayout() + export_button_layout.addStretch(1) # Push button to the right + self.export_links_button = QPushButton("Export Links") + self.export_links_button.setToolTip("Export all extracted links to a .txt file.") + self.export_links_button.setFixedWidth(100) + self.export_links_button.setStyleSheet("padding: 4px 8px; margin-top: 5px;") + self.export_links_button.setEnabled(False) # Initially disabled + self.export_links_button.setVisible(False) # Initially hidden + export_button_layout.addWidget(self.export_links_button) + right_layout.addLayout(export_button_layout) # Add to bottom of right panel + # --- END ADDED --- + self.progress_label = QLabel("Progress: Idle") self.progress_label.setStyleSheet("padding-top: 5px; font-style: italic;") right_layout.addWidget(self.progress_label) @@ -546,10 +631,10 @@ class DownloaderApp(QWidget): # --- Set initial sizes for the splitter --- # Calculate initial sizes (e.g., left 30%, right 70%) initial_width = self.width() # Use the initial window width - left_width = int(initial_width * 0.30) + left_width = int(initial_width * 0.30) right_width = initial_width - left_width self.main_splitter.setSizes([left_width, right_width]) - + # --- Set the main splitter as the central layout --- # Need a top-level layout to hold the splitter top_level_layout = QHBoxLayout(self) # Apply layout directly to the main widget (self) @@ -570,6 +655,7 @@ class DownloaderApp(QWidget): self.link_input.textChanged.connect(self.update_page_range_enabled_state) # Connect after init self.load_known_names_from_util() # Load names into the list widget self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) # Set initial state + 
self._handle_filter_mode_change(self.radio_group.checkedButton(), True)  # Set initial filter mode UI state


     def get_dark_theme(self):
@@ -587,7 +673,7 @@ class DownloaderApp(QWidget):
         QListWidget { alternate-background-color: #353535; border: 1px solid #5A5A5A; }
         QListWidget::item:selected { background-color: #007ACC; color: #FFFFFF; }
         QToolTip { background-color: #4A4A4A; color: #F0F0F0; border: 1px solid #6A6A6A; padding: 4px; border-radius: 3px; }
-        QSplitter::handle { background-color: #5A5A5A; width: 5px; /* Make handle slightly wider */ }
+        QSplitter::handle { background-color: #5A5A5A; width: 5px; /* Make handle slightly wider */ }
         QSplitter::handle:horizontal { width: 5px; }
         QSplitter::handle:vertical { height: 5px; }
     """  # Added styling for splitter handle
@@ -599,10 +685,15 @@ class DownloaderApp(QWidget):
             self.dir_input.setText(folder)

     def handle_main_log(self, message):
-        # --- ADDED: Log Verbosity Filtering ---
-        if self.basic_log_mode:
+        # --- MODIFIED: Check for HTML_PREFIX ---
+        is_html_message = message.startswith(HTML_PREFIX)
+
+        if is_html_message:
+            # If it's HTML, strip the prefix and use insertHtml
+            display_message = message[len(HTML_PREFIX):]
+            use_html = True
+        elif self.basic_log_mode:  # Apply basic filtering only if NOT HTML
             # Define keywords/prefixes for messages to ALWAYS show in basic mode
-            # Make these lowercase for case-insensitive matching
             basic_keywords = [
                 '🚀 starting download', '🏁 download finished', '🏁 download cancelled',
                 '❌', '⚠️', '✅ all posts processed', '✅ reached end of posts',
@@ -613,20 +704,26 @@ class DownloaderApp(QWidget):
                 'duplicate name', 'potential name conflict',
                 'invalid filter name', 'no valid character filters'
             ]
-            # Check if the lowercase message contains any of the basic keywords/prefixes
             message_lower = message.lower()
             if not any(keyword in message_lower for keyword in basic_keywords):
-                # Allow specific positive confirmations even in basic mode
                 if not message.strip().startswith("✅ Saved:") and \
                    not message.strip().startswith("✅ Added") and \
                    not message.strip().startswith("✅ Application reset complete"):
                     return  # Skip appending less important messages in basic mode
-        # --- END ADDED ---
-
+            display_message = message  # Use original message if it passes basic filter
+            use_html = False
+        else:  # Full log mode and not HTML
+            display_message = message
+            use_html = False
+        # --- END MODIFIED ---
+
         try:
             # Ensure message is a string and replace null characters that can crash QTextEdit
-            safe_message = str(message).replace('\x00', '[NULL]')
-            self.main_log_output.append(safe_message)
+            safe_message = str(display_message).replace('\x00', '[NULL]')
+            if use_html:
+                self.main_log_output.insertHtml(safe_message)  # Use insertHtml for formatted titles
+            else:
+                self.main_log_output.append(safe_message)  # Use append for plain text
             # Auto-scroll if near the bottom
             scrollbar = self.main_log_output.verticalScrollBar()
             if scrollbar.value() >= scrollbar.maximum() - 30:  # Threshold for auto-scroll
@@ -648,54 +745,92 @@ class DownloaderApp(QWidget):

     # MODIFIED: Slot now takes link_text as the second argument
     def handle_external_link_signal(self, post_title, link_text, link_url, platform):
         """Receives link signals, adds them to a queue, and triggers processing."""
-        # We still receive post_title for potential future use, but use link_text for display
-        self.external_link_queue.append((link_text, link_url, platform))
+        link_data = (post_title, link_text, link_url, platform)
+        self.external_link_queue.append(link_data)
+        # --- ADDED: Cache link if in "Only Links" mode ---
+        if self.radio_only_links and self.radio_only_links.isChecked():
+            self.extracted_links_cache.append(link_data)
+        # --- END ADDED ---
         self._try_process_next_external_link()

     def _try_process_next_external_link(self):
         """Processes the next link from the queue if not already processing."""
-        if self._is_processing_external_link_queue or \
-           not self.external_link_queue or \
-           not (self.show_external_links and self.external_log_output and self.external_log_output.isVisible()):
-            return
+        if self._is_processing_external_link_queue or not self.external_link_queue:
+            return  # Don't process if busy or queue empty
+
+        # Determine if we should display based on mode and checkbox state
+        is_only_links_mode = self.radio_only_links and self.radio_only_links.isChecked()
+        should_display_in_external = self.show_external_links and not is_only_links_mode
+
+        # Only proceed if displaying in *either* log is currently possible/enabled
+        if not (is_only_links_mode or should_display_in_external):
+            # If neither log is active/visible for this link, still need to allow queue processing
+            self._is_processing_external_link_queue = False
+            if self.external_link_queue:
+                QTimer.singleShot(0, self._try_process_next_external_link)
+            return

         self._is_processing_external_link_queue = True
-
-        # MODIFIED: Get link_text from queue
-        link_text, link_url, platform = self.external_link_queue.popleft()
-        self._append_link_to_external_log(link_text, link_url, platform)  # Display this link now
-
-        # --- MODIFIED: Conditional delay ---
-        if self._is_download_active():
-            # Schedule the end of this link's "display period" with delay
-            delay_ms = random.randint(4000, 8000)  # Random delay of 4-8 seconds
-            QTimer.singleShot(delay_ms, self._finish_current_link_processing)
+
+        link_data = self.external_link_queue.popleft()
+
+        # --- MODIFIED: Schedule the display AND the next step based on mode ---
+        if is_only_links_mode:
+            # Schedule with a fixed short delay in "Only Links" mode
+            delay_ms = 80  # 0.08 seconds
+            QTimer.singleShot(delay_ms, lambda data=link_data: self._display_and_schedule_next(data))
+        elif self._is_download_active():
+            # Schedule with random delay for other modes during download
+            delay_ms = random.randint(4000, 8000)
+            QTimer.singleShot(delay_ms, lambda data=link_data: self._display_and_schedule_next(data))
         else:
-            # No download active, process next link almost immediately
-            QTimer.singleShot(0, self._finish_current_link_processing)
+            # No download active in other modes, process immediately
+            QTimer.singleShot(0, lambda data=link_data: self._display_and_schedule_next(data))
         # --- END MODIFIED ---

-    def _finish_current_link_processing(self):
-        """Called after a delay (or immediately if download finished); allows the next link in the queue to be processed."""
-        self._is_processing_external_link_queue = False
-        self._try_process_next_external_link()  # Attempt to process the next link
+    # --- NEW Method ---
+    def _display_and_schedule_next(self, link_data):
+        """Displays the link in the correct log and schedules the check for the next link."""
+        post_title, link_text, link_url, platform = link_data  # Unpack all data
+        is_only_links_mode = self.radio_only_links and self.radio_only_links.isChecked()

-    # MODIFIED: Method now takes link_text instead of title for display
-    def _append_link_to_external_log(self, link_text, link_url, platform):
+        # Format the link text part
+        max_link_text_len = 35
+        display_text = link_text[:max_link_text_len].strip() + "..." if len(link_text) > max_link_text_len else link_text
+        formatted_link_info = f"{display_text} - {link_url} - {platform}"
+        separator = "-" * 45
+
+        if is_only_links_mode:
+            # Check if the post title has changed
+            if post_title != self._current_link_post_title:
+                # Emit separator and new title (formatted as HTML)
+                self.log_signal.emit(HTML_PREFIX + "<br>" + separator + "<br>")
+                # Use HTML for bold blue title
+                title_html = f'<b style="color: #87CEEB;">{post_title}</b><br>'
+                self.log_signal.emit(HTML_PREFIX + title_html)
+                self._current_link_post_title = post_title  # Update current title
+
+            # Emit the link info as plain text (handle_main_log will append it)
+            self.log_signal.emit(formatted_link_info)
+
+        elif self.show_external_links:
+            # Append directly to external log (plain text)
+            self._append_to_external_log(formatted_link_info, separator)
+
+        # Allow the next link to be processed
+        self._is_processing_external_link_queue = False
+        self._try_process_next_external_link()  # Check queue again
+    # --- END NEW Method ---
+
+    # --- RENAMED and MODIFIED: Appends ONLY to external log ---
+    def _append_to_external_log(self, formatted_link_text, separator):
         """Appends a single formatted link to the external_log_output widget."""
-        if not (self.show_external_links and self.external_log_output and self.external_log_output.isVisible()):
+        # Visibility check is done before calling this now
+        if not (self.external_log_output and self.external_log_output.isVisible()):
             return

-        # Use link_text for display, truncate if necessary
-        max_link_text_len = 35  # Adjust as needed
-        display_text = link_text[:max_link_text_len].strip() + "..." if len(link_text) > max_link_text_len else link_text
-
-        # Format the string as requested: text - url - platform
-        formatted_link_text = f"{display_text} - {link_url} - {platform}"
-        separator = "-" * 45  # Adjust length as needed
-
         try:
-            self.external_log_output.append(separator)
+            self.external_log_output.append(separator)
             self.external_log_output.append(formatted_link_text)
             self.external_log_output.append("")  # Add a blank line for spacing
@@ -704,9 +839,10 @@ class DownloaderApp(QWidget):
             if scrollbar.value() >= scrollbar.maximum() - 50:  # Adjust threshold if needed
                 scrollbar.setValue(scrollbar.maximum())
         except Exception as e:
+            # Log errors related to external log to the main log
             self.log_signal.emit(f"GUI External Log Append Error: {e}\nOriginal Message: {formatted_link_text}")
             print(f"GUI External Log Error (Append): {e}\nOriginal Message: {formatted_link_text}")
-        # --- END ADDED ---
+        # --- END MODIFIED ---

     def update_file_progress_display(self, filename, downloaded_bytes, total_bytes):
@@ -715,8 +851,8 @@ class DownloaderApp(QWidget):
             return
         # MODIFIED: Truncate filename more aggressively (e.g., max 25 chars)
-        max_filename_len = 25
-        display_filename = filename[:max_filename_len-3].strip() + "..." if len(filename) > max_filename_len else filename
+        max_filename_len = 25
+        display_filename = filename[:max_filename_len-3].strip() + "..." if len(filename) > max_filename_len else filename

         if total_bytes > 0:
             downloaded_mb = downloaded_bytes / (1024 * 1024)
@@ -725,7 +861,7 @@ class DownloaderApp(QWidget):
         else:  # If total size is unknown
             downloaded_mb = downloaded_bytes / (1024 * 1024)
             progress_text = f"Downloading '{display_filename}' ({downloaded_mb:.1f}MB)"
-
+
         # Check if the resulting text might still be too long (heuristic)
         # This is a basic check, might need refinement based on typical log width
         if len(progress_text) > 75:  # Example threshold, adjust as needed
@@ -740,36 +876,190 @@ class DownloaderApp(QWidget):

     def update_external_links_setting(self, checked):
+        # This function is now primarily controlled by _handle_filter_mode_change
+        # when the "Only Links" mode is NOT selected.
+        is_only_links_mode = self.radio_only_links and self.radio_only_links.isChecked()
+        if is_only_links_mode:
+            # In "Only Links" mode, the external log is always hidden.
+ if self.external_log_output: self.external_log_output.hide() + if self.log_splitter: self.log_splitter.setSizes([self.height(), 0]) + return + + # Proceed only if NOT in "Only Links" mode self.show_external_links = checked if checked: - self.external_log_output.show() + if self.external_log_output: self.external_log_output.show() # Adjust splitter, give both logs some space - # Use the VERTICAL splitter for logs here - self.log_splitter.setSizes([self.height() // 2, self.height() // 2]) - self.main_log_output.setMinimumHeight(50) # Ensure it doesn't disappear - self.external_log_output.setMinimumHeight(50) + if self.log_splitter: self.log_splitter.setSizes([self.height() // 2, self.height() // 2]) + if self.main_log_output: self.main_log_output.setMinimumHeight(50) # Ensure it doesn't disappear + if self.external_log_output: self.external_log_output.setMinimumHeight(50) self.log_signal.emit("\n" + "="*40 + "\n🔗 External Links Log Enabled\n" + "="*40) - self.external_log_output.clear() # Clear previous content - self.external_log_output.append("🔗 External Links Found:") # Header - # --- ADDED: Try processing queue if log becomes visible --- + if self.external_log_output: + self.external_log_output.clear() # Clear previous content + self.external_log_output.append("🔗 External Links Found:") # Header + # Try processing queue if log becomes visible self._try_process_next_external_link() - # --- END ADDED --- else: - self.external_log_output.hide() - # Use the VERTICAL splitter for logs here - self.log_splitter.setSizes([self.height(), 0]) # Main log takes all space - self.main_log_output.setMinimumHeight(0) # Reset min height - self.external_log_output.setMinimumHeight(0) - self.external_log_output.clear() # Clear content when hidden + if self.external_log_output: self.external_log_output.hide() + # Adjust splitter + if self.log_splitter: self.log_splitter.setSizes([self.height(), 0]) # Main log takes all space + if self.main_log_output: self.main_log_output.setMinimumHeight(0) # Reset min height + if self.external_log_output: self.external_log_output.setMinimumHeight(0) + if self.external_log_output: self.external_log_output.clear() # Clear content when hidden self.log_signal.emit("\n" + "="*40 + "\n🔗 External Links Log Disabled\n" + "="*40) - # Optional: Clear queue when log is hidden? 
- # self.external_link_queue.clear() - # self._is_processing_external_link_queue = False + + # --- ADDED: Handler for filter mode radio buttons --- + def _handle_filter_mode_change(self, button, checked): + # button can be None during initial setup sometimes + if not button or not checked: + return + + filter_mode_text = button.text() + is_only_links = (filter_mode_text == "🔗 Only Links") + + # --- MODIFIED: Enable/disable widgets based on mode --- + file_options_enabled = not is_only_links + widgets_to_disable_in_links_mode = [ + self.dir_input, self.dir_button, # Download Location + self.skip_zip_checkbox, self.skip_rar_checkbox, + self.download_thumbnails_checkbox, self.compress_images_checkbox, + self.use_subfolders_checkbox, self.use_subfolder_per_post_checkbox, + self.character_filter_widget, # Includes label and input + self.skip_words_input, + self.custom_folder_widget # Includes label and input + ] + # --- END MODIFIED --- + for widget in widgets_to_disable_in_links_mode: + if widget: widget.setEnabled(file_options_enabled) + + # --- ADDED: Show/hide link search bar and export button --- + if self.link_search_input: self.link_search_input.setVisible(is_only_links) + if self.link_search_button: self.link_search_button.setVisible(is_only_links) + if self.export_links_button: + self.export_links_button.setVisible(is_only_links) + self.export_links_button.setEnabled(is_only_links and bool(self.extracted_links_cache)) # Enable if cache has items + if not is_only_links and self.link_search_input: self.link_search_input.clear() # Clear search when hiding + # --- END ADDED --- + + # Specific handling for "Only Links" mode vs others + if is_only_links: + self.progress_log_label.setText("📜 Extracted Links Log:") # Change title + # Ensure external log is hidden and main log takes full vertical space + if self.external_log_output: self.external_log_output.hide() + if self.log_splitter: self.log_splitter.setSizes([self.height(), 0]) + if self.main_log_output: self.main_log_output.setMinimumHeight(0) + if self.external_log_output: self.external_log_output.setMinimumHeight(0) + # Clear logs for the new mode + if self.main_log_output: self.main_log_output.clear() + if self.external_log_output: self.external_log_output.clear() + # External links checkbox is irrelevant in this mode, keep it enabled but ignored + if self.external_links_checkbox: self.external_links_checkbox.setEnabled(True) + self.log_signal.emit("="*20 + " Mode changed to: Only Links " + "="*20) + # Start processing links immediately for the main log display + self._filter_links_log() # Display initially filtered (all) links + self._try_process_next_external_link() # Start paced display + + else: # Other modes (All, Images, Videos) + self.progress_log_label.setText("📜 Progress Log:") # Restore title + if self.external_links_checkbox: + self.external_links_checkbox.setEnabled(True) # Ensure checkbox is enabled + # Restore log visibility based on checkbox state + self.update_external_links_setting(self.external_links_checkbox.isChecked()) + # Re-enable potentially disabled subfolder options if needed + self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked()) + self.log_signal.emit(f"="*20 + f" Mode changed to: {filter_mode_text} " + "="*20) + + # --- END ADDED --- + + # --- ADDED: Method to filter links in "Only Links" mode --- + def _filter_links_log(self): + """Filters and displays links from the cache in the main log.""" + if not (self.radio_only_links and self.radio_only_links.isChecked()): + return # Only filter 
when in "Only Links" mode + + search_term = self.link_search_input.text().lower().strip() + self.main_log_output.clear() # Clear current display + + current_title_for_display = None # Track title for grouping in this filtered view + separator = "-" * 45 + + for post_title, link_text, link_url, platform in self.extracted_links_cache: + # Check if the search term matches any part of the link info + matches_search = ( + not search_term or + search_term in link_text.lower() or + search_term in link_url.lower() or + search_term in platform.lower() + ) + + if matches_search: + # Check if the post title has changed + if post_title != current_title_for_display: + # Append separator and new title (formatted as HTML) + self.main_log_output.insertHtml("<br>" + separator + "<br>") + title_html = f'{post_title}<br>
' + self.main_log_output.insertHtml(title_html) + current_title_for_display = post_title # Update current title + + # Format and append the link info as plain text + max_link_text_len = 35 + display_text = link_text[:max_link_text_len].strip() + "..." if len(link_text) > max_link_text_len else link_text + formatted_link_info = f"{display_text} - {link_url} - {platform}" + self.main_log_output.append(formatted_link_info) + + # Add a final blank line if any links were displayed + if self.main_log_output.toPlainText().strip(): + self.main_log_output.append("") + + # Scroll to top after filtering + self.main_log_output.verticalScrollBar().setValue(0) + # --- END ADDED --- + + # --- ADDED: Method to export links --- + def _export_links_to_file(self): + if not (self.radio_only_links and self.radio_only_links.isChecked()): + QMessageBox.information(self, "Export Links", "Link export is only available in 'Only Links' mode.") + return + if not self.extracted_links_cache: + QMessageBox.information(self, "Export Links", "No links have been extracted yet.") + return + + default_filename = "extracted_links.txt" + filepath, _ = QFileDialog.getSaveFileName(self, "Save Links", default_filename, "Text Files (*.txt);;All Files (*)") + + if filepath: + try: + with open(filepath, 'w', encoding='utf-8') as f: + current_title_for_export = None + separator = "-" * 60 + "\n" # For file output + + for post_title, link_text, link_url, platform in self.extracted_links_cache: + if post_title != current_title_for_export: + if current_title_for_export is not None: # Add separator before new title, except for the first one + f.write("\n" + separator + "\n") + f.write(f"Post Title: {post_title}\n\n") + current_title_for_export = post_title + + f.write(f" {link_text} - {link_url} - {platform}\n") + + self.log_signal.emit(f"✅ Links successfully exported to: {filepath}") + QMessageBox.information(self, "Export Successful", f"Links exported to:\n{filepath}") + except Exception as e: + self.log_signal.emit(f"❌ Error exporting links: {e}") + QMessageBox.critical(self, "Export Error", f"Could not export links: {e}") + # --- END ADDED --- + def get_filter_mode(self): - if self.radio_images.isChecked(): return 'image' - if self.radio_videos.isChecked(): return 'video' - return 'all' # Default + # This method returns the simplified filter mode string for the backend + if self.radio_only_links and self.radio_only_links.isChecked(): + # When "Only Links" is checked, the backend doesn't filter by file type, + # but it does need a 'filter_mode'. 'all' is a safe default. + # The actual link extraction is controlled by the 'extract_links_only' flag. + return 'all' + elif self.radio_images.isChecked(): return 'image' + elif self.radio_videos.isChecked(): return 'video' + return 'all' # Default for "All" radio or if somehow no radio is checked. 
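For reference, a minimal standalone sketch of how a backend worker might combine the filter_mode string with the extract_links_only flag produced above. The function name and extension sets here are illustrative assumptions, not the app's actual PostProcessorWorker API:

import os

def should_download_file(filename, filter_mode, extract_links_only):
    # "Only Links" mode never downloads files; link collection is driven
    # by the separate extract_links_only flag, as described above.
    if extract_links_only:
        return False
    ext = os.path.splitext(filename)[1].lower()
    if filter_mode == 'image':
        return ext in {'.jpg', '.jpeg', '.png', '.gif', '.webp'}
    if filter_mode == 'video':
        return ext in {'.mp4', '.mkv', '.webm', '.mov'}
    return True  # 'all' imposes no file-type restriction

should_download_file('page_01.png', 'image', False)  # True
should_download_file('page_01.png', 'all', True)     # False: links only

This keeps the UI concern (four radio buttons) separate from the backend concern (three filter modes plus one boolean).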
def add_new_character(self): global KNOWN_NAMES, clean_folder_name # Ensure clean_folder_name is accessible @@ -797,7 +1087,7 @@ class DownloaderApp(QWidget): if similar_names_details: first_similar_new, first_similar_existing = similar_names_details[0] - + # Determine shorter and longer for the example message shorter_name_for_msg, longer_name_for_msg = sorted( [first_similar_new, first_similar_existing], key=len @@ -873,26 +1163,30 @@ class DownloaderApp(QWidget): _, _, post_id = extract_post_info(url_text.strip()) # Show if it's a post URL AND subfolders are generally enabled should_show = bool(post_id) and self.use_subfolders_checkbox.isChecked() - self.custom_folder_widget.setVisible(should_show) - if not should_show: self.custom_folder_input.clear() # Clear if hidden + # --- MODIFIED: Also hide if in "Only Links" mode --- + is_only_links = self.radio_only_links and self.radio_only_links.isChecked() + self.custom_folder_widget.setVisible(should_show and not is_only_links) + # --- END MODIFIED --- + if not self.custom_folder_widget.isVisible(): self.custom_folder_input.clear() # Clear if hidden def update_ui_for_subfolders(self, checked): # Character filter input visibility depends on subfolder usage - self.character_filter_widget.setVisible(checked) + is_only_links = self.radio_only_links and self.radio_only_links.isChecked() + self.character_filter_widget.setVisible(checked and not is_only_links) # Hide if only links if not checked: self.character_input.clear() # Clear filter if hiding self.update_custom_folder_visibility() # Custom folder also depends on this # "Subfolder per Post" is only enabled if "Separate Folders" is also checked - self.use_subfolder_per_post_checkbox.setEnabled(checked) - if not checked: self.use_subfolder_per_post_checkbox.setChecked(False) # Uncheck if parent is disabled + self.use_subfolder_per_post_checkbox.setEnabled(checked and not is_only_links) # Disable if only links + if not checked or is_only_links: self.use_subfolder_per_post_checkbox.setChecked(False) # Uncheck if parent is disabled or only links def update_page_range_enabled_state(self): url_text = self.link_input.text().strip() service, user_id, post_id = extract_post_info(url_text) # Page range is for creator feeds (no post_id) is_creator_feed = service is not None and user_id is not None and post_id is None - + manga_mode_active = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False # Enable page range if it's a creator feed AND manga mode is NOT active enable_page_range = is_creator_feed and not manga_mode_active @@ -937,9 +1231,9 @@ class DownloaderApp(QWidget): num_threads = int(text) if num_threads > 0 : self.use_multithreading_checkbox.setText(f"Use Multithreading ({num_threads} Threads)") - else: + else: self.use_multithreading_checkbox.setText("Use Multithreading (Invalid: >0)") - except ValueError: + except ValueError: self.use_multithreading_checkbox.setText("Use Multithreading (Invalid Input)") else: self.use_multithreading_checkbox.setText("Use Multithreading (1 Thread)") # Show 1 thread when disabled @@ -982,7 +1276,6 @@ class DownloaderApp(QWidget): api_url = self.link_input.text().strip() output_dir = self.dir_input.text().strip() - filter_mode = self.get_filter_mode() skip_zip = self.skip_zip_checkbox.isChecked() skip_rar = self.skip_rar_checkbox.isChecked() use_subfolders = self.use_subfolders_checkbox.isChecked() @@ -995,20 +1288,32 @@ class DownloaderApp(QWidget): manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox 
else False + extract_links_only = (self.radio_only_links and self.radio_only_links.isChecked()) + + # --- MODIFICATION FOR FILTER MODE --- + # Get the simplified filter mode for the backend (e.g., 'image', 'video', 'all') + backend_filter_mode = self.get_filter_mode() + # Get the user-facing text of the selected radio button for logging purposes + user_selected_filter_text = self.radio_group.checkedButton().text() if self.radio_group.checkedButton() else "All" + # --- END MODIFICATION FOR FILTER MODE --- + + + if not api_url: + QMessageBox.critical(self, "Input Error", "URL is required."); return + if not extract_links_only and not output_dir: + QMessageBox.critical(self, "Input Error", "Download Directory is required when not in 'Only Links' mode."); return - if not api_url or not output_dir: - QMessageBox.critical(self, "Input Error", "URL and Download Directory are required."); return service, user_id, post_id_from_url = extract_post_info(api_url) - if not service or not user_id: # Basic validation of extracted info + if not service or not user_id: QMessageBox.critical(self, "Input Error", "Invalid or unsupported URL format."); return - if not os.path.isdir(output_dir): + if not extract_links_only and not os.path.isdir(output_dir): reply = QMessageBox.question(self, "Create Directory?", f"The directory '{output_dir}' does not exist.\nCreate it now?", QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) if reply == QMessageBox.Yes: try: - os.makedirs(output_dir, exist_ok=True) # exist_ok=True is safer + os.makedirs(output_dir, exist_ok=True) self.log_signal.emit(f"ℹ️ Created directory: {output_dir}") except Exception as e: QMessageBox.critical(self, "Directory Error", f"Could not create directory: {e}"); return @@ -1016,22 +1321,19 @@ class DownloaderApp(QWidget): self.log_signal.emit("❌ Download cancelled: Output directory does not exist and was not created.") return - if compress_images and Image is None: # Check for Pillow if compression is enabled + if compress_images and Image is None: QMessageBox.warning(self, "Missing Dependency", "Pillow library (for image compression) not found. Compression will be disabled.") - compress_images = False # Disable it for this run - self.compress_images_checkbox.setChecked(False) # Update UI + compress_images = False + self.compress_images_checkbox.setChecked(False) - manga_mode = manga_mode_is_checked and not post_id_from_url # Manga mode only for creator feeds + manga_mode = manga_mode_is_checked and not post_id_from_url num_threads_str = self.thread_count_input.text().strip() - num_threads = 1 # Default to 1 if multithreading is off or input is invalid - if use_multithreading: # Only parse if multithreading is enabled + num_threads = 1 + if use_multithreading: try: num_threads_requested = int(num_threads_str) - - # --- MODIFIED: Tiered Thread Count Warning/Cap --- if num_threads_requested > MAX_THREADS: - # Hard cap warning (above 200) warning_message = ( f"You have requested {num_threads_requested} threads, which is above the maximum limit of {MAX_THREADS}.\n\n" f"High thread counts can lead to instability or rate-limiting.\n\n" @@ -1039,33 +1341,30 @@ class DownloaderApp(QWidget): ) QMessageBox.warning(self, "High Thread Count Warning", warning_message) self.log_signal.emit(f"⚠️ High thread count requested ({num_threads_requested}). 
Capping at {MAX_THREADS}.") - num_threads = MAX_THREADS # Apply the cap - self.thread_count_input.setText(str(num_threads)) # Update UI to show capped value + num_threads = MAX_THREADS + self.thread_count_input.setText(str(num_threads)) elif num_threads_requested > RECOMMENDED_MAX_THREADS: - # Informational warning (above 50 but <= 200) QMessageBox.information(self, "High Thread Count Note", f"Using {num_threads_requested} threads (above {RECOMMENDED_MAX_THREADS}) may increase resource usage and risk rate-limiting from the site.\n\nProceeding with caution.") self.log_signal.emit(f"ℹ️ Using high thread count: {num_threads_requested}.") - num_threads = num_threads_requested # Use the requested value - elif num_threads_requested < 1: # Should be caught by validator, but safety check + num_threads = num_threads_requested + elif num_threads_requested < 1: self.log_signal.emit(f"⚠️ Invalid thread count ({num_threads_requested}). Using 1 thread.") num_threads = 1 self.thread_count_input.setText(str(num_threads)) else: - num_threads = num_threads_requested # Use the requested value if within limits - # --- END MODIFIED --- - + num_threads = num_threads_requested except ValueError: QMessageBox.critical(self, "Thread Count Error", "Invalid number of threads. Please enter a numeric value."); return else: - num_threads = 1 # Explicitly set to 1 if multithreading checkbox is off + num_threads = 1 start_page_str, end_page_str = self.start_page_input.text().strip(), self.end_page_input.text().strip() start_page, end_page = None, None is_creator_feed = bool(not post_id_from_url) - if is_creator_feed and not manga_mode: # Page range only for non-manga creator feeds + if is_creator_feed and not manga_mode: try: if start_page_str: start_page = int(start_page_str) if end_page_str: end_page = int(end_page_str) @@ -1075,13 +1374,13 @@ class DownloaderApp(QWidget): raise ValueError("Start page cannot be greater than end page.") except ValueError as e: QMessageBox.critical(self, "Page Range Error", f"Invalid page range: {e}"); return - elif manga_mode: # Manga mode processes all pages (reversed in downloader_utils) - start_page, end_page = None, None - - # --- ADDED: Clear link queue before starting new download --- + elif manga_mode: + start_page, end_page = None, None + self.external_link_queue.clear() + self.extracted_links_cache = [] self._is_processing_external_link_queue = False - # --- END ADDED --- + self._current_link_post_title = None raw_character_filters_text = self.character_input.text().strip() @@ -1090,54 +1389,51 @@ class DownloaderApp(QWidget): temp_list = [name.strip() for name in raw_character_filters_text.split(',') if name.strip()] if temp_list: parsed_character_list = temp_list - filter_character_list_to_pass = None # This will be passed to backend - if use_subfolders and parsed_character_list and not post_id_from_url: # Validate filters if used for subfolders + filter_character_list_to_pass = None + if use_subfolders and parsed_character_list and not post_id_from_url and not extract_links_only: self.log_signal.emit(f"ℹ️ Validating character filters for subfolder naming: {', '.join(parsed_character_list)}") valid_filters_for_backend = [] user_cancelled_validation = False for char_name in parsed_character_list: - cleaned_name_test = clean_folder_name(char_name) # Test if name is valid for folder + cleaned_name_test = clean_folder_name(char_name) if not cleaned_name_test: QMessageBox.warning(self, "Invalid Filter Name", f"Filter name '{char_name}' is invalid for a folder and will be skipped.") 
self.log_signal.emit(f"⚠️ Skipping invalid filter for folder: '{char_name}'") continue - # Prompt to add to known_names if not already there if char_name.lower() not in {kn.lower() for kn in KNOWN_NAMES}: reply = QMessageBox.question(self, "Add Filter Name to Known List?", f"The character filter '{char_name}' is not in your known names list (used for folder suggestions).\nAdd it now?", QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes) if reply == QMessageBox.Yes: - self.new_char_input.setText(char_name) # Use existing add mechanism - if self.add_new_character(): # This now handles similarity checks too + self.new_char_input.setText(char_name) + if self.add_new_character(): self.log_signal.emit(f"✅ Added '{char_name}' to known names via filter prompt.") valid_filters_for_backend.append(char_name) - else: # add_new_character returned False (e.g., user chose "Change Name" or it failed) + else: self.log_signal.emit(f"⚠️ Failed to add '{char_name}' via filter prompt (or user opted out). It will still be used for filtering this session if valid.") - # Still add to backend list for current session if it's a valid folder name if cleaned_name_test: valid_filters_for_backend.append(char_name) elif reply == QMessageBox.Cancel: self.log_signal.emit(f"❌ Download cancelled by user during filter validation for '{char_name}'.") user_cancelled_validation = True; break - else: # User chose No + else: self.log_signal.emit(f"ℹ️ Proceeding with filter '{char_name}' for matching without adding to known list.") if cleaned_name_test: valid_filters_for_backend.append(char_name) - else: # Already in known names + else: if cleaned_name_test: valid_filters_for_backend.append(char_name) - - if user_cancelled_validation: return # Stop download if user cancelled + + if user_cancelled_validation: return if valid_filters_for_backend: filter_character_list_to_pass = valid_filters_for_backend self.log_signal.emit(f" Using validated character filters for subfolders: {', '.join(filter_character_list_to_pass)}") else: self.log_signal.emit("⚠️ No valid character filters remaining after validation for subfolder naming.") - elif parsed_character_list : # Filters provided, but not for subfolders (e.g. subfolders disabled) + elif parsed_character_list : filter_character_list_to_pass = parsed_character_list self.log_signal.emit(f"ℹ️ Character filters provided: {', '.join(filter_character_list_to_pass)} (Subfolder creation rules may differ).") - - # --- ADDED: Manga Mode Filter Warning --- - if manga_mode and not filter_character_list_to_pass: + + if manga_mode and not filter_character_list_to_pass and not extract_links_only: msg_box = QMessageBox(self) msg_box.setIcon(QMessageBox.Warning) msg_box.setWindowTitle("Manga Mode Filter Warning") @@ -1147,115 +1443,128 @@ class DownloaderApp(QWidget): "(as used by the creator on the site) into the filter field.\n\n" "Do you want to proceed without a filter (file names might be generic) or cancel?" 
) - proceed_button = msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole) # YesRole/AcceptRole makes it default - cancel_button = msg_box.addButton("Cancel Download", QMessageBox.RejectRole) # NoRole/RejectRole for cancel - + proceed_button = msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole) + cancel_button = msg_box.addButton("Cancel Download", QMessageBox.RejectRole) + msg_box.exec_() if msg_box.clickedButton() == cancel_button: self.log_signal.emit("❌ Download cancelled by user due to Manga Mode filter warning.") - return # Stop the download process here + return else: self.log_signal.emit("⚠️ Proceeding with Manga Mode without a specific title filter.") - # --- END ADDED --- custom_folder_name_cleaned = None - if use_subfolders and post_id_from_url and self.custom_folder_widget.isVisible(): + if use_subfolders and post_id_from_url and self.custom_folder_widget.isVisible() and not extract_links_only: raw_custom_name = self.custom_folder_input.text().strip() if raw_custom_name: cleaned_custom = clean_folder_name(raw_custom_name) if cleaned_custom: custom_folder_name_cleaned = cleaned_custom else: self.log_signal.emit(f"⚠️ Invalid custom folder name ignored: '{raw_custom_name}'") - # Reset UI elements for new download self.main_log_output.clear() - if self.show_external_links: self.external_log_output.clear(); self.external_log_output.append("🔗 External Links Found:") # Changed title slightly + if extract_links_only: + self.main_log_output.append("🔗 Extracting Links...") + if self.external_log_output: self.external_log_output.clear() + elif self.show_external_links: + self.external_log_output.clear() + self.external_log_output.append("🔗 External Links Found:") self.file_progress_label.setText("") - self.cancellation_event.clear() # IMPORTANT: Clear cancellation from previous run + self.cancellation_event.clear() self.active_futures = [] self.total_posts_to_process = self.processed_posts_count = self.download_counter = self.skip_counter = 0 self.progress_label.setText("Progress: Initializing...") - # Log download parameters log_messages = [ - "="*40, f"🚀 Starting Download @ {time.strftime('%Y-%m-%d %H:%M:%S')}", - f" URL: {api_url}", f" Save Location: {output_dir}", - f" Mode: {'Single Post' if post_id_from_url else 'Creator Feed'}", + "="*40, f"🚀 Starting {'Link Extraction' if extract_links_only else 'Download'} @ {time.strftime('%Y-%m-%d %H:%M:%S')}", + f" URL: {api_url}", ] + if not extract_links_only: + log_messages.append(f" Save Location: {output_dir}") + + log_messages.append(f" Mode: {'Single Post' if post_id_from_url else 'Creator Feed'}") + if is_creator_feed: if manga_mode: log_messages.append(" Page Range: All (Manga Mode - Oldest Posts Processed First)") else: pr_log = "All" - if start_page or end_page: # Construct page range log string + if start_page or end_page: pr_log = f"{f'From {start_page} ' if start_page else ''}{'to ' if start_page and end_page else ''}{f'{end_page}' if end_page else (f'Up to {end_page}' if end_page else (f'From {start_page}' if start_page else 'Specific Range'))}".strip() log_messages.append(f" Page Range: {pr_log if pr_log else 'All'}") + if not extract_links_only: + log_messages.append(f" Subfolders: {'Enabled' if use_subfolders else 'Disabled'}") + if use_subfolders: + if custom_folder_name_cleaned: log_messages.append(f" Custom Folder (Post): '{custom_folder_name_cleaned}'") + elif filter_character_list_to_pass and not post_id_from_url: log_messages.append(f" Character Filters for Folders: {', 
'.join(filter_character_list_to_pass)}") + else: log_messages.append(f" Folder Naming: Automatic (based on title/known names)") + log_messages.append(f" Subfolder per Post: {'Enabled' if use_post_subfolders else 'Disabled'}") - log_messages.append(f" Subfolders: {'Enabled' if use_subfolders else 'Disabled'}") - if use_subfolders: - if custom_folder_name_cleaned: log_messages.append(f" Custom Folder (Post): '{custom_folder_name_cleaned}'") - elif filter_character_list_to_pass and not post_id_from_url: log_messages.append(f" Character Filters for Folders: {', '.join(filter_character_list_to_pass)}") - else: log_messages.append(f" Folder Naming: Automatic (based on title/known names)") - log_messages.append(f" Subfolder per Post: {'Enabled' if use_post_subfolders else 'Disabled'}") + log_messages.extend([ + # --- MODIFIED LOGGING FOR FILTER MODE --- + f" File Type Filter: {user_selected_filter_text} (Backend processing as: {backend_filter_mode})", + # --- END MODIFIED LOGGING --- + f" Skip Archives: {'.zip' if skip_zip else ''}{', ' if skip_zip and skip_rar else ''}{'.rar' if skip_rar else ''}{'None' if not (skip_zip or skip_rar) else ''}", + f" Skip Words (posts/files): {', '.join(skip_words_list) if skip_words_list else 'None'}", + f" Compress Images: {'Enabled' if compress_images else 'Disabled'}", + f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}", + ]) + else: + log_messages.append(f" Mode: Extracting Links Only") # This handles the "Only Links" case - log_messages.extend([ - f" File Type Filter: {filter_mode}", - f" Skip Archives: {'.zip' if skip_zip else ''}{', ' if skip_zip and skip_rar else ''}{'.rar' if skip_rar else ''}{'None' if not (skip_zip or skip_rar) else ''}", - f" Skip Words (posts/files): {', '.join(skip_words_list) if skip_words_list else 'None'}", - f" Compress Images: {'Enabled' if compress_images else 'Disabled'}", - f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}", - f" Show External Links: {'Enabled' if self.show_external_links else 'Disabled'}" - ]) + log_messages.append(f" Show External Links: {'Enabled' if self.show_external_links else 'Disabled'}") if manga_mode: log_messages.append(f" Manga Mode (File Renaming by Post Title): Enabled") - should_use_multithreading = use_multithreading and not post_id_from_url # Multi-threading for creator feeds + should_use_multithreading = use_multithreading and not post_id_from_url log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading else 'Single-threaded (posts)'}") - if should_use_multithreading: log_messages.append(f" Number of Post Worker Threads: {num_threads}") # Use potentially capped value + if should_use_multithreading: log_messages.append(f" Number of Post Worker Threads: {num_threads}") log_messages.append("="*40) for msg in log_messages: self.log_signal.emit(msg) - self.set_ui_enabled(False) # Disable UI during download + self.set_ui_enabled(False) unwanted_keywords_for_folders = {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'} - # Prepare arguments for worker threads/classes args_template = { 'api_url_input': api_url, - 'download_root': output_dir, # For PostProcessorWorker - 'output_dir': output_dir, # For DownloadThread __init__ - 'known_names': list(KNOWN_NAMES), # Pass a copy - 'known_names_copy': list(KNOWN_NAMES), # For DownloadThread __init__ + 'download_root': output_dir, + 'output_dir': output_dir, + 'known_names': list(KNOWN_NAMES), + 'known_names_copy': list(KNOWN_NAMES), 'filter_character_list': 
filter_character_list_to_pass, - 'filter_mode': filter_mode, 'skip_zip': skip_zip, 'skip_rar': skip_rar, + # --- MODIFIED: Pass the correct backend_filter_mode --- + 'filter_mode': backend_filter_mode, + # --- END MODIFICATION --- + 'skip_zip': skip_zip, 'skip_rar': skip_rar, 'use_subfolders': use_subfolders, 'use_post_subfolders': use_post_subfolders, 'compress_images': compress_images, 'download_thumbnails': download_thumbnails, 'service': service, 'user_id': user_id, - 'downloaded_files': self.downloaded_files, # Shared set - 'downloaded_files_lock': self.downloaded_files_lock, # Shared lock - 'downloaded_file_hashes': self.downloaded_file_hashes, # Shared set - 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, # Shared lock + 'downloaded_files': self.downloaded_files, + 'downloaded_files_lock': self.downloaded_files_lock, + 'downloaded_file_hashes': self.downloaded_file_hashes, + 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, 'skip_words_list': skip_words_list, 'show_external_links': self.show_external_links, - 'start_page': start_page, - 'end_page': end_page, + 'extract_links_only': extract_links_only, + 'start_page': start_page, + 'end_page': end_page, 'target_post_id_from_initial_url': post_id_from_url, 'custom_folder_name': custom_folder_name_cleaned, 'manga_mode_active': manga_mode, 'unwanted_keywords': unwanted_keywords_for_folders, - 'cancellation_event': self.cancellation_event, # Crucial for stopping threads - 'signals': self.worker_signals, # For multi-threaded PostProcessorWorker + 'cancellation_event': self.cancellation_event, + 'signals': self.worker_signals, } try: if should_use_multithreading: - self.log_signal.emit(f" Initializing multi-threaded download with {num_threads} post workers...") # Use potentially capped value - self.start_multi_threaded_download(num_post_workers=num_threads, **args_template) # Pass capped value - else: # Single post URL or multithreading disabled - self.log_signal.emit(" Initializing single-threaded download...") - # Keys expected by DownloadThread constructor + self.log_signal.emit(f" Initializing multi-threaded {'link extraction' if extract_links_only else 'download'} with {num_threads} post workers...") + self.start_multi_threaded_download(num_post_workers=num_threads, **args_template) + else: + self.log_signal.emit(f" Initializing single-threaded {'link extraction' if extract_links_only else 'download'}...") dt_expected_keys = [ 'api_url_input', 'output_dir', 'known_names_copy', 'cancellation_event', 'filter_character_list', 'filter_mode', 'skip_zip', 'skip_rar', @@ -1263,44 +1572,42 @@ class DownloaderApp(QWidget): 'compress_images', 'download_thumbnails', 'service', 'user_id', 'downloaded_files', 'downloaded_file_hashes', 'downloaded_files_lock', 'downloaded_file_hashes_lock', 'skip_words_list', 'show_external_links', + 'extract_links_only', 'num_file_threads_for_worker', 'skip_current_file_flag', 'start_page', 'end_page', 'target_post_id_from_initial_url', 'manga_mode_active', 'unwanted_keywords' ] - args_template['num_file_threads_for_worker'] = 1 # Single thread mode, worker uses 1 file thread - args_template['skip_current_file_flag'] = None # No skip flag initially + args_template['num_file_threads_for_worker'] = 1 + args_template['skip_current_file_flag'] = None single_thread_args = {} for key in dt_expected_keys: if key in args_template: single_thread_args[key] = args_template[key] - # Missing optional keys will use defaults in DownloadThread's __init__ 
self.start_single_threaded_download(**single_thread_args) except Exception as e: - self.log_signal.emit(f"❌ CRITICAL ERROR preparing download: {e}\n{traceback.format_exc()}") - QMessageBox.critical(self, "Start Error", f"Failed to start download:\n{e}") - self.download_finished(0,0,False) # Ensure UI is re-enabled + self.log_signal.emit(f"❌ CRITICAL ERROR preparing {'link extraction' if extract_links_only else 'download'}: {e}\n{traceback.format_exc()}") + QMessageBox.critical(self, "Start Error", f"Failed to start process:\n{e}") + self.download_finished(0,0,False) def start_single_threaded_download(self, **kwargs): global BackendDownloadThread try: - self.download_thread = BackendDownloadThread(**kwargs) # Pass all relevant args + self.download_thread = BackendDownloadThread(**kwargs) - # Connect signals from the DownloadThread instance if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.connect(self.handle_main_log) - if hasattr(self.download_thread, 'add_character_prompt_signal'): # Though less used by DownloadThread directly + if hasattr(self.download_thread, 'add_character_prompt_signal'): self.download_thread.add_character_prompt_signal.connect(self.add_character_prompt_signal) if hasattr(self.download_thread, 'finished_signal'): - self.download_thread.finished_signal.connect(self.finished_signal) # Connect to app's finished handler - if hasattr(self.download_thread, 'receive_add_character_result'): # For two-way prompt communication + self.download_thread.finished_signal.connect(self.finished_signal) + if hasattr(self.download_thread, 'receive_add_character_result'): self.character_prompt_response_signal.connect(self.download_thread.receive_add_character_result) - # MODIFIED: Connect external_link_signal to the new handler - if hasattr(self.download_thread, 'external_link_signal'): - self.download_thread.external_link_signal.connect(self.handle_external_link_signal) # Connect to queue handler + if hasattr(self.download_thread, 'external_link_signal'): + self.download_thread.external_link_signal.connect(self.handle_external_link_signal) if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.connect(self.update_file_progress_display) @@ -1309,81 +1616,78 @@ class DownloaderApp(QWidget): except Exception as e: self.log_signal.emit(f"❌ CRITICAL ERROR starting single-thread: {e}\n{traceback.format_exc()}") QMessageBox.critical(self, "Thread Start Error", f"Failed to start download process: {e}") - self.download_finished(0,0,False) # Cleanup + self.download_finished(0,0,False) def start_multi_threaded_download(self, num_post_workers, **kwargs): - global PostProcessorWorker # Ensure it's the correct worker class + global PostProcessorWorker self.thread_pool = ThreadPoolExecutor(max_workers=num_post_workers, thread_name_prefix='PostWorker_') - self.active_futures = [] # Reset list of active futures + self.active_futures = [] self.processed_posts_count = 0 - self.total_posts_to_process = 0 # Will be updated by _fetch_and_queue_posts + self.total_posts_to_process = 0 self.download_counter = 0 self.skip_counter = 0 - # Start a separate thread to fetch post data and submit tasks to the pool - # This prevents the GUI from freezing during the initial API calls for post lists fetcher_thread = threading.Thread( target=self._fetch_and_queue_posts, - args=(kwargs['api_url_input'], kwargs, num_post_workers), - daemon=True, name="PostFetcher" # Daemon thread will exit when app exits + args=(kwargs['api_url_input'], 
kwargs, num_post_workers), + daemon=True, name="PostFetcher" ) fetcher_thread.start() self.log_signal.emit(f"✅ Post fetcher thread started. {num_post_workers} post worker threads initializing...") def _fetch_and_queue_posts(self, api_url_input_for_fetcher, worker_args_template, num_post_workers): - global PostProcessorWorker, download_from_api # Ensure correct references + global PostProcessorWorker, download_from_api all_posts_data = [] fetch_error_occurred = False manga_mode_active_for_fetch = worker_args_template.get('manga_mode_active', False) - signals_for_worker = worker_args_template.get('signals') # This is self.worker_signals - if not signals_for_worker: # Should always be present + signals_for_worker = worker_args_template.get('signals') + if not signals_for_worker: self.log_signal.emit("❌ CRITICAL ERROR: Signals object missing for worker in _fetch_and_queue_posts.") - self.finished_signal.emit(0,0,True) # Signal failure + self.finished_signal.emit(0,0,True) return try: self.log_signal.emit(" Fetching post data from API...") post_generator = download_from_api( api_url_input_for_fetcher, - logger=lambda msg: self.log_signal.emit(f"[Fetcher] {msg}"), # Prefix fetcher logs - start_page=worker_args_template.get('start_page'), + logger=lambda msg: self.log_signal.emit(f"[Fetcher] {msg}"), + start_page=worker_args_template.get('start_page'), end_page=worker_args_template.get('end_page'), manga_mode=manga_mode_active_for_fetch, - cancellation_event=self.cancellation_event # Pass cancellation event + cancellation_event=self.cancellation_event ) for posts_batch in post_generator: - if self.cancellation_event.is_set(): # Check cancellation frequently + if self.cancellation_event.is_set(): fetch_error_occurred = True; self.log_signal.emit(" Post fetching cancelled by user."); break if isinstance(posts_batch, list): all_posts_data.extend(posts_batch) - self.total_posts_to_process = len(all_posts_data) # Update total - # Log progress periodically for large feeds + self.total_posts_to_process = len(all_posts_data) if self.total_posts_to_process > 0 and self.total_posts_to_process % 100 == 0 : self.log_signal.emit(f" Fetched {self.total_posts_to_process} posts so far...") - else: # Should not happen if download_from_api is correct + else: fetch_error_occurred = True self.log_signal.emit(f"❌ API fetcher returned non-list type: {type(posts_batch)}"); break if not fetch_error_occurred and not self.cancellation_event.is_set(): self.log_signal.emit(f"✅ Post fetching complete. 
Total posts to process: {self.total_posts_to_process}") - except TypeError as te: # Catch common error if downloader_utils is outdated + except TypeError as te: self.log_signal.emit(f"❌ TypeError calling download_from_api: {te}") self.log_signal.emit(" Check if 'downloader_utils.py' has the correct 'download_from_api' signature (including 'manga_mode' and 'cancellation_event').") self.log_signal.emit(traceback.format_exc(limit=2)) fetch_error_occurred = True - except RuntimeError as re: # Catch cancellation from fetch_posts_paginated + except RuntimeError as re: self.log_signal.emit(f"ℹ️ Post fetching runtime error (likely cancellation): {re}") - fetch_error_occurred = True # Treat as an error for cleanup + fetch_error_occurred = True except Exception as e: self.log_signal.emit(f"❌ Error during post fetching: {e}\n{traceback.format_exc(limit=2)}") fetch_error_occurred = True if self.cancellation_event.is_set() or fetch_error_occurred: self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set()) - if self.thread_pool: # Ensure pool is shutdown if fetch fails or is cancelled + if self.thread_pool: self.thread_pool.shutdown(wait=False, cancel_futures=True); self.thread_pool = None return @@ -1392,12 +1696,11 @@ class DownloaderApp(QWidget): self.finished_signal.emit(0,0,False); return self.log_signal.emit(f" Submitting {self.total_posts_to_process} post processing tasks to thread pool...") - self.processed_posts_count = 0 # Reset for this run - self.overall_progress_signal.emit(self.total_posts_to_process, 0) # Initial progress update + self.processed_posts_count = 0 + self.overall_progress_signal.emit(self.total_posts_to_process, 0) - num_file_dl_threads = 4 # Default for PostProcessorWorker's internal pool + num_file_dl_threads = 4 - # Define keys PostProcessorWorker expects (ensure this matches its __init__) ppw_expected_keys = [ 'post_data', 'download_root', 'known_names', 'filter_character_list', 'unwanted_keywords', 'filter_mode', 'skip_zip', 'skip_rar', @@ -1409,7 +1712,6 @@ class DownloaderApp(QWidget): 'extract_links_only', 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active' ] - # Optional keys with defaults in PostProcessorWorker's __init__ ppw_optional_keys_with_defaults = { 'skip_words_list', 'show_external_links', 'extract_links_only', 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active' @@ -1417,52 +1719,51 @@ class DownloaderApp(QWidget): for post_data_item in all_posts_data: - if self.cancellation_event.is_set(): break # Check before submitting each task - if not isinstance(post_data_item, dict): # Basic sanity check + if self.cancellation_event.is_set(): break + if not isinstance(post_data_item, dict): self.log_signal.emit(f"⚠️ Skipping invalid post data item (not a dict): {type(post_data_item)}") - self.processed_posts_count += 1 # Count as processed/skipped + self.processed_posts_count += 1 continue - # Build args for PostProcessorWorker instance worker_init_args = {} missing_keys = [] for key in ppw_expected_keys: if key == 'post_data': worker_init_args[key] = post_data_item elif key == 'num_file_threads': worker_init_args[key] = num_file_dl_threads - elif key == 'signals': worker_init_args[key] = signals_for_worker # Use the app's worker_signals + elif key == 'signals': worker_init_args[key] = signals_for_worker elif key in worker_args_template: worker_init_args[key] = worker_args_template[key] - elif key in ppw_optional_keys_with_defaults: pass # Let worker use its default - else: 
missing_keys.append(key) # Required key is missing + elif key in ppw_optional_keys_with_defaults: pass + else: missing_keys.append(key) if missing_keys: self.log_signal.emit(f"❌ CRITICAL ERROR: Missing expected keys for PostProcessorWorker: {', '.join(missing_keys)}") - self.cancellation_event.set() # Stop all processing + self.cancellation_event.set() break try: worker_instance = PostProcessorWorker(**worker_init_args) - if self.thread_pool: # Ensure pool is still active + if self.thread_pool: future = self.thread_pool.submit(worker_instance.process) - future.add_done_callback(self._handle_future_result) # Handle result/exception + future.add_done_callback(self._handle_future_result) self.active_futures.append(future) - else: # Pool might have been shut down due to earlier error/cancellation + else: self.log_signal.emit("⚠️ Thread pool not available. Cannot submit more tasks.") break - except TypeError as te: # Error creating worker (e.g. wrong args) + except TypeError as te: self.log_signal.emit(f"❌ TypeError creating PostProcessorWorker: {te}") passed_keys_str = ", ".join(sorted(worker_init_args.keys())) self.log_signal.emit(f" Passed Args: [{passed_keys_str}]") self.log_signal.emit(traceback.format_exc(limit=5)) - self.cancellation_event.set(); break # Stop all - except RuntimeError: # Pool might be shutting down + self.cancellation_event.set(); break + except RuntimeError: self.log_signal.emit("⚠️ Runtime error submitting task (pool likely shutting down)."); break - except Exception as e: # Other errors during submission + except Exception as e: self.log_signal.emit(f"❌ Error submitting post {post_data_item.get('id','N/A')} to worker: {e}"); break if not self.cancellation_event.is_set(): self.log_signal.emit(f" {len(self.active_futures)} post processing tasks submitted to pool.") - else: # If cancelled during submission loop + else: self.finished_signal.emit(self.download_counter, self.skip_counter, True) if self.thread_pool: self.thread_pool.shutdown(wait=False, cancel_futures=True); self.thread_pool = None @@ -1475,161 +1776,125 @@ class DownloaderApp(QWidget): try: if future.cancelled(): self.log_signal.emit(" A post processing task was cancelled.") - # If a task was cancelled, it implies we might want to count its potential files as skipped - # This is hard to determine without knowing the post_data it was handling. - # For simplicity, we don't add to skip_counter here unless future.result() would have. elif future.exception(): worker_exception = future.exception() self.log_signal.emit(f"❌ Post processing worker error: {worker_exception}") - # Similar to cancelled, hard to know how many files were skipped due to error. 
else: # Success downloaded_files_from_future, skipped_files_from_future = future.result() - # Lock for updating shared counters - with self.downloaded_files_lock: # Using this lock for these counters too + with self.downloaded_files_lock: self.download_counter += downloaded_files_from_future self.skip_counter += skipped_files_from_future self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count) - except Exception as e: # Error in this callback itself + except Exception as e: self.log_signal.emit(f"❌ Error in _handle_future_result callback: {e}\n{traceback.format_exc(limit=2)}") - # Check if all tasks are done if self.total_posts_to_process > 0 and self.processed_posts_count >= self.total_posts_to_process: - # More robust check: ensure all submitted futures are actually done all_done = all(f.done() for f in self.active_futures) if all_done: - QApplication.processEvents() # Process any pending GUI events + QApplication.processEvents() self.log_signal.emit("🏁 All submitted post tasks have completed or failed.") self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set()) def set_ui_enabled(self, enabled): - # List of widgets to toggle enabled state widgets_to_toggle = [ - self.download_btn, self.link_input, self.dir_input, self.dir_button, - self.radio_all, self.radio_images, self.radio_videos, + self.download_btn, self.link_input, + self.radio_all, self.radio_images, self.radio_videos, self.radio_only_links, self.skip_zip_checkbox, self.skip_rar_checkbox, self.use_subfolders_checkbox, self.compress_images_checkbox, self.download_thumbnails_checkbox, self.use_multithreading_checkbox, self.skip_words_input, self.character_search_input, self.new_char_input, - self.add_char_button, self.delete_char_button, - # self.external_links_checkbox, # Keep this enabled + self.add_char_button, self.delete_char_button, self.start_page_input, self.end_page_input, self.page_range_label, self.to_label, self.character_input, self.custom_folder_input, self.custom_folder_label, - self.reset_button, - # self.log_verbosity_button, # Keep this enabled + self.reset_button, self.manga_mode_checkbox ] for widget in widgets_to_toggle: - if widget: # Check if widget exists + if widget: widget.setEnabled(enabled) - - # --- Explicitly keep these enabled --- + if self.external_links_checkbox: - self.external_links_checkbox.setEnabled(True) + is_only_links = self.radio_only_links and self.radio_only_links.isChecked() + self.external_links_checkbox.setEnabled(not is_only_links) if self.log_verbosity_button: self.log_verbosity_button.setEnabled(True) - # --- END --- - # --- MODIFIED: Handle thread count input based on checkbox state --- + multithreading_currently_on = self.use_multithreading_checkbox.isChecked() self.thread_count_input.setEnabled(enabled and multithreading_currently_on) self.thread_count_label.setEnabled(enabled and multithreading_currently_on) - # --- END MODIFIED --- - # Handle other dependent widgets + subfolders_currently_on = self.use_subfolders_checkbox.isChecked() self.use_subfolder_per_post_checkbox.setEnabled(enabled and subfolders_currently_on) - self.cancel_btn.setEnabled(not enabled) # Cancel is enabled when download is running + self.cancel_btn.setEnabled(not enabled) - if enabled: # When re-enabling UI, refresh dependent states - self.update_ui_for_subfolders(subfolders_currently_on) - self.update_custom_folder_visibility() - self.update_page_range_enabled_state() - if self.manga_mode_checkbox: - 
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked()) - self._handle_multithreading_toggle(multithreading_currently_on) # Refresh thread count state + if enabled: + self._handle_filter_mode_change(self.radio_group.checkedButton(), True) + self._handle_multithreading_toggle(multithreading_currently_on) def cancel_download(self): - if not self.cancel_btn.isEnabled() and not self.cancellation_event.is_set(): # Avoid multiple cancel calls + if not self.cancel_btn.isEnabled() and not self.cancellation_event.is_set(): self.log_signal.emit("ℹ️ No active download to cancel or already cancelling.") return self.log_signal.emit("⚠️ Requesting cancellation of download process...") - self.cancellation_event.set() # Signal all threads/workers to stop + self.cancellation_event.set() if self.download_thread and self.download_thread.isRunning(): - # For QThread, requestInterruption() is a polite request. - # The thread's run() loop must check isInterruptionRequested() or self.cancellation_event. - self.download_thread.requestInterruption() + self.download_thread.requestInterruption() self.log_signal.emit(" Signaled single download thread to interrupt.") - # --- MODIFICATION START: Initiate thread pool shutdown immediately --- if self.thread_pool: self.log_signal.emit(" Initiating immediate shutdown and cancellation of worker pool tasks...") - # Start shutdown non-blockingly, attempting to cancel futures self.thread_pool.shutdown(wait=False, cancel_futures=True) - # --- MODIFICATION END --- - - # --- ADDED: Clear link queue on cancel --- - self.external_link_queue.clear() - self._is_processing_external_link_queue = False - # --- END ADDED --- - self.cancel_btn.setEnabled(False) # Disable cancel button after initiating cancellation + self.external_link_queue.clear() + self._is_processing_external_link_queue = False + self._current_link_post_title = None + + self.cancel_btn.setEnabled(False) self.progress_label.setText("Progress: Cancelling...") self.file_progress_label.setText("") - # The download_finished method will be called eventually when threads finally exit. 
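Note that this cancellation is cooperative: cancellation_event.set() does not preempt tasks that are already running, and shutdown(..., cancel_futures=True) (Python 3.9+) only drops futures that have not yet started. A minimal sketch of the pattern, with illustrative names rather than the app's worker classes:

import threading
from concurrent.futures import ThreadPoolExecutor

cancel_event = threading.Event()

def process_post(post_id):
    # Each worker polls the shared event at a safe point and exits early.
    if cancel_event.is_set():
        return f"post {post_id}: stopped early"
    return f"post {post_id}: processed"

pool = ThreadPoolExecutor(max_workers=4)
futures = [pool.submit(process_post, i) for i in range(20)]
cancel_event.set()                              # ask running workers to stop
pool.shutdown(wait=False, cancel_futures=True)  # drop queued, not-yet-started tasks
queued_cancelled = sum(1 for f in futures if f.cancelled())
print(f"{queued_cancelled} queued tasks never started")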
def download_finished(self, total_downloaded, total_skipped, cancelled_by_user): - # This method is the final cleanup point, called by DownloadThread or _handle_future_result status_message = "Cancelled by user" if cancelled_by_user else "Finished" self.log_signal.emit("="*40 + f"\n🏁 Download {status_message}!\n Summary: Downloaded Files={total_downloaded}, Skipped Files={total_skipped}\n" + "="*40) self.progress_label.setText(f"{status_message}: {total_downloaded} downloaded, {total_skipped} skipped.") - self.file_progress_label.setText("") # Clear file progress - - # --- ADDED: Attempt to process any remaining links in queue if not cancelled --- - # This will now trigger the rapid display because _is_download_active() will be false + self.file_progress_label.setText("") + if not cancelled_by_user: self._try_process_next_external_link() - # --- END ADDED --- - # Disconnect signals from single download thread if it was used if self.download_thread: - try: + try: if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.disconnect(self.handle_main_log) if hasattr(self.download_thread, 'add_character_prompt_signal'): self.download_thread.add_character_prompt_signal.disconnect(self.add_character_prompt_signal) if hasattr(self.download_thread, 'finished_signal'): self.download_thread.finished_signal.disconnect(self.finished_signal) if hasattr(self.download_thread, 'receive_add_character_result'): self.character_prompt_response_signal.disconnect(self.download_thread.receive_add_character_result) - # MODIFIED: Ensure disconnection from the correct handler if hasattr(self.download_thread, 'external_link_signal'): self.download_thread.external_link_signal.disconnect(self.handle_external_link_signal) if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.disconnect(self.update_file_progress_display) - except (TypeError, RuntimeError) as e: + except (TypeError, RuntimeError) as e: self.log_signal.emit(f"ℹ️ Note during single-thread signal disconnection: {e}") - self.download_thread = None # Clear reference + self.download_thread = None - # Shutdown thread pool if it exists and hasn't been cleared yet - # Use wait=True here to ensure cleanup before UI re-enables if self.thread_pool: self.log_signal.emit(" Ensuring worker thread pool is shut down...") - # Shutdown might have been initiated by cancel_download, but wait=True ensures completion. - self.thread_pool.shutdown(wait=True, cancel_futures=True) + self.thread_pool.shutdown(wait=True, cancel_futures=True) self.thread_pool = None - self.active_futures = [] # Clear list of futures + self.active_futures = [] - # Clear cancellation event here AFTER threads have likely stopped checking it - # self.cancellation_event.clear() - # Let's clear it in start_download and reset_application_state instead for safety. 
- self.set_ui_enabled(True) # Re-enable UI - self.cancel_btn.setEnabled(False) # Disable cancel button + self.set_ui_enabled(True) + self.cancel_btn.setEnabled(False) - # --- ADDED: Method to toggle log verbosity --- def toggle_log_verbosity(self): self.basic_log_mode = not self.basic_log_mode if self.basic_log_mode: @@ -1638,7 +1903,6 @@ class DownloaderApp(QWidget): else: self.log_verbosity_button.setText("Show Basic Log") self.log_signal.emit("="*20 + " Full Log Mode Enabled " + "="*20) - # --- END ADDED --- def reset_application_state(self): is_running = (self.download_thread and self.download_thread.isRunning()) or \ @@ -1648,21 +1912,20 @@ class DownloaderApp(QWidget): return self.log_signal.emit("🔄 Resetting application state to defaults...") - self._reset_ui_to_defaults() # Reset UI elements to their initial state + self._reset_ui_to_defaults() self.main_log_output.clear() self.external_log_output.clear() - if self.show_external_links: # Re-add header if shown - self.external_log_output.append("🔗 External Links Found:") + if self.show_external_links: + self.external_log_output.append("🔗 External Links Found:") - # --- ADDED: Clear link queue on reset --- self.external_link_queue.clear() + self.extracted_links_cache = [] self._is_processing_external_link_queue = False - # --- END ADDED --- + self._current_link_post_title = None self.progress_label.setText("Progress: Idle") self.file_progress_label.setText("") - # Clear session-specific data with self.downloaded_files_lock: count = len(self.downloaded_files) self.downloaded_files.clear() @@ -1676,21 +1939,17 @@ class DownloaderApp(QWidget): self.processed_posts_count = 0 self.download_counter = 0 self.skip_counter = 0 - # self.external_links = [] # This list seems unused, keeping it commented - self.cancellation_event.clear() # Ensure cancellation event is reset - - # --- ADDED: Reset log verbosity mode --- + self.cancellation_event.clear() + self.basic_log_mode = False if self.log_verbosity_button: self.log_verbosity_button.setText("Show Basic Log") - # --- END ADDED --- self.log_signal.emit("✅ Application reset complete.") def _reset_ui_to_defaults(self): - # Reset all input fields self.link_input.clear() self.dir_input.clear() self.custom_folder_input.clear() @@ -1702,78 +1961,59 @@ class DownloaderApp(QWidget): self.character_search_input.clear() self.thread_count_input.setText("4") - # Reset radio buttons and checkboxes to defaults self.radio_all.setChecked(True) self.skip_zip_checkbox.setChecked(True) self.skip_rar_checkbox.setChecked(True) self.download_thumbnails_checkbox.setChecked(False) self.compress_images_checkbox.setChecked(False) - self.use_subfolders_checkbox.setChecked(True) + self.use_subfolders_checkbox.setChecked(True) self.use_subfolder_per_post_checkbox.setChecked(False) - self.use_multithreading_checkbox.setChecked(True) - self.external_links_checkbox.setChecked(False) + self.use_multithreading_checkbox.setChecked(True) + self.external_links_checkbox.setChecked(False) if self.manga_mode_checkbox: - self.manga_mode_checkbox.setChecked(False) + self.manga_mode_checkbox.setChecked(False) - # Explicitly call update methods that control UI element states - self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked()) - self.update_custom_folder_visibility() - self.update_page_range_enabled_state() - self.update_multithreading_label(self.thread_count_input.text()) - if self.manga_mode_checkbox: - self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked()) - self.filter_character_list("") # Clear 
character list filter - - # --- MODIFIED: Reset thread count state based on checkbox --- + self._handle_filter_mode_change(self.radio_all, True) self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) - # --- END MODIFIED --- - # Reset button states + self.filter_character_list("") + self.download_btn.setEnabled(True) self.cancel_btn.setEnabled(False) if self.reset_button: self.reset_button.setEnabled(True) - # Reset log verbosity button text if self.log_verbosity_button: self.log_verbosity_button.setText("Show Basic Log") def prompt_add_character(self, character_name): - global KNOWN_NAMES - # This method is called via a signal from a worker thread. - # It interacts with the GUI, so it's correctly placed in the GUI class. + global KNOWN_NAMES reply = QMessageBox.question(self, "Add Filter Name to Known List?", f"The name '{character_name}' was encountered or used as a filter.\nIt's not in your known names list (used for folder suggestions).\nAdd it now?", QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) result = (reply == QMessageBox.Yes) if result: - self.new_char_input.setText(character_name) # Populate input for add_new_character - # Call add_new_character, which now includes similarity checks and its own QMessageBox - # The result of add_new_character (True/False) reflects if it was actually added. - if self.add_new_character(): + self.new_char_input.setText(character_name) + if self.add_new_character(): self.log_signal.emit(f"✅ Added '{character_name}' to known names via background prompt.") else: - # add_new_character handles its own logging and popups if it fails or user cancels similarity warning - result = False # Update result if add_new_character decided not to add + result = False self.log_signal.emit(f"ℹ️ Adding '{character_name}' via background prompt was declined or failed (e.g., similarity warning, duplicate).") - # Send the final outcome (whether it was added or user said yes initially but then cancelled) self.character_prompt_response_signal.emit(result) def receive_add_character_result(self, result): - # This method receives the result from prompt_add_character (after it has tried to add the name) - # and is typically connected to the worker thread's logic to unblock it. - with QMutexLocker(self.prompt_mutex): # Ensure thread-safe access if worker modifies shared state based on this + with QMutexLocker(self.prompt_mutex): self._add_character_response = result self.log_signal.emit(f" Main thread received character prompt response: {'Action resulted in addition/confirmation' if result else 'Action resulted in no addition/declined'}") if __name__ == '__main__': - import traceback + import traceback try: qt_app = QApplication(sys.argv) - if getattr(sys, 'frozen', False): + if getattr(sys, 'frozen', False): base_dir = sys._MEIPASS else: base_dir = os.path.dirname(os.path.abspath(__file__)) - + icon_path = os.path.join(base_dir, 'Kemono.ico') if os.path.exists(icon_path): qt_app.setWindowIcon(QIcon(icon_path)) @@ -1782,14 +2022,27 @@ if __name__ == '__main__': downloader_app_instance = DownloaderApp() downloader_app_instance.show() + + # --- ADDED: Show Tour Dialog if needed --- + if TourDialog: # Check if TourDialog was imported successfully + tour_result = TourDialog.run_tour_if_needed(downloader_app_instance) + if tour_result == QDialog.Accepted: + print("Tour completed by user.") + elif tour_result == QDialog.Rejected: + # This means tour was skipped OR already shown. 
+ # You can use TourDialog.settings.value(TourDialog.TOUR_SHOWN_KEY) + # to differentiate if needed, but run_tour_if_needed handles the "show once" logic. + print("Tour skipped or was already shown.") + # --- END ADDED --- + exit_code = qt_app.exec_() - print(f"Application finished with exit code: {exit_code}") + print(f"Application finished with exit code: {exit_code}") sys.exit(exit_code) except SystemExit: pass # Allow clean exit except Exception as e: print("--- CRITICAL APPLICATION ERROR ---") print(f"An unhandled exception occurred: {e}") - traceback.print_exc() + traceback.print_exc() print("--- END CRITICAL ERROR ---") - sys.exit(1) + sys.exit(1) \ No newline at end of file diff --git a/readme.md b/readme.md index 2b5b4cd..1042c77 100644 --- a/readme.md +++ b/readme.md @@ -147,3 +147,4 @@ Contributions are welcome! Open an issue or submit a pull request if you have im --- +Jett, Neon, Reyna, Clove, Viper, Sage \ No newline at end of file diff --git a/tour.py b/tour.py new file mode 100644 index 0000000..df0877d --- /dev/null +++ b/tour.py @@ -0,0 +1,317 @@ +import sys +import traceback # Added for enhanced error reporting +from PyQt5.QtWidgets import ( + QApplication, QDialog, QWidget, QLabel, QPushButton, QVBoxLayout, QHBoxLayout, + QStackedWidget, QSpacerItem, QSizePolicy, QCheckBox, QDesktopWidget +) +from PyQt5.QtCore import Qt, QSettings, pyqtSignal + +class TourStepWidget(QWidget): + """A single step/page in the tour.""" + def __init__(self, title_text, content_text, parent=None): + super().__init__(parent) + layout = QVBoxLayout(self) + layout.setContentsMargins(20, 20, 20, 20) # Padding around content + layout.setSpacing(15) # Spacing between title and content + + title_label = QLabel(title_text) + title_label.setAlignment(Qt.AlignCenter) + title_label.setStyleSheet("font-size: 18px; font-weight: bold; color: #E0E0E0; padding-bottom: 10px;") + + content_label = QLabel(content_text) + content_label.setWordWrap(True) + content_label.setAlignment(Qt.AlignLeft) # Align text to the left for readability + content_label.setTextFormat(Qt.RichText) + content_label.setStyleSheet("font-size: 12px; color: #C8C8C8; line-height: 1.6;") + + layout.addWidget(title_label) + layout.addWidget(content_label) + layout.addStretch(1) + +class TourDialog(QDialog): + """ + A dialog that shows a multi-page tour to the user. + Includes a "Never show again" checkbox. + Uses QSettings to remember this preference. 
+ """ + tour_finished_normally = pyqtSignal() + tour_skipped = pyqtSignal() + + CONFIG_ORGANIZATION_NAME = "KemonoDownloader" + CONFIG_APP_NAME_TOUR = "ApplicationTour" + TOUR_SHOWN_KEY = "neverShowTourAgainV2" + + def __init__(self, parent=None): + super().__init__(parent) + self.settings = QSettings(self.CONFIG_ORGANIZATION_NAME, self.CONFIG_APP_NAME_TOUR) + self.current_step = 0 + + self.setWindowTitle("Welcome to Kemono Downloader!") + self.setModal(True) + self.setMinimumSize(520, 450) + self.setStyleSheet(""" + QDialog { + background-color: #2E2E2E; + border: 1px solid #5A5A5A; + } + QLabel { + color: #E0E0E0; + } + QCheckBox { + color: #C0C0C0; + font-size: 10pt; + spacing: 5px; + } + QCheckBox::indicator { + width: 13px; + height: 13px; + } + QPushButton { + background-color: #555; + color: #F0F0F0; + border: 1px solid #6A6A6A; + padding: 8px 15px; + border-radius: 4px; + min-height: 25px; + font-size: 11pt; + } + QPushButton:hover { + background-color: #656565; + } + QPushButton:pressed { + background-color: #4A4A4A; + } + """) + self._init_ui() + self._center_on_screen() # Call method to center the dialog + + def _center_on_screen(self): + """Centers the dialog on the screen.""" + try: + # Get the geometry of the screen + screen_geometry = QDesktopWidget().screenGeometry() + # Get the geometry of the dialog + dialog_geometry = self.frameGeometry() + + # Calculate the center point for the dialog + center_point = screen_geometry.center() + dialog_geometry.moveCenter(center_point) + + # Move the top-left point of the dialog to the calculated position + self.move(dialog_geometry.topLeft()) + print(f"[Tour] Dialog centered at: {dialog_geometry.topLeft()}") + except Exception as e: + print(f"[Tour] Error centering dialog: {e}") + + + def _init_ui(self): + main_layout = QVBoxLayout(self) + main_layout.setContentsMargins(0, 0, 0, 0) + main_layout.setSpacing(0) + + self.stacked_widget = QStackedWidget() + main_layout.addWidget(self.stacked_widget, 1) + + # --- Define Tour Steps --- + step1_content = ( + "Hello! This quick tour will walk you through the main features of the Kemono Downloader. " + "Our goal is to help you easily download content from Kemono and Coomer.

" + " • Use the Next and Back buttons to navigate.
" + " • Click Skip Tour to close this guide at any time.
" + " • Check 'Never show this tour again' if you don't want to see this on future startups." + ) + self.step1 = TourStepWidget("👋 Welcome!", step1_content) + + step2_content = ( + "Let's start with the basics for downloading:

" + " • 🔗 Kemono Creator/Post URL:
" + " Paste the full web address (URL) of a creator's page (e.g., https://kemono.su/patreon/user/12345) " + "or a specific post (e.g., .../post/98765). This tells the downloader where to look for content.

" + " • 📁 Download Location:
" + " Click 'Browse...' to choose a folder on your computer where all downloaded files will be saved. " + "It's important to select this before starting.

" + " • 📄 Page Range (for Creator URLs only):
" + " If you're downloading from a creator's page, you can specify a range of pages to download (e.g., pages 2 to 5). " + "Leave blank to try and download all pages. This is disabled if you enter a single post URL or use Manga Mode." + ) + self.step2 = TourStepWidget("① Getting Started: URLs & Location", step2_content) + + step3_content = ( + "Refine what you download with these filters:

" + " • 🎯 Filter by Character(s):
" + " Enter character names, separated by commas (e.g., Tifa, Aerith). " + "If 'Separate Folders by Name/Title' is on, this helps sort files into folders. " + "In Manga Mode, this filters posts by matching the post title. In Normal Mode, it filters individual files by their filename.

" + " • 🚫 Skip Posts/Files with Words:
" + " Enter words, separated by commas (e.g., WIP, sketch). " + "Files or posts containing these words in their name (or post title if 'Separate Folders' is off and not Manga Mode) will be skipped.

" + " • Filter Files (Radio Buttons):
" + " - All: Download all file types.
" + " - Images/GIFs: Only download common image formats and GIFs.
" + " - Videos: Only download common video formats.
" + " - 🔗 Only Links: Don't download files; instead, extract and display any external links found in post descriptions (like Mega, Google Drive links). The log area will show these links." + ) + self.step3 = TourStepWidget("② Filtering Your Downloads", step3_content) + + step4_content = ( + "More options to customize your downloads:

" + " • Skip .zip / Skip .rar:
" + " Check these to avoid downloading .zip or .rar archive files.

" + " • Download Thumbnails Only:
" + " If checked, only downloads the small preview images (thumbnails) instead of full-sized files. Useful for a quick overview.

" + " • Compress Large Images:
" + " If you have the 'Pillow' library installed, this will try to convert very large images (over 1.5MB) to a smaller WebP format to save space. If WebP isn't smaller, the original is kept.

" + " • 🗄️ Custom Folder Name (Single Post Only):
" + " When downloading a single post URL and using subfolders, you can type a specific name here for that post's folder." + ) + self.step4 = TourStepWidget("③ Fine-Tuning: Archives & Images", step4_content) + + step5_content = ( + "Organize your downloads and manage performance:

" + " • ⚙️ Separate Folders by Name/Title:
" + " If checked, the downloader tries to create subfolders based on character names (if you used the Character Filter) or by deriving a name from the post title using your 'Known Shows/Characters' list.

" + " • Subfolder per Post:
" + " Only active if 'Separate Folders' is on. Creates an additional subfolder for each individual post inside the character/title folder, named like 'PostID_PostTitle'.

" + " • 🚀 Use Multithreading (Threads):
" + " For creator pages, this can speed up downloads by processing multiple posts at once. For single post URLs, it always uses one thread. Be cautious with very high thread counts.

" + " • 📖 Manga/Comic Mode (Creator URLs only):
" + " Downloads posts from oldest to newest. It also renames files based on the post title and an extracted or generated sequence number (e.g., MangaTitle - 01.jpg, MangaTitle - 02.jpg). Best used with a character filter matching the series title for correct naming.

" + " • 🎭 Known Shows/Characters:
" + " Add names here (e.g., a game title, a character's full name). When 'Separate Folders' is on and no character filter is used, the app looks for these known names in post titles to create appropriate folders." + ) + self.step5 = TourStepWidget("④ Organization & Performance", step5_content) + + step6_content = ( + "Monitoring and Controls:

" + " • 📜 Progress Log / Extracted Links Log:
" + " This area shows detailed messages about the download process or lists extracted links if 'Only Links' mode is active.

" + " • Show External Links in Log (Checkbox):
" + " If checked (and not in 'Only Links' mode), a second log panel appears to show external links found in post descriptions.

" + " • Show Basic/Full Log (Button):
" + " Toggles the main log between showing all messages (Full) or only important ones (Basic).

" + " • 🔄 Reset (Button):
" + " Clears all input fields and logs to their default state. Only works when no download is active.

" + " • ⬇️ Start Download / ❌ Cancel (Buttons):
" + " Start begins the process. Cancel stops an ongoing download." + "

You're ready to start downloading! Click 'Finish'." + ) + self.step6 = TourStepWidget("⑤ Logs & Final Controls", step6_content) + + + self.tour_steps = [self.step1, self.step2, self.step3, self.step4, self.step5, self.step6] + for step_widget in self.tour_steps: + self.stacked_widget.addWidget(step_widget) + + bottom_controls_layout = QVBoxLayout() + bottom_controls_layout.setContentsMargins(15, 10, 15, 15) + bottom_controls_layout.setSpacing(10) + + self.never_show_again_checkbox = QCheckBox("Never show this tour again") + bottom_controls_layout.addWidget(self.never_show_again_checkbox, 0, Qt.AlignLeft) + + buttons_layout = QHBoxLayout() + buttons_layout.setSpacing(10) + + self.skip_button = QPushButton("Skip Tour") + self.skip_button.clicked.connect(self._skip_tour_action) + + self.back_button = QPushButton("Back") + self.back_button.clicked.connect(self._previous_step) + self.back_button.setEnabled(False) + + self.next_button = QPushButton("Next") + self.next_button.clicked.connect(self._next_step_action) + self.next_button.setDefault(True) + + buttons_layout.addWidget(self.skip_button) + buttons_layout.addStretch(1) + buttons_layout.addWidget(self.back_button) + buttons_layout.addWidget(self.next_button) + + bottom_controls_layout.addLayout(buttons_layout) + main_layout.addLayout(bottom_controls_layout) + + self._update_button_states() + + def _handle_exit_actions(self): + if self.never_show_again_checkbox.isChecked(): + self.settings.setValue(self.TOUR_SHOWN_KEY, True) + self.settings.sync() + print(f"[Tour] '{self.TOUR_SHOWN_KEY}' setting updated to True.") + else: + print(f"[Tour] '{self.TOUR_SHOWN_KEY}' setting not set to True (checkbox was unchecked on exit).") + + + def _next_step_action(self): + if self.current_step < len(self.tour_steps) - 1: + self.current_step += 1 + self.stacked_widget.setCurrentIndex(self.current_step) + else: + self._handle_exit_actions() + self.tour_finished_normally.emit() + self.accept() + self._update_button_states() + + def _previous_step(self): + if self.current_step > 0: + self.current_step -= 1 + self.stacked_widget.setCurrentIndex(self.current_step) + self._update_button_states() + + def _skip_tour_action(self): + self._handle_exit_actions() + self.tour_skipped.emit() + self.reject() + + def _update_button_states(self): + if self.current_step == len(self.tour_steps) - 1: + self.next_button.setText("Finish") + else: + self.next_button.setText("Next") + self.back_button.setEnabled(self.current_step > 0) + + @staticmethod + def run_tour_if_needed(parent_app_window): + print("[Tour] Attempting to run tour (run_tour_if_needed called)...") + try: + settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR) + never_show_again = settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool) + print(f"[Tour] Current '{TourDialog.TOUR_SHOWN_KEY}' setting is: {never_show_again}") + + if never_show_again: + print("[Tour] Skipping tour because 'Never show again' was previously selected.") + return QDialog.Rejected + + print("[Tour] 'Never show again' is False. Proceeding to create and show tour dialog.") + tour_dialog = TourDialog(parent_app_window) # _center_on_screen is called in __init__ + print("[Tour] TourDialog instance created successfully.") + + result = tour_dialog.exec_() + print(f"[Tour] Tour dialog exec_() finished. 
Result code: {result} (Accepted={QDialog.Accepted}, Rejected={QDialog.Rejected})") + return result + except Exception as e: + print(f"[Tour] CRITICAL ERROR in run_tour_if_needed: {e}") + traceback.print_exc() + return QDialog.Rejected + +if __name__ == '__main__': + app = QApplication(sys.argv) + # print("[Tour Test] Resetting 'Never show again' flag for testing purposes.") + # test_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR) + # print(f"[Tour Test] Before reset, '{TourDialog.TOUR_SHOWN_KEY}' is: {test_settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)}") + # test_settings.setValue(TourDialog.TOUR_SHOWN_KEY, False) + # test_settings.sync() + # print(f"[Tour Test] After reset, '{TourDialog.TOUR_SHOWN_KEY}' is: {test_settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)}") + + print("[Tour Test] Running tour standalone...") + result = TourDialog.run_tour_if_needed(None) + + if result == QDialog.Accepted: + print("[Tour Test] Tour dialog was accepted (Finished).") + elif result == QDialog.Rejected: + print("[Tour Test] Tour dialog was rejected (Skipped or previously set to 'Never show again').") + + final_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR) + print(f"[Tour Test] Final state of '{TourDialog.TOUR_SHOWN_KEY}' in settings: {final_settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)}") + + sys.exit() \ No newline at end of file
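
The "Only Links" mode described in the "② Filtering Your Downloads" step extracts external links from post descriptions instead of downloading files; the extraction code itself is not part of this patch. Below is a minimal stdlib-only sketch of one plausible approach, assuming hypothetical names (extract_external_links, SUPPORTED_LINK_HOSTS):

import re

# Hosts of interest; an assumption for illustration, not taken from the patch.
SUPPORTED_LINK_HOSTS = ('mega.nz', 'drive.google.com', 'dropbox.com')

def extract_external_links(post_description_html):
    """Return file-host links found in a post's HTML description."""
    # Grab href targets and bare URLs alike; stop at quotes, brackets, whitespace.
    urls = re.findall(r'https?://[^\s"\'<>]+', post_description_html or '')
    return [u for u in urls if any(host in u for host in SUPPORTED_LINK_HOSTS)]

# extract_external_links('<a href="https://mega.nz/file/abc">Mega</a>')
# -> ['https://mega.nz/file/abc']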
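
The "Compress Large Images" option in the "③ Fine-Tuning: Archives & Images" step is described as re-encoding images over 1.5MB as WebP when Pillow is available, keeping the original if WebP is not smaller. The implementation is not shown in this patch; below is a minimal sketch of that rule, with the hypothetical helper name compress_to_webp_if_smaller:

import io
import os
from PIL import Image  # Pillow; the tour notes the feature requires it

SIZE_THRESHOLD_BYTES = int(1.5 * 1024 * 1024)  # "over 1.5MB" per the tour text

def compress_to_webp_if_smaller(image_path):
    """Return (bytes, extension) to save: WebP if smaller, else the original."""
    with open(image_path, 'rb') as f:
        original_bytes = f.read()
    original_ext = os.path.splitext(image_path)[1]
    if len(original_bytes) <= SIZE_THRESHOLD_BYTES:
        return original_bytes, original_ext  # small enough; leave untouched
    buffer = io.BytesIO()
    # Re-encode as WebP (color-mode edge cases omitted for brevity).
    Image.open(io.BytesIO(original_bytes)).save(buffer, format='WEBP', quality=80)
    webp_bytes = buffer.getvalue()
    if len(webp_bytes) < len(original_bytes):
        return webp_bytes, '.webp'
    return original_bytes, original_ext  # WebP wasn't smaller; keep the original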
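
The "④ Organization & Performance" step says multithreading processes multiple posts at once for creator pages, while a single post URL always uses one thread. A minimal sketch of that dispatch rule, assuming hypothetical names (download_post, url_is_single_post):

from concurrent.futures import ThreadPoolExecutor

def download_all(posts, url_is_single_post, requested_threads, download_post):
    """Run one worker for a single post URL, several for a creator feed."""
    workers = 1 if url_is_single_post else max(1, requested_threads)
    with ThreadPoolExecutor(max_workers=workers) as pool:
        # Each post is one unit of work; list() drains the iterator so worker
        # exceptions propagate here instead of being silently dropped.
        list(pool.map(download_post, posts))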
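
The same step describes Manga/Comic Mode naming: files become "Title - NN.ext" (e.g., MangaTitle - 01.jpg), with the sequence number extracted from the post title or generated from the oldest-to-newest order. A minimal sketch of that naming rule only; manga_filename is a hypothetical name:

import re

def manga_filename(series_title, post_title, fallback_index, extension):
    """Build 'Series - NN.ext', preferring a number found in the post title."""
    match = re.search(r'\d+', post_title or '')
    number = int(match.group()) if match else fallback_index
    return f"{series_title} - {number:02d}{extension}"

# manga_filename('MangaTitle', 'Chapter 7', 1, '.jpg')  -> 'MangaTitle - 07.jpg'
# manga_filename('MangaTitle', 'Extras', 12, '.png')    -> 'MangaTitle - 12.png'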