diff --git a/Known.txt b/Known.txt index 58451fa..e69de29 100644 --- a/Known.txt +++ b/Known.txt @@ -1,8 +0,0 @@ -Boa Hancock -Hairy D.va -Mercy -Misc -Nami -Robin -Sombra -Yamato diff --git a/downloader_utils.py b/downloader_utils.py index 9a9d647..9e6df21 100644 --- a/downloader_utils.py +++ b/downloader_utils.py @@ -38,10 +38,7 @@ SKIP_SCOPE_BOTH = "both" CHAR_SCOPE_TITLE = "title" CHAR_SCOPE_FILES = "files" CHAR_SCOPE_BOTH = "both" - -# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed. -DUPLICATE_MODE_DELETE = "delete" -DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move" +CHAR_SCOPE_COMMENTS = "comments" fastapi_app = None KNOWN_NAMES = [] @@ -99,6 +96,15 @@ def clean_filename(name): cleaned = re.sub(r'\s+', '_', cleaned) return cleaned if cleaned else "untitled_file" +def strip_html_tags(html_text): + if not html_text: return "" + # First, unescape HTML entities + text = html.unescape(html_text) + # Then, remove HTML tags using a simple regex + # This is a basic approach and might not handle all complex HTML perfectly + clean_pattern = re.compile('<.*?>') + cleaned_text = re.sub(clean_pattern, '', text) + return cleaned_text.strip() def extract_folder_name_from_title(title, unwanted_keywords): if not title: return 'Uncategorized' @@ -221,6 +227,31 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev except Exception as e: raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}") +def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None): + if cancellation_event and cancellation_event.is_set(): + logger(" Comment fetch cancelled before request.") + raise RuntimeError("Comment fetch operation cancelled by user.") + + comments_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}/comments" + logger(f" Fetching comments: {comments_api_url}") + try: + response = requests.get(comments_api_url, 
headers=headers, timeout=(10, 30)) # Shorter timeout for comments + response.raise_for_status() + if 'application/json' not in response.headers.get('Content-Type', '').lower(): + logger(f"⚠️ Unexpected content type from comments API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}") + return [] # Return empty list if not JSON + return response.json() + except requests.exceptions.Timeout: + raise RuntimeError(f"Timeout fetching comments for post {post_id} from {comments_api_url}") + except requests.exceptions.RequestException as e: + err_msg = f"Error fetching comments for post {post_id} from {comments_api_url}: {e}" + if e.response is not None: + err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})" + raise RuntimeError(err_msg) + except ValueError as e: # JSONDecodeError inherits from ValueError + raise RuntimeError(f"Error decoding JSON from comments API for post {post_id} ({comments_api_url}): {e}. Response text: {response.text[:200]}") + except Exception as e: + raise RuntimeError(f"Unexpected error fetching comments for post {post_id} ({comments_api_url}): {e}") def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False, cancellation_event=None): headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'} @@ -412,7 +443,7 @@ class PostProcessorWorker: char_filter_scope=CHAR_SCOPE_FILES, remove_from_filename_words_list=None, allow_multipart_download=True, - duplicate_file_mode=DUPLICATE_MODE_DELETE): + ): # Removed duplicate_file_mode and session-wide tracking self.post = post_data self.download_root = download_root self.known_names = known_names @@ -450,7 +481,7 @@ class PostProcessorWorker: self.char_filter_scope = char_filter_scope self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else [] self.allow_multipart_download = allow_multipart_download - self.duplicate_file_mode = duplicate_file_mode # 
This will be the effective mode (possibly overridden by main.py for manga) + # self.duplicate_file_mode and session-wide tracking removed if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found.") @@ -469,10 +500,7 @@ class PostProcessorWorker: post_title="", file_index_in_post=0, num_files_in_this_post=1): was_original_name_kept_flag = False final_filename_saved_for_return = "" - - # current_target_folder_path is the actual folder where the file will be saved. - # It starts as the main character/post folder (target_folder_path) by default. - current_target_folder_path = target_folder_path + # target_folder_path is the base character/post folder. if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False @@ -561,44 +589,29 @@ class PostProcessorWorker: self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).") return 0, 1, api_original_filename, False + # --- Pre-Download Duplicate Handling (Standard Mode Only - Manga mode has its own suffixing) --- if not self.manga_mode_active: - # --- Pre-Download Duplicate Handling (Standard Mode Only) --- - is_duplicate_for_main_folder_by_path = os.path.exists(os.path.join(target_folder_path, filename_to_save_in_main_path)) and \ - os.path.getsize(os.path.join(target_folder_path, filename_to_save_in_main_path)) > 0 + path_in_main_folder_check = os.path.join(target_folder_path, filename_to_save_in_main_path) + is_duplicate_by_path = os.path.exists(path_in_main_folder_check) and \ + os.path.getsize(path_in_main_folder_check) > 0 - is_duplicate_for_main_folder_by_session_name = False + is_duplicate_by_session_name = False with self.downloaded_files_lock: if filename_to_save_in_main_path in self.downloaded_files: - is_duplicate_for_main_folder_by_session_name = True + is_duplicate_by_session_name = True - if is_duplicate_for_main_folder_by_path or is_duplicate_for_main_folder_by_session_name: - if self.duplicate_file_mode == 
DUPLICATE_MODE_DELETE: - reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name" - self.logger(f" -> Delete Duplicate ({reason}): '{filename_to_save_in_main_path}'. Skipping download.") - with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) - return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag - - elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER: - reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name" - self.logger(f" -> Pre-DL Move ({reason}): '{filename_to_save_in_main_path}'. Will target 'Duplicate' subfolder.") - current_target_folder_path = os.path.join(target_folder_path, "Duplicate") - with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) + if is_duplicate_by_path or is_duplicate_by_session_name: + reason = "Path Exists" if is_duplicate_by_path else "Session Name" + self.logger(f" -> Skip Duplicate ({reason}, Pre-DL): '{filename_to_save_in_main_path}'. Skipping download.") + with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark as processed + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag + # Ensure base target folder exists (used for .part file with multipart) try: - os.makedirs(current_target_folder_path, exist_ok=True) + os.makedirs(target_folder_path, exist_ok=True) # For .part file except OSError as e: - self.logger(f" ❌ Critical error creating directory '{current_target_folder_path}': {e}. Skipping file '{api_original_filename}'.") + self.logger(f" ❌ Critical error creating directory '{target_folder_path}': {e}. Skipping file '{api_original_filename}'.") return 0, 1, api_original_filename, False - - # If mode is MOVE (and not manga mode), and current_target_folder_path is now "Duplicate", - # check if the file *already* exists by its base name in this "Duplicate" folder. 
(Standard Mode Only) - if not self.manga_mode_active and \ - self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \ - "Duplicate" in current_target_folder_path.split(os.sep) and \ - os.path.exists(os.path.join(current_target_folder_path, filename_to_save_in_main_path)): - self.logger(f" -> File '{filename_to_save_in_main_path}' already exists in '{os.path.basename(current_target_folder_path)}' subfolder. Skipping download.") - # The name was already added to downloaded_files if it was a pre-DL move. - return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag # --- Download Attempt --- max_retries = 3 @@ -633,9 +646,10 @@ class PostProcessorWorker: if self.signals and hasattr(self.signals, 'file_download_status_signal'): self.signals.file_download_status_signal.emit(False) - mp_save_path_base = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + # .part file is always based on the main target_folder_path and filename_to_save_in_main_path + mp_save_path_base_for_part = os.path.join(target_folder_path, filename_to_save_in_main_path) mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts( - file_url, mp_save_path_base, total_size_bytes, num_parts_for_file, headers, + file_url, mp_save_path_base_for_part, total_size_bytes, num_parts_for_file, headers, api_original_filename, self.signals, self.cancellation_event, skip_event, self.logger ) if mp_success: @@ -705,130 +719,132 @@ class PostProcessorWorker: if file_content_bytes: file_content_bytes.close() return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag - if not self.manga_mode_active: - # --- Post-Download Hash Check (Standard Mode Only) --- - with self.downloaded_file_hashes_lock: - if calculated_file_hash in self.downloaded_file_hashes: - if self.duplicate_file_mode == DUPLICATE_MODE_DELETE: - self.logger(f" -> Delete Duplicate (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). 
Skipping save.") - with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) - if file_content_bytes: file_content_bytes.close() - return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag - - elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER: - self.logger(f" -> Post-DL Move (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). Content already downloaded.") - if "Duplicate" not in current_target_folder_path.split(os.sep): - current_target_folder_path = os.path.join(target_folder_path, "Duplicate") - self.logger(f" Redirecting to 'Duplicate' subfolder: '{current_target_folder_path}'") - # Ensure "Duplicate" folder exists if this is a new redirection due to hash - try: os.makedirs(current_target_folder_path, exist_ok=True) - except OSError as e_mkdir_hash: self.logger(f" Error creating Duplicate folder for hash collision: {e_mkdir_hash}") - with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) - - # --- Final Filename Determination for Saving --- - filename_for_actual_save = filename_to_save_in_main_path + # --- Universal Post-Download Hash Check --- + with self.downloaded_file_hashes_lock: + if calculated_file_hash in self.downloaded_file_hashes: + self.logger(f" -> Skip Saving Duplicate (Hash Match): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...).") + with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark logical name + if file_content_bytes: file_content_bytes.close() + # If it was a multipart download, its .part file needs cleanup + if not isinstance(file_content_bytes, BytesIO): # Indicates multipart download + part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part") + if os.path.exists(part_file_to_remove): + try: os.remove(part_file_to_remove); + except OSError: self.logger(f" -> Failed to remove .part file for hash duplicate: 
{part_file_to_remove}") + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag - # If mode is MOVE (and not manga mode) and the file is destined for the main folder, - # but a file with that name *now* exists (e.g. race condition, or different file with same name not caught by hash), - # reroute it to the "Duplicate" folder. - if not self.manga_mode_active and \ - self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \ - current_target_folder_path == target_folder_path and \ - os.path.exists(os.path.join(current_target_folder_path, filename_for_actual_save)): - self.logger(f" -> Post-DL Move (Late Name Collision in Main): '{filename_for_actual_save}'. Moving to 'Duplicate'.") - current_target_folder_path = os.path.join(target_folder_path, "Duplicate") - try: # Ensure "Duplicate" folder exists if this is a new redirection - os.makedirs(current_target_folder_path, exist_ok=True) - except OSError as e_mkdir: self.logger(f" Error creating Duplicate folder during late move: {e_mkdir}") - # The name filename_to_save_in_main_path was already added to downloaded_files if it was a pre-DL name collision. - # If it was a hash collision that got rerouted, it was also added. - # If this is a new reroute due to late name collision, ensure it's marked. + # --- Determine Save Location and Final Filename --- + effective_save_folder = target_folder_path # Default: main character/post folder + # filename_to_save_in_main_path is the logical name after cleaning, manga styling, word removal + filename_after_styling_and_word_removal = filename_to_save_in_main_path + # "Move" logic and "Duplicate" subfolder logic removed. + # effective_save_folder will always be target_folder_path. + + try: # Ensure the chosen save folder (main or Duplicate) exists + os.makedirs(effective_save_folder, exist_ok=True) + except OSError as e: + self.logger(f" ❌ Critical error creating directory '{effective_save_folder}': {e}. 
Skipping file '{api_original_filename}'.") + if file_content_bytes: file_content_bytes.close() + # Cleanup .part file if multipart + if not isinstance(file_content_bytes, BytesIO): + part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part") + if os.path.exists(part_file_to_remove): os.remove(part_file_to_remove) + return 0, 1, api_original_filename, False + + # --- Image Compression --- + # This operates on file_content_bytes (which is BytesIO or a file handle from multipart) + # It might change filename_after_styling_and_word_removal's extension (e.g., .jpg to .webp) + # and returns new data_to_write_after_compression (BytesIO) or original file_content_bytes. + data_to_write_after_compression = file_content_bytes + filename_after_compression = filename_after_styling_and_word_removal - # Apply numeric suffix renaming (_1, _2) *only if needed within the current_target_folder_path* - # This means: - # - If current_target_folder_path is the main folder (and not MOVE mode, or MOVE mode but file was unique): - # Renaming happens if a file with filename_for_actual_save exists there. - # - If current_target_folder_path is "Duplicate" (because of MOVE mode): - # Renaming happens if filename_for_actual_save exists *within "Duplicate"*. 
- counter = 1 - base_name_final_coll, ext_final_coll = os.path.splitext(filename_for_actual_save) - temp_filename_final_check = filename_for_actual_save - while os.path.exists(os.path.join(current_target_folder_path, temp_filename_final_check)): - temp_filename_final_check = f"{base_name_final_coll}_{counter}{ext_final_coll}" - counter += 1 - if temp_filename_final_check != filename_for_actual_save: - self.logger(f" Final rename for target folder '{os.path.basename(current_target_folder_path)}': '{temp_filename_final_check}' (was '{filename_for_actual_save}')") - filename_for_actual_save = temp_filename_final_check - - bytes_to_write = file_content_bytes - final_filename_after_processing = filename_for_actual_save - current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing) - is_img_for_compress_check = is_image(api_original_filename) if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024): self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...") try: - bytes_to_write.seek(0) - with Image.open(bytes_to_write) as img_obj: + file_content_bytes.seek(0) + with Image.open(file_content_bytes) as img_obj: if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA') elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB') compressed_bytes_io = BytesIO() img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4) compressed_size = compressed_bytes_io.getbuffer().nbytes - if compressed_size < downloaded_size_bytes * 0.9: + if compressed_size < downloaded_size_bytes * 0.9: # If significantly smaller self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.") - if hasattr(bytes_to_write, 'close'): bytes_to_write.close() - - original_part_file_path = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for .part - if 
os.path.exists(original_part_file_path): - os.remove(original_part_file_path) - - bytes_to_write = compressed_bytes_io; bytes_to_write.seek(0) - base_name_orig, _ = os.path.splitext(filename_for_actual_save) - final_filename_after_processing = base_name_orig + '.webp' - current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing) - self.logger(f" Updated filename (compressed): {final_filename_after_processing}") + data_to_write_after_compression = compressed_bytes_io; data_to_write_after_compression.seek(0) + base_name_orig, _ = os.path.splitext(filename_after_compression) + filename_after_compression = base_name_orig + '.webp' + self.logger(f" Updated filename (compressed): {filename_after_compression}") else: - self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0) + self.logger(f" Compression skipped: WebP not significantly smaller."); file_content_bytes.seek(0) # Reset original stream + data_to_write_after_compression = file_content_bytes # Use original except Exception as comp_e: - self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); bytes_to_write.seek(0) + self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. 
Saving original."); file_content_bytes.seek(0) + data_to_write_after_compression = file_content_bytes # Use original - if final_filename_after_processing != filename_for_actual_save and \ - os.path.exists(current_save_path_final) and os.path.getsize(current_save_path_final) > 0: - self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{os.path.basename(current_target_folder_path)}'.") - with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) - if bytes_to_write and hasattr(bytes_to_write, 'close'): bytes_to_write.close() - return 0, 1, final_filename_after_processing, was_original_name_kept_flag + # --- Final Numeric Suffixing in the effective_save_folder --- + final_filename_on_disk = filename_after_compression # This is the name after potential compression + temp_base, temp_ext = os.path.splitext(final_filename_on_disk) + suffix_counter = 1 + while os.path.exists(os.path.join(effective_save_folder, final_filename_on_disk)): + final_filename_on_disk = f"{temp_base}_{suffix_counter}{temp_ext}" + suffix_counter += 1 + + if final_filename_on_disk != filename_after_compression: + self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')") + + # --- Save File --- + final_save_path = os.path.join(effective_save_folder, final_filename_on_disk) try: - os.makedirs(current_target_folder_path, exist_ok=True) + # data_to_write_after_compression is BytesIO (single stream, or compressed multipart) + # OR it's the original file_content_bytes (which is a file handle if uncompressed multipart) - if isinstance(bytes_to_write, BytesIO): - with open(current_save_path_final, 'wb') as f_out: - f_out.write(bytes_to_write.getvalue()) - else: - if hasattr(bytes_to_write, 'close'): bytes_to_write.close() - source_part_file = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for 
.part - os.rename(source_part_file, current_save_path_final) + if data_to_write_after_compression is file_content_bytes and not isinstance(file_content_bytes, BytesIO): + # This means uncompressed multipart download. Original .part file handle is file_content_bytes. + # The .part file is at target_folder_path/filename_to_save_in_main_path.part + original_part_file_actual_path = file_content_bytes.name + file_content_bytes.close() # Close handle first + os.rename(original_part_file_actual_path, final_save_path) + self.logger(f" Renamed .part file to final: {final_save_path}") + else: # Single stream download, or compressed multipart. Write from BytesIO. + with open(final_save_path, 'wb') as f_out: + f_out.write(data_to_write_after_compression.getvalue()) + + # If original was multipart and then compressed, clean up original .part file + if data_to_write_after_compression is not file_content_bytes and not isinstance(file_content_bytes, BytesIO): + original_part_file_actual_path = file_content_bytes.name + file_content_bytes.close() + if os.path.exists(original_part_file_actual_path): + try: os.remove(original_part_file_actual_path) + except OSError as e_rem: self.logger(f" -> Failed to remove .part after compression: {e_rem}") with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash) - with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) + with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Track by logical name - final_filename_saved_for_return = final_filename_after_processing - self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(current_target_folder_path)}'") + final_filename_saved_for_return = final_filename_on_disk + self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / 
(1024*1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'") + # Session-wide base name tracking removed. time.sleep(0.05) return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag except Exception as save_err: - self.logger(f"❌ Save Fail for '{final_filename_after_processing}': {save_err}") - if os.path.exists(current_save_path_final): - try: os.remove(current_save_path_final); - except OSError: self.logger(f" -> Failed to remove partially saved file: {current_save_path_final}") + self.logger(f"❌ Save Fail for '{final_filename_on_disk}': {save_err}") + if os.path.exists(final_save_path): + try: os.remove(final_save_path); + except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}") return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag finally: - if bytes_to_write and hasattr(bytes_to_write, 'close'): - bytes_to_write.close() + # Ensure all handles are closed + if data_to_write_after_compression and hasattr(data_to_write_after_compression, 'close'): + data_to_write_after_compression.close() + # If original file_content_bytes was a different handle (e.g. 
multipart before compression) and not closed yet + if file_content_bytes and file_content_bytes is not data_to_write_after_compression and hasattr(file_content_bytes, 'close'): + try: + if not file_content_bytes.closed: # Check if already closed + file_content_bytes.close() + except Exception: pass # Ignore errors on close if already handled def process(self): @@ -858,36 +874,140 @@ class PostProcessorWorker: post_is_candidate_by_title_char_match = False char_filter_that_matched_title = None + post_is_candidate_by_comment_char_match = False + # New variables for CHAR_SCOPE_COMMENTS file-first logic + post_is_candidate_by_file_char_match_in_comment_scope = False + char_filter_that_matched_file_in_comment_scope = None + char_filter_that_matched_comment = None if self.filter_character_list_objects and \ (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH): - self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. Scope: {self.char_filter_scope}") + # self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. 
Scope: {self.char_filter_scope}") for idx, filter_item_obj in enumerate(self.filter_character_list_objects): - self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}") + if self.check_cancel(): break + # self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}") terms_to_check_for_title = list(filter_item_obj["aliases"]) if filter_item_obj["is_group"]: if filter_item_obj["name"] not in terms_to_check_for_title: terms_to_check_for_title.append(filter_item_obj["name"]) unique_terms_for_title_check = list(set(terms_to_check_for_title)) - self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}") + # self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}") for term_to_match in unique_terms_for_title_check: - self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'") + # self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'") match_found_for_term = is_title_match_for_character(post_title, term_to_match) - self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}") + # self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}") if match_found_for_term: post_is_candidate_by_title_char_match = True char_filter_that_matched_title = filter_item_obj self.logger(f" Post title matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}', Scope: {self.char_filter_scope}). 
Post is candidate.") break if post_is_candidate_by_title_char_match: break - self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}") + # self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}") - if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match: - self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.") - return 0, num_potential_files_in_post, [] + # --- Populate all_files_from_post_api before character filter logic that needs it --- + # This is needed for the file-first check in CHAR_SCOPE_COMMENTS + all_files_from_post_api_for_char_check = [] + api_file_domain_for_char_check = urlparse(self.api_url_input).netloc + if not api_file_domain_for_char_check or not any(d in api_file_domain_for_char_check.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']): + api_file_domain_for_char_check = "kemono.su" if "kemono" in self.service.lower() else "coomer.party" + if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'): + original_api_name = post_main_file_info.get('name') or os.path.basename(post_main_file_info['path'].lstrip('/')) + if original_api_name: + all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_name}) + + for att_info in post_attachments: + if isinstance(att_info, dict) and att_info.get('path'): + original_api_att_name = att_info.get('name') or os.path.basename(att_info['path'].lstrip('/')) + if original_api_att_name: + all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_att_name}) + # --- End population of all_files_from_post_api_for_char_check --- + + + if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS: + 
self.logger(f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id}'.") + for file_info_item in all_files_from_post_api_for_char_check: # Use the pre-populated list of file names + if self.check_cancel(): break + current_api_original_filename_for_check = file_info_item.get('_original_name_for_log') + if not current_api_original_filename_for_check: continue + + for filter_item_obj in self.filter_character_list_objects: + terms_to_check = list(filter_item_obj["aliases"]) + if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check: + terms_to_check.append(filter_item_obj["name"]) + + for term_to_match in terms_to_check: + if is_filename_match_for_character(current_api_original_filename_for_check, term_to_match): + post_is_candidate_by_file_char_match_in_comment_scope = True + char_filter_that_matched_file_in_comment_scope = filter_item_obj + self.logger(f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check}' matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}'). Post is candidate.") + break + if post_is_candidate_by_file_char_match_in_comment_scope: break + if post_is_candidate_by_file_char_match_in_comment_scope: break + self.logger(f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope}") + + if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS: + if not post_is_candidate_by_file_char_match_in_comment_scope: + self.logger(f" [Char Scope: Comments] Phase 2: No file match found. 
Checking post comments for post ID '{post_id}'.") + try: + parsed_input_url_for_comments = urlparse(self.api_url_input) + api_domain_for_comments = parsed_input_url_for_comments.netloc + if not any(d in api_domain_for_comments.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']): + self.logger(f"⚠️ Unrecognized domain '{api_domain_for_comments}' for comment API. Defaulting based on service.") + api_domain_for_comments = "kemono.su" if "kemono" in self.service.lower() else "coomer.party" + + comments_data = fetch_post_comments( + api_domain_for_comments, self.service, self.user_id, post_id, + headers, self.logger, self.cancellation_event + ) + if comments_data: + self.logger(f" Fetched {len(comments_data)} comments for post {post_id}.") + for comment_item_idx, comment_item in enumerate(comments_data): + if self.check_cancel(): break + raw_comment_content = comment_item.get('content', '') + if not raw_comment_content: continue + + cleaned_comment_text = strip_html_tags(raw_comment_content) + if not cleaned_comment_text.strip(): continue + + for filter_item_obj in self.filter_character_list_objects: + terms_to_check_comment = list(filter_item_obj["aliases"]) + if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_comment: + terms_to_check_comment.append(filter_item_obj["name"]) + + for term_to_match_comment in terms_to_check_comment: + if is_title_match_for_character(cleaned_comment_text, term_to_match_comment): # Re-use title matcher + post_is_candidate_by_comment_char_match = True + char_filter_that_matched_comment = filter_item_obj + self.logger(f" Match Found (Comment in Comments Scope): Comment in post {post_id} matches char filter term '{term_to_match_comment}' (from group/name '{filter_item_obj['name']}'). 
Post is candidate.") + self.logger(f" Matching comment (first 100 chars): '{cleaned_comment_text[:100]}...'") + break + if post_is_candidate_by_comment_char_match: break + if post_is_candidate_by_comment_char_match: break + else: + self.logger(f" No comments found or fetched for post {post_id} to check against character filters.") + + except RuntimeError as e_fetch_comment: + self.logger(f" ⚠️ Error fetching or processing comments for post {post_id}: {e_fetch_comment}") + except Exception as e_generic_comment: + self.logger(f" ❌ Unexpected error during comment processing for post {post_id}: {e_generic_comment}\n{traceback.format_exc(limit=2)}") + self.logger(f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match}") + else: # post_is_candidate_by_file_char_match_in_comment_scope was True + self.logger(f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id}' because a file match already made it a candidate.") + + # --- Skip Post Logic based on Title or Comment Scope (if filters are active) --- + if self.filter_character_list_objects: + if self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match: + self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.") + return 0, num_potential_files_in_post, [] + if self.char_filter_scope == CHAR_SCOPE_COMMENTS and \ + not post_is_candidate_by_file_char_match_in_comment_scope and \ + not post_is_candidate_by_comment_char_match: # MODIFIED: Check both file and comment match flags + self.logger(f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id}', Title '{post_title[:50]}...'") + return 0, num_potential_files_in_post, [] + if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH): post_title_lower = post_title.lower() for skip_word in self.skip_words_list: @@ 
-907,9 +1027,26 @@ class PostProcessorWorker: base_folder_names_for_post_content = [] if not self.extract_links_only and self.use_subfolders: - if post_is_candidate_by_title_char_match and char_filter_that_matched_title: - base_folder_names_for_post_content = [clean_folder_name(char_filter_that_matched_title["name"])] - elif not self.filter_character_list_objects: + primary_char_filter_for_folder = None + log_reason_for_folder = "" + + if self.char_filter_scope == CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment: + # For CHAR_SCOPE_COMMENTS, prioritize file match for folder name if it happened + if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope: + primary_char_filter_for_folder = char_filter_that_matched_file_in_comment_scope + log_reason_for_folder = "Matched char filter in filename (Comments scope)" + elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment: # Fallback to comment match + primary_char_filter_for_folder = char_filter_that_matched_comment + log_reason_for_folder = "Matched char filter in comments (Comments scope, no file match)" + elif (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and char_filter_that_matched_title: # Existing logic for other scopes + primary_char_filter_for_folder = char_filter_that_matched_title + log_reason_for_folder = "Matched char filter in title" + # If scope is FILES, primary_char_filter_for_folder will be None here. Folder determined per file. 
+ + if primary_char_filter_for_folder: + base_folder_names_for_post_content = [clean_folder_name(primary_char_filter_for_folder["name"])] + self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}") + elif not self.filter_character_list_objects: # No char filters defined, use generic logic derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords) if derived_folders: base_folder_names_for_post_content.extend(derived_folders) @@ -917,11 +1054,10 @@ class PostProcessorWorker: base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords)) if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]: base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")] + self.logger(f" Base folder name(s) for post content (Generic title parsing - no char filters): {', '.join(base_folder_names_for_post_content)}") + # If char filters are defined, and scope is FILES, then base_folder_names_for_post_content remains empty. + # The folder will be determined by char_filter_info_that_matched_file later. - if base_folder_names_for_post_content: - log_reason = "Matched char filter" if (post_is_candidate_by_title_char_match and char_filter_that_matched_title) else "Generic title parsing (no char filters)" - self.logger(f" Base folder name(s) for post content ({log_reason}): {', '.join(base_folder_names_for_post_content)}") - if not self.extract_links_only and self.use_subfolders and self.skip_words_list: for folder_name_to_check in base_folder_names_for_post_content: if not folder_name_to_check: continue @@ -1066,19 +1202,32 @@ class PostProcessorWorker: char_filter_info_that_matched_file = char_filter_that_matched_title self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. 
Scope: Both (Title part).") else: - for filter_item_obj in self.filter_character_list_objects: - terms_to_check_for_file_both = list(filter_item_obj["aliases"]) - if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_for_file_both: - terms_to_check_for_file_both.append(filter_item_obj["name"]) - unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both)) + # This part is for the "File" part of "Both" scope + for filter_item_obj_both_file in self.filter_character_list_objects: + terms_to_check_for_file_both = list(filter_item_obj_both_file["aliases"]) + if filter_item_obj_both_file["is_group"] and filter_item_obj_both_file["name"] not in terms_to_check_for_file_both: + terms_to_check_for_file_both.append(filter_item_obj_both_file["name"]) + # Ensure unique_terms_for_file_both_check is defined here + unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both)) for term_to_match in unique_terms_for_file_both_check: if is_filename_match_for_character(current_api_original_filename, term_to_match): file_is_candidate_by_char_filter_scope = True - char_filter_info_that_matched_file = filter_item_obj_both_file # Use the filter that matched the file + self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj_both_file['name']}'). Scope: Both (File part).") break if file_is_candidate_by_char_filter_scope: break + elif self.char_filter_scope == CHAR_SCOPE_COMMENTS: + # If the post is a candidate (either by file or comment under this scope), then this file is also a candidate. + # The folder naming will use the filter that made the POST a candidate. 
+ if post_is_candidate_by_file_char_match_in_comment_scope: # Post was candidate due to a file match + file_is_candidate_by_char_filter_scope = True + char_filter_info_that_matched_file = char_filter_that_matched_file_in_comment_scope # Use the filter that matched a file in the post + self.logger(f" File '{current_api_original_filename}' is candidate because a file in this post matched char filter (Overall Scope: Comments).") + elif post_is_candidate_by_comment_char_match: # Post was candidate due to comment match (no file match for post) + file_is_candidate_by_char_filter_scope = True + char_filter_info_that_matched_file = char_filter_that_matched_comment # Use the filter that matched comments + self.logger(f" File '{current_api_original_filename}' is candidate because post comments matched char filter (Overall Scope: Comments).") if not file_is_candidate_by_char_filter_scope: self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.") @@ -1178,7 +1327,7 @@ class DownloadThread(QThread): char_filter_scope=CHAR_SCOPE_FILES, remove_from_filename_words_list=None, allow_multipart_download=True, - duplicate_file_mode=DUPLICATE_MODE_DELETE): # Default to DELETE + ): # Removed duplicate_file_mode and session-wide tracking super().__init__() self.api_url_input = api_url_input self.output_dir = output_dir @@ -1219,7 +1368,7 @@ class DownloadThread(QThread): self.char_filter_scope = char_filter_scope self.remove_from_filename_words_list = remove_from_filename_words_list self.allow_multipart_download = allow_multipart_download - self.duplicate_file_mode = duplicate_file_mode + # self.duplicate_file_mode and session-wide tracking removed if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") self.compress_images = False @@ -1297,7 +1446,7 @@ class DownloadThread(QThread): char_filter_scope=self.char_filter_scope, 
remove_from_filename_words_list=self.remove_from_filename_words_list, allow_multipart_download=self.allow_multipart_download, - duplicate_file_mode=self.duplicate_file_mode) + ) # Removed duplicate_file_mode and session-wide tracking try: dl_count, skip_count, kept_originals_this_post = post_processing_worker.process() grand_total_downloaded_files += dl_count diff --git a/main.py b/main.py index b5c4a06..16e5740 100644 --- a/main.py +++ b/main.py @@ -48,8 +48,9 @@ try: SKIP_SCOPE_POSTS, SKIP_SCOPE_BOTH, CHAR_SCOPE_TITLE, # Added for completeness if used directly - CHAR_SCOPE_FILES, # Added - CHAR_SCOPE_BOTH # Added + CHAR_SCOPE_FILES, # Ensure this is imported + CHAR_SCOPE_BOTH, + CHAR_SCOPE_COMMENTS ) print("Successfully imported names from downloader_utils.") except ImportError as e: @@ -68,6 +69,7 @@ except ImportError as e: CHAR_SCOPE_TITLE = "title" CHAR_SCOPE_FILES = "files" CHAR_SCOPE_BOTH = "both" + CHAR_SCOPE_COMMENTS = "comments" except Exception as e: print(f"--- UNEXPECTED IMPORT ERROR ---") @@ -80,6 +82,7 @@ except Exception as e: MAX_THREADS = 200 RECOMMENDED_MAX_THREADS = 50 MAX_FILE_THREADS_PER_POST_OR_WORKER = 10 +MAX_POST_WORKERS_WHEN_COMMENT_FILTERING = 3 # New constant HTML_PREFIX = "" @@ -92,13 +95,7 @@ SKIP_WORDS_SCOPE_KEY = "skipWordsScopeV1" ALLOW_MULTIPART_DOWNLOAD_KEY = "allowMultipartDownloadV1" CHAR_FILTER_SCOPE_KEY = "charFilterScopeV1" -# CHAR_SCOPE_TITLE, CHAR_SCOPE_FILES, CHAR_SCOPE_BOTH are already defined or imported - -DUPLICATE_FILE_MODE_KEY = "duplicateFileModeV1" -# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed. 
-DUPLICATE_MODE_DELETE = "delete" -DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move" # New mode - +# CHAR_SCOPE_TITLE, CHAR_SCOPE_FILES, CHAR_SCOPE_BOTH, CHAR_SCOPE_COMMENTS are already defined or imported # --- Tour Classes (Moved from tour.py) --- class TourStepWidget(QWidget): @@ -480,25 +477,21 @@ class DownloaderApp(QWidget): self.radio_only_links = None self.radio_only_archives = None - self.skip_scope_toggle_button = None self.char_filter_scope_toggle_button = None - self.all_kept_original_filenames = [] - self.manga_filename_style = self.settings.value(MANGA_FILENAME_STYLE_KEY, STYLE_POST_TITLE, type=str) self.skip_words_scope = self.settings.value(SKIP_WORDS_SCOPE_KEY, SKIP_SCOPE_POSTS, type=str) - self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_TITLE, type=str) + self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_FILES, type=str) # Default to Files # Always default multi-part download to OFF on launch, ignoring any saved setting. 
self.allow_multipart_download_setting = False - self.duplicate_file_mode = self.settings.value(DUPLICATE_FILE_MODE_KEY, DUPLICATE_MODE_DELETE, type=str) # Default to DELETE print(f"ℹ️ Known.txt will be loaded/saved at: {self.config_file}") - self.load_known_names_from_util() self.setWindowTitle("Kemono Downloader v3.2.0") # self.setGeometry(150, 150, 1050, 820) # Initial geometry will be set after showing self.setStyleSheet(self.get_dark_theme()) + self.init_ui() self._connect_signals() @@ -510,7 +503,6 @@ class DownloaderApp(QWidget): self.log_signal.emit(f"ℹ️ Skip words scope loaded: '{self.skip_words_scope}'") self.log_signal.emit(f"ℹ️ Character filter scope loaded: '{self.char_filter_scope}'") self.log_signal.emit(f"ℹ️ Multi-part download defaults to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'} on launch") - self.log_signal.emit(f"ℹ️ Duplicate file handling mode loaded: '{self.duplicate_file_mode.capitalize()}'") def _connect_signals(self): @@ -560,7 +552,6 @@ class DownloaderApp(QWidget): self.char_filter_scope_toggle_button.clicked.connect(self._cycle_char_filter_scope) if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode) - if hasattr(self, 'duplicate_mode_toggle_button'): self.duplicate_mode_toggle_button.clicked.connect(self._cycle_duplicate_mode) def load_known_names_from_util(self): @@ -606,7 +597,6 @@ class DownloaderApp(QWidget): self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope) self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope) self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting) - self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode) # Save current mode self.settings.sync() should_exit = True @@ -726,17 +716,17 @@ class DownloaderApp(QWidget): self.character_input = QLineEdit() self.character_input.setPlaceholderText("e.g., Tifa, Aerith, (Cloud, Zack)") 
self.character_input.setToolTip( - "Filter files or posts by character/series names (comma-separated).\n" - " - Normal Mode: Filters individual files by matching their filenames.\n" - " - Manga/Comic Mode: Filters entire posts by matching the post title.\n" + "Filter by character/series names (comma-separated, e.g., Tifa, Aerith).\n" + "The behavior of this filter (Files, Title, Both, or Comments) is controlled by the 'Filter: [Scope]' button next to this input.\n" "Also used for folder naming if 'Separate Folders' is enabled.\n" "Group aliases for a combined folder name: (alias1, alias2) -> folder 'alias1 alias2'.\n" "Example: yor, Tifa, (Boa, Hancock)") char_input_and_button_layout.addWidget(self.character_input, 3) self.char_filter_scope_toggle_button = QPushButton() + # Initial text and tooltip will be set by calling _update_char_filter_scope_button_text() + # at the end of init_ui or when the scope is first set. self._update_char_filter_scope_button_text() - self.char_filter_scope_toggle_button.setToolTip("Click to cycle character filter scope (Files -> Title -> Both)") self.char_filter_scope_toggle_button.setStyleSheet("padding: 6px 10px;") self.char_filter_scope_toggle_button.setMinimumWidth(100) char_input_and_button_layout.addWidget(self.char_filter_scope_toggle_button, 1) @@ -794,7 +784,6 @@ class DownloaderApp(QWidget): skip_input_and_button_layout.addWidget(self.skip_words_input, 1) # Input field takes available space self.skip_scope_toggle_button = QPushButton() self._update_skip_scope_button_text() - self.skip_scope_toggle_button.setToolTip("Click to cycle skip scope (Files -> Posts -> Both)") self.skip_scope_toggle_button.setStyleSheet("padding: 6px 10px;") self.skip_scope_toggle_button.setMinimumWidth(100) skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 0) # Button takes its minimum @@ -1017,38 +1006,26 @@ class DownloaderApp(QWidget): log_title_layout.addWidget(self.link_search_button) self.manga_rename_toggle_button = 
QPushButton() - # Tooltip is dynamically set by _update_manga_filename_style_button_text self.manga_rename_toggle_button.setVisible(False) self.manga_rename_toggle_button.setFixedWidth(140) self.manga_rename_toggle_button.setStyleSheet("padding: 4px 8px;") self._update_manga_filename_style_button_text() log_title_layout.addWidget(self.manga_rename_toggle_button) - self.multipart_toggle_button = QPushButton() # Create the button - # Tooltip is dynamically set by _update_multipart_toggle_button_text + self.multipart_toggle_button = QPushButton() self.multipart_toggle_button.setToolTip("Toggle between Multi-part and Single-stream downloads for large files.") self.multipart_toggle_button.setFixedWidth(130) # Adjust width as needed self.multipart_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding self._update_multipart_toggle_button_text() # Set initial text log_title_layout.addWidget(self.multipart_toggle_button) # Add to layout - self.duplicate_mode_toggle_button = QPushButton() - # Tooltip is dynamically set by _update_duplicate_mode_button_text - self.duplicate_mode_toggle_button.setToolTip("Toggle how duplicate filenames are handled (Rename or Delete).") - self.duplicate_mode_toggle_button.setFixedWidth(150) # Adjust width - self.duplicate_mode_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding - self._update_duplicate_mode_button_text() # Set initial text - log_title_layout.addWidget(self.duplicate_mode_toggle_button) - self.log_verbosity_button = QPushButton("Show Basic Log") - # Tooltip already exists for log_verbosity_button self.log_verbosity_button.setToolTip("Toggle between full and basic log details.") self.log_verbosity_button.setFixedWidth(110) self.log_verbosity_button.setStyleSheet("padding: 4px 8px;") log_title_layout.addWidget(self.log_verbosity_button) self.reset_button = QPushButton("🔄 Reset") - # Tooltip already exists for reset_button self.reset_button.setToolTip("Reset all inputs and logs to default state (only when 
idle).") self.reset_button.setFixedWidth(80) self.reset_button.setStyleSheet("padding: 4px 8px;") @@ -1125,7 +1102,6 @@ class DownloaderApp(QWidget): self._update_manga_filename_style_button_text() self._update_skip_scope_button_text() self._update_char_filter_scope_button_text() - self._update_duplicate_mode_button_text() def _center_on_screen(self): """Centers the widget on the screen.""" @@ -1382,8 +1358,7 @@ class DownloaderApp(QWidget): self.skip_scope_toggle_button.setVisible(not (is_only_links or is_only_archives)) if hasattr(self, 'multipart_toggle_button') and self.multipart_toggle_button: self.multipart_toggle_button.setVisible(not (is_only_links or is_only_archives)) - # Other log header buttons (manga, duplicate, char filter scope) are handled by - # update_ui_for_manga_mode and update_ui_for_subfolders, which are called below. + # Other log header buttons (manga, char filter scope) are handled by update_ui_for_manga_mode and update_ui_for_subfolders if self.link_search_input: self.link_search_input.setVisible(is_only_links) if self.link_search_button: self.link_search_button.setVisible(is_only_links) @@ -1466,9 +1441,7 @@ class DownloaderApp(QWidget): self.update_ui_for_subfolders(subfolders_on) self.update_custom_folder_visibility() - # Ensure manga mode UI updates (which includes the visibility of - # manga_rename_toggle_button and duplicate_mode_toggle_button) - # are triggered after filter mode changes. 
+ # Ensure manga mode UI updates (which includes the visibility of manga_rename_toggle_button) self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False) @@ -1556,12 +1529,46 @@ class DownloaderApp(QWidget): if self.skip_scope_toggle_button: if self.skip_words_scope == SKIP_SCOPE_FILES: self.skip_scope_toggle_button.setText("Scope: Files") + self.skip_scope_toggle_button.setToolTip( + "Current Skip Scope: Files\n\n" + "Skips individual files if their names contain any of the 'Skip with Words'.\n" + "Example: Skip words \"WIP, sketch\".\n" + "- File \"art_WIP.jpg\" -> SKIPPED.\n" + "- File \"final_art.png\" -> DOWNLOADED (if other conditions met).\n" + "Post is still processed for other non-skipped files.\n\n" + "Click to cycle to: Posts" + ) elif self.skip_words_scope == SKIP_SCOPE_POSTS: self.skip_scope_toggle_button.setText("Scope: Posts") + self.skip_scope_toggle_button.setToolTip( + "Current Skip Scope: Posts\n\n" + "Skips entire posts if their titles contain any of the 'Skip with Words'.\n" + "All files from a skipped post are ignored.\n" + "Example: Skip words \"preview, announcement\".\n" + "- Post \"Exciting Announcement!\" -> SKIPPED.\n" + "- Post \"Finished Artwork\" -> PROCESSED (if other conditions met).\n\n" + "Click to cycle to: Both" + ) elif self.skip_words_scope == SKIP_SCOPE_BOTH: self.skip_scope_toggle_button.setText("Scope: Both") + self.skip_scope_toggle_button.setToolTip( + "Current Skip Scope: Both (Posts then Files)\n\n" + "1. Checks post title: If title contains a skip word, the entire post is SKIPPED.\n" + "2. 
If post title is OK, then checks individual filenames: If a filename contains a skip word, only that file is SKIPPED.\n" + "Example: Skip words \"WIP, sketch\".\n" + "- Post \"Sketches and WIPs\" (title match) -> ENTIRE POST SKIPPED.\n" + "- Post \"Art Update\" (title OK) with files:\n" + " - \"character_WIP.jpg\" (file match) -> SKIPPED.\n" + " - \"final_scene.png\" (file OK) -> DOWNLOADED.\n\n" + "Click to cycle to: Files" + ) else: self.skip_scope_toggle_button.setText("Scope: Unknown") + self.skip_scope_toggle_button.setToolTip( + "Current Skip Scope: Unknown\n\n" + "The skip words scope is in an unknown state. Please cycle or reset.\n\n" + "Click to cycle to: Files" + ) def _cycle_skip_scope(self): @@ -1585,28 +1592,74 @@ class DownloaderApp(QWidget): if self.char_filter_scope_toggle_button: if self.char_filter_scope == CHAR_SCOPE_FILES: self.char_filter_scope_toggle_button.setText("Filter: Files") + self.char_filter_scope_toggle_button.setToolTip( + "Current Scope: Files\n\n" + "Filters individual files by name. A post is kept if any file matches.\n" + "Only matching files from that post are downloaded.\n" + "Example: Filter 'Tifa'. File 'Tifa_artwork.jpg' matches and is downloaded.\n" + "Folder Naming: Uses character from matching filename.\n\n" + "Click to cycle to: Title" + ) elif self.char_filter_scope == CHAR_SCOPE_TITLE: self.char_filter_scope_toggle_button.setText("Filter: Title") + self.char_filter_scope_toggle_button.setToolTip( + "Current Scope: Title\n\n" + "Filters entire posts by their title. All files from a matching post are downloaded.\n" + "Example: Filter 'Aerith'. Post titled 'Aerith's Garden' matches; all its files are downloaded.\n" + "Folder Naming: Uses character from matching post title.\n\n" + "Click to cycle to: Both" + ) elif self.char_filter_scope == CHAR_SCOPE_BOTH: self.char_filter_scope_toggle_button.setText("Filter: Both") + self.char_filter_scope_toggle_button.setToolTip( + "Current Scope: Both (Title then Files)\n\n" + "1. 
Checks post title: If matches, all files from post are downloaded.\n" + "2. If title doesn't match, checks filenames: If any file matches, only that file is downloaded.\n" + "Example: Filter 'Cloud'.\n" + " - Post 'Cloud Strife' (title match) -> all files downloaded.\n" + " - Post 'Bike Chase' with 'Cloud_fenrir.jpg' (file match) -> only 'Cloud_fenrir.jpg' downloaded.\n" + "Folder Naming: Prioritizes title match, then file match.\n\n" + "Click to cycle to: Comments" + ) + elif self.char_filter_scope == CHAR_SCOPE_COMMENTS: + self.char_filter_scope_toggle_button.setText("Filter: Comments (Beta)") + self.char_filter_scope_toggle_button.setToolTip( + "Current Scope: Comments (Beta - Files first, then Comments as fallback)\n\n" + "1. Checks filenames: If any file in the post matches the filter, the entire post is downloaded. Comments are NOT checked for this filter term.\n" + "2. If no file matches, THEN checks post comments: If a comment matches, the entire post is downloaded.\n" + "Example: Filter 'Barret'.\n" + " - Post A: Files 'Barret_gunarm.jpg', 'other.png'. File 'Barret_gunarm.jpg' matches. All files from Post A downloaded. Comments not checked for 'Barret'.\n" + " - Post B: Files 'dyne.jpg', 'weapon.gif'. Comments: '...a drawing of Barret Wallace...'. No file match for 'Barret'. Comment matches. All files from Post B downloaded.\n" + "Folder Naming: Prioritizes character from file match, then from comment match.\n\n" + "Click to cycle to: Files" + ) else: self.char_filter_scope_toggle_button.setText("Filter: Unknown") + self.char_filter_scope_toggle_button.setToolTip( + "Current Scope: Unknown\n\n" + "The character filter scope is in an unknown state. 
Please cycle or reset.\n\n" + "Click to cycle to: Files" + ) def _cycle_char_filter_scope(self): + # Cycle: Files -> Title -> Both -> Comments -> Files if self.char_filter_scope == CHAR_SCOPE_FILES: self.char_filter_scope = CHAR_SCOPE_TITLE elif self.char_filter_scope == CHAR_SCOPE_TITLE: self.char_filter_scope = CHAR_SCOPE_BOTH elif self.char_filter_scope == CHAR_SCOPE_BOTH: + self.char_filter_scope = CHAR_SCOPE_COMMENTS + elif self.char_filter_scope == CHAR_SCOPE_COMMENTS: self.char_filter_scope = CHAR_SCOPE_FILES else: - self.char_filter_scope = CHAR_SCOPE_FILES + self.char_filter_scope = CHAR_SCOPE_FILES # Default fallback self._update_char_filter_scope_button_text() self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope) self.log_signal.emit(f"ℹ️ Character filter scope changed to: '{self.char_filter_scope}'") + def add_new_character(self): global KNOWN_NAMES, clean_folder_name name_to_add = self.new_char_input.text().strip() @@ -1751,18 +1804,34 @@ class DownloaderApp(QWidget): if self.manga_filename_style == STYLE_POST_TITLE: self.manga_rename_toggle_button.setText("Name: Post Title") self.manga_rename_toggle_button.setToolTip( - "Manga files: First file named by post title. Subsequent files in same post keep original names.\n" - "Click to change to original file names for all files." 
+ "Manga Filename Style: Post Title\n\n" + "When Manga/Comic Mode is active for a creator feed:\n" + "- The *first* file in a post is named after the post's title (e.g., \"MyMangaChapter1.jpg\").\n" + "- Any *subsequent* files within the *same post* will retain their original filenames (e.g., \"page_02.png\", \"bonus_art.jpg\").\n" + "- This is generally recommended for better organization of sequential content.\n" + "- Example: Post \"Chapter 1: The Beginning\" with files \"001.jpg\", \"002.jpg\".\n" + " Downloads as: \"Chapter 1 The Beginning.jpg\", \"002.jpg\".\n\n" + "Click to change to: Original File Name" ) elif self.manga_filename_style == STYLE_ORIGINAL_NAME: self.manga_rename_toggle_button.setText("Name: Original File") self.manga_rename_toggle_button.setToolTip( - "Manga files will keep their original names as provided by the site (e.g., 001.jpg, page_01.png).\n" - "Click to change to post title based naming for the first file." + "Manga Filename Style: Original File Name\n\n" + "When Manga/Comic Mode is active for a creator feed:\n" + "- *All* files in a post will attempt to keep their original filenames as provided by the site (e.g., \"001.jpg\", \"page_02.png\").\n" + "- This can be useful if original names are already well-structured and sequential.\n" + "- If original names are inconsistent, using \"Post Title\" style is often better.\n" + "- Example: Post \"Chapter 1: The Beginning\" with files \"001.jpg\", \"002.jpg\".\n" + " Downloads as: \"001.jpg\", \"002.jpg\".\n\n" + "Click to change to: Post Title" ) else: self.manga_rename_toggle_button.setText("Name: Unknown Style") - self.manga_rename_toggle_button.setToolTip("Manga filename style is in an unknown state.") + self.manga_rename_toggle_button.setToolTip( + "Manga Filename Style: Unknown\n\n" + "The manga filename style is in an unknown state. 
Please cycle or reset.\n\n" + "Click to change to: Post Title" + ) def _toggle_manga_filename_style(self): @@ -1816,11 +1885,6 @@ class DownloaderApp(QWidget): # Visible if manga mode is on AND not in "Only Links" or "Only Archives" mode self.manga_rename_toggle_button.setVisible(manga_mode_effectively_on and not (is_only_links_mode or is_only_archives_mode)) - if hasattr(self, 'duplicate_mode_toggle_button'): - # Visible if manga mode is OFF AND not in "Only Links" or "Only Archives" mode - self.duplicate_mode_toggle_button.setVisible( - not manga_mode_effectively_on and not (is_only_links_mode or is_only_archives_mode) - ) if manga_mode_effectively_on: if self.page_range_label: self.page_range_label.setEnabled(False) @@ -1909,12 +1973,11 @@ class DownloaderApp(QWidget): raw_skip_words = self.skip_words_input.text().strip() skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()] - current_skip_words_scope = self.get_skip_words_scope() raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else "" - effective_duplicate_file_mode = self.duplicate_file_mode # Start with user's choice allow_multipart = self.allow_multipart_download_setting # Use the internal setting remove_from_filename_words_list = [word.strip() for word in raw_remove_filename_words.split(',') if word.strip()] + current_skip_words_scope = self.get_skip_words_scope() current_char_filter_scope = self.get_char_filter_scope() manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False @@ -1967,10 +2030,8 @@ class DownloaderApp(QWidget): elif manga_mode: start_page, end_page = None, None - # effective_duplicate_file_mode will be self.duplicate_file_mode (UI button's state). 
# Manga Mode specific duplicate handling is now managed entirely within downloader_utils.py self.external_link_queue.clear(); self.extracted_links_cache = []; self._is_processing_external_link_queue = False; self._current_link_post_title = None - self.all_kept_original_filenames = [] raw_character_filters_text = self.character_input.text().strip() @@ -2130,6 +2191,7 @@ class DownloaderApp(QWidget): self.total_posts_to_process = 0; self.processed_posts_count = 0; self.download_counter = 0; self.skip_counter = 0 self.progress_label.setText("Progress: Initializing...") + effective_num_post_workers = 1 effective_num_file_threads_per_worker = 1 @@ -2179,8 +2241,7 @@ class DownloaderApp(QWidget): f" Skip Words Scope: {current_skip_words_scope.capitalize()}", f" Remove Words from Filename: {', '.join(remove_from_filename_words_list) if remove_from_filename_words_list else 'None'}", f" Compress Images: {'Enabled' if compress_images else 'Disabled'}", - f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}", - f" Multi-part Download: {'Enabled' if allow_multipart else 'Disabled'}" + f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}" # Removed duplicate file handling log ]) else: log_messages.append(f" Mode: Extracting Links Only") @@ -2192,7 +2253,6 @@ class DownloaderApp(QWidget): log_messages.append(f" ↳ Manga Filename Style: {'Post Title Based' if self.manga_filename_style == STYLE_POST_TITLE else 'Original File Name'}") if filter_character_list_to_pass: log_messages.append(f" ↳ Manga Character Filter (for naming/folder): {', '.join(item['name'] for item in filter_character_list_to_pass)}") - log_messages.append(f" ↳ Char Filter Scope (Manga): {current_char_filter_scope.capitalize()}") log_messages.append(f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).") should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url @@ -2242,8 +2302,8 @@ class 
DownloaderApp(QWidget): 'signals': self.worker_signals, 'manga_filename_style': self.manga_filename_style, 'num_file_threads_for_worker': effective_num_file_threads_per_worker, - 'allow_multipart_download': allow_multipart, # Corrected from previous thought - 'duplicate_file_mode': effective_duplicate_file_mode # Pass the potentially overridden mode + 'allow_multipart_download': allow_multipart, + # 'duplicate_file_mode' and session-wide tracking removed } try: @@ -2258,13 +2318,11 @@ class DownloaderApp(QWidget): 'use_subfolders', 'use_post_subfolders', 'custom_folder_name', 'compress_images', 'download_thumbnails', 'service', 'user_id', 'downloaded_files', 'downloaded_file_hashes', 'remove_from_filename_words_list', - 'downloaded_files_lock', 'downloaded_file_hashes_lock', - 'skip_words_list', 'skip_words_scope', 'char_filter_scope', - 'show_external_links', 'extract_links_only', - 'num_file_threads_for_worker', - 'skip_current_file_flag', - 'start_page', 'end_page', 'target_post_id_from_initial_url', - 'manga_mode_active', 'unwanted_keywords', 'manga_filename_style', 'duplicate_file_mode', + 'downloaded_files_lock', 'downloaded_file_hashes_lock', + 'skip_words_list', 'skip_words_scope', 'char_filter_scope', + 'show_external_links', 'extract_links_only', 'num_file_threads_for_worker', + 'start_page', 'end_page', 'target_post_id_from_initial_url', + 'manga_mode_active', 'unwanted_keywords', 'manga_filename_style', + 'allow_multipart_download' ] args_template['skip_current_file_flag'] = None @@ -2385,18 +2443,17 @@ class DownloaderApp(QWidget): 'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list', 'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'show_external_links', 'extract_links_only', 'allow_multipart_download', - 'num_file_threads', - 'skip_current_file_flag', + 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style' ] # Ensure 'allow_multipart_download' is 
also considered for optional keys if it has a default in PostProcessorWorker ppw_optional_keys_with_defaults = { 'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'remove_from_filename_words_list', - 'show_external_links', 'extract_links_only', - 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style' + 'show_external_links', 'extract_links_only', + 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style' } - - + for post_data_item in all_posts_data: if self.cancellation_event.is_set(): break if not isinstance(post_data_item, dict): @@ -2464,12 +2521,10 @@ class DownloaderApp(QWidget): widgets_to_toggle = [ self.download_btn, self.link_input, self.radio_all, self.radio_images, self.radio_videos, self.radio_only_links, self.skip_zip_checkbox, self.skip_rar_checkbox, self.use_subfolders_checkbox, self.compress_images_checkbox, self.download_thumbnails_checkbox, self.use_multithreading_checkbox, self.skip_words_input, self.character_search_input, - self.new_char_input, self.add_char_button, self.delete_char_button, - self.char_filter_scope_toggle_button, - self.start_page_input, self.end_page_input, - self.page_range_label, self.to_label, self.character_input, self.custom_folder_input, self.custom_folder_label, self.remove_from_filename_input, - self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.multipart_toggle_button, - self.skip_scope_toggle_button + self.new_char_input, self.add_char_button, self.delete_char_button, self.char_filter_scope_toggle_button, # duplicate_file_mode_toggle_button removed + self.start_page_input, self.end_page_input, self.page_range_label, self.to_label, + self.character_input, self.custom_folder_input, self.custom_folder_label, self.remove_from_filename_input, + 
self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.multipart_toggle_button, self.skip_scope_toggle_button ] for widget in widgets_to_toggle: @@ -2663,15 +2718,10 @@ class DownloaderApp(QWidget): self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope) self._update_skip_scope_button_text() - self.char_filter_scope = CHAR_SCOPE_TITLE + self.char_filter_scope = CHAR_SCOPE_FILES # Default to Files on full reset self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope) self._update_char_filter_scope_button_text() - self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Reset to default (Delete) - self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode) - - self._update_duplicate_mode_button_text() - self.settings.sync() self._update_manga_filename_style_button_text() self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False) @@ -2693,12 +2743,8 @@ class DownloaderApp(QWidget): self.skip_words_scope = SKIP_SCOPE_POSTS self._update_skip_scope_button_text() - self.char_filter_scope = CHAR_SCOPE_TITLE + self.char_filter_scope = CHAR_SCOPE_FILES # Default to Files self._update_char_filter_scope_button_text() - self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Default to DELETE - self._update_duplicate_mode_button_text() - - self._handle_filter_mode_change(self.radio_all, True) self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) self.filter_character_list("") @@ -2728,6 +2774,26 @@ class DownloaderApp(QWidget): if hasattr(self, 'multipart_toggle_button'): text = "Multi-part: ON" if self.allow_multipart_download_setting else "Multi-part: OFF" self.multipart_toggle_button.setText(text) + if self.allow_multipart_download_setting: + self.multipart_toggle_button.setToolTip( + "Multi-part Download: ON\n\n" + "Enables downloading large files in multiple segments (parts) simultaneously.\n" + "- Can significantly speed up downloads for 
*single large files* (e.g., videos, large archives) if the server supports it.\n" + "- May increase CPU/network usage.\n" + "- For creator feeds with many *small files* (e.g., images), this might not offer speed benefits and could make the UI/log feel busy.\n" + "- If a multi-part download fails for a file, it will automatically retry with a single stream.\n" + "- Example: A 500MB video might be downloaded in 5 parts of 100MB each, concurrently.\n\n" + "Click to turn OFF (use single-stream for all files)." + ) + else: + self.multipart_toggle_button.setToolTip( + "Multi-part Download: OFF\n\n" + "All files will be downloaded using a single connection (stream).\n" + "- This is generally stable and works well for most scenarios, especially for feeds with many smaller files.\n" + "- Large files will be downloaded sequentially in one go.\n" + "- Example: A 500MB video will be downloaded as one continuous stream.\n\n" + "Click to turn ON (enable multi-part for large files, see advisory on click)." 
+ ) def _toggle_multipart_mode(self): # If currently OFF, and user is trying to turn it ON @@ -2762,23 +2828,6 @@ class DownloaderApp(QWidget): self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting) self.log_signal.emit(f"ℹ️ Multi-part download set to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}") - def _update_duplicate_mode_button_text(self): - if hasattr(self, 'duplicate_mode_toggle_button'): - if self.duplicate_file_mode == DUPLICATE_MODE_DELETE: - self.duplicate_mode_toggle_button.setText("Duplicates: Delete") - elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER: - self.duplicate_mode_toggle_button.setText("Duplicates: Move") - else: # Should not happen - self.duplicate_mode_toggle_button.setText("Duplicates: Move") # Default to Move if unknown - - def _cycle_duplicate_mode(self): - if self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER: - self.duplicate_file_mode = DUPLICATE_MODE_DELETE - else: # If it's DELETE or unknown, cycle back to MOVE - self.duplicate_file_mode = DUPLICATE_MODE_MOVE_TO_SUBFOLDER - self._update_duplicate_mode_button_text() - self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode) - self.log_signal.emit(f"ℹ️ Duplicate file handling mode changed to: '{self.duplicate_file_mode.capitalize()}'") if __name__ == '__main__': import traceback