From f303b8b0207fb94bc447dc8fe157b405be5f3757 Mon Sep 17 00:00:00 2001 From: Yuvi9587 <114073886+Yuvi9587@users.noreply.github.com> Date: Wed, 16 Jul 2025 09:02:47 -0700 Subject: [PATCH] Commit --- src/config/constants.py | 4 + src/core/workers.py | 309 +++++++++++++++---------- src/ui/dialogs/KeepDuplicatesDialog.py | 122 ++++++++++ src/ui/dialogs/SupportDialog.py | 126 +++++++--- src/ui/main_window.py | 80 ++++++- src/utils/resolution.py | 2 + 6 files changed, 472 insertions(+), 171 deletions(-) create mode 100644 src/ui/dialogs/KeepDuplicatesDialog.py diff --git a/src/config/constants.py b/src/config/constants.py index 7f2a662..78fc000 100644 --- a/src/config/constants.py +++ b/src/config/constants.py @@ -113,3 +113,7 @@ CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS = { "fri", "friday", "sat", "saturday", "sun", "sunday" # add more according to need } + +# --- Duplicate Handling Modes --- +DUPLICATE_HANDLING_HASH = "hash" +DUPLICATE_HANDLING_KEEP_ALL = "keep_all" \ No newline at end of file diff --git a/src/core/workers.py b/src/core/workers.py index 43a4bc1..a6aae6f 100644 --- a/src/core/workers.py +++ b/src/core/workers.py @@ -9,7 +9,7 @@ import uuid import http import html import json -from collections import deque +from collections import deque, defaultdict import hashlib from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError, Future from io import BytesIO @@ -71,105 +71,114 @@ class PostProcessorSignals (QObject ): worker_finished_signal = pyqtSignal(tuple) class PostProcessorWorker: - def __init__ (self ,post_data ,download_root ,known_names , - filter_character_list ,emitter , - unwanted_keywords ,filter_mode ,skip_zip ,skip_rar , - use_subfolders ,use_post_subfolders ,target_post_id_from_initial_url ,custom_folder_name , - compress_images ,download_thumbnails ,service ,user_id ,pause_event , - api_url_input ,cancellation_event , - downloaded_files ,downloaded_file_hashes ,downloaded_files_lock ,downloaded_file_hashes_lock , - dynamic_character_filter_holder =None ,skip_words_list =None , - skip_words_scope =SKIP_SCOPE_FILES , - show_external_links =False , - extract_links_only =False , - num_file_threads =4 ,skip_current_file_flag =None , - manga_mode_active =False , - manga_filename_style =STYLE_POST_TITLE , - char_filter_scope =CHAR_SCOPE_FILES , - remove_from_filename_words_list =None , - allow_multipart_download =True , - cookie_text ="", - use_cookie =False , - override_output_dir =None , - selected_cookie_file =None , - app_base_dir =None , - manga_date_prefix =MANGA_DATE_PREFIX_DEFAULT , - manga_date_file_counter_ref =None , - scan_content_for_images =False , - creator_download_folder_ignore_words =None , - manga_global_file_counter_ref =None , - use_date_prefix_for_subfolder=False, - keep_in_post_duplicates=False, - session_file_path=None, - session_lock=None, - text_only_scope=None, - text_export_format='txt', - single_pdf_mode=False, - project_root_dir=None, - processed_post_ids=None - ): - self .post =post_data - self .download_root =download_root - self .known_names =known_names - self .filter_character_list_objects_initial =filter_character_list if filter_character_list else [] - self .dynamic_filter_holder =dynamic_character_filter_holder - self .unwanted_keywords =unwanted_keywords if unwanted_keywords is not None else set () - self .filter_mode =filter_mode - self .skip_zip =skip_zip - self .skip_rar =skip_rar - self .use_subfolders =use_subfolders - self .use_post_subfolders =use_post_subfolders - self .target_post_id_from_initial_url =target_post_id_from_initial_url - self .custom_folder_name =custom_folder_name - self .compress_images =compress_images - self .download_thumbnails =download_thumbnails - self .service =service - self .user_id =user_id - self .api_url_input =api_url_input - self .cancellation_event =cancellation_event - self .pause_event =pause_event - self .emitter =emitter - if not self .emitter : - raise ValueError ("PostProcessorWorker requires an emitter (signals object or queue).") - self .skip_current_file_flag =skip_current_file_flag - self .downloaded_files =downloaded_files if downloaded_files is not None else set () - self .downloaded_file_hashes =downloaded_file_hashes if downloaded_file_hashes is not None else set () - self .downloaded_files_lock =downloaded_files_lock if downloaded_files_lock is not None else threading .Lock () - self .downloaded_file_hashes_lock =downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading .Lock () - self .skip_words_list =skip_words_list if skip_words_list is not None else [] - self .skip_words_scope =skip_words_scope - self .show_external_links =show_external_links - self .extract_links_only =extract_links_only - self .num_file_threads =num_file_threads - self .manga_mode_active =manga_mode_active - self .manga_filename_style =manga_filename_style - self .char_filter_scope =char_filter_scope - self .remove_from_filename_words_list =remove_from_filename_words_list if remove_from_filename_words_list is not None else [] - self .allow_multipart_download =allow_multipart_download - self .manga_date_file_counter_ref =manga_date_file_counter_ref - self .selected_cookie_file =selected_cookie_file - self .app_base_dir =app_base_dir - self .cookie_text =cookie_text - self .manga_date_prefix =manga_date_prefix - self .manga_global_file_counter_ref =manga_global_file_counter_ref - self .use_cookie =use_cookie - self .override_output_dir =override_output_dir - self .scan_content_for_images =scan_content_for_images - self .creator_download_folder_ignore_words =creator_download_folder_ignore_words + + def __init__(self, post_data, download_root, known_names, + filter_character_list, emitter, + unwanted_keywords, filter_mode, skip_zip, skip_rar, + use_subfolders, use_post_subfolders, target_post_id_from_initial_url, custom_folder_name, + compress_images, download_thumbnails, service, user_id, pause_event, + api_url_input, cancellation_event, + downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock, + dynamic_character_filter_holder=None, skip_words_list=None, + skip_words_scope=SKIP_SCOPE_FILES, + show_external_links=False, + extract_links_only=False, + num_file_threads=4, skip_current_file_flag=None, + manga_mode_active=False, + manga_filename_style=STYLE_POST_TITLE, + char_filter_scope=CHAR_SCOPE_FILES, + remove_from_filename_words_list=None, + allow_multipart_download=True, + cookie_text="", + use_cookie=False, + override_output_dir=None, + selected_cookie_file=None, + app_base_dir=None, + manga_date_prefix=MANGA_DATE_PREFIX_DEFAULT, + manga_date_file_counter_ref=None, + scan_content_for_images=False, + creator_download_folder_ignore_words=None, + manga_global_file_counter_ref=None, + use_date_prefix_for_subfolder=False, + keep_in_post_duplicates=False, + keep_duplicates_mode=DUPLICATE_HANDLING_HASH, + keep_duplicates_limit=0, + downloaded_hash_counts=None, + downloaded_hash_counts_lock=None, + session_file_path=None, + session_lock=None, + text_only_scope=None, + text_export_format='txt', + single_pdf_mode=False, + project_root_dir=None, + processed_post_ids=None + ): + self.post = post_data + self.download_root = download_root + self.known_names = known_names + self.filter_character_list_objects_initial = filter_character_list if filter_character_list else [] + self.dynamic_filter_holder = dynamic_character_filter_holder + self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else set() + self.filter_mode = filter_mode + self.skip_zip = skip_zip + self.skip_rar = skip_rar + self.use_subfolders = use_subfolders + self.use_post_subfolders = use_post_subfolders + self.target_post_id_from_initial_url = target_post_id_from_initial_url + self.custom_folder_name = custom_folder_name + self.compress_images = compress_images + self.download_thumbnails = download_thumbnails + self.service = service + self.user_id = user_id + self.api_url_input = api_url_input + self.cancellation_event = cancellation_event + self.pause_event = pause_event + self.emitter = emitter + if not self.emitter: + raise ValueError("PostProcessorWorker requires an emitter (signals object or queue).") + self.skip_current_file_flag = skip_current_file_flag + self.downloaded_files = downloaded_files if downloaded_files is not None else set() + self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set() + self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock() + self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock() + self.skip_words_list = skip_words_list if skip_words_list is not None else [] + self.skip_words_scope = skip_words_scope + self.show_external_links = show_external_links + self.extract_links_only = extract_links_only + self.num_file_threads = num_file_threads + self.manga_mode_active = manga_mode_active + self.manga_filename_style = manga_filename_style + self.char_filter_scope = char_filter_scope + self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else [] + self.allow_multipart_download = allow_multipart_download + self.manga_date_file_counter_ref = manga_date_file_counter_ref + self.selected_cookie_file = selected_cookie_file + self.app_base_dir = app_base_dir + self.cookie_text = cookie_text + self.manga_date_prefix = manga_date_prefix + self.manga_global_file_counter_ref = manga_global_file_counter_ref + self.use_cookie = use_cookie + self.override_output_dir = override_output_dir + self.scan_content_for_images = scan_content_for_images + self.creator_download_folder_ignore_words = creator_download_folder_ignore_words self.use_date_prefix_for_subfolder = use_date_prefix_for_subfolder self.keep_in_post_duplicates = keep_in_post_duplicates + self.keep_duplicates_mode = keep_duplicates_mode + self.keep_duplicates_limit = keep_duplicates_limit + self.downloaded_hash_counts = downloaded_hash_counts if downloaded_hash_counts is not None else defaultdict(int) + self.downloaded_hash_counts_lock = downloaded_hash_counts_lock if downloaded_hash_counts_lock is not None else threading.Lock() self.session_file_path = session_file_path self.session_lock = session_lock self.text_only_scope = text_only_scope self.text_export_format = text_export_format - self.single_pdf_mode = single_pdf_mode + self.single_pdf_mode = single_pdf_mode self.project_root_dir = project_root_dir self.processed_post_ids = processed_post_ids if processed_post_ids is not None else [] - if self .compress_images and Image is None : + if self.compress_images and Image is None: + self.logger("⚠️ Image compression disabled: Pillow library not found.") + self.compress_images = False - self .logger ("⚠️ Image compression disabled: Pillow library not found.") - self .compress_images =False def _emit_signal (self ,signal_type_str ,*payload_args ): """Helper to emit signal either directly or via queue.""" if isinstance (self .emitter ,queue .Queue ): @@ -179,6 +188,7 @@ class PostProcessorWorker: signal_attr .emit (*payload_args ) else : print (f"(Worker Log - Unrecognized Emitter for {signal_type_str }): {payload_args [0 ]if payload_args else ''}") + def logger (self ,message ): self ._emit_signal ('progress',message ) def check_cancel (self ): @@ -408,6 +418,7 @@ class PostProcessorWorker: total_size_bytes = 0 download_successful_flag = False last_exception_for_retry_later = None + is_permanent_error = False data_to_write_io = None response_for_this_attempt = None @@ -512,12 +523,14 @@ class PostProcessorWorker: except requests.exceptions.RequestException as e: self.logger(f" ❌ Download Error (Non-Retryable): {api_original_filename}. Error: {e}") last_exception_for_retry_later = e + is_permanent_error = True if ("Failed to resolve" in str(e) or "NameResolutionError" in str(e)): self.logger(" 💡 This looks like a DNS resolution problem. Please check your internet connection, DNS settings, or VPN.") break except Exception as e: self.logger(f" ❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}") last_exception_for_retry_later = e + is_permanent_error = True break finally: if response_for_this_attempt: @@ -544,7 +557,6 @@ class PostProcessorWorker: self.logger(f" ⚠️ Failed to rescue file despite matching size. Error: {rescue_exc}") if self.check_cancel() or (skip_event and skip_event.is_set()) or (self.pause_event and self.pause_event.is_set() and not download_successful_flag): - self.logger(f" ⚠️ Download process interrupted for {api_original_filename}.") if downloaded_part_file_path and os.path.exists(downloaded_part_file_path): try: os.remove(downloaded_part_file_path) @@ -556,20 +568,39 @@ class PostProcessorWorker: if self._check_pause(f"Post-download hash check for '{api_original_filename}'"): return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None - ### START OF CHANGE 1: INSERT THIS NEW BLOCK ### - with self.downloaded_file_hashes_lock: - if calculated_file_hash in self.downloaded_file_hashes: - self.logger(f" -> Skip (Content Duplicate): '{api_original_filename}' is identical to a file already downloaded. Discarding.") - # Clean up the downloaded temporary file as it's a duplicate. - if downloaded_part_file_path and os.path.exists(downloaded_part_file_path): - try: - os.remove(downloaded_part_file_path) - except OSError: - pass - return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None + # --- Final Corrected Duplicate Handling Logic --- + should_skip = False + with self.downloaded_hash_counts_lock: + current_count = self.downloaded_hash_counts.get(calculated_file_hash, 0) + + # Default to not skipping + decision_to_skip = False + + # Apply logic based on mode + if self.keep_duplicates_mode == DUPLICATE_HANDLING_HASH: + if current_count >= 1: + decision_to_skip = True + self.logger(f" -> Skip (Content Duplicate): '{api_original_filename}' is identical to a file already downloaded. Discarding.") + + elif self.keep_duplicates_mode == DUPLICATE_HANDLING_KEEP_ALL and self.keep_duplicates_limit > 0: + if current_count >= self.keep_duplicates_limit: + decision_to_skip = True + self.logger(f" -> Skip (Duplicate Limit Reached): Limit of {self.keep_duplicates_limit} for this file content has been met. Discarding.") + + # If we are NOT skipping this file, we MUST increment the count. + if not decision_to_skip: + self.downloaded_hash_counts[calculated_file_hash] = current_count + 1 + + should_skip = decision_to_skip + # --- End of Final Corrected Logic --- + + if should_skip: + if downloaded_part_file_path and os.path.exists(downloaded_part_file_path): + try: + os.remove(downloaded_part_file_path) + except OSError: pass + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None - # If the content is unique, we proceed to save. - # Now, handle FILENAME collisions by adding a numeric suffix if needed. effective_save_folder = target_folder_path base_name, extension = os.path.splitext(filename_to_save_in_main_path) counter = 1 @@ -603,8 +634,6 @@ class PostProcessorWorker: with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash) - with self.downloaded_files_lock: - self.downloaded_files.add(final_filename_on_disk) final_filename_saved_for_return = final_filename_on_disk self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024 * 1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'") @@ -629,15 +658,12 @@ class PostProcessorWorker: except Exception as save_err: self.logger(f"->>Save Fail for '{final_filename_on_disk}': {save_err}") - # --- START OF THE FIX --- - # If saving/renaming fails, try to clean up the orphaned .part file. if downloaded_part_file_path and os.path.exists(downloaded_part_file_path): try: os.remove(downloaded_part_file_path) self.logger(f" Cleaned up temporary file after save error: {os.path.basename(downloaded_part_file_path)}") except OSError as e_rem: self.logger(f" ⚠️ Could not clean up temporary file '{os.path.basename(downloaded_part_file_path)}' after save error: {e_rem}") - # --- END OF THE FIX --- if os.path.exists(final_save_path): try: @@ -656,22 +682,24 @@ class PostProcessorWorker: if data_to_write_io and hasattr(data_to_write_io, 'close'): data_to_write_io.close() else: - # This is the path if the download was not successful after all retries self.logger(f"->>Download Fail for '{api_original_filename}' (Post ID: {original_post_id_for_log}). No successful download after retries.") - retry_later_details = { + details_for_failure = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post } - return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, retry_later_details + if is_permanent_error: + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, details_for_failure + else: + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, details_for_failure + def process(self): - # Default "empty" result tuple. It will be updated before any return path. result_tuple = (0, 0, [], [], [], None, None) try: if self._check_pause(f"Post processing for ID {self.post.get('id', 'N/A')}"): result_tuple = (0, 0, [], [], [], None, None) - return result_tuple # Return for the direct caller + return result_tuple if self.check_cancel(): result_tuple = (0, 0, [], [], [], None, None) return result_tuple @@ -1236,6 +1264,25 @@ class PostProcessorWorker: else: self.logger(f" ⚠️ Skipping invalid attachment {idx + 1} for post {post_id}: {str(att_info)[:100]}") + # --- START: Conditionally de-duplicate files from API response --- + # Only de-duplicate by URL if we are in the default hash-skipping mode. + # If the user wants to keep everything, we must process all entries from the API. + if self.keep_duplicates_mode == DUPLICATE_HANDLING_HASH: + unique_files_by_url = {} + for file_info in all_files_from_post_api: + # Use the file URL as a unique key to avoid processing the same file multiple times + file_url = file_info.get('url') + if file_url and file_url not in unique_files_by_url: + unique_files_by_url[file_url] = file_info + + original_count = len(all_files_from_post_api) + all_files_from_post_api = list(unique_files_by_url.values()) + new_count = len(all_files_from_post_api) + + if new_count < original_count: + self.logger(f" De-duplicated file list: Removed {original_count - new_count} redundant entries from the API response.") + # --- END: Conditionally de-duplicate files from API response --- + if self.scan_content_for_images and post_content_html and not self.extract_links_only: self.logger(f" Scanning post content for additional image URLs (Post ID: {post_id})...") parsed_input_url = urlparse(self.api_url_input) @@ -1528,9 +1575,7 @@ class PostProcessorWorker: 'service': self.service, 'user_id': self.user_id, } - if self.check_cancel(): - self.logger(f" Post {post_id} processing interrupted/cancelled.") - else: + if not self.check_cancel(): self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}") if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0: @@ -1542,18 +1587,14 @@ class PostProcessorWorker: except OSError as e_rmdir: self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}") - # After all processing, set the final result tuple for the normal execution path result_tuple = (total_downloaded_this_post, total_skipped_this_post, kept_original_filenames_for_log, retryable_failures_this_post, permanent_failures_this_post, history_data_for_this_post, None) finally: - # This block is GUARANTEED to execute, sending the signal for multi-threaded mode. self._emit_signal('worker_finished', result_tuple) - - # This line is the critical fix. It ensures the method always returns a tuple - # for the single-threaded mode that directly calls it. + return result_tuple class DownloadThread(QThread): @@ -1578,7 +1619,7 @@ class DownloadThread(QThread): download_thumbnails=False, service=None, user_id=None, downloaded_files=None, downloaded_file_hashes=None, downloaded_files_lock=None, downloaded_file_hashes_lock=None, skip_words_list=None, - skip_words_scope=SKIP_SCOPE_FILES, + skip_words_scope='files', show_external_links=False, extract_links_only=False, num_file_threads_for_worker=1, @@ -1587,10 +1628,10 @@ class DownloadThread(QThread): target_post_id_from_initial_url=None, manga_mode_active=False, unwanted_keywords=None, - manga_filename_style=STYLE_POST_TITLE, - char_filter_scope=CHAR_SCOPE_FILES, + manga_filename_style='post_title', + char_filter_scope='files', remove_from_filename_words_list=None, - manga_date_prefix=MANGA_DATE_PREFIX_DEFAULT, + manga_date_prefix='', allow_multipart_download=True, selected_cookie_file=None, override_output_dir=None, @@ -1602,6 +1643,10 @@ class DownloadThread(QThread): creator_download_folder_ignore_words=None, use_date_prefix_for_subfolder=False, keep_in_post_duplicates=False, + keep_duplicates_mode='hash', + keep_duplicates_limit=0, + downloaded_hash_counts=None, + downloaded_hash_counts_lock=None, cookie_text="", session_file_path=None, session_lock=None, @@ -1609,7 +1654,7 @@ class DownloadThread(QThread): text_export_format='txt', single_pdf_mode=False, project_root_dir=None, - processed_post_ids=None): # Add processed_post_ids here + processed_post_ids=None): super().__init__() self.api_url_input = api_url_input self.output_dir = output_dir @@ -1660,6 +1705,10 @@ class DownloadThread(QThread): self.creator_download_folder_ignore_words = creator_download_folder_ignore_words self.use_date_prefix_for_subfolder = use_date_prefix_for_subfolder self.keep_in_post_duplicates = keep_in_post_duplicates + self.keep_duplicates_mode = keep_duplicates_mode + self.keep_duplicates_limit = keep_duplicates_limit + self.downloaded_hash_counts = downloaded_hash_counts + self.downloaded_hash_counts_lock = downloaded_hash_counts_lock self.manga_global_file_counter_ref = manga_global_file_counter_ref self.session_file_path = session_file_path self.session_lock = session_lock @@ -1668,7 +1717,7 @@ class DownloadThread(QThread): self.text_export_format = text_export_format self.single_pdf_mode = single_pdf_mode self.project_root_dir = project_root_dir - self.processed_post_ids = processed_post_ids if processed_post_ids is not None else [] # Add this line + self.processed_post_ids = processed_post_ids if processed_post_ids is not None else [] if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") @@ -1769,6 +1818,10 @@ class DownloadThread(QThread): manga_date_file_counter_ref=self.manga_date_file_counter_ref, use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder, keep_in_post_duplicates=self.keep_in_post_duplicates, + keep_duplicates_mode=self.keep_duplicates_mode, + keep_duplicates_limit=self.keep_duplicates_limit, + downloaded_hash_counts=self.downloaded_hash_counts, + downloaded_hash_counts_lock=self.downloaded_hash_counts_lock, creator_download_folder_ignore_words=self.creator_download_folder_ignore_words, session_file_path=self.session_file_path, session_lock=self.session_lock, diff --git a/src/ui/dialogs/KeepDuplicatesDialog.py b/src/ui/dialogs/KeepDuplicatesDialog.py new file mode 100644 index 0000000..ec8bf7f --- /dev/null +++ b/src/ui/dialogs/KeepDuplicatesDialog.py @@ -0,0 +1,122 @@ +# KeepDuplicatesDialog.py + +# --- PyQt5 Imports --- +from PyQt5.QtWidgets import ( + QDialog, QVBoxLayout, QGroupBox, QRadioButton, + QPushButton, QHBoxLayout, QButtonGroup, QLabel, QLineEdit +) +from PyQt5.QtGui import QIntValidator + +# --- Local Application Imports --- +from ...i18n.translator import get_translation +from ...config.constants import DUPLICATE_HANDLING_HASH, DUPLICATE_HANDLING_KEEP_ALL + +class KeepDuplicatesDialog(QDialog): + """A dialog to choose the duplicate handling method, with a limit option.""" + + def __init__(self, current_mode, current_limit, parent=None): + super().__init__(parent) + self.parent_app = parent + self.selected_mode = current_mode + self.limit = current_limit + + self._init_ui() + self._retranslate_ui() + + if self.parent_app and hasattr(self.parent_app, '_apply_theme_to_widget'): + self.parent_app._apply_theme_to_widget(self) + + # Set the initial state based on current settings + if current_mode == DUPLICATE_HANDLING_KEEP_ALL: + self.radio_keep_everything.setChecked(True) + self.limit_input.setText(str(current_limit) if current_limit > 0 else "") + else: + self.radio_skip_by_hash.setChecked(True) + self.limit_input.setEnabled(False) + + def _init_ui(self): + """Initializes the UI components.""" + main_layout = QVBoxLayout(self) + info_label = QLabel() + info_label.setWordWrap(True) + main_layout.addWidget(info_label) + + options_group = QGroupBox() + options_layout = QVBoxLayout(options_group) + self.button_group = QButtonGroup(self) + + # --- Skip by Hash Option --- + self.radio_skip_by_hash = QRadioButton() + self.button_group.addButton(self.radio_skip_by_hash) + options_layout.addWidget(self.radio_skip_by_hash) + + # --- Keep Everything Option with Limit Input --- + keep_everything_layout = QHBoxLayout() + self.radio_keep_everything = QRadioButton() + self.button_group.addButton(self.radio_keep_everything) + keep_everything_layout.addWidget(self.radio_keep_everything) + keep_everything_layout.addStretch(1) + + self.limit_label = QLabel() + self.limit_input = QLineEdit() + self.limit_input.setValidator(QIntValidator(0, 99)) + self.limit_input.setFixedWidth(50) + keep_everything_layout.addWidget(self.limit_label) + keep_everything_layout.addWidget(self.limit_input) + options_layout.addLayout(keep_everything_layout) + + main_layout.addWidget(options_group) + + # --- OK and Cancel buttons --- + button_layout = QHBoxLayout() + self.ok_button = QPushButton() + self.cancel_button = QPushButton() + button_layout.addStretch(1) + button_layout.addWidget(self.ok_button) + button_layout.addWidget(self.cancel_button) + main_layout.addLayout(button_layout) + + # --- Connections --- + self.ok_button.clicked.connect(self.accept) + self.cancel_button.clicked.connect(self.reject) + self.radio_keep_everything.toggled.connect(self.limit_input.setEnabled) + + def _tr(self, key, default_text=""): + if self.parent_app and callable(get_translation): + return get_translation(self.parent_app.current_selected_language, key, default_text) + return default_text + + def _retranslate_ui(self): + """Sets the text for UI elements.""" + self.setWindowTitle(self._tr("duplicates_dialog_title", "Duplicate Handling Options")) + self.findChild(QLabel).setText(self._tr("duplicates_dialog_info", + "Choose how to handle files that have identical content to already downloaded files.")) + self.findChild(QGroupBox).setTitle(self._tr("duplicates_dialog_group_title", "Mode")) + + self.radio_skip_by_hash.setText(self._tr("duplicates_dialog_skip_hash", "Skip by Hash (Recommended)")) + self.radio_keep_everything.setText(self._tr("duplicates_dialog_keep_all", "Keep Everything")) + + self.limit_label.setText(self._tr("duplicates_limit_label", "Limit:")) + self.limit_input.setPlaceholderText(self._tr("duplicates_limit_placeholder", "0=all")) + self.limit_input.setToolTip(self._tr("duplicates_limit_tooltip", + "Set a limit for identical files to keep. 0 means no limit.")) + + self.ok_button.setText(self._tr("ok_button", "OK")) + self.cancel_button.setText(self._tr("cancel_button_text_simple", "Cancel")) + + def accept(self): + """Sets the selected mode and limit when OK is clicked.""" + if self.radio_keep_everything.isChecked(): + self.selected_mode = DUPLICATE_HANDLING_KEEP_ALL + try: + self.limit = int(self.limit_input.text()) if self.limit_input.text() else 0 + except ValueError: + self.limit = 0 + else: + self.selected_mode = DUPLICATE_HANDLING_HASH + self.limit = 0 + super().accept() + + def get_selected_options(self): + """Returns the chosen mode and limit as a dictionary.""" + return {"mode": self.selected_mode, "limit": self.limit} \ No newline at end of file diff --git a/src/ui/dialogs/SupportDialog.py b/src/ui/dialogs/SupportDialog.py index 6f219df..776357b 100644 --- a/src/ui/dialogs/SupportDialog.py +++ b/src/ui/dialogs/SupportDialog.py @@ -1,14 +1,35 @@ # src/ui/dialogs/SupportDialog.py -from PyQt5.QtWidgets import ( - QDialog, QVBoxLayout, QLabel, QFrame, QDialogButtonBox -) -from PyQt5.QtCore import Qt -from PyQt5.QtGui import QFont +# --- Standard Library Imports --- +import sys +import os -# Assuming execution from project root, so we can import from utils +# --- PyQt5 Imports --- +from PyQt5.QtWidgets import ( + QDialog, QVBoxLayout, QLabel, QFrame, QDialogButtonBox, QGridLayout +) +from PyQt5.QtCore import Qt, QSize +from PyQt5.QtGui import QFont, QPixmap + +# --- Local Application Imports --- from ...utils.resolution import get_dark_theme +# --- Helper function for robust asset loading --- +def get_asset_path(filename): + """ + Gets the absolute path to a file in the assets folder, + handling both development and frozen (PyInstaller) environments. + """ + if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'): + # Running in a PyInstaller bundle + base_path = sys._MEIPASS + else: + # Running in a normal Python environment from src/ui/dialogs/ + base_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + return os.path.join(base_path, 'assets', filename) + + class SupportDialog(QDialog): """ A dialog to show support and donation options. @@ -17,11 +38,16 @@ class SupportDialog(QDialog): super().__init__(parent) self.parent_app = parent self.setWindowTitle("❤️ Support the Developer") - self.setMinimumWidth(400) + self.setMinimumWidth(450) + self._init_ui() + self._apply_theme() + + def _init_ui(self): + """Initializes all UI components and layouts for the dialog.""" # Main layout - layout = QVBoxLayout(self) - layout.setSpacing(15) + main_layout = QVBoxLayout(self) + main_layout.setSpacing(15) # Title Label title_label = QLabel("Thank You for Your Support!") @@ -30,7 +56,7 @@ class SupportDialog(QDialog): font.setBold(True) title_label.setFont(font) title_label.setAlignment(Qt.AlignCenter) - layout.addWidget(title_label) + main_layout.addWidget(title_label) # Informational Text info_label = QLabel( @@ -39,50 +65,86 @@ class SupportDialog(QDialog): ) info_label.setWordWrap(True) info_label.setAlignment(Qt.AlignCenter) - layout.addWidget(info_label) + main_layout.addWidget(info_label) # Separator line = QFrame() line.setFrameShape(QFrame.HLine) line.setFrameShadow(QFrame.Sunken) - layout.addWidget(line) + main_layout.addWidget(line) - # Donation Options - options_layout = QVBoxLayout() - options_layout.setSpacing(10) + # --- Donation Options Layout (using a grid for icons and text) --- + options_layout = QGridLayout() + options_layout.setSpacing(18) + options_layout.setColumnStretch(0, 1) # Add stretch to center the content horizontally + options_layout.setColumnStretch(3, 1) + + link_font = self.font() + link_font.setPointSize(12) + link_font.setBold(True) + + scale = getattr(self.parent_app, 'scale_factor', 1.0) + icon_size = int(32 * scale) # --- Ko-fi --- - kofi_label = QLabel( + kofi_icon_label = QLabel() + kofi_pixmap = QPixmap(get_asset_path("kofi.png")) + if not kofi_pixmap.isNull(): + kofi_icon_label.setPixmap(kofi_pixmap.scaled(QSize(icon_size, icon_size), Qt.KeepAspectRatio, Qt.SmoothTransformation)) + + kofi_text_label = QLabel( '' '☕ Buy me a Ko-fi' '' ) - kofi_label.setOpenExternalLinks(True) - kofi_label.setAlignment(Qt.AlignCenter) - font.setPointSize(12) - kofi_label.setFont(font) - options_layout.addWidget(kofi_label) + kofi_text_label.setOpenExternalLinks(True) + kofi_text_label.setFont(link_font) + options_layout.addWidget(kofi_icon_label, 0, 1, Qt.AlignRight | Qt.AlignVCenter) + options_layout.addWidget(kofi_text_label, 0, 2, Qt.AlignLeft | Qt.AlignVCenter) + # --- GitHub Sponsors --- - github_label = QLabel( - '' + github_icon_label = QLabel() + github_pixmap = QPixmap(get_asset_path("github_sponsors.png")) + if not github_pixmap.isNull(): + github_icon_label.setPixmap(github_pixmap.scaled(QSize(icon_size, icon_size), Qt.KeepAspectRatio, Qt.SmoothTransformation)) + + github_text_label = QLabel( + '' '💜 Sponsor on GitHub' '' ) - github_label.setOpenExternalLinks(True) - github_label.setAlignment(Qt.AlignCenter) - github_label.setFont(font) - options_layout.addWidget(github_label) + github_text_label.setOpenExternalLinks(True) + github_text_label.setFont(link_font) - layout.addLayout(options_layout) + options_layout.addWidget(github_icon_label, 1, 1, Qt.AlignRight | Qt.AlignVCenter) + options_layout.addWidget(github_text_label, 1, 2, Qt.AlignLeft | Qt.AlignVCenter) + + # --- Buy Me a Coffee (New) --- + bmac_icon_label = QLabel() + bmac_pixmap = QPixmap(get_asset_path("bmac.png")) + if not bmac_pixmap.isNull(): + bmac_icon_label.setPixmap(bmac_pixmap.scaled(QSize(icon_size, icon_size), Qt.KeepAspectRatio, Qt.SmoothTransformation)) + + bmac_text_label = QLabel( + '' + '🍺 Buy Me a Coffee' + '' + ) + bmac_text_label.setOpenExternalLinks(True) + bmac_text_label.setFont(link_font) + + options_layout.addWidget(bmac_icon_label, 2, 1, Qt.AlignRight | Qt.AlignVCenter) + options_layout.addWidget(bmac_text_label, 2, 2, Qt.AlignLeft | Qt.AlignVCenter) + + main_layout.addLayout(options_layout) # Close Button self.button_box = QDialogButtonBox(QDialogButtonBox.Close) self.button_box.rejected.connect(self.reject) - layout.addWidget(self.button_box) + main_layout.addWidget(self.button_box) - self.setLayout(layout) - self._apply_theme() + self.setLayout(main_layout) def _apply_theme(self): """Applies the current theme from the parent application.""" @@ -90,4 +152,4 @@ class SupportDialog(QDialog): scale = getattr(self.parent_app, 'scale_factor', 1) self.setStyleSheet(get_dark_theme(scale)) else: - self.setStyleSheet("") + self.setStyleSheet("") \ No newline at end of file diff --git a/src/ui/main_window.py b/src/ui/main_window.py index 9f47880..51897d8 100644 --- a/src/ui/main_window.py +++ b/src/ui/main_window.py @@ -12,7 +12,7 @@ import subprocess import datetime import requests import unicodedata -from collections import deque +from collections import deque, defaultdict import threading from concurrent.futures import Future, ThreadPoolExecutor ,CancelledError from urllib .parse import urlparse @@ -57,6 +57,7 @@ from .dialogs.ConfirmAddAllDialog import ConfirmAddAllDialog from .dialogs.MoreOptionsDialog import MoreOptionsDialog from .dialogs.SinglePDF import create_single_pdf_from_content from .dialogs.SupportDialog import SupportDialog +from .dialogs.KeepDuplicatesDialog import KeepDuplicatesDialog class DynamicFilterHolder: """A thread-safe class to hold and update character filters during a download.""" @@ -223,6 +224,10 @@ class DownloaderApp (QWidget ): self.more_filter_scope = None self.text_export_format = 'pdf' self.single_pdf_setting = False + self.keep_duplicates_mode = DUPLICATE_HANDLING_HASH + self.keep_duplicates_limit = 0 # 0 means no limit + self.downloaded_hash_counts = defaultdict(int) + self.downloaded_hash_counts_lock = threading.Lock() self.session_temp_files = [] print(f"ℹ️ Known.txt will be loaded/saved at: {self.config_file}") @@ -695,6 +700,8 @@ class DownloaderApp (QWidget ): self .cookie_text_input .textChanged .connect (self ._handle_cookie_text_manual_change ) if hasattr (self ,'download_thumbnails_checkbox'): self .download_thumbnails_checkbox .toggled .connect (self ._handle_thumbnail_mode_change ) + if hasattr(self, 'keep_duplicates_checkbox'): + self.keep_duplicates_checkbox.toggled.connect(self._handle_keep_duplicates_toggled) self .gui_update_timer .timeout .connect (self ._process_worker_queue ) self .gui_update_timer .start (100 ) self .log_signal .connect (self .handle_main_log ) @@ -2628,7 +2635,8 @@ class DownloaderApp (QWidget ): self .file_progress_label .setText ("") def start_download(self, direct_api_url=None, override_output_dir=None, is_restore=False): - self.is_finishing = False + self.is_finishing = False + self.downloaded_hash_counts.clear() global KNOWN_NAMES, BackendDownloadThread, PostProcessorWorker, extract_post_info, clean_folder_name, MAX_FILE_THREADS_PER_POST_OR_WORKER self._clear_stale_temp_files() @@ -3071,7 +3079,6 @@ class DownloaderApp (QWidget ): else: log_messages.append(f" Mode: Creator Feed") log_messages.append(f" Post Processing: {'Multi-threaded (' + str(effective_num_post_workers) + ' workers)' if effective_num_post_workers > 1 else 'Single-threaded (1 worker)'}") - log_messages.append(f" ↳ File Downloads per Worker: Up to {effective_num_file_threads_per_worker} concurrent file(s)") pr_log = "All" if start_page or end_page: pr_log = f"{f'From {start_page} ' if start_page else ''}{'to ' if start_page and end_page else ''}{f'{end_page}' if end_page else (f'Up to {end_page}' if end_page else (f'From {start_page}' if start_page else 'Specific Range'))}".strip() @@ -3192,7 +3199,11 @@ class DownloaderApp (QWidget ): 'session_lock': self.session_lock, 'creator_download_folder_ignore_words': creator_folder_ignore_words_for_run, 'use_date_prefix_for_subfolder': self.date_prefix_checkbox.isChecked() if hasattr(self, 'date_prefix_checkbox') else False, - 'keep_in_post_duplicates': self.keep_duplicates_checkbox.isChecked() if hasattr(self, 'keep_duplicates_checkbox') else False, + 'keep_in_post_duplicates': self.keep_duplicates_checkbox.isChecked(), + 'keep_duplicates_mode': self.keep_duplicates_mode, + 'keep_duplicates_limit': self.keep_duplicates_limit, + 'downloaded_hash_counts': self.downloaded_hash_counts, + 'downloaded_hash_counts_lock': self.downloaded_hash_counts_lock, 'skip_current_file_flag': None, 'processed_post_ids': processed_post_ids_for_restore, } @@ -3222,6 +3233,8 @@ class DownloaderApp (QWidget ): 'allow_multipart_download', 'use_cookie', 'cookie_text', 'app_base_dir', 'selected_cookie_file', 'override_output_dir', 'project_root_dir', 'text_only_scope', 'text_export_format', 'single_pdf_mode', + 'use_date_prefix_for_subfolder','keep_in_post_duplicates', 'keep_duplicates_mode', + 'keep_duplicates_limit', 'downloaded_hash_counts', 'downloaded_hash_counts_lock', 'processed_post_ids' ] args_template['skip_current_file_flag'] = None @@ -3494,9 +3507,9 @@ class DownloaderApp (QWidget ): 'skip_current_file_flag','manga_date_file_counter_ref','scan_content_for_images', 'manga_mode_active','manga_filename_style','manga_date_prefix','text_only_scope', 'text_export_format', 'single_pdf_mode', - 'use_date_prefix_for_subfolder','keep_in_post_duplicates','manga_global_file_counter_ref', + 'use_date_prefix_for_subfolder','keep_in_post_duplicates','keep_duplicates_mode','manga_global_file_counter_ref', 'creator_download_folder_ignore_words','session_file_path','project_root_dir','session_lock', - 'processed_post_ids' # This key was missing + 'processed_post_ids', 'keep_duplicates_limit', 'downloaded_hash_counts', 'downloaded_hash_counts_lock' ] num_file_dl_threads_for_each_worker = worker_args_template.get('num_file_threads_for_worker', 1) @@ -3537,7 +3550,7 @@ class DownloaderApp (QWidget ): if permanent: self.permanently_failed_files_for_dialog.extend(permanent) - self._update_error_button_count() # <-- THIS IS THE FIX + self._update_error_button_count() # Other result handling if history_data: self._add_to_history_candidates(history_data) @@ -3676,7 +3689,7 @@ class DownloaderApp (QWidget ): self .external_links_checkbox ,self .manga_mode_checkbox ,self .manga_rename_toggle_button ,self .use_cookie_checkbox ,self .cookie_text_input ,self .cookie_browse_button , self .multipart_toggle_button ,self .radio_only_audio , self .character_search_input ,self .new_char_input ,self .add_char_button ,self .add_to_filter_button ,self .delete_char_button , - self .reset_button + self .reset_button, self.radio_more, self.keep_duplicates_checkbox ] widgets_to_enable_on_pause =self ._get_configurable_widgets_on_pause () @@ -4063,6 +4076,42 @@ class DownloaderApp (QWidget ): self .set_ui_enabled (True ) self .cancellation_message_logged_this_session =False + def _handle_keep_duplicates_toggled(self, checked): + """Shows the duplicate handling dialog when the checkbox is checked.""" + if checked: + dialog = KeepDuplicatesDialog(self.keep_duplicates_mode, self.keep_duplicates_limit, self) + if dialog.exec_() == QDialog.Accepted: + options = dialog.get_selected_options() + self.keep_duplicates_mode = options["mode"] + self.keep_duplicates_limit = options["limit"] + + limit_text = f"with a limit of {self.keep_duplicates_limit}" if self.keep_duplicates_limit > 0 else "with no limit" + self.log_signal.emit(f"ℹ️ Duplicate handling mode set to: '{self.keep_duplicates_mode}' {limit_text}.") + self.log_signal.emit(f"") + self.log_signal.emit(f"") + + # Log warning only after the confirmation and only if the specific mode is selected + if self.keep_duplicates_mode == DUPLICATE_HANDLING_KEEP_ALL: + self._log_keep_everything_warning() + else: + self.keep_duplicates_checkbox.setChecked(False) + else: + self.keep_duplicates_mode = DUPLICATE_HANDLING_HASH + self.keep_duplicates_limit = 0 + self.log_signal.emit("ℹ️ 'Keep Duplicates' disabled. Reverted to default hash checking.") + + def _log_keep_everything_warning(self): + """Logs a formatted warning when the 'Keep Everything' mode is selected.""" + + warning_html = ( + f'{HTML_PREFIX}' + '
This mode will download every single file from the API response for a post,
' + 'even if they have identical content. This can lead to many redundant files.
' + 'Recommendation: Consider using the limit feature.
' + 'For example, setting the limit to 2 will download a file with the same content up to two times.
' + ) + self.log_signal.emit(warning_html) def _handle_thumbnail_mode_change (self ,thumbnails_checked ): """Handles UI changes when 'Download Thumbnails Only' is toggled.""" @@ -4266,9 +4315,7 @@ class DownloaderApp (QWidget ): if self .progress_log_label :self .progress_log_label .setText (self ._tr ("progress_log_label_text","📜 Progress Log:")) def reset_application_state(self): - # --- Stop all background tasks and threads --- if self._is_download_active(): - # Try to cancel download thread if self.download_thread and self.download_thread.isRunning(): self.log_signal.emit("⚠️ Cancelling active download thread for reset...") self.cancellation_event.set() @@ -4308,6 +4355,14 @@ class DownloaderApp (QWidget ): if self.pause_event: self.pause_event.clear() self.is_paused = False + + self.log_signal.emit("🔄 Resetting application state to defaults...") + self._clear_session_file() + self._reset_ui_to_defaults() + self._load_saved_download_location() + self.main_log_output.clear() + self.external_log_output.clear() + # --- Reset UI and all state --- self.log_signal.emit("🔄 Resetting application state to defaults...") @@ -4407,6 +4462,10 @@ class DownloaderApp (QWidget ): self.use_multithreading_checkbox.setChecked(True) if self.favorite_mode_checkbox: self.favorite_mode_checkbox.setChecked(False) + + if hasattr(self, 'keep_duplicates_checkbox'): + self.keep_duplicates_checkbox.setChecked(False) + self.external_links_checkbox.setChecked(False) if self.manga_mode_checkbox: self.manga_mode_checkbox.setChecked(False) @@ -4451,7 +4510,6 @@ class DownloaderApp (QWidget ): if self.pause_event: self.pause_event.clear() - # Reset extracted/external links state self.external_link_queue.clear() self.extracted_links_cache = [] self._is_processing_external_link_queue = False diff --git a/src/utils/resolution.py b/src/utils/resolution.py index efc78d8..a872027 100644 --- a/src/utils/resolution.py +++ b/src/utils/resolution.py @@ -559,11 +559,13 @@ def get_dark_theme(scale=1): border: 1px solid #6A6A6A; padding: {tooltip_padding}px; border-radius: 3px; + font-size: {font_size}pt; }} QSplitter::handle {{ background-color: #5A5A5A; }} QSplitter::handle:horizontal {{ width: {int(5 * scale)}px; }} QSplitter::handle:vertical {{ height: {int(5 * scale)}px; }} """ + def apply_theme_to_app(main_app, theme_name, initial_load=False): """ Applies the selected theme and scaling to the main application window.