diff --git a/downloader_utils.py b/downloader_utils.py index 8c50244..c834d01 100644 --- a/downloader_utils.py +++ b/downloader_utils.py @@ -30,6 +30,7 @@ from io import BytesIO STYLE_POST_TITLE = "post_title" STYLE_ORIGINAL_NAME = "original_name" +STYLE_DATE_BASED = "date_based" # For manga date-based sequential naming SKIP_SCOPE_FILES = "files" SKIP_SCOPE_POSTS = "posts" @@ -313,16 +314,37 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non if cancellation_event and cancellation_event.is_set(): return if all_posts_for_manga_mode: - logger(f" Manga Mode: Fetched {len(all_posts_for_manga_mode)} total posts. Reversing order...") - all_posts_for_manga_mode.reverse() + logger(f" Manga Mode: Fetched {len(all_posts_for_manga_mode)} total posts. Sorting by publication date (oldest first)...") + + def sort_key_tuple(post): + published_date_str = post.get('published') + added_date_str = post.get('added') + post_id_str = post.get('id', "0") + + primary_sort_val = "0000-00-00T00:00:00" # Default for missing dates (effectively oldest) + if published_date_str: + primary_sort_val = published_date_str + elif added_date_str: + logger(f" ⚠️ Post ID {post_id_str} missing 'published' date, using 'added' date '{added_date_str}' for primary sorting.") + primary_sort_val = added_date_str + else: + logger(f" ⚠️ Post ID {post_id_str} missing both 'published' and 'added' dates. 
Placing at start of sort (using default earliest date).") + + secondary_sort_val = 0 # Default for non-integer IDs + try: + secondary_sort_val = int(post_id_str) + except ValueError: + logger(f" ⚠️ Post ID '{post_id_str}' is not a valid integer for secondary sorting, using 0.") + + return (primary_sort_val, secondary_sort_val) + + all_posts_for_manga_mode.sort(key=sort_key_tuple) # Sorts ascending by (date, id) for i in range(0, len(all_posts_for_manga_mode), page_size): if cancellation_event and cancellation_event.is_set(): logger(" Manga mode post yielding cancelled.") break yield all_posts_for_manga_mode[i:i + page_size] - else: - logger(" Manga Mode: No posts found to process.") return current_page_num = 1 @@ -428,13 +450,13 @@ class PostProcessorSignals(QObject): class PostProcessorWorker: def __init__(self, post_data, download_root, known_names, - filter_character_list, + filter_character_list, emitter, # Changed signals to emitter unwanted_keywords, filter_mode, skip_zip, skip_rar, use_subfolders, use_post_subfolders, target_post_id_from_initial_url, custom_folder_name, compress_images, download_thumbnails, service, user_id, - api_url_input, cancellation_event, signals, + api_url_input, cancellation_event, downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock, - skip_words_list=None, + skip_words_list=None, skip_words_scope=SKIP_SCOPE_FILES, show_external_links=False, extract_links_only=False, @@ -444,7 +466,8 @@ class PostProcessorWorker: char_filter_scope=CHAR_SCOPE_FILES, remove_from_filename_words_list=None, allow_multipart_download=True, - ): # Removed duplicate_file_mode and session-wide tracking + manga_date_file_counter_ref=None, # New parameter for date-based manga naming + ): self.post = post_data self.download_root = download_root self.known_names = known_names @@ -463,7 +486,10 @@ class PostProcessorWorker: self.user_id = user_id self.api_url_input = api_url_input self.cancellation_event = cancellation_event 
- self.signals = signals + self.emitter = emitter # Store the emitter + if not self.emitter: + # This case should ideally be prevented by the caller + raise ValueError("PostProcessorWorker requires an emitter (signals object or queue).") self.skip_current_file_flag = skip_current_file_flag self.downloaded_files = downloaded_files if downloaded_files is not None else set() @@ -482,23 +508,35 @@ class PostProcessorWorker: self.char_filter_scope = char_filter_scope self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else [] self.allow_multipart_download = allow_multipart_download - # self.duplicate_file_mode and session-wide tracking removed + self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store the reference if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found.") self.compress_images = False - def logger(self, message): - if self.signals and hasattr(self.signals, 'progress_signal'): - self.signals.progress_signal.emit(message) + def _emit_signal(self, signal_type_str, *payload_args): + """Helper to emit signal either directly or via queue.""" + if isinstance(self.emitter, queue.Queue): + self.emitter.put({'type': signal_type_str, 'payload': payload_args}) + elif self.emitter and hasattr(self.emitter, f"{signal_type_str}_signal"): + # Assuming emitter is a QObject with pyqtSignal attributes + # e.g., emitter.progress_signal.emit(*payload_args) + signal_attr = getattr(self.emitter, f"{signal_type_str}_signal") + signal_attr.emit(*payload_args) else: - print(f"(Worker Log - No Signal): {message}") + # Fallback or error logging if emitter is not recognized + print(f"(Worker Log - Unrecognized Emitter for {signal_type_str}): {payload_args[0] if payload_args else ''}") + + def logger(self, message): + self._emit_signal('progress', message) def check_cancel(self): return self.cancellation_event.is_set() def 
_download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event, - post_title="", file_index_in_post=0, num_files_in_this_post=1): + # emitter_for_file_ops, # This will be self.emitter + post_title="", file_index_in_post=0, num_files_in_this_post=1, + manga_date_file_counter_ref=None): # Added manga_date_file_counter_ref was_original_name_kept_flag = False final_filename_saved_for_return = "" # target_folder_path is the base character/post folder. @@ -537,8 +575,33 @@ class PostProcessorWorker: else: filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}" else: - filename_to_save_in_main_path = clean_filename(api_original_filename) + filename_to_save_in_main_path = clean_filename(api_original_filename) # Fallback to original if no title self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save_in_main_path}'.") + elif self.manga_filename_style == STYLE_DATE_BASED: + current_thread_name = threading.current_thread().name + self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Manga Date Mode. Counter Ref ID: {id(manga_date_file_counter_ref)}, Value before access: {manga_date_file_counter_ref}") + + if manga_date_file_counter_ref is not None and len(manga_date_file_counter_ref) == 2: + counter_val_for_filename = -1 + counter_lock = manga_date_file_counter_ref[1] + + self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Attempting to acquire lock. Counter value before lock: {manga_date_file_counter_ref[0]}") + with counter_lock: + self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Lock acquired. 
Counter value at lock acquisition: {manga_date_file_counter_ref[0]}") + counter_val_for_filename = manga_date_file_counter_ref[0] + # Increment is done here, under lock, before this number is used by another thread. + # This number is now "reserved" for this file. + # If this file download fails, this number is "lost" (sequence will have a gap). This is acceptable. + manga_date_file_counter_ref[0] += 1 + self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Incremented counter. New counter value: {manga_date_file_counter_ref[0]}. Filename will use: {counter_val_for_filename}") + + filename_to_save_in_main_path = f"{counter_val_for_filename:03d}{original_ext}" + self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Lock released. Generated filename: {filename_to_save_in_main_path}") + else: + self.logger(f"⚠️ Manga Date Mode: Counter ref not provided or malformed for '{api_original_filename}'. Using original. Ref: {manga_date_file_counter_ref}") + # This log line below had a typo, fixed to reflect Date Mode context + filename_to_save_in_main_path = clean_filename(api_original_filename) + self.logger(f"⚠️ Manga mode (Date Based Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path}' for post {original_post_id_for_log}.") else: self.logger(f"⚠️ Manga mode: Unknown filename style '{self.manga_filename_style}'. 
Defaulting to original filename for '{api_original_filename}'.") filename_to_save_in_main_path = clean_filename(api_original_filename) @@ -629,9 +692,8 @@ class PostProcessorWorker: if attempt_num_single_stream > 0: self.logger(f" Retrying download for '{api_original_filename}' (Overall Attempt {attempt_num_single_stream + 1}/{max_retries + 1})...") time.sleep(retry_delay * (2**(attempt_num_single_stream - 1))) - - if self.signals and hasattr(self.signals, 'file_download_status_signal'): - self.signals.file_download_status_signal.emit(True) + + self._emit_signal('file_download_status', True) response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True) response.raise_for_status() @@ -644,14 +706,14 @@ class PostProcessorWorker: if attempt_multipart: response.close() - if self.signals and hasattr(self.signals, 'file_download_status_signal'): - self.signals.file_download_status_signal.emit(False) + self._emit_signal('file_download_status', False) # .part file is always based on the main target_folder_path and filename_to_save_in_main_path mp_save_path_base_for_part = os.path.join(target_folder_path, filename_to_save_in_main_path) mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts( - file_url, mp_save_path_base_for_part, total_size_bytes, num_parts_for_file, headers, - api_original_filename, self.signals, self.cancellation_event, skip_event, self.logger + file_url, mp_save_path_base_for_part, total_size_bytes, num_parts_for_file, headers, api_original_filename, + emitter_for_multipart=self.emitter, # Pass the worker's emitter + cancellation_event=self.cancellation_event, skip_event=skip_event, logger_func=self.logger ) if mp_success: download_successful_flag = True @@ -676,9 +738,8 @@ class PostProcessorWorker: if chunk: file_content_buffer.write(chunk); md5_hasher.update(chunk) current_attempt_downloaded_bytes += len(chunk) - if time.time() - last_progress_time > 1 and total_size_bytes > 0 and \ - self.signals and 
hasattr(self.signals, 'file_progress_signal'): - self.signals.file_progress_signal.emit(api_original_filename, (current_attempt_downloaded_bytes, total_size_bytes)) + if time.time() - last_progress_time > 1 and total_size_bytes > 0: + self._emit_signal('file_progress', api_original_filename, (current_attempt_downloaded_bytes, total_size_bytes)) last_progress_time = time.time() if self.check_cancel() or (skip_event and skip_event.is_set()): @@ -703,12 +764,11 @@ class PostProcessorWorker: self.logger(f" ❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}") if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close(); break finally: - if self.signals and hasattr(self.signals, 'file_download_status_signal'): - self.signals.file_download_status_signal.emit(False) + self._emit_signal('file_download_status', False) - if self.signals and hasattr(self.signals, 'file_progress_signal'): - final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes - self.signals.file_progress_signal.emit(api_original_filename, (downloaded_size_bytes, final_total_for_progress)) + # Final progress update for single stream + final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes + self._emit_signal('file_progress', api_original_filename, (downloaded_size_bytes, final_total_for_progress)) if self.check_cancel() or (skip_event and skip_event.is_set()): self.logger(f" ⚠️ Download process interrupted for {api_original_filename}.") @@ -787,14 +847,19 @@ class PostProcessorWorker: # --- Final Numeric Suffixing in the effective_save_folder --- final_filename_on_disk = filename_after_compression # This is the name after potential compression - temp_base, temp_ext = os.path.splitext(final_filename_on_disk) - suffix_counter = 1 - while os.path.exists(os.path.join(effective_save_folder, 
final_filename_on_disk)): - final_filename_on_disk = f"{temp_base}_{suffix_counter}{temp_ext}" - suffix_counter += 1 - - if final_filename_on_disk != filename_after_compression: - self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')") + # If Manga Date Based style, we trust the counter from main.py. + # Suffixing should not be needed if the counter initialization was correct. + # If a file with the generated DDD.ext name exists, it will be overwritten. + if not (self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED): + temp_base, temp_ext = os.path.splitext(final_filename_on_disk) + suffix_counter = 1 + # Check for existing file and apply suffix only if not in date-based manga mode + while os.path.exists(os.path.join(effective_save_folder, final_filename_on_disk)): + final_filename_on_disk = f"{temp_base}_{suffix_counter}{temp_ext}" + suffix_counter += 1 + if final_filename_on_disk != filename_after_compression: # Log if a suffix was applied + self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')") + # else: for STYLE_DATE_BASED, final_filename_on_disk remains filename_after_compression. # --- Save File --- final_save_path = os.path.join(effective_save_folder, final_filename_on_disk) @@ -824,7 +889,7 @@ class PostProcessorWorker: with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash) with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Track by logical name - + # The counter for STYLE_DATE_BASED is now incremented *before* filename generation, under lock. 
final_filename_saved_for_return = final_filename_on_disk self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'") # Session-wide base name tracking removed. @@ -1002,15 +1067,14 @@ class PostProcessorWorker: if self.filter_character_list_objects: if self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match: self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.") - if self.signals and hasattr(self.signals, 'missed_character_post_signal'): - self.signals.missed_character_post_signal.emit(post_title, "No title match for character filter") + self._emit_signal('missed_character_post', post_title, "No title match for character filter") return 0, num_potential_files_in_post, [] if self.char_filter_scope == CHAR_SCOPE_COMMENTS and \ not post_is_candidate_by_file_char_match_in_comment_scope and \ not post_is_candidate_by_comment_char_match: # MODIFIED: Check both file and comment match flags self.logger(f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id}', Title '{post_title[:50]}...'") if self.signals and hasattr(self.signals, 'missed_character_post_signal'): - self.signals.missed_character_post_signal.emit(post_title, "No character match in files or comments (Comments scope)") + self._emit_signal('missed_character_post', post_title, "No character match in files or comments (Comments scope)") return 0, num_potential_files_in_post, [] if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH): @@ -1027,8 +1091,7 @@ class PostProcessorWorker: (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and \ not post_is_candidate_by_title_char_match: self.logger(f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): 
Title '{post_title[:50]}' doesn't match filters.") - if self.signals and hasattr(self.signals, 'missed_character_post_signal'): - self.signals.missed_character_post_signal.emit(post_title, "Manga Mode: No title match for character filter (Title/Both scope)") + self._emit_signal('missed_character_post', post_title, "Manga Mode: No title match for character filter (Title/Both scope)") return 0, num_potential_files_in_post, [] if not isinstance(post_attachments, list): @@ -1099,9 +1162,8 @@ class PostProcessorWorker: for link_url, link_text in unique_links_data.items(): platform = get_link_platform(link_url) if platform not in scraped_platforms: - if self.signals and hasattr(self.signals, 'external_link_signal'): - self.signals.external_link_signal.emit(post_title, link_text, link_url, platform) - links_emitted_count +=1 + self._emit_signal('external_link', post_title, link_text, link_url, platform) + links_emitted_count +=1 if links_emitted_count > 0: self.logger(f" 🔗 Found {links_emitted_count} potential external link(s) in post content.") except Exception as e: self.logger(f"⚠️ Error parsing post content for links: {e}\n{traceback.format_exc(limit=2)}") @@ -1147,6 +1209,16 @@ class PostProcessorWorker: if not all_files_from_post_api: self.logger(f" -> No image thumbnails found for post {post_id} in thumbnail-only mode.") return 0, 0, [] + + # Sort files within the post by original name if in Date Based manga mode + if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED: + def natural_sort_key_for_files(file_api_info): + name = file_api_info.get('_original_name_for_log', '').lower() + # Split into text and number parts for natural sorting (e.g., "file2.jpg" before "file10.jpg") + return [int(text) if text.isdigit() else text for text in re.split('([0-9]+)', name)] + + all_files_from_post_api.sort(key=natural_sort_key_for_files) + self.logger(f" Manga Date Mode: Sorted {len(all_files_from_post_api)} files within post {post_id} by original name 
for sequential numbering.") if not all_files_from_post_api: @@ -1274,9 +1346,10 @@ class PostProcessorWorker: headers, post_id, self.skip_current_file_flag, - post_title, - file_idx, - num_files_in_this_post_for_naming + post_title=post_title, # Keyword argument + manga_date_file_counter_ref=self.manga_date_file_counter_ref if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED else None, + file_index_in_post=file_idx, # Changed to keyword argument + num_files_in_this_post=num_files_in_this_post_for_naming # Changed to keyword argument )) for future in as_completed(futures_list): @@ -1298,8 +1371,8 @@ class PostProcessorWorker: self.logger(f"❌ File download task for post {post_id} resulted in error: {exc_f}") total_skipped_this_post += 1 - if self.signals and hasattr(self.signals, 'file_progress_signal'): - self.signals.file_progress_signal.emit("", None) + # Clear file progress display after all files in a post are done + self._emit_signal('file_progress', "", None) if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled."); else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}") @@ -1338,7 +1411,8 @@ class DownloadThread(QThread): char_filter_scope=CHAR_SCOPE_FILES, remove_from_filename_words_list=None, allow_multipart_download=True, - ): # Removed duplicate_file_mode and session-wide tracking + manga_date_file_counter_ref=None, # New parameter + ): super().__init__() self.api_url_input = api_url_input self.output_dir = output_dir @@ -1379,7 +1453,7 @@ class DownloadThread(QThread): self.char_filter_scope = char_filter_scope self.remove_from_filename_words_list = remove_from_filename_words_list self.allow_multipart_download = allow_multipart_download - # self.duplicate_file_mode and session-wide tracking removed + self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store for passing to worker if self.compress_images and Image is 
None: self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") self.compress_images = False @@ -1404,13 +1478,15 @@ class DownloadThread(QThread): grand_list_of_kept_original_filenames = [] was_process_cancelled = False + # This DownloadThread (being a QThread) will use its own signals object + # to communicate with PostProcessorWorker if needed. worker_signals_obj = PostProcessorSignals() try: worker_signals_obj.progress_signal.connect(self.progress_signal) worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal) worker_signals_obj.file_progress_signal.connect(self.file_progress_signal) worker_signals_obj.external_link_signal.connect(self.external_link_signal) - worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal) # New connection + worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal) self.logger(" Starting post fetch (single-threaded download process)...") post_generator = download_from_api( @@ -1441,8 +1517,8 @@ class DownloadThread(QThread): compress_images=self.compress_images, download_thumbnails=self.download_thumbnails, service=self.service, user_id=self.user_id, api_url_input=self.api_url_input, - cancellation_event=self.cancellation_event, - signals=worker_signals_obj, + cancellation_event=self.cancellation_event, # emitter is PostProcessorSignals for single-thread + emitter=worker_signals_obj, # Pass the signals object as the emitter downloaded_files=self.downloaded_files, downloaded_file_hashes=self.downloaded_file_hashes, downloaded_files_lock=self.downloaded_files_lock, @@ -1458,7 +1534,8 @@ class DownloadThread(QThread): char_filter_scope=self.char_filter_scope, remove_from_filename_words_list=self.remove_from_filename_words_list, allow_multipart_download=self.allow_multipart_download, - ) # Removed duplicate_file_mode and session-wide tracking + manga_date_file_counter_ref=self.manga_date_file_counter_ref, # 
Pass it here + ) try: dl_count, skip_count, kept_originals_this_post = post_processing_worker.process() grand_total_downloaded_files += dl_count @@ -1494,7 +1571,7 @@ class DownloadThread(QThread): worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal) worker_signals_obj.external_link_signal.disconnect(self.external_link_signal) worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal) - worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal) # New disconnection + worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal) except (TypeError, RuntimeError) as e: self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}") diff --git a/main.py b/main.py index ff9b386..68026c5 100644 --- a/main.py +++ b/main.py @@ -19,8 +19,8 @@ from PyQt5.QtGui import ( ) from PyQt5.QtWidgets import ( QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton, - QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, - QRadioButton, QButtonGroup, QCheckBox, QSplitter, QSizePolicy, QDialog, QStackedWidget, + QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, QRadioButton, QButtonGroup, QCheckBox, QSplitter, + QDialog, QStackedWidget, QFrame, QAbstractButton ) @@ -91,6 +91,7 @@ CONFIG_APP_NAME_MAIN = "ApplicationSettings" MANGA_FILENAME_STYLE_KEY = "mangaFilenameStyleV1" STYLE_POST_TITLE = "post_title" STYLE_ORIGINAL_NAME = "original_name" +STYLE_DATE_BASED = "date_based" # New style for date-based naming SKIP_WORDS_SCOPE_KEY = "skipWordsScopeV1" ALLOW_MULTIPART_DOWNLOAD_KEY = "allowMultipartDownloadV1" @@ -487,6 +488,11 @@ class DownloaderApp(QWidget): self.download_counter = 0 self.skip_counter = 0 + # For handling signals from worker threads via a queue + self.worker_to_gui_queue = queue.Queue() + self.gui_update_timer = QTimer(self) + self.actual_gui_signals = PostProcessorSignals() # Renamed from 
self.worker_signals + self.worker_signals = PostProcessorSignals() self.prompt_mutex = QMutex() self._add_character_response = None @@ -561,23 +567,27 @@ class DownloaderApp(QWidget): def _connect_signals(self): - if hasattr(self.worker_signals, 'progress_signal'): - self.worker_signals.progress_signal.connect(self.handle_main_log) - if hasattr(self.worker_signals, 'file_progress_signal'): - self.worker_signals.file_progress_signal.connect(self.update_file_progress_display) - if hasattr(self.worker_signals, 'missed_character_post_signal'): # New - self.worker_signals.missed_character_post_signal.connect(self.handle_missed_character_post) - if hasattr(self.worker_signals, 'external_link_signal'): - self.worker_signals.external_link_signal.connect(self.handle_external_link_signal) + # Signals from the GUI's perspective (emitted by _process_worker_queue or directly) + self.actual_gui_signals.progress_signal.connect(self.handle_main_log) + self.actual_gui_signals.file_progress_signal.connect(self.update_file_progress_display) + self.actual_gui_signals.missed_character_post_signal.connect(self.handle_missed_character_post) + self.actual_gui_signals.external_link_signal.connect(self.handle_external_link_signal) + self.actual_gui_signals.file_download_status_signal.connect(lambda status: None) # Placeholder if needed, or connect to UI + # Timer for processing the worker queue + self.gui_update_timer.timeout.connect(self._process_worker_queue) + self.gui_update_timer.start(100) # Check queue every 100ms + + # Direct GUI signals self.log_signal.connect(self.handle_main_log) self.add_character_prompt_signal.connect(self.prompt_add_character) self.character_prompt_response_signal.connect(self.receive_add_character_result) self.overall_progress_signal.connect(self.update_progress_display) self.finished_signal.connect(self.download_finished) - self.external_link_signal.connect(self.handle_external_link_signal) - 
self.file_progress_signal.connect(self.update_file_progress_display) + # self.external_link_signal.connect(self.handle_external_link_signal) # Covered by actual_gui_signals + # self.file_progress_signal.connect(self.update_file_progress_display) # Covered by actual_gui_signals + # UI element connections if hasattr(self, 'character_search_input'): self.character_search_input.textChanged.connect(self.filter_character_list) if hasattr(self, 'external_links_checkbox'): self.external_links_checkbox.toggled.connect(self.update_external_links_setting) if hasattr(self, 'thread_count_input'): self.thread_count_input.textChanged.connect(self.update_multithreading_label) @@ -608,8 +618,33 @@ class DownloaderApp(QWidget): if self.char_filter_scope_toggle_button: self.char_filter_scope_toggle_button.clicked.connect(self._cycle_char_filter_scope) - if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode) + if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode) # Keep this if it's separate + def _process_worker_queue(self): + """Processes messages from the worker queue and emits Qt signals from the GUI thread.""" + while not self.worker_to_gui_queue.empty(): + try: + item = self.worker_to_gui_queue.get_nowait() + signal_type = item.get('type') + payload = item.get('payload', tuple()) # Default to empty tuple + + if signal_type == 'progress': + self.actual_gui_signals.progress_signal.emit(*payload) + elif signal_type == 'file_download_status': # Changed from 'file_status' + self.actual_gui_signals.file_download_status_signal.emit(*payload) + elif signal_type == 'external_link': # Changed from 'ext_link' + self.actual_gui_signals.external_link_signal.emit(*payload) + elif signal_type == 'file_progress': + self.actual_gui_signals.file_progress_signal.emit(*payload) + elif signal_type == 'missed_character_post': + 
self.actual_gui_signals.missed_character_post_signal.emit(*payload) + else: + self.log_signal.emit(f"⚠️ Unknown signal type from worker queue: {signal_type}") + self.worker_to_gui_queue.task_done() + except queue.Empty: + break # Should not happen with while not empty, but good practice + except Exception as e: + self.log_signal.emit(f"❌ Error processing worker queue: {e}") def load_known_names_from_util(self): global KNOWN_NAMES @@ -719,7 +754,8 @@ class DownloaderApp(QWidget): self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765") self.link_input.setToolTip("Enter the full URL of a Kemono/Coomer creator's page or a specific post.\nExample (Creator): https://kemono.su/patreon/user/12345\nExample (Post): https://kemono.su/patreon/user/12345/post/98765") self.link_input.textChanged.connect(self.update_custom_folder_visibility) - url_page_layout.addWidget(self.link_input, 1) + url_page_layout.addWidget(self.link_input, 1) # URL input takes available space + self.page_range_label = QLabel("Page Range:") self.page_range_label.setStyleSheet("font-weight: bold; padding-left: 10px;") @@ -1166,7 +1202,7 @@ class DownloaderApp(QWidget): self.update_page_range_enabled_state() if self.manga_mode_checkbox: self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked()) - if hasattr(self, 'link_input'): self.link_input.textChanged.connect(self.update_page_range_enabled_state) + if hasattr(self, 'link_input'): self.link_input.textChanged.connect(lambda: self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)) # Also trigger manga UI update self.load_known_names_from_util() self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) if hasattr(self, 'radio_group') and self.radio_group.checkedButton(): @@ -1174,6 +1210,7 @@ class DownloaderApp(QWidget): self._update_manga_filename_style_button_text() self._update_skip_scope_button_text() 
self._update_char_filter_scope_button_text() + self._update_multithreading_for_date_mode() # Ensure correct initial state def _center_on_screen(self): """Centers the widget on the screen.""" @@ -1958,6 +1995,16 @@ class DownloaderApp(QWidget): " Downloads as: \"001.jpg\", \"002.jpg\".\n\n" "Click to change to: Post Title" ) + elif self.manga_filename_style == STYLE_DATE_BASED: + self.manga_rename_toggle_button.setText("Name: Date Based") + self.manga_rename_toggle_button.setToolTip( + "Manga Filename Style: Date Based\n\n" + "When Manga/Comic Mode is active for a creator feed:\n" + "- Files will be named sequentially (001.ext, 002.ext, ...) based on post publication order (oldest to newest).\n" + "- To ensure correct numbering, multithreading for post processing is automatically disabled when this style is active.\n\n" + "Click to change to: Post Title" + ) + else: self.manga_rename_toggle_button.setText("Name: Unknown Style") self.manga_rename_toggle_button.setToolTip( @@ -1971,17 +2018,20 @@ class DownloaderApp(QWidget): current_style = self.manga_filename_style new_style = "" - if current_style == STYLE_POST_TITLE: + if current_style == STYLE_POST_TITLE: # Title -> Original new_style = STYLE_ORIGINAL_NAME - reply = QMessageBox.information(self, "Manga Filename Preference", - "Using 'Name: Post Title' (first file by title, others original) is recommended for Manga Mode.\n\n" - "Using 'Name: Original File' for all files might lead to less organized downloads if original names are inconsistent or non-sequential.\n\n" - "Proceed with using 'Name: Original File' for all files?", - QMessageBox.Yes | QMessageBox.No, QMessageBox.No) - if reply == QMessageBox.No: - self.log_signal.emit("ℹ️ Manga filename style change to 'Original File' cancelled by user.") - return - elif current_style == STYLE_ORIGINAL_NAME: + # The warning for original name style + # reply = QMessageBox.information(self, "Manga Filename Preference", + # "Using 'Name: Post Title' (first file by title, 
others original) is recommended for Manga Mode.\n\n" + # "Using 'Name: Original File' for all files might lead to less organized downloads if original names are inconsistent or non-sequential.\n\n" + # "Proceed with using 'Name: Original File' for all files?", + # QMessageBox.Yes | QMessageBox.No, QMessageBox.No) + # if reply == QMessageBox.No: + # self.log_signal.emit("ℹ️ Manga filename style change to 'Original File' cancelled by user.") + # return + elif current_style == STYLE_ORIGINAL_NAME: # Original -> Date + new_style = STYLE_DATE_BASED + elif current_style == STYLE_DATE_BASED: # Date -> Title new_style = STYLE_POST_TITLE else: self.log_signal.emit(f"⚠️ Unknown current manga filename style: {current_style}. Resetting to default ('{STYLE_POST_TITLE}').") @@ -1991,6 +2041,7 @@ class DownloaderApp(QWidget): self.settings.setValue(MANGA_FILENAME_STYLE_KEY, self.manga_filename_style) self.settings.sync() self._update_manga_filename_style_button_text() + self._update_multithreading_for_date_mode() # Update multithreading state based on new style self.log_signal.emit(f"ℹ️ Manga filename style changed to: '{self.manga_filename_style}'") @@ -2036,6 +2087,7 @@ class DownloaderApp(QWidget): if not enable_char_filter_widgets: self.character_input.clear() if self.char_filter_scope_toggle_button: self.char_filter_scope_toggle_button.setEnabled(enable_char_filter_widgets) + self._update_multithreading_for_date_mode() # Update multithreading state based on manga mode def filter_character_list(self, search_text): @@ -2067,6 +2119,29 @@ class DownloaderApp(QWidget): self.thread_count_label.setEnabled(True) self.update_multithreading_label(self.thread_count_input.text()) + def _update_multithreading_for_date_mode(self): + """ + Checks if Manga Mode is ON and 'Date Based' style is selected. + If so, disables multithreading. Otherwise, enables it. 
+ """ + if not hasattr(self, 'manga_mode_checkbox') or not hasattr(self, 'use_multithreading_checkbox'): + return # UI elements not ready + + manga_on = self.manga_mode_checkbox.isChecked() + is_date_style = (self.manga_filename_style == STYLE_DATE_BASED) + + if manga_on and is_date_style: + if self.use_multithreading_checkbox.isChecked() or self.use_multithreading_checkbox.isEnabled(): + # Only log if a change is made or it was previously enabled + if self.use_multithreading_checkbox.isChecked(): + self.log_signal.emit("ℹ️ Manga Date Mode: Multithreading for post processing has been disabled to ensure correct sequential file numbering.") + self.use_multithreading_checkbox.setChecked(False) + self.use_multithreading_checkbox.setEnabled(False) + self._handle_multithreading_toggle(False) # Update label to show "1 Thread" + else: + if not self.use_multithreading_checkbox.isEnabled(): # Only re-enable if it was disabled by this logic + self.use_multithreading_checkbox.setEnabled(True) + self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) # Update label based on current state def update_progress_display(self, total_posts, processed_posts): if total_posts > 0: @@ -2325,16 +2400,63 @@ class DownloaderApp(QWidget): self.progress_label.setText("Progress: Initializing...") + self.manga_date_file_counter_obj = [1, threading.Lock()] # Default: [value, lock] + if manga_mode and self.manga_filename_style == STYLE_DATE_BASED and not extract_links_only: + # Determine the directory to scan for existing numbered files for this series + # This path should be the "series" root, before any "per-post" subfolders. 
+ series_scan_directory = output_dir # Base download location + + if use_subfolders: # If 'Separate Folders by Name/Title' is ON + # Try to get folder name from character filter (manga series title) + if filter_character_list_to_pass and filter_character_list_to_pass[0] and filter_character_list_to_pass[0].get("name"): + # Assuming the first filter is the series name for folder creation + series_folder_name = clean_folder_name(filter_character_list_to_pass[0]["name"]) + series_scan_directory = os.path.join(series_scan_directory, series_folder_name) + elif service and user_id: # Fallback if no char filter, but subfolders are on + # This might group multiple series from one creator if no distinct char filter is used. + # The counter is per download operation, so this is consistent. + creator_based_folder_name = clean_folder_name(user_id) # Or a more specific creator name convention + series_scan_directory = os.path.join(series_scan_directory, creator_based_folder_name) + # If neither, series_scan_directory remains output_dir (files go directly there if use_subfolders is on but no name found) + # If use_subfolders is OFF, files go into output_dir. So, series_scan_directory remains output_dir. + + highest_num = 0 + if os.path.isdir(series_scan_directory): + self.log_signal.emit(f"ℹ️ Manga Date Mode: Scanning for existing numbered files in '{series_scan_directory}' and its subdirectories...") + for dirpath, _, filenames_in_dir in os.walk(series_scan_directory): + for filename_to_check in filenames_in_dir: + # Check the base name (without extension) for leading digits + base_name_no_ext = os.path.splitext(filename_to_check)[0] + match = re.match(r"(\d{3,})", base_name_no_ext) # Matches "001" from "001.jpg" or "001_13.jpg" + if match: + try: + num = int(match.group(1)) + if num > highest_num: + highest_num = num + except ValueError: + continue + else: + self.log_signal.emit(f"ℹ️ Manga Date Mode: Scan directory '{series_scan_directory}' not found or is not a directory. 
Starting counter at 1.") + self.manga_date_file_counter_obj = [highest_num + 1, threading.Lock()] # [value, lock] + self.log_signal.emit(f"ℹ️ Manga Date Mode: Initialized file counter at {self.manga_date_file_counter_obj[0]}.") effective_num_post_workers = 1 effective_num_file_threads_per_worker = 1 + # Determine if multithreading for posts should be used if post_id_from_url: + # Single post URL: no post workers, but file threads can be > 1 if use_multithreading_enabled_by_checkbox: effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER)) else: - if use_multithreading_enabled_by_checkbox: - effective_num_post_workers = max(1, min(num_threads_from_gui, MAX_THREADS)) - effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER)) + # Creator feed + if manga_mode and self.manga_filename_style == STYLE_DATE_BASED: + # Force single post worker for date-based manga mode + effective_num_post_workers = 1 + # File threads per worker can still be > 1 if user set it + effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER)) if use_multithreading_enabled_by_checkbox else 1 + elif use_multithreading_enabled_by_checkbox: # Standard creator feed with multithreading enabled + effective_num_post_workers = max(1, min(num_threads_from_gui, MAX_THREADS)) # For posts + effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER)) # For files within each post worker log_messages = ["="*40, f"🚀 Starting {'Link Extraction' if extract_links_only else ('Archive Download' if backend_filter_mode == 'archive' else 'Download')} @ {time.strftime('%Y-%m-%d %H:%M:%S')}", f" URL: {api_url}"] @@ -2389,7 +2511,12 @@ class DownloaderApp(QWidget): log_messages.append(f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).") should_use_multithreading_for_posts = 
use_multithreading_enabled_by_checkbox and not post_id_from_url - log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading_for_posts else 'Single-threaded (posts)'}") + # Adjust log message if date-based manga mode forced single thread + if manga_mode and self.manga_filename_style == STYLE_DATE_BASED and not post_id_from_url: + log_messages.append(f" Threading: Single-threaded (posts) - Enforced by Manga Date Mode") + should_use_multithreading_for_posts = False # Ensure this reflects the forced state + else: + log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading_for_posts else 'Single-threaded (posts)'}") if should_use_multithreading_for_posts: log_messages.append(f" Number of Post Worker Threads: {effective_num_post_workers}") log_messages.append("="*40) @@ -2432,9 +2559,10 @@ class DownloaderApp(QWidget): 'manga_mode_active': manga_mode, 'unwanted_keywords': unwanted_keywords_for_folders, 'cancellation_event': self.cancellation_event, - 'signals': self.worker_signals, + # 'emitter' will be set based on single/multi-thread mode below 'manga_filename_style': self.manga_filename_style, 'num_file_threads_for_worker': effective_num_file_threads_per_worker, + 'manga_date_file_counter_ref': self.manga_date_file_counter_obj if manga_mode and self.manga_filename_style == STYLE_DATE_BASED else None, 'allow_multipart_download': allow_multipart, # 'duplicate_file_mode' and session-wide tracking removed } @@ -2442,9 +2570,11 @@ class DownloaderApp(QWidget): try: if should_use_multithreading_for_posts: self.log_signal.emit(f" Initializing multi-threaded {'link extraction' if extract_links_only else 'download'} with {effective_num_post_workers} post workers...") + args_template['emitter'] = self.worker_to_gui_queue # For multi-threaded, use the queue self.start_multi_threaded_download(num_post_workers=effective_num_post_workers, **args_template) else: self.log_signal.emit(f" Initializing single-threaded {'link 
extraction' if extract_links_only else 'download'}...") + # For single-threaded, DownloadThread creates its own PostProcessorSignals and passes it as emitter. dt_expected_keys = [ 'api_url_input', 'output_dir', 'known_names_copy', 'cancellation_event', 'filter_character_list', 'filter_mode', 'skip_zip', 'skip_rar', @@ -2454,7 +2584,8 @@ class DownloaderApp(QWidget): 'downloaded_files_lock', 'downloaded_file_hashes_lock', 'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'show_external_links', 'extract_links_only', 'num_file_threads_for_worker', - 'start_page', 'end_page', 'target_post_id_from_initial_url', 'duplicate_file_mode', + 'start_page', 'end_page', 'target_post_id_from_initial_url', + 'manga_date_file_counter_ref', # Ensure this is passed for single thread mode 'manga_mode_active', 'unwanted_keywords', 'manga_filename_style', 'allow_multipart_download' ] @@ -2478,7 +2609,6 @@ class DownloaderApp(QWidget): if hasattr(self.download_thread, 'external_link_signal'): self.download_thread.external_link_signal.connect(self.handle_external_link_signal) if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.connect(self.update_file_progress_display) if hasattr(self.download_thread, 'missed_character_post_signal'): # New - self.download_thread.missed_character_post_signal.connect(self.handle_missed_character_post) self.download_thread.start() self.log_signal.emit("✅ Single download thread (for posts) started.") @@ -2513,9 +2643,10 @@ class DownloaderApp(QWidget): fetch_error_occurred = False manga_mode_active_for_fetch = worker_args_template.get('manga_mode_active', False) - signals_for_worker = worker_args_template.get('signals') - if not signals_for_worker: - self.log_signal.emit("❌ CRITICAL ERROR: Signals object missing for worker in _fetch_and_queue_posts."); + # In multi-threaded mode, the emitter is the queue. 
+ emitter_for_worker = worker_args_template.get('emitter') # This should be self.worker_to_gui_queue + if not emitter_for_worker: # Should not happen if logic in start_download is correct + self.log_signal.emit("❌ CRITICAL ERROR: Emitter (queue) missing for worker in _fetch_and_queue_posts."); self.finished_signal.emit(0,0,True, []); return @@ -2572,13 +2703,13 @@ class DownloaderApp(QWidget): ppw_expected_keys = [ 'post_data', 'download_root', 'known_names', 'filter_character_list', 'unwanted_keywords', 'filter_mode', 'skip_zip', 'skip_rar', 'use_subfolders', 'use_post_subfolders', - 'target_post_id_from_initial_url', 'custom_folder_name', 'compress_images', + 'target_post_id_from_initial_url', 'custom_folder_name', 'compress_images', 'emitter', 'download_thumbnails', 'service', 'user_id', 'api_url_input', - 'cancellation_event', 'signals', 'downloaded_files', 'downloaded_file_hashes', + 'cancellation_event', 'downloaded_files', 'downloaded_file_hashes', 'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list', 'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'show_external_links', 'extract_links_only', 'allow_multipart_download', - 'num_file_threads', 'skip_current_file_flag', + 'num_file_threads', 'skip_current_file_flag', 'manga_date_file_counter_ref', 'manga_mode_active', 'manga_filename_style' ] # Ensure 'allow_multipart_download' is also considered for optional keys if it has a default in PostProcessorWorker @@ -2586,7 +2717,7 @@ class DownloaderApp(QWidget): 'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'remove_from_filename_words_list', 'show_external_links', 'extract_links_only', 'duplicate_file_mode', # Added duplicate_file_mode here 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style', - 'processed_base_filenames_session_wide', 'processed_base_filenames_session_wide_lock' # Add these + 'manga_date_file_counter_ref' # Add this } for post_data_item in 
all_posts_data: @@ -2600,7 +2731,7 @@ class DownloaderApp(QWidget): for key in ppw_expected_keys: if key == 'post_data': worker_init_args[key] = post_data_item elif key == 'num_file_threads': worker_init_args[key] = num_file_dl_threads_for_each_worker - elif key == 'signals': worker_init_args[key] = signals_for_worker + elif key == 'emitter': worker_init_args[key] = emitter_for_worker # Pass the queue elif key in worker_args_template: worker_init_args[key] = worker_args_template[key] elif key in ppw_optional_keys_with_defaults: pass else: missing_keys.append(key) @@ -2777,7 +2908,6 @@ class DownloaderApp(QWidget): if kept_original_names_list is None: kept_original_names_list = [] - status_message = "Cancelled by user" if cancelled_by_user else "Finished" summary_log = "="*40 @@ -2801,10 +2931,6 @@ class DownloaderApp(QWidget): self.log_signal.emit(HTML_PREFIX + html_list_items) self.log_signal.emit("="*40) - - self.progress_label.setText(f"{status_message}: {total_downloaded} downloaded, {total_skipped} skipped."); self.file_progress_label.setText("") - if not cancelled_by_user: self._try_process_next_external_link() - if self.download_thread: try: if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.disconnect(self.handle_main_log) @@ -2815,15 +2941,23 @@ class DownloaderApp(QWidget): if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.disconnect(self.update_file_progress_display) if hasattr(self.download_thread, 'missed_character_post_signal'): # New self.download_thread.missed_character_post_signal.disconnect(self.handle_missed_character_post) - except (TypeError, RuntimeError) as e: self.log_signal.emit(f"ℹ️ Note during single-thread signal disconnection: {e}") - # Ensure these are cleared if the download_finished is for the single download thread - if self.download_thread and not self.download_thread.isRunning(): # Check if it was this thread - self.download_thread = None + 
except (TypeError, RuntimeError) as e: + self.log_signal.emit(f"ℹ️ Note during single-thread signal disconnection: {e}") + + if not self.download_thread.isRunning(): # Check if it was this thread + self.download_thread = None - if self.thread_pool: self.log_signal.emit(" Ensuring worker thread pool is shut down..."); self.thread_pool.shutdown(wait=True, cancel_futures=True); self.thread_pool = None + self.progress_label.setText(f"{status_message}: {total_downloaded} downloaded, {total_skipped} skipped.") + self.file_progress_label.setText("") + if not cancelled_by_user: self._try_process_next_external_link() + + if self.thread_pool: + self.log_signal.emit(" Ensuring worker thread pool is shut down...") + self.thread_pool.shutdown(wait=True, cancel_futures=True) + self.thread_pool = None self.active_futures = [] - - self.set_ui_enabled(True); self.cancel_btn.setEnabled(False) + self.set_ui_enabled(True) + self.cancel_btn.setEnabled(False) def toggle_active_log_view(self): if self.current_log_view == 'progress': @@ -2889,8 +3023,6 @@ class DownloaderApp(QWidget): self._update_manga_filename_style_button_text() self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False) - self.log_signal.emit("✅ Application reset complete.") - def _reset_ui_to_defaults(self): self.link_input.clear(); self.dir_input.clear(); self.custom_folder_input.clear(); self.character_input.clear(); self.skip_words_input.clear(); self.start_page_input.clear(); self.end_page_input.clear(); self.new_char_input.clear(); @@ -3007,7 +3139,6 @@ class DownloaderApp(QWidget): self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting) self.log_signal.emit(f"ℹ️ Multi-part download set to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}") - if __name__ == '__main__': import traceback try: @@ -3071,4 +3202,3 @@ if __name__ == '__main__': print(f"An unhandled exception occurred: {e}") traceback.print_exc() 
print("--- END CRITICAL ERROR ---") - sys.exit(1) diff --git a/multipart_downloader.py b/multipart_downloader.py index e1798ab..d93976b 100644 --- a/multipart_downloader.py +++ b/multipart_downloader.py @@ -13,14 +13,14 @@ DOWNLOAD_CHUNK_SIZE_ITER = 1024 * 256 # 256KB for iter_content within a chunk d def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, headers, - part_num, total_parts, progress_data, cancellation_event, skip_event, logger, - signals=None, api_original_filename=None): # Added signals and api_original_filename + part_num, total_parts, progress_data, cancellation_event, skip_event, + logger_func, emitter=None, api_original_filename=None): # Renamed logger, signals to emitter """Downloads a single chunk of a file and writes it to the temp file.""" if cancellation_event and cancellation_event.is_set(): - logger(f" [Chunk {part_num + 1}/{total_parts}] Download cancelled before start.") + logger_func(f" [Chunk {part_num + 1}/{total_parts}] Download cancelled before start.") return 0, False # bytes_downloaded, success if skip_event and skip_event.is_set(): - logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event triggered before start.") + logger_func(f" [Chunk {part_num + 1}/{total_parts}] Skip event triggered before start.") return 0, False chunk_headers = headers.copy() @@ -44,15 +44,15 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, for attempt in range(MAX_CHUNK_DOWNLOAD_RETRIES + 1): if cancellation_event and cancellation_event.is_set(): - logger(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during retry loop.") + logger_func(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during retry loop.") return bytes_this_chunk, False if skip_event and skip_event.is_set(): - logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event during retry loop.") + logger_func(f" [Chunk {part_num + 1}/{total_parts}] Skip event during retry loop.") return bytes_this_chunk, False try: if attempt > 0: - 
logger(f" [Chunk {part_num + 1}/{total_parts}] Retrying download (Attempt {attempt}/{MAX_CHUNK_DOWNLOAD_RETRIES})...") + logger_func(f" [Chunk {part_num + 1}/{total_parts}] Retrying download (Attempt {attempt}/{MAX_CHUNK_DOWNLOAD_RETRIES})...") time.sleep(CHUNK_DOWNLOAD_RETRY_DELAY * (2 ** (attempt - 1))) # Reset speed calculation on retry last_speed_calc_time = time.time() @@ -60,14 +60,14 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, # Enhanced log message for chunk start log_msg = f" 🚀 [Chunk {part_num + 1}/{total_parts}] Starting download: bytes {start_byte}-{end_byte if end_byte != -1 else 'EOF'}" - logger(log_msg) + logger_func(log_msg) print(f"DEBUG_MULTIPART: {log_msg}") # Direct console print for debugging response = requests.get(chunk_url, headers=chunk_headers, timeout=(10, 120), stream=True) response.raise_for_status() # For 0-byte files, if end_byte was -1, we expect 0 content. if start_byte == 0 and end_byte == -1 and int(response.headers.get('Content-Length', 0)) == 0: - logger(f" [Chunk {part_num + 1}/{total_parts}] Confirmed 0-byte file.") + logger_func(f" [Chunk {part_num + 1}/{total_parts}] Confirmed 0-byte file.") with progress_data['lock']: progress_data['chunks_status'][part_num]['active'] = False progress_data['chunks_status'][part_num]['speed_bps'] = 0 @@ -77,10 +77,10 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, f.seek(start_byte) for data_segment in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE_ITER): if cancellation_event and cancellation_event.is_set(): - logger(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during data iteration.") + logger_func(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during data iteration.") return bytes_this_chunk, False if skip_event and skip_event.is_set(): - logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event during data iteration.") + logger_func(f" [Chunk {part_num + 1}/{total_parts}] Skip event during data 
iteration.") return bytes_this_chunk, False if data_segment: f.write(data_segment) @@ -103,26 +103,29 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, # Emit progress more frequently from within the chunk download if current_time - last_progress_emit_time_for_chunk > 0.1: # Emit up to 10 times/sec per chunk - if signals and hasattr(signals, 'file_progress_signal'): + if emitter: # Ensure we read the latest total downloaded from progress_data # Send a copy of the chunks_status list status_list_copy = [dict(s) for s in progress_data['chunks_status']] # Make a deep enough copy - signals.file_progress_signal.emit(api_original_filename, status_list_copy) + if isinstance(emitter, queue.Queue): + emitter.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)}) + elif hasattr(emitter, 'file_progress_signal'): # PostProcessorSignals-like + emitter.file_progress_signal.emit(api_original_filename, status_list_copy) last_progress_emit_time_for_chunk = current_time return bytes_this_chunk, True except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e: - logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Retryable error: {e}") + logger_func(f" ❌ [Chunk {part_num + 1}/{total_parts}] Retryable error: {e}") if attempt == MAX_CHUNK_DOWNLOAD_RETRIES: - logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Failed after {MAX_CHUNK_DOWNLOAD_RETRIES} retries.") + logger_func(f" ❌ [Chunk {part_num + 1}/{total_parts}] Failed after {MAX_CHUNK_DOWNLOAD_RETRIES} retries.") return bytes_this_chunk, False except requests.exceptions.RequestException as e: # Includes 4xx/5xx errors after raise_for_status - logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Non-retryable error: {e}") + logger_func(f" ❌ [Chunk {part_num + 1}/{total_parts}] Non-retryable error: {e}") return bytes_this_chunk, False except Exception as e: - logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Unexpected error: 
{e}\n{traceback.format_exc(limit=1)}") + logger_func(f" ❌ [Chunk {part_num + 1}/{total_parts}] Unexpected error: {e}\n{traceback.format_exc(limit=1)}") return bytes_this_chunk, False - + # Ensure final status is marked as inactive if loop finishes due to retries with progress_data['lock']: progress_data['chunks_status'][part_num]['active'] = False @@ -130,15 +133,15 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, return bytes_this_chunk, False # Should be unreachable -def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, - api_original_filename, signals, cancellation_event, skip_event, logger): +def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, api_original_filename, + emitter_for_multipart, cancellation_event, skip_event, logger_func): # Renamed signals, logger """ Downloads a file in multiple parts concurrently. Returns: (download_successful_flag, downloaded_bytes, calculated_file_hash, temp_file_handle_or_None) The temp_file_handle will be an open read-binary file handle to the .part file if successful, otherwise None. It is the responsibility of the caller to close this handle and rename/delete the .part file. 
""" - logger(f"⬇️ Initializing Multi-part Download ({num_parts} parts) for: '{api_original_filename}' (Size: {total_size / (1024*1024):.2f} MB)") + logger_func(f"⬇️ Initializing Multi-part Download ({num_parts} parts) for: '{api_original_filename}' (Size: {total_size / (1024*1024):.2f} MB)") temp_file_path = save_path + ".part" try: @@ -146,7 +149,7 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, if total_size > 0: f_temp.truncate(total_size) # Pre-allocate space except IOError as e: - logger(f" ❌ Error creating/truncating temp file '{temp_file_path}': {e}") + logger_func(f" ❌ Error creating/truncating temp file '{temp_file_path}': {e}") return False, 0, None, None chunk_size_calc = total_size // num_parts @@ -167,7 +170,7 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, chunk_actual_sizes.append(end - start + 1) if not chunks_ranges and total_size > 0: - logger(f" ⚠️ No valid chunk ranges for multipart download of '{api_original_filename}'. Aborting multipart.") + logger_func(f" ⚠️ No valid chunk ranges for multipart download of '{api_original_filename}'. 
Aborting multipart.") if os.path.exists(temp_file_path): os.remove(temp_file_path) return False, 0, None, None @@ -191,8 +194,9 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, chunk_futures.append(chunk_pool.submit( _download_individual_chunk, chunk_url=file_url, temp_file_path=temp_file_path, start_byte=start, end_byte=end, headers=headers, part_num=i, total_parts=num_parts, - progress_data=progress_data, cancellation_event=cancellation_event, skip_event=skip_event, logger=logger, - signals=signals, api_original_filename=api_original_filename # Pass them here + progress_data=progress_data, cancellation_event=cancellation_event, skip_event=skip_event, + logger_func=logger_func, emitter=emitter_for_multipart, # Pass emitter + api_original_filename=api_original_filename )) for future in as_completed(chunk_futures): @@ -201,21 +205,23 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, total_bytes_from_chunks += bytes_downloaded_this_chunk if not success_this_chunk: all_chunks_successful = False - # Progress is emitted from within _download_individual_chunk if cancellation_event and cancellation_event.is_set(): - logger(f" Multi-part download for '{api_original_filename}' cancelled by main event.") + logger_func(f" Multi-part download for '{api_original_filename}' cancelled by main event.") all_chunks_successful = False - + # Ensure a final progress update is sent with all chunks marked inactive (unless still active due to error) - if signals and hasattr(signals, 'file_progress_signal'): + if emitter_for_multipart: with progress_data['lock']: # Ensure all chunks are marked inactive for the final signal if download didn't fully succeed or was cancelled - status_list_copy = [dict(s) for s in progress_data['chunks_status']] - signals.file_progress_signal.emit(api_original_filename, status_list_copy) + status_list_copy = [dict(s) for s in progress_data['chunks_status']] + if 
isinstance(emitter_for_multipart, queue.Queue): + emitter_for_multipart.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)}) + elif hasattr(emitter_for_multipart, 'file_progress_signal'): # PostProcessorSignals-like + emitter_for_multipart.file_progress_signal.emit(api_original_filename, status_list_copy) if all_chunks_successful and (total_bytes_from_chunks == total_size or total_size == 0): - logger(f" ✅ Multi-part download successful for '{api_original_filename}'. Total bytes: {total_bytes_from_chunks}") + logger_func(f" ✅ Multi-part download successful for '{api_original_filename}'. Total bytes: {total_bytes_from_chunks}") md5_hasher = hashlib.md5() with open(temp_file_path, 'rb') as f_hash: for buf in iter(lambda: f_hash.read(4096*10), b''): # Read in larger buffers for hashing @@ -225,8 +231,8 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, # The caller is responsible for closing this handle and renaming/deleting the .part file. return True, total_bytes_from_chunks, calculated_hash, open(temp_file_path, 'rb') else: - logger(f" ❌ Multi-part download failed for '{api_original_filename}'. Success: {all_chunks_successful}, Bytes: {total_bytes_from_chunks}/{total_size}. Cleaning up.") + logger_func(f" ❌ Multi-part download failed for '{api_original_filename}'. Success: {all_chunks_successful}, Bytes: {total_bytes_from_chunks}/{total_size}. 
Cleaning up.") if os.path.exists(temp_file_path): try: os.remove(temp_file_path) - except OSError as e: logger(f" Failed to remove temp part file '{temp_file_path}': {e}") + except OSError as e: logger_func(f" Failed to remove temp part file '{temp_file_path}': {e}") return False, total_bytes_from_chunks, None, None \ No newline at end of file diff --git a/readme.md b/readme.md index a42bdd3..fca28c5 100644 --- a/readme.md +++ b/readme.md @@ -1,141 +1,212 @@ -# Kemono Downloader v3.3.0 +# Kemono Downloader v3.4.0 A powerful, feature-rich GUI application for downloading content from **[Kemono.su](https://kemono.su)** and **[Coomer.party](https://coomer.party)**. -Built with **PyQt5**, this tool is ideal for users who want deep filtering, customizable folder structure, efficient downloads, and intelligent automation — all within a modern GUI. +Built with **PyQt5**, this tool is ideal for users who want deep filtering, customizable folder structures, efficient downloads, and intelligent automation — all within a modern, user-friendly graphical interface. --- -## 🔄 Recent Updates (v3.3.0) +## ✨ What's New in v3.4.0? -### Skipped Characters Review (Eye Toggle) -- After a download, you can toggle a log view to review characters or keywords that were skipped based on your filters. -- Helps catch overlooked content you might want to adjust filters for. - -### Grouped Folder Naming -- You can group aliases together using parentheses. -- Example: `(Boa, Hancock), Robin` → Downloads for "Boa" and "Hancock" go into one folder: `Boa Hancock`. -- Great for creators who use inconsistent naming. 
+This version brings significant enhancements to manga/comic downloading, filtering capabilities, and user experience: --- -## 🖥 User Interface & Workflow +### 📖 Enhanced Manga/Comic Mode -### Clean PyQt5 GUI -- Simple and responsive interface -- Dark theme for long usage comfort -- Persistent settings saved between sessions -- Introductory tour for first-time users +- **New "Date Based" Filename Style:** -### Download Modes -- Download from: - - **Single Post URL** - - **Entire Creator Feed** -- Optional: - - **Page Range** for creator feeds - - **Custom folder name** for single-post downloads + - Perfect for truly sequential content! Files are named numerically (e.g., `001.jpg`, `002.jpg`, `003.ext`...) across an *entire creator's feed*, strictly following post publication order. + + - **Smart Numbering:** Automatically resumes from the highest existing number found in the series folder (and subfolders, if "Subfolder per Post" is enabled). + + - **Guaranteed Order:** Disables multi-threading for post processing to ensure sequential accuracy. + + - Works alongside the existing "Post Title" and "Original File Name" styles. --- -## 🧠 Smart Filtering +### ✂️ "Remove Words from Filename" Feature -### Character Name Filtering -- Input comma-separated names to only include relevant content. -- Filtering modes: - - **Files**: Checks filenames - - **Titles**: Checks post titles - - **Both**: Hybrid mode - - **Comments**: Also scans post comments for matches +- Specify comma-separated words or phrases (case-insensitive) that will be automatically removed from filenames. -### Skip Words -- Enter words to **exclude** files or posts. -- Modes: File-level, Post-level, or Both -- Helps exclude WIPs, previews, sketches, etc. 
- -### File Type Filters -- Filter download targets by type: - - All - - Images/GIFs - - Videos - - Archives - - External Links (no downloads) - -### Filename Cleanup -- Auto-remove unwanted keywords from filenames (e.g., `[HD]`, `patreon`) +- Example: `patreon, [HD], _final` transforms `AwesomeArt_patreon_[HD]_final.jpg` into `AwesomeArt.jpg`. --- -## 📚 Manga/Comic Mode +### 📦 New "Only Archives" File Filter Mode -Special handling for serialized content: -- Automatically fetches posts **oldest to newest** -- File naming options: - - Use **Post Title** (e.g., `MyChapter1.jpg`) - - Use **Original Filename** (e.g., `page_001.png`) -- Ignores page ranges and applies full-feed scan -- Works best when paired with grouped name filters (e.g., series titles) +- Exclusively downloads `.zip` and `.rar` files. + +- Automatically disables conflicting options like "Skip .zip/.rar" and external link logging. --- -## 📁 Folder Structure & Naming +### 🗣️ Improved Character Filter Scope - "Comments (Beta)" -- Auto-foldering by: - - Character name - - Post title - - Custom name (for post URLs) -- Optional: - - Subfolder per post -- Auto-detection and fallback from `Known.txt` if needed -- Smart cleaning of folder/file names to remove illegal characters +- **File-First Check:** Prioritizes matching filenames before checking post comments for character names. + +- **Comment Fallback:** Only checks comments if no filename match is found, reducing unnecessary API calls. --- -## 🖼 Thumbnail & Compression Tools +### 🧐 Refined "Missed Character Log" -- **Thumbnail Mode**: Downloads only the preview thumbnails -- **Image Compression** (via Pillow): - - Large images auto-converted to WebP - - Only saved if final size is significantly smaller +- Displays a capitalized, alphabetized list of key terms from skipped post titles. + +- Makes it easier to spot patterns or characters that might be unintentionally excluded. 
--- -## ⚙️ Performance Features +### 🚀 Enhanced Multi-part Download Progress -- **Multithreading**: Set number of threads for concurrent file and post downloads -- **Multi-part Downloads**: - - Large files split into multiple threads for faster retrieval - - Detailed chunk-level progress tracking - - Smart retries and fallback on failure +- Granular visibility into active chunk downloads and combined speed for large files. --- -## 📋 Logging & Progress +### 🗺️ Updated Onboarding Tour -- Real-time log output with two views: - - **Progress Log** - - **Missed Character Summary** -- Log filters external links and organizes them separately -- Export logs as `.txt` for backup/reference -- Auto-log failed/skipped files and links +- Improved guide for new users, covering v3.4.0 features and existing core functions. --- -## 🗃 Config System +### 🛡️ Robust Configuration Path -- `Known.txt`: Add frequently used names for fallback filtering and folder naming -- Auto-loaded and saved in system AppData (or local fallback) -- GUI for editing known names inside the app +- Settings and `Known.txt` are now stored in the system-standard application data folder (e.g., `AppData`, `~/.local/share`). + +--- + +## 🖥️ Core Features + +--- + +### User Interface & Workflow + +- **Clean PyQt5 GUI** — Simple, modern, and dark-themed. + +- **Persistent Settings** — Saves preferences between sessions. 
+ +- **Download Modes:** + - Single Post URL + - Entire Creator Feed + +- **Flexible Options:** + - Specify Page Range (disabled in Manga Mode) + - Custom Folder Name for single posts + +--- + +### 🧠 Smart Filtering + +- **Character Name Filtering:** + - Use `Tifa, Aerith` or group `(Boa, Hancock)` → folder `Boa Hancock` + + - **Filter Scopes:** + - `Files` + - `Title` + - `Both (Title then Files)` + - `Comments (Beta - Files first)` + +- **Skip with Words:** + - Exclude with `WIP, sketch, preview` + + - **Skip Scopes:** + - `Files` + - `Posts` + - `Both (Posts then Files)` + +- **File Type Filters:** + - `All`, `Images/GIFs`, `Videos`, `📦 Only Archives`, `🔗 Only Links` + +- **Filename Cleanup:** + - Remove illegal and unwanted characters or phrases + +--- + +### 📚 Manga/Comic Mode (Creator Feeds Only) + +- **Chronological Processing** — Oldest posts first + +- **Filename Style Options:** + - `Name: Post Title (Default)` + - `Name: Original File` + - `Name: Date Based (New)` + +- **Best With:** Character filters set to manga/series title + +--- + +### 📁 Folder Structure & Naming + +- **Subfolders:** + - Auto-created based on character name, post title, or `Known.txt` + + - "Subfolder per Post" option for further nesting + +- **Smart Naming:** Cleans invalid characters and structures logically + +--- + +### 🖼️ Thumbnail & Compression Tools + +- **Download Thumbnails Only** + +- **Compress to WebP** (via Pillow) + - Converts large images to smaller WebP versions + +--- + +### ⚙️ Performance Features + +- **Multithreading:** + - For both post processing and file downloading + +- **Multi-part Downloads:** + - Toggleable in GUI + - Splits large files into chunks + - Granular chunk-level progress display + +--- + +### 📋 Logging & Progress + +- **Real-time Logs:** Activity, errors, skipped posts + +- **Missed Character Log:** Shows skipped keywords in easy-to-read list + +- **External Links Log:** Shows links (unless disabled in some modes) + +- **Export Links:** Save 
a `.txt` file of links (Only Links mode)