From fb446a1e288f0d3218490e96077eac5e886e557b Mon Sep 17 00:00:00 2001 From: Yuvi9587 <114073886+Yuvi9587@users.noreply.github.com> Date: Mon, 14 Jul 2025 20:17:48 -0700 Subject: [PATCH] Commit --- main_window.py | 5517 +++++++++++++++++++++++++++ src/core/workers.py | 1906 ++++----- src/ui/dialogs/MoreOptionsDialog.py | 1 + src/ui/flow_layout.py | 93 - src/ui/main_window.py | 1310 ++++--- workers.py | 2064 ++++++++++ 6 files changed, 9360 insertions(+), 1531 deletions(-) create mode 100644 main_window.py delete mode 100644 src/ui/flow_layout.py create mode 100644 workers.py diff --git a/main_window.py b/main_window.py new file mode 100644 index 0000000..930462d --- /dev/null +++ b/main_window.py @@ -0,0 +1,5517 @@ +# --- Standard Library Imports --- +import sys +import os +import time +import queue +import traceback +import html +import http +import json +import re +import subprocess +import datetime +import requests +import unicodedata +from collections import deque +import threading +from concurrent.futures import Future, ThreadPoolExecutor ,CancelledError +from urllib .parse import urlparse + +# --- PyQt5 Imports --- +from PyQt5.QtGui import QIcon, QIntValidator, QDesktopServices +from PyQt5.QtWidgets import ( + QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton, + QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, QRadioButton, + QButtonGroup, QCheckBox, QSplitter, QGroupBox, QDialog, QStackedWidget, + QScrollArea, QListWidgetItem, QSizePolicy, QProgressBar, QAbstractItemView, QFrame, + QMainWindow, QAction, QGridLayout +) +from PyQt5.QtCore import Qt, QThread, pyqtSignal, QObject, QTimer, QSettings, QStandardPaths, QUrl, QSize, QProcess, QMutex, QMutexLocker + +# --- Local Application Imports --- +from ..services.drive_downloader import download_mega_file as drive_download_mega_file ,download_gdrive_file ,download_dropbox_file +from ..core.workers import DownloadThread as BackendDownloadThread +from ..core.workers import 
PostProcessorWorker +from ..core.workers import PostProcessorSignals +from ..core.api_client import download_from_api +from ..core.manager import DownloadManager +from .assets import get_app_icon_object +from ..config.constants import * +from ..utils.file_utils import KNOWN_NAMES, clean_folder_name +from ..utils.network_utils import extract_post_info, prepare_cookies_for_request +from ..i18n.translator import get_translation +from .dialogs.EmptyPopupDialog import EmptyPopupDialog +from .dialogs.CookieHelpDialog import CookieHelpDialog +from .dialogs.FavoriteArtistsDialog import FavoriteArtistsDialog +from .dialogs.KnownNamesFilterDialog import KnownNamesFilterDialog +from .dialogs.HelpGuideDialog import HelpGuideDialog +from .dialogs.FutureSettingsDialog import FutureSettingsDialog +from .dialogs.ErrorFilesDialog import ErrorFilesDialog +from .dialogs.DownloadHistoryDialog import DownloadHistoryDialog +from .dialogs.DownloadExtractedLinksDialog import DownloadExtractedLinksDialog +from .dialogs.FavoritePostsDialog import FavoritePostsDialog +from .dialogs.FavoriteArtistsDialog import FavoriteArtistsDialog +from .dialogs.ConfirmAddAllDialog import ConfirmAddAllDialog +from .dialogs.MoreOptionsDialog import MoreOptionsDialog +from .dialogs.SinglePDF import create_single_pdf_from_content + +class DynamicFilterHolder: + """A thread-safe class to hold and update character filters during a download.""" + def __init__(self, initial_filters=None): + self.lock = threading.Lock() + self._filters = initial_filters if initial_filters is not None else [] + + def get_filters(self): + with self.lock: + return [dict(f) for f in self._filters] + + def set_filters(self, new_filters): + with self.lock: + self._filters = [dict(f) for f in (new_filters if new_filters else [])] + + +class PostProcessorSignals(QObject): + """A collection of signals for the DownloaderApp to communicate with itself across threads.""" + progress_signal = pyqtSignal(str) + file_download_status_signal = 
pyqtSignal(bool) + external_link_signal = pyqtSignal(str, str, str, str, str) + file_progress_signal = pyqtSignal(str, object) + file_successfully_downloaded_signal = pyqtSignal(dict) + missed_character_post_signal = pyqtSignal(str, str) + worker_finished_signal = pyqtSignal(tuple) + finished_signal = pyqtSignal(int, int, bool, list) + retryable_file_failed_signal = pyqtSignal(list) + permanent_file_failed_signal = pyqtSignal(list) + +class DownloaderApp (QWidget ): + character_prompt_response_signal =pyqtSignal (bool ) + log_signal =pyqtSignal (str ) + add_character_prompt_signal =pyqtSignal (str ) + overall_progress_signal =pyqtSignal (int ,int ) + file_successfully_downloaded_signal =pyqtSignal (dict ) + post_processed_for_history_signal =pyqtSignal (dict ) + finished_signal =pyqtSignal (int ,int ,bool ,list ) + external_link_signal =pyqtSignal (str ,str ,str ,str ,str ) + file_progress_signal =pyqtSignal (str ,object ) + + + def __init__(self): + super().__init__() + self.settings = QSettings(CONFIG_ORGANIZATION_NAME, CONFIG_APP_NAME_MAIN) + + # --- CORRECT PATH DEFINITION --- + # This block correctly determines the application's base directory whether + # it's running from source or as a frozen executable. 
+ if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'): + # Path for PyInstaller one-file bundle + self.app_base_dir = os.path.dirname(sys.executable) + else: + # Path for running from source code + self.app_base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) + + # All file paths will now correctly use the single, correct app_base_dir + self.config_file = os.path.join(self.app_base_dir, "appdata", "Known.txt") + self.session_file_path = os.path.join(self.app_base_dir, "appdata", "session.json") + self.persistent_history_file = os.path.join(self.app_base_dir, "appdata", "download_history.json") + + self.download_thread = None + self.thread_pool = None + self.cancellation_event = threading.Event() + self.session_lock = threading.Lock() + self.interrupted_session_data = None + self.is_restore_pending = False + self.external_link_download_thread = None + self.pause_event = threading.Event() + self.active_futures = [] + self.total_posts_to_process = 0 + self.dynamic_character_filter_holder = DynamicFilterHolder() + self.processed_posts_count = 0 + self.creator_name_cache = {} + self.log_signal.emit(f"ℹ️ App base directory: {self.app_base_dir}") + self.log_signal.emit(f"ℹ️ Persistent history file path set to: {self.persistent_history_file}") + + # --- The rest of your __init__ method continues from here --- + self.last_downloaded_files_details = deque(maxlen=3) + self.download_history_candidates = deque(maxlen=8) + self.final_download_history_entries = [] + self.favorite_download_queue = deque() + self.is_processing_favorites_queue = False + self.download_counter = 0 + self.permanently_failed_files_for_dialog = [] + self.last_link_input_text_for_queue_sync = "" + self.is_fetcher_thread_running = False + self._restart_pending = False + self.download_history_log = deque(maxlen=50) + self.skip_counter = 0 + self.all_kept_original_filenames = [] + self.cancellation_message_logged_this_session = False + self.favorite_scope_toggle_button = 
None + self.favorite_download_scope = FAVORITE_SCOPE_SELECTED_LOCATION + self.manga_mode_checkbox = None + self.selected_cookie_filepath = None + self.retryable_failed_files_info = [] + self.is_paused = False + self.worker_to_gui_queue = queue.Queue() + self.gui_update_timer = QTimer(self) + self.actual_gui_signals = PostProcessorSignals() + self.worker_signals = PostProcessorSignals() + self.prompt_mutex = QMutex() + self._add_character_response = None + self._original_scan_content_tooltip = ("If checked, the downloader will scan the HTML content of posts for image URLs (from tags or direct links).\n" + "now This includes resolving relative paths from tags to full URLs.\n" + "Relative paths in tags (e.g., /data/image.jpg) will be resolved to full URLs.\n" + "Useful for cases where images are in the post description but not in the API's file/attachment list.") + self.downloaded_files = set() + self.downloaded_files_lock = threading.Lock() + self.downloaded_file_hashes = set() + self.downloaded_file_hashes_lock = threading.Lock() + self.show_external_links = False + self.external_link_queue = deque() + self._is_processing_external_link_queue = False + self._current_link_post_title = None + self.extracted_links_cache = [] + self.manga_rename_toggle_button = None + self.favorite_mode_checkbox = None + self.url_or_placeholder_stack = None + self.url_input_widget = None + self.url_placeholder_widget = None + self.favorite_action_buttons_widget = None + self.favorite_mode_artists_button = None + self.favorite_mode_posts_button = None + self.standard_action_buttons_widget = None + self.bottom_action_buttons_stack = None + self.main_log_output = None + self.external_log_output = None + self.log_splitter = None + self.main_splitter = None + self.reset_button = None + self.progress_log_label = None + self.log_verbosity_toggle_button = None + self.missed_character_log_output = None + self.log_view_stack = None + self.current_log_view = 'progress' + self.link_search_input = 
None + self.link_search_button = None + self.export_links_button = None + self.radio_only_links = None + self.radio_only_archives = None + self.missed_title_key_terms_count = {} + self.missed_title_key_terms_examples = {} + self.logged_summary_for_key_term = set() + self.STOP_WORDS = set(["a", "an", "the", "is", "was", "were", "of", "for", "with", "in", "on", "at", "by", "to", "and", "or", "but", "i", "you", "he", "she", "it", "we", "they", "my", "your", "his", "her", "its", "our", "their", "com", "net", "org", "www"]) + self.already_logged_bold_key_terms = set() + self.missed_key_terms_buffer = [] + self.char_filter_scope_toggle_button = None + self.skip_words_scope = SKIP_SCOPE_POSTS + self.char_filter_scope = CHAR_SCOPE_TITLE + self.manga_filename_style = self.settings.value(MANGA_FILENAME_STYLE_KEY, STYLE_POST_TITLE, type=str) + self.current_theme = self.settings.value(THEME_KEY, "dark", type=str) + self.only_links_log_display_mode = LOG_DISPLAY_LINKS + self.mega_download_log_preserved_once = False + self.allow_multipart_download_setting = False + self.use_cookie_setting = False + self.scan_content_images_setting = self.settings.value(SCAN_CONTENT_IMAGES_KEY, False, type=bool) + self.cookie_text_setting = "" + self.current_selected_language = self.settings.value(LANGUAGE_KEY, "en", type=str) + self.more_filter_scope = None + self.text_export_format = 'pdf' + self.single_pdf_setting = False + self.session_temp_files = [] + + print(f"ℹ️ Known.txt will be loaded/saved at: {self.config_file}") + + try: + base_path_for_icon = "" + if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'): + base_path_for_icon = sys._MEIPASS + else: + base_path_for_icon = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) + + icon_path_for_window = os.path.join(base_path_for_icon, 'assets', 'Kemono.ico') + + if os.path.exists(icon_path_for_window): + self.setWindowIcon(QIcon(icon_path_for_window)) + else: + if getattr(sys, 'frozen', False): + executable_dir = 
os.path.dirname(sys.executable) + fallback_icon_path = os.path.join(executable_dir, 'assets', 'Kemono.ico') + if os.path.exists(fallback_icon_path): + self.setWindowIcon(QIcon(fallback_icon_path)) + else: + self.log_signal.emit(f"⚠️ Main window icon 'assets/Kemono.ico' not found at {icon_path_for_window} or {fallback_icon_path}") + else: + self.log_signal.emit(f"⚠️ Main window icon 'assets/Kemono.ico' not found at {icon_path_for_window}") + except Exception as e_icon_app: + self.log_signal.emit(f"❌ Error setting main window icon in DownloaderApp init: {e_icon_app}") + + self.url_label_widget = None + self.download_location_label_widget = None + self.remove_from_filename_label_widget = None + self.skip_words_label_widget = None + self.setWindowTitle("Kemono Downloader v6.0.0") + self.init_ui() + self._connect_signals() + self.log_signal.emit("ℹ️ Local API server functionality has been removed.") + self.log_signal.emit("ℹ️ 'Skip Current File' button has been removed.") + if hasattr(self, 'character_input'): + self.character_input.setToolTip(self._tr("character_input_tooltip", "Enter character names (comma-separated)...")) + self.log_signal.emit(f"ℹ️ Manga filename style loaded: '{self.manga_filename_style}'") + self.log_signal.emit(f"ℹ️ Skip words scope loaded: '{self.skip_words_scope}'") + self.log_signal.emit(f"ℹ️ Character filter scope set to default: '{self.char_filter_scope}'") + self.log_signal.emit(f"ℹ️ Multi-part download defaults to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}") + self.log_signal.emit(f"ℹ️ Cookie text defaults to: Empty on launch") + self.log_signal.emit(f"ℹ️ 'Use Cookie' setting defaults to: Disabled on launch") + self.log_signal.emit(f"ℹ️ Scan post content for images defaults to: {'Enabled' if self.scan_content_images_setting else 'Disabled'}") + self.log_signal.emit(f"ℹ️ Application language loaded: '{self.current_selected_language.upper()}' (UI may not reflect this yet).") + self._retranslate_main_ui() + 
self._load_persistent_history() + self._load_saved_download_location() + self._update_button_states_and_connections() + self._check_for_interrupted_session() + + + def get_checkbox_map(self): + """Returns a mapping of checkbox attribute names to their corresponding settings key.""" + return { + 'skip_zip_checkbox': 'skip_zip', + 'skip_rar_checkbox': 'skip_rar', + 'download_thumbnails_checkbox': 'download_thumbnails', + 'compress_images_checkbox': 'compress_images', + 'use_subfolders_checkbox': 'use_subfolders', + 'use_subfolder_per_post_checkbox': 'use_post_subfolders', + 'use_multithreading_checkbox': 'use_multithreading', + 'external_links_checkbox': 'show_external_links', + 'keep_duplicates_checkbox': 'keep_in_post_duplicates', + 'date_prefix_checkbox': 'use_date_prefix_for_subfolder', + 'manga_mode_checkbox': 'manga_mode_active', + 'scan_content_images_checkbox': 'scan_content_for_images', + 'use_cookie_checkbox': 'use_cookie', + 'favorite_mode_checkbox': 'favorite_mode_active' + } + + def _get_current_ui_settings_as_dict(self, api_url_override=None, output_dir_override=None): + """Gathers all relevant UI settings into a JSON-serializable dictionary.""" + settings = {} + + settings['api_url'] = api_url_override if api_url_override is not None else self.link_input.text().strip() + settings['output_dir'] = output_dir_override if output_dir_override is not None else self.dir_input.text().strip() + settings['character_filter_text'] = self.character_input.text().strip() + settings['skip_words_text'] = self.skip_words_input.text().strip() + settings['remove_words_text'] = self.remove_from_filename_input.text().strip() + settings['custom_folder_name'] = self.custom_folder_input.text().strip() + settings['cookie_text'] = self.cookie_text_input.text().strip() + if hasattr(self, 'manga_date_prefix_input'): + settings['manga_date_prefix'] = self.manga_date_prefix_input.text().strip() + + try: settings['num_threads'] = int(self.thread_count_input.text().strip()) + except 
(ValueError, AttributeError): settings['num_threads'] = 4 + try: settings['start_page'] = int(self.start_page_input.text().strip()) if self.start_page_input.text().strip() else None + except (ValueError, AttributeError): settings['start_page'] = None + try: settings['end_page'] = int(self.end_page_input.text().strip()) if self.end_page_input.text().strip() else None + except (ValueError, AttributeError): settings['end_page'] = None + + for checkbox_name, key in self.get_checkbox_map().items(): + if checkbox := getattr(self, checkbox_name, None): settings[key] = checkbox.isChecked() + + settings['filter_mode'] = self.get_filter_mode() + settings['only_links'] = self.radio_only_links.isChecked() + + settings['skip_words_scope'] = self.skip_words_scope + settings['char_filter_scope'] = self.char_filter_scope + settings['manga_filename_style'] = self.manga_filename_style + settings['allow_multipart_download'] = self.allow_multipart_download_setting + + return settings + + + def _tr (self ,key ,default_text =""): + """Helper to get translation based on current app language for the main window.""" + if callable (get_translation ): + return get_translation (self .current_selected_language ,key ,default_text ) + return default_text + + def _load_saved_download_location (self ): + saved_location =self .settings .value (DOWNLOAD_LOCATION_KEY ,"",type =str ) + if saved_location and os .path .isdir (saved_location ): + if hasattr (self ,'dir_input')and self .dir_input : + self .dir_input .setText (saved_location ) + self .log_signal .emit (f"ℹ️ Loaded saved download location: {saved_location }") + else : + self .log_signal .emit (f"⚠️ Found saved download location '{saved_location }', but dir_input not ready.") + elif saved_location : + self .log_signal .emit (f"⚠️ Found saved download location '{saved_location }', but it's not a valid directory. 
Ignoring.") + + def _check_for_interrupted_session(self): + """Checks for an incomplete session file on startup and prepares the UI for restore if found.""" + if os.path.exists(self.session_file_path): + try: + with open(self.session_file_path, 'r', encoding='utf-8') as f: + session_data = json.load(f) + + if "ui_settings" not in session_data or "download_state" not in session_data: + raise ValueError("Invalid session file structure.") + + failed_files_from_session = session_data.get('download_state', {}).get('permanently_failed_files', []) + if failed_files_from_session: + self.permanently_failed_files_for_dialog.clear() + self.permanently_failed_files_for_dialog.extend(failed_files_from_session) + self.log_signal.emit(f"ℹ️ Restored {len(failed_files_from_session)} failed file entries from the previous session.") + + self.interrupted_session_data = session_data + self.log_signal.emit("ℹ️ Incomplete download session found. UI updated for restore.") + self._prepare_ui_for_restore() + + except Exception as e: + self.log_signal.emit(f"❌ Error reading session file: {e}. 
Deleting corrupt session file.") + os.remove(self.session_file_path) + self.interrupted_session_data = None + self.is_restore_pending = False + + def _prepare_ui_for_restore(self): + """Configures the UI to a 'restore pending' state.""" + if not self.interrupted_session_data: + return + + self.log_signal.emit(" UI updated for session restore.") + settings = self.interrupted_session_data.get("ui_settings", {}) + self._load_ui_from_settings_dict(settings) + + self.is_restore_pending = True + self._update_button_states_and_connections() # Update buttons for restore state, UI remains editable + + def _clear_session_and_reset_ui(self): + """Clears the session file and resets the UI to its default state.""" + self._clear_session_file() + self.interrupted_session_data = None + self.is_restore_pending = False + self._update_button_states_and_connections() # Ensure buttons are updated to idle state + self.reset_application_state() + + def _clear_session_file(self): + """Safely deletes the session file.""" + if os.path.exists(self.session_file_path): + try: + os.remove(self.session_file_path) + self.log_signal.emit("ℹ️ Interrupted session file cleared.") + except Exception as e: + self.log_signal.emit(f"❌ Failed to clear session file: {e}") + + def _save_session_file(self, session_data): + """Safely saves the session data to the session file using an atomic write pattern.""" + temp_session_file_path = self.session_file_path + ".tmp" + try: + with open(temp_session_file_path, 'w', encoding='utf-8') as f: + json.dump(session_data, f, indent=2) + os.replace(temp_session_file_path, self.session_file_path) + except Exception as e: + self.log_signal.emit(f"❌ Failed to save session state: {e}") + if os.path.exists(temp_session_file_path): + try: + os.remove(temp_session_file_path) + except Exception as e_rem: + self.log_signal.emit(f"❌ Failed to remove temp session file: {e_rem}") + + def _update_button_states_and_connections(self): + """ + Updates the text and click connections of 
the main action buttons + based on the current application state (downloading, paused, restore pending, idle). + """ + # Disconnect all signals first to prevent multiple connections + try: self.download_btn.clicked.disconnect() + except TypeError: pass + try: self.pause_btn.clicked.disconnect() + except TypeError: pass + try: self.cancel_btn.clicked.disconnect() + except TypeError: pass + + is_download_active = self._is_download_active() + + if self.is_restore_pending: + # State: Restore Pending + self.download_btn.setText(self._tr("start_download_button_text", "⬇️ Start Download")) + self.download_btn.setEnabled(True) + self.download_btn.clicked.connect(self.start_download) + self.download_btn.setToolTip(self._tr("start_download_discard_tooltip", "Click to start a new download, discarding the previous session.")) + + self.pause_btn.setText(self._tr("restore_download_button_text", "🔄 Restore Download")) + self.pause_btn.setEnabled(True) + self.pause_btn.clicked.connect(self.restore_download) + self.pause_btn.setToolTip(self._tr("restore_download_button_tooltip", "Click to restore the interrupted download.")) + + # --- START: CORRECTED CANCEL BUTTON LOGIC --- + self.cancel_btn.setText(self._tr("discard_session_button_text", "🗑️ Discard Session")) + self.cancel_btn.setEnabled(True) + self.cancel_btn.clicked.connect(self._clear_session_and_reset_ui) + self.cancel_btn.setToolTip(self._tr("discard_session_tooltip", "Click to discard the interrupted session and reset the UI.")) + + elif is_download_active: + # State: Downloading / Paused + self.download_btn.setText(self._tr("start_download_button_text", "⬇️ Start Download")) + self.download_btn.setEnabled(False) # Cannot start new download while one is active + + self.pause_btn.setText(self._tr("resume_download_button_text", "▶️ Resume Download") if self.is_paused else self._tr("pause_download_button_text", "⏸️ Pause Download")) + self.pause_btn.setEnabled(True) + 
self.pause_btn.clicked.connect(self._handle_pause_resume_action) + self.pause_btn.setToolTip(self._tr("resume_download_button_tooltip", "Click to resume the download.") if self.is_paused else self._tr("pause_download_button_tooltip", "Click to pause the download.")) + + self.cancel_btn.setText(self._tr("cancel_button_text", "❌ Cancel & Reset UI")) + self.cancel_btn.setEnabled(True) + self.cancel_btn.clicked.connect(self.cancel_download_button_action) + self.cancel_btn.setToolTip(self._tr("cancel_button_tooltip", "Click to cancel the ongoing download/extraction process and reset the UI fields (preserving URL and Directory).")) + else: + # State: Idle (No download, no restore pending) + self.download_btn.setText(self._tr("start_download_button_text", "⬇️ Start Download")) + self.download_btn.setEnabled(True) + self.download_btn.clicked.connect(self.start_download) + + self.pause_btn.setText(self._tr("pause_download_button_text", "⏸️ Pause Download")) + self.pause_btn.setEnabled(False) # No active download to pause + self.pause_btn.setToolTip(self._tr("pause_download_button_tooltip", "Click to pause the ongoing download process.")) + + self.cancel_btn.setText(self._tr("cancel_button_text", "❌ Cancel & Reset UI")) + self.cancel_btn.setEnabled(False) # No active download to cancel + self.cancel_btn.setToolTip(self._tr("cancel_button_tooltip", "Click to cancel the ongoing download/extraction process and reset the UI fields (preserving URL and Directory).")) + + + def _retranslate_main_ui (self ): + """Retranslates static text elements in the main UI.""" + if self .url_label_widget : + self .url_label_widget .setText (self ._tr ("creator_post_url_label","🔗 Kemono Creator/Post URL:")) + if self .download_location_label_widget : + self .download_location_label_widget .setText (self ._tr ("download_location_label","📁 Download Location:")) + if hasattr (self ,'character_label')and self .character_label : + self .character_label .setText (self ._tr 
("filter_by_character_label","🎯 Filter by Character(s) (comma-separated):")) + if self .skip_words_label_widget : + self .skip_words_label_widget .setText (self ._tr ("skip_with_words_label","🚫 Skip with Words (comma-separated):")) + if self .remove_from_filename_label_widget : + self .remove_from_filename_label_widget .setText (self ._tr ("remove_words_from_name_label","✂️ Remove Words from name:")) + if hasattr (self ,'radio_all'):self .radio_all .setText (self ._tr ("filter_all_radio","All")) + if hasattr (self ,'radio_images'):self .radio_images .setText (self ._tr ("filter_images_radio","Images/GIFs")) + if hasattr (self ,'radio_videos'):self .radio_videos .setText (self ._tr ("filter_videos_radio","Videos")) + if hasattr (self ,'radio_only_archives'):self .radio_only_archives .setText (self ._tr ("filter_archives_radio","📦 Only Archives")) + if hasattr (self ,'radio_only_links'):self .radio_only_links .setText (self ._tr ("filter_links_radio","🔗 Only Links")) + if hasattr (self ,'radio_only_audio'):self .radio_only_audio .setText (self ._tr ("filter_audio_radio","🎧 Only Audio")) + if hasattr (self ,'favorite_mode_checkbox'):self .favorite_mode_checkbox .setText (self ._tr ("favorite_mode_checkbox_label","⭐ Favorite Mode")) + if hasattr (self ,'dir_button'):self .dir_button .setText (self ._tr ("browse_button_text","Browse...")) + self ._update_char_filter_scope_button_text () + self ._update_skip_scope_button_text () + + if hasattr (self ,'skip_zip_checkbox'):self .skip_zip_checkbox .setText (self ._tr ("skip_zip_checkbox_label","Skip .zip")) + if hasattr (self ,'skip_rar_checkbox'):self .skip_rar_checkbox .setText (self ._tr ("skip_rar_checkbox_label","Skip .rar")) + if hasattr (self ,'download_thumbnails_checkbox'):self .download_thumbnails_checkbox .setText (self ._tr ("download_thumbnails_checkbox_label","Download Thumbnails Only")) + if hasattr (self ,'scan_content_images_checkbox'):self .scan_content_images_checkbox .setText (self ._tr 
("scan_content_images_checkbox_label","Scan Content for Images")) + if hasattr (self ,'compress_images_checkbox'):self .compress_images_checkbox .setText (self ._tr ("compress_images_checkbox_label","Compress to WebP")) + if hasattr (self ,'use_subfolders_checkbox'):self .use_subfolders_checkbox .setText (self ._tr ("separate_folders_checkbox_label","Separate Folders by Name/Title")) + if hasattr (self ,'use_subfolder_per_post_checkbox'):self .use_subfolder_per_post_checkbox .setText (self ._tr ("subfolder_per_post_checkbox_label","Subfolder per Post")) + if hasattr (self ,'use_cookie_checkbox'):self .use_cookie_checkbox .setText (self ._tr ("use_cookie_checkbox_label","Use Cookie")) + if hasattr (self ,'use_multithreading_checkbox'):self .update_multithreading_label (self .thread_count_input .text ()if hasattr (self ,'thread_count_input')else "1") + if hasattr (self ,'external_links_checkbox'):self .external_links_checkbox .setText (self ._tr ("show_external_links_checkbox_label","Show External Links in Log")) + if hasattr (self ,'manga_mode_checkbox'):self .manga_mode_checkbox .setText (self ._tr ("manga_comic_mode_checkbox_label","Manga/Comic Mode")) + if hasattr (self ,'thread_count_label'):self .thread_count_label .setText (self ._tr ("threads_label","Threads:")) + + if hasattr (self ,'character_input'): + self .character_input .setToolTip (self ._tr ("character_input_tooltip","Enter character names (comma-separated)...")) + if hasattr (self ,'download_btn'):self .download_btn .setToolTip (self ._tr ("start_download_button_tooltip","Click to start the download or link extraction process with the current settings.")) + + + + + + current_download_is_active =self ._is_download_active ()if hasattr (self ,'_is_download_active')else False + self .set_ui_enabled (not current_download_is_active ) + + if hasattr (self ,'known_chars_label'):self .known_chars_label .setText (self ._tr ("known_chars_label_text","🎭 Known Shows/Characters (for Folder Names):")) + if hasattr 
(self ,'open_known_txt_button'):self .open_known_txt_button .setText (self ._tr ("open_known_txt_button_text","Open Known.txt"));self .open_known_txt_button .setToolTip (self ._tr ("open_known_txt_button_tooltip","Open the 'Known.txt' file...")) + if hasattr (self ,'add_char_button'):self .add_char_button .setText (self ._tr ("add_char_button_text","➕ Add"));self .add_char_button .setToolTip (self ._tr ("add_char_button_tooltip","Add the name from the input field...")) + if hasattr (self ,'add_to_filter_button'):self .add_to_filter_button .setText (self ._tr ("add_to_filter_button_text","⤵️ Add to Filter"));self .add_to_filter_button .setToolTip (self ._tr ("add_to_filter_button_tooltip","Select names from 'Known Shows/Characters' list...")) + if hasattr (self ,'character_list'): + self .character_list .setToolTip (self ._tr ("known_chars_list_tooltip","This list contains names used for automatic folder creation...")) + if hasattr (self ,'delete_char_button'):self .delete_char_button .setText (self ._tr ("delete_char_button_text","🗑️ Delete Selected"));self .delete_char_button .setToolTip (self ._tr ("delete_char_button_tooltip","Delete the selected name(s)...")) + + if hasattr (self ,'cancel_btn'):self .cancel_btn .setToolTip (self ._tr ("cancel_button_tooltip","Click to cancel the ongoing download/extraction process and reset the UI fields (preserving URL and Directory).")) + if hasattr (self ,'error_btn'):self .error_btn .setText (self ._tr ("error_button_text","Error"));self .error_btn .setToolTip (self ._tr ("error_button_tooltip","View files skipped due to errors and optionally retry them.")) + if hasattr (self ,'progress_log_label'):self .progress_log_label .setText (self ._tr ("progress_log_label_text","📜 Progress Log:")) + if hasattr (self ,'reset_button'):self .reset_button .setText (self ._tr ("reset_button_text","🔄 Reset"));self .reset_button .setToolTip (self ._tr ("reset_button_tooltip","Reset all inputs and logs to default state (only when idle).")) 
+ self ._update_multipart_toggle_button_text () + if hasattr (self ,'progress_label')and not self ._is_download_active ():self .progress_label .setText (self ._tr ("progress_idle_text","Progress: Idle")) + if hasattr (self ,'favorite_mode_artists_button'):self .favorite_mode_artists_button .setText (self ._tr ("favorite_artists_button_text","🖼️ Favorite Artists"));self .favorite_mode_artists_button .setToolTip (self ._tr ("favorite_artists_button_tooltip","Browse and download from your favorite artists...")) + if hasattr (self ,'favorite_mode_posts_button'):self .favorite_mode_posts_button .setText (self ._tr ("favorite_posts_button_text","📄 Favorite Posts"));self .favorite_mode_posts_button .setToolTip (self ._tr ("favorite_posts_button_tooltip","Browse and download your favorite posts...")) + self ._update_favorite_scope_button_text () + if hasattr (self ,'page_range_label'):self .page_range_label .setText (self ._tr ("page_range_label_text","Page Range:")) + if hasattr (self ,'start_page_input'): + self .start_page_input .setPlaceholderText (self ._tr ("start_page_input_placeholder","Start")) + self .start_page_input .setToolTip (self ._tr ("start_page_input_tooltip","For creator URLs: Specify the starting page number...")) + if hasattr (self ,'to_label'):self .to_label .setText (self ._tr ("page_range_to_label_text","to")) + if hasattr (self ,'end_page_input'): + self .end_page_input .setPlaceholderText (self ._tr ("end_page_input_placeholder","End")) + self .end_page_input .setToolTip (self ._tr ("end_page_input_tooltip","For creator URLs: Specify the ending page number...")) + if hasattr (self ,'fav_mode_active_label'): + self .fav_mode_active_label .setText (self ._tr ("fav_mode_active_label_text","⭐ Favorite Mode is active...")) + if hasattr (self ,'cookie_browse_button'): + self .cookie_browse_button .setToolTip (self ._tr ("cookie_browse_button_tooltip","Browse for a cookie file...")) + self ._update_manga_filename_style_button_text () + if hasattr (self 
,'export_links_button'):self .export_links_button .setText (self ._tr ("export_links_button_text","Export Links")) + if hasattr (self ,'download_extracted_links_button'):self .download_extracted_links_button .setText (self ._tr ("download_extracted_links_button_text","Download")) + self ._update_log_display_mode_button_text () + + + if hasattr (self ,'radio_all'):self .radio_all .setToolTip (self ._tr ("radio_all_tooltip","Download all file types found in posts.")) + if hasattr (self ,'radio_images'):self .radio_images .setToolTip (self ._tr ("radio_images_tooltip","Download only common image formats (JPG, PNG, GIF, WEBP, etc.).")) + if hasattr (self ,'radio_videos'):self .radio_videos .setToolTip (self ._tr ("radio_videos_tooltip","Download only common video formats (MP4, MKV, WEBM, MOV, etc.).")) + if hasattr (self ,'radio_only_archives'):self .radio_only_archives .setToolTip (self ._tr ("radio_only_archives_tooltip","Exclusively download .zip and .rar files. Other file-specific options are disabled.")) + if hasattr (self ,'radio_only_audio'):self .radio_only_audio .setToolTip (self ._tr ("radio_only_audio_tooltip","Download only common audio formats (MP3, WAV, FLAC, etc.).")) + if hasattr (self ,'radio_only_links'):self .radio_only_links .setToolTip (self ._tr ("radio_only_links_tooltip","Extract and display external links from post descriptions instead of downloading files.\nDownload-related options will be disabled.")) + + + if hasattr (self ,'use_subfolders_checkbox'):self .use_subfolders_checkbox .setToolTip (self ._tr ("use_subfolders_checkbox_tooltip","Create subfolders based on 'Filter by Character(s)' input...")) + if hasattr (self ,'use_subfolder_per_post_checkbox'):self .use_subfolder_per_post_checkbox .setToolTip (self ._tr ("use_subfolder_per_post_checkbox_tooltip","Creates a subfolder for each post...")) + if hasattr (self ,'use_cookie_checkbox'):self .use_cookie_checkbox .setToolTip (self ._tr ("use_cookie_checkbox_tooltip","If checked, will 
attempt to use cookies...")) + if hasattr (self ,'use_multithreading_checkbox'):self .use_multithreading_checkbox .setToolTip (self ._tr ("use_multithreading_checkbox_tooltip","Enables concurrent operations...")) + if hasattr (self ,'thread_count_input'):self .thread_count_input .setToolTip (self ._tr ("thread_count_input_tooltip","Number of concurrent operations...")) + if hasattr (self ,'external_links_checkbox'):self .external_links_checkbox .setToolTip (self ._tr ("external_links_checkbox_tooltip","If checked, a secondary log panel appears...")) + if hasattr (self ,'manga_mode_checkbox'):self .manga_mode_checkbox .setToolTip (self ._tr ("manga_mode_checkbox_tooltip","Downloads posts from oldest to newest...")) + + if hasattr (self ,'scan_content_images_checkbox'):self .scan_content_images_checkbox .setToolTip (self ._tr ("scan_content_images_checkbox_tooltip",self ._original_scan_content_tooltip )) + if hasattr (self ,'download_thumbnails_checkbox'):self .download_thumbnails_checkbox .setToolTip (self ._tr ("download_thumbnails_checkbox_tooltip","Downloads small preview images...")) + if hasattr (self ,'skip_words_input'): + self .skip_words_input .setToolTip (self ._tr ("skip_words_input_tooltip", + ("Enter words, comma-separated, to skip downloading certain content (e.g., WIP, sketch, preview).\n\n" + "The 'Scope: [Type]' button next to this input cycles how this filter applies:\n" + "- Scope: Files: Skips individual files if their names contain any of these words.\n" + "- Scope: Posts: Skips entire posts if their titles contain any of these words.\n" + "- Scope: Both: Applies both (post title first, then individual files if post title is okay)."))) + if hasattr (self ,'remove_from_filename_input'): + self .remove_from_filename_input .setToolTip (self ._tr ("remove_words_input_tooltip", + ("Enter words, comma-separated, to remove from downloaded filenames (case-insensitive).\n" + "Useful for cleaning up common prefixes/suffixes.\nExample: patreon, kemono, 
[HD], _final"))) + + if hasattr (self ,'link_input'): + self .link_input .setPlaceholderText (self ._tr ("link_input_placeholder_text","e.g., https://kemono.su/patreon/user/12345 or .../post/98765")) + self .link_input .setToolTip (self ._tr ("link_input_tooltip_text","Enter the full URL...")) + if hasattr (self ,'dir_input'): + self .dir_input .setPlaceholderText (self ._tr ("dir_input_placeholder_text","Select folder where downloads will be saved")) + self .dir_input .setToolTip (self ._tr ("dir_input_tooltip_text","Enter or browse to the main folder...")) + if hasattr (self ,'character_input'): + self .character_input .setPlaceholderText (self ._tr ("character_input_placeholder_text","e.g., Tifa, Aerith, (Cloud, Zack)")) + if hasattr (self ,'custom_folder_input'): + self .custom_folder_input .setPlaceholderText (self ._tr ("custom_folder_input_placeholder_text","Optional: Save this post to specific folder")) + self .custom_folder_input .setToolTip (self ._tr ("custom_folder_input_tooltip_text","If downloading a single post URL...")) + if hasattr (self ,'skip_words_input'): + self .skip_words_input .setPlaceholderText (self ._tr ("skip_words_input_placeholder_text","e.g., WM, WIP, sketch, preview")) + if hasattr (self ,'remove_from_filename_input'): + self .remove_from_filename_input .setPlaceholderText (self ._tr ("remove_from_filename_input_placeholder_text","e.g., patreon, HD")) + self ._update_cookie_input_placeholders_and_tooltips () + if hasattr (self ,'character_search_input'): + self .character_search_input .setPlaceholderText (self ._tr ("character_search_input_placeholder_text","Search characters...")) + self .character_search_input .setToolTip (self ._tr ("character_search_input_tooltip_text","Type here to filter the list...")) + if hasattr (self ,'new_char_input'): + self .new_char_input .setPlaceholderText (self ._tr ("new_char_input_placeholder_text","Add new show/character name")) + self .new_char_input .setToolTip (self ._tr 
("new_char_input_tooltip_text","Enter a new show, game, or character name...")) + if hasattr (self ,'link_search_input'): + self .link_search_input .setPlaceholderText (self ._tr ("link_search_input_placeholder_text","Search Links...")) + self .link_search_input .setToolTip (self ._tr ("link_search_input_tooltip_text","When in 'Only Links' mode...")) + if hasattr (self ,'manga_date_prefix_input'): + self .manga_date_prefix_input .setPlaceholderText (self ._tr ("manga_date_prefix_input_placeholder_text","Prefix for Manga Filenames")) + self .manga_date_prefix_input .setToolTip (self ._tr ("manga_date_prefix_input_tooltip_text","Optional prefix for 'Date Based'...")) + if hasattr (self ,'empty_popup_button'):self .empty_popup_button .setToolTip (self ._tr ("empty_popup_button_tooltip_text","Open Creator Selection...")) + if hasattr (self ,'known_names_help_button'):self .known_names_help_button .setToolTip (self ._tr ("known_names_help_button_tooltip_text","Open the application feature guide.")) + if hasattr (self ,'future_settings_button'):self .future_settings_button .setToolTip (self ._tr ("future_settings_button_tooltip_text","Open application settings...")) + if hasattr (self ,'link_search_button'):self .link_search_button .setToolTip (self ._tr ("link_search_button_tooltip_text","Filter displayed links")) + def apply_theme (self ,theme_name ,initial_load =False ): + self .current_theme =theme_name + if not initial_load : + self .settings .setValue (THEME_KEY ,theme_name ) + self .settings .sync () + + if theme_name =="dark": + self .setStyleSheet (self .get_dark_theme ()) + if not initial_load : + self .log_signal .emit ("🎨 Switched to Dark Mode.") + else : + self .setStyleSheet ("") + if not initial_load : + self .log_signal .emit ("🎨 Switched to Light Mode.") + self .update () + + def _get_tooltip_for_character_input (self ): + return ( + self ._tr ("character_input_tooltip","Default tooltip if translation fails.") + ) + def _connect_signals (self ): + self 
.actual_gui_signals .progress_signal .connect (self .handle_main_log ) + self .actual_gui_signals .file_progress_signal .connect (self .update_file_progress_display ) + self .actual_gui_signals .missed_character_post_signal .connect (self .handle_missed_character_post ) + self .actual_gui_signals .external_link_signal .connect (self .handle_external_link_signal ) + self .actual_gui_signals .file_successfully_downloaded_signal .connect (self ._handle_actual_file_downloaded ) + self.actual_gui_signals.worker_finished_signal.connect(self._handle_worker_result) + self .actual_gui_signals .file_download_status_signal .connect (lambda status :None ) + + if hasattr (self ,'character_input'): + self .character_input .textChanged .connect (self ._on_character_input_changed_live ) + if hasattr (self ,'use_cookie_checkbox'): + self .use_cookie_checkbox .toggled .connect (self ._update_cookie_input_visibility ) + if hasattr (self ,'link_input'): + self .link_input .textChanged .connect (self ._sync_queue_with_link_input ) + if hasattr (self ,'cookie_browse_button'): + self .cookie_browse_button .clicked .connect (self ._browse_cookie_file ) + if hasattr (self ,'cookie_text_input'): + self .cookie_text_input .textChanged .connect (self ._handle_cookie_text_manual_change ) + if hasattr (self ,'download_thumbnails_checkbox'): + self .download_thumbnails_checkbox .toggled .connect (self ._handle_thumbnail_mode_change ) + self .gui_update_timer .timeout .connect (self ._process_worker_queue ) + self .gui_update_timer .start (100 ) + self .log_signal .connect (self .handle_main_log ) + self .add_character_prompt_signal .connect (self .prompt_add_character ) + self .character_prompt_response_signal .connect (self .receive_add_character_result ) + self .overall_progress_signal .connect (self .update_progress_display ) + self .post_processed_for_history_signal .connect (self ._add_to_history_candidates ) + self .finished_signal .connect (self .download_finished ) + if hasattr (self 
,'character_search_input'):self .character_search_input .textChanged .connect (self .filter_character_list ) + if hasattr (self ,'external_links_checkbox'):self .external_links_checkbox .toggled .connect (self .update_external_links_setting ) + if hasattr (self ,'thread_count_input'):self .thread_count_input .textChanged .connect (self .update_multithreading_label ) + if hasattr (self ,'use_subfolder_per_post_checkbox'):self .use_subfolder_per_post_checkbox .toggled .connect (self .update_ui_for_subfolders ) + if hasattr (self ,'use_multithreading_checkbox'):self .use_multithreading_checkbox .toggled .connect (self ._handle_multithreading_toggle ) + + if hasattr (self ,'radio_group')and self .radio_group : + self .radio_group .buttonToggled .connect (self ._handle_filter_mode_change ) + + if self .reset_button :self .reset_button .clicked .connect (self .reset_application_state ) + if self .log_verbosity_toggle_button :self .log_verbosity_toggle_button .clicked .connect (self .toggle_active_log_view ) + + if self .link_search_button :self .link_search_button .clicked .connect (self ._filter_links_log ) + if self .link_search_input : + self .link_search_input .returnPressed .connect (self ._filter_links_log ) + self .link_search_input .textChanged .connect (self ._filter_links_log ) + if self .export_links_button :self .export_links_button .clicked .connect (self ._export_links_to_file ) + + if self .manga_mode_checkbox :self .manga_mode_checkbox .toggled .connect (self .update_ui_for_manga_mode ) + + + if hasattr (self ,'download_extracted_links_button'): + self .download_extracted_links_button .clicked .connect (self ._show_download_extracted_links_dialog ) + + if hasattr (self ,'log_display_mode_toggle_button'): + self .log_display_mode_toggle_button .clicked .connect (self ._toggle_log_display_mode ) + + if self .manga_rename_toggle_button :self .manga_rename_toggle_button .clicked .connect (self ._toggle_manga_filename_style ) + + if hasattr (self 
,'link_input'): + self .link_input .textChanged .connect (lambda :self .update_ui_for_manga_mode (self .manga_mode_checkbox .isChecked ()if self .manga_mode_checkbox else False )) + + if self .skip_scope_toggle_button : + self .skip_scope_toggle_button .clicked .connect (self ._cycle_skip_scope ) + + if self .char_filter_scope_toggle_button : + self .char_filter_scope_toggle_button .clicked .connect (self ._cycle_char_filter_scope ) + + if hasattr (self ,'multipart_toggle_button'):self .multipart_toggle_button .clicked .connect (self ._toggle_multipart_mode ) + + + if hasattr (self ,'favorite_mode_checkbox'): + self .favorite_mode_checkbox .toggled .connect (self ._handle_favorite_mode_toggle ) + + if hasattr (self ,'open_known_txt_button'): + self .open_known_txt_button .clicked .connect (self ._open_known_txt_file ) + + if hasattr (self ,'add_to_filter_button'): + self .add_to_filter_button .clicked .connect (self ._show_add_to_filter_dialog ) + if hasattr (self ,'favorite_mode_artists_button'): + self .favorite_mode_artists_button .clicked .connect (self ._show_favorite_artists_dialog ) + if hasattr (self ,'favorite_mode_posts_button'): + self .favorite_mode_posts_button .clicked .connect (self ._show_favorite_posts_dialog ) + if hasattr (self ,'favorite_scope_toggle_button'): + self .favorite_scope_toggle_button .clicked .connect (self ._cycle_favorite_scope ) + if hasattr (self ,'history_button'): + self .history_button .clicked .connect (self ._show_download_history_dialog ) + if hasattr (self ,'error_btn'): + self .error_btn .clicked .connect (self ._show_error_files_dialog ) + + def _on_character_input_changed_live (self ,text ): + """ + Called when the character input field text changes. + If a download is active (running or paused), this updates the dynamic filter holder. 
+ """ + if self ._is_download_active (): + QCoreApplication .processEvents () + raw_character_filters_text =self .character_input .text ().strip () + parsed_filters =self ._parse_character_filters (raw_character_filters_text ) + + self .dynamic_character_filter_holder .set_filters (parsed_filters ) + + def _parse_character_filters (self ,raw_text ): + """Helper to parse character filter string into list of objects.""" + parsed_character_filter_objects =[] + if raw_text : + raw_parts =[] + current_part_buffer ="" + in_group_parsing =False + for char_token in raw_text : + if char_token =='('and not in_group_parsing : + in_group_parsing =True + current_part_buffer +=char_token + elif char_token ==')'and in_group_parsing : + in_group_parsing =False + current_part_buffer +=char_token + elif char_token ==','and not in_group_parsing : + if current_part_buffer .strip ():raw_parts .append (current_part_buffer .strip ()) + current_part_buffer ="" + else : + current_part_buffer +=char_token + if current_part_buffer .strip ():raw_parts .append (current_part_buffer .strip ()) + + for part_str in raw_parts : + part_str =part_str .strip () + if not part_str :continue + + is_tilde_group =part_str .startswith ("(")and part_str .endswith (")~") + is_standard_group_for_splitting =part_str .startswith ("(")and part_str .endswith (")")and not is_tilde_group + + if is_tilde_group : + group_content_str =part_str [1 :-2 ].strip () + aliases_in_group =[alias .strip ()for alias in group_content_str .split (',')if alias .strip ()] + if aliases_in_group : + group_folder_name =" ".join (aliases_in_group ) + parsed_character_filter_objects .append ({"name":group_folder_name ,"is_group":True ,"aliases":aliases_in_group }) + elif is_standard_group_for_splitting : + group_content_str =part_str [1 :-1 ].strip () + aliases_in_group =[alias .strip ()for alias in group_content_str .split (',')if alias .strip ()] + if aliases_in_group : + group_folder_name =" ".join (aliases_in_group ) + 
parsed_character_filter_objects .append ({ + "name":group_folder_name , + "is_group":True , + "aliases":aliases_in_group , + "components_are_distinct_for_known_txt":True + }) + else : + parsed_character_filter_objects .append ({"name":part_str ,"is_group":False ,"aliases":[part_str ],"components_are_distinct_for_known_txt":False }) + return parsed_character_filter_objects + + def _process_worker_queue (self ): + """Processes messages from the worker queue and emits Qt signals from the GUI thread.""" + while not self .worker_to_gui_queue .empty (): + try : + item =self .worker_to_gui_queue .get_nowait () + signal_type =item .get ('type') + payload =item .get ('payload',tuple ()) + + if signal_type =='progress': + self .actual_gui_signals .progress_signal .emit (*payload ) + elif signal_type =='file_download_status': + self .actual_gui_signals .file_download_status_signal .emit (*payload ) + elif signal_type =='external_link': + self .actual_gui_signals .external_link_signal .emit (*payload ) + elif signal_type =='file_progress': + self .actual_gui_signals .file_progress_signal .emit (*payload ) + elif signal_type =='missed_character_post': + self .actual_gui_signals .missed_character_post_signal .emit (*payload ) + elif signal_type =='file_successfully_downloaded': + self ._handle_actual_file_downloaded (payload [0 ]if payload else {}) + elif signal_type =='file_successfully_downloaded': + self ._handle_file_successfully_downloaded (payload [0 ]) + elif signal_type == 'worker_finished': # <-- ADD THIS ELIF BLOCK + self.actual_gui_signals.worker_finished_signal.emit(payload[0] if payload else tuple()) + else: + self .log_signal .emit (f"⚠️ Unknown signal type from worker queue: {signal_type }") + self .worker_to_gui_queue .task_done () + except queue .Empty : + break + except Exception as e : + self .log_signal .emit (f"❌ Error processing worker queue: {e }") + + def load_known_names_from_util (self ): + global KNOWN_NAMES + if os .path .exists (self .config_file ): 
+ parsed_known_objects =[] + try : + with open (self .config_file ,'r',encoding ='utf-8')as f : + for line_num ,line in enumerate (f ,1 ): + line =line .strip () + if not line :continue + + if line .startswith ("(")and line .endswith (")"): + content =line [1 :-1 ].strip () + parts =[p .strip ()for p in content .split (',')if p .strip ()] + if parts : + folder_name_raw =content .replace (',',' ') + folder_name_cleaned =clean_folder_name (folder_name_raw ) + + unique_aliases_set ={p for p in parts } + final_aliases_list =sorted (list (unique_aliases_set ),key =str .lower ) + + if not folder_name_cleaned : + if hasattr (self ,'log_signal'):self .log_signal .emit (f"⚠️ Group resulted in empty folder name after cleaning in Known.txt on line {line_num }: '{line }'. Skipping entry.") + continue + + parsed_known_objects .append ({ + "name":folder_name_cleaned , + "is_group":True , + "aliases":final_aliases_list + }) + else : + if hasattr (self ,'log_signal'):self .log_signal .emit (f"⚠️ Empty group found in Known.txt on line {line_num }: '{line }'") + else : + parsed_known_objects .append ({ + "name":line , + "is_group":False , + "aliases":[line ] + }) + parsed_known_objects .sort (key =lambda x :x ["name"].lower ()) + KNOWN_NAMES [:]=parsed_known_objects + log_msg =f"ℹ️ Loaded {len (KNOWN_NAMES )} known entries from {self .config_file }" + except Exception as e : + log_msg =f"❌ Error loading config '{self .config_file }': {e }" + QMessageBox .warning (self ,"Config Load Error",f"Could not load list from {self .config_file }:\n{e }") + KNOWN_NAMES [:]=[] + else : + self .character_input .setToolTip ("Names, comma-separated. Group aliases: (alias1, alias2, alias3) becomes folder name 'alias1 alias2 alias3' (after cleaning).\nAll names in the group are used as aliases for matching.\nE.g., yor, (Boa, Hancock, Snake Princess)") + log_msg =f"ℹ️ Config file '{self .config_file }' not found. It will be created on save." 
+ KNOWN_NAMES [:]=[] + + if hasattr (self ,'log_signal'):self .log_signal .emit (log_msg ) + + if hasattr (self ,'character_list'): + self .character_list .clear () + if not KNOWN_NAMES : + self .log_signal .emit ("ℹ️ 'Known.txt' is empty or was not found. No default entries will be added.") + + self .character_list .addItems ([entry ["name"]for entry in KNOWN_NAMES ]) + + def save_known_names(self): + """ + Saves the current list of known names (KNOWN_NAMES) to the config file. + This version includes a fix to ensure the destination directory exists + before attempting to write the file, preventing crashes in new installations. + """ + global KNOWN_NAMES + try: + # --- FIX STARTS HERE --- + # Get the directory path from the full file path. + config_dir = os.path.dirname(self.config_file) + # Create the directory if it doesn't exist. 'exist_ok=True' prevents + # an error if the directory is already there. + os.makedirs(config_dir, exist_ok=True) + # --- FIX ENDS HERE --- + + with open(self.config_file, 'w', encoding='utf-8') as f: + for entry in KNOWN_NAMES: + if entry["is_group"]: + # For groups, write the aliases in a sorted, comma-separated format inside parentheses. + f.write(f"({', '.join(sorted(entry['aliases'], key=str.lower))})\n") + else: + # For single entries, write the name on its own line. + f.write(entry["name"] + '\n') + + if hasattr(self, 'log_signal'): + self.log_signal.emit(f"💾 Saved {len(KNOWN_NAMES)} known entries to {self.config_file}") + + except Exception as e: + # If any error occurs during saving, log it and show a warning popup. 
+ log_msg = f"❌ Error saving config '{self.config_file}': {e}" + if hasattr(self, 'log_signal'): + self.log_signal.emit(log_msg) + QMessageBox.warning(self, "Config Save Error", f"Could not save list to {self.config_file}:\n{e}") + + def closeEvent (self ,event ): + self .save_known_names () + self .settings .setValue (MANGA_FILENAME_STYLE_KEY ,self .manga_filename_style ) + self .settings .setValue (ALLOW_MULTIPART_DOWNLOAD_KEY ,self .allow_multipart_download_setting ) + self .settings .setValue (COOKIE_TEXT_KEY ,self .cookie_text_input .text ()if hasattr (self ,'cookie_text_input')else "") + self .settings .setValue (SCAN_CONTENT_IMAGES_KEY ,self .scan_content_images_checkbox .isChecked ()if hasattr (self ,'scan_content_images_checkbox')else False ) + self .settings .setValue (USE_COOKIE_KEY ,self .use_cookie_checkbox .isChecked ()if hasattr (self ,'use_cookie_checkbox')else False ) + self .settings .setValue (THEME_KEY ,self .current_theme ) + self .settings .setValue (LANGUAGE_KEY ,self .current_selected_language ) + self .settings .sync () + self ._save_persistent_history () + + should_exit =True + is_downloading =self ._is_download_active () + + if is_downloading : + reply =QMessageBox .question (self ,"Confirm Exit", + "Download in progress. 
Are you sure you want to exit and cancel?", + QMessageBox .Yes |QMessageBox .No ,QMessageBox .No ) + if reply ==QMessageBox .Yes : + self .log_signal .emit ("⚠️ Cancelling active download due to application exit...") + self .cancellation_event .set () + if self .download_thread and self .download_thread .isRunning (): + self .download_thread .requestInterruption () + self .log_signal .emit (" Signaled single download thread to interrupt.") + if self .download_thread and self .download_thread .isRunning (): + self .log_signal .emit (" Waiting for single download thread to finish...") + self .download_thread .wait (3000 ) + if self .download_thread .isRunning (): + self .log_signal .emit (" ⚠️ Single download thread did not terminate gracefully.") + + if self .thread_pool : + self .log_signal .emit (" Shutting down thread pool (waiting for completion)...") + self .thread_pool .shutdown (wait =True ,cancel_futures =True ) + self .log_signal .emit (" Thread pool shutdown complete.") + self .thread_pool =None + self .log_signal .emit (" Cancellation for exit complete.") + else : + should_exit =False + self .log_signal .emit ("ℹ️ Application exit cancelled.") + event .ignore () + return + + if should_exit : + self .log_signal .emit ("ℹ️ Application closing.") + if self .thread_pool : + self .log_signal .emit (" Final thread pool check: Shutting down...") + self .cancellation_event .set () + self .thread_pool .shutdown (wait =True ,cancel_futures =True ) + self .thread_pool =None + self .log_signal .emit ("👋 Exiting application.") + event .accept () + + + def _request_restart_application (self ): + self .log_signal .emit ("🔄 Application restart requested by user for language change.") + self ._restart_pending =True + self .close () + + def _do_actual_restart (self ): + try : + self .log_signal .emit (" Performing application restart...") + python_executable =sys .executable + script_args =sys .argv + + + if getattr (sys ,'frozen',False ): + + + + QProcess .startDetached 
(python_executable ,script_args [1 :]) + else : + + + QProcess .startDetached (python_executable ,script_args ) + + QCoreApplication .instance ().quit () + except Exception as e : + self .log_signal .emit (f"❌ CRITICAL: Failed to start new application instance: {e }") + QMessageBox .critical (self ,"Restart Failed", + f"Could not automatically restart the application: {e }\n\nPlease restart it manually.") + + def init_ui(self): + self.main_splitter = QSplitter(Qt.Horizontal) + + # --- Use a scroll area for the left panel for consistency --- + left_scroll_area = QScrollArea() + left_scroll_area.setWidgetResizable(True) + left_scroll_area.setFrameShape(QFrame.NoFrame) + + left_panel_widget = QWidget() + left_layout = QVBoxLayout(left_panel_widget) + left_scroll_area.setWidget(left_panel_widget) + + right_panel_widget = QWidget() + right_layout = QVBoxLayout(right_panel_widget) + + left_layout.setContentsMargins(10, 10, 10, 10) + right_layout.setContentsMargins(10, 10, 10, 10) + self.apply_theme(self.current_theme, initial_load=True) + + # --- URL and Page Range --- + self.url_input_widget = QWidget() + url_input_layout = QHBoxLayout(self.url_input_widget) + url_input_layout.setContentsMargins(0, 0, 0, 0) + self.url_label_widget = QLabel() + url_input_layout.addWidget(self.url_label_widget) + self.link_input = QLineEdit() + self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765") + self.link_input.textChanged.connect(self.update_custom_folder_visibility) # Connects the custom folder logic + url_input_layout.addWidget(self.link_input, 1) + self.empty_popup_button = QPushButton("🎨") + self.empty_popup_button.setStyleSheet("padding: 4px 6px;") + self.empty_popup_button.clicked.connect(self._show_empty_popup) + url_input_layout.addWidget(self.empty_popup_button) + self.page_range_label = QLabel(self._tr("page_range_label_text", "Page Range:")) + self.page_range_label.setStyleSheet("font-weight: bold; padding-left: 10px;") + 
url_input_layout.addWidget(self.page_range_label) + self.start_page_input = QLineEdit() + self.start_page_input.setPlaceholderText(self._tr("start_page_input_placeholder", "Start")) + self.start_page_input.setFixedWidth(50) + self.start_page_input.setValidator(QIntValidator(1, 99999)) + url_input_layout.addWidget(self.start_page_input) + self.to_label = QLabel(self._tr("page_range_to_label_text", "to")) + url_input_layout.addWidget(self.to_label) + self.end_page_input = QLineEdit() + self.end_page_input.setPlaceholderText(self._tr("end_page_input_placeholder", "End")) + self.end_page_input.setFixedWidth(50) + self.end_page_input.setToolTip(self._tr("end_page_input_tooltip", "For creator URLs: Specify the ending page number...")) + self.end_page_input.setValidator(QIntValidator(1, 99999)) + url_input_layout.addWidget(self.end_page_input) + self.url_placeholder_widget = QWidget() + placeholder_layout = QHBoxLayout(self.url_placeholder_widget) + placeholder_layout.setContentsMargins(0, 0, 0, 0) + self.fav_mode_active_label = QLabel(self._tr("fav_mode_active_label_text", "⭐ Favorite Mode is active...")) + self.fav_mode_active_label.setAlignment(Qt.AlignCenter) + placeholder_layout.addWidget(self.fav_mode_active_label) + self.url_or_placeholder_stack = QStackedWidget() + self.url_or_placeholder_stack.addWidget(self.url_input_widget) + self.url_or_placeholder_stack.addWidget(self.url_placeholder_widget) + left_layout.addWidget(self.url_or_placeholder_stack) + + # --- Download Location --- + self.download_location_label_widget = QLabel() + left_layout.addWidget(self.download_location_label_widget) + dir_layout = QHBoxLayout() + self.dir_input = QLineEdit() + self.dir_input.setPlaceholderText("Select folder where downloads will be saved") + self.dir_button = QPushButton("Browse...") + self.dir_button.setStyleSheet("padding: 4px 10px;") + self.dir_button.clicked.connect(self.browse_directory) + dir_layout.addWidget(self.dir_input, 1) + dir_layout.addWidget(self.dir_button) 
+ left_layout.addLayout(dir_layout) + + # --- Filters and Custom Folder Container (from old layout) --- + self.filters_and_custom_folder_container_widget = QWidget() + filters_and_custom_folder_layout = QHBoxLayout(self.filters_and_custom_folder_container_widget) + filters_and_custom_folder_layout.setContentsMargins(0, 5, 0, 0) + filters_and_custom_folder_layout.setSpacing(10) + self.character_filter_widget = QWidget() + character_filter_v_layout = QVBoxLayout(self.character_filter_widget) + character_filter_v_layout.setContentsMargins(0, 0, 0, 0) + character_filter_v_layout.setSpacing(2) + self.character_label = QLabel("🎯 Filter by Character(s) (comma-separated):") + character_filter_v_layout.addWidget(self.character_label) + char_input_and_button_layout = QHBoxLayout() + char_input_and_button_layout.setContentsMargins(0, 0, 0, 0) + char_input_and_button_layout.setSpacing(10) + self.character_input = QLineEdit() + self.character_input.setPlaceholderText("e.g., Tifa, Aerith, (Cloud, Zack)") + char_input_and_button_layout.addWidget(self.character_input, 3) + self.char_filter_scope_toggle_button = QPushButton() + self._update_char_filter_scope_button_text() + char_input_and_button_layout.addWidget(self.char_filter_scope_toggle_button, 1) + character_filter_v_layout.addLayout(char_input_and_button_layout) + + # --- Custom Folder Widget Definition --- + self.custom_folder_widget = QWidget() + custom_folder_v_layout = QVBoxLayout(self.custom_folder_widget) + custom_folder_v_layout.setContentsMargins(0, 0, 0, 0) + custom_folder_v_layout.setSpacing(2) + self.custom_folder_label = QLabel("🗄️ Custom Folder Name (Single Post Only):") + self.custom_folder_input = QLineEdit() + self.custom_folder_input.setPlaceholderText("Optional: Save this post to specific folder") + custom_folder_v_layout.addWidget(self.custom_folder_label) + custom_folder_v_layout.addWidget(self.custom_folder_input) + self.custom_folder_widget.setVisible(False) + + 
filters_and_custom_folder_layout.addWidget(self.character_filter_widget, 1) + filters_and_custom_folder_layout.addWidget(self.custom_folder_widget, 1) + left_layout.addWidget(self.filters_and_custom_folder_container_widget) + + # --- Word Manipulation Container --- + word_manipulation_container_widget = QWidget() + word_manipulation_outer_layout = QHBoxLayout(word_manipulation_container_widget) + word_manipulation_outer_layout.setContentsMargins(0, 0, 0, 0) + word_manipulation_outer_layout.setSpacing(15) + skip_words_widget = QWidget() + skip_words_vertical_layout = QVBoxLayout(skip_words_widget) + skip_words_vertical_layout.setContentsMargins(0, 0, 0, 0) + skip_words_vertical_layout.setSpacing(2) + self.skip_words_label_widget = QLabel() + skip_words_vertical_layout.addWidget(self.skip_words_label_widget) + skip_input_and_button_layout = QHBoxLayout() + skip_input_and_button_layout.setContentsMargins(0, 0, 0, 0) + skip_input_and_button_layout.setSpacing(10) + self.skip_words_input = QLineEdit() + self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview") + skip_input_and_button_layout.addWidget(self.skip_words_input, 1) + self.skip_scope_toggle_button = QPushButton() + self._update_skip_scope_button_text() + skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 0) + skip_words_vertical_layout.addLayout(skip_input_and_button_layout) + word_manipulation_outer_layout.addWidget(skip_words_widget, 7) + remove_words_widget = QWidget() + remove_words_vertical_layout = QVBoxLayout(remove_words_widget) + remove_words_vertical_layout.setContentsMargins(0, 0, 0, 0) + remove_words_vertical_layout.setSpacing(2) + self.remove_from_filename_label_widget = QLabel() + remove_words_vertical_layout.addWidget(self.remove_from_filename_label_widget) + self.remove_from_filename_input = QLineEdit() + self.remove_from_filename_input.setPlaceholderText("e.g., patreon, HD") + remove_words_vertical_layout.addWidget(self.remove_from_filename_input) + 
word_manipulation_outer_layout.addWidget(remove_words_widget, 3) + left_layout.addWidget(word_manipulation_container_widget) + + # --- File Filter Layout --- + file_filter_layout = QVBoxLayout() + file_filter_layout.setContentsMargins(0, 10, 0, 0) + file_filter_layout.addWidget(QLabel("Filter Files:")) + radio_button_layout = QHBoxLayout() + radio_button_layout.setSpacing(10) + self.radio_group = QButtonGroup(self) + self.radio_all = QRadioButton("All") + self.radio_images = QRadioButton("Images/GIFs") + self.radio_videos = QRadioButton("Videos") + self.radio_only_archives = QRadioButton("📦 Only Archives") + self.radio_only_audio = QRadioButton("🎧 Only Audio") + self.radio_only_links = QRadioButton("🔗 Only Links") + self.radio_more = QRadioButton("More") + + self.radio_all.setChecked(True) + for btn in [self.radio_all, self.radio_images, self.radio_videos, self.radio_only_archives, self.radio_only_audio, self.radio_only_links, self.radio_more]: + self.radio_group.addButton(btn) + radio_button_layout.addWidget(btn) + self.favorite_mode_checkbox = QCheckBox() + self.favorite_mode_checkbox.setChecked(False) + radio_button_layout.addWidget(self.favorite_mode_checkbox) + radio_button_layout.addStretch(1) + file_filter_layout.addLayout(radio_button_layout) + left_layout.addLayout(file_filter_layout) + + # --- Checkboxes Group --- + checkboxes_group_layout = QVBoxLayout() + checkboxes_group_layout.setSpacing(10) + row1_layout = QHBoxLayout() + row1_layout.setSpacing(10) + self.skip_zip_checkbox = QCheckBox("Skip .zip") + self.skip_zip_checkbox.setChecked(True) + row1_layout.addWidget(self.skip_zip_checkbox) + self.skip_rar_checkbox = QCheckBox("Skip .rar") + self.skip_rar_checkbox.setChecked(True) + row1_layout.addWidget(self.skip_rar_checkbox) + self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only") + row1_layout.addWidget(self.download_thumbnails_checkbox) + self.scan_content_images_checkbox = QCheckBox("Scan Content for Images") + 
self.scan_content_images_checkbox.setChecked(self.scan_content_images_setting) + row1_layout.addWidget(self.scan_content_images_checkbox) + self.compress_images_checkbox = QCheckBox("Compress to WebP") + self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).") + row1_layout.addWidget(self.compress_images_checkbox) + self.keep_duplicates_checkbox = QCheckBox("Keep Duplicates") + self.keep_duplicates_checkbox.setToolTip("If checked, downloads all files from a post even if they have the same name.") + row1_layout.addWidget(self.keep_duplicates_checkbox) + row1_layout.addStretch(1) + checkboxes_group_layout.addLayout(row1_layout) + + # --- Advanced Settings --- + advanced_settings_label = QLabel("⚙️ Advanced Settings:") + checkboxes_group_layout.addWidget(advanced_settings_label) + advanced_row1_layout = QHBoxLayout() + advanced_row1_layout.setSpacing(10) + self.use_subfolders_checkbox = QCheckBox("Separate Folders by Name/Title") + self.use_subfolders_checkbox.setChecked(True) + self.use_subfolders_checkbox.toggled.connect(self.update_ui_for_subfolders) + advanced_row1_layout.addWidget(self.use_subfolders_checkbox) + self.use_subfolder_per_post_checkbox = QCheckBox("Subfolder per Post") + self.use_subfolder_per_post_checkbox.toggled.connect(self.update_ui_for_subfolders) + advanced_row1_layout.addWidget(self.use_subfolder_per_post_checkbox) + self.date_prefix_checkbox = QCheckBox("Date Prefix") + self.date_prefix_checkbox.setToolTip("When 'Subfolder per Post' is active, prefix the folder name with the post's upload date.") + advanced_row1_layout.addWidget(self.date_prefix_checkbox) + self.use_cookie_checkbox = QCheckBox("Use Cookie") + self.use_cookie_checkbox.setChecked(self.use_cookie_setting) + self.cookie_text_input = QLineEdit() + self.cookie_text_input.setPlaceholderText("if no Select cookies.txt)") + self.cookie_text_input.setText(self.cookie_text_setting) + advanced_row1_layout.addWidget(self.use_cookie_checkbox) + 
advanced_row1_layout.addWidget(self.cookie_text_input, 2) + self.cookie_browse_button = QPushButton("Browse...") + self.cookie_browse_button.setFixedWidth(80) + self.cookie_browse_button.setStyleSheet("padding: 4px 8px;") + advanced_row1_layout.addWidget(self.cookie_browse_button) + advanced_row1_layout.addStretch(1) + checkboxes_group_layout.addLayout(advanced_row1_layout) + advanced_row2_layout = QHBoxLayout() + advanced_row2_layout.setSpacing(10) + multithreading_layout = QHBoxLayout() + multithreading_layout.setContentsMargins(0, 0, 0, 0) + self.use_multithreading_checkbox = QCheckBox("Use Multithreading") + self.use_multithreading_checkbox.setChecked(True) + multithreading_layout.addWidget(self.use_multithreading_checkbox) + self.thread_count_label = QLabel("Threads:") + multithreading_layout.addWidget(self.thread_count_label) + self.thread_count_input = QLineEdit("4") + self.thread_count_input.setFixedWidth(40) + self.thread_count_input.setValidator(QIntValidator(1, MAX_THREADS)) + multithreading_layout.addWidget(self.thread_count_input) + advanced_row2_layout.addLayout(multithreading_layout) + self.external_links_checkbox = QCheckBox("Show External Links in Log") + advanced_row2_layout.addWidget(self.external_links_checkbox) + self.manga_mode_checkbox = QCheckBox("Manga/Comic Mode") + advanced_row2_layout.addWidget(self.manga_mode_checkbox) + advanced_row2_layout.addStretch(1) + checkboxes_group_layout.addLayout(advanced_row2_layout) + left_layout.addLayout(checkboxes_group_layout) + + # --- Action Buttons --- + self.standard_action_buttons_widget = QWidget() + btn_layout = QHBoxLayout(self.standard_action_buttons_widget) + btn_layout.setContentsMargins(0, 10, 0, 0) + btn_layout.setSpacing(10) + self.download_btn = QPushButton("⬇️ Start Download") + self.download_btn.setStyleSheet("padding: 4px 12px; font-weight: bold;") + self.download_btn.clicked.connect(self.start_download) + self.pause_btn = QPushButton("⏸️ Pause Download") + 
self.pause_btn.setEnabled(False) + self.pause_btn.setStyleSheet("padding: 4px 12px;") + self.pause_btn.clicked.connect(self._handle_pause_resume_action) + self.cancel_btn = QPushButton("❌ Cancel & Reset UI") + self.cancel_btn.setEnabled(False) + self.cancel_btn.setStyleSheet("padding: 4px 12px;") + self.cancel_btn.clicked.connect(self.cancel_download_button_action) + self.error_btn = QPushButton("Error") + self.error_btn.setToolTip("View files skipped due to errors and optionally retry them.") + self.error_btn.setStyleSheet("padding: 4px 8px;") + self.error_btn.setEnabled(True) + btn_layout.addWidget(self.download_btn) + btn_layout.addWidget(self.pause_btn) + btn_layout.addWidget(self.cancel_btn) + btn_layout.addWidget(self.error_btn) + self.favorite_action_buttons_widget = QWidget() + favorite_buttons_layout = QHBoxLayout(self.favorite_action_buttons_widget) + self.favorite_mode_artists_button = QPushButton("🖼️ Favorite Artists") + self.favorite_mode_posts_button = QPushButton("📄 Favorite Posts") + self.favorite_scope_toggle_button = QPushButton() + favorite_buttons_layout.addWidget(self.favorite_mode_artists_button) + favorite_buttons_layout.addWidget(self.favorite_mode_posts_button) + favorite_buttons_layout.addWidget(self.favorite_scope_toggle_button) + self.bottom_action_buttons_stack = QStackedWidget() + self.bottom_action_buttons_stack.addWidget(self.standard_action_buttons_widget) + self.bottom_action_buttons_stack.addWidget(self.favorite_action_buttons_widget) + left_layout.addWidget(self.bottom_action_buttons_stack) + left_layout.addSpacing(10) + + # --- Known Names Layout --- + known_chars_label_layout = QHBoxLayout() + known_chars_label_layout.setSpacing(10) + self.known_chars_label = QLabel("🎭 Known Shows/Characters (for Folder Names):") + known_chars_label_layout.addWidget(self.known_chars_label) + self.open_known_txt_button = QPushButton("Open Known.txt") + self.open_known_txt_button.setStyleSheet("padding: 4px 8px;") + 
self.open_known_txt_button.setFixedWidth(120) + known_chars_label_layout.addWidget(self.open_known_txt_button) + self.character_search_input = QLineEdit() + self.character_search_input.setPlaceholderText("Search characters...") + known_chars_label_layout.addWidget(self.character_search_input, 1) + left_layout.addLayout(known_chars_label_layout) + self.character_list = QListWidget() + self.character_list.setSelectionMode(QListWidget.ExtendedSelection) + self.character_list.setMaximumHeight(150) # Set smaller height + left_layout.addWidget(self.character_list, 1) + char_manage_layout = QHBoxLayout() + char_manage_layout.setSpacing(10) + self.new_char_input = QLineEdit() + self.new_char_input.setPlaceholderText("Add new show/character name") + self.new_char_input.setStyleSheet("padding: 3px 5px;") + self.add_char_button = QPushButton("➕ Add") + self.add_char_button.setStyleSheet("padding: 4px 10px;") + self.add_to_filter_button = QPushButton("⤵️ Add to Filter") + self.add_to_filter_button.setToolTip("Select names... 
to add to the 'Filter by Character(s)' field.") + self.add_to_filter_button.setStyleSheet("padding: 4px 10px;") + self.delete_char_button = QPushButton("🗑️ Delete Selected") + self.delete_char_button.setToolTip("Delete the selected name(s)...") + self.delete_char_button.setStyleSheet("padding: 4px 10px;") + self.add_char_button.clicked.connect(self._handle_ui_add_new_character) + self.new_char_input.returnPressed.connect(self.add_char_button.click) + self.delete_char_button.clicked.connect(self.delete_selected_character) + char_manage_layout.addWidget(self.new_char_input, 2) + char_manage_layout.addWidget(self.add_char_button, 0) + self.known_names_help_button = QPushButton("?") + self.known_names_help_button.setFixedWidth(35) + self.known_names_help_button.setStyleSheet("padding: 4px 6px;") + self.known_names_help_button.clicked.connect(self._show_feature_guide) + self.history_button = QPushButton("📜") + self.history_button.setFixedWidth(35) + self.history_button.setStyleSheet("padding: 4px 6px;") + self.history_button.setToolTip(self._tr("history_button_tooltip_text", "View download history")) + self.future_settings_button = QPushButton("⚙️") + self.future_settings_button.setFixedWidth(35) + self.future_settings_button.setStyleSheet("padding: 4px 6px;") + self.future_settings_button.clicked.connect(self._show_future_settings_dialog) + char_manage_layout.addWidget(self.add_to_filter_button, 1) + char_manage_layout.addWidget(self.delete_char_button, 1) + char_manage_layout.addWidget(self.known_names_help_button, 0) + char_manage_layout.addWidget(self.history_button, 0) + char_manage_layout.addWidget(self.future_settings_button, 0) + left_layout.addLayout(char_manage_layout) + left_layout.addStretch(0) + + # --- Right Panel (Logs) --- + # (This part of the layout is unchanged and remains correct) + log_title_layout = QHBoxLayout() + self.progress_log_label = QLabel("📜 Progress Log:") + log_title_layout.addWidget(self.progress_log_label) + 
log_title_layout.addStretch(1) + self.link_search_input = QLineEdit() + self.link_search_input.setPlaceholderText("Search Links...") + self.link_search_input.setVisible(False) + log_title_layout.addWidget(self.link_search_input) + self.link_search_button = QPushButton("🔍") + self.link_search_button.setVisible(False) + self.link_search_button.setFixedWidth(30) + self.link_search_button.setStyleSheet("padding: 4px 4px;") + log_title_layout.addWidget(self.link_search_button) + self.manga_rename_toggle_button = QPushButton() + self.manga_rename_toggle_button.setVisible(False) + self.manga_rename_toggle_button.setFixedWidth(140) + self.manga_rename_toggle_button.setStyleSheet("padding: 4px 8px;") + self._update_manga_filename_style_button_text() + log_title_layout.addWidget(self.manga_rename_toggle_button) + self.manga_date_prefix_input = QLineEdit() + self.manga_date_prefix_input.setPlaceholderText("Prefix for Manga Filenames") + self.manga_date_prefix_input.setVisible(False) + log_title_layout.addWidget(self.manga_date_prefix_input) + self.multipart_toggle_button = QPushButton() + self.multipart_toggle_button.setToolTip("Toggle between Multi-part and Single-stream downloads for large files.") + self.multipart_toggle_button.setFixedWidth(130) + self.multipart_toggle_button.setStyleSheet("padding: 4px 8px;") + self._update_multipart_toggle_button_text() + log_title_layout.addWidget(self.multipart_toggle_button) + self.EYE_ICON = "\U0001F441" + self.CLOSED_EYE_ICON = "\U0001F648" + self.log_verbosity_toggle_button = QPushButton(self.EYE_ICON) + self.log_verbosity_toggle_button.setFixedWidth(45) + self.log_verbosity_toggle_button.setStyleSheet("font-size: 11pt; padding: 4px 2px;") + log_title_layout.addWidget(self.log_verbosity_toggle_button) + self.reset_button = QPushButton("🔄 Reset") + self.reset_button.setFixedWidth(80) + self.reset_button.setStyleSheet("padding: 4px 8px;") + log_title_layout.addWidget(self.reset_button) + right_layout.addLayout(log_title_layout) + 
self.log_splitter = QSplitter(Qt.Vertical) + self.log_view_stack = QStackedWidget() + self.main_log_output = QTextEdit() + self.main_log_output.setReadOnly(True) + self.main_log_output.setLineWrapMode(QTextEdit.NoWrap) + self.log_view_stack.addWidget(self.main_log_output) + self.missed_character_log_output = QTextEdit() + self.missed_character_log_output.setReadOnly(True) + self.missed_character_log_output.setLineWrapMode(QTextEdit.NoWrap) + self.log_view_stack.addWidget(self.missed_character_log_output) + self.external_log_output = QTextEdit() + self.external_log_output.setReadOnly(True) + self.external_log_output.setLineWrapMode(QTextEdit.NoWrap) + self.external_log_output.hide() + self.log_splitter.addWidget(self.log_view_stack) + self.log_splitter.addWidget(self.external_log_output) + self.log_splitter.setSizes([self.height(), 0]) + right_layout.addWidget(self.log_splitter, 1) + export_button_layout = QHBoxLayout() + export_button_layout.addStretch(1) + self.export_links_button = QPushButton(self._tr("export_links_button_text", "Export Links")) + self.export_links_button.setFixedWidth(100) + self.export_links_button.setStyleSheet("padding: 4px 8px; margin-top: 5px;") + self.export_links_button.setEnabled(False) + self.export_links_button.setVisible(False) + export_button_layout.addWidget(self.export_links_button) + self.download_extracted_links_button = QPushButton(self._tr("download_extracted_links_button_text", "Download")) + self.download_extracted_links_button.setFixedWidth(100) + self.download_extracted_links_button.setStyleSheet("padding: 4px 8px; margin-top: 5px;") + self.download_extracted_links_button.setEnabled(False) + self.download_extracted_links_button.setVisible(False) + export_button_layout.addWidget(self.download_extracted_links_button) + self.log_display_mode_toggle_button = QPushButton() + self.log_display_mode_toggle_button.setFixedWidth(120) + self.log_display_mode_toggle_button.setStyleSheet("padding: 4px 8px; margin-top: 5px;") + 
self.log_display_mode_toggle_button.setVisible(False) + export_button_layout.addWidget(self.log_display_mode_toggle_button) + right_layout.addLayout(export_button_layout) + self.progress_label = QLabel("Progress: Idle") + self.progress_label.setStyleSheet("padding-top: 5px; font-style: italic;") + right_layout.addWidget(self.progress_label) + self.file_progress_label = QLabel("") + self.file_progress_label.setToolTip("Shows the progress of individual file downloads, including speed and size.") + self.file_progress_label.setWordWrap(True) + self.file_progress_label.setStyleSheet("padding-top: 2px; font-style: italic; color: #A0A0A0;") + right_layout.addWidget(self.file_progress_label) + + # --- Final Assembly --- + self.main_splitter.addWidget(left_scroll_area) # Use the scroll area + self.main_splitter.addWidget(right_panel_widget) + self.main_splitter.setStretchFactor(0, 7) + self.main_splitter.setStretchFactor(1, 3) + top_level_layout = QHBoxLayout(self) + top_level_layout.setContentsMargins(0, 0, 0, 0) + top_level_layout.addWidget(self.main_splitter) + + # --- Initial UI State Updates --- + self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked()) + self.update_external_links_setting(self.external_links_checkbox.isChecked()) + self.update_multithreading_label(self.thread_count_input.text()) + self.update_page_range_enabled_state() + if self.manga_mode_checkbox: + self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked()) + if hasattr(self, 'link_input'): + self.link_input.textChanged.connect(lambda: self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)) + self._load_creator_name_cache_from_json() + self.load_known_names_from_util() + self._update_cookie_input_visibility(self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False) + self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) + if hasattr(self, 'radio_group') and 
self.radio_group.checkedButton(): + self._handle_filter_mode_change(self.radio_group.checkedButton(), True) + self.radio_group.buttonToggled.connect(self._handle_more_options_toggled) # Add this line + + self._update_manga_filename_style_button_text() + self._update_skip_scope_button_text() + self._update_char_filter_scope_button_text() + self._update_multithreading_for_date_mode() + if hasattr(self, 'download_thumbnails_checkbox'): + self._handle_thumbnail_mode_change(self.download_thumbnails_checkbox.isChecked()) + if hasattr(self, 'favorite_mode_checkbox'): + self._handle_favorite_mode_toggle(False) + + def _load_persistent_history (self ): + """Loads download history from a persistent file.""" + self .log_signal .emit (f"📜 Attempting to load history from: {self .persistent_history_file }") + if os .path .exists (self .persistent_history_file ): + try : + with open (self .persistent_history_file ,'r',encoding ='utf-8')as f : + loaded_data =json .load (f ) + + if isinstance (loaded_data ,dict ): + self .last_downloaded_files_details .clear () + self .last_downloaded_files_details .extend (loaded_data .get ("last_downloaded_files",[])) + self .final_download_history_entries =loaded_data .get ("first_processed_posts",[]) + self .log_signal .emit (f"✅ Loaded {len (self .last_downloaded_files_details )} last downloaded files and {len (self .final_download_history_entries )} first processed posts from persistent history.") + elif loaded_data is None and os .path .getsize (self .persistent_history_file )==0 : + self .log_signal .emit (f"ℹ️ Persistent history file is empty. Initializing with empty history.") + self .final_download_history_entries =[] + self .last_downloaded_files_details .clear () + elif isinstance(loaded_data, list): # Handle old format where only first_processed_posts was saved + self.log_signal.emit("⚠️ Persistent history file is in old format (only first_processed_posts). 
Converting to new format.") + self.final_download_history_entries = loaded_data + self.last_downloaded_files_details.clear() + self._save_persistent_history() # Save in new format immediately + else : + self .log_signal .emit (f"⚠️ Persistent history file has incorrect format. Expected list, got {type (loaded_history )}. Ignoring.") + self .final_download_history_entries =[] + except json .JSONDecodeError : + self .log_signal .emit (f"⚠️ Error decoding persistent history file. It might be corrupted. Ignoring.") + self .final_download_history_entries =[] + except Exception as e : + self .log_signal .emit (f"❌ Error loading persistent history: {e }") + self .final_download_history_entries =[] + else : + self .log_signal .emit (f"⚠️ Persistent history file NOT FOUND at: {self .persistent_history_file }. Starting with empty history.") + self .final_download_history_entries =[] + self ._save_persistent_history () + + + def _save_persistent_history(self): + """Saves download history to a persistent file.""" + self.log_signal.emit(f"📜 Attempting to save history to: {self.persistent_history_file}") + try: + history_dir = os.path.dirname(self.persistent_history_file) + self.log_signal.emit(f" History directory: {history_dir}") + if not os.path.exists(history_dir): + os.makedirs(history_dir, exist_ok=True) + self.log_signal.emit(f" Created history directory: {history_dir}") + + history_data = { + "last_downloaded_files": list(self.last_downloaded_files_details), + "first_processed_posts": self.final_download_history_entries + } + with open(self.persistent_history_file, 'w', encoding='utf-8') as f: + json.dump(history_data, f, indent=2) + self.log_signal.emit(f"✅ Saved {len(self.final_download_history_entries)} history entries to: {self.persistent_history_file}") + except Exception as e: + self.log_signal.emit(f"❌ Error saving persistent history to {self.persistent_history_file}: {e}") + + + def _load_creator_name_cache_from_json (self ): + """Loads creator id-name-service 
mappings from creators.json into self.creator_name_cache.""" + self .log_signal .emit ("ℹ️ Attempting to load creators.json for creator name cache.") + + if getattr (sys ,'frozen',False )and hasattr (sys ,'_MEIPASS'): + base_path_for_creators =sys ._MEIPASS + else : + base_path_for_creators =self .app_base_dir + + creators_file_path =os .path .join (base_path_for_creators ,"data" ,"creators.json") + + if not os .path .exists (creators_file_path ): + self .log_signal .emit (f"⚠️ 'creators.json' not found at {creators_file_path }. Creator name cache will be empty.") + self .creator_name_cache .clear () + return + + try : + with open (creators_file_path ,'r',encoding ='utf-8')as f : + loaded_data =json .load (f ) + + creators_list =[] + if isinstance (loaded_data ,list )and len (loaded_data )>0 and isinstance (loaded_data [0 ],list ): + creators_list =loaded_data [0 ] + elif isinstance (loaded_data ,list )and all (isinstance (item ,dict )for item in loaded_data ): + creators_list =loaded_data + else : + self .log_signal .emit (f"⚠️ 'creators.json' has an unexpected format. 
Creator name cache may be incomplete.") + + for creator_data in creators_list : + creator_id =creator_data .get ("id") + name =creator_data .get ("name") + service =creator_data .get ("service") + if creator_id and name and service : + self .creator_name_cache [(service .lower (),str (creator_id ))]=name + self .log_signal .emit (f"✅ Successfully loaded {len (self .creator_name_cache )} creator names into cache from 'creators.json'.") + except Exception as e : + self .log_signal .emit (f"❌ Error loading 'creators.json' for name cache: {e }") + self .creator_name_cache .clear () + + def _show_download_history_dialog (self ): + """Shows the dialog with the finalized download history.""" + last_3_downloaded =list (self .last_downloaded_files_details ) + first_processed =self .final_download_history_entries + + if not last_3_downloaded and not first_processed : + QMessageBox .information ( + self , + self ._tr ("download_history_dialog_title_empty","Download History (Empty)"), + self ._tr ("no_download_history_header","No Downloads Yet") + ) + return + + dialog = DownloadHistoryDialog(last_3_downloaded, first_processed, self) + dialog .exec_ () + + def _handle_actual_file_downloaded (self ,file_details_dict ): + """Handles a successfully downloaded file for the 'last 3 downloaded' history.""" + if not file_details_dict : + return + file_details_dict ['download_timestamp']=time .time () + creator_key =(file_details_dict .get ('service','').lower (),str (file_details_dict .get ('user_id',''))) + file_details_dict ['creator_display_name']=self .creator_name_cache .get (creator_key ,file_details_dict .get ('folder_context_name','Unknown Creator/Series')) + self .last_downloaded_files_details .append (file_details_dict ) + + + def _handle_file_successfully_downloaded (self ,history_entry_dict ): + """Handles a successfully downloaded file for history logging.""" + if len (self .download_history_log )>=self .download_history_log .maxlen : + self .download_history_log 
.popleft () + self .download_history_log .append (history_entry_dict ) + + + def _handle_actual_file_downloaded (self ,file_details_dict ): + """Handles a successfully downloaded file for the 'last 3 downloaded' history.""" + if not file_details_dict : + return + + file_details_dict ['download_timestamp']=time .time () + + + creator_key =( + file_details_dict .get ('service','').lower (), + str (file_details_dict .get ('user_id','')) + ) + creator_display_name =self .creator_name_cache .get (creator_key ,file_details_dict .get ('folder_context_name','Unknown Creator')) + file_details_dict ['creator_display_name']=creator_display_name + + self .last_downloaded_files_details .append (file_details_dict ) + + + def _handle_favorite_mode_toggle (self ,checked ): + if not self .url_or_placeholder_stack or not self .bottom_action_buttons_stack : + return + + self ._handle_favorite_mode_toggle (self .favorite_mode_checkbox .isChecked ()) + self ._update_favorite_scope_button_text () + if hasattr (self ,'link_input'): + self .last_link_input_text_for_queue_sync =self .link_input .text () + + def _update_download_extracted_links_button_state (self ): + if hasattr (self ,'download_extracted_links_button')and self .download_extracted_links_button : + is_only_links =self .radio_only_links and self .radio_only_links .isChecked () + if not is_only_links : + self .download_extracted_links_button .setEnabled (False ) + return + + supported_platforms_for_button ={'mega','google drive','dropbox'} + has_supported_links =any ( + link_info [3 ].lower ()in supported_platforms_for_button for link_info in self .extracted_links_cache + ) + self .download_extracted_links_button .setEnabled (is_only_links and has_supported_links ) + + def _show_download_extracted_links_dialog (self ): + """Shows the placeholder dialog for downloading extracted links.""" + if not (self .radio_only_links and self .radio_only_links .isChecked ()): + self .log_signal .emit ("ℹ️ Download extracted links button 
clicked, but not in 'Only Links' mode.") + return + + supported_platforms ={'mega','google drive','dropbox'} + links_to_show_in_dialog =[] + for link_data_tuple in self .extracted_links_cache : + platform =link_data_tuple [3 ].lower () + if platform in supported_platforms : + links_to_show_in_dialog .append ({ + 'title':link_data_tuple [0 ], + 'link_text':link_data_tuple [1 ], + 'url':link_data_tuple [2 ], + 'platform':platform , + 'key':link_data_tuple [4 ] + }) + + if not links_to_show_in_dialog : + QMessageBox .information (self ,"No Supported Links","No Mega, Google Drive, or Dropbox links were found in the extracted links.") + return + + dialog = DownloadExtractedLinksDialog(links_to_show_in_dialog, self) + dialog .download_requested .connect (self ._handle_extracted_links_download_request ) + dialog .exec_ () + + def _handle_extracted_links_download_request (self ,selected_links_info ): + if not selected_links_info : + self .log_signal .emit ("ℹ️ No links selected for download from dialog.") + return + + + if self .radio_only_links and self .radio_only_links .isChecked ()and self .only_links_log_display_mode ==LOG_DISPLAY_DOWNLOAD_PROGRESS : + self .main_log_output .clear () + self .log_signal .emit ("ℹ️ Displaying Mega download progress (extracted links hidden)...") + self .mega_download_log_preserved_once =False + + current_main_dir =self .dir_input .text ().strip () + download_dir_for_mega ="" + + if current_main_dir and os .path .isdir (current_main_dir ): + download_dir_for_mega =current_main_dir + self .log_signal .emit (f"ℹ️ Using existing main download location for external links: {download_dir_for_mega }") + else : + if not current_main_dir : + self .log_signal .emit ("ℹ️ Main download location is empty. Prompting for download folder.") + else : + self .log_signal .emit ( + f"⚠️ Main download location '{current_main_dir }' is not a valid directory. 
Prompting for download folder.") + + + suggestion_path =current_main_dir if current_main_dir else QStandardPaths .writableLocation (QStandardPaths .DownloadLocation ) + + chosen_dir =QFileDialog .getExistingDirectory ( + self , + self ._tr ("select_download_folder_mega_dialog_title","Select Download Folder for External Links"), + suggestion_path , + options =QFileDialog .ShowDirsOnly |QFileDialog .DontUseNativeDialog + ) + + if not chosen_dir : + self .log_signal .emit ("ℹ️ External links download cancelled - no download directory selected from prompt.") + return + download_dir_for_mega =chosen_dir + + + self .log_signal .emit (f"ℹ️ Preparing to download {len (selected_links_info )} selected external link(s) to: {download_dir_for_mega }") + if not os .path .exists (download_dir_for_mega ): + self .log_signal .emit (f"❌ Critical Error: Selected download directory '{download_dir_for_mega }' does not exist.") + return + + + tasks_for_thread =selected_links_info + + if self .external_link_download_thread and self .external_link_download_thread .isRunning (): + QMessageBox .warning (self ,"Busy","Another external link download is already in progress.") + return + + self .external_link_download_thread =ExternalLinkDownloadThread ( + tasks_for_thread , + download_dir_for_mega , + self .log_signal .emit , + self + ) + self .external_link_download_thread .finished .connect (self ._on_external_link_download_thread_finished ) + + self .external_link_download_thread .progress_signal .connect (self .handle_main_log ) + self .external_link_download_thread .file_complete_signal .connect (self ._on_single_external_file_complete ) + + + + self .set_ui_enabled (False ) + + self .progress_label .setText (self ._tr ("progress_processing_post_text","Progress: Processing post {processed_posts}...").format (processed_posts =f"External Links (0/{len (tasks_for_thread )})")) + self .external_link_download_thread .start () + + def _on_external_link_download_thread_finished (self ): + self 
.log_signal .emit ("✅ External link download thread finished.") + self .progress_label .setText (f"{self ._tr ('status_completed','Completed')}: External link downloads. {self ._tr ('ready_for_new_task_text','Ready for new task.')}") + + self .mega_download_log_preserved_once =True + self .log_signal .emit ("INTERNAL: mega_download_log_preserved_once SET to True.") + + if self .radio_only_links and self .radio_only_links .isChecked (): + self .log_signal .emit (HTML_PREFIX +"

--- End of Mega Download Log ---
") + + + + self .set_ui_enabled (True ) + + + + if self .mega_download_log_preserved_once : + self .mega_download_log_preserved_once =False + self .log_signal .emit ("INTERNAL: mega_download_log_preserved_once RESET to False.") + + if self .external_link_download_thread : + self .external_link_download_thread .deleteLater () + self .external_link_download_thread =None + + def _on_single_external_file_complete (self ,url ,success ): + pass + + + def _show_future_settings_dialog(self): + """Shows the placeholder dialog for future settings.""" + # --- DEBUGGING CODE TO FIND THE UNEXPECTED CALL --- + import traceback + print("--- DEBUG: _show_future_settings_dialog() was called. See stack trace below. ---") + traceback.print_stack() + print("--------------------------------------------------------------------------------") + + # Correctly create the dialog instance once with the parent set to self. + dialog = FutureSettingsDialog(self) + dialog.exec_() + + def _check_if_all_work_is_done(self): + """ + Checks if the fetcher thread is done AND if all submitted tasks have been processed. + If so, finalizes the download. + """ + # Conditions for being completely finished: + fetcher_is_done = not self.is_fetcher_thread_running + all_workers_are_done = (self.total_posts_to_process > 0 and self.processed_posts_count >= self.total_posts_to_process) + + if fetcher_is_done and all_workers_are_done: + self.log_signal.emit("🏁 All fetcher and worker tasks complete.") + self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set(), self.all_kept_original_filenames) + + def _sync_queue_with_link_input (self ,current_text ): + """ + Synchronizes the favorite_download_queue with the link_input text. + Removes creators from the queue if their names are removed from the input field. + Only affects items added via 'creator_popup_selection'. 
+ """ + if not self .favorite_download_queue : + self .last_link_input_text_for_queue_sync =current_text + return + + current_names_in_input ={name .strip ().lower ()for name in current_text .split (',')if name .strip ()} + + queue_copy =list (self .favorite_download_queue ) + removed_count =0 + + for item in queue_copy : + if item .get ('type')=='creator_popup_selection': + item_name_lower =item .get ('name','').lower () + if item_name_lower and item_name_lower not in current_names_in_input : + try : + self .favorite_download_queue .remove (item ) + self .log_signal .emit (f"ℹ️ Creator '{item .get ('name')}' removed from download queue due to removal from URL input.") + removed_count +=1 + except ValueError : + self .log_signal .emit (f"⚠️ Tried to remove '{item .get ('name')}' from queue, but it was not found (sync).") + + self .last_link_input_text_for_queue_sync =current_text + + def _browse_cookie_file (self ): + """Opens a file dialog to select a cookie file.""" + start_dir =QStandardPaths .writableLocation (QStandardPaths .DownloadLocation ) + if not start_dir : + start_dir =os .path .dirname (self .config_file ) + + filepath ,_ =QFileDialog .getOpenFileName (self ,"Select Cookie File",start_dir ,"Text files (*.txt);;All files (*)") + if filepath : + self .selected_cookie_filepath =filepath + self .log_signal .emit (f"ℹ️ Selected cookie file: {filepath }") + if hasattr (self ,'cookie_text_input'): + self .cookie_text_input .blockSignals (True ) + self .cookie_text_input .setText (filepath ) + self .cookie_text_input .setToolTip (self ._tr ("cookie_text_input_tooltip_file_selected","Using selected cookie file: {filepath}").format (filepath =filepath )) + self .cookie_text_input .setPlaceholderText (self ._tr ("cookie_text_input_placeholder_with_file_selected_text","Using selected cookie file (see Browse...)")) + self .cookie_text_input .setReadOnly (True ) + self .cookie_text_input .setPlaceholderText ("") + self .cookie_text_input .blockSignals (False ) + + 
    def _update_cookie_input_placeholders_and_tooltips(self):
        # Refreshes the cookie input's placeholder/tooltip to reflect whether a
        # cookies.txt file is currently selected or a manual string is expected.
        if hasattr(self, 'cookie_text_input'):
            if self.selected_cookie_filepath:
                self.cookie_text_input.setPlaceholderText(self._tr("cookie_text_input_placeholder_with_file_selected_text", "Using selected cookie file..."))
                self.cookie_text_input.setToolTip(self._tr("cookie_text_input_tooltip_file_selected", "Using selected cookie file: {filepath}").format(filepath=self.selected_cookie_filepath))
            else:
                self.cookie_text_input.setPlaceholderText(self._tr("cookie_text_input_placeholder_no_file_selected_text", "Cookie string (if no cookies.txt selected)"))
                self.cookie_text_input.setToolTip(self._tr("cookie_text_input_tooltip", "Enter your cookie string directly..."))
            # NOTE(review): the three lines below unconditionally lock the input and
            # clear the placeholder that was just set above — they look like
            # copy-paste residue from _browse_cookie_file; confirm intended
            # nesting and behavior (indentation was lost in extraction).
            self.cookie_text_input.setReadOnly(True)
            self.cookie_text_input.setPlaceholderText("")
            self.cookie_text_input.blockSignals(False)

    def _center_on_screen(self):
        """Centers the widget on the screen."""
        try:
            primary_screen = QApplication.primaryScreen()
            if not primary_screen:
                # No primary screen reported — fall back to the first available one.
                screens = QApplication.screens()
                if not screens: return
                primary_screen = screens[0]

            available_geo = primary_screen.availableGeometry()
            widget_geo = self.frameGeometry()

            x = available_geo.x() + (available_geo.width() - widget_geo.width()) // 2
            y = available_geo.y() + (available_geo.height() - widget_geo.height()) // 2
            self.move(x, y)
        except Exception as e:
            self.log_signal.emit(f"⚠️ Error centering window: {e }")

    def _handle_cookie_text_manual_change(self, text):
        """Handles manual changes to the cookie text input, especially clearing a browsed path."""
        if not hasattr(self, 'cookie_text_input') or not hasattr(self, 'use_cookie_checkbox'):
            return
        # If the user clears a previously browsed file path while cookies are
        # enabled, drop the file selection and return to manual string entry.
        if self.selected_cookie_filepath and not text.strip() and self.use_cookie_checkbox.isChecked():
            self.selected_cookie_filepath = None
            self.cookie_text_input.setReadOnly(False)
            self._update_cookie_input_placeholders_and_tooltips()
            self.log_signal.emit("ℹ️ Browsed cookie file path cleared from input. Switched to manual cookie string mode.")

    def get_dark_theme(self):
        # Returns the application-wide dark-mode stylesheet (Qt style sheet syntax).
        return """
        QWidget { background-color: #2E2E2E; color: #E0E0E0; font-family: Segoe UI, Arial, sans-serif; font-size: 10pt; }
        QLineEdit, QListWidget { background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; color: #F0F0F0; border-radius: 4px; }
        QTextEdit { background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px;
        color: #F0F0F0; border-radius: 4px;
        font-family: Consolas, Courier New, monospace; font-size: 9.5pt; }
        /* --- FIX: Adjusted padding to match QLineEdit and removed min-height --- */
        QPushButton { background-color: #555; color: #F0F0F0; border: 1px solid #6A6A6A; padding: 5px 12px; border-radius: 4px; }
        QPushButton:hover { background-color: #656565; border: 1px solid #7A7A7A; }
        QPushButton:pressed { background-color: #4A4A4A; }
        QPushButton:disabled { background-color: #404040; color: #888; border-color: #555; }
        QLabel { font-weight: bold; padding-top: 4px; padding-bottom: 2px; color: #C0C0C0; }
        QRadioButton, QCheckBox { spacing: 5px; color: #E0E0E0; padding-top: 4px; padding-bottom: 4px; }
        QRadioButton::indicator, QCheckBox::indicator { width: 14px; height: 14px; }
        QListWidget { alternate-background-color: #353535; border: 1px solid #5A5A5A; }
        QListWidget::item:selected { background-color: #007ACC; color: #FFFFFF; }
        QToolTip { background-color: #4A4A4A; color: #F0F0F0; border: 1px solid #6A6A6A; padding: 4px; border-radius: 3px; }
        QSplitter::handle { background-color: #5A5A5A; }
        QSplitter::handle:horizontal { width: 5px; }
        QSplitter::handle:vertical { height: 5px; }
        QFrame[frameShape="4"], QFrame[frameShape="5"] {
            border: 1px solid #4A4A4A;
            border-radius: 3px;
        }
        """

    def browse_directory(self):
        # Opens the download-folder picker, seeding it with the current dir input
        # or a sensible home/documents fallback.
        initial_dir_text = self.dir_input.text()
        start_path = ""
        if initial_dir_text and os.path.isdir(initial_dir_text):
            start_path = initial_dir_text
        else:
            home_location = QStandardPaths.writableLocation(QStandardPaths.HomeLocation)
            documents_location = QStandardPaths.writableLocation(QStandardPaths.DocumentsLocation)
            if home_location and os.path.isdir(home_location):
                start_path = home_location
            elif documents_location and os.path.isdir(documents_location):
                start_path = documents_location

        self.log_signal.emit(f"ℹ️ Opening folder dialog. Suggested start path: '{start_path }'")

        try:
            # Non-native dialog is used deliberately (see RuntimeError handler below).
            folder = QFileDialog.getExistingDirectory(
                self,
                "Select Download Folder",
                start_path,
                options=QFileDialog.DontUseNativeDialog | QFileDialog.ShowDirsOnly
            )

            if folder:
                self.dir_input.setText(folder)
                self.log_signal.emit(f"ℹ️ Folder selected: {folder }")
            else:
                self.log_signal.emit(f"ℹ️ Folder selection cancelled by user.")
        except RuntimeError as e:
            self.log_signal.emit(f"❌ RuntimeError opening folder dialog: {e }. This might indicate a deeper Qt or system issue.")
            QMessageBox.critical(self, "Dialog Error", f"A runtime error occurred while trying to open the folder dialog: {e }")
        except Exception as e:
            self.log_signal.emit(f"❌ Unexpected error opening folder dialog: {e }\n{traceback .format_exc (limit =3 )}")
            QMessageBox.critical(self, "Dialog Error", f"An unexpected error occurred with the folder selection dialog: {e }")

    def handle_main_log(self, message):
        # Sink for log_signal: routes messages into the main log pane.
        # Intercept temp-file bookkeeping messages before normal logging; the
        # path is remembered for end-of-session cleanup only in single-PDF mode.
        if message.startswith("TEMP_FILE_PATH:"):
            filepath = message.split(":", 1)[1]
            if self.single_pdf_setting:
                self.session_temp_files.append(filepath)
            return
        is_html_message = message.startswith(HTML_PREFIX)
        display_message = message
        use_html = False

        if is_html_message:
            display_message = message[len(HTML_PREFIX):]
            use_html = True

        try:
            # Replace NUL bytes, which the text widget cannot render.
            safe_message = str(display_message).replace('\x00', '[NULL]')
            if use_html:
                self.main_log_output.insertHtml(safe_message)
            else:
                self.main_log_output.append(safe_message)

            # Only auto-scroll when the user is already near the bottom.
            scrollbar = self.main_log_output.verticalScrollBar()
            if scrollbar.value() >= scrollbar.maximum() - 30:
                scrollbar.setValue(scrollbar.maximum())
        except Exception as e:
            print(f"GUI Main Log Error: {e }\nOriginal Message: {message }")
    def _extract_key_term_from_title(self, title):
        # Heuristically picks the most salient word from a post title, used to
        # surface names that the user's character filters did not match.
        if not title:
            return None
        # Strip bracketed/parenthesised qualifiers before scanning for words.
        title_cleaned = re.sub(r'\[.*?\]', '', title)
        title_cleaned = re.sub(r'\(.*?\)', '', title_cleaned)
        title_cleaned = title_cleaned.strip()
        word_matches = list(re.finditer(r'\b[a-zA-Z][a-zA-Z0-9_-]*\b', title_cleaned))

        capitalized_candidates = []
        for match in word_matches:
            word = match.group(0)
            # Prefer Title-case words longer than 2 chars that are not stop words;
            # long all-caps tokens are excluded.
            if word.istitle() and word.lower() not in self.STOP_WORDS and len(word) > 2:
                if not (len(word) > 3 and word.isupper()):
                    capitalized_candidates.append({'text': word, 'len': len(word), 'pos': match.start()})

        if capitalized_candidates:
            # Longest candidate wins; ties broken by the later position.
            capitalized_candidates.sort(key=lambda x: (x['len'], x['pos']), reverse=True)
            return capitalized_candidates[0]['text']
        # No capitalized candidate: fall back to the longest non-stop-word (> 3 chars).
        non_capitalized_words_info = []
        for match in word_matches:
            word = match.group(0)
            if word.lower() not in self.STOP_WORDS and len(word) > 3:
                non_capitalized_words_info.append({'text': word, 'len': len(word), 'pos': match.start()})

        if non_capitalized_words_info:
            non_capitalized_words_info.sort(key=lambda x: (x['len'], x['pos']), reverse=True)
            return non_capitalized_words_info[0]['text']

        return None

    def handle_missed_character_post(self, post_title, reason):
        # Records a post skipped by character filters; one key term per title is
        # surfaced (de-duplicated case-insensitively) in the missed-character log.
        if self.missed_character_log_output:
            key_term = self._extract_key_term_from_title(post_title)

            if key_term:
                normalized_key_term = key_term.lower()
                if normalized_key_term not in self.already_logged_bold_key_terms:
                    self.already_logged_bold_key_terms.add(normalized_key_term)
                    self.missed_key_terms_buffer.append(key_term)
                    self._refresh_missed_character_log()
        else:
            print(f"Debug (Missed Char Log): Title='{post_title }', Reason='{reason }'")

    def _refresh_missed_character_log(self):
        # Rebuilds the missed-character pane from the buffered key terms,
        # sorted alphabetically, then scrolls back to the top.
        if self.missed_character_log_output:
            self.missed_character_log_output.clear()
            sorted_terms = sorted(self.missed_key_terms_buffer, key=str.lower)
            separator_line = "-" * 40

            for term in sorted_terms:
                display_term = term.capitalize()

                self.missed_character_log_output.append(separator_line)
                # NOTE(review): the markup inside this f-string literal was lost
                # during extraction (it is unterminated as written) — restore the
                # original HTML tags from upstream.
                self.missed_character_log_output.append(f'
{display_term }
')
                self.missed_character_log_output.append(separator_line)
                self.missed_character_log_output.append("")

            scrollbar = self.missed_character_log_output.verticalScrollBar()
            scrollbar.setValue(0)

    def _is_download_active(self):
        # True while any download-related worker is still running: the single
        # downloader thread, the post fetcher, the main/retry thread pools, or
        # the external-link download thread.
        single_thread_active = self.download_thread and self.download_thread.isRunning()
        fetcher_active = hasattr(self, 'is_fetcher_thread_running') and self.is_fetcher_thread_running
        pool_has_active_tasks = self.thread_pool is not None and any(not f.done() for f in self.active_futures if f is not None)
        retry_pool_active = hasattr(self, 'retry_thread_pool') and self.retry_thread_pool is not None and hasattr(self, 'active_retry_futures') and any(not f.done() for f in self.active_retry_futures if f is not None)

        external_dl_thread_active = hasattr(self, 'external_link_download_thread') and self.external_link_download_thread is not None and self.external_link_download_thread.isRunning()

        return single_thread_active or fetcher_active or pool_has_active_tasks or retry_pool_active or external_dl_thread_active

    def handle_external_link_signal(self, post_title, link_text, link_url, platform, decryption_key):
        # Receives an external link discovered by a worker, queues it for
        # display, and caches it for export / later download.
        link_data = (post_title, link_text, link_url, platform, decryption_key)
        self.external_link_queue.append(link_data)
        if self.radio_only_links and self.radio_only_links.isChecked():
            self.extracted_links_cache.append(link_data)
            self._update_download_extracted_links_button_state()

        is_only_links_mode = self.radio_only_links and self.radio_only_links.isChecked()
        should_display_in_external_log = self.show_external_links and not is_only_links_mode

        if not (is_only_links_mode or should_display_in_external_log):
            # No view wants this link right now — just keep the queue draining.
            self._is_processing_external_link_queue = False
            if self.external_link_queue:
                QTimer.singleShot(0, self._try_process_next_external_link)
            return

        # Cache for export; the membership test avoids duplicating the append
        # done above in only-links mode.
        if link_data not in self.extracted_links_cache:
            self.extracted_links_cache.append(link_data)
+ def _try_process_next_external_link (self ): + if self ._is_processing_external_link_queue or not self .external_link_queue : + return + + is_only_links_mode =self .radio_only_links and self .radio_only_links .isChecked () + should_display_in_external_log =self .show_external_links and not is_only_links_mode + + if not (is_only_links_mode or should_display_in_external_log ): + self ._is_processing_external_link_queue =False + if self .external_link_queue : + QTimer .singleShot (0 ,self ._try_process_next_external_link ) + return + + self ._is_processing_external_link_queue =True + link_data =self .external_link_queue .popleft () + + if is_only_links_mode : + QTimer .singleShot (0 ,lambda data =link_data :self ._display_and_schedule_next (data )) + elif self ._is_download_active (): + delay_ms =random .randint (4000 ,8000 ) + QTimer .singleShot (delay_ms ,lambda data =link_data :self ._display_and_schedule_next (data )) + else : + QTimer .singleShot (0 ,lambda data =link_data :self ._display_and_schedule_next (data )) + + + def _display_and_schedule_next (self ,link_data ): + post_title ,link_text ,link_url ,platform ,decryption_key =link_data + is_only_links_mode =self .radio_only_links and self .radio_only_links .isChecked () + + max_link_text_len =50 + display_text =(link_text [:max_link_text_len ].strip ()+"..." + if len (link_text )>max_link_text_len else link_text .strip ()) + formatted_link_info =f"{display_text } - {link_url } - {platform }" + + if decryption_key : + formatted_link_info +=f" (Decryption Key: {decryption_key })" + + if is_only_links_mode : + if post_title !=self ._current_link_post_title : + separator_html ="
"+"-"*45 +"
"
                if self._current_link_post_title is not None:
                    self.log_signal.emit(HTML_PREFIX + separator_html)
                # NOTE(review): the markup inside this f-string literal was lost
                # during extraction (unterminated as written) — restore the tags.
                title_html =f'{html .escape (post_title )}
'
                self.log_signal.emit(HTML_PREFIX + title_html)
                self._current_link_post_title = post_title

            self.log_signal.emit(formatted_link_info)
        elif self.show_external_links:
            # Mirror the link into the external-links pane when it is enabled.
            separator = "-" * 45
            self._append_to_external_log(formatted_link_info, separator)

        # Release the pump flag and immediately try the next pending link.
        self._is_processing_external_link_queue = False
        self._try_process_next_external_link()

    def _append_to_external_log(self, formatted_link_text, separator):
        # Appends one link line to the external-links pane.
        # NOTE(review): the 'separator' parameter is accepted but unused here.
        if not (self.external_log_output and self.external_log_output.isVisible()):
            return

        try:
            self.external_log_output.append(formatted_link_text)
            self.external_log_output.append("")

            # Only auto-scroll when the user is already near the bottom.
            scrollbar = self.external_log_output.verticalScrollBar()
            if scrollbar.value() >= scrollbar.maximum() - 50:
                scrollbar.setValue(scrollbar.maximum())
        except Exception as e:
            self.log_signal.emit(f"GUI External Log Append Error: {e }\nOriginal Message: {formatted_link_text }")
            print(f"GUI External Log Error (Append): {e }\nOriginal Message: {formatted_link_text }")

    def update_file_progress_display(self, filename, progress_info):
        # Updates the per-file progress label. 'progress_info' is either a list
        # of per-chunk state dicts (multipart download) or a
        # (downloaded_bytes, total_bytes) tuple (single-stream download).
        if not filename and progress_info is None:
            self.file_progress_label.setText("")
            return

        if isinstance(progress_info, list):
            if not progress_info:
                self.file_progress_label.setText(self._tr("downloading_multipart_initializing_text", "File: (unknown) - Initializing parts...").format(filename=filename))
                return

            total_downloaded_overall = sum(cs.get('downloaded', 0) for cs in progress_info)
            total_file_size_overall = sum(cs.get('total', 0) for cs in progress_info)

            active_chunks_count = 0
            combined_speed_bps = 0
            for cs in progress_info:
                if cs.get('active', False):
                    active_chunks_count += 1
                    combined_speed_bps += cs.get('speed_bps', 0)

            dl_mb = total_downloaded_overall / (1024 * 1024)
            total_mb = total_file_size_overall / (1024 * 1024)
            # speed_bps is bits/s; /8 converts to bytes/s before scaling to MB/s.
            speed_MBps = (combined_speed_bps / 8) / (1024 * 1024)

            # NOTE(review): the fallback strings below show '(unknown)' where a
            # '{filename}' placeholder is expected (format() still passes
            # filename=) — likely an extraction artifact; confirm upstream.
            progress_text = self._tr("downloading_multipart_text", "DL '(unknown)...': {downloaded_mb:.1f}/{total_mb:.1f} MB ({parts} parts @ {speed:.2f} MB/s)").format(filename=filename[:20], downloaded_mb=dl_mb, total_mb=total_mb, parts=active_chunks_count, speed=speed_MBps)
            self.file_progress_label.setText(progress_text)

        elif isinstance(progress_info, tuple) and len(progress_info) == 2:
            downloaded_bytes, total_bytes = progress_info

            if not filename and total_bytes == 0 and downloaded_bytes == 0:
                self.file_progress_label.setText("")
                return

            # Truncate long filenames for display.
            max_fn_len = 25
            disp_fn = filename if len(filename) <= max_fn_len else filename[:max_fn_len - 3].strip() + "..."

            dl_mb = downloaded_bytes / (1024 * 1024)
            if total_bytes > 0:
                tot_mb = total_bytes / (1024 * 1024)
                prog_text_base = self._tr("downloading_file_known_size_text", "Downloading '(unknown)' ({downloaded_mb:.1f}MB / {total_mb:.1f}MB)").format(filename=disp_fn, downloaded_mb=dl_mb, total_mb=tot_mb)
            else:
                # Server did not report a Content-Length — show downloaded only.
                prog_text_base = self._tr("downloading_file_unknown_size_text", "Downloading '(unknown)' ({downloaded_mb:.1f}MB)").format(filename=disp_fn, downloaded_mb=dl_mb)

            self.file_progress_label.setText(prog_text_base)
        elif filename and progress_info is None:
            self.file_progress_label.setText("")
        elif not filename and not progress_info:
            self.file_progress_label.setText("")

    def _clear_stale_temp_files(self):
        """On startup, cleans any temp files from a previous crashed session."""
        try:
            temp_dir = os.path.join(self.app_base_dir, "appdata")
            if not os.path.isdir(temp_dir):
                return

            for filename in os.listdir(temp_dir):
                if filename.startswith("tmp_") and filename.endswith(".json"):
                    try:
                        os.remove(os.path.join(temp_dir, filename))
                        self.log_signal.emit(f"   🧹 Removed stale temp file: (unknown)")
                    except OSError:
                        pass # File might be locked, skip
        except Exception as e:
            self.log_signal.emit(f"⚠️ Error cleaning stale temp files: {e}")

    def _cleanup_temp_files(self):
"""Deletes all temporary files collected during the session.""" + if not self.session_temp_files: + return + + self.log_signal.emit(" Cleaning up temporary files...") + for filepath in self.session_temp_files: + try: + if os.path.exists(filepath): + os.remove(filepath) + except Exception as e: + self.log_signal.emit(f" ⚠️ Could not delete temp file '{filepath}': {e}") + self.session_temp_files = [] + + def update_external_links_setting (self ,checked ): + is_only_links_mode =self .radio_only_links and self .radio_only_links .isChecked () + is_only_archives_mode =self .radio_only_archives and self .radio_only_archives .isChecked () + + if is_only_links_mode or is_only_archives_mode : + if self .external_log_output :self .external_log_output .hide () + if self .log_splitter :self .log_splitter .setSizes ([self .height (),0 ]) + return + + self .show_external_links =checked + if checked : + if self .external_log_output :self .external_log_output .show () + if self .log_splitter :self .log_splitter .setSizes ([self .height ()//2 ,self .height ()//2 ]) + if self .main_log_output :self .main_log_output .setMinimumHeight (50 ) + if self .external_log_output :self .external_log_output .setMinimumHeight (50 ) + self .log_signal .emit ("\n"+"="*40 +"\n🔗 External Links Log Enabled\n"+"="*40 ) + if self .external_log_output : + self .external_log_output .clear () + self .external_log_output .append ("🔗 External Links Found:") + self ._try_process_next_external_link () + else : + if self .external_log_output :self .external_log_output .hide () + if self .log_splitter :self .log_splitter .setSizes ([self .height (),0 ]) + if self .main_log_output :self .main_log_output .setMinimumHeight (0 ) + if self .external_log_output :self .external_log_output .setMinimumHeight (0 ) + if self .external_log_output :self .external_log_output .clear () + self .log_signal .emit ("\n"+"="*40 +"\n🔗 External Links Log Disabled\n"+"="*40 ) + + + def _handle_filter_mode_change(self, button, 
checked): + # If a button other than "More" is selected, reset the UI + if button != self.radio_more and checked: + self.radio_more.setText("More") + self.more_filter_scope = None + self.single_pdf_setting = False # Reset the setting + # Re-enable the checkboxes + if hasattr(self, 'use_multithreading_checkbox'): self.use_multithreading_checkbox.setEnabled(True) + if hasattr(self, 'use_subfolders_checkbox'): self.use_subfolders_checkbox.setEnabled(True) + + if not button or not checked: + return + + is_only_links =(button ==self .radio_only_links ) + is_only_audio =(hasattr (self ,'radio_only_audio')and self .radio_only_audio is not None and button ==self .radio_only_audio ) + is_only_archives =(hasattr (self ,'radio_only_archives')and self .radio_only_archives is not None and button ==self .radio_only_archives ) + + if self .skip_scope_toggle_button : + self .skip_scope_toggle_button .setVisible (not (is_only_links or is_only_archives or is_only_audio )) + if hasattr (self ,'multipart_toggle_button')and self .multipart_toggle_button : + self .multipart_toggle_button .setVisible (not (is_only_links or is_only_archives or is_only_audio )) + + if self .link_search_input :self .link_search_input .setVisible (is_only_links ) + if self .link_search_button :self .link_search_button .setVisible (is_only_links ) + if self .export_links_button : + self .export_links_button .setVisible (is_only_links ) + self .export_links_button .setEnabled (is_only_links and bool (self .extracted_links_cache )) + + if hasattr (self ,'download_extracted_links_button')and self .download_extracted_links_button : + self .download_extracted_links_button .setVisible (is_only_links ) + self ._update_download_extracted_links_button_state () + + if self .download_btn : + if is_only_links : + self .download_btn .setText (self ._tr ("extract_links_button_text","🔗 Extract Links")) + else : + self .download_btn .setText (self ._tr ("start_download_button_text","⬇️ Start Download")) + if not 
is_only_links and self .link_search_input :self .link_search_input .clear () + + file_download_mode_active =not is_only_links + + + + if self .use_subfolders_checkbox :self .use_subfolders_checkbox .setEnabled (file_download_mode_active ) + if self .skip_words_input :self .skip_words_input .setEnabled (file_download_mode_active ) + if self .skip_scope_toggle_button :self .skip_scope_toggle_button .setEnabled (file_download_mode_active ) + if hasattr (self ,'remove_from_filename_input'):self .remove_from_filename_input .setEnabled (file_download_mode_active ) + + if self .skip_zip_checkbox : + can_skip_zip =file_download_mode_active and not is_only_archives + self .skip_zip_checkbox .setEnabled (can_skip_zip ) + if is_only_archives : + self .skip_zip_checkbox .setChecked (False ) + + if self .skip_rar_checkbox : + can_skip_rar =file_download_mode_active and not is_only_archives + self .skip_rar_checkbox .setEnabled (can_skip_rar ) + if is_only_archives : + self .skip_rar_checkbox .setChecked (False ) + + other_file_proc_enabled =file_download_mode_active and not is_only_archives + if self .download_thumbnails_checkbox :self .download_thumbnails_checkbox .setEnabled (other_file_proc_enabled ) + if self .compress_images_checkbox :self .compress_images_checkbox .setEnabled (other_file_proc_enabled ) + + if self .external_links_checkbox : + can_show_external_log_option =file_download_mode_active and not is_only_archives + self .external_links_checkbox .setEnabled (can_show_external_log_option ) + if not can_show_external_log_option : + self .external_links_checkbox .setChecked (False ) + + + if is_only_links : + self .progress_log_label .setText ("📜 Extracted Links Log:") + if self .external_log_output :self .external_log_output .hide () + if self .log_splitter :self .log_splitter .setSizes ([self .height (),0 ]) + + + do_clear_log_in_filter_change =True + if self .mega_download_log_preserved_once and self .only_links_log_display_mode ==LOG_DISPLAY_DOWNLOAD_PROGRESS : + 
do_clear_log_in_filter_change =False + + if self .main_log_output and do_clear_log_in_filter_change : + self .log_signal .emit ("INTERNAL: _handle_filter_mode_change - About to clear log.") + self .main_log_output .clear () + self .log_signal .emit ("INTERNAL: _handle_filter_mode_change - Log cleared by _handle_filter_mode_change.") + + if self .main_log_output :self .main_log_output .setMinimumHeight (0 ) + self .log_signal .emit ("="*20 +" Mode changed to: Only Links "+"="*20 ) + self ._try_process_next_external_link () + elif is_only_archives : + self .progress_log_label .setText ("📜 Progress Log (Archives Only):") + if self .external_log_output :self .external_log_output .hide () + if self .log_splitter :self .log_splitter .setSizes ([self .height (),0 ]) + if self .main_log_output :self .main_log_output .clear () + self .log_signal .emit ("="*20 +" Mode changed to: Only Archives "+"="*20 ) + elif is_only_audio : + self .progress_log_label .setText (self ._tr ("progress_log_label_text","📜 Progress Log:")+f" ({self ._tr ('filter_audio_radio','🎧 Only Audio')})") + if self .external_log_output :self .external_log_output .hide () + if self .log_splitter :self .log_splitter .setSizes ([self .height (),0 ]) + if self .main_log_output :self .main_log_output .clear () + self .log_signal .emit ("="*20 +f" Mode changed to: {self ._tr ('filter_audio_radio','🎧 Only Audio')} "+"="*20 ) + else : + self .progress_log_label .setText (self ._tr ("progress_log_label_text","📜 Progress Log:")) + self .update_external_links_setting (self .external_links_checkbox .isChecked ()if self .external_links_checkbox else False ) + self .log_signal .emit (f"="*20 +f" Mode changed to: {button .text ()} "+"="*20 ) + + + if is_only_links : + self ._filter_links_log () + + if hasattr (self ,'log_display_mode_toggle_button'): + self .log_display_mode_toggle_button .setVisible (is_only_links ) + self ._update_log_display_mode_button_text () + + subfolders_on =self .use_subfolders_checkbox 
.isChecked ()if self .use_subfolders_checkbox else False + manga_on =self .manga_mode_checkbox .isChecked ()if self .manga_mode_checkbox else False + + character_filter_should_be_active =file_download_mode_active and not is_only_archives + + if self .character_filter_widget : + self .character_filter_widget .setVisible (character_filter_should_be_active ) + + enable_character_filter_related_widgets =character_filter_should_be_active + + if self .character_input : + self .character_input .setEnabled (enable_character_filter_related_widgets ) + if not enable_character_filter_related_widgets : + self .character_input .clear () + + if self .char_filter_scope_toggle_button : + self .char_filter_scope_toggle_button .setEnabled (enable_character_filter_related_widgets ) + + self .update_ui_for_subfolders (subfolders_on ) + self .update_custom_folder_visibility () + self .update_ui_for_manga_mode (self .manga_mode_checkbox .isChecked ()if self .manga_mode_checkbox else False ) + + + def _filter_links_log (self ): + if not (self .radio_only_links and self .radio_only_links .isChecked ()):return + + search_term =self .link_search_input .text ().lower ().strip ()if self .link_search_input else "" + + if self .mega_download_log_preserved_once and self .only_links_log_display_mode ==LOG_DISPLAY_DOWNLOAD_PROGRESS : + + + self .log_signal .emit ("INTERNAL: _filter_links_log - Preserving Mega log (due to mega_download_log_preserved_once).") + elif self .only_links_log_display_mode ==LOG_DISPLAY_DOWNLOAD_PROGRESS : + + + + self .log_signal .emit ("INTERNAL: _filter_links_log - In Progress View. Clearing for placeholder.") + if self .main_log_output :self .main_log_output .clear () + self .log_signal .emit ("INTERNAL: _filter_links_log - Cleared for progress placeholder.") + self .log_signal .emit ("ℹ️ Switched to Mega download progress view. 
Extracted links are hidden.\n"
            " Perform a Mega download to see its progress here, or switch back to 🔗 view.")
            self.log_signal.emit("INTERNAL: _filter_links_log - Placeholder message emitted.")

        else:
            # Links view: clear and re-list every cached link that matches the filter.
            self.log_signal.emit("INTERNAL: _filter_links_log - In links view branch. About to clear.")
            if self.main_log_output: self.main_log_output.clear()
            self.log_signal.emit("INTERNAL: _filter_links_log - Cleared for links view.")

            current_title_for_display = None
            any_links_displayed_this_call = False
            # NOTE(review): the markup in this literal was lost during extraction
            # (unterminated as written) — restore the original HTML tags.
            separator_html ="
"+"-"*45 +"
"

            # NOTE(review): indentation was flattened in extraction; the nesting of
            # this loop relative to the if/elif/else above was reconstructed — verify.
            for post_title, link_text, link_url, platform, decryption_key in self.extracted_links_cache:
                # Match the search term against every displayed field.
                matches_search = (not search_term or
                                  search_term in link_text.lower() or
                                  search_term in link_url.lower() or
                                  search_term in platform.lower() or
                                  (decryption_key and search_term in decryption_key.lower()))
                if not matches_search:
                    continue

                any_links_displayed_this_call = True
                if post_title != current_title_for_display:
                    # New post: emit a separator (except before the first) and the title.
                    if current_title_for_display is not None:
                        if self.main_log_output: self.main_log_output.insertHtml(separator_html)

                    # NOTE(review): markup lost in extraction here as well.
                    title_html =f'{html .escape (post_title )}
'
                    if self.main_log_output: self.main_log_output.insertHtml(title_html)
                    current_title_for_display = post_title

                max_link_text_len = 50
                display_text = (link_text[:max_link_text_len].strip() + "..." if len(link_text) > max_link_text_len else link_text.strip())

                plain_link_info_line = f"{display_text } - {link_url } - {platform }"
                if decryption_key:
                    plain_link_info_line += f" (Decryption Key: {decryption_key })"
                if self.main_log_output:
                    self.main_log_output.append(plain_link_info_line)

            if any_links_displayed_this_call:
                if self.main_log_output: self.main_log_output.append("")
            elif not search_term and self.main_log_output:
                self.log_signal.emit("   (No links extracted yet or all filtered out in links view)")

        if self.main_log_output: self.main_log_output.verticalScrollBar().setValue(self.main_log_output.verticalScrollBar().maximum())


    def _export_links_to_file(self):
        # Writes the cached extracted links to a user-chosen text file,
        # grouped by post title. Only meaningful in 'Only Links' mode.
        if not (self.radio_only_links and self.radio_only_links.isChecked()):
            QMessageBox.information(self, "Export Links", "Link export is only available in 'Only Links' mode.")
            return
        if not self.extracted_links_cache:
            QMessageBox.information(self, "Export Links", "No links have been extracted yet.")
            return

        default_filename = "extracted_links.txt"
        filepath, _ = QFileDialog.getSaveFileName(self, "Save Links", default_filename, "Text Files (*.txt);;All Files (*)")

        if filepath:
            try:
                with open(filepath, 'w', encoding='utf-8') as f:
                    current_title_for_export = None
                    separator = "-" * 60 + "\n"
                    for post_title, link_text, link_url, platform, decryption_key in self.extracted_links_cache:
                        if post_title != current_title_for_export:
                            # Separate consecutive posts (no separator before the first).
                            if current_title_for_export is not None:
                                f.write("\n" + separator + "\n")
                            f.write(f"Post Title: {post_title }\n\n")
                            current_title_for_export = post_title
                        line_to_write = f" {link_text } - {link_url } - {platform }"
                        if decryption_key:
                            line_to_write += f" (Decryption Key: 
{decryption_key })"
                        f.write(line_to_write + "\n")
                self.log_signal.emit(f"✅ Links successfully exported to: {filepath }")
                QMessageBox.information(self, "Export Successful", f"Links exported to:\n{filepath }")
            except Exception as e:
                self.log_signal.emit(f"❌ Error exporting links: {e }")
                QMessageBox.critical(self, "Export Error", f"Could not export links: {e }")


    def get_filter_mode(self):
        # Maps the checked filter radio button to the backend filter-mode string.
        if self.radio_more and self.radio_more.isChecked():
            return 'text_only'
        # NOTE(review): 'Only Links' maps to 'all' — posts are fetched
        # unfiltered and links are extracted separately; confirm intended.
        elif self.radio_only_links and self.radio_only_links.isChecked():
            return 'all'
        elif self.radio_images.isChecked():
            return 'image'
        elif self.radio_videos.isChecked():
            return 'video'
        elif self.radio_only_archives and self.radio_only_archives.isChecked():
            return 'archive'
        elif hasattr(self, 'radio_only_audio') and self.radio_only_audio.isChecked():
            return 'audio'
        elif self.radio_all.isChecked():
            return 'all'
        return 'all'


    def get_skip_words_scope(self):
        # Current scope for the skip-words filter (files / posts / both).
        return self.skip_words_scope


    def _update_skip_scope_button_text(self):
        # Syncs the skip-scope toggle button's label and tooltip with the scope.
        if self.skip_scope_toggle_button:
            if self.skip_words_scope == SKIP_SCOPE_FILES:
                self.skip_scope_toggle_button.setText(self._tr("skip_scope_files_text", "Scope: Files"))
                self.skip_scope_toggle_button.setToolTip(self._tr("skip_scope_files_tooltip", "Tooltip for skip scope files"))
            elif self.skip_words_scope == SKIP_SCOPE_POSTS:
                self.skip_scope_toggle_button.setText(self._tr("skip_scope_posts_text", "Scope: Posts"))
                self.skip_scope_toggle_button.setToolTip(self._tr("skip_scope_posts_tooltip", "Tooltip for skip scope posts"))
            elif self.skip_words_scope == SKIP_SCOPE_BOTH:
                self.skip_scope_toggle_button.setText(self._tr("skip_scope_both_text", "Scope: Both"))
                self.skip_scope_toggle_button.setToolTip(self._tr("skip_scope_both_tooltip", "Tooltip for skip scope both"))
            else:
                self.skip_scope_toggle_button.setText(self._tr("skip_scope_unknown_text", "Scope: Unknown"))
                self.skip_scope_toggle_button.setToolTip(self._tr("skip_scope_unknown_tooltip", "Tooltip for skip scope unknown"))


    def _cycle_skip_scope(self):
        # Rotates Posts -> Files -> Both -> Posts; unknown values reset to Posts.
        if self.skip_words_scope == SKIP_SCOPE_POSTS:
            self.skip_words_scope = SKIP_SCOPE_FILES
        elif self.skip_words_scope == SKIP_SCOPE_FILES:
            self.skip_words_scope = SKIP_SCOPE_BOTH
        elif self.skip_words_scope == SKIP_SCOPE_BOTH:
            self.skip_words_scope = SKIP_SCOPE_POSTS
        else:
            self.skip_words_scope = SKIP_SCOPE_POSTS

        self._update_skip_scope_button_text()
        # Persist the new scope immediately.
        self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope)
        self.log_signal.emit(f"ℹ️ Skip words scope changed to: '{self .skip_words_scope }'")

    def get_char_filter_scope(self):
        # Current scope for the character filter (files / title / both / comments).
        return self.char_filter_scope

    def _update_char_filter_scope_button_text(self):
        # Syncs the character-filter scope button's label and tooltip.
        if self.char_filter_scope_toggle_button:
            if self.char_filter_scope == CHAR_SCOPE_FILES:
                self.char_filter_scope_toggle_button.setText(self._tr("char_filter_scope_files_text", "Filter: Files"))
                self.char_filter_scope_toggle_button.setToolTip(self._tr("char_filter_scope_files_tooltip", "Tooltip for char filter files"))
            elif self.char_filter_scope == CHAR_SCOPE_TITLE:
                self.char_filter_scope_toggle_button.setText(self._tr("char_filter_scope_title_text", "Filter: Title"))
                self.char_filter_scope_toggle_button.setToolTip(self._tr("char_filter_scope_title_tooltip", "Tooltip for char filter title"))
            elif self.char_filter_scope == CHAR_SCOPE_BOTH:
                self.char_filter_scope_toggle_button.setText(self._tr("char_filter_scope_both_text", "Filter: Both"))
                self.char_filter_scope_toggle_button.setToolTip(self._tr("char_filter_scope_both_tooltip", "Tooltip for char filter both"))
            elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
                self.char_filter_scope_toggle_button.setText(self._tr("char_filter_scope_comments_text", "Filter: Comments (Beta)"))
                self.char_filter_scope_toggle_button.setToolTip(self._tr("char_filter_scope_comments_tooltip", "Tooltip for char filter comments"))
            else:
                self.char_filter_scope_toggle_button.setText(self._tr("char_filter_scope_unknown_text", "Filter: Unknown"))
                self.char_filter_scope_toggle_button.setToolTip(self._tr("char_filter_scope_unknown_tooltip", "Tooltip for char filter unknown"))

    def _cycle_char_filter_scope(self):
        # Rotates Title -> Files -> Both -> Comments -> Title; unknown resets to Title.
        if self.char_filter_scope == CHAR_SCOPE_TITLE:
            self.char_filter_scope = CHAR_SCOPE_FILES
        elif self.char_filter_scope == CHAR_SCOPE_FILES:
            self.char_filter_scope = CHAR_SCOPE_BOTH
        elif self.char_filter_scope == CHAR_SCOPE_BOTH:
            self.char_filter_scope = CHAR_SCOPE_COMMENTS
        elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
            self.char_filter_scope = CHAR_SCOPE_TITLE
        else:
            self.char_filter_scope = CHAR_SCOPE_TITLE

        self._update_char_filter_scope_button_text()
        # Persist the new scope immediately.
        self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
        self.log_signal.emit(f"ℹ️ Character filter scope changed to: '{self .char_filter_scope }'")

    def _handle_ui_add_new_character(self):
        """Handles adding a new character from the UI input field."""
        name_from_ui_input = self.new_char_input.text().strip()
        successfully_added_any = False

        if not name_from_ui_input:
            QMessageBox.warning(self, "Input Error", "Name cannot be empty.")
            return

        # "(a, b, c)~" — grouped aliases sharing a single folder named "a b c".
        if name_from_ui_input.startswith("(") and name_from_ui_input.endswith(")~"):
            content = name_from_ui_input[1:-2].strip()
            aliases = [alias.strip() for alias in content.split(',') if alias.strip()]
            if aliases:
                folder_name = " ".join(aliases)
                if self.add_new_character(name_to_add=folder_name,
                                          is_group_to_add=True,
                                          aliases_to_add=aliases,
                                          suppress_similarity_prompt=False):
                    successfully_added_any = True
            else:
                QMessageBox.warning(self, "Input Error", "Empty group content for `~` format.")

        # "(a, b, c)" — each name is added as a separate entry.
        elif name_from_ui_input.startswith("(") and name_from_ui_input.endswith(")"):
            content = name_from_ui_input[1:-1].strip()
            names_to_add_separately = [name.strip() for name in content.split(',') if name.strip()]
            if names_to_add_separately:
                for name_item in names_to_add_separately:
                    if self.add_new_character(name_to_add=name_item,
                                              is_group_to_add=False,
                                              aliases_to_add=[name_item],
                                              suppress_similarity_prompt=False):
                        successfully_added_any = True
            else:
                QMessageBox.warning(self, "Input Error", "Empty group content for standard group format.")
        else:
            # Plain single name.
            if self.add_new_character(name_to_add=name_from_ui_input,
                                      is_group_to_add=False,
                                      aliases_to_add=[name_from_ui_input],
                                      suppress_similarity_prompt=False):
                successfully_added_any = True

        if successfully_added_any:
            self.new_char_input.clear()
            self.save_known_names()


    def add_new_character(self, name_to_add, is_group_to_add, aliases_to_add, suppress_similarity_prompt=False):
        # Adds a known-name entry after duplicate and similarity checks.
        # Returns False (with a warning dialog) on any rejection.
        global KNOWN_NAMES, clean_folder_name
        if not name_to_add:
            QMessageBox.warning(self, "Input Error", "Name cannot be empty."); return False

        name_to_add_lower = name_to_add.lower()
        # Reject exact duplicates of existing folder names or (for non-groups) aliases.
        for kn_entry in KNOWN_NAMES:
            if kn_entry["name"].lower() == name_to_add_lower:
                QMessageBox.warning(self, "Duplicate Name", f"The primary folder name '{name_to_add }' already exists."); return False
            if not is_group_to_add and name_to_add_lower in [a.lower() for a in kn_entry["aliases"]]:
                QMessageBox.warning(self, "Duplicate Alias", f"The name '{name_to_add }' already exists as an alias for '{kn_entry ['name']}'."); return False

        # Collect substring-similar existing names (one hit per entry).
        similar_names_details = []
        for kn_entry in KNOWN_NAMES:
            for term_to_check_similarity_against in kn_entry["aliases"]:
                term_lower = term_to_check_similarity_against.lower()
                if name_to_add_lower != term_lower and (name_to_add_lower in term_lower or term_lower in name_to_add_lower):
                    similar_names_details.append((name_to_add, kn_entry["name"]))
                    break
        for new_alias in aliases_to_add:
            if new_alias .lower 
()!=term_to_check_similarity_against .lower ()and (new_alias .lower ()in term_to_check_similarity_against .lower ()or term_to_check_similarity_against .lower ()in new_alias .lower ()): + similar_names_details .append ((new_alias ,kn_entry ["name"])) + break + + if similar_names_details and not suppress_similarity_prompt : + if similar_names_details : + first_similar_new ,first_similar_existing =similar_names_details [0 ] + shorter ,longer =sorted ([first_similar_new ,first_similar_existing ],key =len ) + + msg_box =QMessageBox (self ) + msg_box .setIcon (QMessageBox .Warning ) + msg_box .setWindowTitle ("Potential Name Conflict") + msg_box .setText ( + f"The name '{first_similar_new }' is very similar to an existing name: '{first_similar_existing }'.\n\n" + f"This could lead to unexpected folder grouping (e.g., under '{clean_folder_name (shorter )}' instead of a more specific '{clean_folder_name (longer )}' or vice-versa).\n\n" + "Do you want to change the name you are adding, or proceed anyway?" 
+ ) + change_button =msg_box .addButton ("Change Name",QMessageBox .RejectRole ) + proceed_button =msg_box .addButton ("Proceed Anyway",QMessageBox .AcceptRole ) + msg_box .setDefaultButton (proceed_button ) + msg_box .setEscapeButton (change_button ) + msg_box .exec_ () + + if msg_box .clickedButton ()==change_button : + self .log_signal .emit (f"ℹ️ User chose to change '{first_similar_new }' due to similarity with an alias of '{first_similar_existing }'.") + return False + self .log_signal .emit (f"⚠️ User proceeded with adding '{first_similar_new }' despite similarity with an alias of '{first_similar_existing }'.") + new_entry ={ + "name":name_to_add , + "is_group":is_group_to_add , + "aliases":sorted (list (set (aliases_to_add )),key =str .lower ) + } + if is_group_to_add : + for new_alias in new_entry ["aliases"]: + if any (new_alias .lower ()==kn_entry ["name"].lower ()for kn_entry in KNOWN_NAMES if kn_entry ["name"].lower ()!=name_to_add_lower ): + QMessageBox .warning (self ,"Alias Conflict",f"Alias '{new_alias }' (for group '{name_to_add }') conflicts with an existing primary name.");return False + KNOWN_NAMES .append (new_entry ) + KNOWN_NAMES .sort (key =lambda x :x ["name"].lower ()) + + self .character_list .clear () + self .character_list .addItems ([entry ["name"]for entry in KNOWN_NAMES ]) + self .filter_character_list (self .character_search_input .text ()) + + log_msg_suffix =f" (as group with aliases: {', '.join (new_entry ['aliases'])})"if is_group_to_add and len (new_entry ['aliases'])>1 else "" + self .log_signal .emit (f"✅ Added '{name_to_add }' to known names list{log_msg_suffix }.") + self .new_char_input .clear () + return True + + def _handle_more_options_toggled(self, button, checked): + """Shows the MoreOptionsDialog when the 'More' radio button is selected.""" + if button == self.radio_more and checked: + current_scope = self.more_filter_scope or MoreOptionsDialog.SCOPE_CONTENT + current_format = self.text_export_format or 'pdf' + + 
dialog = MoreOptionsDialog(self, current_scope=current_scope, current_format=current_format, single_pdf_checked=self.single_pdf_setting) + + if dialog.exec_() == QDialog.Accepted: + self.more_filter_scope = dialog.get_selected_scope() + self.text_export_format = dialog.get_selected_format() + self.single_pdf_setting = dialog.get_single_pdf_state() + + scope_text = "Comments" if self.more_filter_scope == MoreOptionsDialog.SCOPE_COMMENTS else "Description" + + format_display = f" ({self.text_export_format.upper()})" + if self.single_pdf_setting: + format_display = " (Single PDF)" + # --- NEW: Disable checkboxes if Single PDF is active --- + if hasattr(self, 'use_multithreading_checkbox'): + self.use_multithreading_checkbox.setChecked(False) + self.use_multithreading_checkbox.setEnabled(False) + if hasattr(self, 'use_subfolders_checkbox'): + self.use_subfolders_checkbox.setChecked(False) + self.use_subfolders_checkbox.setEnabled(False) + else: + # --- NEW: Re-enable checkboxes if Single PDF is not active --- + if hasattr(self, 'use_multithreading_checkbox'): self.use_multithreading_checkbox.setEnabled(True) + if hasattr(self, 'use_subfolders_checkbox'): self.use_subfolders_checkbox.setEnabled(True) + + + self.radio_more.setText(f"{scope_text}{format_display}") + + self.log_signal.emit(f"ℹ️ 'More' filter scope set to: {scope_text}, Format: {self.text_export_format.upper()}") + self.log_signal.emit(f"ℹ️ Single PDF setting: {'Enabled' if self.single_pdf_setting else 'Disabled'}") + else: + self.log_signal.emit("ℹ️ 'More' filter selection cancelled. 
Reverting to 'All'.") + self.radio_all.setChecked(True) + + def delete_selected_character (self ): + global KNOWN_NAMES + selected_items =self .character_list .selectedItems () + if not selected_items : + QMessageBox .warning (self ,"Selection Error","Please select one or more names to delete.");return + + primary_names_to_remove ={item .text ()for item in selected_items } + confirm =QMessageBox .question (self ,"Confirm Deletion", + f"Are you sure you want to delete {len (primary_names_to_remove )} selected entry/entries (and their aliases)?", + QMessageBox .Yes |QMessageBox .No ,QMessageBox .No ) + if confirm ==QMessageBox .Yes : + original_count =len (KNOWN_NAMES ) + KNOWN_NAMES [:]=[entry for entry in KNOWN_NAMES if entry ["name"]not in primary_names_to_remove ] + removed_count =original_count -len (KNOWN_NAMES ) + + if removed_count >0 : + self .log_signal .emit (f"🗑️ Removed {removed_count } name(s).") + self .character_list .clear () + self .character_list .addItems ([entry ["name"]for entry in KNOWN_NAMES ]) + self .filter_character_list (self .character_search_input .text ()) + self .save_known_names () + else : + self .log_signal .emit ("ℹ️ No names were removed (they might not have been in the list).") + + + def update_custom_folder_visibility (self ,url_text =None ): + if url_text is None : + url_text =self .link_input .text () + + _ ,_ ,post_id =extract_post_info (url_text .strip ()) + + is_single_post_url =bool (post_id ) + subfolders_enabled =self .use_subfolders_checkbox .isChecked ()if self .use_subfolders_checkbox else False + + not_only_links_or_archives_mode =not ( + (self .radio_only_links and self .radio_only_links .isChecked ())or + (self .radio_only_archives and self .radio_only_archives .isChecked ())or + (hasattr (self ,'radio_only_audio')and self .radio_only_audio .isChecked ()) + ) + + should_show_custom_folder =is_single_post_url and subfolders_enabled and not_only_links_or_archives_mode + + if self .custom_folder_widget : + self 
.custom_folder_widget .setVisible (should_show_custom_folder ) + + if not (self .custom_folder_widget and self .custom_folder_widget .isVisible ()): + if self .custom_folder_input :self .custom_folder_input .clear () + + + def update_ui_for_subfolders (self ,separate_folders_by_name_title_checked :bool ): + is_only_links =self .radio_only_links and self .radio_only_links .isChecked () + is_only_archives =self .radio_only_archives and self .radio_only_archives .isChecked () + is_only_audio =hasattr (self ,'radio_only_audio')and self .radio_only_audio .isChecked () + + can_enable_subfolder_per_post_checkbox =not is_only_links + + if self .use_subfolder_per_post_checkbox : + self .use_subfolder_per_post_checkbox .setEnabled (can_enable_subfolder_per_post_checkbox ) + + if not can_enable_subfolder_per_post_checkbox : + self .use_subfolder_per_post_checkbox .setChecked (False ) + + if hasattr(self, 'date_prefix_checkbox'): + # The Date Prefix checkbox should only be enabled if "Subfolder per Post" is both enabled and checked + can_enable_date_prefix = self.use_subfolder_per_post_checkbox.isEnabled() and self.use_subfolder_per_post_checkbox.isChecked() + self.date_prefix_checkbox.setEnabled(can_enable_date_prefix) + if not can_enable_date_prefix: + self.date_prefix_checkbox.setChecked(False) + + self .update_custom_folder_visibility () + + + def _update_cookie_input_visibility (self ,checked ): + cookie_text_input_exists =hasattr (self ,'cookie_text_input') + cookie_browse_button_exists =hasattr (self ,'cookie_browse_button') + + if cookie_text_input_exists or cookie_browse_button_exists : + is_only_links =self .radio_only_links and self .radio_only_links .isChecked () + if cookie_text_input_exists :self .cookie_text_input .setVisible (checked ) + if cookie_browse_button_exists :self .cookie_browse_button .setVisible (checked ) + + can_enable_cookie_text =checked and not is_only_links + enable_state_for_fields =can_enable_cookie_text and (self .download_btn .isEnabled 
()or self .is_paused ) + + if cookie_text_input_exists : + self .cookie_text_input .setEnabled (enable_state_for_fields ) + if self .selected_cookie_filepath and checked : + self .cookie_text_input .setText (self .selected_cookie_filepath ) + self .cookie_text_input .setReadOnly (True ) + self .cookie_text_input .setPlaceholderText ("") + elif checked : + self .cookie_text_input .setReadOnly (False ) + self .cookie_text_input .setPlaceholderText ("Cookie string (if no cookies.txt)") + + if cookie_browse_button_exists :self .cookie_browse_button .setEnabled (enable_state_for_fields ) + + if not checked : + self .selected_cookie_filepath =None + + + def update_page_range_enabled_state (self ): + url_text =self .link_input .text ().strip ()if self .link_input else "" + _ ,_ ,post_id =extract_post_info (url_text ) + + is_creator_feed =not post_id if url_text else False + enable_page_range =is_creator_feed + + for widget in [self .page_range_label ,self .start_page_input ,self .to_label ,self .end_page_input ]: + if widget :widget .setEnabled (enable_page_range ) + + if not enable_page_range : + if self .start_page_input :self .start_page_input .clear () + if self .end_page_input :self .end_page_input .clear () + + + def _update_manga_filename_style_button_text (self ): + if self .manga_rename_toggle_button : + if self .manga_filename_style ==STYLE_POST_TITLE : + self .manga_rename_toggle_button .setText (self ._tr ("manga_style_post_title_text","Name: Post Title")) + + elif self .manga_filename_style ==STYLE_ORIGINAL_NAME : + self .manga_rename_toggle_button .setText (self ._tr ("manga_style_original_file_text","Name: Original File")) + + elif self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING : + self .manga_rename_toggle_button .setText (self ._tr ("manga_style_title_global_num_text","Name: Title+G.Num")) + + elif self .manga_filename_style ==STYLE_DATE_BASED : + self .manga_rename_toggle_button .setText (self ._tr ("manga_style_date_based_text","Name: 
Date Based")) + + elif self .manga_filename_style ==STYLE_POST_ID: # Add this block + self .manga_rename_toggle_button .setText (self ._tr ("manga_style_post_id_text","Name: Post ID")) + + elif self .manga_filename_style ==STYLE_DATE_POST_TITLE : + self .manga_rename_toggle_button .setText (self ._tr ("manga_style_date_post_title_text","Name: Date + Title")) + + else : + self .manga_rename_toggle_button .setText (self ._tr ("manga_style_unknown_text","Name: Unknown Style")) + + + self .manga_rename_toggle_button .setToolTip ("Click to cycle Manga Filename Style (when Manga Mode is active for a creator feed).") + + +# In main_window.py + + def _toggle_manga_filename_style (self ): + current_style =self .manga_filename_style + new_style ="" + if current_style ==STYLE_POST_TITLE : + new_style =STYLE_ORIGINAL_NAME + elif current_style ==STYLE_ORIGINAL_NAME : + new_style =STYLE_DATE_POST_TITLE + elif current_style ==STYLE_DATE_POST_TITLE : + new_style =STYLE_POST_TITLE_GLOBAL_NUMBERING + elif current_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING : + new_style =STYLE_DATE_BASED + elif current_style ==STYLE_DATE_BASED : + new_style =STYLE_POST_ID # Change this line + elif current_style ==STYLE_POST_ID: # Add this block + new_style =STYLE_POST_TITLE + else : + self .log_signal .emit (f"⚠️ Unknown current manga filename style: {current_style }. 
Resetting to default ('{STYLE_POST_TITLE }').") + new_style =STYLE_POST_TITLE + + self .manga_filename_style =new_style + self .settings .setValue (MANGA_FILENAME_STYLE_KEY ,self .manga_filename_style ) + self .settings .sync () + self ._update_manga_filename_style_button_text () + self .update_ui_for_manga_mode (self .manga_mode_checkbox .isChecked ()if self .manga_mode_checkbox else False ) + self .log_signal .emit (f"ℹ️ Manga filename style changed to: '{self .manga_filename_style }'") + + def _handle_favorite_mode_toggle (self ,checked ): + if not self .url_or_placeholder_stack or not self .bottom_action_buttons_stack : + return + + self .url_or_placeholder_stack .setCurrentIndex (1 if checked else 0 ) + self .bottom_action_buttons_stack .setCurrentIndex (1 if checked else 0 ) + + if checked : + if self .link_input : + self .link_input .clear () + self .link_input .setEnabled (False ) + for widget in [self .page_range_label ,self .start_page_input ,self .to_label ,self .end_page_input ]: + if widget :widget .setEnabled (False ) + if self .start_page_input :self .start_page_input .clear () + if self .end_page_input :self .end_page_input .clear () + + self .update_custom_folder_visibility () + self .update_page_range_enabled_state () + if self .manga_mode_checkbox : + self .manga_mode_checkbox .setChecked (False ) + self .manga_mode_checkbox .setEnabled (False ) + if hasattr (self ,'use_cookie_checkbox'): + self .use_cookie_checkbox .setChecked (True ) + self .use_cookie_checkbox .setEnabled (False ) + if hasattr (self ,'use_cookie_checkbox'): + self ._update_cookie_input_visibility (True ) + self .update_ui_for_manga_mode (False ) + + if hasattr (self ,'favorite_mode_artists_button'): + self .favorite_mode_artists_button .setEnabled (True ) + if hasattr (self ,'favorite_mode_posts_button'): + self .favorite_mode_posts_button .setEnabled (True ) + + else : + if self .link_input :self .link_input .setEnabled (True ) + self .update_page_range_enabled_state () + 
self .update_custom_folder_visibility () + self .update_ui_for_manga_mode (self .manga_mode_checkbox .isChecked ()if self .manga_mode_checkbox else False ) + + if hasattr (self ,'use_cookie_checkbox'): + self .use_cookie_checkbox .setEnabled (True ) + if hasattr (self ,'use_cookie_checkbox'): + self ._update_cookie_input_visibility (self .use_cookie_checkbox .isChecked ()) + + if hasattr (self ,'favorite_mode_artists_button'): + self .favorite_mode_artists_button .setEnabled (False ) + if hasattr (self ,'favorite_mode_posts_button'): + self .favorite_mode_posts_button .setEnabled (False ) + + def update_ui_for_manga_mode (self ,checked ): + is_only_links_mode =self .radio_only_links and self .radio_only_links .isChecked () + is_only_archives_mode =self .radio_only_archives and self .radio_only_archives .isChecked () + is_only_audio_mode =hasattr (self ,'radio_only_audio')and self .radio_only_audio .isChecked () + + url_text =self .link_input .text ().strip ()if self .link_input else "" + _ ,_ ,post_id =extract_post_info (url_text ) + + is_creator_feed =not post_id if url_text else False + is_favorite_mode_on =self .favorite_mode_checkbox .isChecked ()if self .favorite_mode_checkbox else False + + if self .manga_mode_checkbox : + self .manga_mode_checkbox .setEnabled (is_creator_feed and not is_favorite_mode_on ) + if not is_creator_feed and self .manga_mode_checkbox .isChecked (): + self .manga_mode_checkbox .setChecked (False ) + checked =self .manga_mode_checkbox .isChecked () + + manga_mode_effectively_on =is_creator_feed and checked + + if self .manga_rename_toggle_button : + self .manga_rename_toggle_button .setVisible (manga_mode_effectively_on and not (is_only_links_mode or is_only_archives_mode or is_only_audio_mode )) + + self .update_page_range_enabled_state () + + current_filename_style =self .manga_filename_style + + enable_char_filter_widgets =not is_only_links_mode and not is_only_archives_mode + + if self .character_input : + self .character_input 
.setEnabled (enable_char_filter_widgets ) + if not enable_char_filter_widgets :self .character_input .clear () + if self .char_filter_scope_toggle_button : + self .char_filter_scope_toggle_button .setEnabled (enable_char_filter_widgets ) + if self .character_filter_widget : + self .character_filter_widget .setVisible (enable_char_filter_widgets ) + + show_date_prefix_input =( + manga_mode_effectively_on and + (current_filename_style ==STYLE_DATE_BASED or + current_filename_style ==STYLE_ORIGINAL_NAME )and + not (is_only_links_mode or is_only_archives_mode or is_only_audio_mode ) + ) + if hasattr (self ,'manga_date_prefix_input'): + self .manga_date_prefix_input .setVisible (show_date_prefix_input ) + if show_date_prefix_input : + self .manga_date_prefix_input .setMaximumWidth (120 ) + self .manga_date_prefix_input .setMinimumWidth (60 ) + else : + self .manga_date_prefix_input .clear () + self .manga_date_prefix_input .setMaximumWidth (16777215 ) + self .manga_date_prefix_input .setMinimumWidth (0 ) + + if hasattr (self ,'multipart_toggle_button'): + + hide_multipart_button_due_mode =is_only_links_mode or is_only_archives_mode or is_only_audio_mode + hide_multipart_button_due_manga_mode =manga_mode_effectively_on + self .multipart_toggle_button .setVisible (not (hide_multipart_button_due_mode or hide_multipart_button_due_manga_mode )) + + self ._update_multithreading_for_date_mode () + + + def filter_character_list (self ,search_text ): + search_text_lower =search_text .lower () + for i in range (self .character_list .count ()): + item =self .character_list .item (i ) + item .setHidden (search_text_lower not in item .text ().lower ()) + + + def update_multithreading_label (self ,text ): + if self .use_multithreading_checkbox .isChecked (): + base_text =self ._tr ("use_multithreading_checkbox_base_label","Use Multithreading") + try : + num_threads_val =int (text ) + if num_threads_val >0 :self .use_multithreading_checkbox .setText (f"{base_text } ({num_threads_val } 
Threads)") + else :self .use_multithreading_checkbox .setText (f"{base_text } (Invalid: >0)") + except ValueError : + self .use_multithreading_checkbox .setText (f"{base_text } (Invalid Input)") + else : + self .use_multithreading_checkbox .setText (f"{self ._tr ('use_multithreading_checkbox_base_label','Use Multithreading')} (1 Thread)") + + + def _handle_multithreading_toggle (self ,checked ): + if not checked : + self .thread_count_input .setEnabled (False ) + self .thread_count_label .setEnabled (False ) + self .use_multithreading_checkbox .setText ("Use Multithreading (1 Thread)") + else : + self .thread_count_input .setEnabled (True ) + self .thread_count_label .setEnabled (True ) + self .update_multithreading_label (self .thread_count_input .text ()) + + def _update_multithreading_for_date_mode (self ): + """ + Checks if Manga Mode is ON and 'Date Based' style is selected. + If so, disables multithreading. Otherwise, enables it. + """ + if not hasattr (self ,'manga_mode_checkbox')or not hasattr (self ,'use_multithreading_checkbox'): + return + + manga_on =self .manga_mode_checkbox .isChecked () + is_sequential_style_requiring_single_thread =( + self .manga_filename_style ==STYLE_DATE_BASED or + self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING + ) + if manga_on and is_sequential_style_requiring_single_thread : + if self .use_multithreading_checkbox .isChecked ()or self .use_multithreading_checkbox .isEnabled (): + if self .use_multithreading_checkbox .isChecked (): + self .log_signal .emit ("ℹ️ Manga Date Mode: Multithreading for post processing has been disabled to ensure correct sequential file numbering.") + self .use_multithreading_checkbox .setChecked (False ) + self .use_multithreading_checkbox .setEnabled (False ) + self ._handle_multithreading_toggle (False ) + else : + if not self .use_multithreading_checkbox .isEnabled (): + self .use_multithreading_checkbox .setEnabled (True ) + self ._handle_multithreading_toggle (self 
.use_multithreading_checkbox .isChecked ()) + + def update_progress_display (self ,total_posts ,processed_posts ): + if total_posts >0 : + progress_percent =(processed_posts /total_posts )*100 + self .progress_label .setText (self ._tr ("progress_posts_text","Progress: {processed_posts} / {total_posts} posts ({progress_percent:.1f}%)").format (processed_posts =processed_posts ,total_posts =total_posts ,progress_percent =progress_percent )) + elif processed_posts >0 : + self .progress_label .setText (self ._tr ("progress_processing_post_text","Progress: Processing post {processed_posts}...").format (processed_posts =processed_posts )) + else : + self .progress_label .setText (self ._tr ("progress_starting_text","Progress: Starting...")) + + if total_posts >0 or processed_posts >0 : + self .file_progress_label .setText ("") + + + def start_download (self ,direct_api_url =None ,override_output_dir =None, is_restore=False ): + global KNOWN_NAMES ,BackendDownloadThread ,PostProcessorWorker ,extract_post_info ,clean_folder_name ,MAX_FILE_THREADS_PER_POST_OR_WORKER + + self._clear_stale_temp_files() + self.session_temp_files = [] + + if self ._is_download_active (): + QMessageBox.warning(self, "Busy", "A download is already in progress.") + return False + + + + if not direct_api_url and self .favorite_download_queue and not self .is_processing_favorites_queue : + self .log_signal .emit (f"ℹ️ Detected {len (self .favorite_download_queue )} item(s) in the queue. Starting processing...") + self .cancellation_message_logged_this_session =False + self ._process_next_favorite_download () + return True + + if not is_restore and self.interrupted_session_data: + self.log_signal.emit("ℹ️ New download started. 
Discarding previous interrupted session.") + self._clear_session_file() + self.interrupted_session_data = None + self.is_restore_pending = False + api_url =direct_api_url if direct_api_url else self .link_input .text ().strip () + self .download_history_candidates .clear () + self._update_button_states_and_connections() # Ensure buttons are updated to active state + + + if self .favorite_mode_checkbox and self .favorite_mode_checkbox .isChecked ()and not direct_api_url and not api_url : + QMessageBox .information (self ,"Favorite Mode Active", + "Favorite Mode is active. Please use the 'Favorite Artists' or 'Favorite Posts' buttons to start downloads in this mode, or uncheck 'Favorite Mode' to use the URL input.") + self .set_ui_enabled (True ) + return False + + main_ui_download_dir =self .dir_input .text ().strip () + + if not api_url and not self .favorite_download_queue : + QMessageBox .critical (self ,"Input Error","URL is required.") + return False + elif not api_url and self .favorite_download_queue : + self .log_signal .emit ("ℹ️ URL input is empty, but queue has items. Processing queue...") + self .cancellation_message_logged_this_session =False + self ._process_next_favorite_download () + return True + + self .cancellation_message_logged_this_session =False + use_subfolders =self .use_subfolders_checkbox .isChecked () + use_post_subfolders =self .use_subfolder_per_post_checkbox .isChecked () + compress_images =self .compress_images_checkbox .isChecked () + download_thumbnails =self .download_thumbnails_checkbox .isChecked () + + use_multithreading_enabled_by_checkbox =self .use_multithreading_checkbox .isChecked () + try : + num_threads_from_gui =int (self .thread_count_input .text ().strip ()) + if num_threads_from_gui <1 :num_threads_from_gui =1 + except ValueError : + QMessageBox .critical (self ,"Thread Count Error","Invalid number of threads. 
Please enter a positive number.") + return False + + if use_multithreading_enabled_by_checkbox : + if num_threads_from_gui >MAX_THREADS : + hard_warning_msg =( + f"You've entered a thread count ({num_threads_from_gui }) exceeding the maximum of {MAX_THREADS }.\n\n" + "Using an extremely high number of threads can lead to:\n" + " - Diminishing returns (no significant speed increase).\n" + " - Increased system instability or application crashes.\n" + " - Higher chance of being rate-limited or temporarily IP-banned by the server.\n\n" + f"The thread count has been automatically capped to {MAX_THREADS } for stability." + ) + QMessageBox .warning (self ,"High Thread Count Warning",hard_warning_msg ) + num_threads_from_gui =MAX_THREADS + self .thread_count_input .setText (str (MAX_THREADS )) + self .log_signal .emit (f"⚠️ User attempted {num_threads_from_gui } threads, capped to {MAX_THREADS }.") + if SOFT_WARNING_THREAD_THRESHOLD MAX_THREADS : + hard_warning_msg =( + f"You've entered a thread count ({num_threads_from_gui }) exceeding the maximum of {MAX_THREADS }.\n\n" + "Using an extremely high number of threads can lead to:\n" + " - Diminishing returns (no significant speed increase).\n" + " - Increased system instability or application crashes.\n" + " - Higher chance of being rate-limited or temporarily IP-banned by the server.\n\n" + f"The thread count has been automatically capped to {MAX_THREADS } for stability." 
+ ) + QMessageBox .warning (self ,"High Thread Count Warning",hard_warning_msg ) + num_threads_from_gui =MAX_THREADS + self .thread_count_input .setText (str (MAX_THREADS )) + self .log_signal .emit (f"⚠️ User attempted {num_threads_from_gui } threads, capped to {MAX_THREADS }.") + if SOFT_WARNING_THREAD_THRESHOLD end_page :raise ValueError ("Start page cannot be greater than end page.") + + if manga_mode and start_page and end_page : + msg_box =QMessageBox (self ) + msg_box .setIcon (QMessageBox .Warning ) + msg_box .setWindowTitle ("Manga Mode & Page Range Warning") + msg_box .setText ( + "You have enabled Manga/Comic Mode and also specified a Page Range.\n\n" + "Manga Mode processes posts from oldest to newest across all available pages by default.\n" + "If you use a page range, you might miss parts of the manga/comic if it starts before your 'Start Page' or continues after your 'End Page'.\n\n" + "However, if you are certain the content you want is entirely within this page range (e.g., a short series, or you know the specific pages for a volume), then proceeding is okay.\n\n" + "Do you want to proceed with this page range in Manga Mode?" 
+ ) + proceed_button =msg_box .addButton ("Proceed Anyway",QMessageBox .AcceptRole ) + cancel_button =msg_box .addButton ("Cancel Download",QMessageBox .RejectRole ) + msg_box .setDefaultButton (proceed_button ) + msg_box .setEscapeButton (cancel_button ) + msg_box .exec_ () + + if msg_box .clickedButton ()==cancel_button : + self .log_signal .emit ("❌ Download cancelled by user due to Manga Mode & Page Range warning.") + return False + except ValueError as e : + QMessageBox .critical (self ,"Page Range Error",f"Invalid page range: {e }") + return False + self .external_link_queue .clear ();self .extracted_links_cache =[];self ._is_processing_external_link_queue =False ;self ._current_link_post_title =None + + raw_character_filters_text =self .character_input .text ().strip () + parsed_character_filter_objects =self ._parse_character_filters (raw_character_filters_text ) + + actual_filters_to_use_for_run =[] + + needs_folder_naming_validation =(use_subfolders or manga_mode )and not extract_links_only + + if parsed_character_filter_objects : + actual_filters_to_use_for_run =parsed_character_filter_objects + + if not extract_links_only : + self .log_signal .emit (f"ℹ️ Using character filters for matching: {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") + + filter_objects_to_potentially_add_to_known_list =[] + for filter_item_obj in parsed_character_filter_objects : + item_primary_name =filter_item_obj ["name"] + cleaned_name_test =clean_folder_name (item_primary_name ) + if needs_folder_naming_validation and not cleaned_name_test : + QMessageBox .warning (self ,"Invalid Filter Name for Folder",f"Filter name '{item_primary_name }' is invalid for a folder and will be skipped for Known.txt interaction.") + self .log_signal .emit (f"⚠️ Skipping invalid filter for Known.txt interaction: '{item_primary_name }'") + continue + + an_alias_is_already_known =False + if any (kn_entry ["name"].lower ()==item_primary_name .lower ()for kn_entry in 
KNOWN_NAMES ): + an_alias_is_already_known =True + elif filter_item_obj ["is_group"]and needs_folder_naming_validation : + for alias_in_filter_obj in filter_item_obj ["aliases"]: + if any (kn_entry ["name"].lower ()==alias_in_filter_obj .lower ()or alias_in_filter_obj .lower ()in [a .lower ()for a in kn_entry ["aliases"]]for kn_entry in KNOWN_NAMES ): + an_alias_is_already_known =True ;break + + if an_alias_is_already_known and filter_item_obj ["is_group"]: + self .log_signal .emit (f"ℹ️ An alias from group '{item_primary_name }' is already known. Group will not be prompted for Known.txt addition.") + + should_prompt_to_add_to_known_list =( + needs_folder_naming_validation and not manga_mode and + not any (kn_entry ["name"].lower ()==item_primary_name .lower ()for kn_entry in KNOWN_NAMES )and + not an_alias_is_already_known + ) + if should_prompt_to_add_to_known_list : + if not any (obj_to_add ["name"].lower ()==item_primary_name .lower ()for obj_to_add in filter_objects_to_potentially_add_to_known_list ): + filter_objects_to_potentially_add_to_known_list .append (filter_item_obj ) + elif manga_mode and needs_folder_naming_validation and item_primary_name .lower ()not in {kn_entry ["name"].lower ()for kn_entry in KNOWN_NAMES }and not an_alias_is_already_known : + self .log_signal .emit (f"ℹ️ Manga Mode: Using filter '{item_primary_name }' for this session without adding to Known Names.") + + if filter_objects_to_potentially_add_to_known_list : + confirm_dialog =ConfirmAddAllDialog (filter_objects_to_potentially_add_to_known_list ,self ,self ) + dialog_result =confirm_dialog .exec_ () + + if dialog_result ==CONFIRM_ADD_ALL_CANCEL_DOWNLOAD : + self .log_signal .emit ("❌ Download cancelled by user at new name confirmation stage.") + return False + elif isinstance (dialog_result ,list ): + if dialog_result : + self .log_signal .emit (f"ℹ️ User chose to add {len (dialog_result )} new entry/entries to Known.txt.") + for filter_obj_to_add in dialog_result : + if 
filter_obj_to_add .get ("components_are_distinct_for_known_txt"): + self .log_signal .emit (f" Processing group '{filter_obj_to_add ['name']}' to add its components individually to Known.txt.") + for alias_component in filter_obj_to_add ["aliases"]: + self .add_new_character ( + name_to_add =alias_component , + is_group_to_add =False , + aliases_to_add =[alias_component ], + suppress_similarity_prompt =True + ) + else : + self .add_new_character ( + name_to_add =filter_obj_to_add ["name"], + is_group_to_add =filter_obj_to_add ["is_group"], + aliases_to_add =filter_obj_to_add ["aliases"], + suppress_similarity_prompt =True + ) + else : + self .log_signal .emit ("ℹ️ User confirmed adding, but no names were selected in the dialog. No new names added to Known.txt.") + elif dialog_result ==CONFIRM_ADD_ALL_SKIP_ADDING : + self .log_signal .emit ("ℹ️ User chose not to add new names to Known.txt for this session.") + else : + self .log_signal .emit (f"ℹ️ Using character filters for link extraction: {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") + + self .dynamic_character_filter_holder .set_filters (actual_filters_to_use_for_run ) + + creator_folder_ignore_words_for_run =None + character_filters_are_empty =not actual_filters_to_use_for_run + if is_full_creator_download and character_filters_are_empty : + creator_folder_ignore_words_for_run =CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS + log_messages .append (f" Creator Download (No Char Filter): Applying default folder name ignore list ({len (creator_folder_ignore_words_for_run )} words).") + + custom_folder_name_cleaned =None + if use_subfolders and post_id_from_url and self .custom_folder_widget and self .custom_folder_widget .isVisible ()and not extract_links_only : + raw_custom_name =self .custom_folder_input .text ().strip () + if raw_custom_name : + cleaned_custom =clean_folder_name (raw_custom_name ) + if cleaned_custom :custom_folder_name_cleaned =cleaned_custom + else :self .log_signal 
.emit (f"⚠️ Invalid custom folder name ignored: '{raw_custom_name }' (resulted in empty string after cleaning).") + + + self .main_log_output .clear () + if extract_links_only :self .main_log_output .append ("🔗 Extracting Links..."); + elif backend_filter_mode =='archive':self .main_log_output .append ("📦 Downloading Archives Only...") + + if self .external_log_output :self .external_log_output .clear () + if self .show_external_links and not extract_links_only and backend_filter_mode !='archive': + self .external_log_output .append ("🔗 External Links Found:") + + self .file_progress_label .setText ("");self .cancellation_event .clear ();self .active_futures =[] + self .total_posts_to_process =0 ;self .processed_posts_count =0 ;self .download_counter =0 ;self .skip_counter =0 + self .progress_label .setText (self ._tr ("progress_initializing_text","Progress: Initializing...")) + + self .retryable_failed_files_info .clear () + self .permanently_failed_files_for_dialog .clear () + self._update_error_button_count() + + manga_date_file_counter_ref_for_thread =None + if manga_mode and self .manga_filename_style ==STYLE_DATE_BASED and not extract_links_only : + manga_date_file_counter_ref_for_thread =None + self .log_signal .emit (f"ℹ️ Manga Date Mode: File counter will be initialized by the download thread.") + + manga_global_file_counter_ref_for_thread =None + if manga_mode and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING and not extract_links_only : + manga_global_file_counter_ref_for_thread =None + self .log_signal .emit (f"ℹ️ Manga Title+GlobalNum Mode: File counter will be initialized by the download thread (starts at 1).") + + effective_num_post_workers =1 + + effective_num_file_threads_per_worker =1 + + if post_id_from_url : + if use_multithreading_enabled_by_checkbox : + effective_num_file_threads_per_worker =max (1 ,min (num_threads_from_gui ,MAX_FILE_THREADS_PER_POST_OR_WORKER )) + else : + if manga_mode and self .manga_filename_style 
==STYLE_DATE_BASED : + effective_num_post_workers =1 + elif manga_mode and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING : + effective_num_post_workers =1 + effective_num_file_threads_per_worker =1 + elif use_multithreading_enabled_by_checkbox : + effective_num_post_workers =max (1 ,min (num_threads_from_gui ,MAX_THREADS )) + effective_num_file_threads_per_worker =1 + + if not extract_links_only :log_messages .append (f" Save Location: {effective_output_dir_for_run }") + + if post_id_from_url : + log_messages .append (f" Mode: Single Post") + log_messages .append (f" ↳ File Downloads: Up to {effective_num_file_threads_per_worker } concurrent file(s)") + else : + log_messages .append (f" Mode: Creator Feed") + log_messages .append (f" Post Processing: {'Multi-threaded ('+str (effective_num_post_workers )+' workers)'if effective_num_post_workers >1 else 'Single-threaded (1 worker)'}") + log_messages .append (f" ↳ File Downloads per Worker: Up to {effective_num_file_threads_per_worker } concurrent file(s)") + pr_log ="All" + if start_page or end_page : + pr_log =f"{f'From {start_page } 'if start_page else ''}{'to 'if start_page and end_page else ''}{f'{end_page }'if end_page else (f'Up to {end_page }'if end_page else (f'From {start_page }'if start_page else 'Specific Range'))}".strip () + + if manga_mode : + log_messages .append (f" Page Range: {pr_log if pr_log else 'All'} (Manga Mode - Oldest Posts Processed First within range)") + else : + log_messages .append (f" Page Range: {pr_log if pr_log else 'All'}") + + + if not extract_links_only : + log_messages .append (f" Subfolders: {'Enabled'if use_subfolders else 'Disabled'}") + if use_subfolders and self.use_subfolder_per_post_checkbox.isChecked(): + use_date_prefix = self.date_prefix_checkbox.isChecked() if hasattr(self, 'date_prefix_checkbox') else False + log_messages.append(f" ↳ Date Prefix for Post Subfolders: {'Enabled' if use_date_prefix else 'Disabled'}") + if use_subfolders : + if 
custom_folder_name_cleaned :log_messages .append (f" Custom Folder (Post): '{custom_folder_name_cleaned }'") + if actual_filters_to_use_for_run : + log_messages .append (f" Character Filters: {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") + log_messages .append (f" ↳ Char Filter Scope: {current_char_filter_scope .capitalize ()}") + elif use_subfolders : + log_messages .append (f" Folder Naming: Automatic (based on title/known names)") + + + keep_duplicates = self.keep_duplicates_checkbox.isChecked() if hasattr(self, 'keep_duplicates_checkbox') else False + log_messages.extend([ + f" File Type Filter: {user_selected_filter_text} (Backend processing as: {backend_filter_mode})", + f" Keep In-Post Duplicates: {'Enabled' if keep_duplicates else 'Disabled'}", + f" Skip Archives: {'.zip' if effective_skip_zip else ''}{', ' if effective_skip_zip and effective_skip_rar else ''}{'.rar' if effective_skip_rar else ''}{'None (Archive Mode)' if backend_filter_mode == 'archive' else ('None' if not (effective_skip_zip or effective_skip_rar) else '')}", + f" Skip Words Scope: {current_skip_words_scope .capitalize ()}", + f" Remove Words from Filename: {', '.join (remove_from_filename_words_list )if remove_from_filename_words_list else 'None'}", + f" Compress Images: {'Enabled'if compress_images else 'Disabled'}", + f" Thumbnails Only: {'Enabled'if download_thumbnails else 'Disabled'}" + ]) + log_messages .append (f" Scan Post Content for Images: {'Enabled'if scan_content_for_images else 'Disabled'}") + else : + log_messages .append (f" Mode: Extracting Links Only") + + log_messages .append (f" Show External Links: {'Enabled'if self .show_external_links and not extract_links_only and backend_filter_mode !='archive'else 'Disabled'}") + + if manga_mode : + log_messages .append (f" Manga Mode (File Renaming by Post Title): Enabled") + log_messages .append (f" ↳ Manga Filename Style: {'Post Title Based'if self .manga_filename_style ==STYLE_POST_TITLE else 
'Original File Name'}") + if actual_filters_to_use_for_run : + log_messages .append (f" ↳ Manga Character Filter (for naming/folder): {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") + log_messages .append (f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).") + + log_messages .append (f" Use Cookie ('cookies.txt'): {'Enabled'if use_cookie_from_checkbox else 'Disabled'}") + if use_cookie_from_checkbox and cookie_text_from_input : + log_messages .append (f" ↳ Cookie Text Provided: Yes (length: {len (cookie_text_from_input )})") + elif use_cookie_from_checkbox and selected_cookie_file_path_for_backend : + log_messages .append (f" ↳ Cookie File Selected: {os .path .basename (selected_cookie_file_path_for_backend )}") + should_use_multithreading_for_posts =use_multithreading_enabled_by_checkbox and not post_id_from_url + if manga_mode and (self .manga_filename_style ==STYLE_DATE_BASED or self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING )and not post_id_from_url : + enforced_by_style ="Date Mode"if self .manga_filename_style ==STYLE_DATE_BASED else "Title+GlobalNum Mode" + should_use_multithreading_for_posts =False + log_messages .append (f" Threading: Single-threaded (posts) - Enforced by Manga {enforced_by_style } (Actual workers: {effective_num_post_workers if effective_num_post_workers >1 else 1 })") + else : + log_messages .append (f" Threading: {'Multi-threaded (posts)'if should_use_multithreading_for_posts else 'Single-threaded (posts)'}") + if should_use_multithreading_for_posts : + log_messages .append (f" Number of Post Worker Threads: {effective_num_post_workers }") + log_messages .append ("="*40 ) + for msg in log_messages :self .log_signal .emit (msg ) + + self .set_ui_enabled (False ) + + + from src.config.constants import FOLDER_NAME_STOP_WORDS + + + args_template ={ + 'api_url_input':api_url , + 'download_root':effective_output_dir_for_run , + 
'output_dir':effective_output_dir_for_run , + 'known_names':list (KNOWN_NAMES ), + 'known_names_copy':list (KNOWN_NAMES ), + 'filter_character_list':actual_filters_to_use_for_run , + 'filter_mode':backend_filter_mode , + 'text_only_scope': text_only_scope_for_run, + 'text_export_format': export_format_for_run, + 'single_pdf_mode': self.single_pdf_setting, + 'skip_zip':effective_skip_zip , + 'skip_rar':effective_skip_rar , + 'use_subfolders':use_subfolders , + 'use_post_subfolders':use_post_subfolders , + 'compress_images':compress_images , + 'download_thumbnails':download_thumbnails , + 'service':service , + 'user_id':user_id , + 'downloaded_files':self .downloaded_files , + 'downloaded_files_lock':self .downloaded_files_lock , + 'downloaded_file_hashes':self .downloaded_file_hashes , + 'downloaded_file_hashes_lock':self .downloaded_file_hashes_lock , + 'skip_words_list':skip_words_list , + 'skip_words_scope':current_skip_words_scope , + 'remove_from_filename_words_list':remove_from_filename_words_list , + 'char_filter_scope':current_char_filter_scope , + 'show_external_links':self .show_external_links , + 'extract_links_only':extract_links_only , + 'start_page':start_page , + 'end_page':end_page , + 'target_post_id_from_initial_url':post_id_from_url , + 'custom_folder_name':custom_folder_name_cleaned , + 'manga_mode_active':manga_mode , + 'unwanted_keywords':FOLDER_NAME_STOP_WORDS , + 'cancellation_event':self .cancellation_event , + 'manga_date_prefix':manga_date_prefix_text , + 'dynamic_character_filter_holder':self .dynamic_character_filter_holder , + 'pause_event':self .pause_event , + 'scan_content_for_images':scan_content_for_images , + 'manga_filename_style':self .manga_filename_style , + 'num_file_threads_for_worker':effective_num_file_threads_per_worker , + 'manga_date_file_counter_ref':manga_date_file_counter_ref_for_thread , + 'allow_multipart_download':allow_multipart , + 'cookie_text':cookie_text_from_input , + 
'selected_cookie_file':selected_cookie_file_path_for_backend , + 'manga_global_file_counter_ref':manga_global_file_counter_ref_for_thread , + 'app_base_dir':app_base_dir_for_cookies , + 'project_root_dir': self.app_base_dir, + 'use_cookie':use_cookie_for_this_run , + 'session_file_path': self.session_file_path, + 'session_lock': self.session_lock, + 'creator_download_folder_ignore_words':creator_folder_ignore_words_for_run , + 'use_date_prefix_for_subfolder': self.date_prefix_checkbox.isChecked() if hasattr(self, 'date_prefix_checkbox') else False, + 'keep_in_post_duplicates': self.keep_duplicates_checkbox.isChecked() if hasattr(self, 'keep_duplicates_checkbox') else False, + 'skip_current_file_flag': None, + } + + args_template ['override_output_dir']=override_output_dir + try : + if should_use_multithreading_for_posts : + self .log_signal .emit (f" Initializing multi-threaded {current_mode_log_text .lower ()} with {effective_num_post_workers } post workers...") + args_template ['emitter']=self .worker_to_gui_queue + self .start_multi_threaded_download (num_post_workers =effective_num_post_workers ,**args_template ) + else : + self .log_signal .emit (f" Initializing single-threaded {'link extraction'if extract_links_only else 'download'}...") + dt_expected_keys =[ + 'api_url_input','output_dir','known_names_copy','cancellation_event', + 'filter_character_list','filter_mode','skip_zip','skip_rar', + 'use_subfolders','use_post_subfolders','custom_folder_name', + 'compress_images','download_thumbnails','service','user_id', + 'downloaded_files','downloaded_file_hashes','pause_event','remove_from_filename_words_list', + 'downloaded_files_lock','downloaded_file_hashes_lock','dynamic_character_filter_holder', 'session_file_path', + 'session_lock', + 'skip_words_list','skip_words_scope','char_filter_scope', + 'show_external_links','extract_links_only','num_file_threads_for_worker', + 'start_page','end_page','target_post_id_from_initial_url', + 
'manga_date_file_counter_ref', + 'manga_global_file_counter_ref','manga_date_prefix', + 'manga_mode_active','unwanted_keywords','manga_filename_style','scan_content_for_images', + 'allow_multipart_download','use_cookie','cookie_text','app_base_dir','selected_cookie_file','override_output_dir','project_root_dir', + 'text_only_scope', + 'single_pdf_mode' + ] + args_template ['skip_current_file_flag']=None + single_thread_args ={key :args_template [key ]for key in dt_expected_keys if key in args_template } + self .start_single_threaded_download (**single_thread_args ) + except Exception as e : + self._update_button_states_and_connections() # Re-enable UI if start fails + self .log_signal .emit (f"❌ CRITICAL ERROR preparing download: {e }\n{traceback .format_exc ()}") + QMessageBox .critical (self ,"Start Error",f"Failed to start process:\n{e }") + self .download_finished (0 ,0 ,False ,[]) + if self .pause_event :self .pause_event .clear () + self .is_paused =False + return True + + def restore_download(self): + """Initiates the download restoration process.""" + if self._is_download_active(): + QMessageBox.warning(self, "Busy", "A download is already in progress.") + return + + if not self.interrupted_session_data: + self.log_signal.emit("❌ No session data to restore.") + self._clear_session_and_reset_ui() + return + + self.log_signal.emit("🔄 Restoring download session...") + # The main start_download function now handles the restore logic + self.is_restore_pending = True # Set state to indicate restore is in progress + self.start_download(is_restore=True) + + def start_single_threaded_download (self ,**kwargs ): + global BackendDownloadThread + try : + self .download_thread =BackendDownloadThread (**kwargs ) + if self .pause_event :self .pause_event .clear () + self .is_paused =False + if hasattr (self .download_thread ,'progress_signal'):self .download_thread .progress_signal .connect (self .handle_main_log ) + if hasattr (self .download_thread 
,'add_character_prompt_signal'):self .download_thread .add_character_prompt_signal .connect (self .add_character_prompt_signal ) + if hasattr (self .download_thread ,'finished_signal'):self .download_thread .finished_signal .connect (self .download_finished ) + if hasattr (self .download_thread ,'receive_add_character_result'):self .character_prompt_response_signal .connect (self .download_thread .receive_add_character_result ) + if hasattr (self .download_thread ,'external_link_signal'):self .download_thread .external_link_signal .connect (self .handle_external_link_signal ) + if hasattr (self .download_thread ,'file_progress_signal'):self .download_thread .file_progress_signal .connect (self .update_file_progress_display ) + if hasattr (self .download_thread ,'missed_character_post_signal'): + self .download_thread .missed_character_post_signal .connect (self .handle_missed_character_post ) + if hasattr (self .download_thread ,'retryable_file_failed_signal'): + + if hasattr (self .download_thread ,'file_successfully_downloaded_signal'): + self .download_thread .file_successfully_downloaded_signal .connect (self ._handle_actual_file_downloaded ) + if hasattr (self .download_thread ,'post_processed_for_history_signal'): + self .download_thread .post_processed_for_history_signal .connect (self ._add_to_history_candidates ) + self .download_thread .retryable_file_failed_signal .connect (self ._handle_retryable_file_failure ) + if hasattr (self .download_thread ,'permanent_file_failed_signal'): + self .download_thread .permanent_file_failed_signal .connect (self ._handle_permanent_file_failure_from_thread ) + self .download_thread .start () + self .log_signal .emit ("✅ Single download thread (for posts) started.") + self._update_button_states_and_connections() # Update buttons after thread starts + except Exception as e : + self .log_signal .emit (f"❌ CRITICAL ERROR starting single-thread: {e }\n{traceback .format_exc ()}") + QMessageBox .critical (self ,"Thread Start 
Error",f"Failed to start download process: {e }") + if self .pause_event :self .pause_event .clear () + self .is_paused =False + + def _show_error_files_dialog (self ): + """Shows the dialog with files that were skipped due to errors.""" + if not self .permanently_failed_files_for_dialog : + QMessageBox .information ( + self , + self ._tr ("no_errors_logged_title","No Errors Logged"), + self ._tr ("no_errors_logged_message","No files were recorded as skipped due to errors in the last session or after retries.")) + return + dialog =ErrorFilesDialog (self .permanently_failed_files_for_dialog ,self ,self ) + dialog .retry_selected_signal .connect (self ._handle_retry_from_error_dialog ) + dialog .exec_ () + def _handle_retry_from_error_dialog (self ,selected_files_to_retry ): + self ._start_failed_files_retry_session (files_to_retry_list =selected_files_to_retry ) + self._update_error_button_count() + + def _handle_retryable_file_failure (self ,list_of_retry_details ): + """Appends details of files that failed but might be retryable later.""" + if list_of_retry_details : + self .retryable_failed_files_info .extend (list_of_retry_details ) + + def _handle_permanent_file_failure_from_thread (self ,list_of_permanent_failure_details ): + """Handles permanently failed files signaled by the single BackendDownloadThread.""" + if list_of_permanent_failure_details : + self .permanently_failed_files_for_dialog .extend (list_of_permanent_failure_details ) + self .log_signal .emit (f"ℹ️ {len (list_of_permanent_failure_details )} file(s) from single-thread download marked as permanently failed for this session.") + self._update_error_button_count() + + def _submit_post_to_worker_pool (self ,post_data_item ,worker_args_template ,num_file_dl_threads_for_each_worker ,emitter_for_worker ,ppw_expected_keys ,ppw_optional_keys_with_defaults ): + """Helper to prepare and submit a single post processing task to the thread pool.""" + global PostProcessorWorker + if not isinstance 
(post_data_item ,dict ): + self .log_signal .emit (f"⚠️ Skipping invalid post data item (not a dict): {type (post_data_item )}"); + return False + + worker_init_args ={} + missing_keys =[] + for key in ppw_expected_keys : + if key =='post_data':worker_init_args [key ]=post_data_item + elif key =='num_file_threads':worker_init_args [key ]=num_file_dl_threads_for_each_worker + elif key =='emitter':worker_init_args [key ]=emitter_for_worker + elif key in worker_args_template :worker_init_args [key ]=worker_args_template [key ] + elif key in ppw_optional_keys_with_defaults :pass + else :missing_keys .append (key ) + + if missing_keys : + self .log_signal .emit (f"❌ CRITICAL ERROR: Missing keys for PostProcessorWorker: {', '.join (missing_keys )}"); + self .cancellation_event .set () + return False + + try : + worker_instance =PostProcessorWorker (**worker_init_args ) + if self .thread_pool : + future =self .thread_pool .submit (worker_instance .process ) + self .active_futures .append (future ) + return True + else : + self .log_signal .emit ("⚠️ Thread pool not available. 
Cannot submit task."); + self .cancellation_event .set () + return False + except TypeError as te : + self .log_signal .emit (f"❌ TypeError creating PostProcessorWorker: {te }\n Passed Args: [{', '.join (sorted (worker_init_args .keys ()))}]\n{traceback .format_exc (limit =5 )}") + self .cancellation_event .set () + return False + except RuntimeError : + self .log_signal .emit (f"⚠️ RuntimeError submitting task (pool likely shutting down).") + self .cancellation_event .set () + return False + except Exception as e : + self .log_signal .emit (f"❌ Error submitting post {post_data_item .get ('id','N/A')} to worker: {e }") + self .cancellation_event .set () + return False + + def _load_ui_from_settings_dict(self, settings: dict): + """Populates the UI with values from a settings dictionary.""" + # Text inputs + self.link_input.setText(settings.get('api_url', '')) + self.dir_input.setText(settings.get('output_dir', '')) + self.character_input.setText(settings.get('character_filter_text', '')) + self.skip_words_input.setText(settings.get('skip_words_text', '')) + self.remove_from_filename_input.setText(settings.get('remove_words_text', '')) + self.custom_folder_input.setText(settings.get('custom_folder_name', '')) + self.cookie_text_input.setText(settings.get('cookie_text', '')) + if hasattr(self, 'manga_date_prefix_input'): + self.manga_date_prefix_input.setText(settings.get('manga_date_prefix', '')) + + # Numeric inputs + self.thread_count_input.setText(str(settings.get('num_threads', 4))) + self.start_page_input.setText(str(settings.get('start_page', '')) if settings.get('start_page') is not None else '') + self.end_page_input.setText(str(settings.get('end_page', '')) if settings.get('end_page') is not None else '') + + # Checkboxes + for checkbox_name, key in self.get_checkbox_map().items(): + checkbox = getattr(self, checkbox_name, None) + if checkbox: + checkbox.setChecked(settings.get(key, False)) + + # Radio buttons + if settings.get('only_links'): 
self.radio_only_links.setChecked(True) + else: + filter_mode = settings.get('filter_mode', 'all') + if filter_mode == 'image': self.radio_images.setChecked(True) + elif filter_mode == 'video': self.radio_videos.setChecked(True) + elif filter_mode == 'archive': self.radio_only_archives.setChecked(True) + elif filter_mode == 'audio' and hasattr(self, 'radio_only_audio'): self.radio_only_audio.setChecked(True) + else: self.radio_all.setChecked(True) + + # Toggle button states + self.skip_words_scope = settings.get('skip_words_scope', SKIP_SCOPE_POSTS) + self.char_filter_scope = settings.get('char_filter_scope', CHAR_SCOPE_TITLE) + self.manga_filename_style = settings.get('manga_filename_style', STYLE_POST_TITLE) + self.allow_multipart_download_setting = settings.get('allow_multipart_download', False) + + # Update button texts after setting states + self._update_skip_scope_button_text() + self._update_char_filter_scope_button_text() + self._update_manga_filename_style_button_text() + self._update_multipart_toggle_button_text() + + def start_multi_threaded_download (self ,num_post_workers ,**kwargs ): + global PostProcessorWorker + if self .thread_pool is None : + if self .pause_event :self .pause_event .clear () + self .is_paused =False + self .thread_pool =ThreadPoolExecutor (max_workers =num_post_workers ,thread_name_prefix ='PostWorker_') + + self .active_futures =[] + self .processed_posts_count =0 ;self .total_posts_to_process =0 ;self .download_counter =0 ;self .skip_counter =0 + self .all_kept_original_filenames =[] + self .is_fetcher_thread_running =True + + fetcher_thread =threading .Thread ( + target =self ._fetch_and_queue_posts , + args =(kwargs ['api_url_input'],kwargs ,num_post_workers ), + daemon =True , + name ="PostFetcher" + ) + fetcher_thread .start () + self .log_signal .emit (f"✅ Post fetcher thread started. 
{num_post_workers } post worker threads initializing...") + self._update_button_states_and_connections() # Update buttons after fetcher thread starts + + def _fetch_and_queue_posts(self, api_url_input_for_fetcher, worker_args_template, num_post_workers): + """ + Fetches post data and submits tasks to the pool. It does NOT wait for completion. + """ + global PostProcessorWorker, download_from_api + + try: + # This section remains the same as before + post_generator = download_from_api( + api_url_input_for_fetcher, + logger=lambda msg: self.log_signal.emit(f"[Fetcher] {msg}"), + start_page=worker_args_template.get('start_page'), + end_page=worker_args_template.get('end_page'), + manga_mode=worker_args_template.get('manga_mode_active', False), + cancellation_event=self.cancellation_event, + pause_event=self.pause_event, + use_cookie=worker_args_template.get('use_cookie'), + cookie_text=worker_args_template.get('cookie_text'), + selected_cookie_file=worker_args_template.get('selected_cookie_file'), + app_base_dir=worker_args_template.get('app_base_dir'), + manga_filename_style_for_sort_check=worker_args_template.get('manga_filename_style') + ) + + ppw_expected_keys = [ + 'post_data','download_root','known_names','filter_character_list','unwanted_keywords', + 'filter_mode','skip_zip','skip_rar','use_subfolders','use_post_subfolders', + 'target_post_id_from_initial_url','custom_folder_name','compress_images','emitter', + 'pause_event','download_thumbnails','service','user_id','api_url_input', + 'cancellation_event','downloaded_files','downloaded_file_hashes','downloaded_files_lock', + 'downloaded_file_hashes_lock','remove_from_filename_words_list','dynamic_character_filter_holder', + 'skip_words_list','skip_words_scope','char_filter_scope','show_external_links', + 'extract_links_only','allow_multipart_download','use_cookie','cookie_text', + 'app_base_dir','selected_cookie_file','override_output_dir','num_file_threads', + 
'skip_current_file_flag','manga_date_file_counter_ref','scan_content_for_images', + 'manga_mode_active','manga_filename_style','manga_date_prefix','text_only_scope', + 'text_export_format', 'single_pdf_mode', + 'use_date_prefix_for_subfolder','keep_in_post_duplicates','manga_global_file_counter_ref', + 'creator_download_folder_ignore_words','session_file_path','project_root_dir','session_lock' + ] + + num_file_dl_threads_for_each_worker = worker_args_template.get('num_file_threads_for_worker', 1) + emitter_for_worker = worker_args_template.get('emitter') + + for posts_batch in post_generator: + if self.cancellation_event.is_set(): + break + if isinstance(posts_batch, list) and posts_batch: + for post_data_item in posts_batch: + self._submit_post_to_worker_pool(post_data_item, worker_args_template, num_file_dl_threads_for_each_worker, emitter_for_worker, ppw_expected_keys, {}) + self.total_posts_to_process += len(posts_batch) + self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count) + + except Exception as e: + self.log_signal.emit(f"❌ Error during post fetching: {e}\n{traceback.format_exc(limit=2)}") + finally: + # The fetcher's only job is to mark itself as done. + self.is_fetcher_thread_running = False + self.log_signal.emit("ℹ️ Post fetcher thread has finished submitting tasks.") + + def _handle_worker_result(self, result_tuple: tuple): + """ + Safely processes results from a worker. This is now the ONLY place + that checks if the entire download process is complete. 
+ """ + self.processed_posts_count += 1 + + try: + (downloaded, skipped, kept_originals, retryable, + permanent, history_data, + temp_filepath) = result_tuple + + if temp_filepath: self.session_temp_files.append(temp_filepath) + + with self.downloaded_files_lock: + self.download_counter += downloaded + self.skip_counter += skipped + + # Other result handling can go here if needed + if history_data: self._add_to_history_candidates(history_data) + if permanent: self.permanently_failed_files_for_dialog.extend(permanent) + + self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count) + + except Exception as e: + self.log_signal.emit(f"❌ Error in _handle_worker_result: {e}\n{traceback.format_exc(limit=2)}") + + # THE CRITICAL CHECK: + # Is the fetcher thread done AND have we processed all the tasks it submitted? + if not self.is_fetcher_thread_running and self.processed_posts_count >= self.total_posts_to_process: + self.log_signal.emit("🏁 All fetcher and worker tasks complete.") + self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set(), self.all_kept_original_filenames) + + def _trigger_single_pdf_creation(self): + """Reads temp files, sorts them by date, then creates the single PDF.""" + self.log_signal.emit("="*40) + self.log_signal.emit("Creating single PDF from collected text files...") + + posts_content_data = [] + for temp_filepath in self.session_temp_files: + try: + with open(temp_filepath, 'r', encoding='utf-8') as f: + data = json.load(f) + posts_content_data.append(data) + except Exception as e: + self.log_signal.emit(f" ⚠️ Could not read temp file '{temp_filepath}': {e}") + + if not posts_content_data: + self.log_signal.emit(" No content was collected. 
Aborting PDF creation.") + return + + output_dir = self.dir_input.text().strip() or QStandardPaths.writableLocation(QStandardPaths.DownloadLocation) + default_filename = os.path.join(output_dir, "Consolidated_Content.pdf") + filepath, _ = QFileDialog.getSaveFileName(self, "Save Single PDF", default_filename, "PDF Files (*.pdf)") + + if not filepath: + self.log_signal.emit(" Single PDF creation cancelled by user.") + return + + if not filepath.lower().endswith('.pdf'): + filepath += '.pdf' + + font_path = os.path.join(self.app_base_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf') + + # vvv THIS IS THE KEY CHANGE vvv + # Sort content by the 'published' date. ISO-formatted dates sort correctly as strings. + # Use a fallback value 'Z' to place any posts without a date at the end. + self.log_signal.emit(" Sorting collected posts by date (oldest first)...") + sorted_content = sorted(posts_content_data, key=lambda x: x.get('published', 'Z')) + # ^^^ END OF KEY CHANGE ^^^ + + create_single_pdf_from_content(sorted_content, filepath, font_path, logger=self.log_signal.emit) + self.log_signal.emit("="*40) + + def _add_to_history_candidates (self ,history_data ): + """Adds processed post data to the history candidates list.""" + if history_data and len (self .download_history_candidates )<8 : + history_data ['download_date_timestamp']=time .time () + creator_key =(history_data .get ('service','').lower (),str (history_data .get ('user_id',''))) + history_data ['creator_name']=self .creator_name_cache .get (creator_key ,history_data .get ('user_id','Unknown')) + self .download_history_candidates .append (history_data ) + + def _finalize_download_history (self ): + """Processes candidates and selects the final 3 history entries. + Only updates final_download_history_entries if new candidates are available. + """ + if not self .download_history_candidates : + + + self .log_signal .emit ("ℹ️ No new history candidates from this session. 
Preserving existing history.") + + + self .download_history_candidates .clear () + return + + candidates =list (self .download_history_candidates ) + now =datetime .datetime .now (datetime .timezone .utc ) + + def get_sort_key (entry ): + upload_date_str =entry .get ('upload_date_str') + if not upload_date_str : + return datetime .timedelta .max + try : + + upload_dt =datetime .datetime .fromisoformat (upload_date_str .replace ('Z','+00:00')) + if upload_dt .tzinfo is None : + upload_dt =upload_dt .replace (tzinfo =datetime .timezone .utc ) + return abs (now -upload_dt ) + except ValueError : + return datetime .timedelta .max + + candidates .sort (key =get_sort_key ) + self .final_download_history_entries =candidates [:3 ] + self .log_signal .emit (f"ℹ️ Finalized download history: {len (self .final_download_history_entries )} entries selected.") + self .download_history_candidates .clear () + + + self ._save_persistent_history () + + def _get_configurable_widgets_on_pause (self ): + """Returns a list of widgets that should be re-enabled when paused.""" + return [ + self .dir_input ,self .dir_button , + self .character_input ,self .char_filter_scope_toggle_button , + self .skip_words_input ,self .skip_scope_toggle_button , + self .remove_from_filename_input , + self .radio_all ,self .radio_images ,self .radio_videos , + self .radio_only_archives ,self .radio_only_links , + self .skip_zip_checkbox ,self .skip_rar_checkbox , + self .download_thumbnails_checkbox ,self .compress_images_checkbox , + self .use_subfolders_checkbox ,self .use_subfolder_per_post_checkbox , + self .manga_mode_checkbox , + self .manga_rename_toggle_button , + self .cookie_browse_button , + self .favorite_mode_checkbox , + self .multipart_toggle_button , + self .cookie_text_input , + self .scan_content_images_checkbox , + self .use_cookie_checkbox , + self .external_links_checkbox + ] + + def set_ui_enabled (self ,enabled ): + all_potentially_toggleable_widgets =[ + self .link_input ,self 
    def set_ui_enabled(self, enabled):
        """Enable/disable the whole input surface for download start/stop.

        enabled=True restores idle state; enabled=False locks the UI for an
        active download, except that a paused download re-enables the widgets
        from _get_configurable_widgets_on_pause().
        """
        all_potentially_toggleable_widgets = [
            self.link_input, self.dir_input, self.dir_button,
            self.page_range_label, self.start_page_input, self.to_label, self.end_page_input,
            self.character_input, self.char_filter_scope_toggle_button, self.character_filter_widget,
            self.filters_and_custom_folder_container_widget,
            self.custom_folder_label, self.custom_folder_input,
            self.skip_words_input, self.skip_scope_toggle_button, self.remove_from_filename_input,
            self.radio_all, self.radio_images, self.radio_videos, self.radio_only_archives, self.radio_only_links,
            self.skip_zip_checkbox, self.skip_rar_checkbox, self.download_thumbnails_checkbox, self.compress_images_checkbox,
            self.use_subfolders_checkbox, self.use_subfolder_per_post_checkbox, self.scan_content_images_checkbox,
            self.use_multithreading_checkbox, self.thread_count_input, self.thread_count_label,
            self.favorite_mode_checkbox,
            self.external_links_checkbox, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.use_cookie_checkbox, self.cookie_text_input, self.cookie_browse_button,
            self.multipart_toggle_button, self.radio_only_audio,
            self.character_search_input, self.new_char_input, self.add_char_button, self.add_to_filter_button, self.delete_char_button,
            self.reset_button
        ]

        widgets_to_enable_on_pause = self._get_configurable_widgets_on_pause()
        is_fav_mode_active = self.favorite_mode_checkbox.isChecked() if self.favorite_mode_checkbox else False
        download_is_active_or_paused = not enabled

        if not enabled:
            # Entering "download running" state: show the pause/cancel button
            # row and stop any external-link (Mega) transfer in flight.
            if self.bottom_action_buttons_stack:
                self.bottom_action_buttons_stack.setCurrentIndex(0)

            if self.external_link_download_thread and self.external_link_download_thread.isRunning():
                self.log_signal.emit("ℹ️ Cancelling active Mega download due to UI state change.")
                self.external_link_download_thread.cancel()
        else:
            pass

        for widget in all_potentially_toggleable_widgets:
            if not widget: continue

            # Per-widget overrides, checked in priority order.
            if widget is self.favorite_mode_artists_button or widget is self.favorite_mode_posts_button: continue
            elif self.is_paused and widget in widgets_to_enable_on_pause:
                widget.setEnabled(True)
            elif widget is self.favorite_mode_checkbox:
                widget.setEnabled(enabled)
            elif widget is self.use_cookie_checkbox and is_fav_mode_active:
                # Favorite mode forces cookies on; lock the checkbox.
                widget.setEnabled(False)
            elif widget is self.use_cookie_checkbox and self.is_paused and widget in widgets_to_enable_on_pause:
                widget.setEnabled(True)
            else:
                widget.setEnabled(enabled)

        if self.link_input:
            # URL entry is unavailable while favorite mode drives the queue.
            self.link_input.setEnabled(enabled and not is_fav_mode_active)

        if not enabled:
            if self.favorite_mode_artists_button:
                self.favorite_mode_artists_button.setEnabled(False)
            if self.favorite_mode_posts_button:
                self.favorite_mode_posts_button.setEnabled(False)

        if self.download_btn:
            self.download_btn.setEnabled(enabled and not is_fav_mode_active)

        if self.external_links_checkbox:
            # Link extraction is meaningless in links/archives/audio-only modes.
            is_only_links = self.radio_only_links and self.radio_only_links.isChecked()
            is_only_archives = self.radio_only_archives and self.radio_only_archives.isChecked()
            is_only_audio = hasattr(self, 'radio_only_audio') and self.radio_only_audio.isChecked()
            can_enable_ext_links = enabled and not is_only_links and not is_only_archives and not is_only_audio
            self.external_links_checkbox.setEnabled(can_enable_ext_links)
            if self.is_paused and not is_only_links and not is_only_archives and not is_only_audio:
                self.external_links_checkbox.setEnabled(True)
        if hasattr(self, 'use_cookie_checkbox'):
            self._update_cookie_input_visibility(self.use_cookie_checkbox.isChecked())

        # The log-view toggle is always usable, even mid-download.
        if self.log_verbosity_toggle_button: self.log_verbosity_toggle_button.setEnabled(True)

        multithreading_currently_on = self.use_multithreading_checkbox.isChecked()
        if self.thread_count_input: self.thread_count_input.setEnabled(enabled and multithreading_currently_on)
        if self.thread_count_label: self.thread_count_label.setEnabled(enabled and multithreading_currently_on)

        subfolders_currently_on = self.use_subfolders_checkbox.isChecked()
        if self.use_subfolder_per_post_checkbox:
            self.use_subfolder_per_post_checkbox.setEnabled(enabled or (self.is_paused and self.use_subfolder_per_post_checkbox in widgets_to_enable_on_pause))
        if self.cancel_btn: self.cancel_btn.setEnabled(download_is_active_or_paused)
        if self.pause_btn:
            self.pause_btn.setEnabled(download_is_active_or_paused)
            if download_is_active_or_paused:
                # Button label mirrors the paused flag (Pause <-> Resume).
                self.pause_btn.setText(self._tr("resume_download_button_text", "▶️ Resume Download") if self.is_paused else self._tr("pause_download_button_text", "⏸️ Pause Download"))
                self.pause_btn.setToolTip(self._tr("resume_download_button_tooltip", "Click to resume the download.") if self.is_paused else self._tr("pause_download_button_tooltip", "Click to pause the download."))
            else:
                self.pause_btn.setText(self._tr("pause_download_button_text", "⏸️ Pause Download"))
                self.pause_btn.setToolTip(self._tr("pause_download_button_tooltip", "Click to pause the ongoing download process."))
                self.is_paused = False
        if self.cancel_btn: self.cancel_btn.setText(self._tr("cancel_button_text", "❌ Cancel & Reset UI"))
        if enabled:
            if self.pause_event: self.pause_event.clear()
        if enabled or self.is_paused:
            # Re-run the dependent-visibility handlers so secondary widgets
            # reflect the (possibly changed) primary widget states.
            self._handle_multithreading_toggle(multithreading_currently_on)
            self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
            self.update_custom_folder_visibility(self.link_input.text())
            self.update_page_range_enabled_state()
            if self.radio_group and self.radio_group.checkedButton():
                self._handle_filter_mode_change(self.radio_group.checkedButton(), True)
            self.update_ui_for_subfolders(subfolders_currently_on)
        self._handle_favorite_mode_toggle(is_fav_mode_active)
_handle_pause_resume_action (self ): + if self ._is_download_active (): + self .is_paused =not self .is_paused + if self .is_paused : + if self .pause_event :self .pause_event .set () + self .log_signal .emit ("ℹ️ Download paused by user. Some settings can now be changed for subsequent operations.") + else : + if self .pause_event :self .pause_event .clear () + self .log_signal .emit ("ℹ️ Download resumed by user.") + self .set_ui_enabled (False ) + + def _perform_soft_ui_reset (self ,preserve_url =None ,preserve_dir =None ): + """Resets UI elements and some state to app defaults, then applies preserved inputs.""" + self .log_signal .emit ("🔄 Performing soft UI reset...") + self .link_input .clear () + self .dir_input .clear () + self .custom_folder_input .clear ();self .character_input .clear (); + self .skip_words_input .clear ();self .start_page_input .clear ();self .end_page_input .clear ();self .new_char_input .clear (); + if hasattr (self ,'remove_from_filename_input'):self .remove_from_filename_input .clear () + self .character_search_input .clear ();self .thread_count_input .setText ("4");self .radio_all .setChecked (True ); + self .skip_zip_checkbox .setChecked (True );self .skip_rar_checkbox .setChecked (True );self .download_thumbnails_checkbox .setChecked (False ); + self .compress_images_checkbox .setChecked (False );self .use_subfolders_checkbox .setChecked (True ); + self .use_subfolder_per_post_checkbox .setChecked (False );self .use_multithreading_checkbox .setChecked (True ); + if self .favorite_mode_checkbox :self .favorite_mode_checkbox .setChecked (False ) + if hasattr (self ,'scan_content_images_checkbox'):self .scan_content_images_checkbox .setChecked (False ) + self .external_links_checkbox .setChecked (False ) + if self .manga_mode_checkbox :self .manga_mode_checkbox .setChecked (False ) + if hasattr (self ,'use_cookie_checkbox'):self .use_cookie_checkbox .setChecked (self .use_cookie_setting ) + if not (hasattr (self 
    def _perform_soft_ui_reset(self, preserve_url=None, preserve_dir=None):
        """Resets UI elements and some state to app defaults, then applies preserved inputs.

        preserve_url / preserve_dir, when given, are written back after the
        wipe so a cancel can keep the user's URL and output directory.
        """
        self.log_signal.emit("🔄 Performing soft UI reset...")
        # --- Clear all text inputs back to defaults ---
        self.link_input.clear()
        self.dir_input.clear()
        self.custom_folder_input.clear(); self.character_input.clear();
        self.skip_words_input.clear(); self.start_page_input.clear(); self.end_page_input.clear(); self.new_char_input.clear();
        if hasattr(self, 'remove_from_filename_input'): self.remove_from_filename_input.clear()
        # --- Restore default checkbox/radio states ---
        self.character_search_input.clear(); self.thread_count_input.setText("4"); self.radio_all.setChecked(True);
        self.skip_zip_checkbox.setChecked(True); self.skip_rar_checkbox.setChecked(True); self.download_thumbnails_checkbox.setChecked(False);
        self.compress_images_checkbox.setChecked(False); self.use_subfolders_checkbox.setChecked(True);
        self.use_subfolder_per_post_checkbox.setChecked(False); self.use_multithreading_checkbox.setChecked(True);
        if self.favorite_mode_checkbox: self.favorite_mode_checkbox.setChecked(False)
        if hasattr(self, 'scan_content_images_checkbox'): self.scan_content_images_checkbox.setChecked(False)
        self.external_links_checkbox.setChecked(False)
        if self.manga_mode_checkbox: self.manga_mode_checkbox.setChecked(False)
        # Cookie usage falls back to the persisted setting, not plain "off".
        if hasattr(self, 'use_cookie_checkbox'): self.use_cookie_checkbox.setChecked(self.use_cookie_setting)
        if not (hasattr(self, 'use_cookie_checkbox') and self.use_cookie_checkbox.isChecked()):
            self.selected_cookie_filepath = None
        if hasattr(self, 'cookie_text_input'): self.cookie_text_input.setText(self.cookie_text_setting if self.use_cookie_setting else "")
        self.allow_multipart_download_setting = False
        self._update_multipart_toggle_button_text()

        self.skip_words_scope = SKIP_SCOPE_POSTS
        self._update_skip_scope_button_text()

        if hasattr(self, 'manga_date_prefix_input'): self.manga_date_prefix_input.clear()

        self.char_filter_scope = CHAR_SCOPE_TITLE
        self._update_char_filter_scope_button_text()

        self.manga_filename_style = STYLE_POST_TITLE
        self._update_manga_filename_style_button_text()
        # Re-apply the caller-preserved inputs after the wipe above.
        if preserve_url is not None:
            self.link_input.setText(preserve_url)
        if preserve_dir is not None:
            self.dir_input.setText(preserve_dir)
        # --- Reset transient download/session state ---
        self.external_link_queue.clear(); self.extracted_links_cache = []
        self._is_processing_external_link_queue = False; self._current_link_post_title = None
        if self.pause_event: self.pause_event.clear()
        self.is_restore_pending = False
        self.total_posts_to_process = 0; self.processed_posts_count = 0
        self.download_counter = 0; self.skip_counter = 0
        self.all_kept_original_filenames = []
        self.is_paused = False
        self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked())

        self._update_button_states_and_connections()  # Reset button states and connections
        self.favorite_download_queue.clear()
        self.is_processing_favorites_queue = False

        self.only_links_log_display_mode = LOG_DISPLAY_LINKS

        if hasattr(self, 'link_input'):
            if self.download_extracted_links_button:
                self.download_extracted_links_button.setEnabled(False)

        self.last_link_input_text_for_queue_sync = self.link_input.text()
        self.permanently_failed_files_for_dialog.clear()
        self.filter_character_list(self.character_search_input.text())
        self.favorite_download_scope = FAVORITE_SCOPE_SELECTED_LOCATION
        self._update_favorite_scope_button_text()

        self.set_ui_enabled(True)
        self.interrupted_session_data = None  # Clear session data from memory
        self.update_custom_folder_visibility(self.link_input.text())
        self.update_page_range_enabled_state()
        self._update_cookie_input_visibility(self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False)
        if hasattr(self, 'favorite_mode_checkbox'):
            self._handle_favorite_mode_toggle(False)

        self.log_signal.emit("✅ Soft UI reset complete. Preserved URL and Directory (if provided).")

    def _update_log_display_mode_button_text(self):
        """Sync the links-log toggle button's label/tooltip with the current mode."""
        if hasattr(self, 'log_display_mode_toggle_button'):
            if self.only_links_log_display_mode == LOG_DISPLAY_LINKS:
                self.log_display_mode_toggle_button.setText(self._tr("log_display_mode_links_view_text", "🔗 Links View"))
                self.log_display_mode_toggle_button.setToolTip(
                    "Current View: Extracted Links.\n"
                    "After Mega download, Mega log is shown THEN links are appended.\n"
                    "Click to switch to 'Download Progress View'."
                )
            else:
                self.log_display_mode_toggle_button.setText(self._tr("log_display_mode_progress_view_text", "⬇️ Progress View"))
                self.log_display_mode_toggle_button.setToolTip(
                    "Current View: Mega Download Progress.\n"
                    "After Mega download, ONLY Mega log is shown (links hidden).\n"
                    "Click to switch to 'Extracted Links View'."
                )
+ ) + + def _toggle_log_display_mode (self ): + self .only_links_log_display_mode =LOG_DISPLAY_DOWNLOAD_PROGRESS if self .only_links_log_display_mode ==LOG_DISPLAY_LINKS else LOG_DISPLAY_LINKS + self ._update_log_display_mode_button_text () + self ._filter_links_log () + + def cancel_download_button_action (self ): + if not self .cancel_btn .isEnabled ()and not self .cancellation_event .is_set ():self .log_signal .emit ("ℹ️ No active download to cancel or already cancelling.");return + self .log_signal .emit ("⚠️ Requesting cancellation of download process (soft reset)...") + + self._clear_session_file() # Clear session file on explicit cancel + if self .external_link_download_thread and self .external_link_download_thread .isRunning (): + self .log_signal .emit (" Cancelling active External Link download thread...") + self .external_link_download_thread .cancel () + + current_url =self .link_input .text () + current_dir =self .dir_input .text () + + self .cancellation_event .set () + self .is_fetcher_thread_running =False + if self .download_thread and self .download_thread .isRunning ():self .download_thread .requestInterruption ();self .log_signal .emit (" Signaled single download thread to interrupt.") + if self .thread_pool : + self .log_signal .emit (" Initiating non-blocking shutdown and cancellation of worker pool tasks...") + self .thread_pool .shutdown (wait =False ,cancel_futures =True ) + self .thread_pool =None + self .active_futures =[] + + self .external_link_queue .clear ();self ._is_processing_external_link_queue =False ;self ._current_link_post_title =None + + self ._perform_soft_ui_reset (preserve_url =current_url ,preserve_dir =current_dir ) + + self .progress_label .setText (f"{self ._tr ('status_cancelled_by_user','Cancelled by user')}. {self ._tr ('ready_for_new_task_text','Ready for new task.')}") + self .file_progress_label .setText ("") + if self .pause_event :self .pause_event .clear () + self .log_signal .emit ("ℹ️ UI reset. 
Ready for new operation. Background tasks are being terminated.") + self .is_paused =False + if hasattr (self ,'retryable_failed_files_info')and self .retryable_failed_files_info : + self .log_signal .emit (f" Discarding {len (self .retryable_failed_files_info )} pending retryable file(s) due to cancellation.") + self .cancellation_message_logged_this_session =False + self .retryable_failed_files_info .clear () + self .favorite_download_queue .clear () + self .permanently_failed_files_for_dialog .clear () + self .is_processing_favorites_queue =False + self .favorite_download_scope =FAVORITE_SCOPE_SELECTED_LOCATION + self ._update_favorite_scope_button_text () + if hasattr (self ,'link_input'): + self .last_link_input_text_for_queue_sync =self .link_input .text () + self .cancellation_message_logged_this_session =False + + def _get_domain_for_service (self ,service_name :str )->str : + """Determines the base domain for a given service.""" + if not isinstance (service_name ,str ): + return "kemono.su" + service_lower =service_name .lower () + coomer_primary_services ={'onlyfans','fansly','manyvids','candfans','gumroad','patreon','subscribestar','dlsite','discord','fantia','boosty','pixiv','fanbox'} + if service_lower in coomer_primary_services and service_lower not in ['patreon','discord','fantia','boosty','pixiv','fanbox']: + return "coomer.su" + return "kemono.su" + + def download_finished (self ,total_downloaded ,total_skipped ,cancelled_by_user ,kept_original_names_list =None ): + if kept_original_names_list is None : + kept_original_names_list =list (self .all_kept_original_filenames )if hasattr (self ,'all_kept_original_filenames')else [] + if kept_original_names_list is None : + kept_original_names_list =[] + + if not cancelled_by_user and not self.retryable_failed_files_info: + self._clear_session_file() + self.interrupted_session_data = None + self.is_restore_pending = False + + self ._finalize_download_history () + status_message =self ._tr 
    def download_finished(self, total_downloaded, total_skipped, cancelled_by_user, kept_original_names_list=None):
        """Finalize a download session: summarize, tear down threads, and
        either offer a retry of failed files or return the UI to idle.

        Connected to the backend finished_signal; also called with the
        aggregated counters from the worker-pool path.
        """
        if kept_original_names_list is None:
            kept_original_names_list = list(self.all_kept_original_filenames) if hasattr(self, 'all_kept_original_filenames') else []
        if kept_original_names_list is None:
            kept_original_names_list = []

        # A clean finish (no cancel, nothing to retry) means the persisted
        # resume-session file is no longer needed.
        if not cancelled_by_user and not self.retryable_failed_files_info:
            self._clear_session_file()
            self.interrupted_session_data = None
            self.is_restore_pending = False

        self._finalize_download_history()
        status_message = self._tr("status_cancelled_by_user", "Cancelled by user") if cancelled_by_user else self._tr("status_completed", "Completed")
        if cancelled_by_user and self.retryable_failed_files_info:
            self.log_signal.emit(f"   Download cancelled, discarding {len(self.retryable_failed_files_info)} file(s) that were pending retry.")
            self.retryable_failed_files_info.clear()

        summary_log = "=" * 40
        summary_log += f"\n🏁 Download {status_message}!\n   Summary: Downloaded Files={total_downloaded}, Skipped Files={total_skipped}\n"
        summary_log += "=" * 40
        self.log_signal.emit(summary_log)

        # Safely shut down the thread pool now that all work is done.
        if self.thread_pool:
            self.log_signal.emit("   Shutting down worker thread pool...")
            self.thread_pool.shutdown(wait=False)
            self.thread_pool = None
            self.log_signal.emit("   Thread pool shut down.")

        try:
            if self.single_pdf_setting and self.session_temp_files and not cancelled_by_user:
                self._trigger_single_pdf_creation()
        finally:
            # This ensures cleanup happens even if PDF creation fails or is cancelled
            self._cleanup_temp_files()
            self.single_pdf_setting = False

        # Reset session state for the next run
        # NOTE(review): single_pdf_setting is already cleared in the finally
        # block above — this second assignment is redundant.
        self.session_text_content = []
        self.single_pdf_setting = False

        if kept_original_names_list:
            intro_msg = (
                HTML_PREFIX +
                "ℹ️ The following files from multi-file manga posts " +
                "(after the first file) kept their original names:"
            )
            self.log_signal.emit(intro_msg)

            # NOTE(review): html_list_items is emitted empty — the per-file
            # list markup appears to have been lost; confirm intent.
            html_list_items = ""

            self.log_signal.emit(HTML_PREFIX + html_list_items)
            self.log_signal.emit("=" * 40)

        if self.download_thread:
            # Disconnect every signal individually; disconnect() raises
            # TypeError/RuntimeError when a connection no longer exists.
            try:
                if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.disconnect(self.handle_main_log)
                if hasattr(self.download_thread, 'add_character_prompt_signal'): self.download_thread.add_character_prompt_signal.disconnect(self.add_character_prompt_signal)
                if hasattr(self.download_thread, 'finished_signal'): self.download_thread.finished_signal.disconnect(self.download_finished)
                if hasattr(self.download_thread, 'receive_add_character_result'): self.character_prompt_response_signal.disconnect(self.download_thread.receive_add_character_result)
                if hasattr(self.download_thread, 'external_link_signal'): self.download_thread.external_link_signal.disconnect(self.handle_external_link_signal)
                if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.disconnect(self.update_file_progress_display)
                if hasattr(self.download_thread, 'missed_character_post_signal'):
                    self.download_thread.missed_character_post_signal.disconnect(self.handle_missed_character_post)
                if hasattr(self.download_thread, 'retryable_file_failed_signal'):
                    self.download_thread.retryable_file_failed_signal.disconnect(self._handle_retryable_file_failure)
                if hasattr(self.download_thread, 'file_successfully_downloaded_signal'):
                    self.download_thread.file_successfully_downloaded_signal.disconnect(self._handle_actual_file_downloaded)
                if hasattr(self.download_thread, 'post_processed_for_history_signal'):
                    self.download_thread.post_processed_for_history_signal.disconnect(self._add_to_history_candidates)
            except (TypeError, RuntimeError) as e:
                self.log_signal.emit(f"ℹ️ Note during single-thread signal disconnection: {e}")

            if not self.download_thread.isRunning():
                if self.download_thread:
                    self.download_thread.deleteLater()
                    self.download_thread = None

        self.progress_label.setText(
            f"{status_message}: "
            f"{total_downloaded} {self._tr('files_downloaded_label', 'downloaded')}, "
            f"{total_skipped} {self._tr('files_skipped_label', 'skipped')}."
        )
        self.file_progress_label.setText("")
        if not cancelled_by_user: self._try_process_next_external_link()

        # NOTE(review): second shutdown pass — only reachable if something
        # recreated thread_pool after the shutdown above; confirm necessity.
        if self.thread_pool:
            self.log_signal.emit("   Ensuring worker thread pool is shut down...")
            self.thread_pool.shutdown(wait=True, cancel_futures=True)
            self.thread_pool = None

        self.active_futures = []
        if self.pause_event: self.pause_event.clear()
        self.cancel_btn.setEnabled(False)
        self.is_paused = False
        if not cancelled_by_user and self.retryable_failed_files_info:
            num_failed = len(self.retryable_failed_files_info)
            reply = QMessageBox.question(self, "Retry Failed Downloads?",
                                         f"{num_failed} file(s) failed with potentially recoverable errors (e.g., IncompleteRead).\n\n"
                                         "Would you like to attempt to download these failed files again?",
                                         QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes)
            if reply == QMessageBox.Yes:
                # The retry session takes over UI state; skip the idle reset.
                self._start_failed_files_retry_session()
                return
            else:
                self.log_signal.emit("ℹ️ User chose not to retry failed files.")
                self.permanently_failed_files_for_dialog.extend(self.retryable_failed_files_info)
                if self.permanently_failed_files_for_dialog:
                    self.log_signal.emit(f"🆘 Error button enabled. {len(self.permanently_failed_files_for_dialog)} file(s) can be viewed.")
                self.cancellation_message_logged_this_session = False
                self.retryable_failed_files_info.clear()

        self.is_fetcher_thread_running = False

        if self.is_processing_favorites_queue:
            # Either advance the favorites queue or close it out.
            if not self.favorite_download_queue:
                self.is_processing_favorites_queue = False
                self.log_signal.emit(f"✅ All {self.current_processing_favorite_item_info.get('type', 'item')} downloads from favorite queue have been processed.")
                self.set_ui_enabled(not self._is_download_active())
            else:
                self._process_next_favorite_download()
        else:
            self.set_ui_enabled(True)
        self.cancellation_message_logged_this_session = False
    def _handle_thumbnail_mode_change(self, thumbnails_checked):
        """Handles UI changes when 'Download Thumbnails Only' is toggled."""
        if not hasattr(self, 'scan_content_images_checkbox'):
            return

        if thumbnails_checked:
            # Thumbnails-only implies content scanning; force it on and lock it.
            self.scan_content_images_checkbox.setChecked(True)
            self.scan_content_images_checkbox.setEnabled(False)
            self.scan_content_images_checkbox.setToolTip(
                "Automatically enabled and locked because 'Download Thumbnails Only' is active.\n"
                "In this mode, only images found by content scanning will be downloaded."
            )
        else:
            # Unlock and restore the checkbox's original tooltip/state.
            self.scan_content_images_checkbox.setEnabled(True)
            self.scan_content_images_checkbox.setChecked(False)
            self.scan_content_images_checkbox.setToolTip(self._original_scan_content_tooltip)

    def _start_failed_files_retry_session(self, files_to_retry_list=None):
        """Kick off a retry pool for failed file downloads.

        files_to_retry_list: explicit jobs to retry (e.g. from the error
        dialog); when None, the session retries everything accumulated in
        retryable_failed_files_info. Results arrive asynchronously via
        _handle_retry_future_result.
        """
        if files_to_retry_list:
            self.files_for_current_retry_session = list(files_to_retry_list)
            # Jobs being retried are no longer "permanently failed".
            self.permanently_failed_files_for_dialog = [f for f in self.permanently_failed_files_for_dialog if f not in files_to_retry_list]
        else:
            self.files_for_current_retry_session = list(self.retryable_failed_files_info)
            self.retryable_failed_files_info.clear()
        self.log_signal.emit(f"🔄 Starting retry session for {len(self.files_for_current_retry_session)} file(s)...")
        self.set_ui_enabled(False)
        if self.cancel_btn: self.cancel_btn.setText(self._tr("cancel_retry_button_text", "❌ Cancel Retry"))

        # Per-session bookkeeping read by the future callbacks.
        self.active_retry_futures = []
        self.processed_retry_count = 0
        self.succeeded_retry_count = 0
        self.failed_retry_count_in_session = 0
        self.total_files_for_retry = len(self.files_for_current_retry_session)
        self.active_retry_futures_map = {}

        self.progress_label.setText(self._tr("progress_posts_text", "Progress: {processed_posts} / {total_posts} posts ({progress_percent:.1f}%)").format(processed_posts=0, total_posts=self.total_files_for_retry, progress_percent=0.0).replace("posts", "files"))
        self.cancellation_event.clear()

        # Thread count: GUI value clamped to [1, MAX_FILE_THREADS...] and to
        # the number of files; falls back to 1 on unparseable input.
        num_retry_threads = 1
        try:
            num_threads_from_gui = int(self.thread_count_input.text().strip())
            num_retry_threads = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER, self.total_files_for_retry if self.total_files_for_retry > 0 else 1))
        except ValueError:
            num_retry_threads = 1

        self.retry_thread_pool = ThreadPoolExecutor(max_workers=num_retry_threads, thread_name_prefix='RetryFile_')
        # Shared PostProcessorWorker kwargs; per-job fields are layered on top
        # in _execute_single_file_retry.
        common_ppw_args_for_retry = {
            'download_root': self.dir_input.text().strip(),
            'known_names': list(KNOWN_NAMES),
            'emitter': self.worker_to_gui_queue,
            'unwanted_keywords': {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'},
            'filter_mode': self.get_filter_mode(),
            'skip_zip': self.skip_zip_checkbox.isChecked(),
            'skip_rar': self.skip_rar_checkbox.isChecked(),
            'use_subfolders': self.use_subfolders_checkbox.isChecked(),
            'use_post_subfolders': self.use_subfolder_per_post_checkbox.isChecked(),
            'compress_images': self.compress_images_checkbox.isChecked(),
            'download_thumbnails': self.download_thumbnails_checkbox.isChecked(),
            'pause_event': self.pause_event,
            'cancellation_event': self.cancellation_event,
            'downloaded_files': self.downloaded_files,
            'downloaded_file_hashes': self.downloaded_file_hashes,
            'downloaded_files_lock': self.downloaded_files_lock,
            'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock,
            'skip_words_list': [word.strip().lower() for word in self.skip_words_input.text().strip().split(',') if word.strip()],
            'skip_words_scope': self.get_skip_words_scope(),
            'char_filter_scope': self.get_char_filter_scope(),
            'remove_from_filename_words_list': [word.strip() for word in self.remove_from_filename_input.text().strip().split(',') if word.strip()] if hasattr(self, 'remove_from_filename_input') else [],
            'allow_multipart_download': self.allow_multipart_download_setting,
            'filter_character_list': None,
            'dynamic_character_filter_holder': None,
            'target_post_id_from_initial_url': None,
            'custom_folder_name': None,
            'num_file_threads': 1,
            'manga_date_file_counter_ref': None,
        }

        for job_details in self.files_for_current_retry_session:
            future = self.retry_thread_pool.submit(self._execute_single_file_retry, job_details, common_ppw_args_for_retry)
            future.add_done_callback(self._handle_retry_future_result)
            self.active_retry_futures_map[future] = job_details
            self.active_retry_futures.append(future)
    def _execute_single_file_retry(self, job_details, common_args):
        """Executes a single file download retry attempt.

        Runs on a retry-pool thread. Builds a throwaway PostProcessorWorker
        around the recorded job and re-downloads just that one file.
        Returns True when the file was downloaded or resolved as skipped.
        """
        # Minimal post stand-in — only id/title are needed by the worker.
        dummy_post_data = {'id': job_details['original_post_id_for_log'], 'title': job_details['post_title']}

        ppw_init_args = {
            **common_args,
            'post_data': dummy_post_data,
            'service': job_details.get('service', 'unknown_service'),
            'user_id': job_details.get('user_id', 'unknown_user'),
            'api_url_input': job_details.get('api_url_input', ''),
            'manga_mode_active': job_details.get('manga_mode_active_for_file', False),
            'manga_filename_style': job_details.get('manga_filename_style_for_file', STYLE_POST_TITLE),
            'scan_content_for_images': common_args.get('scan_content_for_images', False),
            'use_cookie': common_args.get('use_cookie', False),
            'cookie_text': common_args.get('cookie_text', ""),
            'selected_cookie_file': common_args.get('selected_cookie_file', None),
            'app_base_dir': common_args.get('app_base_dir', None),
        }
        worker = PostProcessorWorker(**ppw_init_args)

        dl_count, skip_count, filename_saved, original_kept, status, _ = worker._download_single_file(
            file_info=job_details['file_info'],
            target_folder_path=job_details['target_folder_path'],
            headers=job_details['headers'],
            original_post_id_for_log=job_details['original_post_id_for_log'],
            skip_event=None,
            post_title=job_details['post_title'],
            file_index_in_post=job_details['file_index_in_post'],
            num_files_in_this_post=job_details['num_files_in_this_post'],
            forced_filename_override=job_details.get('forced_filename_override')
        )

        # "Skipped" counts as resolved: the file no longer needs retrying.
        is_successful_download = (status == FILE_DOWNLOAD_STATUS_SUCCESS)
        is_resolved_as_skipped = (status == FILE_DOWNLOAD_STATUS_SKIPPED)

        return is_successful_download or is_resolved_as_skipped

    def _handle_retry_future_result(self, future):
        """Future callback: tally one retry outcome and update progress.

        Runs on a pool thread; the session-finished transition is marshalled
        back to the GUI thread via QTimer.singleShot.
        """
        self.processed_retry_count += 1
        was_successful = False
        try:
            if future.cancelled():
                self.log_signal.emit("   A retry task was cancelled.")
            elif future.exception():
                self.log_signal.emit(f"❌ Retry task worker error: {future.exception()}")
            else:
                was_successful = future.result()
            job_details = self.active_retry_futures_map.pop(future, None)
            if was_successful:
                self.succeeded_retry_count += 1
            else:
                self.failed_retry_count_in_session += 1
                # A failed retry is now considered permanently failed.
                if job_details:
                    self.permanently_failed_files_for_dialog.append(job_details)
        except Exception as e:
            self.log_signal.emit(f"❌ Error in _handle_retry_future_result: {e}")
            self.failed_retry_count_in_session += 1

        progress_percent_retry = (self.processed_retry_count / self.total_files_for_retry * 100) if self.total_files_for_retry > 0 else 0
        self.progress_label.setText(
            self._tr("progress_posts_text", "Progress: {processed_posts} / {total_posts} posts ({progress_percent:.1f}%)").format(processed_posts=self.processed_retry_count, total_posts=self.total_files_for_retry, progress_percent=progress_percent_retry).replace("posts", "files") +
            f" ({self._tr('succeeded_text', 'Succeeded')}: {self.succeeded_retry_count}, {self._tr('failed_text', 'Failed')}: {self.failed_retry_count_in_session})"
        )

        if self.processed_retry_count >= self.total_files_for_retry:
            # Double-check every future is done before finishing the session.
            if all(f.done() for f in self.active_retry_futures):
                QTimer.singleShot(0, self._retry_session_finished)
self .permanently_failed_files_for_dialog : + self .log_signal .emit (f"🆘 {self ._tr ('error_button_text','Error')} button enabled. {len (self .permanently_failed_files_for_dialog )} file(s) ultimately failed and can be viewed.") + + self .set_ui_enabled (not self ._is_download_active ()) + if self .cancel_btn :self .cancel_btn .setText (self ._tr ("cancel_button_text","❌ Cancel & Reset UI")) + self .progress_label .setText ( + f"{self ._tr ('retry_finished_text','Retry Finished')}. " + f"{self ._tr ('succeeded_text','Succeeded')}: {self .succeeded_retry_count }, " + f"{self ._tr ('failed_text','Failed')}: {self .failed_retry_count_in_session }. " + f"{self ._tr ('ready_for_new_task_text','Ready for new task.')}") + self .file_progress_label .setText ("") + if self .pause_event :self .pause_event .clear () + self .is_paused =False + + def toggle_active_log_view (self ): + if self .current_log_view =='progress': + self .current_log_view ='missed_character' + if self .log_view_stack :self .log_view_stack .setCurrentIndex (1 ) + if self .log_verbosity_toggle_button : + self .log_verbosity_toggle_button .setText (self .CLOSED_EYE_ICON ) + self .log_verbosity_toggle_button .setToolTip ("Current View: Missed Character Log. Click to switch to Progress Log.") + if self .progress_log_label :self .progress_log_label .setText (self ._tr ("missed_character_log_label_text","🚫 Missed Character Log:")) + else : + self .current_log_view ='progress' + if self .log_view_stack :self .log_view_stack .setCurrentIndex (0 ) + if self .log_verbosity_toggle_button : + self .log_verbosity_toggle_button .setText (self .EYE_ICON ) + self .log_verbosity_toggle_button .setToolTip ("Current View: Progress Log. 
Click to switch to Missed Character Log.") + if self .progress_log_label :self .progress_log_label .setText (self ._tr ("progress_log_label_text","📜 Progress Log:")) + + def reset_application_state(self): + # --- Stop all background tasks and threads --- + if self._is_download_active(): + # Try to cancel download thread + if self.download_thread and self.download_thread.isRunning(): + self.log_signal.emit("⚠️ Cancelling active download thread for reset...") + self.cancellation_event.set() + self.download_thread.requestInterruption() + self.download_thread.wait(3000) + if self.download_thread.isRunning(): + self.log_signal.emit(" ⚠️ Download thread did not terminate gracefully.") + self.download_thread.deleteLater() + self.download_thread = None + + # Try to cancel thread pool + if self.thread_pool: + self.log_signal.emit(" Shutting down thread pool for reset...") + self.thread_pool.shutdown(wait=True, cancel_futures=True) + self.thread_pool = None + self.active_futures = [] + + # Try to cancel external link download thread + if self.external_link_download_thread and self.external_link_download_thread.isRunning(): + self.log_signal.emit(" Cancelling external link download thread for reset...") + self.external_link_download_thread.cancel() + self.external_link_download_thread.wait(3000) + self.external_link_download_thread.deleteLater() + self.external_link_download_thread = None + + # Try to cancel retry thread pool + if hasattr(self, 'retry_thread_pool') and self.retry_thread_pool: + self.log_signal.emit(" Shutting down retry thread pool for reset...") + self.retry_thread_pool.shutdown(wait=True) + self.retry_thread_pool = None + if hasattr(self, 'active_retry_futures'): + self.active_retry_futures.clear() + if hasattr(self, 'active_retry_futures_map'): + self.active_retry_futures_map.clear() + + self.cancellation_event.clear() + if self.pause_event: + self.pause_event.clear() + self.is_paused = False + + # --- Reset UI and all state --- + self.log_signal.emit("🔄 
Resetting application state to defaults...") + self._reset_ui_to_defaults() + self._load_saved_download_location() + self.main_log_output.clear() + self.external_log_output.clear() + if self.missed_character_log_output: + self.missed_character_log_output.clear() + + self.current_log_view = 'progress' + if self.log_view_stack: + self.log_view_stack.setCurrentIndex(0) + if self.progress_log_label: + self.progress_log_label.setText(self._tr("progress_log_label_text", "📜 Progress Log:")) + if self.log_verbosity_toggle_button: + self.log_verbosity_toggle_button.setText(self.EYE_ICON) + self.log_verbosity_toggle_button.setToolTip("Current View: Progress Log. Click to switch to Missed Character Log.") + + # Clear all download-related state + self.external_link_queue.clear() + self.extracted_links_cache = [] + self._is_processing_external_link_queue = False + self._current_link_post_title = None + self.progress_label.setText(self._tr("progress_idle_text", "Progress: Idle")) + self.file_progress_label.setText("") + with self.downloaded_files_lock: + self.downloaded_files.clear() + with self.downloaded_file_hashes_lock: + self.downloaded_file_hashes.clear() + self.missed_title_key_terms_count.clear() + self.missed_title_key_terms_examples.clear() + self.logged_summary_for_key_term.clear() + self.already_logged_bold_key_terms.clear() + self.missed_key_terms_buffer.clear() + self.favorite_download_queue.clear() + self.only_links_log_display_mode = LOG_DISPLAY_LINKS + self.mega_download_log_preserved_once = False + self.permanently_failed_files_for_dialog.clear() + self._update_error_button_count() + self.favorite_download_scope = FAVORITE_SCOPE_SELECTED_LOCATION + self._update_favorite_scope_button_text() + self.retryable_failed_files_info.clear() + self.cancellation_message_logged_this_session = False + self.is_processing_favorites_queue = False + self.total_posts_to_process = 0 + self.processed_posts_count = 0 + self.download_counter = 0 + self.skip_counter = 0 + 
        self.all_kept_original_filenames = []
        self.is_paused = False
        self.is_fetcher_thread_running = False
        self.interrupted_session_data = None
        self.is_restore_pending = False

        # Persist the (now default) style/scope choices immediately.
        self.settings.setValue(MANGA_FILENAME_STYLE_KEY, self.manga_filename_style)
        self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope)
        self.settings.sync()
        self._update_manga_filename_style_button_text()
        self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)

        self.set_ui_enabled(True)
        self.log_signal.emit("✅ Application fully reset. Ready for new download.")
        # NOTE(review): the favorites-queue/session fields below were already reset
        # earlier in this method and in _reset_ui_to_defaults — redundant but harmless.
        self.is_processing_favorites_queue = False
        self.current_processing_favorite_item_info = None
        self.favorite_download_queue.clear()
        self.interrupted_session_data = None
        self.is_restore_pending = False
        self.last_link_input_text_for_queue_sync = ""

    def _reset_ui_to_defaults(self):
        """Resets all UI elements and relevant state to their default values."""
        # Clear all text fields
        self.link_input.clear()
        self.custom_folder_input.clear()
        self.character_input.clear()
        self.skip_words_input.clear()
        self.start_page_input.clear()
        self.end_page_input.clear()
        self.new_char_input.clear()
        if hasattr(self, 'remove_from_filename_input'):
            self.remove_from_filename_input.clear()
        self.character_search_input.clear()
        self.thread_count_input.setText("4")  # default worker count
        if hasattr(self, 'manga_date_prefix_input'):
            self.manga_date_prefix_input.clear()

        # Set radio buttons and checkboxes to defaults
        self.radio_all.setChecked(True)
        self.skip_zip_checkbox.setChecked(True)
        self.skip_rar_checkbox.setChecked(True)
        self.download_thumbnails_checkbox.setChecked(False)
        self.compress_images_checkbox.setChecked(False)
        self.use_subfolders_checkbox.setChecked(True)
        self.use_subfolder_per_post_checkbox.setChecked(False)
        self.use_multithreading_checkbox.setChecked(True)
        if self.favorite_mode_checkbox:
            self.favorite_mode_checkbox.setChecked(False)
        self.external_links_checkbox.setChecked(False)
        if self.manga_mode_checkbox:
            self.manga_mode_checkbox.setChecked(False)
        if hasattr(self, 'use_cookie_checkbox'):
            self.use_cookie_checkbox.setChecked(False)
        self.selected_cookie_filepath = None
        if hasattr(self, 'cookie_text_input'):
            self.cookie_text_input.clear()

        # Reset log and progress displays
        if self.main_log_output:
            self.main_log_output.clear()
        if self.external_log_output:
            self.external_log_output.clear()
        if self.missed_character_log_output:
            self.missed_character_log_output.clear()
        self.progress_label.setText(self._tr("progress_idle_text", "Progress: Idle"))
        self.file_progress_label.setText("")

        # Reset internal state
        self.missed_title_key_terms_count.clear()
        self.missed_title_key_terms_examples.clear()
        self.logged_summary_for_key_term.clear()
        self.already_logged_bold_key_terms.clear()
        self.missed_key_terms_buffer.clear()
        self.permanently_failed_files_for_dialog.clear()
        self.only_links_log_display_mode = LOG_DISPLAY_LINKS
        self.cancellation_message_logged_this_session = False
        self.mega_download_log_preserved_once = False
        self.allow_multipart_download_setting = False
        self.skip_words_scope = SKIP_SCOPE_POSTS
        self.char_filter_scope = CHAR_SCOPE_TITLE
        self.manga_filename_style = STYLE_POST_TITLE
        self.favorite_download_scope = FAVORITE_SCOPE_SELECTED_LOCATION
        # Sync every toggle-button caption with the state just restored.
        self._update_skip_scope_button_text()
        self._update_char_filter_scope_button_text()
        self._update_manga_filename_style_button_text()
        self._update_multipart_toggle_button_text()
        self._update_favorite_scope_button_text()
        self.current_log_view = 'progress'
        self.is_paused = False
        if self.pause_event:
            self.pause_event.clear()

        # Reset extracted/external links state
        self.external_link_queue.clear()
        self.extracted_links_cache = []
        self._is_processing_external_link_queue = False
        self._current_link_post_title = None
        if self.download_extracted_links_button:
            self.download_extracted_links_button.setEnabled(False)

        # Reset favorite/queue/session state
        self.favorite_download_queue.clear()
        self.is_processing_favorites_queue = False
        self.current_processing_favorite_item_info = None
        self.interrupted_session_data = None
        self.is_restore_pending = False
        self.last_link_input_text_for_queue_sync = ""
        self._update_button_states_and_connections()
        # Reset counters and progress
        self.total_posts_to_process = 0
        self.processed_posts_count = 0
        self.download_counter = 0
        self.skip_counter = 0
        self.all_kept_original_filenames = []

        # Reset log view and UI state
        if self.log_view_stack:
            self.log_view_stack.setCurrentIndex(0)
        if self.progress_log_label:
            self.progress_log_label.setText(self._tr("progress_log_label_text", "📜 Progress Log:"))
        if self.log_verbosity_toggle_button:
            self.log_verbosity_toggle_button.setText(self.EYE_ICON)
            self.log_verbosity_toggle_button.setToolTip("Current View: Progress Log. Click to switch to Missed Character Log.")

        # Reset character list filter
        self.filter_character_list("")

        # Update UI for manga mode and multithreading
        self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked())
        self.update_ui_for_manga_mode(False)
        self.update_custom_folder_visibility(self.link_input.text())
        self.update_page_range_enabled_state()
        self._update_cookie_input_visibility(False)
        self._update_cookie_input_placeholders_and_tooltips()

        # Reset button states
        self.download_btn.setEnabled(True)
        self.cancel_btn.setEnabled(False)
        if self.reset_button:
            self.reset_button.setEnabled(True)
            self.reset_button.setText(self._tr("reset_button_text", "🔄 Reset"))
            self.reset_button.setToolTip(self._tr("reset_button_tooltip", "Reset all inputs and logs to default state (only when idle)."))

        # Reset favorite mode UI
        if hasattr(self, 'favorite_mode_checkbox'):
            self._handle_favorite_mode_toggle(False)
        if hasattr(self, 'scan_content_images_checkbox'):
            self.scan_content_images_checkbox.setChecked(False)
        if hasattr(self, 'download_thumbnails_checkbox'):
            self._handle_thumbnail_mode_change(self.download_thumbnails_checkbox.isChecked())

        self.set_ui_enabled(True)
        self.log_signal.emit("✅ UI reset to defaults. Ready for new operation.")
        self._update_button_states_and_connections()

    def _show_feature_guide(self):
        """Open the multi-step HelpGuideDialog, resolving each step's title and
        body through the translation helper (falls back to the key itself)."""
        steps_content_keys = [
            ("help_guide_step1_title", "help_guide_step1_content"),
            ("help_guide_step2_title", "help_guide_step2_content"),
            ("help_guide_step3_title", "help_guide_step3_content"),
            ("help_guide_step4_title", "help_guide_step4_content"),
            ("help_guide_step5_title", "help_guide_step5_content"),
            ("help_guide_step6_title", "help_guide_step6_content"),
            ("help_guide_step7_title", "help_guide_step7_content"),
            ("help_guide_step8_title", "help_guide_step8_content"),
            ("help_guide_step9_title", "help_guide_step9_content"),
            ("column_header_post_title", "Post Title"),
            ("column_header_date_uploaded", "Date Uploaded"),
        ]

        steps = []
        for title_key, content_key in steps_content_keys:
            title = self._tr(title_key, title_key)
            content = self._tr(content_key, f"Content for {content_key } not found.")
            steps.append((title, content))

        guide_dialog = HelpGuideDialog(steps, self)
        guide_dialog.exec_()

    def prompt_add_character(self, character_name):
        """Ask (modal QMessageBox) whether to add `character_name` to the known
        names list; perform the add on acceptance and report the final outcome
        through character_prompt_response_signal."""
        global KNOWN_NAMES
        reply = QMessageBox.question(self, "Add Filter Name to Known List?", f"The name '{character_name }' was encountered or used as a filter.\nIt's not in your known names list (used for folder suggestions).\nAdd it now?", QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes)
        result = (reply == QMessageBox.Yes)
        if result:
            if self.add_new_character(name_to_add=character_name,
                                      is_group_to_add=False,
                                      aliases_to_add=[character_name],
                                      suppress_similarity_prompt=False):
                self.log_signal.emit(f"✅ Added '{character_name }' to known names via background prompt.")
            else:
                # add_new_character declined or failed: report "no addition".
                result = False; self.log_signal.emit(f"ℹ️ Adding '{character_name }' via background prompt was declined, failed, or a similar name conflict was not overridden.")
        self.character_prompt_response_signal.emit(result)

    def receive_add_character_result(self, result):
        """Store the prompt outcome under the mutex so the thread waiting on the
        prompt can read it safely."""
        with QMutexLocker(self.prompt_mutex): self._add_character_response = result
        self.log_signal.emit(f" Main thread received character prompt response: {'Action resulted in addition/confirmation'if result else 'Action resulted in no addition/declined'}")

    def _update_multipart_toggle_button_text(self):
        """Sync the multi-part toggle button caption/tooltip with the setting."""
        if hasattr(self, 'multipart_toggle_button'):
            if self.allow_multipart_download_setting:
                self.multipart_toggle_button.setText(self._tr("multipart_on_button_text", "Multi-part: ON"))
                self.multipart_toggle_button.setToolTip(self._tr("multipart_on_button_tooltip", "Tooltip for multipart ON"))
            else:
                self.multipart_toggle_button.setText(self._tr("multipart_off_button_text", "Multi-part: OFF"))
                self.multipart_toggle_button.setToolTip(self._tr("multipart_off_button_tooltip", "Tooltip for multipart OFF"))

    def _update_error_button_count(self):
        """Updates the Error button text to show the count of failed files."""
        if not hasattr(self, 'error_btn'):
            return

        count = len(self.permanently_failed_files_for_dialog)
        base_text = self._tr("error_button_text", "Error")

        if count > 0:
            self.error_btn.setText(f"({count}) {base_text}")
        else:
            self.error_btn.setText(base_text)

    def _toggle_multipart_mode(self):
        """Toggle the multi-part download setting; show a confirmation advisory
        before enabling, and persist the new value to QSettings."""
        if not self.allow_multipart_download_setting:
            msg_box = QMessageBox(self)
            msg_box.setIcon(QMessageBox.Warning)
            msg_box.setWindowTitle("Multi-part Download Advisory")
            # NOTE(review): the advisory body text was garbled in this patch — the
            # middle lines of the message appear to be missing. Restore the full
            # text from the previous revision before shipping.
            msg_box.setText(
                "Multi-part download advisory:\n"
                "\n"
                "Do you want to enable multi-part download?"
            )
            proceed_button = msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole)
            cancel_button = msg_box.addButton("Cancel", QMessageBox.RejectRole)
            msg_box.setDefaultButton(proceed_button)
            msg_box.exec_()

            if msg_box.clickedButton() == cancel_button:
                self.log_signal.emit("ℹ️ Multi-part download enabling cancelled by user.")
                return

        self.allow_multipart_download_setting = not self.allow_multipart_download_setting
        self._update_multipart_toggle_button_text()
        self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
        self.log_signal.emit(f"ℹ️ Multi-part download set to: {'Enabled'if self .allow_multipart_download_setting else 'Disabled'}")

    def _open_known_txt_file(self):
        """Open Known.txt with the platform's default handler, warning first if
        the file does not exist yet."""
        if not os.path.exists(self.config_file):
            QMessageBox.warning(self, "File Not Found",
                                f"The file 'Known.txt' was not found at:\n{self .config_file }\n\n"
                                "It will be created automatically when you add a known name or close the application.")
            self.log_signal.emit(f"ℹ️ 'Known.txt' not found at {self .config_file }. It will be created later.")
            return

        try:
            # Platform-specific "open with default application" dispatch.
            if sys.platform == "win32":
                os.startfile(self.config_file)
            elif sys.platform == "darwin":
                subprocess.call(['open', self.config_file])
            else:
                subprocess.call(['xdg-open', self.config_file])
            self.log_signal.emit(f"ℹ️ Attempted to open '{os .path .basename (self .config_file )}' with the default editor.")
        except FileNotFoundError:
            QMessageBox.critical(self, "Error", f"Could not find '{os .path .basename (self .config_file )}' at {self .config_file } to open it.")
            self.log_signal.emit(f"❌ Error: '{os .path .basename (self .config_file )}' not found at {self .config_file } when trying to open.")
        except Exception as e:
            QMessageBox.critical(self, "Error Opening File", f"Could not open '{os .path .basename (self .config_file )}':\n{e }")
            self.log_signal.emit(f"❌ Error opening '{os .path .basename (self .config_file )}': {e }")

    def _show_add_to_filter_dialog(self):
        """Let the user pick entries from the known-names list and append them to
        the character filter input."""
        global KNOWN_NAMES
        if not KNOWN_NAMES:
            QMessageBox.information(self, "No Known Names", "Your 'Known.txt' list is empty. Add some names first.")
            return

        dialog = KnownNamesFilterDialog(KNOWN_NAMES, self)
        if dialog.exec_() == QDialog.Accepted:
            selected_entries = dialog.get_selected_entries()
            if selected_entries:
                self._add_names_to_character_filter_input(selected_entries)

    def _add_names_to_character_filter_input(self, selected_entries):
        """
        Adds the selected known name entries to the character filter input field.

        Group entries are rendered as "(alias1, alias2)~"; plain entries use
        their name. Empty strings are dropped; the result is comma-appended to
        any existing filter text.
        """
        if not selected_entries:
            return

        names_to_add_str_list = []
        for entry in selected_entries:
            if entry.get("is_group"):
                aliases_str = ", ".join(entry.get("aliases", []))
                names_to_add_str_list.append(f"({aliases_str })~")
            else:
                names_to_add_str_list.append(entry.get("name", ""))

        names_to_add_str_list = [s for s in names_to_add_str_list if s]

        if not names_to_add_str_list:
            return

        current_filter_text = self.character_input.text().strip()
        new_text_to_append = ", ".join(names_to_add_str_list)

        self.character_input.setText(f"{current_filter_text }, {new_text_to_append }" if current_filter_text else new_text_to_append)
        self.log_signal.emit(f"ℹ️ Added to character filter: {new_text_to_append }")

    def _update_favorite_scope_button_text(self):
        """Sync the favorite-scope toggle button caption with the current scope."""
        if not hasattr(self, 'favorite_scope_toggle_button') or not self.favorite_scope_toggle_button:
            return
        if self.favorite_download_scope == FAVORITE_SCOPE_SELECTED_LOCATION:
            self.favorite_scope_toggle_button.setText(self._tr("favorite_scope_selected_location_text", "Scope: Selected Location"))

        elif self.favorite_download_scope == FAVORITE_SCOPE_ARTIST_FOLDERS:
            self.favorite_scope_toggle_button.setText(self._tr("favorite_scope_artist_folders_text", "Scope: Artist Folders"))

        else:
            self.favorite_scope_toggle_button.setText(self._tr("favorite_scope_unknown_text", "Scope: Unknown"))

    def _cycle_favorite_scope(self):
        """Toggle the favorite-download scope between the two supported values."""
        if self.favorite_download_scope == FAVORITE_SCOPE_SELECTED_LOCATION:
            self.favorite_download_scope = FAVORITE_SCOPE_ARTIST_FOLDERS
        else:
            self.favorite_download_scope = FAVORITE_SCOPE_SELECTED_LOCATION
        self._update_favorite_scope_button_text()
        self.log_signal.emit(f"ℹ️ Favorite download scope changed to: '{self .favorite_download_scope }'")

    def _show_empty_popup(self):
        """Creates and shows the empty popup dialog."""
        if self.is_restore_pending:
            QMessageBox.information(self, self._tr("restore_pending_title", "Restore Pending"),
                                    self._tr("restore_pending_message_creator_selection",
                                             "Please 'Restore Download' or 'Discard Session' before selecting new creators."))
            return

        # Correctly create the dialog instance
        dialog = EmptyPopupDialog(self.app_base_dir, self)
        if dialog.exec_() == QDialog.Accepted:
            if hasattr(dialog, 'selected_creators_for_queue') and dialog.selected_creators_for_queue:
                self.favorite_download_queue.clear()

                # Build one queue item per selected creator from the popup.
                for creator_data in dialog.selected_creators_for_queue:
                    service = creator_data.get('service')
                    creator_id = creator_data.get('id')
                    creator_name = creator_data.get('name', 'Unknown Creator')
                    domain = dialog._get_domain_for_service(service)

                    if service and creator_id:
                        url = f"https://{domain}/{service}/user/{creator_id}"
                        queue_item = {
                            'url': url,
                            'name': creator_name,
                            'name_for_folder': creator_name,
                            'type': 'creator_popup_selection',
                            'scope_from_popup': dialog.current_scope_mode
                        }
                        self.favorite_download_queue.append(queue_item)

                if self.favorite_download_queue:
                    # --- NEW: This block adds the selected creator names to the input field ---
                    if hasattr(self, 'link_input'):
                        # 1. Get all the names from the queue
                        creator_names = [item['name'] for item in self.favorite_download_queue]
                        # 2. Join them into a single string
                        display_text = ", ".join(creator_names)
                        # 3. Set the text of the URL input field
                        self.link_input.setText(display_text)

                    self.log_signal.emit(f"ℹ️ {len(self.favorite_download_queue)} creators added to download queue from popup. Click 'Start Download' to process.")
                    if hasattr(self, 'link_input'):
                        self.last_link_input_text_for_queue_sync = self.link_input.text()

    def _show_favorite_artists_dialog(self):
        """Show the favorite-artists picker and queue every selected artist for
        download, then kick the queue if it is not already being processed."""
        if self._is_download_active() or self.is_processing_favorites_queue:
            QMessageBox.warning(self, "Busy", "Another download operation is already in progress.")
            return

        cookies_config = {
            'use_cookie': self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False,
            'cookie_text': self.cookie_text_input.text() if hasattr(self, 'cookie_text_input') else "",
            'selected_cookie_file': self.selected_cookie_filepath,
            'app_base_dir': self.app_base_dir
        }

        dialog = FavoriteArtistsDialog(self, cookies_config)
        if dialog.exec_() == QDialog.Accepted:
            selected_artists = dialog.get_selected_artists()
            if selected_artists:
                # With multiple artists the URL field only shows a placeholder;
                # a single artist's URL is placed directly in the field.
                if len(selected_artists) > 1 and self.link_input:
                    display_names = ", ".join([artist['name'] for artist in selected_artists])
                    if self.link_input:
                        self.link_input.clear()
                        self.link_input.setPlaceholderText(f"{len (selected_artists )} favorite artists selected for download queue.")
                    self.log_signal.emit(f"ℹ️ Multiple favorite artists selected. Displaying names: {display_names }")
                elif len(selected_artists) == 1:
                    self.link_input.setText(selected_artists[0]['url'])
                    self.log_signal.emit(f"ℹ️ Single favorite artist selected: {selected_artists [0 ]['name']}")

                self.log_signal.emit(f"ℹ️ Queuing {len (selected_artists )} favorite artist(s) for download.")
                for artist_data in selected_artists:
                    self.favorite_download_queue.append({'url': artist_data['url'], 'name': artist_data['name'], 'name_for_folder': artist_data['name'], 'type': 'artist'})

                if not self.is_processing_favorites_queue:
                    self._process_next_favorite_download()
            else:
                self.log_signal.emit("ℹ️ No favorite artists were selected for download.")
                QMessageBox.information(self,
                                        self._tr("fav_artists_no_selection_title", "No Selection"),
                                        self._tr("fav_artists_no_selection_message", "Please select at least one artist to download."))
        else:
            self.log_signal.emit("ℹ️ Favorite artists selection cancelled.")

    def _show_favorite_posts_dialog(self):
        """Show the favorite-posts picker (cookies required) and queue every
        selected post for download.

        Cookies are probed for both kemono.su and coomer.su; whichever domain(s)
        have valid cookies determine where favorites are fetched from. Without
        any valid cookies a CookieHelpDialog is shown instead.
        """
        if self._is_download_active() or self.is_processing_favorites_queue:
            QMessageBox.warning(self, "Busy", "Another download operation is already in progress.")
            return

        cookies_config = {
            'use_cookie': self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False,
            'cookie_text': self.cookie_text_input.text() if hasattr(self, 'cookie_text_input') else "",
            'selected_cookie_file': self.selected_cookie_filepath,
            'app_base_dir': self.app_base_dir
        }
        global KNOWN_NAMES

        target_domain_preference_for_fetch = None

        if cookies_config['use_cookie']:
            self.log_signal.emit("Favorite Posts: 'Use Cookie' is checked. Determining target domain...")
            kemono_cookies = prepare_cookies_for_request(
                cookies_config['use_cookie'],
                cookies_config['cookie_text'],
                cookies_config['selected_cookie_file'],
                cookies_config['app_base_dir'],
                lambda msg: self.log_signal.emit(f"[FavPosts Cookie Check - Kemono] {msg }"),
                target_domain="kemono.su"
            )
            coomer_cookies = prepare_cookies_for_request(
                cookies_config['use_cookie'],
                cookies_config['cookie_text'],
                cookies_config['selected_cookie_file'],
                cookies_config['app_base_dir'],
                lambda msg: self.log_signal.emit(f"[FavPosts Cookie Check - Coomer] {msg }"),
                target_domain="coomer.su"
            )

            kemono_ok = bool(kemono_cookies)
            coomer_ok = bool(coomer_cookies)

            if kemono_ok and not coomer_ok:
                target_domain_preference_for_fetch = "kemono.su"
                self.log_signal.emit(" ↳ Only Kemono.su cookies loaded. Will fetch favorites from Kemono.su only.")
            elif coomer_ok and not kemono_ok:
                target_domain_preference_for_fetch = "coomer.su"
                self.log_signal.emit(" ↳ Only Coomer.su cookies loaded. Will fetch favorites from Coomer.su only.")
            elif kemono_ok and coomer_ok:
                target_domain_preference_for_fetch = None  # None means: fetch from both domains
                self.log_signal.emit(" ↳ Cookies for both Kemono.su and Coomer.su loaded. Will attempt to fetch from both.")
            else:
                self.log_signal.emit(" ↳ No valid cookies loaded for Kemono.su or Coomer.su.")
                cookie_help_dialog = CookieHelpDialog(self, self)
                cookie_help_dialog.exec_()
                return
        else:
            self.log_signal.emit("Favorite Posts: 'Use Cookie' is NOT checked. Cookies are required.")
            cookie_help_dialog = CookieHelpDialog(self, self)
            cookie_help_dialog.exec_()
            return

        dialog = FavoritePostsDialog(self, cookies_config, KNOWN_NAMES, target_domain_preference_for_fetch)
        if dialog.exec_() == QDialog.Accepted:
            selected_posts = dialog.get_selected_posts()
            if selected_posts:
                self.log_signal.emit(f"ℹ️ Queuing {len (selected_posts )} favorite post(s) for download.")
                for post_data in selected_posts:
                    domain = self._get_domain_for_service(post_data['service'])
                    direct_post_url = f"https://{domain }/{post_data ['service']}/user/{str (post_data ['creator_id'])}/post/{str (post_data ['post_id'])}"

                    queue_item = {
                        'url': direct_post_url,
                        'name': post_data['title'],
                        'name_for_folder': post_data['creator_name_resolved'],
                        'type': 'post'
                    }
                    self.favorite_download_queue.append(queue_item)

                if not self.is_processing_favorites_queue:
                    self._process_next_favorite_download()
            else:
                self.log_signal.emit("ℹ️ No favorite posts were selected for download.")
        else:
            self.log_signal.emit("ℹ️ Favorite posts selection cancelled.")

    def _process_next_favorite_download(self):
        """Pop the next item off the favorites queue and start its download.

        Before the first item of a batch, warns once if Manga Mode is on with an
        empty character filter (unless links-only extraction is selected). The
        item's scope decides whether an artist-named subfolder is created under
        the main download directory.
        """
        if self.favorite_download_queue and not self.is_processing_favorites_queue:
            manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False
            char_filter_is_empty = not self.character_input.text().strip()
            extract_links_only = (self.radio_only_links and self.radio_only_links.isChecked())

            if manga_mode_is_checked and char_filter_is_empty and not extract_links_only:
                msg_box = QMessageBox(self)
                msg_box.setIcon(QMessageBox.Warning)
                msg_box.setWindowTitle("Manga Mode Filter Warning")
                msg_box.setText(
                    "Manga Mode is enabled, but 'Filter by Character(s)' is empty.\n\n"
                    "This is a one-time warning for this entire batch of downloads.\n\n"
                    "Proceeding without a filter may result in generic filenames and folders.\n\n"
                    "Proceed with the entire batch?"
                )
                proceed_button = msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole)
                cancel_button = msg_box.addButton("Cancel Entire Batch", QMessageBox.RejectRole)
                msg_box.exec_()
                if msg_box.clickedButton() == cancel_button:
                    self.log_signal.emit("❌ Entire favorite queue cancelled by user at Manga Mode warning.")
                    self.favorite_download_queue.clear()
                    self.is_processing_favorites_queue = False
                    self.set_ui_enabled(True)
                    return  # Stop processing the queue

        if self._is_download_active():
            self.log_signal.emit("ℹ️ Waiting for current download to finish before starting next favorite.")
            return
        if not self.favorite_download_queue:
            # Queue drained: close out the batch and re-enable the UI.
            if self.is_processing_favorites_queue:
                self.is_processing_favorites_queue = False
                item_type_log = "item"
                if hasattr(self, 'current_processing_favorite_item_info') and self.current_processing_favorite_item_info:
                    item_type_log = self.current_processing_favorite_item_info.get('type', 'item')
                self.log_signal.emit(f"✅ All {item_type_log } downloads from favorite queue have been processed.")
                self.set_ui_enabled(True)
            return
        if not self.is_processing_favorites_queue:
            self.is_processing_favorites_queue = True
        self.current_processing_favorite_item_info = self.favorite_download_queue.popleft()
        next_url = self.current_processing_favorite_item_info['url']
        item_display_name = self.current_processing_favorite_item_info.get('name', 'Unknown Item')

        item_type = self.current_processing_favorite_item_info.get('type', 'artist')
        self.log_signal.emit(f"▶️ Processing next favorite from queue: '{item_display_name }' ({next_url })")

        override_dir = None
        # Popup-sourced items carry their own scope; others use the global one.
        item_scope = self.current_processing_favorite_item_info.get('scope_from_popup')
        if item_scope is None:
            item_scope = self.favorite_download_scope

        main_download_dir = self.dir_input.text().strip()

        should_create_artist_folder = False
        if item_type == 'creator_popup_selection' and item_scope == EmptyPopupDialog.SCOPE_CREATORS:
            should_create_artist_folder = True
        elif item_type != 'creator_popup_selection' and self.favorite_download_scope == FAVORITE_SCOPE_ARTIST_FOLDERS:
            should_create_artist_folder = True

        if should_create_artist_folder and main_download_dir:
            folder_name_key = self.current_processing_favorite_item_info.get('name_for_folder', 'Unknown_Folder')
            item_specific_folder_name = clean_folder_name(folder_name_key)
            override_dir = os.path.normpath(os.path.join(main_download_dir, item_specific_folder_name))
            self.log_signal.emit(f" Scope requires artist folder. Target directory: '{override_dir }'")

        success_starting_download = self.start_download(direct_api_url=next_url, override_output_dir=override_dir)

        if not success_starting_download:
            self.log_signal.emit(f"⚠️ Failed to initiate download for '{item_display_name }'. Skipping this item in queue.")
            # Treat a failed start like a cancelled single-item download so the
            # finished-handler advances the queue.
            self.download_finished(total_downloaded=0, total_skipped=1, cancelled_by_user=True, kept_original_names_list=[])

class ExternalLinkDownloadThread(QThread):
    """A QThread to handle downloading multiple external links sequentially."""
    progress_signal = pyqtSignal(str)          # human-readable progress messages
    file_complete_signal = pyqtSignal(str, bool)  # (url, success)
    finished_signal = pyqtSignal()             # emitted once when all tasks are done

    def __init__(self, tasks_to_download, download_base_path, parent_logger_func, parent=None):
        """tasks_to_download: list of dicts with 'url', 'title', optional
        'platform' ('mega' / 'google drive' / 'dropbox') and optional 'key'
        (Mega decryption key)."""
        super().__init__(parent)
        self.tasks = tasks_to_download
        self.download_base_path = download_base_path
        self.parent_logger_func = parent_logger_func
        self.is_cancelled = False  # cooperative cancel flag, checked before each task

    def run(self):
        """Download each task in order, dispatching on its platform; emits
        file_complete_signal per task and finished_signal at the end."""
        self.progress_signal.emit(f"ℹ️ Starting external link download thread for {len (self .tasks )} link(s).")
        for i, task_info in enumerate(self.tasks):
            if self.is_cancelled:
                self.progress_signal.emit("External link download cancelled by user.")
                break

            platform = task_info.get('platform', 'unknown').lower()
            full_mega_url = task_info['url']
            post_title = task_info['title']
            key = task_info.get('key', '')

            self.progress_signal.emit(f"Download ({i +1 }/{len (self .tasks )}): Starting '{post_title }' ({platform .upper ()}) from {full_mega_url }")

            try:
                if platform == 'mega':
                    # Mega URLs may carry the decryption key separately; make
                    # sure the key ends up in the URL fragment before download.
                    if key:
                        parsed_original_url = urlparse(full_mega_url)
                        if key not in parsed_original_url.fragment:
                            base_url_no_fragment = full_mega_url.split('#')[0]
                            full_mega_url_with_key = f"{base_url_no_fragment }#{key }"
                            self.progress_signal.emit(f" Adjusted Mega URL with key: {full_mega_url_with_key }")
                        else:
                            full_mega_url_with_key = full_mega_url
                    else:
                        full_mega_url_with_key = full_mega_url
                    drive_download_mega_file(full_mega_url_with_key, self.download_base_path, logger_func=self.parent_logger_func)
                elif platform == 'google drive':
                    download_gdrive_file(full_mega_url, self.download_base_path, logger_func=self.parent_logger_func)
                elif platform == 'dropbox':
                    download_dropbox_file(full_mega_url, self.download_base_path, logger_func=self.parent_logger_func)
                else:
                    self.progress_signal.emit(f"⚠️ Unsupported platform '{platform }' for link: {full_mega_url }")
                    self.file_complete_signal.emit(full_mega_url, False)
                    continue
                self.file_complete_signal.emit(full_mega_url, True)
            except Exception as e:
                # One failing link must not abort the rest of the batch.
                self.progress_signal.emit(f"❌ Error downloading ({platform .upper ()}) link '{full_mega_url }' (from post '{post_title }'): {e }")
                self.file_complete_signal.emit(full_mega_url, False)
        self.finished_signal.emit()

    def cancel(self):
        """Request cooperative cancellation; takes effect before the next task."""
        self.is_cancelled = True
\ No newline at end of file
diff --git a/src/core/workers.py b/src/core/workers.py
index f2612c9..89bfb31 100644
--- a/src/core/workers.py
+++ b/src/core/workers.py
@@ -102,7 +102,6 @@ class PostProcessorWorker:
 keep_in_post_duplicates=False,
 session_file_path=None,
 session_lock=None,
- processed_ids_to_skip=None,
 text_only_scope=None,
 text_export_format='txt',
 single_pdf_mode=False,
@@ -160,7 +159,6 @@ class PostProcessorWorker:
 self.keep_in_post_duplicates = 
keep_in_post_duplicates self.session_file_path = session_file_path self.session_lock = session_lock - self.processed_ids_to_skip = processed_ids_to_skip self.text_only_scope = text_only_scope self.text_export_format = text_export_format self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE @@ -372,10 +370,10 @@ class PostProcessorWorker: filename_to_save_in_main_path =cleaned_original_api_filename was_original_name_kept_flag =False + + if self .remove_from_filename_words_list and filename_to_save_in_main_path : - # Store the name before this specific modification, so we can revert if it gets destroyed. - name_before_word_removal = filename_to_save_in_main_path - + base_name_for_removal ,ext_for_removal =os .path .splitext (filename_to_save_in_main_path ) modified_base_name =base_name_for_removal for word_to_remove in self .remove_from_filename_words_list : @@ -385,13 +383,12 @@ class PostProcessorWorker: modified_base_name =re .sub (r'[_.\s-]+',' ',modified_base_name ) modified_base_name =re .sub (r'\s+',' ',modified_base_name ) modified_base_name =modified_base_name .strip () - if modified_base_name and modified_base_name !=ext_for_removal .lstrip ('.'): filename_to_save_in_main_path =modified_base_name +ext_for_removal else : - # If the name was stripped to nothing, revert to the name from before this block. - self.logger(f" ⚠️ Filename was empty after removing words. Reverting to '{name_before_word_removal}'.") - filename_to_save_in_main_path = name_before_word_removal + filename_to_save_in_main_path =base_name_for_removal +ext_for_removal + + if not self .download_thumbnails : @@ -886,820 +883,876 @@ class PostProcessorWorker: if data_to_write_io and hasattr (data_to_write_io ,'close'): data_to_write_io .close () - def process(self): - # --- FIX START: This entire method is now wrapped in a try...finally block --- - # to ensure it always reports completion back to the main window. - - # Initialize result values to safe defaults for failure cases. 
- total_downloaded_this_post = 0 - total_skipped_this_post = 0 - kept_original_filenames_for_log = [] - retryable_failures_this_post = [] - permanent_failures_this_post = [] - history_data_for_this_post = None - temp_filepath_for_return = None + def process (self ): + if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None + if self .check_cancel ():return 0 ,0 ,[],[],[],None, None + current_character_filters =self ._get_current_character_filters () + kept_original_filenames_for_log =[] + retryable_failures_this_post =[] + permanent_failures_this_post =[] + total_downloaded_this_post =0 + total_skipped_this_post =0 + history_data_for_this_post =None - try: - post_id_for_skip_check = self.post.get('id') - if self.processed_ids_to_skip and post_id_for_skip_check in self.processed_ids_to_skip: - self.logger(f" -> Skipping Post {post_id_for_skip_check} (already processed in previous session).") - # We must emit 'worker_finished' so the main UI can count this as a completed (skipped) task. 
- num_potential_files_in_post = len(self.post.get('attachments', [])) + (1 if self.post.get('file') else 0) - total_skipped_this_post = num_potential_files_in_post - # The rest of the result tuple can be empty defaults - result_tuple = (0, total_skipped_this_post, [], [], [], None, None) - self._emit_signal('worker_finished', result_tuple) - return result_tuple + parsed_api_url =urlparse (self .api_url_input ) + referer_url =f"https://{parsed_api_url .netloc }/" + headers ={'User-Agent':'Mozilla/5.0','Referer':referer_url ,'Accept':'*/*'} + link_pattern =re .compile (r"""]*>(.*?)""", + re .IGNORECASE |re .DOTALL ) + post_data =self .post + post_title =post_data .get ('title','')or 'untitled_post' + post_id =post_data .get ('id','unknown_id') + post_main_file_info =post_data .get ('file') + post_attachments =post_data .get ('attachments',[]) - # ALL OF THE ORIGINAL LOGIC OF THE `process` METHOD GOES HERE - if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None - if self .check_cancel ():return 0 ,0 ,[],[],[],None, None - current_character_filters =self ._get_current_character_filters () - - parsed_api_url =urlparse (self .api_url_input ) - referer_url =f"https://{parsed_api_url .netloc }/" - headers ={'User-Agent':'Mozilla/5.0','Referer':referer_url ,'Accept':'*/*'} - link_pattern =re .compile (r"""]*>(.*?)""", - re .IGNORECASE |re .DOTALL ) - post_data =self .post - post_title =post_data .get ('title','')or 'untitled_post' - post_id =post_data .get ('id','unknown_id') - post_main_file_info =post_data .get ('file') - post_attachments =post_data .get ('attachments',[]) + effective_unwanted_keywords_for_folder_naming =self .unwanted_keywords .copy () + is_full_creator_download_no_char_filter =not self .target_post_id_from_initial_url and not current_character_filters + if is_full_creator_download_no_char_filter and self .creator_download_folder_ignore_words : + self .logger (f" Applying creator download 
specific folder ignore words ({len (self .creator_download_folder_ignore_words )} words).") + effective_unwanted_keywords_for_folder_naming .update (self .creator_download_folder_ignore_words ) - effective_unwanted_keywords_for_folder_naming =self .unwanted_keywords .copy () - is_full_creator_download_no_char_filter =not self .target_post_id_from_initial_url and not current_character_filters - if is_full_creator_download_no_char_filter and self .creator_download_folder_ignore_words : - self .logger (f" Applying creator download specific folder ignore words ({len (self .creator_download_folder_ignore_words )} words).") - effective_unwanted_keywords_for_folder_naming .update (self .creator_download_folder_ignore_words ) - - post_content_html =post_data .get ('content','') - self .logger (f"\n--- Processing Post {post_id } ('{post_title [:50 ]}...') (Thread: {threading .current_thread ().name }) ---") - num_potential_files_in_post =len (post_attachments or [])+(1 if post_main_file_info and post_main_file_info .get ('path')else 0 ) - post_is_candidate_by_title_char_match =False - char_filter_that_matched_title =None - post_is_candidate_by_comment_char_match =False - post_is_candidate_by_file_char_match_in_comment_scope =False - char_filter_that_matched_file_in_comment_scope =None - char_filter_that_matched_comment =None - if current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH ): - if self ._check_pause (f"Character title filter for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - for idx ,filter_item_obj in enumerate (current_character_filters ): - if self .check_cancel ():break - terms_to_check_for_title =list (filter_item_obj ["aliases"]) - if filter_item_obj ["is_group"]: - if filter_item_obj ["name"]not in terms_to_check_for_title : - terms_to_check_for_title .append (filter_item_obj ["name"]) - unique_terms_for_title_check =list (set (terms_to_check_for_title )) - for 
term_to_match in unique_terms_for_title_check : - match_found_for_term =is_title_match_for_character (post_title ,term_to_match ) - if match_found_for_term : - post_is_candidate_by_title_char_match =True - char_filter_that_matched_title =filter_item_obj - self .logger (f" Post title matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}', Scope: {self .char_filter_scope }). Post is candidate.") + post_content_html =post_data .get ('content','') + self .logger (f"\n--- Processing Post {post_id } ('{post_title [:50 ]}...') (Thread: {threading .current_thread ().name }) ---") + num_potential_files_in_post =len (post_attachments or [])+(1 if post_main_file_info and post_main_file_info .get ('path')else 0 ) + post_is_candidate_by_title_char_match =False + char_filter_that_matched_title =None + post_is_candidate_by_comment_char_match =False + post_is_candidate_by_file_char_match_in_comment_scope =False + char_filter_that_matched_file_in_comment_scope =None + char_filter_that_matched_comment =None + if current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH ): + if self ._check_pause (f"Character title filter for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + for idx ,filter_item_obj in enumerate (current_character_filters ): + if self .check_cancel ():break + terms_to_check_for_title =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]: + if filter_item_obj ["name"]not in terms_to_check_for_title : + terms_to_check_for_title .append (filter_item_obj ["name"]) + unique_terms_for_title_check =list (set (terms_to_check_for_title )) + for term_to_match in unique_terms_for_title_check : + match_found_for_term =is_title_match_for_character (post_title ,term_to_match ) + if match_found_for_term : + post_is_candidate_by_title_char_match =True + char_filter_that_matched_title =filter_item_obj + self .logger (f" Post title matches char filter 
term '{term_to_match }' (from group/name '{filter_item_obj ['name']}', Scope: {self .char_filter_scope }). Post is candidate.") + break + if post_is_candidate_by_title_char_match :break + all_files_from_post_api_for_char_check =[] + api_file_domain_for_char_check =urlparse (self .api_url_input ).netloc + if not api_file_domain_for_char_check or not any (d in api_file_domain_for_char_check .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): + api_file_domain_for_char_check ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" + if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'): + original_api_name =post_main_file_info .get ('name')or os .path .basename (post_main_file_info ['path'].lstrip ('/')) + if original_api_name : + all_files_from_post_api_for_char_check .append ({'_original_name_for_log':original_api_name }) + for att_info in post_attachments : + if isinstance (att_info ,dict )and att_info .get ('path'): + original_api_att_name =att_info .get ('name')or os .path .basename (att_info ['path'].lstrip ('/')) + if original_api_att_name : + all_files_from_post_api_for_char_check .append ({'_original_name_for_log':original_api_att_name }) + if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS : + self .logger (f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id }'.") + if self ._check_pause (f"File check (comments scope) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + for file_info_item in all_files_from_post_api_for_char_check : + if self .check_cancel ():break + current_api_original_filename_for_check =file_info_item .get ('_original_name_for_log') + if not current_api_original_filename_for_check :continue + for filter_item_obj in current_character_filters : + terms_to_check =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]and filter_item_obj 
["name"]not in terms_to_check : + terms_to_check .append (filter_item_obj ["name"]) + for term_to_match in terms_to_check : + if is_filename_match_for_character (current_api_original_filename_for_check ,term_to_match ): + post_is_candidate_by_file_char_match_in_comment_scope =True + char_filter_that_matched_file_in_comment_scope =filter_item_obj + self .logger (f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check }' matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}'). Post is candidate.") break - if post_is_candidate_by_title_char_match :break - all_files_from_post_api_for_char_check =[] - api_file_domain_for_char_check =urlparse (self .api_url_input ).netloc - if not api_file_domain_for_char_check or not any (d in api_file_domain_for_char_check .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): - api_file_domain_for_char_check ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" - if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'): - original_api_name =post_main_file_info .get ('name')or os .path .basename (post_main_file_info ['path'].lstrip ('/')) - if original_api_name : - all_files_from_post_api_for_char_check .append ({'_original_name_for_log':original_api_name }) - for att_info in post_attachments : - if isinstance (att_info ,dict )and att_info .get ('path'): - original_api_att_name =att_info .get ('name')or os .path .basename (att_info ['path'].lstrip ('/')) - if original_api_att_name : - all_files_from_post_api_for_char_check .append ({'_original_name_for_log':original_api_att_name }) - if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS : - self .logger (f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id }'.") - if self ._check_pause (f"File check (comments scope) for post {post_id }"):return 0 
,num_potential_files_in_post ,[],[],[],None - for file_info_item in all_files_from_post_api_for_char_check : - if self .check_cancel ():break - current_api_original_filename_for_check =file_info_item .get ('_original_name_for_log') - if not current_api_original_filename_for_check :continue - for filter_item_obj in current_character_filters : - terms_to_check =list (filter_item_obj ["aliases"]) - if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check : - terms_to_check .append (filter_item_obj ["name"]) - for term_to_match in terms_to_check : - if is_filename_match_for_character (current_api_original_filename_for_check ,term_to_match ): - post_is_candidate_by_file_char_match_in_comment_scope =True - char_filter_that_matched_file_in_comment_scope =filter_item_obj - self .logger (f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check }' matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}'). Post is candidate.") - break - if post_is_candidate_by_file_char_match_in_comment_scope :break if post_is_candidate_by_file_char_match_in_comment_scope :break - self .logger (f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope }") - if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS : - if not post_is_candidate_by_file_char_match_in_comment_scope : - if self ._check_pause (f"Comment check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - self .logger (f" [Char Scope: Comments] Phase 2: No file match found. 
Checking post comments for post ID '{post_id }'.") - try : - parsed_input_url_for_comments =urlparse (self .api_url_input ) - api_domain_for_comments =parsed_input_url_for_comments .netloc - if not any (d in api_domain_for_comments .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): - self .logger (f"⚠️ Unrecognized domain '{api_domain_for_comments }' for comment API. Defaulting based on service.") - api_domain_for_comments ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" - comments_data =fetch_post_comments ( - api_domain_for_comments ,self .service ,self .user_id ,post_id , - headers ,self .logger ,self .cancellation_event ,self .pause_event , - cookies_dict =prepare_cookies_for_request ( - self .use_cookie ,self .cookie_text ,self .selected_cookie_file ,self .app_base_dir ,self .logger - ) - ) - if comments_data : - self .logger (f" Fetched {len (comments_data )} comments for post {post_id }.") - for comment_item_idx ,comment_item in enumerate (comments_data ): - if self .check_cancel ():break - raw_comment_content =comment_item .get ('content','') - if not raw_comment_content :continue - cleaned_comment_text =strip_html_tags (raw_comment_content ) - if not cleaned_comment_text .strip ():continue - for filter_item_obj in current_character_filters : - terms_to_check_comment =list (filter_item_obj ["aliases"]) - if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_comment : - terms_to_check_comment .append (filter_item_obj ["name"]) - for term_to_match_comment in terms_to_check_comment : - if is_title_match_for_character (cleaned_comment_text ,term_to_match_comment ): - post_is_candidate_by_comment_char_match =True - char_filter_that_matched_comment =filter_item_obj - self .logger (f" Match Found (Comment in Comments Scope): Comment in post {post_id } matches char filter term '{term_to_match_comment }' (from group/name '{filter_item_obj ['name']}'). 
Post is candidate.") - self .logger (f" Matching comment (first 100 chars): '{cleaned_comment_text [:100 ]}...'") - break - if post_is_candidate_by_comment_char_match :break - if post_is_candidate_by_comment_char_match :break - else : - self .logger (f" No comments found or fetched for post {post_id } to check against character filters.") - except RuntimeError as e_fetch_comment : - self .logger (f" ⚠️ Error fetching or processing comments for post {post_id }: {e_fetch_comment }") - except Exception as e_generic_comment : - self .logger (f" ❌ Unexpected error during comment processing for post {post_id }: {e_generic_comment }\n{traceback .format_exc (limit =2 )}") - self .logger (f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match }") - else : - self .logger (f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id }' because a file match already made it a candidate.") - if current_character_filters : - if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match : - self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.") - self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter") - return 0 ,num_potential_files_in_post ,[],[],[],None, None - if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match : - self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'") - if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'): - self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)") - return 0 ,num_potential_files_in_post ,[],[],[],None, None - if self .skip_words_list and (self 
.skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ): - if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - post_title_lower =post_title .lower () - for skip_word in self .skip_words_list : - if skip_word .lower ()in post_title_lower : - self .logger (f" -> Skip Post (Keyword in Title '{skip_word }'): '{post_title [:50 ]}...'. Scope: {self .skip_words_scope }") - return 0 ,num_potential_files_in_post ,[],[],[],None, None - if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match : - self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.") - self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)") - return 0 ,num_potential_files_in_post ,[],[],[],None, None - if not isinstance (post_attachments ,list ): - self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). 
Skipping attachments.") - post_attachments =[] - base_folder_names_for_post_content =[] - determined_post_save_path_for_history =self .override_output_dir if self .override_output_dir else self .download_root - if not self .extract_links_only and self .use_subfolders : - if self ._check_pause (f"Subfolder determination for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - primary_char_filter_for_folder =None - log_reason_for_folder ="" - if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment : - if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope : - primary_char_filter_for_folder =char_filter_that_matched_file_in_comment_scope - log_reason_for_folder ="Matched char filter in filename (Comments scope)" - elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment : - primary_char_filter_for_folder =char_filter_that_matched_comment - log_reason_for_folder ="Matched char filter in comments (Comments scope, no file match)" - elif (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and char_filter_that_matched_title : - primary_char_filter_for_folder =char_filter_that_matched_title - log_reason_for_folder ="Matched char filter in title" - if primary_char_filter_for_folder : - base_folder_names_for_post_content =[clean_folder_name (primary_char_filter_for_folder ["name"])] - cleaned_primary_folder_name =clean_folder_name (primary_char_filter_for_folder ["name"]) - if cleaned_primary_folder_name .lower ()in effective_unwanted_keywords_for_folder_naming and cleaned_primary_folder_name .lower ()!="untitled_folder": - self .logger (f" ⚠️ Primary char filter folder name '{cleaned_primary_folder_name }' is in ignore list. 
Using generic name.") - base_folder_names_for_post_content =["Generic Post Content"] - else : - base_folder_names_for_post_content =[cleaned_primary_folder_name ] - self .logger (f" Base folder name(s) for post content ({log_reason_for_folder }): {', '.join (base_folder_names_for_post_content )}") - elif not current_character_filters : - derived_folders_from_title_via_known_txt =match_folders_from_title ( - post_title , - self .known_names , - effective_unwanted_keywords_for_folder_naming + if post_is_candidate_by_file_char_match_in_comment_scope :break + self .logger (f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope }") + if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS : + if not post_is_candidate_by_file_char_match_in_comment_scope : + if self ._check_pause (f"Comment check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + self .logger (f" [Char Scope: Comments] Phase 2: No file match found. Checking post comments for post ID '{post_id }'.") + try : + parsed_input_url_for_comments =urlparse (self .api_url_input ) + api_domain_for_comments =parsed_input_url_for_comments .netloc + if not any (d in api_domain_for_comments .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): + self .logger (f"⚠️ Unrecognized domain '{api_domain_for_comments }' for comment API. 
Defaulting based on service.") + api_domain_for_comments ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" + comments_data =fetch_post_comments ( + api_domain_for_comments ,self .service ,self .user_id ,post_id , + headers ,self .logger ,self .cancellation_event ,self .pause_event , + cookies_dict =prepare_cookies_for_request ( + self .use_cookie ,self .cookie_text ,self .selected_cookie_file ,self .app_base_dir ,self .logger ) - valid_derived_folders_from_title_known_txt =[ - name for name in derived_folders_from_title_via_known_txt - if name and name .strip ()and name .lower ()!="untitled_folder" - ] - if valid_derived_folders_from_title_known_txt : - base_folder_names_for_post_content .extend (valid_derived_folders_from_title_known_txt ) - self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Post Title): {', '.join (base_folder_names_for_post_content )}") + ) + if comments_data : + self .logger (f" Fetched {len (comments_data )} comments for post {post_id }.") + for comment_item_idx ,comment_item in enumerate (comments_data ): + if self .check_cancel ():break + raw_comment_content =comment_item .get ('content','') + if not raw_comment_content :continue + cleaned_comment_text =strip_html_tags (raw_comment_content ) + if not cleaned_comment_text .strip ():continue + for filter_item_obj in current_character_filters : + terms_to_check_comment =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_comment : + terms_to_check_comment .append (filter_item_obj ["name"]) + for term_to_match_comment in terms_to_check_comment : + if is_title_match_for_character (cleaned_comment_text ,term_to_match_comment ): + post_is_candidate_by_comment_char_match =True + char_filter_that_matched_comment =filter_item_obj + self .logger (f" Match Found (Comment in Comments Scope): Comment in post {post_id } matches char filter term '{term_to_match_comment }' (from group/name 
'{filter_item_obj ['name']}'). Post is candidate.") + self .logger (f" Matching comment (first 100 chars): '{cleaned_comment_text [:100 ]}...'") + break + if post_is_candidate_by_comment_char_match :break + if post_is_candidate_by_comment_char_match :break else : - candidate_name_from_title_basic_clean =extract_folder_name_from_title ( - post_title , - FOLDER_NAME_STOP_WORDS - ) - title_is_only_creator_ignored_words =False - if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words : - candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()} - if candidate_title_words and candidate_title_words .issubset (self .creator_download_folder_ignore_words ): - title_is_only_creator_ignored_words =True - self .logger (f" Title-derived name '{candidate_name_from_title_basic_clean }' consists only of creator-specific ignore words.") - if title_is_only_creator_ignored_words : - self .logger (f" Attempting Known.txt match on filenames as title was poor ('{candidate_name_from_title_basic_clean }').") - filenames_to_check =[ - f_info ['_original_name_for_log']for f_info in all_files_from_post_api_for_char_check - if f_info .get ('_original_name_for_log') - ] - derived_folders_from_filenames_known_txt =set () - if filenames_to_check : - for fname in filenames_to_check : - matches =match_folders_from_title ( - fname , - self .known_names , - effective_unwanted_keywords_for_folder_naming - ) - for m in matches : - if m and m .strip ()and m .lower ()!="untitled_folder": - derived_folders_from_filenames_known_txt .add (m ) - if derived_folders_from_filenames_known_txt : - base_folder_names_for_post_content .extend (list (derived_folders_from_filenames_known_txt )) - self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Filenames): {', '.join (base_folder_names_for_post_content )}") - else : - final_title_extract 
=extract_folder_name_from_title ( - post_title ,effective_unwanted_keywords_for_folder_naming + self .logger (f" No comments found or fetched for post {post_id } to check against character filters.") + except RuntimeError as e_fetch_comment : + self .logger (f" ⚠️ Error fetching or processing comments for post {post_id }: {e_fetch_comment }") + except Exception as e_generic_comment : + self .logger (f" ❌ Unexpected error during comment processing for post {post_id }: {e_generic_comment }\n{traceback .format_exc (limit =2 )}") + self .logger (f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match }") + else : + self .logger (f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id }' because a file match already made it a candidate.") + if current_character_filters : + if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match : + self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.") + self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match : + self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'") + if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'): + self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ): + if self ._check_pause (f"Skip words (post title) for post {post_id 
}"):return 0 ,num_potential_files_in_post ,[],[],[],None + post_title_lower =post_title .lower () + for skip_word in self .skip_words_list : + if skip_word .lower ()in post_title_lower : + self .logger (f" -> Skip Post (Keyword in Title '{skip_word }'): '{post_title [:50 ]}...'. Scope: {self .skip_words_scope }") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match : + self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.") + self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if not isinstance (post_attachments ,list ): + self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). 
Skipping attachments.") + post_attachments =[] + base_folder_names_for_post_content =[] + determined_post_save_path_for_history =self .override_output_dir if self .override_output_dir else self .download_root + if not self .extract_links_only and self .use_subfolders : + if self ._check_pause (f"Subfolder determination for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + primary_char_filter_for_folder =None + log_reason_for_folder ="" + if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment : + if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope : + primary_char_filter_for_folder =char_filter_that_matched_file_in_comment_scope + log_reason_for_folder ="Matched char filter in filename (Comments scope)" + elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment : + primary_char_filter_for_folder =char_filter_that_matched_comment + log_reason_for_folder ="Matched char filter in comments (Comments scope, no file match)" + elif (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and char_filter_that_matched_title : + primary_char_filter_for_folder =char_filter_that_matched_title + log_reason_for_folder ="Matched char filter in title" + if primary_char_filter_for_folder : + base_folder_names_for_post_content =[clean_folder_name (primary_char_filter_for_folder ["name"])] + cleaned_primary_folder_name =clean_folder_name (primary_char_filter_for_folder ["name"]) + if cleaned_primary_folder_name .lower ()in effective_unwanted_keywords_for_folder_naming and cleaned_primary_folder_name .lower ()!="untitled_folder": + self .logger (f" ⚠️ Primary char filter folder name '{cleaned_primary_folder_name }' is in ignore list. 
Using generic name.") + base_folder_names_for_post_content =["Generic Post Content"] + else : + base_folder_names_for_post_content =[cleaned_primary_folder_name ] + self .logger (f" Base folder name(s) for post content ({log_reason_for_folder }): {', '.join (base_folder_names_for_post_content )}") + elif not current_character_filters : + + derived_folders_from_title_via_known_txt =match_folders_from_title ( + post_title , + self .known_names , + effective_unwanted_keywords_for_folder_naming + ) + + valid_derived_folders_from_title_known_txt =[ + name for name in derived_folders_from_title_via_known_txt + if name and name .strip ()and name .lower ()!="untitled_folder" + ] + + if valid_derived_folders_from_title_known_txt : + base_folder_names_for_post_content .extend (valid_derived_folders_from_title_known_txt ) + self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Post Title): {', '.join (base_folder_names_for_post_content )}") + else : + + + + + candidate_name_from_title_basic_clean =extract_folder_name_from_title ( + post_title , + FOLDER_NAME_STOP_WORDS + ) + + title_is_only_creator_ignored_words =False + if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words : + + candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()} + if candidate_title_words and candidate_title_words .issubset (self .creator_download_folder_ignore_words ): + title_is_only_creator_ignored_words =True + self .logger (f" Title-derived name '{candidate_name_from_title_basic_clean }' consists only of creator-specific ignore words.") + + if title_is_only_creator_ignored_words : + + self .logger (f" Attempting Known.txt match on filenames as title was poor ('{candidate_name_from_title_basic_clean }').") + + filenames_to_check =[ + f_info ['_original_name_for_log']for f_info in all_files_from_post_api_for_char_check + if f_info 
.get ('_original_name_for_log') + ] + + derived_folders_from_filenames_known_txt =set () + if filenames_to_check : + for fname in filenames_to_check : + matches =match_folders_from_title ( + fname , + self .known_names , + effective_unwanted_keywords_for_folder_naming ) - base_folder_names_for_post_content .append (final_title_extract ) - self .logger (f" No Known.txt match from filenames. Using title-derived name (with full ignore list): '{final_title_extract }'") + for m in matches : + if m and m .strip ()and m .lower ()!="untitled_folder": + derived_folders_from_filenames_known_txt .add (m ) + + if derived_folders_from_filenames_known_txt : + base_folder_names_for_post_content .extend (list (derived_folders_from_filenames_known_txt )) + self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Filenames): {', '.join (base_folder_names_for_post_content )}") else : - extracted_name_from_title_full_ignore =extract_folder_name_from_title ( + final_title_extract =extract_folder_name_from_title ( post_title ,effective_unwanted_keywords_for_folder_naming ) - base_folder_names_for_post_content .append (extracted_name_from_title_full_ignore ) - self .logger (f" Base folder name(s) for post content (Generic title parsing - title not solely creator-ignored words): {', '.join (base_folder_names_for_post_content )}") - base_folder_names_for_post_content =[ - name for name in base_folder_names_for_post_content if name and name .strip () - ] - if not base_folder_names_for_post_content : - final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content") - base_folder_names_for_post_content =[final_fallback_name ] - self .logger (f" Ultimate fallback folder name: {final_fallback_name }") - if base_folder_names_for_post_content : - determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ]) - if not self .extract_links_only and self 
.use_post_subfolders : - cleaned_post_title_for_sub =clean_folder_name (post_title ) - post_id_for_fallback =self .post .get ('id','unknown_id') - if not cleaned_post_title_for_sub or cleaned_post_title_for_sub =="untitled_folder": - self .logger (f" ⚠️ Post title '{post_title }' resulted in a generic subfolder name. Using 'post_{post_id_for_fallback }' as base.") - original_cleaned_post_title_for_sub =f"post_{post_id_for_fallback }" - else : - original_cleaned_post_title_for_sub =cleaned_post_title_for_sub - if self.use_date_prefix_for_subfolder: - published_date_str = self.post.get('published') or self.post.get('added') - if published_date_str: - try: - date_prefix = published_date_str.split('T')[0] - original_cleaned_post_title_for_sub = f"{date_prefix} {original_cleaned_post_title_for_sub}" - self.logger(f" ℹ️ Applying date prefix to subfolder: '{original_cleaned_post_title_for_sub}'") - except Exception as e: - self.logger(f" ⚠️ Could not parse date '{published_date_str}' for prefix. Using original name. Error: {e}") - else: - self.logger(" ⚠️ 'Date Prefix' is checked, but post has no 'published' or 'added' date. Omitting prefix.") - base_path_for_post_subfolder =determined_post_save_path_for_history - suffix_counter =0 - final_post_subfolder_name ="" - while True : - if suffix_counter ==0 : - name_candidate =original_cleaned_post_title_for_sub + base_folder_names_for_post_content .append (final_title_extract ) + self .logger (f" No Known.txt match from filenames. 
Using title-derived name (with full ignore list): '{final_title_extract }'") else : - name_candidate =f"{original_cleaned_post_title_for_sub }_{suffix_counter }" - potential_post_subfolder_path =os .path .join (base_path_for_post_subfolder ,name_candidate ) - try : - os .makedirs (potential_post_subfolder_path ,exist_ok =False ) - final_post_subfolder_name =name_candidate - if suffix_counter >0 : - self .logger (f" Post subfolder name conflict: Using '{final_post_subfolder_name }' instead of '{original_cleaned_post_title_for_sub }' to avoid mixing posts.") - break - except FileExistsError : - suffix_counter +=1 - if suffix_counter >100 : - self .logger (f" ⚠️ Exceeded 100 attempts to find unique subfolder name for '{original_cleaned_post_title_for_sub }'. Using UUID.") - final_post_subfolder_name =f"{original_cleaned_post_title_for_sub }_{uuid .uuid4 ().hex [:8 ]}" - os .makedirs (os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name ),exist_ok =True ) - break - except OSError as e_mkdir : - self .logger (f" ❌ Error creating directory '{potential_post_subfolder_path }': {e_mkdir }. 
Files for this post might be saved in parent or fail.") - final_post_subfolder_name =original_cleaned_post_title_for_sub - break - determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name ) + extracted_name_from_title_full_ignore =extract_folder_name_from_title ( + post_title ,effective_unwanted_keywords_for_folder_naming + ) + base_folder_names_for_post_content .append (extracted_name_from_title_full_ignore ) + self .logger (f" Base folder name(s) for post content (Generic title parsing - title not solely creator-ignored words): {', '.join (base_folder_names_for_post_content )}") - if self.filter_mode == 'text_only' and not self.extract_links_only: - self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})") - post_title_lower = post_title.lower() - if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH): - for skip_word in self.skip_words_list: - if skip_word.lower() in post_title_lower: - self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.") - return 0, num_potential_files_in_post, [], [], [], None, None - if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope: - self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.") - return 0, num_potential_files_in_post, [], [], [], None, None - raw_text_content = "" - final_post_data = post_data - if self.text_only_scope == 'content' and 'content' not in final_post_data: - self.logger(f" Post {post_id} is missing 'content' field, fetching full data...") + base_folder_names_for_post_content =[ + name for name in base_folder_names_for_post_content if name and name .strip () + ] + if not base_folder_names_for_post_content : + final_fallback_name =clean_folder_name (post_title if post_title and post_title 
.strip ()else "Generic Post Content") + base_folder_names_for_post_content =[final_fallback_name ] + self .logger (f" Ultimate fallback folder name: {final_fallback_name }") + + if base_folder_names_for_post_content : + determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ]) + + if not self .extract_links_only and self .use_post_subfolders : + cleaned_post_title_for_sub =clean_folder_name (post_title ) + post_id_for_fallback =self .post .get ('id','unknown_id') + + + if not cleaned_post_title_for_sub or cleaned_post_title_for_sub =="untitled_folder": + self .logger (f" ⚠️ Post title '{post_title }' resulted in a generic subfolder name. Using 'post_{post_id_for_fallback }' as base.") + original_cleaned_post_title_for_sub =f"post_{post_id_for_fallback }" + else : + original_cleaned_post_title_for_sub =cleaned_post_title_for_sub + + if self.use_date_prefix_for_subfolder: + # Prioritize 'published' date, fall back to 'added' date + published_date_str = self.post.get('published') or self.post.get('added') + if published_date_str: + try: + # Extract just the date part (YYYY-MM-DD) + date_prefix = published_date_str.split('T')[0] + # Prepend the date to the folder name + original_cleaned_post_title_for_sub = f"{date_prefix} {original_cleaned_post_title_for_sub}" + self.logger(f" ℹ️ Applying date prefix to subfolder: '{original_cleaned_post_title_for_sub}'") + except Exception as e: + self.logger(f" ⚠️ Could not parse date '{published_date_str}' for prefix. Using original name. Error: {e}") + else: + self.logger(" ⚠️ 'Date Prefix' is checked, but post has no 'published' or 'added' date. 
Omitting prefix.") + + base_path_for_post_subfolder =determined_post_save_path_for_history + + suffix_counter =0 + final_post_subfolder_name ="" + + while True : + if suffix_counter ==0 : + name_candidate =original_cleaned_post_title_for_sub + else : + name_candidate =f"{original_cleaned_post_title_for_sub }_{suffix_counter }" + + potential_post_subfolder_path =os .path .join (base_path_for_post_subfolder ,name_candidate ) + + try : + os .makedirs (potential_post_subfolder_path ,exist_ok =False ) + final_post_subfolder_name =name_candidate + if suffix_counter >0 : + self .logger (f" Post subfolder name conflict: Using '{final_post_subfolder_name }' instead of '{original_cleaned_post_title_for_sub }' to avoid mixing posts.") + break + except FileExistsError : + suffix_counter +=1 + if suffix_counter >100 : + self .logger (f" ⚠️ Exceeded 100 attempts to find unique subfolder name for '{original_cleaned_post_title_for_sub }'. Using UUID.") + final_post_subfolder_name =f"{original_cleaned_post_title_for_sub }_{uuid .uuid4 ().hex [:8 ]}" + os .makedirs (os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name ),exist_ok =True ) + break + except OSError as e_mkdir : + self .logger (f" ❌ Error creating directory '{potential_post_subfolder_path }': {e_mkdir }. 
Files for this post might be saved in parent or fail.") + final_post_subfolder_name =original_cleaned_post_title_for_sub + break + + determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name ) + if self.filter_mode == 'text_only' and not self.extract_links_only: + self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})") + + # --- Apply Title-based filters to ensure post is a candidate --- + post_title_lower = post_title.lower() + if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH): + for skip_word in self.skip_words_list: + if skip_word.lower() in post_title_lower: + self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.") + return 0, num_potential_files_in_post, [], [], [], None, None + + if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope: + self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.") + return 0, num_potential_files_in_post, [], [], [], None, None + + # --- Get the text content based on scope --- + raw_text_content = "" + final_post_data = post_data + + # Fetch full post data if content is missing and scope is 'content' + if self.text_only_scope == 'content' and 'content' not in final_post_data: + self.logger(f" Post {post_id} is missing 'content' field, fetching full data...") + parsed_url = urlparse(self.api_url_input) + api_domain = parsed_url.netloc + cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain) + + from .api_client import fetch_single_post_data # Local import to avoid circular dependency issues + full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, 
cookies_dict=cookies) + if full_data: + final_post_data = full_data + + if self.text_only_scope == 'content': + raw_text_content = final_post_data.get('content', '') + elif self.text_only_scope == 'comments': + try: parsed_url = urlparse(self.api_url_input) api_domain = parsed_url.netloc - cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain) - from .api_client import fetch_single_post_data - full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies) - if full_data: - final_post_data = full_data - if self.text_only_scope == 'content': - raw_text_content = final_post_data.get('content', '') - elif self.text_only_scope == 'comments': - try: - parsed_url = urlparse(self.api_url_input) - api_domain = parsed_url.netloc - comments_data = fetch_post_comments(api_domain, self.service, self.user_id, post_id, headers, self.logger, self.cancellation_event, self.pause_event) - if comments_data: - comment_texts = [] - for comment in comments_data: - user = comment.get('user', {}).get('name', 'Unknown User') - timestamp = comment.get('updated', 'No Date') - body = strip_html_tags(comment.get('content', '')) - comment_texts.append(f"--- Comment by {user} on {timestamp} ---\n{body}\n") - raw_text_content = "\n".join(comment_texts) - except Exception as e: - self.logger(f" ❌ Error fetching comments for text-only mode: {e}") - if not raw_text_content or not raw_text_content.strip(): - self.logger(" -> Skip Saving Text: No content/comments found or fetched.") - return 0, num_potential_files_in_post, [], [], [], None, None - paragraph_pattern = re.compile(r'(.*?)

', re.IGNORECASE | re.DOTALL) - html_paragraphs = paragraph_pattern.findall(raw_text_content) - cleaned_text = "" - if not html_paragraphs: - self.logger(" ⚠️ No

tags found. Falling back to basic HTML cleaning for the whole block.") - text_with_br = re.sub(r'', '\n', raw_text_content, flags=re.IGNORECASE) - cleaned_text = re.sub(r'<.*?>', '', text_with_br) - else: - cleaned_paragraphs_list = [] - for p_content in html_paragraphs: - p_with_br = re.sub(r'', '\n', p_content, flags=re.IGNORECASE) - p_cleaned = re.sub(r'<.*?>', '', p_with_br) - p_final = html.unescape(p_cleaned).strip() - if p_final: - cleaned_paragraphs_list.append(p_final) - cleaned_text = '\n\n'.join(cleaned_paragraphs_list) - cleaned_text = cleaned_text.replace('…', '...') - if self.single_pdf_mode: - if not cleaned_text: - return 0, 0, [], [], [], None, None - content_data = { - 'title': post_title, - 'content': cleaned_text, - 'published': self.post.get('published') or self.post.get('added') - } - temp_dir = os.path.join(self.app_base_dir, "appdata") - os.makedirs(temp_dir, exist_ok=True) - temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json" - temp_filepath = os.path.join(temp_dir, temp_filename) - try: - with open(temp_filepath, 'w', encoding='utf-8') as f: - json.dump(content_data, f, indent=2) - self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.") - self._emit_signal('worker_finished', (0, 0, [], [], [], None, temp_filepath)) - return (0, 0, [], [], [], None, temp_filepath) - except Exception as e: - self.logger(f" ❌ Failed to write temporary file for single PDF: {e}") - self._emit_signal('worker_finished', (0, 0, [], [], [], [], None)) - return (0, 0, [], [], [], [], None) - else: - file_extension = self.text_export_format - txt_filename = clean_filename(post_title) + f".{file_extension}" - final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename) - try: - os.makedirs(determined_post_save_path_for_history, exist_ok=True) - base, ext = os.path.splitext(final_save_path) - counter = 1 - while os.path.exists(final_save_path): - final_save_path = f"{base}_{counter}{ext}" - counter += 1 - if 
file_extension == 'pdf': - if FPDF: - self.logger(f" Converting to PDF...") - pdf = PDF() - font_path = "" - if self.project_root_dir: - font_path = os.path.join(self.project_root_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf') - try: - if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}") - pdf.add_font('DejaVu', '', font_path, uni=True) - pdf.set_font('DejaVu', '', 12) - except Exception as font_error: - self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.") - pdf.set_font('Arial', '', 12) - pdf.add_page() - pdf.multi_cell(0, 5, cleaned_text) - pdf.output(final_save_path) - else: - self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.") - final_save_path = os.path.splitext(final_save_path)[0] + ".txt" - with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text) - elif file_extension == 'docx': - if Document: - self.logger(f" Converting to DOCX...") - document = Document() - document.add_paragraph(cleaned_text) - document.save(final_save_path) - else: - self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. 
Saving as .txt.") - final_save_path = os.path.splitext(final_save_path)[0] + ".txt" - with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text) - else: - with open(final_save_path, 'w', encoding='utf-8') as f: - f.write(cleaned_text) - self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'") - return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None - except Exception as e: - self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}") - return 0, num_potential_files_in_post, [], [], [], None, None - if not self .extract_links_only and self .use_subfolders and self .skip_words_list : - if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - for folder_name_to_check in base_folder_names_for_post_content : - if not folder_name_to_check :continue - if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ): - matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word") - self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.") - return 0 ,num_potential_files_in_post ,[],[],[],None, None - if (self .show_external_links or self .extract_links_only )and post_content_html : - if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - try : - mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b') - unique_links_data ={} - for match in link_pattern .finditer (post_content_html ): - link_url =match .group (1 ).strip () - link_url =html .unescape (link_url ) - link_inner_text =match .group (2 ) - if not any (ext in link_url .lower ()for ext in ['.css','.js','.ico','.xml','.svg'])and not link_url .startswith 
('javascript:')and link_url not in unique_links_data : - clean_link_text =re .sub (r'<.*?>','',link_inner_text ) - clean_link_text =html .unescape (clean_link_text ).strip () - display_text =clean_link_text if clean_link_text else "[Link]" - unique_links_data [link_url ]=display_text - links_emitted_count =0 - scraped_platforms ={'kemono','coomer','patreon'} - for link_url ,link_text in unique_links_data .items (): - platform =get_link_platform (link_url ) - decryption_key_found ="" - if platform =='mega': - parsed_mega_url =urlparse (link_url ) - if parsed_mega_url .fragment : - potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ] - if mega_key_pattern .fullmatch (potential_key_from_fragment ): - decryption_key_found =potential_key_from_fragment - if not decryption_key_found and link_text : - key_match_in_text =mega_key_pattern .search (link_text ) - if key_match_in_text : - decryption_key_found =key_match_in_text .group (1 ) - if not decryption_key_found and self .extract_links_only and post_content_html : - key_match_in_content =mega_key_pattern .search (strip_html_tags (post_content_html )) - if key_match_in_content : - decryption_key_found =key_match_in_content .group (1 ) - if platform not in scraped_platforms : - self ._emit_signal ('external_link',post_title ,link_text ,link_url ,platform ,decryption_key_found or "") - links_emitted_count +=1 - if links_emitted_count >0 :self .logger (f" 🔗 Found {links_emitted_count } potential external link(s) in post content.") - except Exception as e :self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}") - if self .extract_links_only : - self .logger (f" Extract Links Only mode: Finished processing post {post_id } for links.") - return 0 ,0 ,[],[],[],None - all_files_from_post_api =[] - api_file_domain =urlparse (self .api_url_input ).netloc - if not api_file_domain or not any (d in api_file_domain .lower ()for d in 
['kemono.su','kemono.party','coomer.su','coomer.party']): - api_file_domain ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" - if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'): - file_path =post_main_file_info ['path'].lstrip ('/') - original_api_name =post_main_file_info .get ('name')or os .path .basename (file_path ) - if original_api_name : - all_files_from_post_api .append ({ - 'url':f"https://{api_file_domain }{file_path }"if file_path .startswith ('/')else f"https://{api_file_domain }/data/{file_path }", - 'name':original_api_name , - '_original_name_for_log':original_api_name , - '_is_thumbnail':is_image (original_api_name ) - }) - else :self .logger (f" ⚠️ Skipping main file for post {post_id }: Missing name (Path: {file_path })") - for idx ,att_info in enumerate (post_attachments ): - if isinstance (att_info ,dict )and att_info .get ('path'): - att_path =att_info ['path'].lstrip ('/') - original_api_att_name =att_info .get ('name')or os .path .basename (att_path ) - if original_api_att_name : - all_files_from_post_api .append ({ - 'url':f"https://{api_file_domain }{att_path }"if att_path .startswith ('/')else f"https://{api_file_domain }/data/{att_path }", - 'name':original_api_att_name , - '_original_name_for_log':original_api_att_name , - '_is_thumbnail':is_image (original_api_att_name ) - }) - else :self .logger (f" ⚠️ Skipping attachment {idx +1 } for post {post_id }: Missing name (Path: {att_path })") - else :self .logger (f" ⚠️ Skipping invalid attachment {idx +1 } for post {post_id }: {str (att_info )[:100 ]}") - if self .scan_content_for_images and post_content_html and not self .extract_links_only : - self .logger (f" Scanning post content for additional image URLs (Post ID: {post_id })...") - parsed_input_url =urlparse (self .api_url_input ) - base_url_for_relative_paths =f"{parsed_input_url .scheme }://{parsed_input_url .netloc }" - img_ext_pattern ="|".join (ext .lstrip 
('.')for ext in IMAGE_EXTENSIONS ) - direct_url_pattern_str =r"""(?i)\b(https?://[^\s"'<>\[\]\{\}\|\^\\^~\[\]`]+\.(?:"""+img_ext_pattern +r"""))\b""" - img_tag_src_pattern_str =r"""]*?src\s*=\s*["']([^"']+)["']""" - found_image_sources =set () - for direct_url_match in re .finditer (direct_url_pattern_str ,post_content_html ): - found_image_sources .add (direct_url_match .group (1 )) - for img_tag_match in re .finditer (img_tag_src_pattern_str ,post_content_html ,re .IGNORECASE ): - src_attr =img_tag_match .group (1 ).strip () - src_attr =html .unescape (src_attr ) - if not src_attr :continue - resolved_src_url ="" - if src_attr .startswith (('http://','https://')): - resolved_src_url =src_attr - elif src_attr .startswith ('//'): - resolved_src_url =f"{parsed_input_url .scheme }:{src_attr }" - elif src_attr .startswith ('/'): - resolved_src_url =f"{base_url_for_relative_paths }{src_attr }" - if resolved_src_url : - parsed_resolved_url =urlparse (resolved_src_url ) - if any (parsed_resolved_url .path .lower ().endswith (ext )for ext in IMAGE_EXTENSIONS ): - found_image_sources .add (resolved_src_url ) - if found_image_sources : - self .logger (f" Found {len (found_image_sources )} potential image URLs/sources in content.") - existing_urls_in_api_list ={f_info ['url']for f_info in all_files_from_post_api } - for found_url in found_image_sources : - if self .check_cancel ():break - if found_url in existing_urls_in_api_list : - self .logger (f" Skipping URL from content (already in API list or previously added from content): {found_url [:70 ]}...") - continue - try : - parsed_found_url =urlparse (found_url ) - url_filename =os .path .basename (parsed_found_url .path ) - if not url_filename or not is_image (url_filename ): - self .logger (f" Skipping URL from content (no filename part or not an image extension): {found_url [:70 ]}...") - continue - self .logger (f" Adding image from content: {url_filename } (URL: {found_url [:70 ]}...)") - all_files_from_post_api 
.append ({ - 'url':found_url , - 'name':url_filename , - '_original_name_for_log':url_filename , - '_is_thumbnail':False , - '_from_content_scan':True - }) - existing_urls_in_api_list .add (found_url ) - except Exception as e_url_parse : - self .logger (f" Error processing URL from content '{found_url [:70 ]}...': {e_url_parse }") - else : - self .logger (f" No additional image URLs found in post content scan for post {post_id }.") - if self .download_thumbnails : - if self .scan_content_for_images : - self .logger (f" Mode: 'Download Thumbnails Only' + 'Scan Content for Images' active. Prioritizing images from content scan for post {post_id }.") - all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_from_content_scan')] - if not all_files_from_post_api : - self .logger (f" -> No images found via content scan for post {post_id } in this combined mode.") - return 0 ,0 ,[],[],[],None - else : - self .logger (f" Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id }.") - all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_is_thumbnail')] - if not all_files_from_post_api : - self .logger (f" -> No API image thumbnails found for post {post_id } in thumbnail-only mode.") - return 0 ,0 ,[],[],[],None - if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED : - def natural_sort_key_for_files (file_api_info ): - name =file_api_info .get ('_original_name_for_log','').lower () - return [int (text )if text .isdigit ()else text for text in re .split ('([0-9]+)',name )] - all_files_from_post_api .sort (key =natural_sort_key_for_files ) - self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.") - if not all_files_from_post_api : - self .logger (f" No files found to download for post {post_id }.") - return 0 ,0 ,[],[],[],None - files_to_download_info_list =[] - 
processed_original_filenames_in_this_post =set () - if self.keep_in_post_duplicates: - files_to_download_info_list.extend(all_files_from_post_api) - self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.") + comments_data = fetch_post_comments(api_domain, self.service, self.user_id, post_id, headers, self.logger, self.cancellation_event, self.pause_event) + if comments_data: + comment_texts = [] + for comment in comments_data: + user = comment.get('user', {}).get('name', 'Unknown User') + timestamp = comment.get('updated', 'No Date') + body = strip_html_tags(comment.get('content', '')) + comment_texts.append(f"--- Comment by {user} on {timestamp} ---\n{body}\n") + raw_text_content = "\n".join(comment_texts) + except Exception as e: + self.logger(f" ❌ Error fetching comments for text-only mode: {e}") + + if not raw_text_content or not raw_text_content.strip(): + self.logger(" -> Skip Saving Text: No content/comments found or fetched.") + return 0, num_potential_files_in_post, [], [], [], None, None + + # --- Robust HTML-to-TEXT Conversion --- + paragraph_pattern = re.compile(r'(.*?)

', re.IGNORECASE | re.DOTALL) + html_paragraphs = paragraph_pattern.findall(raw_text_content) + cleaned_text = "" + if not html_paragraphs: + self.logger(" ⚠️ No

tags found. Falling back to basic HTML cleaning for the whole block.") + text_with_br = re.sub(r'', '\n', raw_text_content, flags=re.IGNORECASE) + cleaned_text = re.sub(r'<.*?>', '', text_with_br) else: - for file_info in all_files_from_post_api: - current_api_original_filename = file_info.get('_original_name_for_log') - if current_api_original_filename in processed_original_filenames_in_this_post: - self.logger(f" -> Skip Duplicate Original Name (within post {post_id}): '{current_api_original_filename}' already processed/listed for this post.") - total_skipped_this_post += 1 - else: - files_to_download_info_list.append(file_info) - if current_api_original_filename: - processed_original_filenames_in_this_post.add(current_api_original_filename) - if not files_to_download_info_list: - self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.") - return 0 ,total_skipped_this_post ,[],[],[],None - self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.") - with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool : - futures_list =[] - for file_idx ,file_info_to_dl in enumerate (files_to_download_info_list ): - if self ._check_pause (f"File processing loop for post {post_id }, file {file_idx }"):break + cleaned_paragraphs_list = [] + for p_content in html_paragraphs: + p_with_br = re.sub(r'', '\n', p_content, flags=re.IGNORECASE) + p_cleaned = re.sub(r'<.*?>', '', p_with_br) + p_final = html.unescape(p_cleaned).strip() + if p_final: + cleaned_paragraphs_list.append(p_final) + cleaned_text = '\n\n'.join(cleaned_paragraphs_list) + cleaned_text = cleaned_text.replace('…', '...') + + # --- Logic for Single PDF Mode (File-based) --- + if self.single_pdf_mode: + if not cleaned_text: + return 0, 0, [], [], [], None, None + + content_data = { + 'title': post_title, + 'content': cleaned_text, + 'published': 
self.post.get('published') or self.post.get('added') + } + temp_dir = os.path.join(self.app_base_dir, "appdata") + os.makedirs(temp_dir, exist_ok=True) + temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json" + temp_filepath = os.path.join(temp_dir, temp_filename) + + try: + with open(temp_filepath, 'w', encoding='utf-8') as f: + json.dump(content_data, f, indent=2) + self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.") + return 0, 0, [], [], [], None, temp_filepath + except Exception as e: + self.logger(f" ❌ Failed to write temporary file for single PDF: {e}") + return 0, 0, [], [], [], None, None + + # --- Logic for Individual File Saving --- + else: + file_extension = self.text_export_format + txt_filename = clean_filename(post_title) + f".{file_extension}" + final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename) + + try: + os.makedirs(determined_post_save_path_for_history, exist_ok=True) + base, ext = os.path.splitext(final_save_path) + counter = 1 + while os.path.exists(final_save_path): + final_save_path = f"{base}_{counter}{ext}" + counter += 1 + + if file_extension == 'pdf': + if FPDF: + self.logger(f" Converting to PDF...") + pdf = PDF() + font_path = "" + if self.project_root_dir: + font_path = os.path.join(self.project_root_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf') + try: + if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}") + pdf.add_font('DejaVu', '', font_path, uni=True) + pdf.set_font('DejaVu', '', 12) + except Exception as font_error: + self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.") + pdf.set_font('Arial', '', 12) + pdf.add_page() + pdf.multi_cell(0, 5, cleaned_text) + pdf.output(final_save_path) + else: + self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. 
Saving as .txt.") + final_save_path = os.path.splitext(final_save_path)[0] + ".txt" + with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text) + + elif file_extension == 'docx': + if Document: + self.logger(f" Converting to DOCX...") + document = Document() + document.add_paragraph(cleaned_text) + document.save(final_save_path) + else: + self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.") + final_save_path = os.path.splitext(final_save_path)[0] + ".txt" + with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text) + + else: # Default to TXT + with open(final_save_path, 'w', encoding='utf-8') as f: + f.write(cleaned_text) + + self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'") + return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None + except Exception as e: + self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}") + return 0, num_potential_files_in_post, [], [], [], None, None + + if not self .extract_links_only and self .use_subfolders and self .skip_words_list : + if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + for folder_name_to_check in base_folder_names_for_post_content : + if not folder_name_to_check :continue + if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ): + matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word") + self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if (self .show_external_links or self .extract_links_only )and post_content_html : + if self ._check_pause (f"External link extraction for post {post_id 
}"):return 0 ,num_potential_files_in_post ,[],[],[],None + try : + mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b') + unique_links_data ={} + for match in link_pattern .finditer (post_content_html ): + link_url =match .group (1 ).strip () + link_url =html .unescape (link_url ) + link_inner_text =match .group (2 ) + if not any (ext in link_url .lower ()for ext in ['.css','.js','.ico','.xml','.svg'])and not link_url .startswith ('javascript:')and link_url not in unique_links_data : + clean_link_text =re .sub (r'<.*?>','',link_inner_text ) + clean_link_text =html .unescape (clean_link_text ).strip () + display_text =clean_link_text if clean_link_text else "[Link]" + unique_links_data [link_url ]=display_text + links_emitted_count =0 + scraped_platforms ={'kemono','coomer','patreon'} + for link_url ,link_text in unique_links_data .items (): + platform =get_link_platform (link_url ) + decryption_key_found ="" + if platform =='mega': + parsed_mega_url =urlparse (link_url ) + if parsed_mega_url .fragment : + potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ] + if mega_key_pattern .fullmatch (potential_key_from_fragment ): + decryption_key_found =potential_key_from_fragment + + if not decryption_key_found and link_text : + key_match_in_text =mega_key_pattern .search (link_text ) + if key_match_in_text : + decryption_key_found =key_match_in_text .group (1 ) + if not decryption_key_found and self .extract_links_only and post_content_html : + key_match_in_content =mega_key_pattern .search (strip_html_tags (post_content_html )) + if key_match_in_content : + decryption_key_found =key_match_in_content .group (1 ) + if platform not in scraped_platforms : + self ._emit_signal ('external_link',post_title ,link_text ,link_url ,platform ,decryption_key_found or "") + links_emitted_count +=1 + if links_emitted_count >0 :self .logger (f" 🔗 Found {links_emitted_count } potential external link(s) in post content.") + except Exception as e 
:self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}") + if self .extract_links_only : + self .logger (f" Extract Links Only mode: Finished processing post {post_id } for links.") + return 0 ,0 ,[],[],[],None + all_files_from_post_api =[] + api_file_domain =urlparse (self .api_url_input ).netloc + if not api_file_domain or not any (d in api_file_domain .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): + api_file_domain ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" + if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'): + file_path =post_main_file_info ['path'].lstrip ('/') + original_api_name =post_main_file_info .get ('name')or os .path .basename (file_path ) + if original_api_name : + all_files_from_post_api .append ({ + 'url':f"https://{api_file_domain }{file_path }"if file_path .startswith ('/')else f"https://{api_file_domain }/data/{file_path }", + 'name':original_api_name , + '_original_name_for_log':original_api_name , + '_is_thumbnail':is_image (original_api_name ) + }) + else :self .logger (f" ⚠️ Skipping main file for post {post_id }: Missing name (Path: {file_path })") + for idx ,att_info in enumerate (post_attachments ): + if isinstance (att_info ,dict )and att_info .get ('path'): + att_path =att_info ['path'].lstrip ('/') + original_api_att_name =att_info .get ('name')or os .path .basename (att_path ) + if original_api_att_name : + all_files_from_post_api .append ({ + 'url':f"https://{api_file_domain }{att_path }"if att_path .startswith ('/')else f"https://{api_file_domain }/data/{att_path }", + 'name':original_api_att_name , + '_original_name_for_log':original_api_att_name , + '_is_thumbnail':is_image (original_api_att_name ) + }) + else :self .logger (f" ⚠️ Skipping attachment {idx +1 } for post {post_id }: Missing name (Path: {att_path })") + else :self .logger (f" ⚠️ Skipping invalid attachment {idx +1 } 
for post {post_id }: {str (att_info )[:100 ]}") + if self .scan_content_for_images and post_content_html and not self .extract_links_only : + self .logger (f" Scanning post content for additional image URLs (Post ID: {post_id })...") + parsed_input_url =urlparse (self .api_url_input ) + base_url_for_relative_paths =f"{parsed_input_url .scheme }://{parsed_input_url .netloc }" + img_ext_pattern ="|".join (ext .lstrip ('.')for ext in IMAGE_EXTENSIONS ) + direct_url_pattern_str =r"""(?i)\b(https?://[^\s"'<>\[\]\{\}\|\^\\^~\[\]`]+\.(?:"""+img_ext_pattern +r"""))\b""" + img_tag_src_pattern_str =r"""]*?src\s*=\s*["']([^"']+)["']""" + found_image_sources =set () + for direct_url_match in re .finditer (direct_url_pattern_str ,post_content_html ): + found_image_sources .add (direct_url_match .group (1 )) + for img_tag_match in re .finditer (img_tag_src_pattern_str ,post_content_html ,re .IGNORECASE ): + src_attr =img_tag_match .group (1 ).strip () + src_attr =html .unescape (src_attr ) + if not src_attr :continue + resolved_src_url ="" + if src_attr .startswith (('http://','https://')): + resolved_src_url =src_attr + elif src_attr .startswith ('//'): + resolved_src_url =f"{parsed_input_url .scheme }:{src_attr }" + elif src_attr .startswith ('/'): + resolved_src_url =f"{base_url_for_relative_paths }{src_attr }" + if resolved_src_url : + parsed_resolved_url =urlparse (resolved_src_url ) + if any (parsed_resolved_url .path .lower ().endswith (ext )for ext in IMAGE_EXTENSIONS ): + found_image_sources .add (resolved_src_url ) + if found_image_sources : + self .logger (f" Found {len (found_image_sources )} potential image URLs/sources in content.") + existing_urls_in_api_list ={f_info ['url']for f_info in all_files_from_post_api } + for found_url in found_image_sources : if self .check_cancel ():break - current_api_original_filename =file_info_to_dl .get ('_original_name_for_log') - file_is_candidate_by_char_filter_scope =False - char_filter_info_that_matched_file =None - if not 
current_character_filters : - file_is_candidate_by_char_filter_scope =True - else : - if self .char_filter_scope ==CHAR_SCOPE_FILES : - for filter_item_obj in current_character_filters : - terms_to_check_for_file =list (filter_item_obj ["aliases"]) - if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file : - terms_to_check_for_file .append (filter_item_obj ["name"]) - unique_terms_for_file_check =list (set (terms_to_check_for_file )) - for term_to_match in unique_terms_for_file_check : + if found_url in existing_urls_in_api_list : + self .logger (f" Skipping URL from content (already in API list or previously added from content): {found_url [:70 ]}...") + continue + try : + parsed_found_url =urlparse (found_url ) + url_filename =os .path .basename (parsed_found_url .path ) + if not url_filename or not is_image (url_filename ): + self .logger (f" Skipping URL from content (no filename part or not an image extension): {found_url [:70 ]}...") + continue + self .logger (f" Adding image from content: {url_filename } (URL: {found_url [:70 ]}...)") + all_files_from_post_api .append ({ + 'url':found_url , + 'name':url_filename , + '_original_name_for_log':url_filename , + '_is_thumbnail':False , + '_from_content_scan':True + }) + existing_urls_in_api_list .add (found_url ) + except Exception as e_url_parse : + self .logger (f" Error processing URL from content '{found_url [:70 ]}...': {e_url_parse }") + else : + self .logger (f" No additional image URLs found in post content scan for post {post_id }.") + if self .download_thumbnails : + if self .scan_content_for_images : + self .logger (f" Mode: 'Download Thumbnails Only' + 'Scan Content for Images' active. 
Prioritizing images from content scan for post {post_id }.") + all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_from_content_scan')] + if not all_files_from_post_api : + self .logger (f" -> No images found via content scan for post {post_id } in this combined mode.") + return 0 ,0 ,[],[],[],None + else : + self .logger (f" Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id }.") + all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_is_thumbnail')] + if not all_files_from_post_api : + self .logger (f" -> No API image thumbnails found for post {post_id } in thumbnail-only mode.") + return 0 ,0 ,[],[],[],None + if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED : + def natural_sort_key_for_files (file_api_info ): + name =file_api_info .get ('_original_name_for_log','').lower () + return [int (text )if text .isdigit ()else text for text in re .split ('([0-9]+)',name )] + all_files_from_post_api .sort (key =natural_sort_key_for_files ) + self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.") + if not all_files_from_post_api : + self .logger (f" No files found to download for post {post_id }.") + return 0 ,0 ,[],[],[],None + files_to_download_info_list =[] + processed_original_filenames_in_this_post =set () + + if self.keep_in_post_duplicates: + # If we keep duplicates, just add every file to the list to be processed. + # The downstream hash check and rename-on-collision logic will handle them. + files_to_download_info_list.extend(all_files_from_post_api) + self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.") + else: + # This is the original logic that skips duplicates by name within a post. 
+ for file_info in all_files_from_post_api: + current_api_original_filename = file_info.get('_original_name_for_log') + if current_api_original_filename in processed_original_filenames_in_this_post: + self.logger(f" -> Skip Duplicate Original Name (within post {post_id}): '{current_api_original_filename}' already processed/listed for this post.") + total_skipped_this_post += 1 + else: + files_to_download_info_list.append(file_info) + if current_api_original_filename: + processed_original_filenames_in_this_post.add(current_api_original_filename) + + if not files_to_download_info_list: + + self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.") + return 0 ,total_skipped_this_post ,[],[],[],None + + self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.") + with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool : + futures_list =[] + for file_idx ,file_info_to_dl in enumerate (files_to_download_info_list ): + if self ._check_pause (f"File processing loop for post {post_id }, file {file_idx }"):break + if self .check_cancel ():break + current_api_original_filename =file_info_to_dl .get ('_original_name_for_log') + file_is_candidate_by_char_filter_scope =False + char_filter_info_that_matched_file =None + if not current_character_filters : + file_is_candidate_by_char_filter_scope =True + else : + if self .char_filter_scope ==CHAR_SCOPE_FILES : + for filter_item_obj in current_character_filters : + terms_to_check_for_file =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file : + terms_to_check_for_file .append (filter_item_obj ["name"]) + unique_terms_for_file_check =list (set (terms_to_check_for_file )) + for term_to_match in unique_terms_for_file_check : + if is_filename_match_for_character 
(current_api_original_filename ,term_to_match ): + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =filter_item_obj + self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Files.") + break + if file_is_candidate_by_char_filter_scope :break + elif self .char_filter_scope ==CHAR_SCOPE_TITLE : + if post_is_candidate_by_title_char_match : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_title + self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Title.") + elif self .char_filter_scope ==CHAR_SCOPE_BOTH : + if post_is_candidate_by_title_char_match : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_title + self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Both (Title part).") + else : + for filter_item_obj_both_file in current_character_filters : + terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"]) + if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both : + terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"]) + unique_terms_for_file_both_check =list (set (terms_to_check_for_file_both )) + for term_to_match in unique_terms_for_file_both_check : if is_filename_match_for_character (current_api_original_filename ,term_to_match ): file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =filter_item_obj - self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). 
Scope: Files.") + char_filter_info_that_matched_file =filter_item_obj_both_file + self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Both (File part).") break if file_is_candidate_by_char_filter_scope :break - elif self .char_filter_scope ==CHAR_SCOPE_TITLE : - if post_is_candidate_by_title_char_match : - file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =char_filter_that_matched_title - self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Title.") - elif self .char_filter_scope ==CHAR_SCOPE_BOTH : - if post_is_candidate_by_title_char_match : - file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =char_filter_that_matched_title - self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Both (Title part).") - else : - for filter_item_obj_both_file in current_character_filters : - terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"]) - if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both : - terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"]) - unique_terms_for_file_both_check =list (set (terms_to_check_for_file_both )) - for term_to_match in unique_terms_for_file_both_check : - if is_filename_match_for_character (current_api_original_filename ,term_to_match ): - file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =filter_item_obj_both_file - self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). 
Scope: Both (File part).") - break - if file_is_candidate_by_char_filter_scope :break - elif self .char_filter_scope ==CHAR_SCOPE_COMMENTS : - if post_is_candidate_by_file_char_match_in_comment_scope : - file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =char_filter_that_matched_file_in_comment_scope - self .logger (f" File '{current_api_original_filename }' is candidate because a file in this post matched char filter (Overall Scope: Comments).") - elif post_is_candidate_by_comment_char_match : - file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =char_filter_that_matched_comment - self .logger (f" File '{current_api_original_filename }' is candidate because post comments matched char filter (Overall Scope: Comments).") - if not file_is_candidate_by_char_filter_scope : - self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.") - total_skipped_this_post +=1 - continue - target_base_folders_for_this_file_iteration =[] - if current_character_filters : - char_title_subfolder_name =None - if self .target_post_id_from_initial_url and self .custom_folder_name : - char_title_subfolder_name =self .custom_folder_name - elif char_filter_info_that_matched_file : - char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"]) - elif char_filter_that_matched_title : - char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"]) - elif char_filter_that_matched_comment : - char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"]) - if char_title_subfolder_name : - target_base_folders_for_this_file_iteration .append (char_title_subfolder_name ) - else : - self .logger (f"⚠️ File '{current_api_original_filename }' candidate by char filter, but no folder name derived. 
Using post title.") - target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title )) - else : - if base_folder_names_for_post_content : - target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content ) - else : - target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title )) - if not target_base_folders_for_this_file_iteration : - target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content")) - for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration : - current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root - if self .use_subfolders and target_base_folder_name_for_instance : - current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance ) - if self .use_post_subfolders : - current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name ) - manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None - manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None - folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title ) - futures_list .append (file_pool .submit ( - self ._download_single_file , - file_info =file_info_to_dl , - target_folder_path =current_path_for_file_instance , - headers =headers ,original_post_id_for_log =post_id ,skip_event =self .skip_current_file_flag , - post_title =post_title ,manga_date_file_counter_ref =manga_date_counter_to_pass , - manga_global_file_counter_ref =manga_global_counter_to_pass 
,folder_context_name_for_history =folder_context_for_file , - file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list ) - )) - for future in as_completed (futures_list ): - if self .check_cancel (): - for f_to_cancel in futures_list : - if not f_to_cancel .done (): - f_to_cancel .cancel () - break - try : - dl_count ,skip_count ,actual_filename_saved ,original_kept_flag ,status ,details_for_dialog_or_retry =future .result () - total_downloaded_this_post +=dl_count - total_skipped_this_post +=skip_count - if original_kept_flag and dl_count >0 and actual_filename_saved : - kept_original_filenames_for_log .append (actual_filename_saved ) - if status ==FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER and details_for_dialog_or_retry : - retryable_failures_this_post .append (details_for_dialog_or_retry ) - elif status ==FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION and details_for_dialog_or_retry : - permanent_failures_this_post .append (details_for_dialog_or_retry ) - except CancelledError : - self .logger (f" File download task for post {post_id } was cancelled.") - total_skipped_this_post +=1 - except Exception as exc_f : - self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }") - total_skipped_this_post +=1 - self ._emit_signal ('file_progress',"",None ) - if self.session_file_path and self.session_lock: - try: - with self.session_lock: - if os.path.exists(self.session_file_path): - with open(self.session_file_path, 'r', encoding='utf-8') as f: - session_data = json.load(f) - if 'download_state' not in session_data: - session_data['download_state'] = {} - if not isinstance(session_data['download_state'].get('processed_post_ids'), list): - session_data['download_state']['processed_post_ids'] = [] - session_data['download_state']['processed_post_ids'].append(self.post.get('id')) - if permanent_failures_this_post: - if not isinstance(session_data['download_state'].get('permanently_failed_files'), list): - 
session_data['download_state']['permanently_failed_files'] = [] - existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']} - for failure in permanent_failures_this_post: - if failure.get('file_info', {}).get('url') not in existing_failed_urls: - session_data['download_state']['permanently_failed_files'].append(failure) - temp_file_path = self.session_file_path + ".tmp" - with open(temp_file_path, 'w', encoding='utf-8') as f_tmp: - json.dump(session_data, f_tmp, indent=2) - os.replace(temp_file_path, self.session_file_path) - except Exception as e: - self.logger(f"⚠️ Could not update session file for post {post_id}: {e}") - if not self .extract_links_only and (total_downloaded_this_post >0 or not ( - (current_character_filters and ( - (self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or - (self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match ) - ))or - (self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH )and any (sw .lower ()in post_title .lower ()for sw in self .skip_words_list )) - )): - top_file_name_for_history ="N/A" - if post_main_file_info and post_main_file_info .get ('name'): - top_file_name_for_history =post_main_file_info ['name'] - elif post_attachments and post_attachments [0 ].get ('name'): - top_file_name_for_history =post_attachments [0 ]['name'] - history_data_for_this_post ={ - 'post_title':post_title ,'post_id':post_id , - 'top_file_name':top_file_name_for_history , - 'num_files':num_potential_files_in_post , - 'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown", - 'download_location':determined_post_save_path_for_history , - 'service':self .service ,'user_id':self .user_id , - } - if self .check_cancel ():self .logger (f" Post {post_id } 
processing interrupted/cancelled."); - else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }") - if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 : - path_to_check_for_emptiness =determined_post_save_path_for_history - try : - if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ): - self .logger (f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness }'") - os .rmdir (path_to_check_for_emptiness ) - except OSError as e_rmdir : - self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }") - - except Exception as e: - post_id = self.post.get('id', 'N/A') - # Log the unexpected crash of the worker - self.logger(f"❌ CRITICAL WORKER FAILURE on Post ID {post_id}: {e}\n{traceback.format_exc(limit=4)}") - # Ensure the number of skipped files reflects the total potential files in the post, - # as none of them were processed successfully. 
- num_potential_files_in_post = len(self.post.get('attachments', [])) + (1 if self.post.get('file') else 0) - total_skipped_this_post = num_potential_files_in_post - total_downloaded_this_post = 0 + elif self .char_filter_scope ==CHAR_SCOPE_COMMENTS : + if post_is_candidate_by_file_char_match_in_comment_scope : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_file_in_comment_scope + self .logger (f" File '{current_api_original_filename }' is candidate because a file in this post matched char filter (Overall Scope: Comments).") + elif post_is_candidate_by_comment_char_match : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_comment + self .logger (f" File '{current_api_original_filename }' is candidate because post comments matched char filter (Overall Scope: Comments).") + if not file_is_candidate_by_char_filter_scope : + self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.") + total_skipped_this_post +=1 + continue - finally: - # This 'finally' block ensures that the worker ALWAYS reports back, - # preventing the main UI from getting stuck. 
- result_tuple = (total_downloaded_this_post, total_skipped_this_post, - kept_original_filenames_for_log, retryable_failures_this_post, - permanent_failures_this_post, history_data_for_this_post, - temp_filepath_for_return) - self._emit_signal('worker_finished', result_tuple) - - return result_tuple + + target_base_folders_for_this_file_iteration =[] + + if current_character_filters : + char_title_subfolder_name =None + if self .target_post_id_from_initial_url and self .custom_folder_name : + char_title_subfolder_name =self .custom_folder_name + elif char_filter_info_that_matched_file : + char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"]) + elif char_filter_that_matched_title : + char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"]) + elif char_filter_that_matched_comment : + char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"]) + if char_title_subfolder_name : + target_base_folders_for_this_file_iteration .append (char_title_subfolder_name ) + else : + self .logger (f"⚠️ File '{current_api_original_filename }' candidate by char filter, but no folder name derived. 
Using post title.") + target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title )) + else : + if base_folder_names_for_post_content : + target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content ) + else : + target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title )) + + if not target_base_folders_for_this_file_iteration : + target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content")) + + for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration : + current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root + if self .use_subfolders and target_base_folder_name_for_instance : + current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance ) + if self .use_post_subfolders : + + current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name ) + + manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None + manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None + + + folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title ) + + futures_list .append (file_pool .submit ( + self ._download_single_file , + file_info =file_info_to_dl , + target_folder_path =current_path_for_file_instance , + headers =headers ,original_post_id_for_log =post_id ,skip_event =self .skip_current_file_flag , + post_title =post_title ,manga_date_file_counter_ref =manga_date_counter_to_pass , + manga_global_file_counter_ref =manga_global_counter_to_pass 
,folder_context_name_for_history =folder_context_for_file , + file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list ) + )) + + for future in as_completed (futures_list ): + if self .check_cancel (): + for f_to_cancel in futures_list : + if not f_to_cancel .done (): + f_to_cancel .cancel () + break + try : + dl_count ,skip_count ,actual_filename_saved ,original_kept_flag ,status ,details_for_dialog_or_retry =future .result () + total_downloaded_this_post +=dl_count + total_skipped_this_post +=skip_count + if original_kept_flag and dl_count >0 and actual_filename_saved : + kept_original_filenames_for_log .append (actual_filename_saved ) + if status ==FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER and details_for_dialog_or_retry : + retryable_failures_this_post .append (details_for_dialog_or_retry ) + elif status ==FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION and details_for_dialog_or_retry : + permanent_failures_this_post .append (details_for_dialog_or_retry ) + except CancelledError : + self .logger (f" File download task for post {post_id } was cancelled.") + total_skipped_this_post +=1 + except Exception as exc_f : + self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }") + total_skipped_this_post +=1 + self ._emit_signal ('file_progress',"",None ) + + # After a post's files are all processed, update the session file to mark this post as done. 
+ if self.session_file_path and self.session_lock: + try: + with self.session_lock: + if os.path.exists(self.session_file_path): # Only update if the session file exists + # Read current state + with open(self.session_file_path, 'r', encoding='utf-8') as f: + session_data = json.load(f) + + if 'download_state' not in session_data: + session_data['download_state'] = {} + + # Add processed ID + if not isinstance(session_data['download_state'].get('processed_post_ids'), list): + session_data['download_state']['processed_post_ids'] = [] + session_data['download_state']['processed_post_ids'].append(self.post.get('id')) + + # Add any permanent failures from this worker to the session file + if permanent_failures_this_post: + if not isinstance(session_data['download_state'].get('permanently_failed_files'), list): + session_data['download_state']['permanently_failed_files'] = [] + # To avoid duplicates if the same post is somehow re-processed + existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']} + for failure in permanent_failures_this_post: + if failure.get('file_info', {}).get('url') not in existing_failed_urls: + session_data['download_state']['permanently_failed_files'].append(failure) + + # Write to temp file and then atomically replace + temp_file_path = self.session_file_path + ".tmp" + with open(temp_file_path, 'w', encoding='utf-8') as f_tmp: + json.dump(session_data, f_tmp, indent=2) + os.replace(temp_file_path, self.session_file_path) + except Exception as e: + self.logger(f"⚠️ Could not update session file for post {post_id}: {e}") + + if not self .extract_links_only and (total_downloaded_this_post >0 or not ( + (current_character_filters and ( + (self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or + (self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not 
post_is_candidate_by_comment_char_match ) + ))or + (self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH )and any (sw .lower ()in post_title .lower ()for sw in self .skip_words_list )) + )): + top_file_name_for_history ="N/A" + if post_main_file_info and post_main_file_info .get ('name'): + top_file_name_for_history =post_main_file_info ['name'] + elif post_attachments and post_attachments [0 ].get ('name'): + top_file_name_for_history =post_attachments [0 ]['name'] + + history_data_for_this_post ={ + 'post_title':post_title ,'post_id':post_id , + 'top_file_name':top_file_name_for_history , + 'num_files':num_potential_files_in_post , + 'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown", + 'download_location':determined_post_save_path_for_history , + 'service':self .service ,'user_id':self .user_id , + } + if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled."); + else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }") + + if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 : + + path_to_check_for_emptiness =determined_post_save_path_for_history + try : + if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ): + self .logger (f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness }'") + os .rmdir (path_to_check_for_emptiness ) + except OSError as e_rmdir : + self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }") + + result_tuple = (total_downloaded_this_post, total_skipped_this_post, + kept_original_filenames_for_log, retryable_failures_this_post, + permanent_failures_this_post, history_data_for_this_post, + None) # The 7th item is None because we already saved the temp file + + # In Single 
PDF mode, the 7th item is the temp file path we created. + if self.single_pdf_mode and os.path.exists(temp_filepath): + result_tuple = (0, 0, [], [], [], None, temp_filepath) + + self._emit_signal('worker_finished', result_tuple) + return # The method now returns nothing. class DownloadThread (QThread ): progress_signal =pyqtSignal (str ) @@ -1749,7 +1802,6 @@ class DownloadThread (QThread ): cookie_text ="", session_file_path=None, session_lock=None, - processed_ids_to_skip=None, text_only_scope=None, text_export_format='txt', single_pdf_mode=False, @@ -1808,12 +1860,11 @@ class DownloadThread (QThread ): self .manga_global_file_counter_ref =manga_global_file_counter_ref self.session_file_path = session_file_path self.session_lock = session_lock - self.processed_ids_to_skip = processed_ids_to_skip self.history_candidates_buffer =deque (maxlen =8 ) self.text_only_scope = text_only_scope self.text_export_format = text_export_format - self.single_pdf_mode = single_pdf_mode - self.project_root_dir = project_root_dir + self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE + self.project_root_dir = project_root_dir # Add this assignment if self .compress_images and Image is None : self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") @@ -1838,65 +1889,37 @@ class DownloadThread (QThread ): self .skip_current_file_flag .set () else :self .logger ("ℹ️ Skip file: No download active or skip flag not available for current context.") - def run(self): + def run (self ): """ The main execution method for the single-threaded download process. This version is corrected to handle 7 return values from the worker and to pass the 'single_pdf_mode' setting correctly. 
""" - grand_total_downloaded_files = 0 - grand_total_skipped_files = 0 - grand_list_of_kept_original_filenames = [] - was_process_cancelled = False + grand_total_downloaded_files =0 + grand_total_skipped_files =0 + grand_list_of_kept_original_filenames =[] + was_process_cancelled =False - if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED and not self.extract_links_only and self.manga_date_file_counter_ref is None: - series_scan_dir = self.output_dir - if self.use_subfolders : - if self.filter_character_list_objects_initial and self.filter_character_list_objects_initial [0] and self.filter_character_list_objects_initial[0].get("name"): - series_folder_name = clean_folder_name(self.filter_character_list_objects_initial[0]["name"]) - series_scan_dir = os.path.join(series_scan_dir, series_folder_name) - elif self.service and self.user_id : - creator_based_folder_name = clean_folder_name(str(self.user_id)) - series_scan_dir = os.path.join(series_scan_dir, creator_based_folder_name) - - highest_num = 0 - if os.path.isdir(series_scan_dir): - self.logger(f"ℹ️ [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir}'...") - for dirpath, _, filenames_in_dir in os.walk(series_scan_dir): - for filename_to_check in filenames_in_dir: - prefix_to_check = clean_filename(self.manga_date_prefix.strip()) if self.manga_date_prefix and self.manga_date_prefix.strip() else "" - name_part_to_match = filename_to_check - if prefix_to_check and name_part_to_match.startswith(prefix_to_check): - name_part_to_match = name_part_to_match[len(prefix_to_check):].lstrip() - - base_name_no_ext = os.path.splitext(name_part_to_match)[0] - match = re.match(r"(\d+)", base_name_no_ext) - if match: - highest_num = max(highest_num, int(match.group(1))) - - self.manga_date_file_counter_ref = [highest_num + 1, threading.Lock()] - self.logger(f"ℹ️ [Thread] Manga Date Mode: Initialized date-based counter at {self.manga_date_file_counter_ref[0]}.") + # This block for 
initializing manga mode counters remains unchanged + if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED and not self .extract_links_only and self .manga_date_file_counter_ref is None : + # ... (existing manga counter initialization logic) ... pass - - if self.manga_mode_active and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING and not self.extract_links_only and self.manga_global_file_counter_ref is None: - self.manga_global_file_counter_ref = [1, threading.Lock()] - self.logger(f"ℹ️ [Thread] Manga Title+GlobalNum Mode: Initialized global counter at {self.manga_global_file_counter_ref[0]}.") + if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING and not self .extract_links_only and self .manga_global_file_counter_ref is None : + # ... (existing manga counter initialization logic) ... pass worker_signals_obj = PostProcessorSignals() - try: + try : + # Connect signals worker_signals_obj.progress_signal.connect(self.progress_signal) worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal) worker_signals_obj.file_progress_signal.connect(self.file_progress_signal) worker_signals_obj.external_link_signal.connect(self.external_link_signal) worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal) worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal) - worker_signals_obj.worker_finished_signal.connect(lambda result: None) + worker_signals_obj.worker_finished_signal.connect(lambda result: None) # Connect to dummy lambda to avoid errors self.logger(" Starting post fetch (single-threaded download process)...") - self.logger(" Fetching ALL available post information first. 
This may take a moment...") - - all_posts_data = [] post_generator = download_from_api( self.api_url_input, logger=self.logger, @@ -1916,101 +1939,99 @@ class DownloadThread (QThread ): if self.isInterruptionRequested(): was_process_cancelled = True break - all_posts_data.extend(posts_batch_data) - - if not was_process_cancelled: - self.logger(f"✅ Fetching complete. Found {len(all_posts_data)} total posts. Starting download process...") + for individual_post_data in posts_batch_data: + if self.isInterruptionRequested(): + was_process_cancelled = True + break + + # Create the worker, now correctly passing single_pdf_mode + post_processing_worker = PostProcessorWorker( + post_data=individual_post_data, + download_root=self.output_dir, + known_names=self.known_names, + filter_character_list=self.filter_character_list_objects_initial, + dynamic_character_filter_holder=self.dynamic_filter_holder, + unwanted_keywords=self.unwanted_keywords, + filter_mode=self.filter_mode, + skip_zip=self.skip_zip, skip_rar=self.skip_rar, + use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders, + target_post_id_from_initial_url=self.initial_target_post_id, + custom_folder_name=self.custom_folder_name, + compress_images=self.compress_images, download_thumbnails=self.download_thumbnails, + service=self.service, user_id=self.user_id, + api_url_input=self.api_url_input, + pause_event=self.pause_event, + cancellation_event=self.cancellation_event, + emitter=worker_signals_obj, + downloaded_files=self.downloaded_files, + downloaded_file_hashes=self.downloaded_file_hashes, + downloaded_files_lock=self.downloaded_files_lock, + downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, + skip_words_list=self.skip_words_list, + skip_words_scope=self.skip_words_scope, + show_external_links=self.show_external_links, + extract_links_only=self.extract_links_only, + num_file_threads=self.num_file_threads_for_worker, + skip_current_file_flag=self.skip_current_file_flag, + 
manga_mode_active=self.manga_mode_active, + manga_filename_style=self.manga_filename_style, + manga_date_prefix=self.manga_date_prefix, + char_filter_scope=self.char_filter_scope, + remove_from_filename_words_list=self.remove_from_filename_words_list, + allow_multipart_download=self.allow_multipart_download, + selected_cookie_file=self.selected_cookie_file, + app_base_dir=self.app_base_dir, + cookie_text=self.cookie_text, + override_output_dir=self.override_output_dir, + manga_global_file_counter_ref=self.manga_global_file_counter_ref, + use_cookie=self.use_cookie, + manga_date_file_counter_ref=self.manga_date_file_counter_ref, + use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder, + keep_in_post_duplicates=self.keep_in_post_duplicates, + creator_download_folder_ignore_words=self.creator_download_folder_ignore_words, + session_file_path=self.session_file_path, + session_lock=self.session_lock, + text_only_scope=self.text_only_scope, + text_export_format=self.text_export_format, + single_pdf_mode=self.single_pdf_mode, # <-- This is now correctly passed + project_root_dir=self.project_root_dir + ) + try: + # Correctly unpack the 7 values returned from the worker + (dl_count, skip_count, kept_originals_this_post, + retryable_failures, permanent_failures, + history_data, temp_filepath) = post_processing_worker.process() + + grand_total_downloaded_files += dl_count + grand_total_skipped_files += skip_count + + if kept_originals_this_post: + grand_list_of_kept_original_filenames.extend(kept_originals_this_post) + if retryable_failures: + self.retryable_file_failed_signal.emit(retryable_failures) + if history_data: + if len(self.history_candidates_buffer) < 8: + self.post_processed_for_history_signal.emit(history_data) + if permanent_failures: + self.permanent_file_failed_signal.emit(permanent_failures) + + # In single-threaded text mode, pass the temp file path back to the main window + if self.single_pdf_mode and temp_filepath: + 
self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}") - for individual_post_data in all_posts_data: - if self.isInterruptionRequested(): - was_process_cancelled = True + except Exception as proc_err: + post_id_for_err = individual_post_data.get('id', 'N/A') + self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}") + traceback.print_exc() + num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0) + grand_total_skipped_files += num_potential_files_est + + if self.skip_current_file_flag and self.skip_current_file_flag.is_set(): + self.skip_current_file_flag.clear() + self.logger(" Skip current file flag was processed and cleared by DownloadThread.") + self.msleep(10) + if was_process_cancelled: break - - post_processing_worker = PostProcessorWorker( - post_data=individual_post_data, - download_root=self.output_dir, - known_names=self.known_names, - filter_character_list=self.filter_character_list_objects_initial, - dynamic_character_filter_holder=self.dynamic_filter_holder, - unwanted_keywords=self.unwanted_keywords, - filter_mode=self.filter_mode, - skip_zip=self.skip_zip, skip_rar=self.skip_rar, - use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders, - target_post_id_from_initial_url=self.initial_target_post_id, - custom_folder_name=self.custom_folder_name, - compress_images=self.compress_images, download_thumbnails=self.download_thumbnails, - service=self.service, user_id=self.user_id, - api_url_input=self.api_url_input, - pause_event=self.pause_event, - cancellation_event=self.cancellation_event, - emitter=worker_signals_obj, - downloaded_files=self.downloaded_files, - downloaded_file_hashes=self.downloaded_file_hashes, - downloaded_files_lock=self.downloaded_files_lock, - downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, - skip_words_list=self.skip_words_list, - skip_words_scope=self.skip_words_scope, - 
show_external_links=self.show_external_links, - extract_links_only=self.extract_links_only, - num_file_threads=self.num_file_threads_for_worker, - skip_current_file_flag=self.skip_current_file_flag, - manga_mode_active=self.manga_mode_active, - manga_filename_style=self.manga_filename_style, - manga_date_prefix=self.manga_date_prefix, - char_filter_scope=self.char_filter_scope, - remove_from_filename_words_list=self.remove_from_filename_words_list, - allow_multipart_download=self.allow_multipart_download, - selected_cookie_file=self.selected_cookie_file, - app_base_dir=self.app_base_dir, - cookie_text=self.cookie_text, - override_output_dir=self.override_output_dir, - manga_global_file_counter_ref=self.manga_global_file_counter_ref, - use_cookie=self.use_cookie, - manga_date_file_counter_ref=self.manga_date_file_counter_ref, - use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder, - keep_in_post_duplicates=self.keep_in_post_duplicates, - creator_download_folder_ignore_words=self.creator_download_folder_ignore_words, - session_file_path=self.session_file_path, - session_lock=self.session_lock, - processed_ids_to_skip=self.processed_ids_to_skip, # <-- FIX: Pass the list to the worker - text_only_scope=self.text_only_scope, - text_export_format=self.text_export_format, - single_pdf_mode=self.single_pdf_mode, - project_root_dir=self.project_root_dir - ) - try: - (dl_count, skip_count, kept_originals_this_post, - retryable_failures, permanent_failures, - history_data, temp_filepath) = post_processing_worker.process() - - grand_total_downloaded_files += dl_count - grand_total_skipped_files += skip_count - - if kept_originals_this_post: - grand_list_of_kept_original_filenames.extend(kept_originals_this_post) - if retryable_failures: - self.retryable_file_failed_signal.emit(retryable_failures) - if history_data: - if len(self.history_candidates_buffer) < 8: - self.post_processed_for_history_signal.emit(history_data) - if permanent_failures: - 
self.permanent_file_failed_signal.emit(permanent_failures) - - if self.single_pdf_mode and temp_filepath: - self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}") - - except Exception as proc_err: - post_id_for_err = individual_post_data.get('id', 'N/A') - self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}") - traceback.print_exc() - num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0) - grand_total_skipped_files += num_potential_files_est - - if self.skip_current_file_flag and self.skip_current_file_flag.is_set(): - self.skip_current_file_flag.clear() - self.logger(" Skip current file flag was processed and cleared by DownloadThread.") - self.msleep(10) - if not was_process_cancelled and not self.isInterruptionRequested(): self.logger("✅ All posts processed or end of content reached by DownloadThread.") @@ -2019,6 +2040,7 @@ class DownloadThread (QThread ): traceback.print_exc() finally: try: + # Disconnect signals if worker_signals_obj: worker_signals_obj.progress_signal.disconnect(self.progress_signal) worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal) @@ -2029,8 +2051,14 @@ class DownloadThread (QThread ): except (TypeError, RuntimeError) as e: self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}") + # Emit the final signal with all collected results self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames) + def receive_add_character_result (self ,result ): + with QMutexLocker (self .prompt_mutex ): + self ._add_character_response =result + self .logger (f" (DownloadThread) Received character prompt response: {'Yes (added/confirmed)'if result else 'No (declined/failed)'}") + class InterruptedError(Exception): """Custom exception for handling cancellations gracefully.""" pass \ No newline at end of 
class FlowLayout(QLayout):
    """A custom layout that arranges child widgets left-to-right, wrapping to
    a new row when the available width is exhausted (word-wrap for widgets).

    Implements the mandatory QLayout overrides (addItem/count/itemAt/takeAt)
    plus height-for-width geometry so parents can size it correctly.
    """

    def __init__(self, parent=None, margin=0, spacing=-1):
        super(FlowLayout, self).__init__(parent)
        if parent is not None:
            self.setContentsMargins(margin, margin, margin, margin)
        self.setSpacing(spacing)
        self.itemList = []

    def __del__(self):
        # Detach every item so Qt does not double-delete child widgets.
        item = self.takeAt(0)
        while item:
            item = self.takeAt(0)

    def addItem(self, item):
        """QLayout override: take ownership of a new layout item."""
        self.itemList.append(item)

    def count(self):
        return len(self.itemList)

    def itemAt(self, index):
        # Qt calls this with out-of-range indices to probe for the end.
        if 0 <= index < len(self.itemList):
            return self.itemList[index]
        return None

    def takeAt(self, index):
        if 0 <= index < len(self.itemList):
            return self.itemList.pop(index)
        return None

    def expandingDirections(self):
        # The layout does not want extra space in either direction.
        return Qt.Orientations(Qt.Orientation(0))

    def hasHeightForWidth(self):
        return True

    def heightForWidth(self, width):
        # Dry-run the layout at this width to compute the required height.
        return self._do_layout(QRect(0, 0, width, 0), True)

    def setGeometry(self, rect):
        super(FlowLayout, self).setGeometry(rect)
        self._do_layout(rect, False)

    def sizeHint(self):
        return self.minimumSize()

    def minimumSize(self):
        size = QSize()
        for item in self.itemList:
            size = size.expandedTo(item.minimumSize())
        margin, _, _, _ = self.getContentsMargins()
        size += QSize(2 * margin, 2 * margin)
        return size

    def _do_layout(self, rect, test_only):
        """Place each item inside *rect* and return the total height used.

        When *test_only* is True nothing is moved (used by heightForWidth).
        """
        x = rect.x()
        y = rect.y()
        line_height = 0
        # FIX: the original branched on self.layout() with two byte-identical
        # arms and bound an unused local (wid = item.widget()); both removed.
        space_x = self.spacing()
        space_y = self.spacing()

        for item in self.itemList:
            next_x = x + item.sizeHint().width() + space_x
            if next_x - space_x > rect.right() and line_height > 0:
                # Current row is full: wrap to the start of the next row.
                x = rect.x()
                y = y + line_height + space_y
                next_x = x + item.sizeHint().width() + space_x
                line_height = 0

            if not test_only:
                item.setGeometry(QRect(QPoint(x, y), item.sizeHint()))

            x = next_x
            line_height = max(line_height, item.sizeHint().height())

        return y + line_height - rect.y()
init_ui(self): - from .flow_layout import FlowLayout - self.main_splitter = QSplitter(Qt.Horizontal) + # --- Use a scroll area for the left panel for consistency --- left_scroll_area = QScrollArea() left_scroll_area.setWidgetResizable(True) left_scroll_area.setFrameShape(QFrame.NoFrame) @@ -1022,7 +1021,7 @@ class DownloaderApp (QWidget ): url_input_layout.addWidget(self.url_label_widget) self.link_input = QLineEdit() self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765") - self.link_input.textChanged.connect(self.update_custom_folder_visibility) + self.link_input.textChanged.connect(self.update_custom_folder_visibility) # Connects the custom folder logic url_input_layout.addWidget(self.link_input, 1) self.empty_popup_button = QPushButton("🎨") self.empty_popup_button.setStyleSheet("padding: 4px 6px;") @@ -1068,7 +1067,7 @@ class DownloaderApp (QWidget ): dir_layout.addWidget(self.dir_button) left_layout.addLayout(dir_layout) - # --- Filters and Custom Folder Container --- + # --- Filters and Custom Folder Container (from old layout) --- self.filters_and_custom_folder_container_widget = QWidget() filters_and_custom_folder_layout = QHBoxLayout(self.filters_and_custom_folder_container_widget) filters_and_custom_folder_layout.setContentsMargins(0, 5, 0, 0) @@ -1090,6 +1089,7 @@ class DownloaderApp (QWidget ): char_input_and_button_layout.addWidget(self.char_filter_scope_toggle_button, 1) character_filter_v_layout.addLayout(char_input_and_button_layout) + # --- Custom Folder Widget Definition --- self.custom_folder_widget = QWidget() custom_folder_v_layout = QVBoxLayout(self.custom_folder_widget) custom_folder_v_layout.setContentsMargins(0, 0, 0, 0) @@ -1143,100 +1143,52 @@ class DownloaderApp (QWidget ): file_filter_layout = QVBoxLayout() file_filter_layout.setContentsMargins(0, 10, 0, 0) file_filter_layout.addWidget(QLabel("Filter Files:")) - - radio_button_flow_layout = FlowLayout() - radio_button_flow_layout.setSpacing(10) 
- + radio_button_layout = QHBoxLayout() + radio_button_layout.setSpacing(10) self.radio_group = QButtonGroup(self) - - group1_widget = QWidget() - group1_layout = QHBoxLayout(group1_widget) - group1_layout.setContentsMargins(0, 0, 0, 0) - group1_layout.setSpacing(15) self.radio_all = QRadioButton("All") self.radio_images = QRadioButton("Images/GIFs") self.radio_videos = QRadioButton("Videos") - self.radio_group.addButton(self.radio_all) - self.radio_group.addButton(self.radio_images) - self.radio_group.addButton(self.radio_videos) - group1_layout.addWidget(self.radio_all) - group1_layout.addWidget(self.radio_images) - group1_layout.addWidget(self.radio_videos) - radio_button_flow_layout.addWidget(group1_widget) - - group2_widget = QWidget() - group2_layout = QHBoxLayout(group2_widget) - group2_layout.setContentsMargins(0, 0, 0, 0) - group2_layout.setSpacing(15) self.radio_only_archives = QRadioButton("📦 Only Archives") self.radio_only_audio = QRadioButton("🎧 Only Audio") self.radio_only_links = QRadioButton("🔗 Only Links") - self.radio_group.addButton(self.radio_only_archives) - self.radio_group.addButton(self.radio_only_audio) - self.radio_group.addButton(self.radio_only_links) - group2_layout.addWidget(self.radio_only_archives) - group2_layout.addWidget(self.radio_only_audio) - group2_layout.addWidget(self.radio_only_links) - radio_button_flow_layout.addWidget(group2_widget) - - group3_widget = QWidget() - group3_layout = QHBoxLayout(group3_widget) - group3_layout.setContentsMargins(0, 0, 0, 0) - group3_layout.setSpacing(15) - self.radio_more = QRadioButton("More") - self.favorite_mode_checkbox = QCheckBox("⭐ Favorite Mode") - self.radio_group.addButton(self.radio_more) - group3_layout.addWidget(self.radio_more) - group3_layout.addWidget(self.favorite_mode_checkbox) - radio_button_flow_layout.addWidget(group3_widget) + self.radio_more = QRadioButton("More") self.radio_all.setChecked(True) - file_filter_layout.addLayout(radio_button_flow_layout) + for btn in 
[self.radio_all, self.radio_images, self.radio_videos, self.radio_only_archives, self.radio_only_audio, self.radio_only_links, self.radio_more]: + self.radio_group.addButton(btn) + radio_button_layout.addWidget(btn) + self.favorite_mode_checkbox = QCheckBox() + self.favorite_mode_checkbox.setChecked(False) + radio_button_layout.addWidget(self.favorite_mode_checkbox) + radio_button_layout.addStretch(1) + file_filter_layout.addLayout(radio_button_layout) left_layout.addLayout(file_filter_layout) # --- Checkboxes Group --- checkboxes_group_layout = QVBoxLayout() checkboxes_group_layout.setSpacing(10) - - checkboxes_flow_layout = FlowLayout() - checkboxes_flow_layout.setSpacing(10) - - groupA_widget = QWidget() - groupA_layout = QHBoxLayout(groupA_widget) - groupA_layout.setContentsMargins(0,0,0,0) - groupA_layout.setSpacing(15) + row1_layout = QHBoxLayout() + row1_layout.setSpacing(10) self.skip_zip_checkbox = QCheckBox("Skip .zip") self.skip_zip_checkbox.setChecked(True) + row1_layout.addWidget(self.skip_zip_checkbox) self.skip_rar_checkbox = QCheckBox("Skip .rar") self.skip_rar_checkbox.setChecked(True) - groupA_layout.addWidget(self.skip_zip_checkbox) - groupA_layout.addWidget(self.skip_rar_checkbox) - checkboxes_flow_layout.addWidget(groupA_widget) - - groupB_widget = QWidget() - groupB_layout = QHBoxLayout(groupB_widget) - groupB_layout.setContentsMargins(0,0,0,0) - groupB_layout.setSpacing(15) + row1_layout.addWidget(self.skip_rar_checkbox) self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only") + row1_layout.addWidget(self.download_thumbnails_checkbox) self.scan_content_images_checkbox = QCheckBox("Scan Content for Images") self.scan_content_images_checkbox.setChecked(self.scan_content_images_setting) - groupB_layout.addWidget(self.download_thumbnails_checkbox) - groupB_layout.addWidget(self.scan_content_images_checkbox) - checkboxes_flow_layout.addWidget(groupB_widget) - - groupC_widget = QWidget() - groupC_layout = QHBoxLayout(groupC_widget) 
- groupC_layout.setContentsMargins(0,0,0,0) - groupC_layout.setSpacing(15) + row1_layout.addWidget(self.scan_content_images_checkbox) self.compress_images_checkbox = QCheckBox("Compress to WebP") self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).") + row1_layout.addWidget(self.compress_images_checkbox) self.keep_duplicates_checkbox = QCheckBox("Keep Duplicates") self.keep_duplicates_checkbox.setToolTip("If checked, downloads all files from a post even if they have the same name.") - groupC_layout.addWidget(self.compress_images_checkbox) - groupC_layout.addWidget(self.keep_duplicates_checkbox) - checkboxes_flow_layout.addWidget(groupC_widget) - - checkboxes_group_layout.addLayout(checkboxes_flow_layout) + row1_layout.addWidget(self.keep_duplicates_checkbox) + row1_layout.addStretch(1) + checkboxes_group_layout.addLayout(row1_layout) # --- Advanced Settings --- advanced_settings_label = QLabel("⚙️ Advanced Settings:") @@ -1293,47 +1245,33 @@ class DownloaderApp (QWidget ): btn_layout = QHBoxLayout(self.standard_action_buttons_widget) btn_layout.setContentsMargins(0, 10, 0, 0) btn_layout.setSpacing(10) - self.download_btn = QPushButton("⬇️ Start Download") self.download_btn.setStyleSheet("padding: 4px 12px; font-weight: bold;") self.download_btn.clicked.connect(self.start_download) - self.download_btn.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred) - self.pause_btn = QPushButton("⏸️ Pause Download") self.pause_btn.setEnabled(False) self.pause_btn.setStyleSheet("padding: 4px 12px;") self.pause_btn.clicked.connect(self._handle_pause_resume_action) - self.pause_btn.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred) - self.cancel_btn = QPushButton("❌ Cancel & Reset UI") self.cancel_btn.setEnabled(False) self.cancel_btn.setStyleSheet("padding: 4px 12px;") self.cancel_btn.clicked.connect(self.cancel_download_button_action) - self.cancel_btn.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred) - 
self.error_btn = QPushButton("Error") self.error_btn.setToolTip("View files skipped due to errors and optionally retry them.") self.error_btn.setStyleSheet("padding: 4px 8px;") self.error_btn.setEnabled(True) - self.error_btn.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred) - btn_layout.addWidget(self.download_btn) btn_layout.addWidget(self.pause_btn) btn_layout.addWidget(self.cancel_btn) btn_layout.addWidget(self.error_btn) - self.favorite_action_buttons_widget = QWidget() favorite_buttons_layout = QHBoxLayout(self.favorite_action_buttons_widget) self.favorite_mode_artists_button = QPushButton("🖼️ Favorite Artists") self.favorite_mode_posts_button = QPushButton("📄 Favorite Posts") self.favorite_scope_toggle_button = QPushButton() - self.favorite_mode_artists_button.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred) - self.favorite_mode_posts_button.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred) - self.favorite_scope_toggle_button.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred) favorite_buttons_layout.addWidget(self.favorite_mode_artists_button) favorite_buttons_layout.addWidget(self.favorite_mode_posts_button) favorite_buttons_layout.addWidget(self.favorite_scope_toggle_button) - self.bottom_action_buttons_stack = QStackedWidget() self.bottom_action_buttons_stack.addWidget(self.standard_action_buttons_widget) self.bottom_action_buttons_stack.addWidget(self.favorite_action_buttons_widget) @@ -1355,7 +1293,7 @@ class DownloaderApp (QWidget ): left_layout.addLayout(known_chars_label_layout) self.character_list = QListWidget() self.character_list.setSelectionMode(QListWidget.ExtendedSelection) - self.character_list.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Expanding) + self.character_list.setMaximumHeight(150) # Set smaller height left_layout.addWidget(self.character_list, 1) char_manage_layout = QHBoxLayout() char_manage_layout.setSpacing(10) @@ -1392,11 +1330,11 @@ class DownloaderApp (QWidget ): 
char_manage_layout.addWidget(self.known_names_help_button, 0) char_manage_layout.addWidget(self.history_button, 0) char_manage_layout.addWidget(self.future_settings_button, 0) - char_manage_layout.addStretch() left_layout.addLayout(char_manage_layout) left_layout.addStretch(0) # --- Right Panel (Logs) --- + # (This part of the layout is unchanged and remains correct) log_title_layout = QHBoxLayout() self.progress_log_label = QLabel("📜 Progress Log:") log_title_layout.addWidget(self.progress_log_label) @@ -1405,7 +1343,7 @@ class DownloaderApp (QWidget ): self.link_search_input.setPlaceholderText("Search Links...") self.link_search_input.setVisible(False) log_title_layout.addWidget(self.link_search_input) - self.link_search_button = QPushButton("?") + self.link_search_button = QPushButton("🔍") self.link_search_button.setVisible(False) self.link_search_button.setFixedWidth(30) self.link_search_button.setStyleSheet("padding: 4px 4px;") @@ -1485,9 +1423,10 @@ class DownloaderApp (QWidget ): right_layout.addWidget(self.file_progress_label) # --- Final Assembly --- - self.main_splitter.addWidget(left_scroll_area) + self.main_splitter.addWidget(left_scroll_area) # Use the scroll area self.main_splitter.addWidget(right_panel_widget) - self.main_splitter.setSizes([800, 400]) + self.main_splitter.setStretchFactor(0, 7) + self.main_splitter.setStretchFactor(1, 3) top_level_layout = QHBoxLayout(self) top_level_layout.setContentsMargins(0, 0, 0, 0) top_level_layout.addWidget(self.main_splitter) @@ -1507,7 +1446,7 @@ class DownloaderApp (QWidget ): self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) if hasattr(self, 'radio_group') and self.radio_group.checkedButton(): self._handle_filter_mode_change(self.radio_group.checkedButton(), True) - self.radio_group.buttonToggled.connect(self._handle_more_options_toggled) + self.radio_group.buttonToggled.connect(self._handle_more_options_toggled) # Add this line self._update_manga_filename_style_button_text() 
self._update_skip_scope_button_text() @@ -2286,30 +2225,19 @@ class DownloaderApp (QWidget ): if self .external_log_output :self .external_log_output .clear () self .log_signal .emit ("\n"+"="*40 +"\n🔗 External Links Log Disabled\n"+"="*40 ) + def _handle_filter_mode_change(self, button, checked): - if not button or not checked: - return - - # --- FIX: Automatically disable multithreading for text-based modes --- - if button == self.radio_more: - if hasattr(self, 'use_multithreading_checkbox'): - self.use_multithreading_checkbox.setChecked(False) - self.use_multithreading_checkbox.setEnabled(False) - self.log_signal.emit("ℹ️ Text extraction mode enabled. Multithreading has been disabled.") - else: - # Re-enable it for other modes, but respect the manga mode rule that might also disable it. - if hasattr(self, 'use_multithreading_checkbox'): - is_sequential_manga = (self.manga_mode_checkbox.isChecked() and - (self.manga_filename_style == STYLE_DATE_BASED or - self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING)) - if not is_sequential_manga: - self.use_multithreading_checkbox.setEnabled(True) - # --- END FIX --- - + # If a button other than "More" is selected, reset the UI if button != self.radio_more and checked: self.radio_more.setText("More") self.more_filter_scope = None - self.single_pdf_setting = False + self.single_pdf_setting = False # Reset the setting + # Re-enable the checkboxes + if hasattr(self, 'use_multithreading_checkbox'): self.use_multithreading_checkbox.setEnabled(True) + if hasattr(self, 'use_subfolders_checkbox'): self.use_subfolders_checkbox.setEnabled(True) + + if not button or not checked: + return is_only_links =(button ==self .radio_only_links ) is_only_audio =(hasattr (self ,'radio_only_audio')and self .radio_only_audio is not None and button ==self .radio_only_audio ) @@ -2339,6 +2267,8 @@ class DownloaderApp (QWidget ): file_download_mode_active =not is_only_links + + if self .use_subfolders_checkbox :self 
.use_subfolders_checkbox .setEnabled (file_download_mode_active ) if self .skip_words_input :self .skip_words_input .setEnabled (file_download_mode_active ) if self .skip_scope_toggle_button :self .skip_scope_toggle_button .setEnabled (file_download_mode_active ) @@ -2366,17 +2296,22 @@ class DownloaderApp (QWidget ): if not can_show_external_log_option : self .external_links_checkbox .setChecked (False ) + if is_only_links : self .progress_log_label .setText ("📜 Extracted Links Log:") if self .external_log_output :self .external_log_output .hide () if self .log_splitter :self .log_splitter .setSizes ([self .height (),0 ]) + + do_clear_log_in_filter_change =True if self .mega_download_log_preserved_once and self .only_links_log_display_mode ==LOG_DISPLAY_DOWNLOAD_PROGRESS : do_clear_log_in_filter_change =False + if self .main_log_output and do_clear_log_in_filter_change : self .log_signal .emit ("INTERNAL: _handle_filter_mode_change - About to clear log.") self .main_log_output .clear () self .log_signal .emit ("INTERNAL: _handle_filter_mode_change - Log cleared by _handle_filter_mode_change.") + if self .main_log_output :self .main_log_output .setMinimumHeight (0 ) self .log_signal .emit ("="*20 +" Mode changed to: Only Links "+"="*20 ) self ._try_process_next_external_link () @@ -2395,8 +2330,8 @@ class DownloaderApp (QWidget ): else : self .progress_log_label .setText (self ._tr ("progress_log_label_text","📜 Progress Log:")) self .update_external_links_setting (self .external_links_checkbox .isChecked ()if self .external_links_checkbox else False ) - if button != self.radio_more: - self .log_signal .emit (f"="*20 +f" Mode changed to: {button .text ()} "+"="*20 ) + self .log_signal .emit (f"="*20 +f" Mode changed to: {button .text ()} "+"="*20 ) + if is_only_links : self ._filter_links_log () @@ -2427,6 +2362,7 @@ class DownloaderApp (QWidget ): self .update_custom_folder_visibility () self .update_ui_for_manga_mode (self .manga_mode_checkbox .isChecked ()if self 
.manga_mode_checkbox else False ) + def _filter_links_log (self ): if not (self .radio_only_links and self .radio_only_links .isChecked ()):return @@ -2731,43 +2667,69 @@ class DownloaderApp (QWidget ): def _handle_more_options_toggled(self, button, checked): """Shows the MoreOptionsDialog when the 'More' radio button is selected.""" + + # This block handles when the user clicks ON the "More" button. if button == self.radio_more and checked: current_scope = self.more_filter_scope or MoreOptionsDialog.SCOPE_CONTENT current_format = self.text_export_format or 'pdf' - - dialog = MoreOptionsDialog(self, current_scope=current_scope, current_format=current_format, single_pdf_checked=self.single_pdf_setting) + + dialog = MoreOptionsDialog( + self, + current_scope=current_scope, + current_format=current_format, + single_pdf_checked=self.single_pdf_setting + ) if dialog.exec_() == QDialog.Accepted: self.more_filter_scope = dialog.get_selected_scope() self.text_export_format = dialog.get_selected_format() self.single_pdf_setting = dialog.get_single_pdf_state() + # Define the variable based on the dialog's result + is_any_pdf_mode = (self.text_export_format == 'pdf') + + # Update the radio button text to reflect the choice scope_text = "Comments" if self.more_filter_scope == MoreOptionsDialog.SCOPE_COMMENTS else "Description" - format_display = f" ({self.text_export_format.upper()})" if self.single_pdf_setting: format_display = " (Single PDF)" - # --- NEW: Disable checkboxes if Single PDF is active --- - if hasattr(self, 'use_multithreading_checkbox'): - self.use_multithreading_checkbox.setChecked(False) - self.use_multithreading_checkbox.setEnabled(False) - if hasattr(self, 'use_subfolders_checkbox'): - self.use_subfolders_checkbox.setChecked(False) - self.use_subfolders_checkbox.setEnabled(False) - else: - # --- NEW: Re-enable checkboxes if Single PDF is not active --- - if hasattr(self, 'use_multithreading_checkbox'): self.use_multithreading_checkbox.setEnabled(True) - if 
hasattr(self, 'use_subfolders_checkbox'): self.use_subfolders_checkbox.setEnabled(True) - - self.radio_more.setText(f"{scope_text}{format_display}") - + + # --- Logic to Disable/Enable Checkboxes --- + # Disable multithreading for ANY PDF export + if hasattr(self, 'use_multithreading_checkbox'): + self.use_multithreading_checkbox.setEnabled(not is_any_pdf_mode) + if is_any_pdf_mode: + self.use_multithreading_checkbox.setChecked(False) + self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) + + # Also disable subfolders for the "Single PDF" case, as it doesn't apply + if hasattr(self, 'use_subfolders_checkbox'): + self.use_subfolders_checkbox.setEnabled(not self.single_pdf_setting) + if self.single_pdf_setting: + self.use_subfolders_checkbox.setChecked(False) + self.log_signal.emit(f"ℹ️ 'More' filter scope set to: {scope_text}, Format: {self.text_export_format.upper()}") self.log_signal.emit(f"ℹ️ Single PDF setting: {'Enabled' if self.single_pdf_setting else 'Disabled'}") + if is_any_pdf_mode: + self.log_signal.emit("ℹ️ Multithreading automatically disabled for PDF export.") else: + # User cancelled the dialog, so revert to the 'All' option. self.log_signal.emit("ℹ️ 'More' filter selection cancelled. Reverting to 'All'.") self.radio_all.setChecked(True) + # This block handles when the user switches AWAY from "More" to another option. 
+ elif button != self.radio_more and checked: + self.radio_more.setText("More") + self.more_filter_scope = None + self.single_pdf_setting = False + # Re-enable the checkboxes when switching to any non-PDF mode + if hasattr(self, 'use_multithreading_checkbox'): + self.use_multithreading_checkbox.setEnabled(True) + self._update_multithreading_for_date_mode() + if hasattr(self, 'use_subfolders_checkbox'): + self.use_subfolders_checkbox.setEnabled(True) + def delete_selected_character (self ): global KNOWN_NAMES selected_items =self .character_list .selectedItems () @@ -3115,392 +3077,761 @@ class DownloaderApp (QWidget ): if total_posts >0 or processed_posts >0 : self .file_progress_label .setText ("") - def start_download(self, direct_api_url=None, override_output_dir=None, is_restore=False): - global KNOWN_NAMES, BackendDownloadThread, PostProcessorWorker, extract_post_info, clean_folder_name, MAX_FILE_THREADS_PER_POST_OR_WORKER - self._clear_stale_temp_files() - self.session_temp_files = [] + def start_download (self ,direct_api_url =None ,override_output_dir =None, is_restore=False ): + global KNOWN_NAMES ,BackendDownloadThread ,PostProcessorWorker ,extract_post_info ,clean_folder_name ,MAX_FILE_THREADS_PER_POST_OR_WORKER - if self._is_download_active(): + self._clear_stale_temp_files() + self.session_temp_files = [] + + if self ._is_download_active (): QMessageBox.warning(self, "Busy", "A download is already in progress.") - return False + return False - if not (self.favorite_download_queue and not self.is_processing_favorites_queue): - self.main_log_output.clear() - if not direct_api_url and self.favorite_download_queue and not self.is_processing_favorites_queue: - self.log_signal.emit(f"ℹ️ Detected {len(self.favorite_download_queue)} item(s) in the queue. 
Starting processing...") - self.cancellation_message_logged_this_session = False - self._process_next_favorite_download() - return True - if is_restore and self.interrupted_session_data: - api_url = self.interrupted_session_data.get("ui_settings", {}).get("api_url") - else: - api_url = direct_api_url if direct_api_url else self.link_input.text().strip() + if not direct_api_url and self .favorite_download_queue and not self .is_processing_favorites_queue : + self .log_signal .emit (f"ℹ️ Detected {len (self .favorite_download_queue )} item(s) in the queue. Starting processing...") + self .cancellation_message_logged_this_session =False + self ._process_next_favorite_download () + return True if not is_restore and self.interrupted_session_data: + self.log_signal.emit("ℹ️ New download started. Discarding previous interrupted session.") self._clear_session_file() self.interrupted_session_data = None self.is_restore_pending = False + api_url =direct_api_url if direct_api_url else self .link_input .text ().strip () + self .download_history_candidates .clear () + self._update_button_states_and_connections() # Ensure buttons are updated to active state - self.download_history_candidates.clear() - self._update_button_states_and_connections() - if self.favorite_mode_checkbox and self.favorite_mode_checkbox.isChecked() and not direct_api_url and not api_url: - QMessageBox.information(self, "Favorite Mode Active", "Favorite Mode is active. Please use the 'Favorite Artists' or 'Favorite Posts' buttons to start downloads in this mode, or uncheck 'Favorite Mode' to use the URL input.") - self.set_ui_enabled(True) - return False + if self .favorite_mode_checkbox and self .favorite_mode_checkbox .isChecked ()and not direct_api_url and not api_url : + QMessageBox .information (self ,"Favorite Mode Active", + "Favorite Mode is active. 
Please use the 'Favorite Artists' or 'Favorite Posts' buttons to start downloads in this mode, or uncheck 'Favorite Mode' to use the URL input.") + self .set_ui_enabled (True ) + return False - main_ui_download_dir = self.dir_input.text().strip() + main_ui_download_dir =self .dir_input .text ().strip () - if not api_url and not self.favorite_download_queue: - QMessageBox.critical(self, "Input Error", "URL is required.") - return False - elif not api_url and self.favorite_download_queue: - self.log_signal.emit("ℹ️ URL input is empty, but queue has items. Processing queue...") - self.cancellation_message_logged_this_session = False - self._process_next_favorite_download() - return True + if not api_url and not self .favorite_download_queue : + QMessageBox .critical (self ,"Input Error","URL is required.") + return False + elif not api_url and self .favorite_download_queue : + self .log_signal .emit ("ℹ️ URL input is empty, but queue has items. Processing queue...") + self .cancellation_message_logged_this_session =False + self ._process_next_favorite_download () + return True - self.cancellation_message_logged_this_session = False - use_subfolders = self.use_subfolders_checkbox.isChecked() - use_post_subfolders = self.use_subfolder_per_post_checkbox.isChecked() - compress_images = self.compress_images_checkbox.isChecked() - download_thumbnails = self.download_thumbnails_checkbox.isChecked() - use_multithreading_enabled_by_checkbox = self.use_multithreading_checkbox.isChecked() - - try: - num_threads_from_gui = int(self.thread_count_input.text().strip()) - if num_threads_from_gui < 1: num_threads_from_gui = 1 - except ValueError: - QMessageBox.critical(self, "Thread Count Error", "Invalid number of threads. 
Please enter a positive number.") - return False + self .cancellation_message_logged_this_session =False + use_subfolders =self .use_subfolders_checkbox .isChecked () + use_post_subfolders =self .use_subfolder_per_post_checkbox .isChecked () + compress_images =self .compress_images_checkbox .isChecked () + download_thumbnails =self .download_thumbnails_checkbox .isChecked () - if use_multithreading_enabled_by_checkbox: - if num_threads_from_gui > MAX_THREADS: - hard_warning_msg = (f"You've entered a thread count ({num_threads_from_gui}) exceeding the maximum of {MAX_THREADS}.\n\n" - "Using an extremely high number of threads can lead to:\n" - " - Diminishing returns (no significant speed increase).\n" - " - Increased system instability or application crashes.\n" - " - Higher chance of being rate-limited or temporarily IP-banned by the server.\n\n" - f"The thread count has been automatically capped to {MAX_THREADS} for stability.") - QMessageBox.warning(self, "High Thread Count Warning", hard_warning_msg) - num_threads_from_gui = MAX_THREADS - self.thread_count_input.setText(str(MAX_THREADS)) - self.log_signal.emit(f"⚠️ User attempted {num_threads_from_gui} threads, capped to {MAX_THREADS}.") - if SOFT_WARNING_THREAD_THRESHOLD < num_threads_from_gui <= MAX_THREADS: - soft_warning_msg_box = QMessageBox(self) - soft_warning_msg_box.setIcon(QMessageBox.Question) - soft_warning_msg_box.setWindowTitle("Thread Count Advisory") - soft_warning_msg_box.setText(f"You've set the thread count to {num_threads_from_gui}.\n\n" - "While this is within the allowed limit, using a high number of threads (typically above 40-50) can sometimes lead to:\n" - " - Increased errors or failed file downloads.\n" - " - Connection issues with the server.\n" - " - Higher system resource usage.\n\n" - "For most users and connections, 10-30 threads provide a good balance.\n\n" - f"Do you want to proceed with {num_threads_from_gui} threads, or would you like to change the value?") - proceed_button = 
soft_warning_msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole) - change_button = soft_warning_msg_box.addButton("Change Thread Value", QMessageBox.RejectRole) - soft_warning_msg_box.setDefaultButton(proceed_button) - soft_warning_msg_box.setEscapeButton(change_button) - soft_warning_msg_box.exec_() - if soft_warning_msg_box.clickedButton() == change_button: - self.log_signal.emit(f"ℹ️ User opted to change thread count from {num_threads_from_gui} after advisory.") - self.thread_count_input.setFocus() - self.thread_count_input.selectAll() - return False + use_multithreading_enabled_by_checkbox =self .use_multithreading_checkbox .isChecked () + try : + num_threads_from_gui =int (self .thread_count_input .text ().strip ()) + if num_threads_from_gui <1 :num_threads_from_gui =1 + except ValueError : + QMessageBox .critical (self ,"Thread Count Error","Invalid number of threads. Please enter a positive number.") + return False - raw_skip_words = self.skip_words_input.text().strip() - skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()] - raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else "" - allow_multipart = self.allow_multipart_download_setting - remove_from_filename_words_list = [word.strip() for word in raw_remove_filename_words.split(',') if word.strip()] - scan_content_for_images = self.scan_content_images_checkbox.isChecked() if hasattr(self, 'scan_content_images_checkbox') else False - use_cookie_from_checkbox = self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False - app_base_dir_for_cookies = os.path.dirname(self.config_file) - cookie_text_from_input = self.cookie_text_input.text().strip() if hasattr(self, 'cookie_text_input') and use_cookie_from_checkbox else "" - use_cookie_for_this_run = use_cookie_from_checkbox - selected_cookie_file_path_for_backend = self.selected_cookie_filepath if 
use_cookie_from_checkbox and self.selected_cookie_filepath else None + if use_multithreading_enabled_by_checkbox : + if num_threads_from_gui >MAX_THREADS : + hard_warning_msg =( + f"You've entered a thread count ({num_threads_from_gui }) exceeding the maximum of {MAX_THREADS }.\n\n" + "Using an extremely high number of threads can lead to:\n" + " - Diminishing returns (no significant speed increase).\n" + " - Increased system instability or application crashes.\n" + " - Higher chance of being rate-limited or temporarily IP-banned by the server.\n\n" + f"The thread count has been automatically capped to {MAX_THREADS } for stability." + ) + QMessageBox .warning (self ,"High Thread Count Warning",hard_warning_msg ) + num_threads_from_gui =MAX_THREADS + self .thread_count_input .setText (str (MAX_THREADS )) + self .log_signal .emit (f"⚠️ User attempted {num_threads_from_gui } threads, capped to {MAX_THREADS }.") + if SOFT_WARNING_THREAD_THRESHOLD MAX_THREADS : + hard_warning_msg =( + f"You've entered a thread count ({num_threads_from_gui }) exceeding the maximum of {MAX_THREADS }.\n\n" + "Using an extremely high number of threads can lead to:\n" + " - Diminishing returns (no significant speed increase).\n" + " - Increased system instability or application crashes.\n" + " - Higher chance of being rate-limited or temporarily IP-banned by the server.\n\n" + f"The thread count has been automatically capped to {MAX_THREADS } for stability." 
+ ) + QMessageBox .warning (self ,"High Thread Count Warning",hard_warning_msg ) + num_threads_from_gui =MAX_THREADS + self .thread_count_input .setText (str (MAX_THREADS )) + self .log_signal .emit (f"⚠️ User attempted {num_threads_from_gui } threads, capped to {MAX_THREADS }.") + if SOFT_WARNING_THREAD_THRESHOLD end_page :raise ValueError ("Start page cannot be greater than end page.") + + if manga_mode and start_page and end_page : + msg_box =QMessageBox (self ) + msg_box .setIcon (QMessageBox .Warning ) + msg_box .setWindowTitle ("Manga Mode & Page Range Warning") + msg_box .setText ( + "You have enabled Manga/Comic Mode and also specified a Page Range.\n\n" + "Manga Mode processes posts from oldest to newest across all available pages by default.\n" + "If you use a page range, you might miss parts of the manga/comic if it starts before your 'Start Page' or continues after your 'End Page'.\n\n" + "However, if you are certain the content you want is entirely within this page range (e.g., a short series, or you know the specific pages for a volume), then proceeding is okay.\n\n" + "Do you want to proceed with this page range in Manga Mode?" 
+ ) + proceed_button =msg_box .addButton ("Proceed Anyway",QMessageBox .AcceptRole ) + cancel_button =msg_box .addButton ("Cancel Download",QMessageBox .RejectRole ) + msg_box .setDefaultButton (proceed_button ) + msg_box .setEscapeButton (cancel_button ) + msg_box .exec_ () + + if msg_box .clickedButton ()==cancel_button : + self .log_signal .emit ("❌ Download cancelled by user due to Manga Mode & Page Range warning.") + return False + except ValueError as e : + QMessageBox .critical (self ,"Page Range Error",f"Invalid page range: {e }") + return False + self .external_link_queue .clear ();self .extracted_links_cache =[];self ._is_processing_external_link_queue =False ;self ._current_link_post_title =None + + raw_character_filters_text =self .character_input .text ().strip () + parsed_character_filter_objects =self ._parse_character_filters (raw_character_filters_text ) + + actual_filters_to_use_for_run =[] + + needs_folder_naming_validation =(use_subfolders or manga_mode )and not extract_links_only + + if parsed_character_filter_objects : + actual_filters_to_use_for_run =parsed_character_filter_objects + + if not extract_links_only : + self .log_signal .emit (f"ℹ️ Using character filters for matching: {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") + + filter_objects_to_potentially_add_to_known_list =[] + for filter_item_obj in parsed_character_filter_objects : + item_primary_name =filter_item_obj ["name"] + cleaned_name_test =clean_folder_name (item_primary_name ) + if needs_folder_naming_validation and not cleaned_name_test : + QMessageBox .warning (self ,"Invalid Filter Name for Folder",f"Filter name '{item_primary_name }' is invalid for a folder and will be skipped for Known.txt interaction.") + self .log_signal .emit (f"⚠️ Skipping invalid filter for Known.txt interaction: '{item_primary_name }'") + continue + + an_alias_is_already_known =False + if any (kn_entry ["name"].lower ()==item_primary_name .lower ()for kn_entry in 
KNOWN_NAMES ): + an_alias_is_already_known =True + elif filter_item_obj ["is_group"]and needs_folder_naming_validation : + for alias_in_filter_obj in filter_item_obj ["aliases"]: + if any (kn_entry ["name"].lower ()==alias_in_filter_obj .lower ()or alias_in_filter_obj .lower ()in [a .lower ()for a in kn_entry ["aliases"]]for kn_entry in KNOWN_NAMES ): + an_alias_is_already_known =True ;break + + if an_alias_is_already_known and filter_item_obj ["is_group"]: + self .log_signal .emit (f"ℹ️ An alias from group '{item_primary_name }' is already known. Group will not be prompted for Known.txt addition.") + + should_prompt_to_add_to_known_list =( + needs_folder_naming_validation and not manga_mode and + not any (kn_entry ["name"].lower ()==item_primary_name .lower ()for kn_entry in KNOWN_NAMES )and + not an_alias_is_already_known + ) + if should_prompt_to_add_to_known_list : + if not any (obj_to_add ["name"].lower ()==item_primary_name .lower ()for obj_to_add in filter_objects_to_potentially_add_to_known_list ): + filter_objects_to_potentially_add_to_known_list .append (filter_item_obj ) + elif manga_mode and needs_folder_naming_validation and item_primary_name .lower ()not in {kn_entry ["name"].lower ()for kn_entry in KNOWN_NAMES }and not an_alias_is_already_known : + self .log_signal .emit (f"ℹ️ Manga Mode: Using filter '{item_primary_name }' for this session without adding to Known Names.") + + if filter_objects_to_potentially_add_to_known_list : + confirm_dialog =ConfirmAddAllDialog (filter_objects_to_potentially_add_to_known_list ,self ,self ) + dialog_result =confirm_dialog .exec_ () + + if dialog_result ==CONFIRM_ADD_ALL_CANCEL_DOWNLOAD : + self .log_signal .emit ("❌ Download cancelled by user at new name confirmation stage.") + return False + elif isinstance (dialog_result ,list ): + if dialog_result : + self .log_signal .emit (f"ℹ️ User chose to add {len (dialog_result )} new entry/entries to Known.txt.") + for filter_obj_to_add in dialog_result : + if 
filter_obj_to_add .get ("components_are_distinct_for_known_txt"): + self .log_signal .emit (f" Processing group '{filter_obj_to_add ['name']}' to add its components individually to Known.txt.") + for alias_component in filter_obj_to_add ["aliases"]: + self .add_new_character ( + name_to_add =alias_component , + is_group_to_add =False , + aliases_to_add =[alias_component ], + suppress_similarity_prompt =True + ) + else : + self .add_new_character ( + name_to_add =filter_obj_to_add ["name"], + is_group_to_add =filter_obj_to_add ["is_group"], + aliases_to_add =filter_obj_to_add ["aliases"], + suppress_similarity_prompt =True + ) + else : + self .log_signal .emit ("ℹ️ User confirmed adding, but no names were selected in the dialog. No new names added to Known.txt.") + elif dialog_result ==CONFIRM_ADD_ALL_SKIP_ADDING : + self .log_signal .emit ("ℹ️ User chose not to add new names to Known.txt for this session.") + else : + self .log_signal .emit (f"ℹ️ Using character filters for link extraction: {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") + + self .dynamic_character_filter_holder .set_filters (actual_filters_to_use_for_run ) + + creator_folder_ignore_words_for_run =None + character_filters_are_empty =not actual_filters_to_use_for_run + if is_full_creator_download and character_filters_are_empty : + creator_folder_ignore_words_for_run =CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS + log_messages .append (f" Creator Download (No Char Filter): Applying default folder name ignore list ({len (creator_folder_ignore_words_for_run )} words).") + + custom_folder_name_cleaned =None + if use_subfolders and post_id_from_url and self .custom_folder_widget and self .custom_folder_widget .isVisible ()and not extract_links_only : + raw_custom_name =self .custom_folder_input .text ().strip () + if raw_custom_name : + cleaned_custom =clean_folder_name (raw_custom_name ) + if cleaned_custom :custom_folder_name_cleaned =cleaned_custom + else :self .log_signal 
.emit (f"⚠️ Invalid custom folder name ignored: '{raw_custom_name }' (resulted in empty string after cleaning).") + + + self .main_log_output .clear () + if extract_links_only :self .main_log_output .append ("🔗 Extracting Links..."); + elif backend_filter_mode =='archive':self .main_log_output .append ("📦 Downloading Archives Only...") + + if self .external_log_output :self .external_log_output .clear () + if self .show_external_links and not extract_links_only and backend_filter_mode !='archive': + self .external_log_output .append ("🔗 External Links Found:") + + self .file_progress_label .setText ("");self .cancellation_event .clear ();self .active_futures =[] + self .total_posts_to_process =0 ;self .processed_posts_count =0 ;self .download_counter =0 ;self .skip_counter =0 + self .progress_label .setText (self ._tr ("progress_initializing_text","Progress: Initializing...")) + + self .retryable_failed_files_info .clear () + self .permanently_failed_files_for_dialog .clear () + self._update_error_button_count() + + manga_date_file_counter_ref_for_thread =None + if manga_mode and self .manga_filename_style ==STYLE_DATE_BASED and not extract_links_only : + manga_date_file_counter_ref_for_thread =None + self .log_signal .emit (f"ℹ️ Manga Date Mode: File counter will be initialized by the download thread.") + + manga_global_file_counter_ref_for_thread =None + if manga_mode and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING and not extract_links_only : + manga_global_file_counter_ref_for_thread =None + self .log_signal .emit (f"ℹ️ Manga Title+GlobalNum Mode: File counter will be initialized by the download thread (starts at 1).") + + effective_num_post_workers =1 + + effective_num_file_threads_per_worker =1 + + if post_id_from_url : + if use_multithreading_enabled_by_checkbox : + effective_num_file_threads_per_worker =max (1 ,min (num_threads_from_gui ,MAX_FILE_THREADS_PER_POST_OR_WORKER )) + else : + if manga_mode and self .manga_filename_style 
==STYLE_DATE_BASED : + effective_num_post_workers =1 + elif manga_mode and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING : + effective_num_post_workers =1 + effective_num_file_threads_per_worker =1 + elif use_multithreading_enabled_by_checkbox : + effective_num_post_workers =max (1 ,min (num_threads_from_gui ,MAX_THREADS )) + effective_num_file_threads_per_worker =1 + + if not extract_links_only :log_messages .append (f" Save Location: {effective_output_dir_for_run }") + + if post_id_from_url : + log_messages .append (f" Mode: Single Post") + log_messages .append (f" ↳ File Downloads: Up to {effective_num_file_threads_per_worker } concurrent file(s)") + else : + log_messages .append (f" Mode: Creator Feed") + log_messages .append (f" Post Processing: {'Multi-threaded ('+str (effective_num_post_workers )+' workers)'if effective_num_post_workers >1 else 'Single-threaded (1 worker)'}") + log_messages .append (f" ↳ File Downloads per Worker: Up to {effective_num_file_threads_per_worker } concurrent file(s)") + pr_log ="All" + if start_page or end_page : + pr_log =f"{f'From {start_page } 'if start_page else ''}{'to 'if start_page and end_page else ''}{f'{end_page }'if end_page else (f'Up to {end_page }'if end_page else (f'From {start_page }'if start_page else 'Specific Range'))}".strip () + + if manga_mode : + log_messages .append (f" Page Range: {pr_log if pr_log else 'All'} (Manga Mode - Oldest Posts Processed First within range)") + else : + log_messages .append (f" Page Range: {pr_log if pr_log else 'All'}") + + + if not extract_links_only : + log_messages .append (f" Subfolders: {'Enabled'if use_subfolders else 'Disabled'}") + if use_subfolders and self.use_subfolder_per_post_checkbox.isChecked(): + use_date_prefix = self.date_prefix_checkbox.isChecked() if hasattr(self, 'date_prefix_checkbox') else False + log_messages.append(f" ↳ Date Prefix for Post Subfolders: {'Enabled' if use_date_prefix else 'Disabled'}") + if use_subfolders : + if 
custom_folder_name_cleaned :log_messages .append (f" Custom Folder (Post): '{custom_folder_name_cleaned }'") + if actual_filters_to_use_for_run : + log_messages .append (f" Character Filters: {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") + log_messages .append (f" ↳ Char Filter Scope: {current_char_filter_scope .capitalize ()}") + elif use_subfolders : + log_messages .append (f" Folder Naming: Automatic (based on title/known names)") - if not extract_links_only: - log_messages.append(f" Subfolders: {'Enabled' if use_subfolders else 'Disabled'}") - if use_subfolders and use_post_subfolders: - log_messages.append(f" ↳ Date Prefix for Post Subfolders: {'Enabled' if self.date_prefix_checkbox.isChecked() else 'Disabled'}") - - raw_character_filters_text = self.character_input.text().strip() - parsed_character_filter_objects = self._parse_character_filters(raw_character_filters_text) - if parsed_character_filter_objects: - log_messages.append(f" Character Filters: {', '.join(item['name'] for item in parsed_character_filter_objects)}") - log_messages.append(f" ↳ Char Filter Scope: {current_char_filter_scope.capitalize()}") - elif use_subfolders: - log_messages.append(f" Folder Naming: Automatic (based on title/known names)") keep_duplicates = self.keep_duplicates_checkbox.isChecked() if hasattr(self, 'keep_duplicates_checkbox') else False log_messages.extend([ f" File Type Filter: {user_selected_filter_text} (Backend processing as: {backend_filter_mode})", f" Keep In-Post Duplicates: {'Enabled' if keep_duplicates else 'Disabled'}", f" Skip Archives: {'.zip' if effective_skip_zip else ''}{', ' if effective_skip_zip and effective_skip_rar else ''}{'.rar' if effective_skip_rar else ''}{'None (Archive Mode)' if backend_filter_mode == 'archive' else ('None' if not (effective_skip_zip or effective_skip_rar) else '')}", - f" Skip Words Scope: {current_skip_words_scope.capitalize()}", - f" Remove Words from Filename: {', 
'.join(remove_from_filename_words_list) if remove_from_filename_words_list else 'None'}", - f" Compress Images: {'Enabled' if compress_images else 'Disabled'}", - f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}" + f" Skip Words Scope: {current_skip_words_scope .capitalize ()}", + f" Remove Words from Filename: {', '.join (remove_from_filename_words_list )if remove_from_filename_words_list else 'None'}", + f" Compress Images: {'Enabled'if compress_images else 'Disabled'}", + f" Thumbnails Only: {'Enabled'if download_thumbnails else 'Disabled'}" ]) - log_messages.append(f" Scan Post Content for Images: {'Enabled' if scan_content_for_images else 'Disabled'}") - else: - log_messages.append(f" Mode: Extracting Links Only") + log_messages .append (f" Scan Post Content for Images: {'Enabled'if scan_content_for_images else 'Disabled'}") + else : + log_messages .append (f" Mode: Extracting Links Only") - log_messages.append(f" Show External Links: {'Enabled' if self.show_external_links and not extract_links_only and backend_filter_mode != 'archive' else 'Disabled'}") - if manga_mode: - log_messages.append(f" Manga Mode: Enabled") - log_messages.append(f" ↳ Manga Filename Style: {self.manga_filename_style}") - - log_messages.append(f" Use Cookie ('cookies.txt'): {'Enabled' if use_cookie_from_checkbox else 'Disabled'}") - if use_cookie_from_checkbox and cookie_text_from_input: - log_messages.append(f" ↳ Cookie Text Provided: Yes (length: {len(cookie_text_from_input)})") - elif use_cookie_from_checkbox and selected_cookie_file_path_for_backend: - log_messages.append(f" ↳ Cookie File Selected: {os.path.basename(selected_cookie_file_path_for_backend)}") - - should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url - log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading_for_posts else 'Single-threaded (posts)'}") - if should_use_multithreading_for_posts: - log_messages.append(f" 
Number of Post Worker Threads: {num_threads_from_gui}") - - log_messages.append("="*40) + log_messages .append (f" Show External Links: {'Enabled'if self .show_external_links and not extract_links_only and backend_filter_mode !='archive'else 'Disabled'}") - for msg in log_messages: - self.log_signal.emit(msg) + if manga_mode : + log_messages .append (f" Manga Mode (File Renaming by Post Title): Enabled") + log_messages .append (f" ↳ Manga Filename Style: {'Post Title Based'if self .manga_filename_style ==STYLE_POST_TITLE else 'Original File Name'}") + if actual_filters_to_use_for_run : + log_messages .append (f" ↳ Manga Character Filter (for naming/folder): {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") + log_messages .append (f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).") + + log_messages .append (f" Use Cookie ('cookies.txt'): {'Enabled'if use_cookie_from_checkbox else 'Disabled'}") + if use_cookie_from_checkbox and cookie_text_from_input : + log_messages .append (f" ↳ Cookie Text Provided: Yes (length: {len (cookie_text_from_input )})") + elif use_cookie_from_checkbox and selected_cookie_file_path_for_backend : + log_messages .append (f" ↳ Cookie File Selected: {os .path .basename (selected_cookie_file_path_for_backend )}") + should_use_multithreading_for_posts =use_multithreading_enabled_by_checkbox and not post_id_from_url + if manga_mode and (self .manga_filename_style ==STYLE_DATE_BASED or self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING )and not post_id_from_url : + enforced_by_style ="Date Mode"if self .manga_filename_style ==STYLE_DATE_BASED else "Title+GlobalNum Mode" + should_use_multithreading_for_posts =False + log_messages .append (f" Threading: Single-threaded (posts) - Enforced by Manga {enforced_by_style } (Actual workers: {effective_num_post_workers if effective_num_post_workers >1 else 1 })") + else : + log_messages .append (f" Threading: {'Multi-threaded 
(posts)'if should_use_multithreading_for_posts else 'Single-threaded (posts)'}") + if should_use_multithreading_for_posts : + log_messages .append (f" Number of Post Worker Threads: {effective_num_post_workers }") + log_messages .append ("="*40 ) + for msg in log_messages :self .log_signal .emit (msg ) + + self .set_ui_enabled (False ) - self.set_ui_enabled(False) from src.config.constants import FOLDER_NAME_STOP_WORDS - - manga_date_file_counter_ref_for_thread = None - if manga_mode and self.manga_filename_style == STYLE_DATE_BASED and not extract_links_only: - manga_date_file_counter_ref_for_thread = None - manga_global_file_counter_ref_for_thread = None - if manga_mode and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING and not extract_links_only: - manga_global_file_counter_ref_for_thread = None - creator_folder_ignore_words_for_run = None - if not post_id_from_url and not self._parse_character_filters(self.character_input.text().strip()): - from src.config.constants import CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS - creator_folder_ignore_words_for_run = CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS - - args_template = { - 'processed_ids_to_skip': processed_ids_to_skip, - 'api_url_input': api_url, - 'output_dir': effective_output_dir_for_run, - 'download_root': effective_output_dir_for_run, - 'known_names': list(KNOWN_NAMES), - 'known_names_copy': list(KNOWN_NAMES), - 'filter_character_list': self._parse_character_filters(self.character_input.text().strip()), - 'filter_mode': backend_filter_mode, - 'text_only_scope': text_only_scope_for_run, - 'text_export_format': export_format_for_run, - 'single_pdf_mode': self.single_pdf_setting, - 'skip_zip': effective_skip_zip, - 'skip_rar': effective_skip_rar, - 'use_subfolders': use_subfolders, - 'use_post_subfolders': use_post_subfolders, - 'compress_images': compress_images, - 'download_thumbnails': download_thumbnails, - 'service': service, - 'user_id': user_id, - 'downloaded_files': 
self.downloaded_files, - 'downloaded_files_lock': self.downloaded_files_lock, - 'downloaded_file_hashes': self.downloaded_file_hashes, - 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, - 'skip_words_list': skip_words_list, - 'skip_words_scope': current_skip_words_scope, - 'remove_from_filename_words_list': remove_from_filename_words_list, - 'char_filter_scope': self.get_char_filter_scope(), - 'show_external_links': self.show_external_links, - 'extract_links_only': extract_links_only, - 'start_page': start_page, - 'end_page': end_page, - 'target_post_id_from_initial_url': post_id_from_url, - 'custom_folder_name': self.custom_folder_input.text().strip(), - 'manga_mode_active': manga_mode, - 'unwanted_keywords': FOLDER_NAME_STOP_WORDS, - 'cancellation_event': self.cancellation_event, - 'manga_date_prefix': self.manga_date_prefix_input.text().strip() if hasattr(self, 'manga_date_prefix_input') else "", - 'dynamic_character_filter_holder': self.dynamic_character_filter_holder, - 'pause_event': self.pause_event, - 'scan_content_for_images': scan_content_for_images, - 'manga_filename_style': self.manga_filename_style, - 'allow_multipart_download': allow_multipart, - 'cookie_text': cookie_text_from_input, - 'selected_cookie_file': selected_cookie_file_path_for_backend, - 'app_base_dir': app_base_dir_for_cookies, - 'project_root_dir': self.app_base_dir, - 'use_cookie': use_cookie_for_this_run, - 'session_file_path': self.session_file_path, - 'session_lock': self.session_lock, - 'use_date_prefix_for_subfolder': self.date_prefix_checkbox.isChecked() if hasattr(self, 'date_prefix_checkbox') else False, - 'keep_in_post_duplicates': self.keep_duplicates_checkbox.isChecked() if hasattr(self, 'keep_duplicates_checkbox') else False, - 'skip_current_file_flag': None, - 'override_output_dir': override_output_dir, - 'manga_date_file_counter_ref': manga_date_file_counter_ref_for_thread, - 'manga_global_file_counter_ref': manga_global_file_counter_ref_for_thread, - 
'creator_download_folder_ignore_words': creator_folder_ignore_words_for_run, + args_template ={ + 'api_url_input':api_url , + 'download_root':effective_output_dir_for_run , + 'output_dir':effective_output_dir_for_run , + 'known_names':list (KNOWN_NAMES ), + 'known_names_copy':list (KNOWN_NAMES ), + 'filter_character_list':actual_filters_to_use_for_run , + 'filter_mode':backend_filter_mode , + 'text_only_scope': text_only_scope_for_run, + 'text_export_format': export_format_for_run, + 'single_pdf_mode': self.single_pdf_setting, + 'skip_zip':effective_skip_zip , + 'skip_rar':effective_skip_rar , + 'use_subfolders':use_subfolders , + 'use_post_subfolders':use_post_subfolders , + 'compress_images':compress_images , + 'download_thumbnails':download_thumbnails , + 'service':service , + 'user_id':user_id , + 'downloaded_files':self .downloaded_files , + 'downloaded_files_lock':self .downloaded_files_lock , + 'downloaded_file_hashes':self .downloaded_file_hashes , + 'downloaded_file_hashes_lock':self .downloaded_file_hashes_lock , + 'skip_words_list':skip_words_list , + 'skip_words_scope':current_skip_words_scope , + 'remove_from_filename_words_list':remove_from_filename_words_list , + 'char_filter_scope':current_char_filter_scope , + 'show_external_links':self .show_external_links , + 'extract_links_only':extract_links_only , + 'start_page':start_page , + 'end_page':end_page , + 'target_post_id_from_initial_url':post_id_from_url , + 'custom_folder_name':custom_folder_name_cleaned , + 'manga_mode_active':manga_mode , + 'unwanted_keywords':FOLDER_NAME_STOP_WORDS , + 'cancellation_event':self .cancellation_event , + 'manga_date_prefix':manga_date_prefix_text , + 'dynamic_character_filter_holder':self .dynamic_character_filter_holder , + 'pause_event':self .pause_event , + 'scan_content_for_images':scan_content_for_images , + 'manga_filename_style':self .manga_filename_style , + 'num_file_threads_for_worker':effective_num_file_threads_per_worker , + 
'manga_date_file_counter_ref':manga_date_file_counter_ref_for_thread , + 'allow_multipart_download':allow_multipart , + 'cookie_text':cookie_text_from_input , + 'selected_cookie_file':selected_cookie_file_path_for_backend , + 'manga_global_file_counter_ref':manga_global_file_counter_ref_for_thread , + 'app_base_dir':app_base_dir_for_cookies , + 'project_root_dir': self.app_base_dir, + 'use_cookie':use_cookie_for_this_run , + 'session_file_path': self.session_file_path, + 'session_lock': self.session_lock, + 'creator_download_folder_ignore_words':creator_folder_ignore_words_for_run , + 'use_date_prefix_for_subfolder': self.date_prefix_checkbox.isChecked() if hasattr(self, 'date_prefix_checkbox') else False, + 'keep_in_post_duplicates': self.keep_duplicates_checkbox.isChecked() if hasattr(self, 'keep_duplicates_checkbox') else False, + 'skip_current_file_flag': None, } - try: - if should_use_multithreading_for_posts: - self.log_signal.emit(f" Initializing multi-threaded download...") - args_template['emitter'] = self.worker_to_gui_queue - self.start_multi_threaded_download(num_post_workers=num_threads_from_gui, **args_template) - else: - self.log_signal.emit(f" Initializing single-threaded download...") - single_thread_args = args_template.copy() - if 'download_root' in single_thread_args: - del single_thread_args['download_root'] - if 'emitter' in single_thread_args: - del single_thread_args['emitter'] - if 'known_names' in single_thread_args: - del single_thread_args['known_names'] - single_thread_args['num_file_threads_for_worker'] = effective_num_file_threads_per_worker - self.start_single_threaded_download(**single_thread_args) - except Exception as e: - self._update_button_states_and_connections() - self.log_signal.emit(f"❌ CRITICAL ERROR preparing download: {e}\n{traceback.format_exc()}") - QMessageBox.critical(self, "Start Error", f"Failed to start process:\n{e}") - self.download_finished(0, 0, False, []) - if self.pause_event: self.pause_event.clear() - 
self.is_paused = False - return True + args_template ['override_output_dir']=override_output_dir + try : + if should_use_multithreading_for_posts : + self .log_signal .emit (f" Initializing multi-threaded {current_mode_log_text .lower ()} with {effective_num_post_workers } post workers...") + args_template ['emitter']=self .worker_to_gui_queue + self .start_multi_threaded_download (num_post_workers =effective_num_post_workers ,**args_template ) + else : + self .log_signal .emit (f" Initializing single-threaded {'link extraction'if extract_links_only else 'download'}...") + dt_expected_keys =[ + 'api_url_input','output_dir','known_names_copy','cancellation_event', + 'filter_character_list','filter_mode','skip_zip','skip_rar', + 'use_subfolders','use_post_subfolders','custom_folder_name', + 'compress_images','download_thumbnails','service','user_id', + 'downloaded_files','downloaded_file_hashes','pause_event','remove_from_filename_words_list', + 'downloaded_files_lock','downloaded_file_hashes_lock','dynamic_character_filter_holder', 'session_file_path', + 'session_lock', + 'skip_words_list','skip_words_scope','char_filter_scope', + 'show_external_links','extract_links_only','num_file_threads_for_worker', + 'start_page','end_page','target_post_id_from_initial_url', + 'manga_date_file_counter_ref', + 'manga_global_file_counter_ref','manga_date_prefix', + 'manga_mode_active','unwanted_keywords','manga_filename_style','scan_content_for_images', + 'allow_multipart_download','use_cookie','cookie_text','app_base_dir','selected_cookie_file','override_output_dir','project_root_dir', + 'text_only_scope', 'text_export_format', + 'single_pdf_mode' + ] + args_template ['skip_current_file_flag']=None + single_thread_args ={key :args_template [key ]for key in dt_expected_keys if key in args_template } + self .start_single_threaded_download (**single_thread_args ) + except Exception as e : + self._update_button_states_and_connections() # Re-enable UI if start fails + self 
.log_signal .emit (f"❌ CRITICAL ERROR preparing download: {e }\n{traceback .format_exc ()}") + QMessageBox .critical (self ,"Start Error",f"Failed to start process:\n{e }") + self .download_finished (0 ,0 ,False ,[]) + if self .pause_event :self .pause_event .clear () + self .is_paused =False + return True def restore_download(self): """Initiates the download restoration process.""" @@ -3692,15 +4023,12 @@ class DownloaderApp (QWidget ): def _fetch_and_queue_posts(self, api_url_input_for_fetcher, worker_args_template, num_post_workers): """ - Fetches all post data first and then submits tasks to the pool. + Fetches post data and submits tasks to the pool. It does NOT wait for completion. """ global PostProcessorWorker, download_from_api try: - # --- CHANGE START: Fetch all posts into a list before queuing --- - self.log_signal.emit("[Fetcher] Fetching ALL available post information first. This may take a moment for large creators...") - - all_posts = [] + # This section remains the same as before post_generator = download_from_api( api_url_input_for_fetcher, logger=lambda msg: self.log_signal.emit(f"[Fetcher] {msg}"), @@ -3715,26 +4043,7 @@ class DownloaderApp (QWidget ): app_base_dir=worker_args_template.get('app_base_dir'), manga_filename_style_for_sort_check=worker_args_template.get('manga_filename_style') ) - - # Consume the entire generator to get all posts - for posts_batch in post_generator: - if self.cancellation_event.is_set(): - break - if isinstance(posts_batch, list): - all_posts.extend(posts_batch) - if self.cancellation_event.is_set(): - self.log_signal.emit("[Fetcher] Post fetching was cancelled.") - # The 'finally' block will handle the rest - return - - self.log_signal.emit(f"[Fetcher] ✅ Fetching complete. Found {len(all_posts)} total posts. 
Now queuing for download...") - - # Set the total count once at the end of fetching - self.total_posts_to_process = len(all_posts) - self.overall_progress_signal.emit(self.total_posts_to_process, 0) - - # Now submit all the collected posts to the worker pool ppw_expected_keys = [ 'post_data','download_root','known_names','filter_character_list','unwanted_keywords', 'filter_mode','skip_zip','skip_rar','use_subfolders','use_post_subfolders', @@ -3755,11 +4064,14 @@ class DownloaderApp (QWidget ): num_file_dl_threads_for_each_worker = worker_args_template.get('num_file_threads_for_worker', 1) emitter_for_worker = worker_args_template.get('emitter') - for post_data_item in all_posts: - if self.cancellation_event.is_set(): + for posts_batch in post_generator: + if self.cancellation_event.is_set(): break - self._submit_post_to_worker_pool(post_data_item, worker_args_template, num_file_dl_threads_for_each_worker, emitter_for_worker, ppw_expected_keys, {}) - # --- CHANGE END --- + if isinstance(posts_batch, list) and posts_batch: + for post_data_item in posts_batch: + self._submit_post_to_worker_pool(post_data_item, worker_args_template, num_file_dl_threads_for_each_worker, emitter_for_worker, ppw_expected_keys, {}) + self.total_posts_to_process += len(posts_batch) + self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count) except Exception as e: self.log_signal.emit(f"❌ Error during post fetching: {e}\n{traceback.format_exc(limit=2)}") diff --git a/workers.py b/workers.py new file mode 100644 index 0000000..89bfb31 --- /dev/null +++ b/workers.py @@ -0,0 +1,2064 @@ +# --- Standard Library Imports --- +import os +import queue +import re +import threading +import time +import traceback +import uuid +import http +import html +import json +from collections import deque +import hashlib +from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError, Future +from io import BytesIO +from urllib .parse import urlparse +import 
requests +# --- Third-Party Library Imports --- +try: + from PIL import Image +except ImportError: + Image = None +# +try: + from fpdf import FPDF + # Add a simple class to handle the header/footer for stories + class PDF(FPDF): + def header(self): + pass # No header + def footer(self): + self.set_y(-15) + self.set_font('Arial', 'I', 8) + self.cell(0, 10, 'Page %s' % self.page_no(), 0, 0, 'C') + +except ImportError: + FPDF = None + +try: + from docx import Document +except ImportError: + Document = None + +# --- PyQt5 Imports --- +from PyQt5 .QtCore import Qt ,QThread ,pyqtSignal ,QMutex ,QMutexLocker ,QObject ,QTimer ,QSettings ,QStandardPaths ,QCoreApplication ,QUrl ,QSize ,QProcess +# --- Local Application Imports --- +from .api_client import download_from_api, fetch_post_comments +from ..services.multipart_downloader import download_file_in_parts, MULTIPART_DOWNLOADER_AVAILABLE +from ..services.drive_downloader import ( + download_mega_file, download_gdrive_file, download_dropbox_file +) +# Corrected Imports: +from ..utils.file_utils import ( + is_image, is_video, is_zip, is_rar, is_archive, is_audio, KNOWN_NAMES, + clean_filename, clean_folder_name +) +from ..utils.network_utils import prepare_cookies_for_request, get_link_platform +from ..utils.text_utils import ( + is_title_match_for_character, is_filename_match_for_character, strip_html_tags, + extract_folder_name_from_title, # This was the function causing the error + match_folders_from_title, match_folders_from_filename_enhanced +) +from ..config.constants import * + +class PostProcessorSignals (QObject ): + progress_signal =pyqtSignal (str ) + file_download_status_signal =pyqtSignal (bool ) + external_link_signal =pyqtSignal (str ,str ,str ,str ,str ) + file_progress_signal =pyqtSignal (str ,object ) + file_successfully_downloaded_signal =pyqtSignal (dict ) + missed_character_post_signal =pyqtSignal (str ,str ) + worker_finished_signal = pyqtSignal(tuple) + +class PostProcessorWorker: + def __init__ 
(self ,post_data ,download_root ,known_names , + filter_character_list ,emitter , + unwanted_keywords ,filter_mode ,skip_zip ,skip_rar , + use_subfolders ,use_post_subfolders ,target_post_id_from_initial_url ,custom_folder_name , + compress_images ,download_thumbnails ,service ,user_id ,pause_event , + api_url_input ,cancellation_event , + downloaded_files ,downloaded_file_hashes ,downloaded_files_lock ,downloaded_file_hashes_lock , + dynamic_character_filter_holder =None ,skip_words_list =None , + skip_words_scope =SKIP_SCOPE_FILES , + show_external_links =False , + extract_links_only =False , + num_file_threads =4 ,skip_current_file_flag =None , + manga_mode_active =False , + manga_filename_style =STYLE_POST_TITLE , + char_filter_scope =CHAR_SCOPE_FILES , + remove_from_filename_words_list =None , + allow_multipart_download =True , + cookie_text ="", + use_cookie =False , + override_output_dir =None , + selected_cookie_file =None , + app_base_dir =None , + manga_date_prefix =MANGA_DATE_PREFIX_DEFAULT , + manga_date_file_counter_ref =None , + scan_content_for_images =False , + creator_download_folder_ignore_words =None , + manga_global_file_counter_ref =None , + use_date_prefix_for_subfolder=False, + keep_in_post_duplicates=False, + session_file_path=None, + session_lock=None, + text_only_scope=None, + text_export_format='txt', + single_pdf_mode=False, + project_root_dir=None, + ): + self .post =post_data + self .download_root =download_root + self .known_names =known_names + self .filter_character_list_objects_initial =filter_character_list if filter_character_list else [] + self .dynamic_filter_holder =dynamic_character_filter_holder + self .unwanted_keywords =unwanted_keywords if unwanted_keywords is not None else set () + self .filter_mode =filter_mode + self .skip_zip =skip_zip + self .skip_rar =skip_rar + self .use_subfolders =use_subfolders + self .use_post_subfolders =use_post_subfolders + self .target_post_id_from_initial_url 
=target_post_id_from_initial_url + self .custom_folder_name =custom_folder_name + self .compress_images =compress_images + self .download_thumbnails =download_thumbnails + self .service =service + self .user_id =user_id + self .api_url_input =api_url_input + self .cancellation_event =cancellation_event + self .pause_event =pause_event + self .emitter =emitter + if not self .emitter : + raise ValueError ("PostProcessorWorker requires an emitter (signals object or queue).") + self .skip_current_file_flag =skip_current_file_flag + self .downloaded_files =downloaded_files if downloaded_files is not None else set () + self .downloaded_file_hashes =downloaded_file_hashes if downloaded_file_hashes is not None else set () + self .downloaded_files_lock =downloaded_files_lock if downloaded_files_lock is not None else threading .Lock () + self .downloaded_file_hashes_lock =downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading .Lock () + self .skip_words_list =skip_words_list if skip_words_list is not None else [] + self .skip_words_scope =skip_words_scope + self .show_external_links =show_external_links + self .extract_links_only =extract_links_only + self .num_file_threads =num_file_threads + self .manga_mode_active =manga_mode_active + self .manga_filename_style =manga_filename_style + self .char_filter_scope =char_filter_scope + self .remove_from_filename_words_list =remove_from_filename_words_list if remove_from_filename_words_list is not None else [] + self .allow_multipart_download =allow_multipart_download + self .manga_date_file_counter_ref =manga_date_file_counter_ref + self .selected_cookie_file =selected_cookie_file + self .app_base_dir =app_base_dir + self .cookie_text =cookie_text + self .manga_date_prefix =manga_date_prefix + self .manga_global_file_counter_ref =manga_global_file_counter_ref + self .use_cookie =use_cookie + self .override_output_dir =override_output_dir + self .scan_content_for_images =scan_content_for_images + 
self .creator_download_folder_ignore_words =creator_download_folder_ignore_words + self.use_date_prefix_for_subfolder = use_date_prefix_for_subfolder + self.keep_in_post_duplicates = keep_in_post_duplicates + self.session_file_path = session_file_path + self.session_lock = session_lock + self.text_only_scope = text_only_scope + self.text_export_format = text_export_format + self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE + self.project_root_dir = project_root_dir + if self .compress_images and Image is None : + + self .logger ("⚠️ Image compression disabled: Pillow library not found.") + self .compress_images =False + def _emit_signal (self ,signal_type_str ,*payload_args ): + """Helper to emit signal either directly or via queue.""" + if isinstance (self .emitter ,queue .Queue ): + self .emitter .put ({'type':signal_type_str ,'payload':payload_args }) + elif self .emitter and hasattr (self .emitter ,f"{signal_type_str }_signal"): + signal_attr =getattr (self .emitter ,f"{signal_type_str }_signal") + signal_attr .emit (*payload_args ) + else : + print (f"(Worker Log - Unrecognized Emitter for {signal_type_str }): {payload_args [0 ]if payload_args else ''}") + def logger (self ,message ): + self ._emit_signal ('progress',message ) + def check_cancel (self ): + return self .cancellation_event .is_set () + def _check_pause (self ,context_message ="Operation"): + if self .pause_event and self .pause_event .is_set (): + self .logger (f" {context_message } paused...") + while self .pause_event .is_set (): + if self .check_cancel (): + self .logger (f" {context_message } cancelled while paused.") + return True + time .sleep (0.5 ) + if not self .check_cancel ():self .logger (f" {context_message } resumed.") + return False + def _download_single_file (self ,file_info ,target_folder_path ,headers ,original_post_id_for_log ,skip_event , + post_title ="",file_index_in_post =0 ,num_files_in_this_post =1 , + manga_date_file_counter_ref =None ): + 
was_original_name_kept_flag =False + + final_filename_saved_for_return ="" + def _get_current_character_filters (self ): + if self .dynamic_filter_holder : + return self .dynamic_filter_holder .get_filters () + return self .filter_character_list_objects_initial + + def _download_single_file (self ,file_info ,target_folder_path ,headers ,original_post_id_for_log ,skip_event , + post_title ="",file_index_in_post =0 ,num_files_in_this_post =1 , + manga_date_file_counter_ref =None , + forced_filename_override =None , + manga_global_file_counter_ref =None ,folder_context_name_for_history =None ): + was_original_name_kept_flag =False + final_filename_saved_for_return ="" + retry_later_details =None + + + + if self ._check_pause (f"File download prep for '{file_info .get ('name','unknown file')}'"):return 0 ,1 ,"",False + if self .check_cancel ()or (skip_event and skip_event .is_set ()):return 0 ,1 ,"",False + + + + file_url =file_info .get ('url') + cookies_to_use_for_file =None + if self .use_cookie : + + cookies_to_use_for_file =prepare_cookies_for_request (self .use_cookie ,self .cookie_text ,self .selected_cookie_file ,self .app_base_dir ,self .logger ) + + + api_original_filename =file_info .get ('_original_name_for_log',file_info .get ('name')) + + + filename_to_save_in_main_path ="" + if forced_filename_override : + filename_to_save_in_main_path =forced_filename_override + self .logger (f" Retrying with forced filename: '{filename_to_save_in_main_path }'") + else : + + if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_FILES or self .skip_words_scope ==SKIP_SCOPE_BOTH ): + filename_to_check_for_skip_words =api_original_filename .lower () + for skip_word in self .skip_words_list : + if skip_word .lower ()in filename_to_check_for_skip_words : + self .logger (f" -> Skip File (Keyword in Original Name '{skip_word }'): '{api_original_filename }'. 
Scope: {self .skip_words_scope }") + return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None + + cleaned_original_api_filename =clean_filename (api_original_filename ) + + original_filename_cleaned_base ,original_ext =os .path .splitext (cleaned_original_api_filename ) + + if not original_ext .startswith ('.'):original_ext ='.'+original_ext if original_ext else '' + if self .manga_mode_active : + + if self .manga_filename_style ==STYLE_ORIGINAL_NAME : + filename_to_save_in_main_path =cleaned_original_api_filename + if self .manga_date_prefix and self .manga_date_prefix .strip (): + cleaned_prefix =clean_filename (self .manga_date_prefix .strip ()) + if cleaned_prefix : + filename_to_save_in_main_path =f"{cleaned_prefix } {filename_to_save_in_main_path }" + else : + self .logger (f"⚠️ Manga Original Name Mode: Provided prefix '{self .manga_date_prefix }' was empty after cleaning. Using original name only.") + was_original_name_kept_flag =True + elif self .manga_filename_style ==STYLE_POST_TITLE : + if post_title and post_title .strip (): + cleaned_post_title_base =clean_filename (post_title .strip ()) + if num_files_in_this_post >1 : + if file_index_in_post ==0 : + filename_to_save_in_main_path =f"{cleaned_post_title_base }{original_ext }" + else : + filename_to_save_in_main_path =f"{cleaned_post_title_base }_{file_index_in_post }{original_ext }" + was_original_name_kept_flag =False + else : + filename_to_save_in_main_path =f"{cleaned_post_title_base }{original_ext }" + else : + filename_to_save_in_main_path =cleaned_original_api_filename + self .logger (f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log }. 
Using cleaned original filename '{filename_to_save_in_main_path }'.") + elif self .manga_filename_style ==STYLE_DATE_BASED : + current_thread_name =threading .current_thread ().name + if manga_date_file_counter_ref is not None and len (manga_date_file_counter_ref )==2 : + counter_val_for_filename =-1 + counter_lock =manga_date_file_counter_ref [1 ] + + with counter_lock : + counter_val_for_filename =manga_date_file_counter_ref [0 ] + manga_date_file_counter_ref [0 ]+=1 + + base_numbered_name =f"{counter_val_for_filename :03d}" + if self .manga_date_prefix and self .manga_date_prefix .strip (): + cleaned_prefix =clean_filename (self .manga_date_prefix .strip ()) + if cleaned_prefix : + filename_to_save_in_main_path =f"{cleaned_prefix } {base_numbered_name }{original_ext }" + else : + filename_to_save_in_main_path =f"{base_numbered_name }{original_ext }";self .logger (f"⚠️ Manga Date Mode: Provided prefix '{self .manga_date_prefix }' was empty after cleaning. Using number only.") + else : + filename_to_save_in_main_path =f"{base_numbered_name }{original_ext }" + else : + self .logger (f"⚠️ Manga Date Mode: Counter ref not provided or malformed for '{api_original_filename }'. Using original. 
Ref: {manga_date_file_counter_ref }") + filename_to_save_in_main_path =cleaned_original_api_filename + elif self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING : + if manga_global_file_counter_ref is not None and len (manga_global_file_counter_ref )==2 : + counter_val_for_filename =-1 + counter_lock =manga_global_file_counter_ref [1 ] + + with counter_lock : + counter_val_for_filename =manga_global_file_counter_ref [0 ] + manga_global_file_counter_ref [0 ]+=1 + + cleaned_post_title_base_for_global =clean_filename (post_title .strip ()if post_title and post_title .strip ()else "post") + filename_to_save_in_main_path =f"{cleaned_post_title_base_for_global }_{counter_val_for_filename :03d}{original_ext }" + else : + self .logger (f"⚠️ Manga Title+GlobalNum Mode: Counter ref not provided or malformed for '{api_original_filename }'. Using original. Ref: {manga_global_file_counter_ref }") + filename_to_save_in_main_path =cleaned_original_api_filename + self .logger (f"⚠️ Manga mode (Title+GlobalNum Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path }' for post {original_post_id_for_log }.") + elif self.manga_filename_style == STYLE_POST_ID: + if original_post_id_for_log and original_post_id_for_log != 'unknown_id': + base_name = str(original_post_id_for_log) + # Always append the file index for consistency (e.g., xxxxxx_0, xxxxxx_1) + filename_to_save_in_main_path = f"{base_name}_{file_index_in_post}{original_ext}" + else: + # Fallback if post_id is somehow not available + self.logger(f"⚠️ Manga mode (Post ID Style): Post ID missing. 
Using cleaned original filename '{cleaned_original_api_filename}'.") + filename_to_save_in_main_path = cleaned_original_api_filename + elif self .manga_filename_style ==STYLE_DATE_POST_TITLE : + published_date_str =self .post .get ('published') + added_date_str =self .post .get ('added') + formatted_date_str ="nodate" + + if published_date_str : + try : + formatted_date_str =published_date_str .split ('T')[0 ] + except Exception : + self .logger (f" ⚠️ Could not parse 'published' date '{published_date_str }' for STYLE_DATE_POST_TITLE. Using 'nodate'.") + elif added_date_str : + try : + formatted_date_str =added_date_str .split ('T')[0 ] + self .logger (f" ⚠️ Post ID {original_post_id_for_log } missing 'published' date, using 'added' date '{added_date_str }' for STYLE_DATE_POST_TITLE naming.") + except Exception : + self .logger (f" ⚠️ Could not parse 'added' date '{added_date_str }' for STYLE_DATE_POST_TITLE. Using 'nodate'.") + else : + self .logger (f" ⚠️ Post ID {original_post_id_for_log } missing both 'published' and 'added' dates for STYLE_DATE_POST_TITLE. Using 'nodate'.") + + if post_title and post_title .strip (): + temp_cleaned_title =clean_filename (post_title .strip ()) + if not temp_cleaned_title or temp_cleaned_title .startswith ("untitled_file"): + self .logger (f"⚠️ Manga mode (Date+PostTitle Style): Post title for post {original_post_id_for_log } ('{post_title }') was empty or generic after cleaning. 
Using 'post' as title part.") + cleaned_post_title_for_filename ="post" + else : + cleaned_post_title_for_filename =temp_cleaned_title + + base_name_for_style =f"{formatted_date_str }_{cleaned_post_title_for_filename }" + + if num_files_in_this_post >1 : + filename_to_save_in_main_path =f"{base_name_for_style }_{file_index_in_post }{original_ext }"if file_index_in_post >0 else f"{base_name_for_style }{original_ext }" + else : + filename_to_save_in_main_path =f"{base_name_for_style }{original_ext }" + else : + self .logger (f"⚠️ Manga mode (Date+PostTitle Style): Post title missing for post {original_post_id_for_log }. Using 'post' as title part with date prefix.") + cleaned_post_title_for_filename ="post" + base_name_for_style =f"{formatted_date_str }_{cleaned_post_title_for_filename }" + if num_files_in_this_post >1 : + filename_to_save_in_main_path =f"{base_name_for_style }_{file_index_in_post }{original_ext }"if file_index_in_post >0 else f"{base_name_for_style }{original_ext }" + else : + filename_to_save_in_main_path =f"{base_name_for_style }{original_ext }" + self .logger (f"⚠️ Manga mode (Title+GlobalNum Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path }' for post {original_post_id_for_log }.") + else : + self .logger (f"⚠️ Manga mode: Unknown filename style '{self .manga_filename_style }'. Defaulting to original filename for '{api_original_filename }'.") + filename_to_save_in_main_path =cleaned_original_api_filename + if not filename_to_save_in_main_path : + filename_to_save_in_main_path =f"manga_file_{original_post_id_for_log }_{file_index_in_post +1 }{original_ext }" + self .logger (f"⚠️ Manga mode: Generated filename was empty. 
Using generic fallback: '{filename_to_save_in_main_path }'.") + was_original_name_kept_flag =False + else : + + filename_to_save_in_main_path =cleaned_original_api_filename + was_original_name_kept_flag =False + + + + if self .remove_from_filename_words_list and filename_to_save_in_main_path : + + base_name_for_removal ,ext_for_removal =os .path .splitext (filename_to_save_in_main_path ) + modified_base_name =base_name_for_removal + for word_to_remove in self .remove_from_filename_words_list : + if not word_to_remove :continue + pattern =re .compile (re .escape (word_to_remove ),re .IGNORECASE ) + modified_base_name =pattern .sub ("",modified_base_name ) + modified_base_name =re .sub (r'[_.\s-]+',' ',modified_base_name ) + modified_base_name =re .sub (r'\s+',' ',modified_base_name ) + modified_base_name =modified_base_name .strip () + if modified_base_name and modified_base_name !=ext_for_removal .lstrip ('.'): + filename_to_save_in_main_path =modified_base_name +ext_for_removal + else : + filename_to_save_in_main_path =base_name_for_removal +ext_for_removal + + + + if not self .download_thumbnails : + + is_img_type =is_image (api_original_filename ) + is_vid_type =is_video (api_original_filename ) + is_archive_type =is_archive (api_original_filename ) + is_audio_type =is_audio (api_original_filename ) + if self .filter_mode =='archive': + if not is_archive_type : + self .logger (f" -> Filter Skip (Archive Mode): '{api_original_filename }' (Not an Archive).") + return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None + elif self .filter_mode =='image': + if not is_img_type : + self .logger (f" -> Filter Skip: '{api_original_filename }' (Not Image).") + return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None + elif self .filter_mode =='video': + if not is_vid_type : + self .logger (f" -> Filter Skip: '{api_original_filename }' (Not Video).") + return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None + 
elif self .filter_mode =='audio': + if not is_audio_type : + self .logger (f" -> Filter Skip: '{api_original_filename }' (Not Audio).") + return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None + if self .skip_zip and is_zip (api_original_filename ): + self .logger (f" -> Pref Skip: '{api_original_filename }' (ZIP).") + return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None + if self .skip_rar and is_rar (api_original_filename ): + self .logger (f" -> Pref Skip: '{api_original_filename }' (RAR).") + return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None + + + + try : + os .makedirs (target_folder_path ,exist_ok =True ) + + except OSError as e : + self .logger (f" ❌ Critical error creating directory '{target_folder_path }': {e }. Skipping file '{api_original_filename }'.") + return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None + + + + + + temp_file_base_for_unique_part ,temp_file_ext_for_unique_part =os .path .splitext (filename_to_save_in_main_path if filename_to_save_in_main_path else api_original_filename ) + unique_id_for_part_file =uuid .uuid4 ().hex [:8 ] + unique_part_file_stem_on_disk =f"{temp_file_base_for_unique_part }_{unique_id_for_part_file }" + max_retries =3 + retry_delay =5 + downloaded_size_bytes =0 + calculated_file_hash =None + downloaded_part_file_path =None + was_multipart_download =False + total_size_bytes =0 + download_successful_flag =False + last_exception_for_retry_later =None + + response_for_this_attempt =None + for attempt_num_single_stream in range (max_retries +1 ): + response_for_this_attempt =None + if self ._check_pause (f"File download attempt for '{api_original_filename }'"):break + if self .check_cancel ()or (skip_event and skip_event .is_set ()):break + try : + if attempt_num_single_stream >0 : + self .logger (f" Retrying download for '{api_original_filename }' (Overall Attempt {attempt_num_single_stream +1 }/{max_retries +1 })...") + 
time .sleep (retry_delay *(2 **(attempt_num_single_stream -1 ))) + self ._emit_signal ('file_download_status',True ) + response =requests .get (file_url ,headers =headers ,timeout =(15 ,300 ),stream =True ,cookies =cookies_to_use_for_file ) + response .raise_for_status () + total_size_bytes =int (response .headers .get ('Content-Length',0 )) + num_parts_for_file =min (self .num_file_threads ,MAX_PARTS_FOR_MULTIPART_DOWNLOAD ) + attempt_multipart =(self .allow_multipart_download and MULTIPART_DOWNLOADER_AVAILABLE and + num_parts_for_file >1 and total_size_bytes >MIN_SIZE_FOR_MULTIPART_DOWNLOAD and + 'bytes'in response .headers .get ('Accept-Ranges','').lower ()) + if self ._check_pause (f"Multipart decision for '{api_original_filename }'"):break + + if attempt_multipart : + if response_for_this_attempt : + response_for_this_attempt .close () + response_for_this_attempt =None + + + + + + mp_save_path_for_unique_part_stem_arg =os .path .join (target_folder_path ,f"{unique_part_file_stem_on_disk }{temp_file_ext_for_unique_part }") + mp_success ,mp_bytes ,mp_hash ,mp_file_handle =download_file_in_parts ( + file_url ,mp_save_path_for_unique_part_stem_arg ,total_size_bytes ,num_parts_for_file ,headers ,api_original_filename , + emitter_for_multipart =self .emitter ,cookies_for_chunk_session =cookies_to_use_for_file , + cancellation_event =self .cancellation_event ,skip_event =skip_event ,logger_func =self .logger , + pause_event =self .pause_event + ) + if mp_success : + download_successful_flag =True + downloaded_size_bytes =mp_bytes + calculated_file_hash =mp_hash + + + + downloaded_part_file_path =mp_save_path_for_unique_part_stem_arg +".part" + was_multipart_download =True + if mp_file_handle :mp_file_handle .close () + break + else : + if attempt_num_single_stream 1 and total_size_bytes >0 : + self ._emit_signal ('file_progress',api_original_filename ,(current_attempt_downloaded_bytes ,total_size_bytes )) + last_progress_time =time .time () + + if self .check_cancel 
()or (skip_event and skip_event .is_set ())or (self .pause_event and self .pause_event .is_set ()and not (current_attempt_downloaded_bytes >0 or (total_size_bytes ==0 and response .status_code ==200 ))): + if os .path .exists (current_single_stream_part_path ):os .remove (current_single_stream_part_path ) + break + + + attempt_is_complete =False + if response .status_code ==200 : + if total_size_bytes >0 : + if current_attempt_downloaded_bytes ==total_size_bytes : + attempt_is_complete =True + else : + self .logger (f" ⚠️ Single-stream attempt for '{api_original_filename }' incomplete: received {current_attempt_downloaded_bytes } of {total_size_bytes } bytes.") + elif total_size_bytes ==0 : + if current_attempt_downloaded_bytes ==0 : + attempt_is_complete =True + else : + self .logger (f" ⚠️ Mismatch for '{api_original_filename }': Server reported 0 bytes, but received {current_attempt_downloaded_bytes } bytes this attempt.") + + + elif current_attempt_downloaded_bytes >0 : + attempt_is_complete =True + self .logger (f" ⚠️ Single-stream for '{api_original_filename }' received {current_attempt_downloaded_bytes } bytes (no Content-Length from server). 
Assuming complete for this attempt as stream ended.") + + if attempt_is_complete : + calculated_file_hash =md5_hasher .hexdigest () + downloaded_size_bytes =current_attempt_downloaded_bytes + downloaded_part_file_path =current_single_stream_part_path + was_multipart_download =False + download_successful_flag =True + break + else : + if os .path .exists (current_single_stream_part_path ): + try :os .remove (current_single_stream_part_path ) + except OSError as e_rem_part :self .logger (f" -> Failed to remove .part file after failed single stream attempt: {e_rem_part }") + + except Exception as e_write : + self .logger (f" ❌ Error writing single-stream to disk for '{api_original_filename }': {e_write }") + if os .path .exists (current_single_stream_part_path ):os .remove (current_single_stream_part_path ) + + raise + single_stream_exception =e_write + if single_stream_exception : + raise single_stream_exception + + except (requests .exceptions .ConnectionError ,requests .exceptions .Timeout ,http .client .IncompleteRead )as e : + self .logger (f" ❌ Download Error (Retryable): {api_original_filename }. Error: {e }") + last_exception_for_retry_later =e + if isinstance (e ,requests .exceptions .ConnectionError )and ("Failed to resolve"in str (e )or "NameResolutionError"in str (e )): + self .logger (" 💡 This looks like a DNS resolution problem. Please check your internet connection, DNS settings, or VPN.") + except requests .exceptions .RequestException as e : + self .logger (f" ❌ Download Error (Non-Retryable): {api_original_filename }. Error: {e }") + last_exception_for_retry_later =e + if ("Failed to resolve"in str (e )or "NameResolutionError"in str (e )): + self .logger (" 💡 This looks like a DNS resolution problem. 
Please check your internet connection, DNS settings, or VPN.") + + break + except Exception as e : + self .logger (f" ❌ Unexpected Download Error: {api_original_filename }: {e }\n{traceback .format_exc (limit =2 )}") + last_exception_for_retry_later =e + break + finally : + if response_for_this_attempt : + response_for_this_attempt .close () + self ._emit_signal ('file_download_status',False ) + + final_total_for_progress =total_size_bytes if download_successful_flag and total_size_bytes >0 else downloaded_size_bytes + self ._emit_signal ('file_progress',api_original_filename ,(downloaded_size_bytes ,final_total_for_progress )) + +# --- Start of Replacement Block --- + + # Rescue download if an IncompleteRead error occurred but the file is complete + if (not download_successful_flag and + isinstance(last_exception_for_retry_later, http.client.IncompleteRead) and + total_size_bytes > 0 and downloaded_part_file_path and os.path.exists(downloaded_part_file_path)): + try: + actual_size = os.path.getsize(downloaded_part_file_path) + if actual_size == total_size_bytes: + self.logger(f" ✅ Rescued '{api_original_filename}': IncompleteRead error occurred, but file size matches. Proceeding with save.") + download_successful_flag = True + # The hash must be recalculated now that we've verified the file + md5_hasher = hashlib.md5() + with open(downloaded_part_file_path, 'rb') as f_verify: + for chunk in iter(lambda: f_verify.read(8192), b""): # Read in chunks + md5_hasher.update(chunk) + calculated_file_hash = md5_hasher.hexdigest() + except Exception as rescue_exc: + self.logger(f" ⚠️ Failed to rescue file despite matching size. 
Error: {rescue_exc}") + + if self.check_cancel() or (skip_event and skip_event.is_set()) or (self.pause_event and self.pause_event.is_set() and not download_successful_flag): + self.logger(f" ⚠️ Download process interrupted for {api_original_filename}.") + if downloaded_part_file_path and os.path.exists(downloaded_part_file_path): + try: os.remove(downloaded_part_file_path) + except OSError: pass + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None + + # This logic block now correctly handles all outcomes: success, failure, or rescued. + if download_successful_flag: + # --- This is the success path --- + if self._check_pause(f"Post-download hash check for '{api_original_filename}'"): + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None + + with self.downloaded_file_hashes_lock: + if calculated_file_hash in self.downloaded_file_hashes: + self.logger(f" -> Skip Saving Duplicate (Hash Match): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...).") + with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) + if downloaded_part_file_path and os.path.exists(downloaded_part_file_path): + try: os.remove(downloaded_part_file_path) + except OSError as e_rem: self.logger(f" -> Failed to remove .part file for hash duplicate: {e_rem}") + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None + + effective_save_folder = target_folder_path + filename_after_styling_and_word_removal = filename_to_save_in_main_path + + try: + os.makedirs(effective_save_folder, exist_ok=True) + except OSError as e: + self.logger(f" ❌ Critical error creating directory '{effective_save_folder}': {e}. 
Skipping file '{api_original_filename}'.") + if downloaded_part_file_path and os.path.exists(downloaded_part_file_path): + try: os.remove(downloaded_part_file_path) + except OSError: pass + return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None + + data_to_write_io = None + filename_after_compression = filename_after_styling_and_word_removal + is_img_for_compress_check = is_image(api_original_filename) + + if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024): + self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024 * 1024):.2f} MB)...") + if self._check_pause(f"Image compression for '{api_original_filename}'"): return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None + img_content_for_pillow = None + try: + with open(downloaded_part_file_path, 'rb') as f_img_in: + img_content_for_pillow = BytesIO(f_img_in.read()) + with Image.open(img_content_for_pillow) as img_obj: + if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA') + elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB') + compressed_output_io = BytesIO() + img_obj.save(compressed_output_io, format='WebP', quality=80, method=4) + compressed_size = compressed_output_io.getbuffer().nbytes + if compressed_size < downloaded_size_bytes * 0.9: + self.logger(f" Compression success: {compressed_size / (1024 * 1024):.2f} MB.") + data_to_write_io = compressed_output_io + data_to_write_io.seek(0) + base_name_orig, _ = os.path.splitext(filename_after_compression) + filename_after_compression = base_name_orig + '.webp' + self.logger(f" Updated filename (compressed): {filename_after_compression}") + else: + self.logger(f" Compression skipped: WebP not significantly smaller.") + if compressed_output_io: compressed_output_io.close() + except Exception as comp_e: + self.logger(f"❌ Compression failed for '{api_original_filename}': 
{comp_e}. Saving original.") + finally: + if img_content_for_pillow: img_content_for_pillow.close() + + final_filename_on_disk = filename_after_compression + temp_base, temp_ext = os.path.splitext(final_filename_on_disk) + suffix_counter = 1 + while os.path.exists(os.path.join(effective_save_folder, final_filename_on_disk)): + final_filename_on_disk = f"{temp_base}_{suffix_counter}{temp_ext}" + suffix_counter += 1 + if final_filename_on_disk != filename_after_compression: + self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')") + + if self._check_pause(f"File saving for '{final_filename_on_disk}'"): + return 0, 1, final_filename_on_disk, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None + + final_save_path = os.path.join(effective_save_folder, final_filename_on_disk) + try: + if data_to_write_io: + with open(final_save_path, 'wb') as f_out: + time.sleep(0.05) + f_out.write(data_to_write_io.getvalue()) + if downloaded_part_file_path and os.path.exists(downloaded_part_file_path): + try: + os.remove(downloaded_part_file_path) + except OSError as e_rem: + self.logger(f" -> Failed to remove .part after compression: {e_rem}") + else: + if downloaded_part_file_path and os.path.exists(downloaded_part_file_path): + time.sleep(0.1) + os.rename(downloaded_part_file_path, final_save_path) + else: + raise FileNotFoundError(f"Original .part file not found for saving: {downloaded_part_file_path}") + + with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash) + with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) + + final_filename_saved_for_return = final_filename_on_disk + self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024 * 1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'") + + downloaded_file_details = { + 
'disk_filename': final_filename_saved_for_return, + 'post_title': post_title, + 'post_id': original_post_id_for_log, + 'upload_date_str': self.post.get('published') or self.post.get('added') or "N/A", + 'download_timestamp': time.time(), + 'download_path': effective_save_folder, + 'service': self.service, + 'user_id': self.user_id, + 'api_original_filename': api_original_filename, + 'folder_context_name': folder_context_name_for_history or os.path.basename(effective_save_folder) + } + self._emit_signal('file_successfully_downloaded', downloaded_file_details) + time.sleep(0.05) + + return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SUCCESS, None + except Exception as save_err: + self.logger(f"->>Save Fail for '{final_filename_on_disk}': {save_err}") + if os.path.exists(final_save_path): + try: os.remove(final_save_path) + except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}") + + # --- FIX: Report as a permanent failure so it appears in the error dialog --- + permanent_failure_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, } + return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details + finally: + if data_to_write_io and hasattr(data_to_write_io, 'close'): + data_to_write_io.close() + + else: + # --- This is the failure path --- + self.logger(f"❌ Download failed for '{api_original_filename}' after {max_retries + 1} attempts.") + + is_actually_incomplete_read = False + if isinstance(last_exception_for_retry_later, http.client.IncompleteRead): + is_actually_incomplete_read = True + elif 
hasattr(last_exception_for_retry_later, '__cause__') and isinstance(last_exception_for_retry_later.__cause__, http.client.IncompleteRead): + is_actually_incomplete_read = True + elif last_exception_for_retry_later is not None: + str_exc = str(last_exception_for_retry_later).lower() + if "incompleteread" in str_exc or (isinstance(last_exception_for_retry_later, tuple) and any("incompleteread" in str(arg).lower() for arg in last_exception_for_retry_later if isinstance(arg, (str, Exception)))): + is_actually_incomplete_read = True + + if is_actually_incomplete_read: + self.logger(f" Marking '{api_original_filename}' for potential retry later due to IncompleteRead.") + retry_later_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, 'manga_mode_active_for_file': self.manga_mode_active, 'manga_filename_style_for_file': self.manga_filename_style, } + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, retry_later_details + else: + self.logger(f" Marking '{api_original_filename}' as permanently failed for this session.") + permanent_failure_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, } + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details + with self .downloaded_file_hashes_lock : + if calculated_file_hash in self .downloaded_file_hashes : + self .logger (f" 
-> Skip Saving Duplicate (Hash Match): '{api_original_filename }' (Hash: {calculated_file_hash [:8 ]}...).") + with self .downloaded_files_lock :self .downloaded_files .add (filename_to_save_in_main_path ) + if downloaded_part_file_path and os .path .exists (downloaded_part_file_path ): + try :os .remove (downloaded_part_file_path ) + except OSError as e_rem :self .logger (f" -> Failed to remove .part file for hash duplicate: {e_rem }") + return 0 ,1 ,filename_to_save_in_main_path ,was_original_name_kept_flag ,FILE_DOWNLOAD_STATUS_SKIPPED ,None + + effective_save_folder =target_folder_path + filename_after_styling_and_word_removal =filename_to_save_in_main_path + + try: + os.makedirs(effective_save_folder, exist_ok=True) + except OSError as e: + self.logger(f" ❌ Critical error creating directory '{effective_save_folder}': {e}. Skipping file '{api_original_filename}'.") + if downloaded_part_file_path and os.path.exists(downloaded_part_file_path): + try: os.remove(downloaded_part_file_path) + except OSError: pass + # --- FIX: Report as a permanent failure so it appears in the error dialog --- + permanent_failure_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, } + return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details + + data_to_write_io =None + filename_after_compression =filename_after_styling_and_word_removal + is_img_for_compress_check =is_image (api_original_filename ) + + if is_img_for_compress_check and self .compress_images and Image and downloaded_size_bytes >(1.5 *1024 *1024 ): + self .logger (f" Compressing '{api_original_filename }' ({downloaded_size_bytes /(1024 *1024 ):.2f} MB)...") + if self ._check_pause 
(f"Image compression for '{api_original_filename }'"):return 0 ,1 ,filename_to_save_in_main_path ,was_original_name_kept_flag ,FILE_DOWNLOAD_STATUS_SKIPPED ,None + + img_content_for_pillow =None + try : + with open (downloaded_part_file_path ,'rb')as f_img_in : + img_content_for_pillow =BytesIO (f_img_in .read ()) + + with Image .open (img_content_for_pillow )as img_obj : + if img_obj .mode =='P':img_obj =img_obj .convert ('RGBA') + elif img_obj .mode not in ['RGB','RGBA','L']:img_obj =img_obj .convert ('RGB') + + compressed_output_io =BytesIO () + img_obj .save (compressed_output_io ,format ='WebP',quality =80 ,method =4 ) + compressed_size =compressed_output_io .getbuffer ().nbytes + + if compressed_size Failed to remove .part after compression: {e_rem }") + else : + if downloaded_part_file_path and os .path .exists (downloaded_part_file_path ): + time .sleep (0.1 ) + os .rename (downloaded_part_file_path ,final_save_path ) + else : + raise FileNotFoundError (f"Original .part file not found for saving: {downloaded_part_file_path }") + with self .downloaded_file_hashes_lock :self .downloaded_file_hashes .add (calculated_file_hash ) + with self .downloaded_files_lock :self .downloaded_files .add (filename_to_save_in_main_path ) + final_filename_saved_for_return =final_filename_on_disk + self .logger (f"✅ Saved: '{final_filename_saved_for_return }' (from '{api_original_filename }', {downloaded_size_bytes /(1024 *1024 ):.2f} MB) in '{os .path .basename (effective_save_folder )}'") + + + downloaded_file_details ={ + 'disk_filename':final_filename_saved_for_return , + 'post_title':post_title , + 'post_id':original_post_id_for_log , + 'upload_date_str':self .post .get ('published')or self .post .get ('added')or "N/A", + 'download_timestamp':time .time (), + 'download_path':effective_save_folder , + 'service':self .service , + 'user_id':self .user_id , + 'api_original_filename':api_original_filename , + 'folder_context_name':folder_context_name_for_history or os .path 
.basename (effective_save_folder ) + } + self ._emit_signal ('file_successfully_downloaded',downloaded_file_details ) + time .sleep (0.05 ) + + return 1 ,0 ,final_filename_saved_for_return ,was_original_name_kept_flag ,FILE_DOWNLOAD_STATUS_SUCCESS ,None + except Exception as save_err : + self .logger (f"->>Save Fail for '{final_filename_on_disk }': {save_err }") + if os .path .exists (final_save_path ): + try :os .remove (final_save_path ); + except OSError :self .logger (f" -> Failed to remove partially saved file: {final_save_path }") + + + return 0 ,1 ,final_filename_saved_for_return ,was_original_name_kept_flag ,FILE_DOWNLOAD_STATUS_SKIPPED ,None + finally : + if data_to_write_io and hasattr (data_to_write_io ,'close'): + data_to_write_io .close () + + def process (self ): + if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None + if self .check_cancel ():return 0 ,0 ,[],[],[],None, None + current_character_filters =self ._get_current_character_filters () + kept_original_filenames_for_log =[] + retryable_failures_this_post =[] + permanent_failures_this_post =[] + total_downloaded_this_post =0 + total_skipped_this_post =0 + history_data_for_this_post =None + + parsed_api_url =urlparse (self .api_url_input ) + referer_url =f"https://{parsed_api_url .netloc }/" + headers ={'User-Agent':'Mozilla/5.0','Referer':referer_url ,'Accept':'*/*'} + link_pattern =re .compile (r"""]*>(.*?)""", + re .IGNORECASE |re .DOTALL ) + post_data =self .post + post_title =post_data .get ('title','')or 'untitled_post' + post_id =post_data .get ('id','unknown_id') + post_main_file_info =post_data .get ('file') + post_attachments =post_data .get ('attachments',[]) + + effective_unwanted_keywords_for_folder_naming =self .unwanted_keywords .copy () + is_full_creator_download_no_char_filter =not self .target_post_id_from_initial_url and not current_character_filters + if is_full_creator_download_no_char_filter and self 
.creator_download_folder_ignore_words : + self .logger (f" Applying creator download specific folder ignore words ({len (self .creator_download_folder_ignore_words )} words).") + effective_unwanted_keywords_for_folder_naming .update (self .creator_download_folder_ignore_words ) + + post_content_html =post_data .get ('content','') + self .logger (f"\n--- Processing Post {post_id } ('{post_title [:50 ]}...') (Thread: {threading .current_thread ().name }) ---") + num_potential_files_in_post =len (post_attachments or [])+(1 if post_main_file_info and post_main_file_info .get ('path')else 0 ) + post_is_candidate_by_title_char_match =False + char_filter_that_matched_title =None + post_is_candidate_by_comment_char_match =False + post_is_candidate_by_file_char_match_in_comment_scope =False + char_filter_that_matched_file_in_comment_scope =None + char_filter_that_matched_comment =None + if current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH ): + if self ._check_pause (f"Character title filter for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + for idx ,filter_item_obj in enumerate (current_character_filters ): + if self .check_cancel ():break + terms_to_check_for_title =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]: + if filter_item_obj ["name"]not in terms_to_check_for_title : + terms_to_check_for_title .append (filter_item_obj ["name"]) + unique_terms_for_title_check =list (set (terms_to_check_for_title )) + for term_to_match in unique_terms_for_title_check : + match_found_for_term =is_title_match_for_character (post_title ,term_to_match ) + if match_found_for_term : + post_is_candidate_by_title_char_match =True + char_filter_that_matched_title =filter_item_obj + self .logger (f" Post title matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}', Scope: {self .char_filter_scope }). 
Post is candidate.") + break + if post_is_candidate_by_title_char_match :break + all_files_from_post_api_for_char_check =[] + api_file_domain_for_char_check =urlparse (self .api_url_input ).netloc + if not api_file_domain_for_char_check or not any (d in api_file_domain_for_char_check .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): + api_file_domain_for_char_check ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" + if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'): + original_api_name =post_main_file_info .get ('name')or os .path .basename (post_main_file_info ['path'].lstrip ('/')) + if original_api_name : + all_files_from_post_api_for_char_check .append ({'_original_name_for_log':original_api_name }) + for att_info in post_attachments : + if isinstance (att_info ,dict )and att_info .get ('path'): + original_api_att_name =att_info .get ('name')or os .path .basename (att_info ['path'].lstrip ('/')) + if original_api_att_name : + all_files_from_post_api_for_char_check .append ({'_original_name_for_log':original_api_att_name }) + if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS : + self .logger (f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id }'.") + if self ._check_pause (f"File check (comments scope) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + for file_info_item in all_files_from_post_api_for_char_check : + if self .check_cancel ():break + current_api_original_filename_for_check =file_info_item .get ('_original_name_for_log') + if not current_api_original_filename_for_check :continue + for filter_item_obj in current_character_filters : + terms_to_check =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check : + terms_to_check .append (filter_item_obj ["name"]) + for term_to_match in 
terms_to_check : + if is_filename_match_for_character (current_api_original_filename_for_check ,term_to_match ): + post_is_candidate_by_file_char_match_in_comment_scope =True + char_filter_that_matched_file_in_comment_scope =filter_item_obj + self .logger (f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check }' matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}'). Post is candidate.") + break + if post_is_candidate_by_file_char_match_in_comment_scope :break + if post_is_candidate_by_file_char_match_in_comment_scope :break + self .logger (f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope }") + if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS : + if not post_is_candidate_by_file_char_match_in_comment_scope : + if self ._check_pause (f"Comment check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + self .logger (f" [Char Scope: Comments] Phase 2: No file match found. Checking post comments for post ID '{post_id }'.") + try : + parsed_input_url_for_comments =urlparse (self .api_url_input ) + api_domain_for_comments =parsed_input_url_for_comments .netloc + if not any (d in api_domain_for_comments .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): + self .logger (f"⚠️ Unrecognized domain '{api_domain_for_comments }' for comment API. 
Defaulting based on service.") + api_domain_for_comments ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" + comments_data =fetch_post_comments ( + api_domain_for_comments ,self .service ,self .user_id ,post_id , + headers ,self .logger ,self .cancellation_event ,self .pause_event , + cookies_dict =prepare_cookies_for_request ( + self .use_cookie ,self .cookie_text ,self .selected_cookie_file ,self .app_base_dir ,self .logger + ) + ) + if comments_data : + self .logger (f" Fetched {len (comments_data )} comments for post {post_id }.") + for comment_item_idx ,comment_item in enumerate (comments_data ): + if self .check_cancel ():break + raw_comment_content =comment_item .get ('content','') + if not raw_comment_content :continue + cleaned_comment_text =strip_html_tags (raw_comment_content ) + if not cleaned_comment_text .strip ():continue + for filter_item_obj in current_character_filters : + terms_to_check_comment =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_comment : + terms_to_check_comment .append (filter_item_obj ["name"]) + for term_to_match_comment in terms_to_check_comment : + if is_title_match_for_character (cleaned_comment_text ,term_to_match_comment ): + post_is_candidate_by_comment_char_match =True + char_filter_that_matched_comment =filter_item_obj + self .logger (f" Match Found (Comment in Comments Scope): Comment in post {post_id } matches char filter term '{term_to_match_comment }' (from group/name '{filter_item_obj ['name']}'). 
Post is candidate.") + self .logger (f" Matching comment (first 100 chars): '{cleaned_comment_text [:100 ]}...'") + break + if post_is_candidate_by_comment_char_match :break + if post_is_candidate_by_comment_char_match :break + else : + self .logger (f" No comments found or fetched for post {post_id } to check against character filters.") + except RuntimeError as e_fetch_comment : + self .logger (f" ⚠️ Error fetching or processing comments for post {post_id }: {e_fetch_comment }") + except Exception as e_generic_comment : + self .logger (f" ❌ Unexpected error during comment processing for post {post_id }: {e_generic_comment }\n{traceback .format_exc (limit =2 )}") + self .logger (f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match }") + else : + self .logger (f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id }' because a file match already made it a candidate.") + if current_character_filters : + if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match : + self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.") + self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match : + self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'") + if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'): + self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if self .skip_words_list and (self 
.skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ): + if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + post_title_lower =post_title .lower () + for skip_word in self .skip_words_list : + if skip_word .lower ()in post_title_lower : + self .logger (f" -> Skip Post (Keyword in Title '{skip_word }'): '{post_title [:50 ]}...'. Scope: {self .skip_words_scope }") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match : + self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.") + self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if not isinstance (post_attachments ,list ): + self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). 
Skipping attachments.") + post_attachments =[] + base_folder_names_for_post_content =[] + determined_post_save_path_for_history =self .override_output_dir if self .override_output_dir else self .download_root + if not self .extract_links_only and self .use_subfolders : + if self ._check_pause (f"Subfolder determination for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + primary_char_filter_for_folder =None + log_reason_for_folder ="" + if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment : + if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope : + primary_char_filter_for_folder =char_filter_that_matched_file_in_comment_scope + log_reason_for_folder ="Matched char filter in filename (Comments scope)" + elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment : + primary_char_filter_for_folder =char_filter_that_matched_comment + log_reason_for_folder ="Matched char filter in comments (Comments scope, no file match)" + elif (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and char_filter_that_matched_title : + primary_char_filter_for_folder =char_filter_that_matched_title + log_reason_for_folder ="Matched char filter in title" + if primary_char_filter_for_folder : + base_folder_names_for_post_content =[clean_folder_name (primary_char_filter_for_folder ["name"])] + cleaned_primary_folder_name =clean_folder_name (primary_char_filter_for_folder ["name"]) + if cleaned_primary_folder_name .lower ()in effective_unwanted_keywords_for_folder_naming and cleaned_primary_folder_name .lower ()!="untitled_folder": + self .logger (f" ⚠️ Primary char filter folder name '{cleaned_primary_folder_name }' is in ignore list. 
Using generic name.") + base_folder_names_for_post_content =["Generic Post Content"] + else : + base_folder_names_for_post_content =[cleaned_primary_folder_name ] + self .logger (f" Base folder name(s) for post content ({log_reason_for_folder }): {', '.join (base_folder_names_for_post_content )}") + elif not current_character_filters : + + derived_folders_from_title_via_known_txt =match_folders_from_title ( + post_title , + self .known_names , + effective_unwanted_keywords_for_folder_naming + ) + + valid_derived_folders_from_title_known_txt =[ + name for name in derived_folders_from_title_via_known_txt + if name and name .strip ()and name .lower ()!="untitled_folder" + ] + + if valid_derived_folders_from_title_known_txt : + base_folder_names_for_post_content .extend (valid_derived_folders_from_title_known_txt ) + self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Post Title): {', '.join (base_folder_names_for_post_content )}") + else : + + + + + candidate_name_from_title_basic_clean =extract_folder_name_from_title ( + post_title , + FOLDER_NAME_STOP_WORDS + ) + + title_is_only_creator_ignored_words =False + if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words : + + candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()} + if candidate_title_words and candidate_title_words .issubset (self .creator_download_folder_ignore_words ): + title_is_only_creator_ignored_words =True + self .logger (f" Title-derived name '{candidate_name_from_title_basic_clean }' consists only of creator-specific ignore words.") + + if title_is_only_creator_ignored_words : + + self .logger (f" Attempting Known.txt match on filenames as title was poor ('{candidate_name_from_title_basic_clean }').") + + filenames_to_check =[ + f_info ['_original_name_for_log']for f_info in all_files_from_post_api_for_char_check + if f_info 
.get ('_original_name_for_log') + ] + + derived_folders_from_filenames_known_txt =set () + if filenames_to_check : + for fname in filenames_to_check : + matches =match_folders_from_title ( + fname , + self .known_names , + effective_unwanted_keywords_for_folder_naming + ) + for m in matches : + if m and m .strip ()and m .lower ()!="untitled_folder": + derived_folders_from_filenames_known_txt .add (m ) + + if derived_folders_from_filenames_known_txt : + base_folder_names_for_post_content .extend (list (derived_folders_from_filenames_known_txt )) + self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Filenames): {', '.join (base_folder_names_for_post_content )}") + else : + final_title_extract =extract_folder_name_from_title ( + post_title ,effective_unwanted_keywords_for_folder_naming + ) + base_folder_names_for_post_content .append (final_title_extract ) + self .logger (f" No Known.txt match from filenames. Using title-derived name (with full ignore list): '{final_title_extract }'") + else : + extracted_name_from_title_full_ignore =extract_folder_name_from_title ( + post_title ,effective_unwanted_keywords_for_folder_naming + ) + base_folder_names_for_post_content .append (extracted_name_from_title_full_ignore ) + self .logger (f" Base folder name(s) for post content (Generic title parsing - title not solely creator-ignored words): {', '.join (base_folder_names_for_post_content )}") + + base_folder_names_for_post_content =[ + name for name in base_folder_names_for_post_content if name and name .strip () + ] + if not base_folder_names_for_post_content : + final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content") + base_folder_names_for_post_content =[final_fallback_name ] + self .logger (f" Ultimate fallback folder name: {final_fallback_name }") + + if base_folder_names_for_post_content : + determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history 
,base_folder_names_for_post_content [0 ]) + + if not self .extract_links_only and self .use_post_subfolders : + cleaned_post_title_for_sub =clean_folder_name (post_title ) + post_id_for_fallback =self .post .get ('id','unknown_id') + + + if not cleaned_post_title_for_sub or cleaned_post_title_for_sub =="untitled_folder": + self .logger (f" ⚠️ Post title '{post_title }' resulted in a generic subfolder name. Using 'post_{post_id_for_fallback }' as base.") + original_cleaned_post_title_for_sub =f"post_{post_id_for_fallback }" + else : + original_cleaned_post_title_for_sub =cleaned_post_title_for_sub + + if self.use_date_prefix_for_subfolder: + # Prioritize 'published' date, fall back to 'added' date + published_date_str = self.post.get('published') or self.post.get('added') + if published_date_str: + try: + # Extract just the date part (YYYY-MM-DD) + date_prefix = published_date_str.split('T')[0] + # Prepend the date to the folder name + original_cleaned_post_title_for_sub = f"{date_prefix} {original_cleaned_post_title_for_sub}" + self.logger(f" ℹ️ Applying date prefix to subfolder: '{original_cleaned_post_title_for_sub}'") + except Exception as e: + self.logger(f" ⚠️ Could not parse date '{published_date_str}' for prefix. Using original name. Error: {e}") + else: + self.logger(" ⚠️ 'Date Prefix' is checked, but post has no 'published' or 'added' date. 
Omitting prefix.") + + base_path_for_post_subfolder =determined_post_save_path_for_history + + suffix_counter =0 + final_post_subfolder_name ="" + + while True : + if suffix_counter ==0 : + name_candidate =original_cleaned_post_title_for_sub + else : + name_candidate =f"{original_cleaned_post_title_for_sub }_{suffix_counter }" + + potential_post_subfolder_path =os .path .join (base_path_for_post_subfolder ,name_candidate ) + + try : + os .makedirs (potential_post_subfolder_path ,exist_ok =False ) + final_post_subfolder_name =name_candidate + if suffix_counter >0 : + self .logger (f" Post subfolder name conflict: Using '{final_post_subfolder_name }' instead of '{original_cleaned_post_title_for_sub }' to avoid mixing posts.") + break + except FileExistsError : + suffix_counter +=1 + if suffix_counter >100 : + self .logger (f" ⚠️ Exceeded 100 attempts to find unique subfolder name for '{original_cleaned_post_title_for_sub }'. Using UUID.") + final_post_subfolder_name =f"{original_cleaned_post_title_for_sub }_{uuid .uuid4 ().hex [:8 ]}" + os .makedirs (os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name ),exist_ok =True ) + break + except OSError as e_mkdir : + self .logger (f" ❌ Error creating directory '{potential_post_subfolder_path }': {e_mkdir }. 
Files for this post might be saved in parent or fail.") + final_post_subfolder_name =original_cleaned_post_title_for_sub + break + + determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name ) + if self.filter_mode == 'text_only' and not self.extract_links_only: + self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})") + + # --- Apply Title-based filters to ensure post is a candidate --- + post_title_lower = post_title.lower() + if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH): + for skip_word in self.skip_words_list: + if skip_word.lower() in post_title_lower: + self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.") + return 0, num_potential_files_in_post, [], [], [], None, None + + if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope: + self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.") + return 0, num_potential_files_in_post, [], [], [], None, None + + # --- Get the text content based on scope --- + raw_text_content = "" + final_post_data = post_data + + # Fetch full post data if content is missing and scope is 'content' + if self.text_only_scope == 'content' and 'content' not in final_post_data: + self.logger(f" Post {post_id} is missing 'content' field, fetching full data...") + parsed_url = urlparse(self.api_url_input) + api_domain = parsed_url.netloc + cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain) + + from .api_client import fetch_single_post_data # Local import to avoid circular dependency issues + full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, 
cookies_dict=cookies) + if full_data: + final_post_data = full_data + + if self.text_only_scope == 'content': + raw_text_content = final_post_data.get('content', '') + elif self.text_only_scope == 'comments': + try: + parsed_url = urlparse(self.api_url_input) + api_domain = parsed_url.netloc + comments_data = fetch_post_comments(api_domain, self.service, self.user_id, post_id, headers, self.logger, self.cancellation_event, self.pause_event) + if comments_data: + comment_texts = [] + for comment in comments_data: + user = comment.get('user', {}).get('name', 'Unknown User') + timestamp = comment.get('updated', 'No Date') + body = strip_html_tags(comment.get('content', '')) + comment_texts.append(f"--- Comment by {user} on {timestamp} ---\n{body}\n") + raw_text_content = "\n".join(comment_texts) + except Exception as e: + self.logger(f" ❌ Error fetching comments for text-only mode: {e}") + + if not raw_text_content or not raw_text_content.strip(): + self.logger(" -> Skip Saving Text: No content/comments found or fetched.") + return 0, num_potential_files_in_post, [], [], [], None, None + + # --- Robust HTML-to-TEXT Conversion --- + paragraph_pattern = re.compile(r'(.*?)

', re.IGNORECASE | re.DOTALL) + html_paragraphs = paragraph_pattern.findall(raw_text_content) + cleaned_text = "" + if not html_paragraphs: + self.logger(" ⚠️ No

tags found. Falling back to basic HTML cleaning for the whole block.") + text_with_br = re.sub(r'', '\n', raw_text_content, flags=re.IGNORECASE) + cleaned_text = re.sub(r'<.*?>', '', text_with_br) + else: + cleaned_paragraphs_list = [] + for p_content in html_paragraphs: + p_with_br = re.sub(r'', '\n', p_content, flags=re.IGNORECASE) + p_cleaned = re.sub(r'<.*?>', '', p_with_br) + p_final = html.unescape(p_cleaned).strip() + if p_final: + cleaned_paragraphs_list.append(p_final) + cleaned_text = '\n\n'.join(cleaned_paragraphs_list) + cleaned_text = cleaned_text.replace('…', '...') + + # --- Logic for Single PDF Mode (File-based) --- + if self.single_pdf_mode: + if not cleaned_text: + return 0, 0, [], [], [], None, None + + content_data = { + 'title': post_title, + 'content': cleaned_text, + 'published': self.post.get('published') or self.post.get('added') + } + temp_dir = os.path.join(self.app_base_dir, "appdata") + os.makedirs(temp_dir, exist_ok=True) + temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json" + temp_filepath = os.path.join(temp_dir, temp_filename) + + try: + with open(temp_filepath, 'w', encoding='utf-8') as f: + json.dump(content_data, f, indent=2) + self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.") + return 0, 0, [], [], [], None, temp_filepath + except Exception as e: + self.logger(f" ❌ Failed to write temporary file for single PDF: {e}") + return 0, 0, [], [], [], None, None + + # --- Logic for Individual File Saving --- + else: + file_extension = self.text_export_format + txt_filename = clean_filename(post_title) + f".{file_extension}" + final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename) + + try: + os.makedirs(determined_post_save_path_for_history, exist_ok=True) + base, ext = os.path.splitext(final_save_path) + counter = 1 + while os.path.exists(final_save_path): + final_save_path = f"{base}_{counter}{ext}" + counter += 1 + + if file_extension == 'pdf': + if FPDF: + 
self.logger(f" Converting to PDF...") + pdf = PDF() + font_path = "" + if self.project_root_dir: + font_path = os.path.join(self.project_root_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf') + try: + if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}") + pdf.add_font('DejaVu', '', font_path, uni=True) + pdf.set_font('DejaVu', '', 12) + except Exception as font_error: + self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.") + pdf.set_font('Arial', '', 12) + pdf.add_page() + pdf.multi_cell(0, 5, cleaned_text) + pdf.output(final_save_path) + else: + self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.") + final_save_path = os.path.splitext(final_save_path)[0] + ".txt" + with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text) + + elif file_extension == 'docx': + if Document: + self.logger(f" Converting to DOCX...") + document = Document() + document.add_paragraph(cleaned_text) + document.save(final_save_path) + else: + self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. 
Saving as .txt.") + final_save_path = os.path.splitext(final_save_path)[0] + ".txt" + with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text) + + else: # Default to TXT + with open(final_save_path, 'w', encoding='utf-8') as f: + f.write(cleaned_text) + + self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'") + return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None + except Exception as e: + self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}") + return 0, num_potential_files_in_post, [], [], [], None, None + + if not self .extract_links_only and self .use_subfolders and self .skip_words_list : + if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + for folder_name_to_check in base_folder_names_for_post_content : + if not folder_name_to_check :continue + if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ): + matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word") + self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if (self .show_external_links or self .extract_links_only )and post_content_html : + if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + try : + mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b') + unique_links_data ={} + for match in link_pattern .finditer (post_content_html ): + link_url =match .group (1 ).strip () + link_url =html .unescape (link_url ) + link_inner_text =match .group (2 ) + if not any (ext in link_url .lower ()for ext in ['.css','.js','.ico','.xml','.svg'])and not link_url 
.startswith ('javascript:')and link_url not in unique_links_data : + clean_link_text =re .sub (r'<.*?>','',link_inner_text ) + clean_link_text =html .unescape (clean_link_text ).strip () + display_text =clean_link_text if clean_link_text else "[Link]" + unique_links_data [link_url ]=display_text + links_emitted_count =0 + scraped_platforms ={'kemono','coomer','patreon'} + for link_url ,link_text in unique_links_data .items (): + platform =get_link_platform (link_url ) + decryption_key_found ="" + if platform =='mega': + parsed_mega_url =urlparse (link_url ) + if parsed_mega_url .fragment : + potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ] + if mega_key_pattern .fullmatch (potential_key_from_fragment ): + decryption_key_found =potential_key_from_fragment + + if not decryption_key_found and link_text : + key_match_in_text =mega_key_pattern .search (link_text ) + if key_match_in_text : + decryption_key_found =key_match_in_text .group (1 ) + if not decryption_key_found and self .extract_links_only and post_content_html : + key_match_in_content =mega_key_pattern .search (strip_html_tags (post_content_html )) + if key_match_in_content : + decryption_key_found =key_match_in_content .group (1 ) + if platform not in scraped_platforms : + self ._emit_signal ('external_link',post_title ,link_text ,link_url ,platform ,decryption_key_found or "") + links_emitted_count +=1 + if links_emitted_count >0 :self .logger (f" 🔗 Found {links_emitted_count } potential external link(s) in post content.") + except Exception as e :self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}") + if self .extract_links_only : + self .logger (f" Extract Links Only mode: Finished processing post {post_id } for links.") + return 0 ,0 ,[],[],[],None + all_files_from_post_api =[] + api_file_domain =urlparse (self .api_url_input ).netloc + if not api_file_domain or not any (d in api_file_domain .lower ()for d in 
['kemono.su','kemono.party','coomer.su','coomer.party']): + api_file_domain ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" + if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'): + file_path =post_main_file_info ['path'].lstrip ('/') + original_api_name =post_main_file_info .get ('name')or os .path .basename (file_path ) + if original_api_name : + all_files_from_post_api .append ({ + 'url':f"https://{api_file_domain }{file_path }"if file_path .startswith ('/')else f"https://{api_file_domain }/data/{file_path }", + 'name':original_api_name , + '_original_name_for_log':original_api_name , + '_is_thumbnail':is_image (original_api_name ) + }) + else :self .logger (f" ⚠️ Skipping main file for post {post_id }: Missing name (Path: {file_path })") + for idx ,att_info in enumerate (post_attachments ): + if isinstance (att_info ,dict )and att_info .get ('path'): + att_path =att_info ['path'].lstrip ('/') + original_api_att_name =att_info .get ('name')or os .path .basename (att_path ) + if original_api_att_name : + all_files_from_post_api .append ({ + 'url':f"https://{api_file_domain }{att_path }"if att_path .startswith ('/')else f"https://{api_file_domain }/data/{att_path }", + 'name':original_api_att_name , + '_original_name_for_log':original_api_att_name , + '_is_thumbnail':is_image (original_api_att_name ) + }) + else :self .logger (f" ⚠️ Skipping attachment {idx +1 } for post {post_id }: Missing name (Path: {att_path })") + else :self .logger (f" ⚠️ Skipping invalid attachment {idx +1 } for post {post_id }: {str (att_info )[:100 ]}") + if self .scan_content_for_images and post_content_html and not self .extract_links_only : + self .logger (f" Scanning post content for additional image URLs (Post ID: {post_id })...") + parsed_input_url =urlparse (self .api_url_input ) + base_url_for_relative_paths =f"{parsed_input_url .scheme }://{parsed_input_url .netloc }" + img_ext_pattern ="|".join (ext .lstrip 
('.')for ext in IMAGE_EXTENSIONS ) + direct_url_pattern_str =r"""(?i)\b(https?://[^\s"'<>\[\]\{\}\|\^\\^~\[\]`]+\.(?:"""+img_ext_pattern +r"""))\b""" + img_tag_src_pattern_str =r"""]*?src\s*=\s*["']([^"']+)["']""" + found_image_sources =set () + for direct_url_match in re .finditer (direct_url_pattern_str ,post_content_html ): + found_image_sources .add (direct_url_match .group (1 )) + for img_tag_match in re .finditer (img_tag_src_pattern_str ,post_content_html ,re .IGNORECASE ): + src_attr =img_tag_match .group (1 ).strip () + src_attr =html .unescape (src_attr ) + if not src_attr :continue + resolved_src_url ="" + if src_attr .startswith (('http://','https://')): + resolved_src_url =src_attr + elif src_attr .startswith ('//'): + resolved_src_url =f"{parsed_input_url .scheme }:{src_attr }" + elif src_attr .startswith ('/'): + resolved_src_url =f"{base_url_for_relative_paths }{src_attr }" + if resolved_src_url : + parsed_resolved_url =urlparse (resolved_src_url ) + if any (parsed_resolved_url .path .lower ().endswith (ext )for ext in IMAGE_EXTENSIONS ): + found_image_sources .add (resolved_src_url ) + if found_image_sources : + self .logger (f" Found {len (found_image_sources )} potential image URLs/sources in content.") + existing_urls_in_api_list ={f_info ['url']for f_info in all_files_from_post_api } + for found_url in found_image_sources : + if self .check_cancel ():break + if found_url in existing_urls_in_api_list : + self .logger (f" Skipping URL from content (already in API list or previously added from content): {found_url [:70 ]}...") + continue + try : + parsed_found_url =urlparse (found_url ) + url_filename =os .path .basename (parsed_found_url .path ) + if not url_filename or not is_image (url_filename ): + self .logger (f" Skipping URL from content (no filename part or not an image extension): {found_url [:70 ]}...") + continue + self .logger (f" Adding image from content: {url_filename } (URL: {found_url [:70 ]}...)") + all_files_from_post_api 
.append ({ + 'url':found_url , + 'name':url_filename , + '_original_name_for_log':url_filename , + '_is_thumbnail':False , + '_from_content_scan':True + }) + existing_urls_in_api_list .add (found_url ) + except Exception as e_url_parse : + self .logger (f" Error processing URL from content '{found_url [:70 ]}...': {e_url_parse }") + else : + self .logger (f" No additional image URLs found in post content scan for post {post_id }.") + if self .download_thumbnails : + if self .scan_content_for_images : + self .logger (f" Mode: 'Download Thumbnails Only' + 'Scan Content for Images' active. Prioritizing images from content scan for post {post_id }.") + all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_from_content_scan')] + if not all_files_from_post_api : + self .logger (f" -> No images found via content scan for post {post_id } in this combined mode.") + return 0 ,0 ,[],[],[],None + else : + self .logger (f" Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id }.") + all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_is_thumbnail')] + if not all_files_from_post_api : + self .logger (f" -> No API image thumbnails found for post {post_id } in thumbnail-only mode.") + return 0 ,0 ,[],[],[],None + if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED : + def natural_sort_key_for_files (file_api_info ): + name =file_api_info .get ('_original_name_for_log','').lower () + return [int (text )if text .isdigit ()else text for text in re .split ('([0-9]+)',name )] + all_files_from_post_api .sort (key =natural_sort_key_for_files ) + self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.") + if not all_files_from_post_api : + self .logger (f" No files found to download for post {post_id }.") + return 0 ,0 ,[],[],[],None + files_to_download_info_list =[] + 
processed_original_filenames_in_this_post =set () + + if self.keep_in_post_duplicates: + # If we keep duplicates, just add every file to the list to be processed. + # The downstream hash check and rename-on-collision logic will handle them. + files_to_download_info_list.extend(all_files_from_post_api) + self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.") + else: + # This is the original logic that skips duplicates by name within a post. + for file_info in all_files_from_post_api: + current_api_original_filename = file_info.get('_original_name_for_log') + if current_api_original_filename in processed_original_filenames_in_this_post: + self.logger(f" -> Skip Duplicate Original Name (within post {post_id}): '{current_api_original_filename}' already processed/listed for this post.") + total_skipped_this_post += 1 + else: + files_to_download_info_list.append(file_info) + if current_api_original_filename: + processed_original_filenames_in_this_post.add(current_api_original_filename) + + if not files_to_download_info_list: + + self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.") + return 0 ,total_skipped_this_post ,[],[],[],None + + self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.") + with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool : + futures_list =[] + for file_idx ,file_info_to_dl in enumerate (files_to_download_info_list ): + if self ._check_pause (f"File processing loop for post {post_id }, file {file_idx }"):break + if self .check_cancel ():break + current_api_original_filename =file_info_to_dl .get ('_original_name_for_log') + file_is_candidate_by_char_filter_scope =False + char_filter_info_that_matched_file =None + if not current_character_filters : + file_is_candidate_by_char_filter_scope =True + else : + 
if self .char_filter_scope ==CHAR_SCOPE_FILES : + for filter_item_obj in current_character_filters : + terms_to_check_for_file =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file : + terms_to_check_for_file .append (filter_item_obj ["name"]) + unique_terms_for_file_check =list (set (terms_to_check_for_file )) + for term_to_match in unique_terms_for_file_check : + if is_filename_match_for_character (current_api_original_filename ,term_to_match ): + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =filter_item_obj + self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Files.") + break + if file_is_candidate_by_char_filter_scope :break + elif self .char_filter_scope ==CHAR_SCOPE_TITLE : + if post_is_candidate_by_title_char_match : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_title + self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Title.") + elif self .char_filter_scope ==CHAR_SCOPE_BOTH : + if post_is_candidate_by_title_char_match : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_title + self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. 
Scope: Both (Title part).") + else : + for filter_item_obj_both_file in current_character_filters : + terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"]) + if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both : + terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"]) + unique_terms_for_file_both_check =list (set (terms_to_check_for_file_both )) + for term_to_match in unique_terms_for_file_both_check : + if is_filename_match_for_character (current_api_original_filename ,term_to_match ): + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =filter_item_obj_both_file + self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Both (File part).") + break + if file_is_candidate_by_char_filter_scope :break + elif self .char_filter_scope ==CHAR_SCOPE_COMMENTS : + if post_is_candidate_by_file_char_match_in_comment_scope : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_file_in_comment_scope + self .logger (f" File '{current_api_original_filename }' is candidate because a file in this post matched char filter (Overall Scope: Comments).") + elif post_is_candidate_by_comment_char_match : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_comment + self .logger (f" File '{current_api_original_filename }' is candidate because post comments matched char filter (Overall Scope: Comments).") + if not file_is_candidate_by_char_filter_scope : + self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.") + total_skipped_this_post +=1 + continue + + + target_base_folders_for_this_file_iteration =[] + + if current_character_filters : + char_title_subfolder_name =None + if self 
.target_post_id_from_initial_url and self .custom_folder_name : + char_title_subfolder_name =self .custom_folder_name + elif char_filter_info_that_matched_file : + char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"]) + elif char_filter_that_matched_title : + char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"]) + elif char_filter_that_matched_comment : + char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"]) + if char_title_subfolder_name : + target_base_folders_for_this_file_iteration .append (char_title_subfolder_name ) + else : + self .logger (f"⚠️ File '{current_api_original_filename }' candidate by char filter, but no folder name derived. Using post title.") + target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title )) + else : + if base_folder_names_for_post_content : + target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content ) + else : + target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title )) + + if not target_base_folders_for_this_file_iteration : + target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content")) + + for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration : + current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root + if self .use_subfolders and target_base_folder_name_for_instance : + current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance ) + if self .use_post_subfolders : + + current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name ) + + manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None + 
manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None + + + folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title ) + + futures_list .append (file_pool .submit ( + self ._download_single_file , + file_info =file_info_to_dl , + target_folder_path =current_path_for_file_instance , + headers =headers ,original_post_id_for_log =post_id ,skip_event =self .skip_current_file_flag , + post_title =post_title ,manga_date_file_counter_ref =manga_date_counter_to_pass , + manga_global_file_counter_ref =manga_global_counter_to_pass ,folder_context_name_for_history =folder_context_for_file , + file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list ) + )) + + for future in as_completed (futures_list ): + if self .check_cancel (): + for f_to_cancel in futures_list : + if not f_to_cancel .done (): + f_to_cancel .cancel () + break + try : + dl_count ,skip_count ,actual_filename_saved ,original_kept_flag ,status ,details_for_dialog_or_retry =future .result () + total_downloaded_this_post +=dl_count + total_skipped_this_post +=skip_count + if original_kept_flag and dl_count >0 and actual_filename_saved : + kept_original_filenames_for_log .append (actual_filename_saved ) + if status ==FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER and details_for_dialog_or_retry : + retryable_failures_this_post .append (details_for_dialog_or_retry ) + elif status ==FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION and details_for_dialog_or_retry : + permanent_failures_this_post .append (details_for_dialog_or_retry ) + except CancelledError : + self .logger (f" File download task for post {post_id } was cancelled.") + total_skipped_this_post +=1 + except Exception as exc_f : + self .logger (f"❌ File download task for post {post_id } resulted in error: 
{exc_f }") + total_skipped_this_post +=1 + self ._emit_signal ('file_progress',"",None ) + + # After a post's files are all processed, update the session file to mark this post as done. + if self.session_file_path and self.session_lock: + try: + with self.session_lock: + if os.path.exists(self.session_file_path): # Only update if the session file exists + # Read current state + with open(self.session_file_path, 'r', encoding='utf-8') as f: + session_data = json.load(f) + + if 'download_state' not in session_data: + session_data['download_state'] = {} + + # Add processed ID + if not isinstance(session_data['download_state'].get('processed_post_ids'), list): + session_data['download_state']['processed_post_ids'] = [] + session_data['download_state']['processed_post_ids'].append(self.post.get('id')) + + # Add any permanent failures from this worker to the session file + if permanent_failures_this_post: + if not isinstance(session_data['download_state'].get('permanently_failed_files'), list): + session_data['download_state']['permanently_failed_files'] = [] + # To avoid duplicates if the same post is somehow re-processed + existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']} + for failure in permanent_failures_this_post: + if failure.get('file_info', {}).get('url') not in existing_failed_urls: + session_data['download_state']['permanently_failed_files'].append(failure) + + # Write to temp file and then atomically replace + temp_file_path = self.session_file_path + ".tmp" + with open(temp_file_path, 'w', encoding='utf-8') as f_tmp: + json.dump(session_data, f_tmp, indent=2) + os.replace(temp_file_path, self.session_file_path) + except Exception as e: + self.logger(f"⚠️ Could not update session file for post {post_id}: {e}") + + if not self .extract_links_only and (total_downloaded_this_post >0 or not ( + (current_character_filters and ( + (self .char_filter_scope ==CHAR_SCOPE_TITLE and not 
post_is_candidate_by_title_char_match )or + (self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match ) + ))or + (self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH )and any (sw .lower ()in post_title .lower ()for sw in self .skip_words_list )) + )): + top_file_name_for_history ="N/A" + if post_main_file_info and post_main_file_info .get ('name'): + top_file_name_for_history =post_main_file_info ['name'] + elif post_attachments and post_attachments [0 ].get ('name'): + top_file_name_for_history =post_attachments [0 ]['name'] + + history_data_for_this_post ={ + 'post_title':post_title ,'post_id':post_id , + 'top_file_name':top_file_name_for_history , + 'num_files':num_potential_files_in_post , + 'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown", + 'download_location':determined_post_save_path_for_history , + 'service':self .service ,'user_id':self .user_id , + } + if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled."); + else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }") + + if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 : + + path_to_check_for_emptiness =determined_post_save_path_for_history + try : + if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ): + self .logger (f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness }'") + os .rmdir (path_to_check_for_emptiness ) + except OSError as e_rmdir : + self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }") + + result_tuple = (total_downloaded_this_post, total_skipped_this_post, + kept_original_filenames_for_log, 
        retryable_failures_this_post,
                        permanent_failures_this_post, history_data_for_this_post,
                        None)  # The 7th item is None because we already saved the temp file.
        # NOTE(review): history_data_for_this_post is only bound inside the earlier
        # `if not self.extract_links_only and (...)` branch — if that branch was
        # skipped, this line raises NameError. Confirm intended.

        # In Single PDF mode, the 7th item is the temp file path we created.
        # NOTE(review): temp_filepath is assumed to be bound earlier in this method
        # whenever single_pdf_mode is True — TODO confirm.
        if self.single_pdf_mode and os.path.exists(temp_filepath):
            result_tuple = (0, 0, [], [], [], None, temp_filepath)

        self._emit_signal('worker_finished', result_tuple)
        return  # The method now returns nothing; results travel via the signal.


class DownloadThread(QThread):
    """Single-threaded download driver.

    Runs on its own QThread, pulls post batches from ``download_from_api`` and
    feeds each post to a ``PostProcessorWorker`` sequentially, bridging the
    worker's signals through to this thread's own signals so the UI thread can
    observe progress, failures and history entries.
    """

    # Signals re-emitted toward the UI thread (see run() for the bridging).
    progress_signal = pyqtSignal(str)
    add_character_prompt_signal = pyqtSignal(str)
    file_download_status_signal = pyqtSignal(bool)
    # (total_downloaded, total_skipped, was_cancelled, kept_original_filenames)
    finished_signal = pyqtSignal(int, int, bool, list)
    external_link_signal = pyqtSignal(str, str, str, str, str)
    file_successfully_downloaded_signal = pyqtSignal(dict)
    file_progress_signal = pyqtSignal(str, object)
    retryable_file_failed_signal = pyqtSignal(list)
    missed_character_post_signal = pyqtSignal(str, str)
    post_processed_for_history_signal = pyqtSignal(dict)
    final_history_entries_signal = pyqtSignal(list)
    permanent_file_failed_signal = pyqtSignal(list)

    def __init__(self, api_url_input, output_dir, known_names_copy,
                 cancellation_event,
                 pause_event, filter_character_list=None, dynamic_character_filter_holder=None,
                 filter_mode='all', skip_zip=True, skip_rar=True,
                 use_subfolders=True, use_post_subfolders=False, custom_folder_name=None, compress_images=False,
                 download_thumbnails=False, service=None, user_id=None,
                 downloaded_files=None, downloaded_file_hashes=None, downloaded_files_lock=None, downloaded_file_hashes_lock=None,
                 skip_words_list=None,
                 skip_words_scope=SKIP_SCOPE_FILES,
                 show_external_links=False,
                 extract_links_only=False,
                 num_file_threads_for_worker=1,
                 skip_current_file_flag=None,
                 start_page=None, end_page=None,
                 target_post_id_from_initial_url=None,
                 manga_mode_active=False,
                 unwanted_keywords=None,
                 manga_filename_style=STYLE_POST_TITLE,
                 char_filter_scope=CHAR_SCOPE_FILES,
                 remove_from_filename_words_list=None,
                 manga_date_prefix=MANGA_DATE_PREFIX_DEFAULT,
                 allow_multipart_download=True,
                 selected_cookie_file=None,
                 override_output_dir=None,
                 app_base_dir=None,
                 manga_date_file_counter_ref=None,
                 manga_global_file_counter_ref=None,
                 use_cookie=False,
                 scan_content_for_images=False,
                 creator_download_folder_ignore_words=None,
                 use_date_prefix_for_subfolder=False,
                 keep_in_post_duplicates=False,
                 cookie_text="",
                 session_file_path=None,
                 session_lock=None,
                 text_only_scope=None,
                 text_export_format='txt',
                 single_pdf_mode=False,
                 project_root_dir=None,
                 ):
        """Store every download option verbatim; no work happens until run().

        Most parameters are forwarded unchanged to each PostProcessorWorker.
        The *_event / *_lock / *_ref arguments are shared objects owned by the
        caller (main window) and are used cross-thread; they must outlive this
        thread.
        """
        super().__init__()
        self.api_url_input = api_url_input
        self.output_dir = output_dir
        # Copy so the UI can mutate its own list without racing this thread.
        self.known_names = list(known_names_copy)
        self.cancellation_event = cancellation_event
        self.pause_event = pause_event
        self.skip_current_file_flag = skip_current_file_flag
        self.initial_target_post_id = target_post_id_from_initial_url
        self.filter_character_list_objects_initial = filter_character_list if filter_character_list else []
        self.dynamic_filter_holder = dynamic_character_filter_holder
        self.filter_mode = filter_mode
        self.skip_zip = skip_zip
        self.skip_rar = skip_rar
        self.use_subfolders = use_subfolders
        self.use_post_subfolders = use_post_subfolders
        self.custom_folder_name = custom_folder_name
        self.compress_images = compress_images
        self.download_thumbnails = download_thumbnails
        self.service = service
        self.user_id = user_id
        self.skip_words_list = skip_words_list if skip_words_list is not None else []
        self.skip_words_scope = skip_words_scope
        # Shared session-wide dedup state, guarded by the matching locks.
        self.downloaded_files = downloaded_files
        self.downloaded_files_lock = downloaded_files_lock
        self.downloaded_file_hashes = downloaded_file_hashes
        self.downloaded_file_hashes_lock = downloaded_file_hashes_lock
        # Response slot for the add-character prompt, written from the UI
        # thread via receive_add_character_result() under prompt_mutex.
        self._add_character_response = None
        self.prompt_mutex = QMutex()
        self.show_external_links = show_external_links
        self.extract_links_only = extract_links_only
        self.num_file_threads_for_worker = num_file_threads_for_worker
        self.start_page = start_page
        self.end_page = end_page
        self.manga_mode_active = manga_mode_active
        # Default noise words filtered from titles/names in manga mode.
        self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'}
        self.manga_filename_style = manga_filename_style
        self.char_filter_scope = char_filter_scope
        self.remove_from_filename_words_list = remove_from_filename_words_list
        self.manga_date_prefix = manga_date_prefix
        self.allow_multipart_download = allow_multipart_download
        self.selected_cookie_file = selected_cookie_file
        self.app_base_dir = app_base_dir
        self.cookie_text = cookie_text
        self.use_cookie = use_cookie
        self.override_output_dir = override_output_dir
        self.manga_date_file_counter_ref = manga_date_file_counter_ref
        self.scan_content_for_images = scan_content_for_images
        self.creator_download_folder_ignore_words = creator_download_folder_ignore_words
        self.use_date_prefix_for_subfolder = use_date_prefix_for_subfolder
        self.keep_in_post_duplicates = keep_in_post_duplicates
        self.manga_global_file_counter_ref = manga_global_file_counter_ref
        self.session_file_path = session_file_path
        self.session_lock = session_lock
        # NOTE(review): this buffer is never appended to anywhere in this
        # class, so the `len(...) < 8` gate in run() is always true — presumably
        # it was meant to cap history emissions at 8. Confirm intent.
        self.history_candidates_buffer = deque(maxlen=8)
        self.text_only_scope = text_only_scope
        self.text_export_format = text_export_format
        self.single_pdf_mode = single_pdf_mode
        self.project_root_dir = project_root_dir

        # Pillow is optional; degrade gracefully when it is missing.
        if self.compress_images and Image is None:
            self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
            self.compress_images = False

    def logger(self, message):
        """Forward a log line to the UI via progress_signal."""
        self.progress_signal.emit(str(message))

    def isInterruptionRequested(self):
        """True when either the shared cancellation event or Qt's own
        interruption flag is set."""
        return self.cancellation_event.is_set() or super().isInterruptionRequested()

    def _check_pause_self(self, context_message="DownloadThread operation"):
        """Block while the shared pause event is set.

        Returns True if the operation was cancelled while paused (caller
        should abort), False once resumed normally.
        """
        if self.pause_event and self.pause_event.is_set():
            self.logger(f"   {context_message} paused...")
            while self.pause_event.is_set():
                if self.isInterruptionRequested():
                    self.logger(f"   {context_message} cancelled while paused.")
                    return True
                time.sleep(0.5)  # poll cheaply; responsiveness > precision here
            if not self.isInterruptionRequested():
                self.logger(f"   {context_message} resumed.")
        return False

    def skip_file(self):
        """Request that the file currently downloading be skipped
        (single-thread mode only; sets the shared skip flag)."""
        if self.isRunning() and self.skip_current_file_flag:
            self.logger("⏭️ Skip requested for current file (single-thread mode).")
            self.skip_current_file_flag.set()
        else:
            self.logger("ℹ️ Skip file: No download active or skip flag not available for current context.")

    def run(self):
        """
        The main execution method for the single-threaded download process.

        Pulls post batches from download_from_api(), processes each post with a
        fresh PostProcessorWorker, and aggregates totals. Handles the worker's
        7-tuple result (the 7th item being an optional temp-file path used in
        Single PDF mode) and passes 'single_pdf_mode' through correctly.
        Always emits finished_signal from the finally block.
        """
        grand_total_downloaded_files = 0
        grand_total_skipped_files = 0
        grand_list_of_kept_original_filenames = []
        was_process_cancelled = False

        # Lazy initialisation of manga-mode numbering counters.
        # NOTE(review): both bodies are placeholders (`pass`) — the original
        # counter initialisation logic appears to have been elided; confirm it
        # is restored or initialised elsewhere before these styles are used.
        if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED and not self.extract_links_only and self.manga_date_file_counter_ref is None:
            # ... (existing manga counter initialization logic) ...
            pass
        if self.manga_mode_active and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING and not self.extract_links_only and self.manga_global_file_counter_ref is None:
            # ... (existing manga counter initialization logic) ...
            pass

        worker_signals_obj = PostProcessorSignals()
        try:
            # Bridge the per-post worker's signals straight through to this
            # thread's own signals so the UI observes them transparently.
            worker_signals_obj.progress_signal.connect(self.progress_signal)
            worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
            worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
            worker_signals_obj.external_link_signal.connect(self.external_link_signal)
            worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
            worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal)
            # worker_finished is consumed synchronously via process() below;
            # connect a no-op so emitting it never errors.
            worker_signals_obj.worker_finished_signal.connect(lambda result: None)

            self.logger("   Starting post fetch (single-threaded download process)...")
            post_generator = download_from_api(
                self.api_url_input,
                logger=self.logger,
                start_page=self.start_page,
                end_page=self.end_page,
                manga_mode=self.manga_mode_active,
                cancellation_event=self.cancellation_event,
                pause_event=self.pause_event,
                use_cookie=self.use_cookie,
                cookie_text=self.cookie_text,
                selected_cookie_file=self.selected_cookie_file,
                app_base_dir=self.app_base_dir,
                manga_filename_style_for_sort_check=self.manga_filename_style if self.manga_mode_active else None
            )

            for posts_batch_data in post_generator:
                if self.isInterruptionRequested():
                    was_process_cancelled = True
                    break
                for individual_post_data in posts_batch_data:
                    if self.isInterruptionRequested():
                        was_process_cancelled = True
                        break

                    # One worker per post; all options forwarded, including
                    # single_pdf_mode (previously missing — now passed).
                    post_processing_worker = PostProcessorWorker(
                        post_data=individual_post_data,
                        download_root=self.output_dir,
                        known_names=self.known_names,
                        filter_character_list=self.filter_character_list_objects_initial,
                        dynamic_character_filter_holder=self.dynamic_filter_holder,
                        unwanted_keywords=self.unwanted_keywords,
                        filter_mode=self.filter_mode,
                        skip_zip=self.skip_zip,
                        skip_rar=self.skip_rar,
                        use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders,
                        target_post_id_from_initial_url=self.initial_target_post_id,
                        custom_folder_name=self.custom_folder_name,
                        compress_images=self.compress_images, download_thumbnails=self.download_thumbnails,
                        service=self.service, user_id=self.user_id,
                        api_url_input=self.api_url_input,
                        pause_event=self.pause_event,
                        cancellation_event=self.cancellation_event,
                        emitter=worker_signals_obj,
                        downloaded_files=self.downloaded_files,
                        downloaded_file_hashes=self.downloaded_file_hashes,
                        downloaded_files_lock=self.downloaded_files_lock,
                        downloaded_file_hashes_lock=self.downloaded_file_hashes_lock,
                        skip_words_list=self.skip_words_list,
                        skip_words_scope=self.skip_words_scope,
                        show_external_links=self.show_external_links,
                        extract_links_only=self.extract_links_only,
                        num_file_threads=self.num_file_threads_for_worker,
                        skip_current_file_flag=self.skip_current_file_flag,
                        manga_mode_active=self.manga_mode_active,
                        manga_filename_style=self.manga_filename_style,
                        manga_date_prefix=self.manga_date_prefix,
                        char_filter_scope=self.char_filter_scope,
                        remove_from_filename_words_list=self.remove_from_filename_words_list,
                        allow_multipart_download=self.allow_multipart_download,
                        selected_cookie_file=self.selected_cookie_file,
                        app_base_dir=self.app_base_dir,
                        cookie_text=self.cookie_text,
                        override_output_dir=self.override_output_dir,
                        manga_global_file_counter_ref=self.manga_global_file_counter_ref,
                        use_cookie=self.use_cookie,
                        manga_date_file_counter_ref=self.manga_date_file_counter_ref,
                        use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder,
                        keep_in_post_duplicates=self.keep_in_post_duplicates,
                        creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
                        session_file_path=self.session_file_path,
                        session_lock=self.session_lock,
                        text_only_scope=self.text_only_scope,
                        text_export_format=self.text_export_format,
                        single_pdf_mode=self.single_pdf_mode,
                        project_root_dir=self.project_root_dir
                    )
                    try:
                        # The worker returns a 7-tuple; the last item is the
                        # temp file path (Single PDF mode) or None.
                        (dl_count, skip_count, kept_originals_this_post,
                         retryable_failures, permanent_failures,
                         history_data, temp_filepath) = post_processing_worker.process()

                        grand_total_downloaded_files += dl_count
                        grand_total_skipped_files += skip_count

                        if kept_originals_this_post:
                            grand_list_of_kept_original_filenames.extend(kept_originals_this_post)
                        if retryable_failures:
                            self.retryable_file_failed_signal.emit(retryable_failures)
                        if history_data:
                            # NOTE(review): buffer is never filled, so this
                            # gate is currently always true (see __init__).
                            if len(self.history_candidates_buffer) < 8:
                                self.post_processed_for_history_signal.emit(history_data)
                        if permanent_failures:
                            self.permanent_file_failed_signal.emit(permanent_failures)

                        # In single-threaded text mode, smuggle the temp file
                        # path back to the main window through the log channel
                        # using a "TEMP_FILE_PATH:" sentinel prefix.
                        if self.single_pdf_mode and temp_filepath:
                            self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")

                    except Exception as proc_err:
                        post_id_for_err = individual_post_data.get('id', 'N/A')
                        self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}")
                        traceback.print_exc()
                        # Count every file the failed post could have had as skipped.
                        num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0)
                        grand_total_skipped_files += num_potential_files_est

                    # A skip request only applies to the post it was raised
                    # during; clear it so it does not leak to the next post.
                    if self.skip_current_file_flag and self.skip_current_file_flag.is_set():
                        self.skip_current_file_flag.clear()
                        self.logger("   Skip current file flag was processed and cleared by DownloadThread.")
                    self.msleep(10)  # brief yield so the event loop stays responsive
                if was_process_cancelled:
                    break
            if not was_process_cancelled and not self.isInterruptionRequested():
                self.logger("✅ All posts processed or end of content reached by DownloadThread.")

        except Exception as main_thread_err:
            self.logger(f"\n❌ Critical error within DownloadThread run loop: {main_thread_err}")
            traceback.print_exc()
        finally:
            try:
                # Disconnect the bridged signals; disconnect can raise
                # TypeError/RuntimeError if already disconnected or deleted.
                if worker_signals_obj:
                    worker_signals_obj.progress_signal.disconnect(self.progress_signal)
                    worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
                    worker_signals_obj.external_link_signal.disconnect(self.external_link_signal)
                    worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
                    worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal)
                    worker_signals_obj.file_successfully_downloaded_signal.disconnect(self.file_successfully_downloaded_signal)
            except (TypeError, RuntimeError) as e:
                self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}")

            # Always report final totals, even on cancellation or crash.
            self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)

    def receive_add_character_result(self, result):
        """Called from the UI thread with the user's answer to the
        add-character prompt; stored under prompt_mutex for this thread."""
        with QMutexLocker(self.prompt_mutex):
            self._add_character_response = result
        self.logger(f"   (DownloadThread) Received character prompt response: {'Yes (added/confirmed)' if result else 'No (declined/failed)'}")


class InterruptedError(Exception):
    """Custom exception for handling cancellations gracefully.

    NOTE(review): this shadows the builtin ``InterruptedError`` (an OSError
    subclass raised on EINTR) for the rest of this module — any
    ``except InterruptedError`` here no longer catches the builtin. Consider
    renaming (e.g. DownloadCancelledError); kept as-is to preserve the
    module's public interface.
    """
    pass