diff --git a/main_window.py b/main_window.py
new file mode 100644
index 0000000..930462d
--- /dev/null
+++ b/main_window.py
@@ -0,0 +1,5517 @@
+# --- Standard Library Imports ---
+import sys
+import os
+import time
+import queue
+import traceback
+import html
+import http
+import json
+import re
+import subprocess
+import datetime
+import requests
+import unicodedata
+from collections import deque
+import threading
+from concurrent.futures import Future, ThreadPoolExecutor ,CancelledError
+from urllib .parse import urlparse
+
+# --- PyQt5 Imports ---
+from PyQt5.QtGui import QIcon, QIntValidator, QDesktopServices
+from PyQt5.QtWidgets import (
+ QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton,
+ QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, QRadioButton,
+ QButtonGroup, QCheckBox, QSplitter, QGroupBox, QDialog, QStackedWidget,
+ QScrollArea, QListWidgetItem, QSizePolicy, QProgressBar, QAbstractItemView, QFrame,
+ QMainWindow, QAction, QGridLayout
+)
+from PyQt5.QtCore import Qt, QThread, pyqtSignal, QObject, QTimer, QSettings, QStandardPaths, QUrl, QSize, QProcess, QMutex, QMutexLocker
+
+# --- Local Application Imports ---
+from ..services.drive_downloader import download_mega_file as drive_download_mega_file ,download_gdrive_file ,download_dropbox_file
+from ..core.workers import DownloadThread as BackendDownloadThread
+from ..core.workers import PostProcessorWorker
+from ..core.workers import PostProcessorSignals
+from ..core.api_client import download_from_api
+from ..core.manager import DownloadManager
+from .assets import get_app_icon_object
+from ..config.constants import *
+from ..utils.file_utils import KNOWN_NAMES, clean_folder_name
+from ..utils.network_utils import extract_post_info, prepare_cookies_for_request
+from ..i18n.translator import get_translation
+from .dialogs.EmptyPopupDialog import EmptyPopupDialog
+from .dialogs.CookieHelpDialog import CookieHelpDialog
+from .dialogs.FavoriteArtistsDialog import FavoriteArtistsDialog
+from .dialogs.KnownNamesFilterDialog import KnownNamesFilterDialog
+from .dialogs.HelpGuideDialog import HelpGuideDialog
+from .dialogs.FutureSettingsDialog import FutureSettingsDialog
+from .dialogs.ErrorFilesDialog import ErrorFilesDialog
+from .dialogs.DownloadHistoryDialog import DownloadHistoryDialog
+from .dialogs.DownloadExtractedLinksDialog import DownloadExtractedLinksDialog
+from .dialogs.FavoritePostsDialog import FavoritePostsDialog
+from .dialogs.FavoriteArtistsDialog import FavoriteArtistsDialog
+from .dialogs.ConfirmAddAllDialog import ConfirmAddAllDialog
+from .dialogs.MoreOptionsDialog import MoreOptionsDialog
+from .dialogs.SinglePDF import create_single_pdf_from_content
+
class DynamicFilterHolder:
    """Thread-safe container for the character filters active during a download.

    Filters are stored as a list of dicts. Both accessors copy each dict so
    that callers can never mutate the shared state in place; a single lock
    guards every read and write.
    """

    def __init__(self, initial_filters=None):
        self.lock = threading.Lock()
        self._filters = [] if initial_filters is None else initial_filters

    def get_filters(self):
        """Return a snapshot of the current filters (each dict copied)."""
        with self.lock:
            return [dict(entry) for entry in self._filters]

    def set_filters(self, new_filters):
        """Replace the current filters with copies of *new_filters* (falsy clears)."""
        with self.lock:
            self._filters = [dict(entry) for entry in new_filters] if new_filters else []
+
+
class PostProcessorSignals(QObject):
    """A collection of signals for the DownloaderApp to communicate with itself across threads.

    NOTE(review): this definition shadows the ``PostProcessorSignals`` imported
    from ``..core.workers`` earlier in this file — confirm which of the two
    downstream code is expected to use before renaming either.
    """
    progress_signal = pyqtSignal(str)                      # log/progress message text
    file_download_status_signal = pyqtSignal(bool)         # per-file success/failure flag
    external_link_signal = pyqtSignal(str, str, str, str, str)   # five string fields describing an extracted link
    file_progress_signal = pyqtSignal(str, object)         # filename + opaque progress payload
    file_successfully_downloaded_signal = pyqtSignal(dict) # details of a completed file
    missed_character_post_signal = pyqtSignal(str, str)    # post title + reason it was skipped
    worker_finished_signal = pyqtSignal(tuple)             # worker result bundle
    finished_signal = pyqtSignal(int, int, bool, list)     # totals + cancelled flag + trailing list payload
    retryable_file_failed_signal = pyqtSignal(list)        # failures that may be retried
    permanent_file_failed_signal = pyqtSignal(list)        # failures that will not be retried
+
+class DownloaderApp (QWidget ):
+ character_prompt_response_signal =pyqtSignal (bool )
+ log_signal =pyqtSignal (str )
+ add_character_prompt_signal =pyqtSignal (str )
+ overall_progress_signal =pyqtSignal (int ,int )
+ file_successfully_downloaded_signal =pyqtSignal (dict )
+ post_processed_for_history_signal =pyqtSignal (dict )
+ finished_signal =pyqtSignal (int ,int ,bool ,list )
+ external_link_signal =pyqtSignal (str ,str ,str ,str ,str )
+ file_progress_signal =pyqtSignal (str ,object )
+
+
+ def __init__(self):
+ super().__init__()
+ self.settings = QSettings(CONFIG_ORGANIZATION_NAME, CONFIG_APP_NAME_MAIN)
+
+ # --- CORRECT PATH DEFINITION ---
+ # This block correctly determines the application's base directory whether
+ # it's running from source or as a frozen executable.
+ if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
+ # Path for PyInstaller one-file bundle
+ self.app_base_dir = os.path.dirname(sys.executable)
+ else:
+ # Path for running from source code
+ self.app_base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
+
+ # All file paths will now correctly use the single, correct app_base_dir
+ self.config_file = os.path.join(self.app_base_dir, "appdata", "Known.txt")
+ self.session_file_path = os.path.join(self.app_base_dir, "appdata", "session.json")
+ self.persistent_history_file = os.path.join(self.app_base_dir, "appdata", "download_history.json")
+
+ self.download_thread = None
+ self.thread_pool = None
+ self.cancellation_event = threading.Event()
+ self.session_lock = threading.Lock()
+ self.interrupted_session_data = None
+ self.is_restore_pending = False
+ self.external_link_download_thread = None
+ self.pause_event = threading.Event()
+ self.active_futures = []
+ self.total_posts_to_process = 0
+ self.dynamic_character_filter_holder = DynamicFilterHolder()
+ self.processed_posts_count = 0
+ self.creator_name_cache = {}
+ self.log_signal.emit(f"ℹ️ App base directory: {self.app_base_dir}")
+ self.log_signal.emit(f"ℹ️ Persistent history file path set to: {self.persistent_history_file}")
+
+ # --- The rest of your __init__ method continues from here ---
+ self.last_downloaded_files_details = deque(maxlen=3)
+ self.download_history_candidates = deque(maxlen=8)
+ self.final_download_history_entries = []
+ self.favorite_download_queue = deque()
+ self.is_processing_favorites_queue = False
+ self.download_counter = 0
+ self.permanently_failed_files_for_dialog = []
+ self.last_link_input_text_for_queue_sync = ""
+ self.is_fetcher_thread_running = False
+ self._restart_pending = False
+ self.download_history_log = deque(maxlen=50)
+ self.skip_counter = 0
+ self.all_kept_original_filenames = []
+ self.cancellation_message_logged_this_session = False
+ self.favorite_scope_toggle_button = None
+ self.favorite_download_scope = FAVORITE_SCOPE_SELECTED_LOCATION
+ self.manga_mode_checkbox = None
+ self.selected_cookie_filepath = None
+ self.retryable_failed_files_info = []
+ self.is_paused = False
+ self.worker_to_gui_queue = queue.Queue()
+ self.gui_update_timer = QTimer(self)
+ self.actual_gui_signals = PostProcessorSignals()
+ self.worker_signals = PostProcessorSignals()
+ self.prompt_mutex = QMutex()
+ self._add_character_response = None
+ self._original_scan_content_tooltip = ("If checked, the downloader will scan the HTML content of posts for image URLs (from tags or direct links).\n"
+ "now This includes resolving relative paths from
tags to full URLs.\n"
+ "Relative paths in
tags (e.g., /data/image.jpg) will be resolved to full URLs.\n"
+ "Useful for cases where images are in the post description but not in the API's file/attachment list.")
+ self.downloaded_files = set()
+ self.downloaded_files_lock = threading.Lock()
+ self.downloaded_file_hashes = set()
+ self.downloaded_file_hashes_lock = threading.Lock()
+ self.show_external_links = False
+ self.external_link_queue = deque()
+ self._is_processing_external_link_queue = False
+ self._current_link_post_title = None
+ self.extracted_links_cache = []
+ self.manga_rename_toggle_button = None
+ self.favorite_mode_checkbox = None
+ self.url_or_placeholder_stack = None
+ self.url_input_widget = None
+ self.url_placeholder_widget = None
+ self.favorite_action_buttons_widget = None
+ self.favorite_mode_artists_button = None
+ self.favorite_mode_posts_button = None
+ self.standard_action_buttons_widget = None
+ self.bottom_action_buttons_stack = None
+ self.main_log_output = None
+ self.external_log_output = None
+ self.log_splitter = None
+ self.main_splitter = None
+ self.reset_button = None
+ self.progress_log_label = None
+ self.log_verbosity_toggle_button = None
+ self.missed_character_log_output = None
+ self.log_view_stack = None
+ self.current_log_view = 'progress'
+ self.link_search_input = None
+ self.link_search_button = None
+ self.export_links_button = None
+ self.radio_only_links = None
+ self.radio_only_archives = None
+ self.missed_title_key_terms_count = {}
+ self.missed_title_key_terms_examples = {}
+ self.logged_summary_for_key_term = set()
+ self.STOP_WORDS = set(["a", "an", "the", "is", "was", "were", "of", "for", "with", "in", "on", "at", "by", "to", "and", "or", "but", "i", "you", "he", "she", "it", "we", "they", "my", "your", "his", "her", "its", "our", "their", "com", "net", "org", "www"])
+ self.already_logged_bold_key_terms = set()
+ self.missed_key_terms_buffer = []
+ self.char_filter_scope_toggle_button = None
+ self.skip_words_scope = SKIP_SCOPE_POSTS
+ self.char_filter_scope = CHAR_SCOPE_TITLE
+ self.manga_filename_style = self.settings.value(MANGA_FILENAME_STYLE_KEY, STYLE_POST_TITLE, type=str)
+ self.current_theme = self.settings.value(THEME_KEY, "dark", type=str)
+ self.only_links_log_display_mode = LOG_DISPLAY_LINKS
+ self.mega_download_log_preserved_once = False
+ self.allow_multipart_download_setting = False
+ self.use_cookie_setting = False
+ self.scan_content_images_setting = self.settings.value(SCAN_CONTENT_IMAGES_KEY, False, type=bool)
+ self.cookie_text_setting = ""
+ self.current_selected_language = self.settings.value(LANGUAGE_KEY, "en", type=str)
+ self.more_filter_scope = None
+ self.text_export_format = 'pdf'
+ self.single_pdf_setting = False
+ self.session_temp_files = []
+
+ print(f"ℹ️ Known.txt will be loaded/saved at: {self.config_file}")
+
+ try:
+ base_path_for_icon = ""
+ if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
+ base_path_for_icon = sys._MEIPASS
+ else:
+ base_path_for_icon = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
+
+ icon_path_for_window = os.path.join(base_path_for_icon, 'assets', 'Kemono.ico')
+
+ if os.path.exists(icon_path_for_window):
+ self.setWindowIcon(QIcon(icon_path_for_window))
+ else:
+ if getattr(sys, 'frozen', False):
+ executable_dir = os.path.dirname(sys.executable)
+ fallback_icon_path = os.path.join(executable_dir, 'assets', 'Kemono.ico')
+ if os.path.exists(fallback_icon_path):
+ self.setWindowIcon(QIcon(fallback_icon_path))
+ else:
+ self.log_signal.emit(f"⚠️ Main window icon 'assets/Kemono.ico' not found at {icon_path_for_window} or {fallback_icon_path}")
+ else:
+ self.log_signal.emit(f"⚠️ Main window icon 'assets/Kemono.ico' not found at {icon_path_for_window}")
+ except Exception as e_icon_app:
+ self.log_signal.emit(f"❌ Error setting main window icon in DownloaderApp init: {e_icon_app}")
+
+ self.url_label_widget = None
+ self.download_location_label_widget = None
+ self.remove_from_filename_label_widget = None
+ self.skip_words_label_widget = None
+ self.setWindowTitle("Kemono Downloader v6.0.0")
+ self.init_ui()
+ self._connect_signals()
+ self.log_signal.emit("ℹ️ Local API server functionality has been removed.")
+ self.log_signal.emit("ℹ️ 'Skip Current File' button has been removed.")
+ if hasattr(self, 'character_input'):
+ self.character_input.setToolTip(self._tr("character_input_tooltip", "Enter character names (comma-separated)..."))
+ self.log_signal.emit(f"ℹ️ Manga filename style loaded: '{self.manga_filename_style}'")
+ self.log_signal.emit(f"ℹ️ Skip words scope loaded: '{self.skip_words_scope}'")
+ self.log_signal.emit(f"ℹ️ Character filter scope set to default: '{self.char_filter_scope}'")
+ self.log_signal.emit(f"ℹ️ Multi-part download defaults to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}")
+ self.log_signal.emit(f"ℹ️ Cookie text defaults to: Empty on launch")
+ self.log_signal.emit(f"ℹ️ 'Use Cookie' setting defaults to: Disabled on launch")
+ self.log_signal.emit(f"ℹ️ Scan post content for images defaults to: {'Enabled' if self.scan_content_images_setting else 'Disabled'}")
+ self.log_signal.emit(f"ℹ️ Application language loaded: '{self.current_selected_language.upper()}' (UI may not reflect this yet).")
+ self._retranslate_main_ui()
+ self._load_persistent_history()
+ self._load_saved_download_location()
+ self._update_button_states_and_connections()
+ self._check_for_interrupted_session()
+
+
+ def get_checkbox_map(self):
+ """Returns a mapping of checkbox attribute names to their corresponding settings key."""
+ return {
+ 'skip_zip_checkbox': 'skip_zip',
+ 'skip_rar_checkbox': 'skip_rar',
+ 'download_thumbnails_checkbox': 'download_thumbnails',
+ 'compress_images_checkbox': 'compress_images',
+ 'use_subfolders_checkbox': 'use_subfolders',
+ 'use_subfolder_per_post_checkbox': 'use_post_subfolders',
+ 'use_multithreading_checkbox': 'use_multithreading',
+ 'external_links_checkbox': 'show_external_links',
+ 'keep_duplicates_checkbox': 'keep_in_post_duplicates',
+ 'date_prefix_checkbox': 'use_date_prefix_for_subfolder',
+ 'manga_mode_checkbox': 'manga_mode_active',
+ 'scan_content_images_checkbox': 'scan_content_for_images',
+ 'use_cookie_checkbox': 'use_cookie',
+ 'favorite_mode_checkbox': 'favorite_mode_active'
+ }
+
+ def _get_current_ui_settings_as_dict(self, api_url_override=None, output_dir_override=None):
+ """Gathers all relevant UI settings into a JSON-serializable dictionary."""
+ settings = {}
+
+ settings['api_url'] = api_url_override if api_url_override is not None else self.link_input.text().strip()
+ settings['output_dir'] = output_dir_override if output_dir_override is not None else self.dir_input.text().strip()
+ settings['character_filter_text'] = self.character_input.text().strip()
+ settings['skip_words_text'] = self.skip_words_input.text().strip()
+ settings['remove_words_text'] = self.remove_from_filename_input.text().strip()
+ settings['custom_folder_name'] = self.custom_folder_input.text().strip()
+ settings['cookie_text'] = self.cookie_text_input.text().strip()
+ if hasattr(self, 'manga_date_prefix_input'):
+ settings['manga_date_prefix'] = self.manga_date_prefix_input.text().strip()
+
+ try: settings['num_threads'] = int(self.thread_count_input.text().strip())
+ except (ValueError, AttributeError): settings['num_threads'] = 4
+ try: settings['start_page'] = int(self.start_page_input.text().strip()) if self.start_page_input.text().strip() else None
+ except (ValueError, AttributeError): settings['start_page'] = None
+ try: settings['end_page'] = int(self.end_page_input.text().strip()) if self.end_page_input.text().strip() else None
+ except (ValueError, AttributeError): settings['end_page'] = None
+
+ for checkbox_name, key in self.get_checkbox_map().items():
+ if checkbox := getattr(self, checkbox_name, None): settings[key] = checkbox.isChecked()
+
+ settings['filter_mode'] = self.get_filter_mode()
+ settings['only_links'] = self.radio_only_links.isChecked()
+
+ settings['skip_words_scope'] = self.skip_words_scope
+ settings['char_filter_scope'] = self.char_filter_scope
+ settings['manga_filename_style'] = self.manga_filename_style
+ settings['allow_multipart_download'] = self.allow_multipart_download_setting
+
+ return settings
+
+
+ def _tr (self ,key ,default_text =""):
+ """Helper to get translation based on current app language for the main window."""
+ if callable (get_translation ):
+ return get_translation (self .current_selected_language ,key ,default_text )
+ return default_text
+
+ def _load_saved_download_location (self ):
+ saved_location =self .settings .value (DOWNLOAD_LOCATION_KEY ,"",type =str )
+ if saved_location and os .path .isdir (saved_location ):
+ if hasattr (self ,'dir_input')and self .dir_input :
+ self .dir_input .setText (saved_location )
+ self .log_signal .emit (f"ℹ️ Loaded saved download location: {saved_location }")
+ else :
+ self .log_signal .emit (f"⚠️ Found saved download location '{saved_location }', but dir_input not ready.")
+ elif saved_location :
+ self .log_signal .emit (f"⚠️ Found saved download location '{saved_location }', but it's not a valid directory. Ignoring.")
+
+ def _check_for_interrupted_session(self):
+ """Checks for an incomplete session file on startup and prepares the UI for restore if found."""
+ if os.path.exists(self.session_file_path):
+ try:
+ with open(self.session_file_path, 'r', encoding='utf-8') as f:
+ session_data = json.load(f)
+
+ if "ui_settings" not in session_data or "download_state" not in session_data:
+ raise ValueError("Invalid session file structure.")
+
+ failed_files_from_session = session_data.get('download_state', {}).get('permanently_failed_files', [])
+ if failed_files_from_session:
+ self.permanently_failed_files_for_dialog.clear()
+ self.permanently_failed_files_for_dialog.extend(failed_files_from_session)
+ self.log_signal.emit(f"ℹ️ Restored {len(failed_files_from_session)} failed file entries from the previous session.")
+
+ self.interrupted_session_data = session_data
+ self.log_signal.emit("ℹ️ Incomplete download session found. UI updated for restore.")
+ self._prepare_ui_for_restore()
+
+ except Exception as e:
+ self.log_signal.emit(f"❌ Error reading session file: {e}. Deleting corrupt session file.")
+ os.remove(self.session_file_path)
+ self.interrupted_session_data = None
+ self.is_restore_pending = False
+
+ def _prepare_ui_for_restore(self):
+ """Configures the UI to a 'restore pending' state."""
+ if not self.interrupted_session_data:
+ return
+
+ self.log_signal.emit(" UI updated for session restore.")
+ settings = self.interrupted_session_data.get("ui_settings", {})
+ self._load_ui_from_settings_dict(settings)
+
+ self.is_restore_pending = True
+ self._update_button_states_and_connections() # Update buttons for restore state, UI remains editable
+
+ def _clear_session_and_reset_ui(self):
+ """Clears the session file and resets the UI to its default state."""
+ self._clear_session_file()
+ self.interrupted_session_data = None
+ self.is_restore_pending = False
+ self._update_button_states_and_connections() # Ensure buttons are updated to idle state
+ self.reset_application_state()
+
+ def _clear_session_file(self):
+ """Safely deletes the session file."""
+ if os.path.exists(self.session_file_path):
+ try:
+ os.remove(self.session_file_path)
+ self.log_signal.emit("ℹ️ Interrupted session file cleared.")
+ except Exception as e:
+ self.log_signal.emit(f"❌ Failed to clear session file: {e}")
+
+ def _save_session_file(self, session_data):
+ """Safely saves the session data to the session file using an atomic write pattern."""
+ temp_session_file_path = self.session_file_path + ".tmp"
+ try:
+ with open(temp_session_file_path, 'w', encoding='utf-8') as f:
+ json.dump(session_data, f, indent=2)
+ os.replace(temp_session_file_path, self.session_file_path)
+ except Exception as e:
+ self.log_signal.emit(f"❌ Failed to save session state: {e}")
+ if os.path.exists(temp_session_file_path):
+ try:
+ os.remove(temp_session_file_path)
+ except Exception as e_rem:
+ self.log_signal.emit(f"❌ Failed to remove temp session file: {e_rem}")
+
    def _update_button_states_and_connections(self):
        """
        Updates the text and click connections of the main action buttons
        based on the current application state (downloading, paused, restore pending, idle).

        Three mutually exclusive states are handled, in priority order:
        restore-pending, active download (running or paused), and idle.
        Each state assigns text, enabled-ness, tooltip, and exactly one
        clicked-handler per button.
        """
        # Disconnect all signals first to prevent multiple connections.
        # PyQt raises TypeError from disconnect() when nothing is connected,
        # hence the targeted except clauses.
        try: self.download_btn.clicked.disconnect()
        except TypeError: pass
        try: self.pause_btn.clicked.disconnect()
        except TypeError: pass
        try: self.cancel_btn.clicked.disconnect()
        except TypeError: pass

        is_download_active = self._is_download_active()

        if self.is_restore_pending:
            # State: Restore Pending — Start discards the old session,
            # Pause doubles as "Restore", Cancel discards the session.
            self.download_btn.setText(self._tr("start_download_button_text", "⬇️ Start Download"))
            self.download_btn.setEnabled(True)
            self.download_btn.clicked.connect(self.start_download)
            self.download_btn.setToolTip(self._tr("start_download_discard_tooltip", "Click to start a new download, discarding the previous session."))

            self.pause_btn.setText(self._tr("restore_download_button_text", "🔄 Restore Download"))
            self.pause_btn.setEnabled(True)
            self.pause_btn.clicked.connect(self.restore_download)
            self.pause_btn.setToolTip(self._tr("restore_download_button_tooltip", "Click to restore the interrupted download."))

            # Cancel button acts as "Discard Session" in this state.
            self.cancel_btn.setText(self._tr("discard_session_button_text", "🗑️ Discard Session"))
            self.cancel_btn.setEnabled(True)
            self.cancel_btn.clicked.connect(self._clear_session_and_reset_ui)
            self.cancel_btn.setToolTip(self._tr("discard_session_tooltip", "Click to discard the interrupted session and reset the UI."))

        elif is_download_active:
            # State: Downloading / Paused — Pause toggles between pause and
            # resume depending on self.is_paused.
            self.download_btn.setText(self._tr("start_download_button_text", "⬇️ Start Download"))
            self.download_btn.setEnabled(False)  # Cannot start new download while one is active

            self.pause_btn.setText(self._tr("resume_download_button_text", "▶️ Resume Download") if self.is_paused else self._tr("pause_download_button_text", "⏸️ Pause Download"))
            self.pause_btn.setEnabled(True)
            self.pause_btn.clicked.connect(self._handle_pause_resume_action)
            self.pause_btn.setToolTip(self._tr("resume_download_button_tooltip", "Click to resume the download.") if self.is_paused else self._tr("pause_download_button_tooltip", "Click to pause the download."))

            self.cancel_btn.setText(self._tr("cancel_button_text", "❌ Cancel & Reset UI"))
            self.cancel_btn.setEnabled(True)
            self.cancel_btn.clicked.connect(self.cancel_download_button_action)
            self.cancel_btn.setToolTip(self._tr("cancel_button_tooltip", "Click to cancel the ongoing download/extraction process and reset the UI fields (preserving URL and Directory)."))
        else:
            # State: Idle (no download running, no restore pending) —
            # only Start is enabled; Pause/Cancel are inert placeholders.
            self.download_btn.setText(self._tr("start_download_button_text", "⬇️ Start Download"))
            self.download_btn.setEnabled(True)
            self.download_btn.clicked.connect(self.start_download)

            self.pause_btn.setText(self._tr("pause_download_button_text", "⏸️ Pause Download"))
            self.pause_btn.setEnabled(False)  # No active download to pause
            self.pause_btn.setToolTip(self._tr("pause_download_button_tooltip", "Click to pause the ongoing download process."))

            self.cancel_btn.setText(self._tr("cancel_button_text", "❌ Cancel & Reset UI"))
            self.cancel_btn.setEnabled(False)  # No active download to cancel
            self.cancel_btn.setToolTip(self._tr("cancel_button_tooltip", "Click to cancel the ongoing download/extraction process and reset the UI fields (preserving URL and Directory)."))
+
+
+ def _retranslate_main_ui (self ):
+ """Retranslates static text elements in the main UI."""
+ if self .url_label_widget :
+ self .url_label_widget .setText (self ._tr ("creator_post_url_label","🔗 Kemono Creator/Post URL:"))
+ if self .download_location_label_widget :
+ self .download_location_label_widget .setText (self ._tr ("download_location_label","📁 Download Location:"))
+ if hasattr (self ,'character_label')and self .character_label :
+ self .character_label .setText (self ._tr ("filter_by_character_label","🎯 Filter by Character(s) (comma-separated):"))
+ if self .skip_words_label_widget :
+ self .skip_words_label_widget .setText (self ._tr ("skip_with_words_label","🚫 Skip with Words (comma-separated):"))
+ if self .remove_from_filename_label_widget :
+ self .remove_from_filename_label_widget .setText (self ._tr ("remove_words_from_name_label","✂️ Remove Words from name:"))
+ if hasattr (self ,'radio_all'):self .radio_all .setText (self ._tr ("filter_all_radio","All"))
+ if hasattr (self ,'radio_images'):self .radio_images .setText (self ._tr ("filter_images_radio","Images/GIFs"))
+ if hasattr (self ,'radio_videos'):self .radio_videos .setText (self ._tr ("filter_videos_radio","Videos"))
+ if hasattr (self ,'radio_only_archives'):self .radio_only_archives .setText (self ._tr ("filter_archives_radio","📦 Only Archives"))
+ if hasattr (self ,'radio_only_links'):self .radio_only_links .setText (self ._tr ("filter_links_radio","🔗 Only Links"))
+ if hasattr (self ,'radio_only_audio'):self .radio_only_audio .setText (self ._tr ("filter_audio_radio","🎧 Only Audio"))
+ if hasattr (self ,'favorite_mode_checkbox'):self .favorite_mode_checkbox .setText (self ._tr ("favorite_mode_checkbox_label","⭐ Favorite Mode"))
+ if hasattr (self ,'dir_button'):self .dir_button .setText (self ._tr ("browse_button_text","Browse..."))
+ self ._update_char_filter_scope_button_text ()
+ self ._update_skip_scope_button_text ()
+
+ if hasattr (self ,'skip_zip_checkbox'):self .skip_zip_checkbox .setText (self ._tr ("skip_zip_checkbox_label","Skip .zip"))
+ if hasattr (self ,'skip_rar_checkbox'):self .skip_rar_checkbox .setText (self ._tr ("skip_rar_checkbox_label","Skip .rar"))
+ if hasattr (self ,'download_thumbnails_checkbox'):self .download_thumbnails_checkbox .setText (self ._tr ("download_thumbnails_checkbox_label","Download Thumbnails Only"))
+ if hasattr (self ,'scan_content_images_checkbox'):self .scan_content_images_checkbox .setText (self ._tr ("scan_content_images_checkbox_label","Scan Content for Images"))
+ if hasattr (self ,'compress_images_checkbox'):self .compress_images_checkbox .setText (self ._tr ("compress_images_checkbox_label","Compress to WebP"))
+ if hasattr (self ,'use_subfolders_checkbox'):self .use_subfolders_checkbox .setText (self ._tr ("separate_folders_checkbox_label","Separate Folders by Name/Title"))
+ if hasattr (self ,'use_subfolder_per_post_checkbox'):self .use_subfolder_per_post_checkbox .setText (self ._tr ("subfolder_per_post_checkbox_label","Subfolder per Post"))
+ if hasattr (self ,'use_cookie_checkbox'):self .use_cookie_checkbox .setText (self ._tr ("use_cookie_checkbox_label","Use Cookie"))
+ if hasattr (self ,'use_multithreading_checkbox'):self .update_multithreading_label (self .thread_count_input .text ()if hasattr (self ,'thread_count_input')else "1")
+ if hasattr (self ,'external_links_checkbox'):self .external_links_checkbox .setText (self ._tr ("show_external_links_checkbox_label","Show External Links in Log"))
+ if hasattr (self ,'manga_mode_checkbox'):self .manga_mode_checkbox .setText (self ._tr ("manga_comic_mode_checkbox_label","Manga/Comic Mode"))
+ if hasattr (self ,'thread_count_label'):self .thread_count_label .setText (self ._tr ("threads_label","Threads:"))
+
+ if hasattr (self ,'character_input'):
+ self .character_input .setToolTip (self ._tr ("character_input_tooltip","Enter character names (comma-separated)..."))
+ if hasattr (self ,'download_btn'):self .download_btn .setToolTip (self ._tr ("start_download_button_tooltip","Click to start the download or link extraction process with the current settings."))
+
+
+
+
+
+ current_download_is_active =self ._is_download_active ()if hasattr (self ,'_is_download_active')else False
+ self .set_ui_enabled (not current_download_is_active )
+
+ if hasattr (self ,'known_chars_label'):self .known_chars_label .setText (self ._tr ("known_chars_label_text","🎭 Known Shows/Characters (for Folder Names):"))
+ if hasattr (self ,'open_known_txt_button'):self .open_known_txt_button .setText (self ._tr ("open_known_txt_button_text","Open Known.txt"));self .open_known_txt_button .setToolTip (self ._tr ("open_known_txt_button_tooltip","Open the 'Known.txt' file..."))
+ if hasattr (self ,'add_char_button'):self .add_char_button .setText (self ._tr ("add_char_button_text","➕ Add"));self .add_char_button .setToolTip (self ._tr ("add_char_button_tooltip","Add the name from the input field..."))
+ if hasattr (self ,'add_to_filter_button'):self .add_to_filter_button .setText (self ._tr ("add_to_filter_button_text","⤵️ Add to Filter"));self .add_to_filter_button .setToolTip (self ._tr ("add_to_filter_button_tooltip","Select names from 'Known Shows/Characters' list..."))
+ if hasattr (self ,'character_list'):
+ self .character_list .setToolTip (self ._tr ("known_chars_list_tooltip","This list contains names used for automatic folder creation..."))
+ if hasattr (self ,'delete_char_button'):self .delete_char_button .setText (self ._tr ("delete_char_button_text","🗑️ Delete Selected"));self .delete_char_button .setToolTip (self ._tr ("delete_char_button_tooltip","Delete the selected name(s)..."))
+
+ if hasattr (self ,'cancel_btn'):self .cancel_btn .setToolTip (self ._tr ("cancel_button_tooltip","Click to cancel the ongoing download/extraction process and reset the UI fields (preserving URL and Directory)."))
+ if hasattr (self ,'error_btn'):self .error_btn .setText (self ._tr ("error_button_text","Error"));self .error_btn .setToolTip (self ._tr ("error_button_tooltip","View files skipped due to errors and optionally retry them."))
+ if hasattr (self ,'progress_log_label'):self .progress_log_label .setText (self ._tr ("progress_log_label_text","📜 Progress Log:"))
+ if hasattr (self ,'reset_button'):self .reset_button .setText (self ._tr ("reset_button_text","🔄 Reset"));self .reset_button .setToolTip (self ._tr ("reset_button_tooltip","Reset all inputs and logs to default state (only when idle)."))
+ self ._update_multipart_toggle_button_text ()
+ if hasattr (self ,'progress_label')and not self ._is_download_active ():self .progress_label .setText (self ._tr ("progress_idle_text","Progress: Idle"))
+ if hasattr (self ,'favorite_mode_artists_button'):self .favorite_mode_artists_button .setText (self ._tr ("favorite_artists_button_text","🖼️ Favorite Artists"));self .favorite_mode_artists_button .setToolTip (self ._tr ("favorite_artists_button_tooltip","Browse and download from your favorite artists..."))
+ if hasattr (self ,'favorite_mode_posts_button'):self .favorite_mode_posts_button .setText (self ._tr ("favorite_posts_button_text","📄 Favorite Posts"));self .favorite_mode_posts_button .setToolTip (self ._tr ("favorite_posts_button_tooltip","Browse and download your favorite posts..."))
+ self ._update_favorite_scope_button_text ()
+ if hasattr (self ,'page_range_label'):self .page_range_label .setText (self ._tr ("page_range_label_text","Page Range:"))
+ if hasattr (self ,'start_page_input'):
+ self .start_page_input .setPlaceholderText (self ._tr ("start_page_input_placeholder","Start"))
+ self .start_page_input .setToolTip (self ._tr ("start_page_input_tooltip","For creator URLs: Specify the starting page number..."))
+ if hasattr (self ,'to_label'):self .to_label .setText (self ._tr ("page_range_to_label_text","to"))
+ if hasattr (self ,'end_page_input'):
+ self .end_page_input .setPlaceholderText (self ._tr ("end_page_input_placeholder","End"))
+ self .end_page_input .setToolTip (self ._tr ("end_page_input_tooltip","For creator URLs: Specify the ending page number..."))
+ if hasattr (self ,'fav_mode_active_label'):
+ self .fav_mode_active_label .setText (self ._tr ("fav_mode_active_label_text","⭐ Favorite Mode is active..."))
+ if hasattr (self ,'cookie_browse_button'):
+ self .cookie_browse_button .setToolTip (self ._tr ("cookie_browse_button_tooltip","Browse for a cookie file..."))
+ self ._update_manga_filename_style_button_text ()
+ if hasattr (self ,'export_links_button'):self .export_links_button .setText (self ._tr ("export_links_button_text","Export Links"))
+ if hasattr (self ,'download_extracted_links_button'):self .download_extracted_links_button .setText (self ._tr ("download_extracted_links_button_text","Download"))
+ self ._update_log_display_mode_button_text ()
+
+
+ if hasattr (self ,'radio_all'):self .radio_all .setToolTip (self ._tr ("radio_all_tooltip","Download all file types found in posts."))
+ if hasattr (self ,'radio_images'):self .radio_images .setToolTip (self ._tr ("radio_images_tooltip","Download only common image formats (JPG, PNG, GIF, WEBP, etc.)."))
+ if hasattr (self ,'radio_videos'):self .radio_videos .setToolTip (self ._tr ("radio_videos_tooltip","Download only common video formats (MP4, MKV, WEBM, MOV, etc.)."))
+ if hasattr (self ,'radio_only_archives'):self .radio_only_archives .setToolTip (self ._tr ("radio_only_archives_tooltip","Exclusively download .zip and .rar files. Other file-specific options are disabled."))
+ if hasattr (self ,'radio_only_audio'):self .radio_only_audio .setToolTip (self ._tr ("radio_only_audio_tooltip","Download only common audio formats (MP3, WAV, FLAC, etc.)."))
+ if hasattr (self ,'radio_only_links'):self .radio_only_links .setToolTip (self ._tr ("radio_only_links_tooltip","Extract and display external links from post descriptions instead of downloading files.\nDownload-related options will be disabled."))
+
+
+ if hasattr (self ,'use_subfolders_checkbox'):self .use_subfolders_checkbox .setToolTip (self ._tr ("use_subfolders_checkbox_tooltip","Create subfolders based on 'Filter by Character(s)' input..."))
+ if hasattr (self ,'use_subfolder_per_post_checkbox'):self .use_subfolder_per_post_checkbox .setToolTip (self ._tr ("use_subfolder_per_post_checkbox_tooltip","Creates a subfolder for each post..."))
+ if hasattr (self ,'use_cookie_checkbox'):self .use_cookie_checkbox .setToolTip (self ._tr ("use_cookie_checkbox_tooltip","If checked, will attempt to use cookies..."))
+ if hasattr (self ,'use_multithreading_checkbox'):self .use_multithreading_checkbox .setToolTip (self ._tr ("use_multithreading_checkbox_tooltip","Enables concurrent operations..."))
+ if hasattr (self ,'thread_count_input'):self .thread_count_input .setToolTip (self ._tr ("thread_count_input_tooltip","Number of concurrent operations..."))
+ if hasattr (self ,'external_links_checkbox'):self .external_links_checkbox .setToolTip (self ._tr ("external_links_checkbox_tooltip","If checked, a secondary log panel appears..."))
+ if hasattr (self ,'manga_mode_checkbox'):self .manga_mode_checkbox .setToolTip (self ._tr ("manga_mode_checkbox_tooltip","Downloads posts from oldest to newest..."))
+
+ if hasattr (self ,'scan_content_images_checkbox'):self .scan_content_images_checkbox .setToolTip (self ._tr ("scan_content_images_checkbox_tooltip",self ._original_scan_content_tooltip ))
+ if hasattr (self ,'download_thumbnails_checkbox'):self .download_thumbnails_checkbox .setToolTip (self ._tr ("download_thumbnails_checkbox_tooltip","Downloads small preview images..."))
+ if hasattr (self ,'skip_words_input'):
+ self .skip_words_input .setToolTip (self ._tr ("skip_words_input_tooltip",
+ ("Enter words, comma-separated, to skip downloading certain content (e.g., WIP, sketch, preview).\n\n"
+ "The 'Scope: [Type]' button next to this input cycles how this filter applies:\n"
+ "- Scope: Files: Skips individual files if their names contain any of these words.\n"
+ "- Scope: Posts: Skips entire posts if their titles contain any of these words.\n"
+ "- Scope: Both: Applies both (post title first, then individual files if post title is okay).")))
+ if hasattr (self ,'remove_from_filename_input'):
+ self .remove_from_filename_input .setToolTip (self ._tr ("remove_words_input_tooltip",
+ ("Enter words, comma-separated, to remove from downloaded filenames (case-insensitive).\n"
+ "Useful for cleaning up common prefixes/suffixes.\nExample: patreon, kemono, [HD], _final")))
+
+ if hasattr (self ,'link_input'):
+ self .link_input .setPlaceholderText (self ._tr ("link_input_placeholder_text","e.g., https://kemono.su/patreon/user/12345 or .../post/98765"))
+ self .link_input .setToolTip (self ._tr ("link_input_tooltip_text","Enter the full URL..."))
+ if hasattr (self ,'dir_input'):
+ self .dir_input .setPlaceholderText (self ._tr ("dir_input_placeholder_text","Select folder where downloads will be saved"))
+ self .dir_input .setToolTip (self ._tr ("dir_input_tooltip_text","Enter or browse to the main folder..."))
+ if hasattr (self ,'character_input'):
+ self .character_input .setPlaceholderText (self ._tr ("character_input_placeholder_text","e.g., Tifa, Aerith, (Cloud, Zack)"))
+ if hasattr (self ,'custom_folder_input'):
+ self .custom_folder_input .setPlaceholderText (self ._tr ("custom_folder_input_placeholder_text","Optional: Save this post to specific folder"))
+ self .custom_folder_input .setToolTip (self ._tr ("custom_folder_input_tooltip_text","If downloading a single post URL..."))
+ if hasattr (self ,'skip_words_input'):
+ self .skip_words_input .setPlaceholderText (self ._tr ("skip_words_input_placeholder_text","e.g., WM, WIP, sketch, preview"))
+ if hasattr (self ,'remove_from_filename_input'):
+ self .remove_from_filename_input .setPlaceholderText (self ._tr ("remove_from_filename_input_placeholder_text","e.g., patreon, HD"))
+ self ._update_cookie_input_placeholders_and_tooltips ()
+ if hasattr (self ,'character_search_input'):
+ self .character_search_input .setPlaceholderText (self ._tr ("character_search_input_placeholder_text","Search characters..."))
+ self .character_search_input .setToolTip (self ._tr ("character_search_input_tooltip_text","Type here to filter the list..."))
+ if hasattr (self ,'new_char_input'):
+ self .new_char_input .setPlaceholderText (self ._tr ("new_char_input_placeholder_text","Add new show/character name"))
+ self .new_char_input .setToolTip (self ._tr ("new_char_input_tooltip_text","Enter a new show, game, or character name..."))
+ if hasattr (self ,'link_search_input'):
+ self .link_search_input .setPlaceholderText (self ._tr ("link_search_input_placeholder_text","Search Links..."))
+ self .link_search_input .setToolTip (self ._tr ("link_search_input_tooltip_text","When in 'Only Links' mode..."))
+ if hasattr (self ,'manga_date_prefix_input'):
+ self .manga_date_prefix_input .setPlaceholderText (self ._tr ("manga_date_prefix_input_placeholder_text","Prefix for Manga Filenames"))
+ self .manga_date_prefix_input .setToolTip (self ._tr ("manga_date_prefix_input_tooltip_text","Optional prefix for 'Date Based'..."))
+ if hasattr (self ,'empty_popup_button'):self .empty_popup_button .setToolTip (self ._tr ("empty_popup_button_tooltip_text","Open Creator Selection..."))
+ if hasattr (self ,'known_names_help_button'):self .known_names_help_button .setToolTip (self ._tr ("known_names_help_button_tooltip_text","Open the application feature guide."))
+ if hasattr (self ,'future_settings_button'):self .future_settings_button .setToolTip (self ._tr ("future_settings_button_tooltip_text","Open application settings..."))
+ if hasattr (self ,'link_search_button'):self .link_search_button .setToolTip (self ._tr ("link_search_button_tooltip_text","Filter displayed links"))
+ def apply_theme (self ,theme_name ,initial_load =False ):
+ self .current_theme =theme_name
+ if not initial_load :
+ self .settings .setValue (THEME_KEY ,theme_name )
+ self .settings .sync ()
+
+ if theme_name =="dark":
+ self .setStyleSheet (self .get_dark_theme ())
+ if not initial_load :
+ self .log_signal .emit ("🎨 Switched to Dark Mode.")
+ else :
+ self .setStyleSheet ("")
+ if not initial_load :
+ self .log_signal .emit ("🎨 Switched to Light Mode.")
+ self .update ()
+
+ def _get_tooltip_for_character_input (self ):
+ return (
+ self ._tr ("character_input_tooltip","Default tooltip if translation fails.")
+ )
    def _connect_signals (self ):
        """Wire all widget and worker signals to their handlers.

        Called once during window setup. Optional widgets (not present in
        every build/layout) are guarded with ``hasattr``/truthiness checks so
        a partial UI still connects without raising.
        """
        # --- Worker -> GUI bridge signals (re-emitted by _process_worker_queue) ---
        self .actual_gui_signals .progress_signal .connect (self .handle_main_log )
        self .actual_gui_signals .file_progress_signal .connect (self .update_file_progress_display )
        self .actual_gui_signals .missed_character_post_signal .connect (self .handle_missed_character_post )
        self .actual_gui_signals .external_link_signal .connect (self .handle_external_link_signal )
        self .actual_gui_signals .file_successfully_downloaded_signal .connect (self ._handle_actual_file_downloaded )
        self.actual_gui_signals.worker_finished_signal.connect(self._handle_worker_result)
        # Deliberate no-op: status updates are consumed elsewhere, but the
        # signal is connected so emissions are not left dangling.
        self .actual_gui_signals .file_download_status_signal .connect (lambda status :None )

        # --- Input widgets (optional; guarded) ---
        if hasattr (self ,'character_input'):
            self .character_input .textChanged .connect (self ._on_character_input_changed_live )
        if hasattr (self ,'use_cookie_checkbox'):
            self .use_cookie_checkbox .toggled .connect (self ._update_cookie_input_visibility )
        if hasattr (self ,'link_input'):
            self .link_input .textChanged .connect (self ._sync_queue_with_link_input )
        if hasattr (self ,'cookie_browse_button'):
            self .cookie_browse_button .clicked .connect (self ._browse_cookie_file )
        if hasattr (self ,'cookie_text_input'):
            self .cookie_text_input .textChanged .connect (self ._handle_cookie_text_manual_change )
        if hasattr (self ,'download_thumbnails_checkbox'):
            self .download_thumbnails_checkbox .toggled .connect (self ._handle_thumbnail_mode_change )
        # Poll the worker queue every 100 ms from the GUI thread.
        self .gui_update_timer .timeout .connect (self ._process_worker_queue )
        self .gui_update_timer .start (100 )
        # --- Application-level signals ---
        self .log_signal .connect (self .handle_main_log )
        self .add_character_prompt_signal .connect (self .prompt_add_character )
        self .character_prompt_response_signal .connect (self .receive_add_character_result )
        self .overall_progress_signal .connect (self .update_progress_display )
        self .post_processed_for_history_signal .connect (self ._add_to_history_candidates )
        self .finished_signal .connect (self .download_finished )
        # --- Secondary option widgets ---
        if hasattr (self ,'character_search_input'):self .character_search_input .textChanged .connect (self .filter_character_list )
        if hasattr (self ,'external_links_checkbox'):self .external_links_checkbox .toggled .connect (self .update_external_links_setting )
        if hasattr (self ,'thread_count_input'):self .thread_count_input .textChanged .connect (self .update_multithreading_label )
        if hasattr (self ,'use_subfolder_per_post_checkbox'):self .use_subfolder_per_post_checkbox .toggled .connect (self .update_ui_for_subfolders )
        if hasattr (self ,'use_multithreading_checkbox'):self .use_multithreading_checkbox .toggled .connect (self ._handle_multithreading_toggle )

        if hasattr (self ,'radio_group')and self .radio_group :
            self .radio_group .buttonToggled .connect (self ._handle_filter_mode_change )

        if self .reset_button :self .reset_button .clicked .connect (self .reset_application_state )
        if self .log_verbosity_toggle_button :self .log_verbosity_toggle_button .clicked .connect (self .toggle_active_log_view )

        # --- Link-extraction panel ---
        if self .link_search_button :self .link_search_button .clicked .connect (self ._filter_links_log )
        if self .link_search_input :
            self .link_search_input .returnPressed .connect (self ._filter_links_log )
            self .link_search_input .textChanged .connect (self ._filter_links_log )
        if self .export_links_button :self .export_links_button .clicked .connect (self ._export_links_to_file )

        if self .manga_mode_checkbox :self .manga_mode_checkbox .toggled .connect (self .update_ui_for_manga_mode )

        if hasattr (self ,'download_extracted_links_button'):
            self .download_extracted_links_button .clicked .connect (self ._show_download_extracted_links_dialog )

        if hasattr (self ,'log_display_mode_toggle_button'):
            self .log_display_mode_toggle_button .clicked .connect (self ._toggle_log_display_mode )

        if self .manga_rename_toggle_button :self .manga_rename_toggle_button .clicked .connect (self ._toggle_manga_filename_style )

        # URL edits can enable/disable manga-mode UI, so re-evaluate on change.
        if hasattr (self ,'link_input'):
            self .link_input .textChanged .connect (lambda :self .update_ui_for_manga_mode (self .manga_mode_checkbox .isChecked ()if self .manga_mode_checkbox else False ))

        # --- Scope/mode toggle buttons ---
        if self .skip_scope_toggle_button :
            self .skip_scope_toggle_button .clicked .connect (self ._cycle_skip_scope )

        if self .char_filter_scope_toggle_button :
            self .char_filter_scope_toggle_button .clicked .connect (self ._cycle_char_filter_scope )

        if hasattr (self ,'multipart_toggle_button'):self .multipart_toggle_button .clicked .connect (self ._toggle_multipart_mode )

        # --- Favorite-mode and dialog launchers ---
        if hasattr (self ,'favorite_mode_checkbox'):
            self .favorite_mode_checkbox .toggled .connect (self ._handle_favorite_mode_toggle )

        if hasattr (self ,'open_known_txt_button'):
            self .open_known_txt_button .clicked .connect (self ._open_known_txt_file )

        if hasattr (self ,'add_to_filter_button'):
            self .add_to_filter_button .clicked .connect (self ._show_add_to_filter_dialog )
        if hasattr (self ,'favorite_mode_artists_button'):
            self .favorite_mode_artists_button .clicked .connect (self ._show_favorite_artists_dialog )
        if hasattr (self ,'favorite_mode_posts_button'):
            self .favorite_mode_posts_button .clicked .connect (self ._show_favorite_posts_dialog )
        if hasattr (self ,'favorite_scope_toggle_button'):
            self .favorite_scope_toggle_button .clicked .connect (self ._cycle_favorite_scope )
        if hasattr (self ,'history_button'):
            self .history_button .clicked .connect (self ._show_download_history_dialog )
        if hasattr (self ,'error_btn'):
            self .error_btn .clicked .connect (self ._show_error_files_dialog )
+
+ def _on_character_input_changed_live (self ,text ):
+ """
+ Called when the character input field text changes.
+ If a download is active (running or paused), this updates the dynamic filter holder.
+ """
+ if self ._is_download_active ():
+ QCoreApplication .processEvents ()
+ raw_character_filters_text =self .character_input .text ().strip ()
+ parsed_filters =self ._parse_character_filters (raw_character_filters_text )
+
+ self .dynamic_character_filter_holder .set_filters (parsed_filters )
+
+ def _parse_character_filters (self ,raw_text ):
+ """Helper to parse character filter string into list of objects."""
+ parsed_character_filter_objects =[]
+ if raw_text :
+ raw_parts =[]
+ current_part_buffer =""
+ in_group_parsing =False
+ for char_token in raw_text :
+ if char_token =='('and not in_group_parsing :
+ in_group_parsing =True
+ current_part_buffer +=char_token
+ elif char_token ==')'and in_group_parsing :
+ in_group_parsing =False
+ current_part_buffer +=char_token
+ elif char_token ==','and not in_group_parsing :
+ if current_part_buffer .strip ():raw_parts .append (current_part_buffer .strip ())
+ current_part_buffer =""
+ else :
+ current_part_buffer +=char_token
+ if current_part_buffer .strip ():raw_parts .append (current_part_buffer .strip ())
+
+ for part_str in raw_parts :
+ part_str =part_str .strip ()
+ if not part_str :continue
+
+ is_tilde_group =part_str .startswith ("(")and part_str .endswith (")~")
+ is_standard_group_for_splitting =part_str .startswith ("(")and part_str .endswith (")")and not is_tilde_group
+
+ if is_tilde_group :
+ group_content_str =part_str [1 :-2 ].strip ()
+ aliases_in_group =[alias .strip ()for alias in group_content_str .split (',')if alias .strip ()]
+ if aliases_in_group :
+ group_folder_name =" ".join (aliases_in_group )
+ parsed_character_filter_objects .append ({"name":group_folder_name ,"is_group":True ,"aliases":aliases_in_group })
+ elif is_standard_group_for_splitting :
+ group_content_str =part_str [1 :-1 ].strip ()
+ aliases_in_group =[alias .strip ()for alias in group_content_str .split (',')if alias .strip ()]
+ if aliases_in_group :
+ group_folder_name =" ".join (aliases_in_group )
+ parsed_character_filter_objects .append ({
+ "name":group_folder_name ,
+ "is_group":True ,
+ "aliases":aliases_in_group ,
+ "components_are_distinct_for_known_txt":True
+ })
+ else :
+ parsed_character_filter_objects .append ({"name":part_str ,"is_group":False ,"aliases":[part_str ],"components_are_distinct_for_known_txt":False })
+ return parsed_character_filter_objects
+
+ def _process_worker_queue (self ):
+ """Processes messages from the worker queue and emits Qt signals from the GUI thread."""
+ while not self .worker_to_gui_queue .empty ():
+ try :
+ item =self .worker_to_gui_queue .get_nowait ()
+ signal_type =item .get ('type')
+ payload =item .get ('payload',tuple ())
+
+ if signal_type =='progress':
+ self .actual_gui_signals .progress_signal .emit (*payload )
+ elif signal_type =='file_download_status':
+ self .actual_gui_signals .file_download_status_signal .emit (*payload )
+ elif signal_type =='external_link':
+ self .actual_gui_signals .external_link_signal .emit (*payload )
+ elif signal_type =='file_progress':
+ self .actual_gui_signals .file_progress_signal .emit (*payload )
+ elif signal_type =='missed_character_post':
+ self .actual_gui_signals .missed_character_post_signal .emit (*payload )
+ elif signal_type =='file_successfully_downloaded':
+ self ._handle_actual_file_downloaded (payload [0 ]if payload else {})
+ elif signal_type =='file_successfully_downloaded':
+ self ._handle_file_successfully_downloaded (payload [0 ])
+ elif signal_type == 'worker_finished': # <-- ADD THIS ELIF BLOCK
+ self.actual_gui_signals.worker_finished_signal.emit(payload[0] if payload else tuple())
+ else:
+ self .log_signal .emit (f"⚠️ Unknown signal type from worker queue: {signal_type }")
+ self .worker_to_gui_queue .task_done ()
+ except queue .Empty :
+ break
+ except Exception as e :
+ self .log_signal .emit (f"❌ Error processing worker queue: {e }")
+
    def load_known_names_from_util (self ):
        """Load Known.txt into the global KNOWN_NAMES list and refresh the UI.

        File format (one entry per line):
          - "(a, b, c)" — a group: folder name is the cleaned, space-joined
            content; every part is an alias for matching.
          - any other non-empty line — a single, non-group entry.

        A missing file is not an error (it is created on save). On parse
        failure the user is warned and KNOWN_NAMES is cleared.
        """
        global KNOWN_NAMES
        if os .path .exists (self .config_file ):
            parsed_known_objects =[]
            try :
                with open (self .config_file ,'r',encoding ='utf-8')as f :
                    for line_num ,line in enumerate (f ,1 ):
                        line =line .strip ()
                        if not line :continue

                        if line .startswith ("(")and line .endswith (")"):
                            # Group entry: "(alias1, alias2, ...)".
                            content =line [1 :-1 ].strip ()
                            parts =[p .strip ()for p in content .split (',')if p .strip ()]
                            if parts :
                                # Folder name is the raw content with commas
                                # turned into spaces, then sanitised.
                                folder_name_raw =content .replace (',',' ')
                                folder_name_cleaned =clean_folder_name (folder_name_raw )

                                # De-duplicate aliases, case-insensitive sort.
                                unique_aliases_set ={p for p in parts }
                                final_aliases_list =sorted (list (unique_aliases_set ),key =str .lower )

                                if not folder_name_cleaned :
                                    # Sanitising stripped everything; skip it.
                                    if hasattr (self ,'log_signal'):self .log_signal .emit (f"⚠️ Group resulted in empty folder name after cleaning in Known.txt on line {line_num }: '{line }'. Skipping entry.")
                                    continue

                                parsed_known_objects .append ({
                                "name":folder_name_cleaned ,
                                "is_group":True ,
                                "aliases":final_aliases_list
                                })
                            else :
                                if hasattr (self ,'log_signal'):self .log_signal .emit (f"⚠️ Empty group found in Known.txt on line {line_num }: '{line }'")
                        else :
                            # Single (non-group) entry: the line is both the
                            # folder name and its only alias.
                            parsed_known_objects .append ({
                            "name":line ,
                            "is_group":False ,
                            "aliases":[line ]
                            })
                parsed_known_objects .sort (key =lambda x :x ["name"].lower ())
                # Replace contents in place so other modules holding a
                # reference to KNOWN_NAMES see the update.
                KNOWN_NAMES [:]=parsed_known_objects
                log_msg =f"ℹ️ Loaded {len (KNOWN_NAMES )} known entries from {self .config_file }"
            except Exception as e :
                log_msg =f"❌ Error loading config '{self .config_file }': {e }"
                QMessageBox .warning (self ,"Config Load Error",f"Could not load list from {self .config_file }:\n{e }")
                KNOWN_NAMES [:]=[]
        else :
            # First run: no config file yet. Explain the expected syntax via
            # the input's tooltip and start with an empty list.
            self .character_input .setToolTip ("Names, comma-separated. Group aliases: (alias1, alias2, alias3) becomes folder name 'alias1 alias2 alias3' (after cleaning).\nAll names in the group are used as aliases for matching.\nE.g., yor, (Boa, Hancock, Snake Princess)")
            log_msg =f"ℹ️ Config file '{self .config_file }' not found. It will be created on save."
            KNOWN_NAMES [:]=[]

        if hasattr (self ,'log_signal'):self .log_signal .emit (log_msg )

        # Rebuild the on-screen list widget from the loaded entries.
        if hasattr (self ,'character_list'):
            self .character_list .clear ()
            if not KNOWN_NAMES :
                self .log_signal .emit ("ℹ️ 'Known.txt' is empty or was not found. No default entries will be added.")

            self .character_list .addItems ([entry ["name"]for entry in KNOWN_NAMES ])
+
+ def save_known_names(self):
+ """
+ Saves the current list of known names (KNOWN_NAMES) to the config file.
+ This version includes a fix to ensure the destination directory exists
+ before attempting to write the file, preventing crashes in new installations.
+ """
+ global KNOWN_NAMES
+ try:
+ # --- FIX STARTS HERE ---
+ # Get the directory path from the full file path.
+ config_dir = os.path.dirname(self.config_file)
+ # Create the directory if it doesn't exist. 'exist_ok=True' prevents
+ # an error if the directory is already there.
+ os.makedirs(config_dir, exist_ok=True)
+ # --- FIX ENDS HERE ---
+
+ with open(self.config_file, 'w', encoding='utf-8') as f:
+ for entry in KNOWN_NAMES:
+ if entry["is_group"]:
+ # For groups, write the aliases in a sorted, comma-separated format inside parentheses.
+ f.write(f"({', '.join(sorted(entry['aliases'], key=str.lower))})\n")
+ else:
+ # For single entries, write the name on its own line.
+ f.write(entry["name"] + '\n')
+
+ if hasattr(self, 'log_signal'):
+ self.log_signal.emit(f"💾 Saved {len(KNOWN_NAMES)} known entries to {self.config_file}")
+
+ except Exception as e:
+ # If any error occurs during saving, log it and show a warning popup.
+ log_msg = f"❌ Error saving config '{self.config_file}': {e}"
+ if hasattr(self, 'log_signal'):
+ self.log_signal.emit(log_msg)
+ QMessageBox.warning(self, "Config Save Error", f"Could not save list to {self.config_file}:\n{e}")
+
    def closeEvent (self ,event ):
        """Qt close handler: persist state, then confirm/perform shutdown.

        Settings and history are saved BEFORE the exit confirmation, so even
        a declined exit persists current preferences. If a download is
        active, the user is asked to confirm; on Yes the cancellation event
        is set, the single download thread is interrupted and joined (3 s
        timeout), and the thread pool is shut down blocking. On No the close
        event is ignored and the app keeps running.
        """
        # --- Persist user data and preferences unconditionally ---
        self .save_known_names ()
        self .settings .setValue (MANGA_FILENAME_STYLE_KEY ,self .manga_filename_style )
        self .settings .setValue (ALLOW_MULTIPART_DOWNLOAD_KEY ,self .allow_multipart_download_setting )
        self .settings .setValue (COOKIE_TEXT_KEY ,self .cookie_text_input .text ()if hasattr (self ,'cookie_text_input')else "")
        self .settings .setValue (SCAN_CONTENT_IMAGES_KEY ,self .scan_content_images_checkbox .isChecked ()if hasattr (self ,'scan_content_images_checkbox')else False )
        self .settings .setValue (USE_COOKIE_KEY ,self .use_cookie_checkbox .isChecked ()if hasattr (self ,'use_cookie_checkbox')else False )
        self .settings .setValue (THEME_KEY ,self .current_theme )
        self .settings .setValue (LANGUAGE_KEY ,self .current_selected_language )
        self .settings .sync ()
        self ._save_persistent_history ()

        should_exit =True
        is_downloading =self ._is_download_active ()

        if is_downloading :
            reply =QMessageBox .question (self ,"Confirm Exit",
            "Download in progress. Are you sure you want to exit and cancel?",
            QMessageBox .Yes |QMessageBox .No ,QMessageBox .No )
            if reply ==QMessageBox .Yes :
                self .log_signal .emit ("⚠️ Cancelling active download due to application exit...")
                # Cooperative cancellation for all workers.
                self .cancellation_event .set ()
                if self .download_thread and self .download_thread .isRunning ():
                    self .download_thread .requestInterruption ()
                    self .log_signal .emit ("   Signaled single download thread to interrupt.")
                if self .download_thread and self .download_thread .isRunning ():
                    self .log_signal .emit ("   Waiting for single download thread to finish...")
                    # Bounded join: give the thread 3 s to exit gracefully.
                    self .download_thread .wait (3000 )
                    if self .download_thread .isRunning ():
                        self .log_signal .emit ("   ⚠️ Single download thread did not terminate gracefully.")

                if self .thread_pool :
                    self .log_signal .emit ("   Shutting down thread pool (waiting for completion)...")
                    self .thread_pool .shutdown (wait =True ,cancel_futures =True )
                    self .log_signal .emit ("   Thread pool shutdown complete.")
                    self .thread_pool =None
                self .log_signal .emit ("   Cancellation for exit complete.")
            else :
                # User declined: veto the close and keep running.
                should_exit =False
                self .log_signal .emit ("ℹ️ Application exit cancelled.")
                event .ignore ()
                return

        if should_exit :
            self .log_signal .emit ("ℹ️ Application closing.")
            # Belt-and-braces: the pool may exist even without an active
            # download; shut it down before accepting the close.
            if self .thread_pool :
                self .log_signal .emit ("   Final thread pool check: Shutting down...")
                self .cancellation_event .set ()
                self .thread_pool .shutdown (wait =True ,cancel_futures =True )
                self .thread_pool =None
            self .log_signal .emit ("👋 Exiting application.")
            event .accept ()
+
+
    def _request_restart_application (self ):
        """Flag a pending restart (e.g. after a language change) and close.

        NOTE(review): the actual relaunch is expected to be triggered by the
        close path consulting _restart_pending (see _do_actual_restart) —
        that handshake lives outside this block; confirm it is wired up.
        """
        self .log_signal .emit ("🔄 Application restart requested by user for language change.")
        self ._restart_pending =True
        self .close ()
+
+ def _do_actual_restart (self ):
+ try :
+ self .log_signal .emit (" Performing application restart...")
+ python_executable =sys .executable
+ script_args =sys .argv
+
+
+ if getattr (sys ,'frozen',False ):
+
+
+
+ QProcess .startDetached (python_executable ,script_args [1 :])
+ else :
+
+
+ QProcess .startDetached (python_executable ,script_args )
+
+ QCoreApplication .instance ().quit ()
+ except Exception as e :
+ self .log_signal .emit (f"❌ CRITICAL: Failed to start new application instance: {e }")
+ QMessageBox .critical (self ,"Restart Failed",
+ f"Could not automatically restart the application: {e }\n\nPlease restart it manually.")
+
+ def init_ui(self):
+ self.main_splitter = QSplitter(Qt.Horizontal)
+
+ # --- Use a scroll area for the left panel for consistency ---
+ left_scroll_area = QScrollArea()
+ left_scroll_area.setWidgetResizable(True)
+ left_scroll_area.setFrameShape(QFrame.NoFrame)
+
+ left_panel_widget = QWidget()
+ left_layout = QVBoxLayout(left_panel_widget)
+ left_scroll_area.setWidget(left_panel_widget)
+
+ right_panel_widget = QWidget()
+ right_layout = QVBoxLayout(right_panel_widget)
+
+ left_layout.setContentsMargins(10, 10, 10, 10)
+ right_layout.setContentsMargins(10, 10, 10, 10)
+ self.apply_theme(self.current_theme, initial_load=True)
+
+ # --- URL and Page Range ---
+ self.url_input_widget = QWidget()
+ url_input_layout = QHBoxLayout(self.url_input_widget)
+ url_input_layout.setContentsMargins(0, 0, 0, 0)
+ self.url_label_widget = QLabel()
+ url_input_layout.addWidget(self.url_label_widget)
+ self.link_input = QLineEdit()
+ self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765")
+ self.link_input.textChanged.connect(self.update_custom_folder_visibility) # Connects the custom folder logic
+ url_input_layout.addWidget(self.link_input, 1)
+ self.empty_popup_button = QPushButton("🎨")
+ self.empty_popup_button.setStyleSheet("padding: 4px 6px;")
+ self.empty_popup_button.clicked.connect(self._show_empty_popup)
+ url_input_layout.addWidget(self.empty_popup_button)
+ self.page_range_label = QLabel(self._tr("page_range_label_text", "Page Range:"))
+ self.page_range_label.setStyleSheet("font-weight: bold; padding-left: 10px;")
+ url_input_layout.addWidget(self.page_range_label)
+ self.start_page_input = QLineEdit()
+ self.start_page_input.setPlaceholderText(self._tr("start_page_input_placeholder", "Start"))
+ self.start_page_input.setFixedWidth(50)
+ self.start_page_input.setValidator(QIntValidator(1, 99999))
+ url_input_layout.addWidget(self.start_page_input)
+ self.to_label = QLabel(self._tr("page_range_to_label_text", "to"))
+ url_input_layout.addWidget(self.to_label)
+ self.end_page_input = QLineEdit()
+ self.end_page_input.setPlaceholderText(self._tr("end_page_input_placeholder", "End"))
+ self.end_page_input.setFixedWidth(50)
+ self.end_page_input.setToolTip(self._tr("end_page_input_tooltip", "For creator URLs: Specify the ending page number..."))
+ self.end_page_input.setValidator(QIntValidator(1, 99999))
+ url_input_layout.addWidget(self.end_page_input)
+ self.url_placeholder_widget = QWidget()
+ placeholder_layout = QHBoxLayout(self.url_placeholder_widget)
+ placeholder_layout.setContentsMargins(0, 0, 0, 0)
+ self.fav_mode_active_label = QLabel(self._tr("fav_mode_active_label_text", "⭐ Favorite Mode is active..."))
+ self.fav_mode_active_label.setAlignment(Qt.AlignCenter)
+ placeholder_layout.addWidget(self.fav_mode_active_label)
+ self.url_or_placeholder_stack = QStackedWidget()
+ self.url_or_placeholder_stack.addWidget(self.url_input_widget)
+ self.url_or_placeholder_stack.addWidget(self.url_placeholder_widget)
+ left_layout.addWidget(self.url_or_placeholder_stack)
+
+ # --- Download Location ---
+ self.download_location_label_widget = QLabel()
+ left_layout.addWidget(self.download_location_label_widget)
+ dir_layout = QHBoxLayout()
+ self.dir_input = QLineEdit()
+ self.dir_input.setPlaceholderText("Select folder where downloads will be saved")
+ self.dir_button = QPushButton("Browse...")
+ self.dir_button.setStyleSheet("padding: 4px 10px;")
+ self.dir_button.clicked.connect(self.browse_directory)
+ dir_layout.addWidget(self.dir_input, 1)
+ dir_layout.addWidget(self.dir_button)
+ left_layout.addLayout(dir_layout)
+
+ # --- Filters and Custom Folder Container (from old layout) ---
+ self.filters_and_custom_folder_container_widget = QWidget()
+ filters_and_custom_folder_layout = QHBoxLayout(self.filters_and_custom_folder_container_widget)
+ filters_and_custom_folder_layout.setContentsMargins(0, 5, 0, 0)
+ filters_and_custom_folder_layout.setSpacing(10)
+ self.character_filter_widget = QWidget()
+ character_filter_v_layout = QVBoxLayout(self.character_filter_widget)
+ character_filter_v_layout.setContentsMargins(0, 0, 0, 0)
+ character_filter_v_layout.setSpacing(2)
+ self.character_label = QLabel("🎯 Filter by Character(s) (comma-separated):")
+ character_filter_v_layout.addWidget(self.character_label)
+ char_input_and_button_layout = QHBoxLayout()
+ char_input_and_button_layout.setContentsMargins(0, 0, 0, 0)
+ char_input_and_button_layout.setSpacing(10)
+ self.character_input = QLineEdit()
+ self.character_input.setPlaceholderText("e.g., Tifa, Aerith, (Cloud, Zack)")
+ char_input_and_button_layout.addWidget(self.character_input, 3)
+ self.char_filter_scope_toggle_button = QPushButton()
+ self._update_char_filter_scope_button_text()
+ char_input_and_button_layout.addWidget(self.char_filter_scope_toggle_button, 1)
+ character_filter_v_layout.addLayout(char_input_and_button_layout)
+
+ # --- Custom Folder Widget Definition ---
+ self.custom_folder_widget = QWidget()
+ custom_folder_v_layout = QVBoxLayout(self.custom_folder_widget)
+ custom_folder_v_layout.setContentsMargins(0, 0, 0, 0)
+ custom_folder_v_layout.setSpacing(2)
+ self.custom_folder_label = QLabel("🗄️ Custom Folder Name (Single Post Only):")
+ self.custom_folder_input = QLineEdit()
+ self.custom_folder_input.setPlaceholderText("Optional: Save this post to specific folder")
+ custom_folder_v_layout.addWidget(self.custom_folder_label)
+ custom_folder_v_layout.addWidget(self.custom_folder_input)
+ self.custom_folder_widget.setVisible(False)
+
+ filters_and_custom_folder_layout.addWidget(self.character_filter_widget, 1)
+ filters_and_custom_folder_layout.addWidget(self.custom_folder_widget, 1)
+ left_layout.addWidget(self.filters_and_custom_folder_container_widget)
+
+ # --- Word Manipulation Container ---
+ word_manipulation_container_widget = QWidget()
+ word_manipulation_outer_layout = QHBoxLayout(word_manipulation_container_widget)
+ word_manipulation_outer_layout.setContentsMargins(0, 0, 0, 0)
+ word_manipulation_outer_layout.setSpacing(15)
+ skip_words_widget = QWidget()
+ skip_words_vertical_layout = QVBoxLayout(skip_words_widget)
+ skip_words_vertical_layout.setContentsMargins(0, 0, 0, 0)
+ skip_words_vertical_layout.setSpacing(2)
+ self.skip_words_label_widget = QLabel()
+ skip_words_vertical_layout.addWidget(self.skip_words_label_widget)
+ skip_input_and_button_layout = QHBoxLayout()
+ skip_input_and_button_layout.setContentsMargins(0, 0, 0, 0)
+ skip_input_and_button_layout.setSpacing(10)
+ self.skip_words_input = QLineEdit()
+ self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview")
+ skip_input_and_button_layout.addWidget(self.skip_words_input, 1)
+ self.skip_scope_toggle_button = QPushButton()
+ self._update_skip_scope_button_text()
+ skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 0)
+ skip_words_vertical_layout.addLayout(skip_input_and_button_layout)
+ word_manipulation_outer_layout.addWidget(skip_words_widget, 7)
+ remove_words_widget = QWidget()
+ remove_words_vertical_layout = QVBoxLayout(remove_words_widget)
+ remove_words_vertical_layout.setContentsMargins(0, 0, 0, 0)
+ remove_words_vertical_layout.setSpacing(2)
+ self.remove_from_filename_label_widget = QLabel()
+ remove_words_vertical_layout.addWidget(self.remove_from_filename_label_widget)
+ self.remove_from_filename_input = QLineEdit()
+ self.remove_from_filename_input.setPlaceholderText("e.g., patreon, HD")
+ remove_words_vertical_layout.addWidget(self.remove_from_filename_input)
+ word_manipulation_outer_layout.addWidget(remove_words_widget, 3)
+ left_layout.addWidget(word_manipulation_container_widget)
+
+ # --- File Filter Layout ---
+ file_filter_layout = QVBoxLayout()
+ file_filter_layout.setContentsMargins(0, 10, 0, 0)
+ file_filter_layout.addWidget(QLabel("Filter Files:"))
+ radio_button_layout = QHBoxLayout()
+ radio_button_layout.setSpacing(10)
+ self.radio_group = QButtonGroup(self)
+ self.radio_all = QRadioButton("All")
+ self.radio_images = QRadioButton("Images/GIFs")
+ self.radio_videos = QRadioButton("Videos")
+ self.radio_only_archives = QRadioButton("📦 Only Archives")
+ self.radio_only_audio = QRadioButton("🎧 Only Audio")
+ self.radio_only_links = QRadioButton("🔗 Only Links")
+ self.radio_more = QRadioButton("More")
+
+ self.radio_all.setChecked(True)
+ for btn in [self.radio_all, self.radio_images, self.radio_videos, self.radio_only_archives, self.radio_only_audio, self.radio_only_links, self.radio_more]:
+ self.radio_group.addButton(btn)
+ radio_button_layout.addWidget(btn)
+ self.favorite_mode_checkbox = QCheckBox()
+ self.favorite_mode_checkbox.setChecked(False)
+ radio_button_layout.addWidget(self.favorite_mode_checkbox)
+ radio_button_layout.addStretch(1)
+ file_filter_layout.addLayout(radio_button_layout)
+ left_layout.addLayout(file_filter_layout)
+
+ # --- Checkboxes Group ---
+ checkboxes_group_layout = QVBoxLayout()
+ checkboxes_group_layout.setSpacing(10)
+ row1_layout = QHBoxLayout()
+ row1_layout.setSpacing(10)
+ self.skip_zip_checkbox = QCheckBox("Skip .zip")
+ self.skip_zip_checkbox.setChecked(True)
+ row1_layout.addWidget(self.skip_zip_checkbox)
+ self.skip_rar_checkbox = QCheckBox("Skip .rar")
+ self.skip_rar_checkbox.setChecked(True)
+ row1_layout.addWidget(self.skip_rar_checkbox)
+ self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only")
+ row1_layout.addWidget(self.download_thumbnails_checkbox)
+ self.scan_content_images_checkbox = QCheckBox("Scan Content for Images")
+ self.scan_content_images_checkbox.setChecked(self.scan_content_images_setting)
+ row1_layout.addWidget(self.scan_content_images_checkbox)
+ self.compress_images_checkbox = QCheckBox("Compress to WebP")
+ self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).")
+ row1_layout.addWidget(self.compress_images_checkbox)
+ self.keep_duplicates_checkbox = QCheckBox("Keep Duplicates")
+ self.keep_duplicates_checkbox.setToolTip("If checked, downloads all files from a post even if they have the same name.")
+ row1_layout.addWidget(self.keep_duplicates_checkbox)
+ row1_layout.addStretch(1)
+ checkboxes_group_layout.addLayout(row1_layout)
+
+ # --- Advanced Settings ---
+ advanced_settings_label = QLabel("⚙️ Advanced Settings:")
+ checkboxes_group_layout.addWidget(advanced_settings_label)
+ advanced_row1_layout = QHBoxLayout()
+ advanced_row1_layout.setSpacing(10)
+ self.use_subfolders_checkbox = QCheckBox("Separate Folders by Name/Title")
+ self.use_subfolders_checkbox.setChecked(True)
+ self.use_subfolders_checkbox.toggled.connect(self.update_ui_for_subfolders)
+ advanced_row1_layout.addWidget(self.use_subfolders_checkbox)
+ self.use_subfolder_per_post_checkbox = QCheckBox("Subfolder per Post")
+ self.use_subfolder_per_post_checkbox.toggled.connect(self.update_ui_for_subfolders)
+ advanced_row1_layout.addWidget(self.use_subfolder_per_post_checkbox)
+ self.date_prefix_checkbox = QCheckBox("Date Prefix")
+ self.date_prefix_checkbox.setToolTip("When 'Subfolder per Post' is active, prefix the folder name with the post's upload date.")
+ advanced_row1_layout.addWidget(self.date_prefix_checkbox)
+ self.use_cookie_checkbox = QCheckBox("Use Cookie")
+ self.use_cookie_checkbox.setChecked(self.use_cookie_setting)
+ self.cookie_text_input = QLineEdit()
+ self.cookie_text_input.setPlaceholderText("if no Select cookies.txt)")
+ self.cookie_text_input.setText(self.cookie_text_setting)
+ advanced_row1_layout.addWidget(self.use_cookie_checkbox)
+ advanced_row1_layout.addWidget(self.cookie_text_input, 2)
+ self.cookie_browse_button = QPushButton("Browse...")
+ self.cookie_browse_button.setFixedWidth(80)
+ self.cookie_browse_button.setStyleSheet("padding: 4px 8px;")
+ advanced_row1_layout.addWidget(self.cookie_browse_button)
+ advanced_row1_layout.addStretch(1)
+ checkboxes_group_layout.addLayout(advanced_row1_layout)
+ advanced_row2_layout = QHBoxLayout()
+ advanced_row2_layout.setSpacing(10)
+ multithreading_layout = QHBoxLayout()
+ multithreading_layout.setContentsMargins(0, 0, 0, 0)
+ self.use_multithreading_checkbox = QCheckBox("Use Multithreading")
+ self.use_multithreading_checkbox.setChecked(True)
+ multithreading_layout.addWidget(self.use_multithreading_checkbox)
+ self.thread_count_label = QLabel("Threads:")
+ multithreading_layout.addWidget(self.thread_count_label)
+ self.thread_count_input = QLineEdit("4")
+ self.thread_count_input.setFixedWidth(40)
+ self.thread_count_input.setValidator(QIntValidator(1, MAX_THREADS))
+ multithreading_layout.addWidget(self.thread_count_input)
+ advanced_row2_layout.addLayout(multithreading_layout)
+ self.external_links_checkbox = QCheckBox("Show External Links in Log")
+ advanced_row2_layout.addWidget(self.external_links_checkbox)
+ self.manga_mode_checkbox = QCheckBox("Manga/Comic Mode")
+ advanced_row2_layout.addWidget(self.manga_mode_checkbox)
+ advanced_row2_layout.addStretch(1)
+ checkboxes_group_layout.addLayout(advanced_row2_layout)
+ left_layout.addLayout(checkboxes_group_layout)
+
+ # --- Action Buttons ---
+ self.standard_action_buttons_widget = QWidget()
+ btn_layout = QHBoxLayout(self.standard_action_buttons_widget)
+ btn_layout.setContentsMargins(0, 10, 0, 0)
+ btn_layout.setSpacing(10)
+ self.download_btn = QPushButton("⬇️ Start Download")
+ self.download_btn.setStyleSheet("padding: 4px 12px; font-weight: bold;")
+ self.download_btn.clicked.connect(self.start_download)
+ self.pause_btn = QPushButton("⏸️ Pause Download")
+ self.pause_btn.setEnabled(False)
+ self.pause_btn.setStyleSheet("padding: 4px 12px;")
+ self.pause_btn.clicked.connect(self._handle_pause_resume_action)
+ self.cancel_btn = QPushButton("❌ Cancel & Reset UI")
+ self.cancel_btn.setEnabled(False)
+ self.cancel_btn.setStyleSheet("padding: 4px 12px;")
+ self.cancel_btn.clicked.connect(self.cancel_download_button_action)
+ self.error_btn = QPushButton("Error")
+ self.error_btn.setToolTip("View files skipped due to errors and optionally retry them.")
+ self.error_btn.setStyleSheet("padding: 4px 8px;")
+ self.error_btn.setEnabled(True)
+ btn_layout.addWidget(self.download_btn)
+ btn_layout.addWidget(self.pause_btn)
+ btn_layout.addWidget(self.cancel_btn)
+ btn_layout.addWidget(self.error_btn)
+ self.favorite_action_buttons_widget = QWidget()
+ favorite_buttons_layout = QHBoxLayout(self.favorite_action_buttons_widget)
+ self.favorite_mode_artists_button = QPushButton("🖼️ Favorite Artists")
+ self.favorite_mode_posts_button = QPushButton("📄 Favorite Posts")
+ self.favorite_scope_toggle_button = QPushButton()
+ favorite_buttons_layout.addWidget(self.favorite_mode_artists_button)
+ favorite_buttons_layout.addWidget(self.favorite_mode_posts_button)
+ favorite_buttons_layout.addWidget(self.favorite_scope_toggle_button)
+ self.bottom_action_buttons_stack = QStackedWidget()
+ self.bottom_action_buttons_stack.addWidget(self.standard_action_buttons_widget)
+ self.bottom_action_buttons_stack.addWidget(self.favorite_action_buttons_widget)
+ left_layout.addWidget(self.bottom_action_buttons_stack)
+ left_layout.addSpacing(10)
+
+ # --- Known Names Layout ---
+ known_chars_label_layout = QHBoxLayout()
+ known_chars_label_layout.setSpacing(10)
+ self.known_chars_label = QLabel("🎭 Known Shows/Characters (for Folder Names):")
+ known_chars_label_layout.addWidget(self.known_chars_label)
+ self.open_known_txt_button = QPushButton("Open Known.txt")
+ self.open_known_txt_button.setStyleSheet("padding: 4px 8px;")
+ self.open_known_txt_button.setFixedWidth(120)
+ known_chars_label_layout.addWidget(self.open_known_txt_button)
+ self.character_search_input = QLineEdit()
+ self.character_search_input.setPlaceholderText("Search characters...")
+ known_chars_label_layout.addWidget(self.character_search_input, 1)
+ left_layout.addLayout(known_chars_label_layout)
+ self.character_list = QListWidget()
+ self.character_list.setSelectionMode(QListWidget.ExtendedSelection)
+ self.character_list.setMaximumHeight(150) # Set smaller height
+ left_layout.addWidget(self.character_list, 1)
+ char_manage_layout = QHBoxLayout()
+ char_manage_layout.setSpacing(10)
+ self.new_char_input = QLineEdit()
+ self.new_char_input.setPlaceholderText("Add new show/character name")
+ self.new_char_input.setStyleSheet("padding: 3px 5px;")
+ self.add_char_button = QPushButton("➕ Add")
+ self.add_char_button.setStyleSheet("padding: 4px 10px;")
+ self.add_to_filter_button = QPushButton("⤵️ Add to Filter")
+ self.add_to_filter_button.setToolTip("Select names... to add to the 'Filter by Character(s)' field.")
+ self.add_to_filter_button.setStyleSheet("padding: 4px 10px;")
+ self.delete_char_button = QPushButton("🗑️ Delete Selected")
+ self.delete_char_button.setToolTip("Delete the selected name(s)...")
+ self.delete_char_button.setStyleSheet("padding: 4px 10px;")
+ self.add_char_button.clicked.connect(self._handle_ui_add_new_character)
+ self.new_char_input.returnPressed.connect(self.add_char_button.click)
+ self.delete_char_button.clicked.connect(self.delete_selected_character)
+ char_manage_layout.addWidget(self.new_char_input, 2)
+ char_manage_layout.addWidget(self.add_char_button, 0)
+ self.known_names_help_button = QPushButton("?")
+ self.known_names_help_button.setFixedWidth(35)
+ self.known_names_help_button.setStyleSheet("padding: 4px 6px;")
+ self.known_names_help_button.clicked.connect(self._show_feature_guide)
+ self.history_button = QPushButton("📜")
+ self.history_button.setFixedWidth(35)
+ self.history_button.setStyleSheet("padding: 4px 6px;")
+ self.history_button.setToolTip(self._tr("history_button_tooltip_text", "View download history"))
+ self.future_settings_button = QPushButton("⚙️")
+ self.future_settings_button.setFixedWidth(35)
+ self.future_settings_button.setStyleSheet("padding: 4px 6px;")
+ self.future_settings_button.clicked.connect(self._show_future_settings_dialog)
+ char_manage_layout.addWidget(self.add_to_filter_button, 1)
+ char_manage_layout.addWidget(self.delete_char_button, 1)
+ char_manage_layout.addWidget(self.known_names_help_button, 0)
+ char_manage_layout.addWidget(self.history_button, 0)
+ char_manage_layout.addWidget(self.future_settings_button, 0)
+ left_layout.addLayout(char_manage_layout)
+ left_layout.addStretch(0)
+
+ # --- Right Panel (Logs) ---
+ # (This part of the layout is unchanged and remains correct)
+ log_title_layout = QHBoxLayout()
+ self.progress_log_label = QLabel("📜 Progress Log:")
+ log_title_layout.addWidget(self.progress_log_label)
+ log_title_layout.addStretch(1)
+ self.link_search_input = QLineEdit()
+ self.link_search_input.setPlaceholderText("Search Links...")
+ self.link_search_input.setVisible(False)
+ log_title_layout.addWidget(self.link_search_input)
+ self.link_search_button = QPushButton("🔍")
+ self.link_search_button.setVisible(False)
+ self.link_search_button.setFixedWidth(30)
+ self.link_search_button.setStyleSheet("padding: 4px 4px;")
+ log_title_layout.addWidget(self.link_search_button)
+ self.manga_rename_toggle_button = QPushButton()
+ self.manga_rename_toggle_button.setVisible(False)
+ self.manga_rename_toggle_button.setFixedWidth(140)
+ self.manga_rename_toggle_button.setStyleSheet("padding: 4px 8px;")
+ self._update_manga_filename_style_button_text()
+ log_title_layout.addWidget(self.manga_rename_toggle_button)
+ self.manga_date_prefix_input = QLineEdit()
+ self.manga_date_prefix_input.setPlaceholderText("Prefix for Manga Filenames")
+ self.manga_date_prefix_input.setVisible(False)
+ log_title_layout.addWidget(self.manga_date_prefix_input)
+ self.multipart_toggle_button = QPushButton()
+ self.multipart_toggle_button.setToolTip("Toggle between Multi-part and Single-stream downloads for large files.")
+ self.multipart_toggle_button.setFixedWidth(130)
+ self.multipart_toggle_button.setStyleSheet("padding: 4px 8px;")
+ self._update_multipart_toggle_button_text()
+ log_title_layout.addWidget(self.multipart_toggle_button)
+ self.EYE_ICON = "\U0001F441"
+ self.CLOSED_EYE_ICON = "\U0001F648"
+ self.log_verbosity_toggle_button = QPushButton(self.EYE_ICON)
+ self.log_verbosity_toggle_button.setFixedWidth(45)
+ self.log_verbosity_toggle_button.setStyleSheet("font-size: 11pt; padding: 4px 2px;")
+ log_title_layout.addWidget(self.log_verbosity_toggle_button)
+ self.reset_button = QPushButton("🔄 Reset")
+ self.reset_button.setFixedWidth(80)
+ self.reset_button.setStyleSheet("padding: 4px 8px;")
+ log_title_layout.addWidget(self.reset_button)
+ right_layout.addLayout(log_title_layout)
+ self.log_splitter = QSplitter(Qt.Vertical)
+ self.log_view_stack = QStackedWidget()
+ self.main_log_output = QTextEdit()
+ self.main_log_output.setReadOnly(True)
+ self.main_log_output.setLineWrapMode(QTextEdit.NoWrap)
+ self.log_view_stack.addWidget(self.main_log_output)
+ self.missed_character_log_output = QTextEdit()
+ self.missed_character_log_output.setReadOnly(True)
+ self.missed_character_log_output.setLineWrapMode(QTextEdit.NoWrap)
+ self.log_view_stack.addWidget(self.missed_character_log_output)
+ self.external_log_output = QTextEdit()
+ self.external_log_output.setReadOnly(True)
+ self.external_log_output.setLineWrapMode(QTextEdit.NoWrap)
+ self.external_log_output.hide()
+ self.log_splitter.addWidget(self.log_view_stack)
+ self.log_splitter.addWidget(self.external_log_output)
+ self.log_splitter.setSizes([self.height(), 0])
+ right_layout.addWidget(self.log_splitter, 1)
+ export_button_layout = QHBoxLayout()
+ export_button_layout.addStretch(1)
+ self.export_links_button = QPushButton(self._tr("export_links_button_text", "Export Links"))
+ self.export_links_button.setFixedWidth(100)
+ self.export_links_button.setStyleSheet("padding: 4px 8px; margin-top: 5px;")
+ self.export_links_button.setEnabled(False)
+ self.export_links_button.setVisible(False)
+ export_button_layout.addWidget(self.export_links_button)
+ self.download_extracted_links_button = QPushButton(self._tr("download_extracted_links_button_text", "Download"))
+ self.download_extracted_links_button.setFixedWidth(100)
+ self.download_extracted_links_button.setStyleSheet("padding: 4px 8px; margin-top: 5px;")
+ self.download_extracted_links_button.setEnabled(False)
+ self.download_extracted_links_button.setVisible(False)
+ export_button_layout.addWidget(self.download_extracted_links_button)
+ self.log_display_mode_toggle_button = QPushButton()
+ self.log_display_mode_toggle_button.setFixedWidth(120)
+ self.log_display_mode_toggle_button.setStyleSheet("padding: 4px 8px; margin-top: 5px;")
+ self.log_display_mode_toggle_button.setVisible(False)
+ export_button_layout.addWidget(self.log_display_mode_toggle_button)
+ right_layout.addLayout(export_button_layout)
+ self.progress_label = QLabel("Progress: Idle")
+ self.progress_label.setStyleSheet("padding-top: 5px; font-style: italic;")
+ right_layout.addWidget(self.progress_label)
+ self.file_progress_label = QLabel("")
+ self.file_progress_label.setToolTip("Shows the progress of individual file downloads, including speed and size.")
+ self.file_progress_label.setWordWrap(True)
+ self.file_progress_label.setStyleSheet("padding-top: 2px; font-style: italic; color: #A0A0A0;")
+ right_layout.addWidget(self.file_progress_label)
+
+ # --- Final Assembly ---
+ self.main_splitter.addWidget(left_scroll_area) # Use the scroll area
+ self.main_splitter.addWidget(right_panel_widget)
+ self.main_splitter.setStretchFactor(0, 7)
+ self.main_splitter.setStretchFactor(1, 3)
+ top_level_layout = QHBoxLayout(self)
+ top_level_layout.setContentsMargins(0, 0, 0, 0)
+ top_level_layout.addWidget(self.main_splitter)
+
+ # --- Initial UI State Updates ---
+ self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked())
+ self.update_external_links_setting(self.external_links_checkbox.isChecked())
+ self.update_multithreading_label(self.thread_count_input.text())
+ self.update_page_range_enabled_state()
+ if self.manga_mode_checkbox:
+ self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked())
+ if hasattr(self, 'link_input'):
+ self.link_input.textChanged.connect(lambda: self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False))
+ self._load_creator_name_cache_from_json()
+ self.load_known_names_from_util()
+ self._update_cookie_input_visibility(self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False)
+ self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked())
+ if hasattr(self, 'radio_group') and self.radio_group.checkedButton():
+ self._handle_filter_mode_change(self.radio_group.checkedButton(), True)
+ self.radio_group.buttonToggled.connect(self._handle_more_options_toggled) # Add this line
+
+ self._update_manga_filename_style_button_text()
+ self._update_skip_scope_button_text()
+ self._update_char_filter_scope_button_text()
+ self._update_multithreading_for_date_mode()
+ if hasattr(self, 'download_thumbnails_checkbox'):
+ self._handle_thumbnail_mode_change(self.download_thumbnails_checkbox.isChecked())
+ if hasattr(self, 'favorite_mode_checkbox'):
+ self._handle_favorite_mode_toggle(False)
+
+ def _load_persistent_history (self ):
+ """Loads download history from a persistent file."""
+ self .log_signal .emit (f"📜 Attempting to load history from: {self .persistent_history_file }")
+ if os .path .exists (self .persistent_history_file ):
+ try :
+ with open (self .persistent_history_file ,'r',encoding ='utf-8')as f :
+ loaded_data =json .load (f )
+
+ if isinstance (loaded_data ,dict ):
+ self .last_downloaded_files_details .clear ()
+ self .last_downloaded_files_details .extend (loaded_data .get ("last_downloaded_files",[]))
+ self .final_download_history_entries =loaded_data .get ("first_processed_posts",[])
+ self .log_signal .emit (f"✅ Loaded {len (self .last_downloaded_files_details )} last downloaded files and {len (self .final_download_history_entries )} first processed posts from persistent history.")
+ elif loaded_data is None and os .path .getsize (self .persistent_history_file )==0 :
+ self .log_signal .emit (f"ℹ️ Persistent history file is empty. Initializing with empty history.")
+ self .final_download_history_entries =[]
+ self .last_downloaded_files_details .clear ()
+ elif isinstance(loaded_data, list): # Handle old format where only first_processed_posts was saved
+ self.log_signal.emit("⚠️ Persistent history file is in old format (only first_processed_posts). Converting to new format.")
+ self.final_download_history_entries = loaded_data
+ self.last_downloaded_files_details.clear()
+ self._save_persistent_history() # Save in new format immediately
+ else :
+ self .log_signal .emit (f"⚠️ Persistent history file has incorrect format. Expected list, got {type (loaded_history )}. Ignoring.")
+ self .final_download_history_entries =[]
+ except json .JSONDecodeError :
+ self .log_signal .emit (f"⚠️ Error decoding persistent history file. It might be corrupted. Ignoring.")
+ self .final_download_history_entries =[]
+ except Exception as e :
+ self .log_signal .emit (f"❌ Error loading persistent history: {e }")
+ self .final_download_history_entries =[]
+ else :
+ self .log_signal .emit (f"⚠️ Persistent history file NOT FOUND at: {self .persistent_history_file }. Starting with empty history.")
+ self .final_download_history_entries =[]
+ self ._save_persistent_history ()
+
+
+ def _save_persistent_history(self):
+ """Saves download history to a persistent file."""
+ self.log_signal.emit(f"📜 Attempting to save history to: {self.persistent_history_file}")
+ try:
+ history_dir = os.path.dirname(self.persistent_history_file)
+ self.log_signal.emit(f" History directory: {history_dir}")
+ if not os.path.exists(history_dir):
+ os.makedirs(history_dir, exist_ok=True)
+ self.log_signal.emit(f" Created history directory: {history_dir}")
+
+ history_data = {
+ "last_downloaded_files": list(self.last_downloaded_files_details),
+ "first_processed_posts": self.final_download_history_entries
+ }
+ with open(self.persistent_history_file, 'w', encoding='utf-8') as f:
+ json.dump(history_data, f, indent=2)
+ self.log_signal.emit(f"✅ Saved {len(self.final_download_history_entries)} history entries to: {self.persistent_history_file}")
+ except Exception as e:
+ self.log_signal.emit(f"❌ Error saving persistent history to {self.persistent_history_file}: {e}")
+
+
+ def _load_creator_name_cache_from_json (self ):
+ """Loads creator id-name-service mappings from creators.json into self.creator_name_cache."""
+ self .log_signal .emit ("ℹ️ Attempting to load creators.json for creator name cache.")
+
+ if getattr (sys ,'frozen',False )and hasattr (sys ,'_MEIPASS'):
+ base_path_for_creators =sys ._MEIPASS
+ else :
+ base_path_for_creators =self .app_base_dir
+
+ creators_file_path =os .path .join (base_path_for_creators ,"data" ,"creators.json")
+
+ if not os .path .exists (creators_file_path ):
+ self .log_signal .emit (f"⚠️ 'creators.json' not found at {creators_file_path }. Creator name cache will be empty.")
+ self .creator_name_cache .clear ()
+ return
+
+ try :
+ with open (creators_file_path ,'r',encoding ='utf-8')as f :
+ loaded_data =json .load (f )
+
+ creators_list =[]
+ if isinstance (loaded_data ,list )and len (loaded_data )>0 and isinstance (loaded_data [0 ],list ):
+ creators_list =loaded_data [0 ]
+ elif isinstance (loaded_data ,list )and all (isinstance (item ,dict )for item in loaded_data ):
+ creators_list =loaded_data
+ else :
+ self .log_signal .emit (f"⚠️ 'creators.json' has an unexpected format. Creator name cache may be incomplete.")
+
+ for creator_data in creators_list :
+ creator_id =creator_data .get ("id")
+ name =creator_data .get ("name")
+ service =creator_data .get ("service")
+ if creator_id and name and service :
+ self .creator_name_cache [(service .lower (),str (creator_id ))]=name
+ self .log_signal .emit (f"✅ Successfully loaded {len (self .creator_name_cache )} creator names into cache from 'creators.json'.")
+ except Exception as e :
+ self .log_signal .emit (f"❌ Error loading 'creators.json' for name cache: {e }")
+ self .creator_name_cache .clear ()
+
+ def _show_download_history_dialog (self ):
+ """Shows the dialog with the finalized download history."""
+ last_3_downloaded =list (self .last_downloaded_files_details )
+ first_processed =self .final_download_history_entries
+
+ if not last_3_downloaded and not first_processed :
+ QMessageBox .information (
+ self ,
+ self ._tr ("download_history_dialog_title_empty","Download History (Empty)"),
+ self ._tr ("no_download_history_header","No Downloads Yet")
+ )
+ return
+
+ dialog = DownloadHistoryDialog(last_3_downloaded, first_processed, self)
+ dialog .exec_ ()
+
    def _handle_actual_file_downloaded (self ,file_details_dict ):
        """Handles a successfully downloaded file for the 'last 3 downloaded' history.

        NOTE(review): this method is re-defined later in the same class with a
        near-identical body (only the fallback display name differs:
        'Unknown Creator/Series' here vs 'Unknown Creator' there), so this
        earlier definition is shadowed and never called. Consider deleting one
        of the two copies.
        """
        if not file_details_dict :
            return
        # Stamp the entry with the completion time.
        file_details_dict ['download_timestamp']=time .time ()
        # Cache key is (service lowercased, user_id as str) to match how the cache is populated.
        creator_key =(file_details_dict .get ('service','').lower (),str (file_details_dict .get ('user_id','')))
        file_details_dict ['creator_display_name']=self .creator_name_cache .get (creator_key ,file_details_dict .get ('folder_context_name','Unknown Creator/Series'))
        self .last_downloaded_files_details .append (file_details_dict )
+
+
+ def _handle_file_successfully_downloaded (self ,history_entry_dict ):
+ """Handles a successfully downloaded file for history logging."""
+ if len (self .download_history_log )>=self .download_history_log .maxlen :
+ self .download_history_log .popleft ()
+ self .download_history_log .append (history_entry_dict )
+
+
+ def _handle_actual_file_downloaded (self ,file_details_dict ):
+ """Handles a successfully downloaded file for the 'last 3 downloaded' history."""
+ if not file_details_dict :
+ return
+
+ file_details_dict ['download_timestamp']=time .time ()
+
+
+ creator_key =(
+ file_details_dict .get ('service','').lower (),
+ str (file_details_dict .get ('user_id',''))
+ )
+ creator_display_name =self .creator_name_cache .get (creator_key ,file_details_dict .get ('folder_context_name','Unknown Creator'))
+ file_details_dict ['creator_display_name']=creator_display_name
+
+ self .last_downloaded_files_details .append (file_details_dict )
+
+
+ def _handle_favorite_mode_toggle (self ,checked ):
+ if not self .url_or_placeholder_stack or not self .bottom_action_buttons_stack :
+ return
+
+ self ._handle_favorite_mode_toggle (self .favorite_mode_checkbox .isChecked ())
+ self ._update_favorite_scope_button_text ()
+ if hasattr (self ,'link_input'):
+ self .last_link_input_text_for_queue_sync =self .link_input .text ()
+
+ def _update_download_extracted_links_button_state (self ):
+ if hasattr (self ,'download_extracted_links_button')and self .download_extracted_links_button :
+ is_only_links =self .radio_only_links and self .radio_only_links .isChecked ()
+ if not is_only_links :
+ self .download_extracted_links_button .setEnabled (False )
+ return
+
+ supported_platforms_for_button ={'mega','google drive','dropbox'}
+ has_supported_links =any (
+ link_info [3 ].lower ()in supported_platforms_for_button for link_info in self .extracted_links_cache
+ )
+ self .download_extracted_links_button .setEnabled (is_only_links and has_supported_links )
+
+ def _show_download_extracted_links_dialog (self ):
+ """Shows the placeholder dialog for downloading extracted links."""
+ if not (self .radio_only_links and self .radio_only_links .isChecked ()):
+ self .log_signal .emit ("ℹ️ Download extracted links button clicked, but not in 'Only Links' mode.")
+ return
+
+ supported_platforms ={'mega','google drive','dropbox'}
+ links_to_show_in_dialog =[]
+ for link_data_tuple in self .extracted_links_cache :
+ platform =link_data_tuple [3 ].lower ()
+ if platform in supported_platforms :
+ links_to_show_in_dialog .append ({
+ 'title':link_data_tuple [0 ],
+ 'link_text':link_data_tuple [1 ],
+ 'url':link_data_tuple [2 ],
+ 'platform':platform ,
+ 'key':link_data_tuple [4 ]
+ })
+
+ if not links_to_show_in_dialog :
+ QMessageBox .information (self ,"No Supported Links","No Mega, Google Drive, or Dropbox links were found in the extracted links.")
+ return
+
+ dialog = DownloadExtractedLinksDialog(links_to_show_in_dialog, self)
+ dialog .download_requested .connect (self ._handle_extracted_links_download_request )
+ dialog .exec_ ()
+
    def _handle_extracted_links_download_request (self ,selected_links_info ):
        """Start downloading the external links selected in the extracted-links dialog.

        Resolves a destination directory (the configured main download
        location when it is a valid directory, otherwise a folder-picker
        prompt), then spins up an ExternalLinkDownloadThread and disables the
        rest of the UI while it runs.

        Args:
            selected_links_info: list of link-info dicts (title, link_text,
                url, platform, key) chosen in the dialog; passed through
                unchanged as the worker thread's task list.
        """
        if not selected_links_info :
            self .log_signal .emit ("ℹ️ No links selected for download from dialog.")
            return

        # In 'Only Links' mode with the progress display active, clear the log
        # so the upcoming download progress replaces the extracted-links list.
        if self .radio_only_links and self .radio_only_links .isChecked ()and self .only_links_log_display_mode ==LOG_DISPLAY_DOWNLOAD_PROGRESS :
            self .main_log_output .clear ()
            self .log_signal .emit ("ℹ️ Displaying Mega download progress (extracted links hidden)...")
            self .mega_download_log_preserved_once =False

        current_main_dir =self .dir_input .text ().strip ()
        download_dir_for_mega =""

        # Prefer the configured main download directory when it exists.
        if current_main_dir and os .path .isdir (current_main_dir ):
            download_dir_for_mega =current_main_dir
            self .log_signal .emit (f"ℹ️ Using existing main download location for external links: {download_dir_for_mega }")
        else :
            if not current_main_dir :
                self .log_signal .emit ("ℹ️ Main download location is empty. Prompting for download folder.")
            else :
                self .log_signal .emit (
                f"⚠️ Main download location '{current_main_dir }' is not a valid directory. Prompting for download folder.")

            # Seed the picker with the (possibly invalid) configured path or
            # the system Downloads folder.
            suggestion_path =current_main_dir if current_main_dir else QStandardPaths .writableLocation (QStandardPaths .DownloadLocation )

            chosen_dir =QFileDialog .getExistingDirectory (
            self ,
            self ._tr ("select_download_folder_mega_dialog_title","Select Download Folder for External Links"),
            suggestion_path ,
            options =QFileDialog .ShowDirsOnly |QFileDialog .DontUseNativeDialog
            )

            if not chosen_dir :
                self .log_signal .emit ("ℹ️ External links download cancelled - no download directory selected from prompt.")
                return
            download_dir_for_mega =chosen_dir

        self .log_signal .emit (f"ℹ️ Preparing to download {len (selected_links_info )} selected external link(s) to: {download_dir_for_mega }")
        if not os .path .exists (download_dir_for_mega ):
            self .log_signal .emit (f"❌ Critical Error: Selected download directory '{download_dir_for_mega }' does not exist.")
            return

        tasks_for_thread =selected_links_info

        # Only one external-link download thread may run at a time.
        if self .external_link_download_thread and self .external_link_download_thread .isRunning ():
            QMessageBox .warning (self ,"Busy","Another external link download is already in progress.")
            return

        self .external_link_download_thread =ExternalLinkDownloadThread (
        tasks_for_thread ,
        download_dir_for_mega ,
        self .log_signal .emit ,
        self
        )
        # Wire worker signals to the UI handlers before starting the thread.
        self .external_link_download_thread .finished .connect (self ._on_external_link_download_thread_finished )

        self .external_link_download_thread .progress_signal .connect (self .handle_main_log )
        self .external_link_download_thread .file_complete_signal .connect (self ._on_single_external_file_complete )

        # Lock the rest of the UI while the external downloads run.
        self .set_ui_enabled (False )

        self .progress_label .setText (self ._tr ("progress_processing_post_text","Progress: Processing post {processed_posts}...").format (processed_posts =f"External Links (0/{len (tasks_for_thread )})"))
        self .external_link_download_thread .start ()
+
+ def _on_external_link_download_thread_finished (self ):
+ self .log_signal .emit ("✅ External link download thread finished.")
+ self .progress_label .setText (f"{self ._tr ('status_completed','Completed')}: External link downloads. {self ._tr ('ready_for_new_task_text','Ready for new task.')}")
+
+ self .mega_download_log_preserved_once =True
+ self .log_signal .emit ("INTERNAL: mega_download_log_preserved_once SET to True.")
+
+ if self .radio_only_links and self .radio_only_links .isChecked ():
+ self .log_signal .emit (HTML_PREFIX +"
{display_term }
') + self .missed_character_log_output .append (separator_line ) + self .missed_character_log_output .append ("") + + scrollbar =self .missed_character_log_output .verticalScrollBar () + scrollbar .setValue (0 ) + + def _is_download_active (self ): + single_thread_active =self .download_thread and self .download_thread .isRunning () + fetcher_active =hasattr (self ,'is_fetcher_thread_running')and self .is_fetcher_thread_running + pool_has_active_tasks =self .thread_pool is not None and any (not f .done ()for f in self .active_futures if f is not None ) + retry_pool_active =hasattr (self ,'retry_thread_pool')and self .retry_thread_pool is not None and hasattr (self ,'active_retry_futures')and any (not f .done ()for f in self .active_retry_futures if f is not None ) + + + external_dl_thread_active =hasattr (self ,'external_link_download_thread')and self .external_link_download_thread is not None and self .external_link_download_thread .isRunning () + + return single_thread_active or fetcher_active or pool_has_active_tasks or retry_pool_active or external_dl_thread_active + + def handle_external_link_signal (self ,post_title ,link_text ,link_url ,platform ,decryption_key ): + link_data =(post_title ,link_text ,link_url ,platform ,decryption_key ) + self .external_link_queue .append (link_data ) + if self .radio_only_links and self .radio_only_links .isChecked (): + self .extracted_links_cache .append (link_data ) + self ._update_download_extracted_links_button_state () + + is_only_links_mode =self .radio_only_links and self .radio_only_links .isChecked () + should_display_in_external_log =self .show_external_links and not is_only_links_mode + + if not (is_only_links_mode or should_display_in_external_log ): + self ._is_processing_external_link_queue =False + if self .external_link_queue : + QTimer .singleShot (0 ,self ._try_process_next_external_link ) + return + + + if link_data not in self .extracted_links_cache : + self .extracted_links_cache .append (link_data ) + 
+ def _try_process_next_external_link (self ): + if self ._is_processing_external_link_queue or not self .external_link_queue : + return + + is_only_links_mode =self .radio_only_links and self .radio_only_links .isChecked () + should_display_in_external_log =self .show_external_links and not is_only_links_mode + + if not (is_only_links_mode or should_display_in_external_log ): + self ._is_processing_external_link_queue =False + if self .external_link_queue : + QTimer .singleShot (0 ,self ._try_process_next_external_link ) + return + + self ._is_processing_external_link_queue =True + link_data =self .external_link_queue .popleft () + + if is_only_links_mode : + QTimer .singleShot (0 ,lambda data =link_data :self ._display_and_schedule_next (data )) + elif self ._is_download_active (): + delay_ms =random .randint (4000 ,8000 ) + QTimer .singleShot (delay_ms ,lambda data =link_data :self ._display_and_schedule_next (data )) + else : + QTimer .singleShot (0 ,lambda data =link_data :self ._display_and_schedule_next (data )) + + + def _display_and_schedule_next (self ,link_data ): + post_title ,link_text ,link_url ,platform ,decryption_key =link_data + is_only_links_mode =self .radio_only_links and self .radio_only_links .isChecked () + + max_link_text_len =50 + display_text =(link_text [:max_link_text_len ].strip ()+"..." + if len (link_text )>max_link_text_len else link_text .strip ()) + formatted_link_info =f"{display_text } - {link_url } - {platform }" + + if decryption_key : + formatted_link_info +=f" (Decryption Key: {decryption_key })" + + if is_only_links_mode : + if post_title !=self ._current_link_post_title : + separator_html ="ℹ️ The following files from multi-file manga posts " + "(after the first file) kept their original names:
" + ) + self .log_signal .emit (intro_msg ) + + html_list_items =" tags found. Falling back to basic HTML cleaning for the whole block.")
- text_with_br = re.sub(r'
', '\n', raw_text_content, flags=re.IGNORECASE)
- cleaned_text = re.sub(r'<.*?>', '', text_with_br)
- else:
- cleaned_paragraphs_list = []
- for p_content in html_paragraphs:
- p_with_br = re.sub(r'
', '\n', p_content, flags=re.IGNORECASE)
- p_cleaned = re.sub(r'<.*?>', '', p_with_br)
- p_final = html.unescape(p_cleaned).strip()
- if p_final:
- cleaned_paragraphs_list.append(p_final)
- cleaned_text = '\n\n'.join(cleaned_paragraphs_list)
- cleaned_text = cleaned_text.replace('…', '...')
- if self.single_pdf_mode:
- if not cleaned_text:
- return 0, 0, [], [], [], None, None
- content_data = {
- 'title': post_title,
- 'content': cleaned_text,
- 'published': self.post.get('published') or self.post.get('added')
- }
- temp_dir = os.path.join(self.app_base_dir, "appdata")
- os.makedirs(temp_dir, exist_ok=True)
- temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json"
- temp_filepath = os.path.join(temp_dir, temp_filename)
- try:
- with open(temp_filepath, 'w', encoding='utf-8') as f:
- json.dump(content_data, f, indent=2)
- self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.")
- self._emit_signal('worker_finished', (0, 0, [], [], [], None, temp_filepath))
- return (0, 0, [], [], [], None, temp_filepath)
- except Exception as e:
- self.logger(f" ❌ Failed to write temporary file for single PDF: {e}")
- self._emit_signal('worker_finished', (0, 0, [], [], [], [], None))
- return (0, 0, [], [], [], [], None)
- else:
- file_extension = self.text_export_format
- txt_filename = clean_filename(post_title) + f".{file_extension}"
- final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)
- try:
- os.makedirs(determined_post_save_path_for_history, exist_ok=True)
- base, ext = os.path.splitext(final_save_path)
- counter = 1
- while os.path.exists(final_save_path):
- final_save_path = f"{base}_{counter}{ext}"
- counter += 1
- if file_extension == 'pdf':
- if FPDF:
- self.logger(f" Converting to PDF...")
- pdf = PDF()
- font_path = ""
- if self.project_root_dir:
- font_path = os.path.join(self.project_root_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf')
- try:
- if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}")
- pdf.add_font('DejaVu', '', font_path, uni=True)
- pdf.set_font('DejaVu', '', 12)
- except Exception as font_error:
- self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.")
- pdf.set_font('Arial', '', 12)
- pdf.add_page()
- pdf.multi_cell(0, 5, cleaned_text)
- pdf.output(final_save_path)
- else:
- self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
- final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
- with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
- elif file_extension == 'docx':
- if Document:
- self.logger(f" Converting to DOCX...")
- document = Document()
- document.add_paragraph(cleaned_text)
- document.save(final_save_path)
- else:
- self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.")
- final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
- with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
- else:
- with open(final_save_path, 'w', encoding='utf-8') as f:
- f.write(cleaned_text)
- self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
- return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None
- except Exception as e:
- self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}")
- return 0, num_potential_files_in_post, [], [], [], None, None
- if not self .extract_links_only and self .use_subfolders and self .skip_words_list :
- if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
- for folder_name_to_check in base_folder_names_for_post_content :
- if not folder_name_to_check :continue
- if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ):
- matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word")
- self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.")
- return 0 ,num_potential_files_in_post ,[],[],[],None, None
- if (self .show_external_links or self .extract_links_only )and post_content_html :
- if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
- try :
- mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b')
- unique_links_data ={}
- for match in link_pattern .finditer (post_content_html ):
- link_url =match .group (1 ).strip ()
- link_url =html .unescape (link_url )
- link_inner_text =match .group (2 )
- if not any (ext in link_url .lower ()for ext in ['.css','.js','.ico','.xml','.svg'])and not link_url .startswith ('javascript:')and link_url not in unique_links_data :
- clean_link_text =re .sub (r'<.*?>','',link_inner_text )
- clean_link_text =html .unescape (clean_link_text ).strip ()
- display_text =clean_link_text if clean_link_text else "[Link]"
- unique_links_data [link_url ]=display_text
- links_emitted_count =0
- scraped_platforms ={'kemono','coomer','patreon'}
- for link_url ,link_text in unique_links_data .items ():
- platform =get_link_platform (link_url )
- decryption_key_found =""
- if platform =='mega':
- parsed_mega_url =urlparse (link_url )
- if parsed_mega_url .fragment :
- potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ]
- if mega_key_pattern .fullmatch (potential_key_from_fragment ):
- decryption_key_found =potential_key_from_fragment
- if not decryption_key_found and link_text :
- key_match_in_text =mega_key_pattern .search (link_text )
- if key_match_in_text :
- decryption_key_found =key_match_in_text .group (1 )
- if not decryption_key_found and self .extract_links_only and post_content_html :
- key_match_in_content =mega_key_pattern .search (strip_html_tags (post_content_html ))
- if key_match_in_content :
- decryption_key_found =key_match_in_content .group (1 )
- if platform not in scraped_platforms :
- self ._emit_signal ('external_link',post_title ,link_text ,link_url ,platform ,decryption_key_found or "")
- links_emitted_count +=1
- if links_emitted_count >0 :self .logger (f" 🔗 Found {links_emitted_count } potential external link(s) in post content.")
- except Exception as e :self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}")
- if self .extract_links_only :
- self .logger (f" Extract Links Only mode: Finished processing post {post_id } for links.")
- return 0 ,0 ,[],[],[],None
- all_files_from_post_api =[]
- api_file_domain =urlparse (self .api_url_input ).netloc
- if not api_file_domain or not any (d in api_file_domain .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']):
- api_file_domain ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party"
- if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'):
- file_path =post_main_file_info ['path'].lstrip ('/')
- original_api_name =post_main_file_info .get ('name')or os .path .basename (file_path )
- if original_api_name :
- all_files_from_post_api .append ({
- 'url':f"https://{api_file_domain }{file_path }"if file_path .startswith ('/')else f"https://{api_file_domain }/data/{file_path }",
- 'name':original_api_name ,
- '_original_name_for_log':original_api_name ,
- '_is_thumbnail':is_image (original_api_name )
- })
- else :self .logger (f" ⚠️ Skipping main file for post {post_id }: Missing name (Path: {file_path })")
- for idx ,att_info in enumerate (post_attachments ):
- if isinstance (att_info ,dict )and att_info .get ('path'):
- att_path =att_info ['path'].lstrip ('/')
- original_api_att_name =att_info .get ('name')or os .path .basename (att_path )
- if original_api_att_name :
- all_files_from_post_api .append ({
- 'url':f"https://{api_file_domain }{att_path }"if att_path .startswith ('/')else f"https://{api_file_domain }/data/{att_path }",
- 'name':original_api_att_name ,
- '_original_name_for_log':original_api_att_name ,
- '_is_thumbnail':is_image (original_api_att_name )
- })
- else :self .logger (f" ⚠️ Skipping attachment {idx +1 } for post {post_id }: Missing name (Path: {att_path })")
- else :self .logger (f" ⚠️ Skipping invalid attachment {idx +1 } for post {post_id }: {str (att_info )[:100 ]}")
- if self .scan_content_for_images and post_content_html and not self .extract_links_only :
- self .logger (f" Scanning post content for additional image URLs (Post ID: {post_id })...")
- parsed_input_url =urlparse (self .api_url_input )
- base_url_for_relative_paths =f"{parsed_input_url .scheme }://{parsed_input_url .netloc }"
- img_ext_pattern ="|".join (ext .lstrip ('.')for ext in IMAGE_EXTENSIONS )
- direct_url_pattern_str =r"""(?i)\b(https?://[^\s"'<>\[\]\{\}\|\^\\^~\[\]`]+\.(?:"""+img_ext_pattern +r"""))\b"""
- img_tag_src_pattern_str =r"""]*?src\s*=\s*["']([^"']+)["']"""
- found_image_sources =set ()
- for direct_url_match in re .finditer (direct_url_pattern_str ,post_content_html ):
- found_image_sources .add (direct_url_match .group (1 ))
- for img_tag_match in re .finditer (img_tag_src_pattern_str ,post_content_html ,re .IGNORECASE ):
- src_attr =img_tag_match .group (1 ).strip ()
- src_attr =html .unescape (src_attr )
- if not src_attr :continue
- resolved_src_url =""
- if src_attr .startswith (('http://','https://')):
- resolved_src_url =src_attr
- elif src_attr .startswith ('//'):
- resolved_src_url =f"{parsed_input_url .scheme }:{src_attr }"
- elif src_attr .startswith ('/'):
- resolved_src_url =f"{base_url_for_relative_paths }{src_attr }"
- if resolved_src_url :
- parsed_resolved_url =urlparse (resolved_src_url )
- if any (parsed_resolved_url .path .lower ().endswith (ext )for ext in IMAGE_EXTENSIONS ):
- found_image_sources .add (resolved_src_url )
- if found_image_sources :
- self .logger (f" Found {len (found_image_sources )} potential image URLs/sources in content.")
- existing_urls_in_api_list ={f_info ['url']for f_info in all_files_from_post_api }
- for found_url in found_image_sources :
- if self .check_cancel ():break
- if found_url in existing_urls_in_api_list :
- self .logger (f" Skipping URL from content (already in API list or previously added from content): {found_url [:70 ]}...")
- continue
- try :
- parsed_found_url =urlparse (found_url )
- url_filename =os .path .basename (parsed_found_url .path )
- if not url_filename or not is_image (url_filename ):
- self .logger (f" Skipping URL from content (no filename part or not an image extension): {found_url [:70 ]}...")
- continue
- self .logger (f" Adding image from content: {url_filename } (URL: {found_url [:70 ]}...)")
- all_files_from_post_api .append ({
- 'url':found_url ,
- 'name':url_filename ,
- '_original_name_for_log':url_filename ,
- '_is_thumbnail':False ,
- '_from_content_scan':True
- })
- existing_urls_in_api_list .add (found_url )
- except Exception as e_url_parse :
- self .logger (f" Error processing URL from content '{found_url [:70 ]}...': {e_url_parse }")
- else :
- self .logger (f" No additional image URLs found in post content scan for post {post_id }.")
- if self .download_thumbnails :
- if self .scan_content_for_images :
- self .logger (f" Mode: 'Download Thumbnails Only' + 'Scan Content for Images' active. Prioritizing images from content scan for post {post_id }.")
- all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_from_content_scan')]
- if not all_files_from_post_api :
- self .logger (f" -> No images found via content scan for post {post_id } in this combined mode.")
- return 0 ,0 ,[],[],[],None
- else :
- self .logger (f" Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id }.")
- all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_is_thumbnail')]
- if not all_files_from_post_api :
- self .logger (f" -> No API image thumbnails found for post {post_id } in thumbnail-only mode.")
- return 0 ,0 ,[],[],[],None
- if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED :
- def natural_sort_key_for_files (file_api_info ):
- name =file_api_info .get ('_original_name_for_log','').lower ()
- return [int (text )if text .isdigit ()else text for text in re .split ('([0-9]+)',name )]
- all_files_from_post_api .sort (key =natural_sort_key_for_files )
- self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.")
- if not all_files_from_post_api :
- self .logger (f" No files found to download for post {post_id }.")
- return 0 ,0 ,[],[],[],None
- files_to_download_info_list =[]
- processed_original_filenames_in_this_post =set ()
- if self.keep_in_post_duplicates:
- files_to_download_info_list.extend(all_files_from_post_api)
- self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.")
+ comments_data = fetch_post_comments(api_domain, self.service, self.user_id, post_id, headers, self.logger, self.cancellation_event, self.pause_event)
+ if comments_data:
+ comment_texts = []
+ for comment in comments_data:
+ user = comment.get('user', {}).get('name', 'Unknown User')
+ timestamp = comment.get('updated', 'No Date')
+ body = strip_html_tags(comment.get('content', ''))
+ comment_texts.append(f"--- Comment by {user} on {timestamp} ---\n{body}\n")
+ raw_text_content = "\n".join(comment_texts)
+ except Exception as e:
+ self.logger(f" ❌ Error fetching comments for text-only mode: {e}")
+
+ if not raw_text_content or not raw_text_content.strip():
+ self.logger(" -> Skip Saving Text: No content/comments found or fetched.")
+ return 0, num_potential_files_in_post, [], [], [], None, None
+
+ # --- Robust HTML-to-TEXT Conversion ---
+ paragraph_pattern = re.compile(r'
tags found. Falling back to basic HTML cleaning for the whole block.")
+ text_with_br = re.sub(r'
', '\n', raw_text_content, flags=re.IGNORECASE)
+ cleaned_text = re.sub(r'<.*?>', '', text_with_br)
else:
- for file_info in all_files_from_post_api:
- current_api_original_filename = file_info.get('_original_name_for_log')
- if current_api_original_filename in processed_original_filenames_in_this_post:
- self.logger(f" -> Skip Duplicate Original Name (within post {post_id}): '{current_api_original_filename}' already processed/listed for this post.")
- total_skipped_this_post += 1
- else:
- files_to_download_info_list.append(file_info)
- if current_api_original_filename:
- processed_original_filenames_in_this_post.add(current_api_original_filename)
- if not files_to_download_info_list:
- self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.")
- return 0 ,total_skipped_this_post ,[],[],[],None
- self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.")
- with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool :
- futures_list =[]
- for file_idx ,file_info_to_dl in enumerate (files_to_download_info_list ):
- if self ._check_pause (f"File processing loop for post {post_id }, file {file_idx }"):break
+ cleaned_paragraphs_list = []
+ for p_content in html_paragraphs:
+ p_with_br = re.sub(r'
', '\n', p_content, flags=re.IGNORECASE)
+ p_cleaned = re.sub(r'<.*?>', '', p_with_br)
+ p_final = html.unescape(p_cleaned).strip()
+ if p_final:
+ cleaned_paragraphs_list.append(p_final)
+ cleaned_text = '\n\n'.join(cleaned_paragraphs_list)
+ cleaned_text = cleaned_text.replace('…', '...')
+
+ # --- Logic for Single PDF Mode (File-based) ---
+ if self.single_pdf_mode:
+ if not cleaned_text:
+ return 0, 0, [], [], [], None, None
+
+ content_data = {
+ 'title': post_title,
+ 'content': cleaned_text,
+ 'published': self.post.get('published') or self.post.get('added')
+ }
+ temp_dir = os.path.join(self.app_base_dir, "appdata")
+ os.makedirs(temp_dir, exist_ok=True)
+ temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json"
+ temp_filepath = os.path.join(temp_dir, temp_filename)
+
+ try:
+ with open(temp_filepath, 'w', encoding='utf-8') as f:
+ json.dump(content_data, f, indent=2)
+ self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.")
+ return 0, 0, [], [], [], None, temp_filepath
+ except Exception as e:
+ self.logger(f" ❌ Failed to write temporary file for single PDF: {e}")
+ return 0, 0, [], [], [], None, None
+
+ # --- Logic for Individual File Saving ---
+ else:
+ file_extension = self.text_export_format
+ txt_filename = clean_filename(post_title) + f".{file_extension}"
+ final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)
+
+ try:
+ os.makedirs(determined_post_save_path_for_history, exist_ok=True)
+ base, ext = os.path.splitext(final_save_path)
+ counter = 1
+ while os.path.exists(final_save_path):
+ final_save_path = f"{base}_{counter}{ext}"
+ counter += 1
+
+ if file_extension == 'pdf':
+ if FPDF:
+ self.logger(f" Converting to PDF...")
+ pdf = PDF()
+ font_path = ""
+ if self.project_root_dir:
+ font_path = os.path.join(self.project_root_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf')
+ try:
+ if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}")
+ pdf.add_font('DejaVu', '', font_path, uni=True)
+ pdf.set_font('DejaVu', '', 12)
+ except Exception as font_error:
+ self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.")
+ pdf.set_font('Arial', '', 12)
+ pdf.add_page()
+ pdf.multi_cell(0, 5, cleaned_text)
+ pdf.output(final_save_path)
+ else:
+ self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
+ final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
+ with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
+
+ elif file_extension == 'docx':
+ if Document:
+ self.logger(f" Converting to DOCX...")
+ document = Document()
+ document.add_paragraph(cleaned_text)
+ document.save(final_save_path)
+ else:
+ self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.")
+ final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
+ with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
+
+ else: # Default to TXT
+ with open(final_save_path, 'w', encoding='utf-8') as f:
+ f.write(cleaned_text)
+
+ self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
+ return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None
+ except Exception as e:
+ self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}")
+ return 0, num_potential_files_in_post, [], [], [], None, None
+
+ if not self .extract_links_only and self .use_subfolders and self .skip_words_list :
+ if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
+ for folder_name_to_check in base_folder_names_for_post_content :
+ if not folder_name_to_check :continue
+ if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ):
+ matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word")
+ self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.")
+ return 0 ,num_potential_files_in_post ,[],[],[],None, None
+ if (self .show_external_links or self .extract_links_only )and post_content_html :
+ if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
+ try :
+ mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b')
+ unique_links_data ={}
+ for match in link_pattern .finditer (post_content_html ):
+ link_url =match .group (1 ).strip ()
+ link_url =html .unescape (link_url )
+ link_inner_text =match .group (2 )
+ if not any (ext in link_url .lower ()for ext in ['.css','.js','.ico','.xml','.svg'])and not link_url .startswith ('javascript:')and link_url not in unique_links_data :
+ clean_link_text =re .sub (r'<.*?>','',link_inner_text )
+ clean_link_text =html .unescape (clean_link_text ).strip ()
+ display_text =clean_link_text if clean_link_text else "[Link]"
+ unique_links_data [link_url ]=display_text
+ links_emitted_count =0
+ scraped_platforms ={'kemono','coomer','patreon'}
+ for link_url ,link_text in unique_links_data .items ():
+ platform =get_link_platform (link_url )
+ decryption_key_found =""
+ if platform =='mega':
+ parsed_mega_url =urlparse (link_url )
+ if parsed_mega_url .fragment :
+ potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ]
+ if mega_key_pattern .fullmatch (potential_key_from_fragment ):
+ decryption_key_found =potential_key_from_fragment
+
+ if not decryption_key_found and link_text :
+ key_match_in_text =mega_key_pattern .search (link_text )
+ if key_match_in_text :
+ decryption_key_found =key_match_in_text .group (1 )
+ if not decryption_key_found and self .extract_links_only and post_content_html :
+ key_match_in_content =mega_key_pattern .search (strip_html_tags (post_content_html ))
+ if key_match_in_content :
+ decryption_key_found =key_match_in_content .group (1 )
+ if platform not in scraped_platforms :
+ self ._emit_signal ('external_link',post_title ,link_text ,link_url ,platform ,decryption_key_found or "")
+ links_emitted_count +=1
+ if links_emitted_count >0 :self .logger (f" 🔗 Found {links_emitted_count } potential external link(s) in post content.")
+ except Exception as e :self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}")
+ if self .extract_links_only :
+ self .logger (f" Extract Links Only mode: Finished processing post {post_id } for links.")
+ return 0 ,0 ,[],[],[],None
+ all_files_from_post_api =[]
+ api_file_domain =urlparse (self .api_url_input ).netloc
+ if not api_file_domain or not any (d in api_file_domain .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']):
+ api_file_domain ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party"
+ if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'):
+ file_path =post_main_file_info ['path'].lstrip ('/')
+ original_api_name =post_main_file_info .get ('name')or os .path .basename (file_path )
+ if original_api_name :
+ all_files_from_post_api .append ({
+ 'url':f"https://{api_file_domain }{file_path }"if file_path .startswith ('/')else f"https://{api_file_domain }/data/{file_path }",
+ 'name':original_api_name ,
+ '_original_name_for_log':original_api_name ,
+ '_is_thumbnail':is_image (original_api_name )
+ })
+ else :self .logger (f" ⚠️ Skipping main file for post {post_id }: Missing name (Path: {file_path })")
+ for idx ,att_info in enumerate (post_attachments ):
+ if isinstance (att_info ,dict )and att_info .get ('path'):
+ att_path =att_info ['path'].lstrip ('/')
+ original_api_att_name =att_info .get ('name')or os .path .basename (att_path )
+ if original_api_att_name :
+ all_files_from_post_api .append ({
+ 'url':f"https://{api_file_domain }{att_path }"if att_path .startswith ('/')else f"https://{api_file_domain }/data/{att_path }",
+ 'name':original_api_att_name ,
+ '_original_name_for_log':original_api_att_name ,
+ '_is_thumbnail':is_image (original_api_att_name )
+ })
+ else :self .logger (f" ⚠️ Skipping attachment {idx +1 } for post {post_id }: Missing name (Path: {att_path })")
+ else :self .logger (f" ⚠️ Skipping invalid attachment {idx +1 } for post {post_id }: {str (att_info )[:100 ]}")
+ if self .scan_content_for_images and post_content_html and not self .extract_links_only :
+ self .logger (f" Scanning post content for additional image URLs (Post ID: {post_id })...")
+ parsed_input_url =urlparse (self .api_url_input )
+ base_url_for_relative_paths =f"{parsed_input_url .scheme }://{parsed_input_url .netloc }"
+ img_ext_pattern ="|".join (ext .lstrip ('.')for ext in IMAGE_EXTENSIONS )
+ direct_url_pattern_str =r"""(?i)\b(https?://[^\s"'<>\[\]\{\}\|\^\\^~\[\]`]+\.(?:"""+img_ext_pattern +r"""))\b"""
+ img_tag_src_pattern_str =r"""]*?src\s*=\s*["']([^"']+)["']"""
+ found_image_sources =set ()
+ for direct_url_match in re .finditer (direct_url_pattern_str ,post_content_html ):
+ found_image_sources .add (direct_url_match .group (1 ))
+ for img_tag_match in re .finditer (img_tag_src_pattern_str ,post_content_html ,re .IGNORECASE ):
+ src_attr =img_tag_match .group (1 ).strip ()
+ src_attr =html .unescape (src_attr )
+ if not src_attr :continue
+ resolved_src_url =""
+ if src_attr .startswith (('http://','https://')):
+ resolved_src_url =src_attr
+ elif src_attr .startswith ('//'):
+ resolved_src_url =f"{parsed_input_url .scheme }:{src_attr }"
+ elif src_attr .startswith ('/'):
+ resolved_src_url =f"{base_url_for_relative_paths }{src_attr }"
+ if resolved_src_url :
+ parsed_resolved_url =urlparse (resolved_src_url )
+ if any (parsed_resolved_url .path .lower ().endswith (ext )for ext in IMAGE_EXTENSIONS ):
+ found_image_sources .add (resolved_src_url )
+ if found_image_sources :
+ self .logger (f" Found {len (found_image_sources )} potential image URLs/sources in content.")
+ existing_urls_in_api_list ={f_info ['url']for f_info in all_files_from_post_api }
+ for found_url in found_image_sources :
if self .check_cancel ():break
- current_api_original_filename =file_info_to_dl .get ('_original_name_for_log')
- file_is_candidate_by_char_filter_scope =False
- char_filter_info_that_matched_file =None
- if not current_character_filters :
- file_is_candidate_by_char_filter_scope =True
- else :
- if self .char_filter_scope ==CHAR_SCOPE_FILES :
- for filter_item_obj in current_character_filters :
- terms_to_check_for_file =list (filter_item_obj ["aliases"])
- if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file :
- terms_to_check_for_file .append (filter_item_obj ["name"])
- unique_terms_for_file_check =list (set (terms_to_check_for_file ))
- for term_to_match in unique_terms_for_file_check :
+ if found_url in existing_urls_in_api_list :
+ self .logger (f" Skipping URL from content (already in API list or previously added from content): {found_url [:70 ]}...")
+ continue
+ try :
+ parsed_found_url =urlparse (found_url )
+ url_filename =os .path .basename (parsed_found_url .path )
+ if not url_filename or not is_image (url_filename ):
+ self .logger (f" Skipping URL from content (no filename part or not an image extension): {found_url [:70 ]}...")
+ continue
+ self .logger (f" Adding image from content: {url_filename } (URL: {found_url [:70 ]}...)")
+ all_files_from_post_api .append ({
+ 'url':found_url ,
+ 'name':url_filename ,
+ '_original_name_for_log':url_filename ,
+ '_is_thumbnail':False ,
+ '_from_content_scan':True
+ })
+ existing_urls_in_api_list .add (found_url )
+ except Exception as e_url_parse :
+ self .logger (f" Error processing URL from content '{found_url [:70 ]}...': {e_url_parse }")
+ else :
+ self .logger (f" No additional image URLs found in post content scan for post {post_id }.")
+ if self .download_thumbnails :
+ if self .scan_content_for_images :
+ self .logger (f" Mode: 'Download Thumbnails Only' + 'Scan Content for Images' active. Prioritizing images from content scan for post {post_id }.")
+ all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_from_content_scan')]
+ if not all_files_from_post_api :
+ self .logger (f" -> No images found via content scan for post {post_id } in this combined mode.")
+ return 0 ,0 ,[],[],[],None
+ else :
+ self .logger (f" Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id }.")
+ all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_is_thumbnail')]
+ if not all_files_from_post_api :
+ self .logger (f" -> No API image thumbnails found for post {post_id } in thumbnail-only mode.")
+ return 0 ,0 ,[],[],[],None
+ if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED :
+ def natural_sort_key_for_files (file_api_info ):
+ name =file_api_info .get ('_original_name_for_log','').lower ()
+ return [int (text )if text .isdigit ()else text for text in re .split ('([0-9]+)',name )]
+ all_files_from_post_api .sort (key =natural_sort_key_for_files )
+ self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.")
+ if not all_files_from_post_api :
+ self .logger (f" No files found to download for post {post_id }.")
+ return 0 ,0 ,[],[],[],None
+ files_to_download_info_list =[]
+ processed_original_filenames_in_this_post =set ()
+
+ if self.keep_in_post_duplicates:
+ # If we keep duplicates, just add every file to the list to be processed.
+ # The downstream hash check and rename-on-collision logic will handle them.
+ files_to_download_info_list.extend(all_files_from_post_api)
+ self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.")
+ else:
+ # This is the original logic that skips duplicates by name within a post.
+ for file_info in all_files_from_post_api:
+ current_api_original_filename = file_info.get('_original_name_for_log')
+ if current_api_original_filename in processed_original_filenames_in_this_post:
+ self.logger(f" -> Skip Duplicate Original Name (within post {post_id}): '{current_api_original_filename}' already processed/listed for this post.")
+ total_skipped_this_post += 1
+ else:
+ files_to_download_info_list.append(file_info)
+ if current_api_original_filename:
+ processed_original_filenames_in_this_post.add(current_api_original_filename)
+
+ if not files_to_download_info_list:
+
+ self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.")
+ return 0 ,total_skipped_this_post ,[],[],[],None
+
+ self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.")
+ with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool :
+ futures_list =[]
+ for file_idx ,file_info_to_dl in enumerate (files_to_download_info_list ):
+ if self ._check_pause (f"File processing loop for post {post_id }, file {file_idx }"):break
+ if self .check_cancel ():break
+ current_api_original_filename =file_info_to_dl .get ('_original_name_for_log')
+ file_is_candidate_by_char_filter_scope =False
+ char_filter_info_that_matched_file =None
+ if not current_character_filters :
+ file_is_candidate_by_char_filter_scope =True
+ else :
+ if self .char_filter_scope ==CHAR_SCOPE_FILES :
+ for filter_item_obj in current_character_filters :
+ terms_to_check_for_file =list (filter_item_obj ["aliases"])
+ if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file :
+ terms_to_check_for_file .append (filter_item_obj ["name"])
+ unique_terms_for_file_check =list (set (terms_to_check_for_file ))
+ for term_to_match in unique_terms_for_file_check :
+ if is_filename_match_for_character (current_api_original_filename ,term_to_match ):
+ file_is_candidate_by_char_filter_scope =True
+ char_filter_info_that_matched_file =filter_item_obj
+ self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Files.")
+ break
+ if file_is_candidate_by_char_filter_scope :break
+ elif self .char_filter_scope ==CHAR_SCOPE_TITLE :
+ if post_is_candidate_by_title_char_match :
+ file_is_candidate_by_char_filter_scope =True
+ char_filter_info_that_matched_file =char_filter_that_matched_title
+ self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Title.")
+ elif self .char_filter_scope ==CHAR_SCOPE_BOTH :
+ if post_is_candidate_by_title_char_match :
+ file_is_candidate_by_char_filter_scope =True
+ char_filter_info_that_matched_file =char_filter_that_matched_title
+ self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Both (Title part).")
+ else :
+ for filter_item_obj_both_file in current_character_filters :
+ terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"])
+ if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both :
+ terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"])
+ unique_terms_for_file_both_check =list (set (terms_to_check_for_file_both ))
+ for term_to_match in unique_terms_for_file_both_check :
if is_filename_match_for_character (current_api_original_filename ,term_to_match ):
file_is_candidate_by_char_filter_scope =True
- char_filter_info_that_matched_file =filter_item_obj
- self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Files.")
+ char_filter_info_that_matched_file =filter_item_obj_both_file
+                                    self .logger (f"   File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj_both_file ['name']}'). Scope: Both (File part).")
break
if file_is_candidate_by_char_filter_scope :break
- elif self .char_filter_scope ==CHAR_SCOPE_TITLE :
- if post_is_candidate_by_title_char_match :
- file_is_candidate_by_char_filter_scope =True
- char_filter_info_that_matched_file =char_filter_that_matched_title
- self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Title.")
- elif self .char_filter_scope ==CHAR_SCOPE_BOTH :
- if post_is_candidate_by_title_char_match :
- file_is_candidate_by_char_filter_scope =True
- char_filter_info_that_matched_file =char_filter_that_matched_title
- self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Both (Title part).")
- else :
- for filter_item_obj_both_file in current_character_filters :
- terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"])
- if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both :
- terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"])
- unique_terms_for_file_both_check =list (set (terms_to_check_for_file_both ))
- for term_to_match in unique_terms_for_file_both_check :
- if is_filename_match_for_character (current_api_original_filename ,term_to_match ):
- file_is_candidate_by_char_filter_scope =True
- char_filter_info_that_matched_file =filter_item_obj_both_file
- self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Both (File part).")
- break
- if file_is_candidate_by_char_filter_scope :break
- elif self .char_filter_scope ==CHAR_SCOPE_COMMENTS :
- if post_is_candidate_by_file_char_match_in_comment_scope :
- file_is_candidate_by_char_filter_scope =True
- char_filter_info_that_matched_file =char_filter_that_matched_file_in_comment_scope
- self .logger (f" File '{current_api_original_filename }' is candidate because a file in this post matched char filter (Overall Scope: Comments).")
- elif post_is_candidate_by_comment_char_match :
- file_is_candidate_by_char_filter_scope =True
- char_filter_info_that_matched_file =char_filter_that_matched_comment
- self .logger (f" File '{current_api_original_filename }' is candidate because post comments matched char filter (Overall Scope: Comments).")
- if not file_is_candidate_by_char_filter_scope :
- self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.")
- total_skipped_this_post +=1
- continue
- target_base_folders_for_this_file_iteration =[]
- if current_character_filters :
- char_title_subfolder_name =None
- if self .target_post_id_from_initial_url and self .custom_folder_name :
- char_title_subfolder_name =self .custom_folder_name
- elif char_filter_info_that_matched_file :
- char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"])
- elif char_filter_that_matched_title :
- char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"])
- elif char_filter_that_matched_comment :
- char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"])
- if char_title_subfolder_name :
- target_base_folders_for_this_file_iteration .append (char_title_subfolder_name )
- else :
- self .logger (f"⚠️ File '{current_api_original_filename }' candidate by char filter, but no folder name derived. Using post title.")
- target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title ))
- else :
- if base_folder_names_for_post_content :
- target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content )
- else :
- target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title ))
- if not target_base_folders_for_this_file_iteration :
- target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content"))
- for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration :
- current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root
- if self .use_subfolders and target_base_folder_name_for_instance :
- current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance )
- if self .use_post_subfolders :
- current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name )
- manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None
- manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None
- folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title )
- futures_list .append (file_pool .submit (
- self ._download_single_file ,
- file_info =file_info_to_dl ,
- target_folder_path =current_path_for_file_instance ,
- headers =headers ,original_post_id_for_log =post_id ,skip_event =self .skip_current_file_flag ,
- post_title =post_title ,manga_date_file_counter_ref =manga_date_counter_to_pass ,
- manga_global_file_counter_ref =manga_global_counter_to_pass ,folder_context_name_for_history =folder_context_for_file ,
- file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list )
- ))
- for future in as_completed (futures_list ):
- if self .check_cancel ():
- for f_to_cancel in futures_list :
- if not f_to_cancel .done ():
- f_to_cancel .cancel ()
- break
- try :
- dl_count ,skip_count ,actual_filename_saved ,original_kept_flag ,status ,details_for_dialog_or_retry =future .result ()
- total_downloaded_this_post +=dl_count
- total_skipped_this_post +=skip_count
- if original_kept_flag and dl_count >0 and actual_filename_saved :
- kept_original_filenames_for_log .append (actual_filename_saved )
- if status ==FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER and details_for_dialog_or_retry :
- retryable_failures_this_post .append (details_for_dialog_or_retry )
- elif status ==FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION and details_for_dialog_or_retry :
- permanent_failures_this_post .append (details_for_dialog_or_retry )
- except CancelledError :
- self .logger (f" File download task for post {post_id } was cancelled.")
- total_skipped_this_post +=1
- except Exception as exc_f :
- self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }")
- total_skipped_this_post +=1
- self ._emit_signal ('file_progress',"",None )
- if self.session_file_path and self.session_lock:
- try:
- with self.session_lock:
- if os.path.exists(self.session_file_path):
- with open(self.session_file_path, 'r', encoding='utf-8') as f:
- session_data = json.load(f)
- if 'download_state' not in session_data:
- session_data['download_state'] = {}
- if not isinstance(session_data['download_state'].get('processed_post_ids'), list):
- session_data['download_state']['processed_post_ids'] = []
- session_data['download_state']['processed_post_ids'].append(self.post.get('id'))
- if permanent_failures_this_post:
- if not isinstance(session_data['download_state'].get('permanently_failed_files'), list):
- session_data['download_state']['permanently_failed_files'] = []
- existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']}
- for failure in permanent_failures_this_post:
- if failure.get('file_info', {}).get('url') not in existing_failed_urls:
- session_data['download_state']['permanently_failed_files'].append(failure)
- temp_file_path = self.session_file_path + ".tmp"
- with open(temp_file_path, 'w', encoding='utf-8') as f_tmp:
- json.dump(session_data, f_tmp, indent=2)
- os.replace(temp_file_path, self.session_file_path)
- except Exception as e:
- self.logger(f"⚠️ Could not update session file for post {post_id}: {e}")
- if not self .extract_links_only and (total_downloaded_this_post >0 or not (
- (current_character_filters and (
- (self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or
- (self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match )
- ))or
- (self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH )and any (sw .lower ()in post_title .lower ()for sw in self .skip_words_list ))
- )):
- top_file_name_for_history ="N/A"
- if post_main_file_info and post_main_file_info .get ('name'):
- top_file_name_for_history =post_main_file_info ['name']
- elif post_attachments and post_attachments [0 ].get ('name'):
- top_file_name_for_history =post_attachments [0 ]['name']
- history_data_for_this_post ={
- 'post_title':post_title ,'post_id':post_id ,
- 'top_file_name':top_file_name_for_history ,
- 'num_files':num_potential_files_in_post ,
- 'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown",
- 'download_location':determined_post_save_path_for_history ,
- 'service':self .service ,'user_id':self .user_id ,
- }
- if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled.");
- else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }")
- if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 :
- path_to_check_for_emptiness =determined_post_save_path_for_history
- try :
- if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ):
- self .logger (f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness }'")
- os .rmdir (path_to_check_for_emptiness )
- except OSError as e_rmdir :
- self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }")
-
- except Exception as e:
- post_id = self.post.get('id', 'N/A')
- # Log the unexpected crash of the worker
- self.logger(f"❌ CRITICAL WORKER FAILURE on Post ID {post_id}: {e}\n{traceback.format_exc(limit=4)}")
- # Ensure the number of skipped files reflects the total potential files in the post,
- # as none of them were processed successfully.
- num_potential_files_in_post = len(self.post.get('attachments', [])) + (1 if self.post.get('file') else 0)
- total_skipped_this_post = num_potential_files_in_post
- total_downloaded_this_post = 0
+ elif self .char_filter_scope ==CHAR_SCOPE_COMMENTS :
+ if post_is_candidate_by_file_char_match_in_comment_scope :
+ file_is_candidate_by_char_filter_scope =True
+ char_filter_info_that_matched_file =char_filter_that_matched_file_in_comment_scope
+ self .logger (f" File '{current_api_original_filename }' is candidate because a file in this post matched char filter (Overall Scope: Comments).")
+ elif post_is_candidate_by_comment_char_match :
+ file_is_candidate_by_char_filter_scope =True
+ char_filter_info_that_matched_file =char_filter_that_matched_comment
+ self .logger (f" File '{current_api_original_filename }' is candidate because post comments matched char filter (Overall Scope: Comments).")
+ if not file_is_candidate_by_char_filter_scope :
+ self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.")
+ total_skipped_this_post +=1
+ continue
- finally:
- # This 'finally' block ensures that the worker ALWAYS reports back,
- # preventing the main UI from getting stuck.
- result_tuple = (total_downloaded_this_post, total_skipped_this_post,
- kept_original_filenames_for_log, retryable_failures_this_post,
- permanent_failures_this_post, history_data_for_this_post,
- temp_filepath_for_return)
- self._emit_signal('worker_finished', result_tuple)
-
- return result_tuple
+
+ target_base_folders_for_this_file_iteration =[]
+
+ if current_character_filters :
+ char_title_subfolder_name =None
+ if self .target_post_id_from_initial_url and self .custom_folder_name :
+ char_title_subfolder_name =self .custom_folder_name
+ elif char_filter_info_that_matched_file :
+ char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"])
+ elif char_filter_that_matched_title :
+ char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"])
+ elif char_filter_that_matched_comment :
+ char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"])
+ if char_title_subfolder_name :
+ target_base_folders_for_this_file_iteration .append (char_title_subfolder_name )
+ else :
+ self .logger (f"⚠️ File '{current_api_original_filename }' candidate by char filter, but no folder name derived. Using post title.")
+ target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title ))
+ else :
+ if base_folder_names_for_post_content :
+ target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content )
+ else :
+ target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title ))
+
+ if not target_base_folders_for_this_file_iteration :
+ target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content"))
+
+ for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration :
+ current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root
+ if self .use_subfolders and target_base_folder_name_for_instance :
+ current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance )
+ if self .use_post_subfolders :
+
+ current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name )
+
+ manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None
+ manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None
+
+
+ folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title )
+
+ futures_list .append (file_pool .submit (
+ self ._download_single_file ,
+ file_info =file_info_to_dl ,
+ target_folder_path =current_path_for_file_instance ,
+ headers =headers ,original_post_id_for_log =post_id ,skip_event =self .skip_current_file_flag ,
+ post_title =post_title ,manga_date_file_counter_ref =manga_date_counter_to_pass ,
+ manga_global_file_counter_ref =manga_global_counter_to_pass ,folder_context_name_for_history =folder_context_for_file ,
+ file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list )
+ ))
+
+ for future in as_completed (futures_list ):
+ if self .check_cancel ():
+ for f_to_cancel in futures_list :
+ if not f_to_cancel .done ():
+ f_to_cancel .cancel ()
+ break
+ try :
+ dl_count ,skip_count ,actual_filename_saved ,original_kept_flag ,status ,details_for_dialog_or_retry =future .result ()
+ total_downloaded_this_post +=dl_count
+ total_skipped_this_post +=skip_count
+ if original_kept_flag and dl_count >0 and actual_filename_saved :
+ kept_original_filenames_for_log .append (actual_filename_saved )
+ if status ==FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER and details_for_dialog_or_retry :
+ retryable_failures_this_post .append (details_for_dialog_or_retry )
+ elif status ==FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION and details_for_dialog_or_retry :
+ permanent_failures_this_post .append (details_for_dialog_or_retry )
+ except CancelledError :
+ self .logger (f" File download task for post {post_id } was cancelled.")
+ total_skipped_this_post +=1
+ except Exception as exc_f :
+ self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }")
+ total_skipped_this_post +=1
+ self ._emit_signal ('file_progress',"",None )
+
+ # After a post's files are all processed, update the session file to mark this post as done.
+ if self.session_file_path and self.session_lock:
+ try:
+ with self.session_lock:
+ if os.path.exists(self.session_file_path): # Only update if the session file exists
+ # Read current state
+ with open(self.session_file_path, 'r', encoding='utf-8') as f:
+ session_data = json.load(f)
+
+ if 'download_state' not in session_data:
+ session_data['download_state'] = {}
+
+ # Add processed ID
+ if not isinstance(session_data['download_state'].get('processed_post_ids'), list):
+ session_data['download_state']['processed_post_ids'] = []
+ session_data['download_state']['processed_post_ids'].append(self.post.get('id'))
+
+ # Add any permanent failures from this worker to the session file
+ if permanent_failures_this_post:
+ if not isinstance(session_data['download_state'].get('permanently_failed_files'), list):
+ session_data['download_state']['permanently_failed_files'] = []
+ # To avoid duplicates if the same post is somehow re-processed
+ existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']}
+ for failure in permanent_failures_this_post:
+ if failure.get('file_info', {}).get('url') not in existing_failed_urls:
+ session_data['download_state']['permanently_failed_files'].append(failure)
+
+ # Write to temp file and then atomically replace
+ temp_file_path = self.session_file_path + ".tmp"
+ with open(temp_file_path, 'w', encoding='utf-8') as f_tmp:
+ json.dump(session_data, f_tmp, indent=2)
+ os.replace(temp_file_path, self.session_file_path)
+ except Exception as e:
+ self.logger(f"⚠️ Could not update session file for post {post_id}: {e}")
+
+ if not self .extract_links_only and (total_downloaded_this_post >0 or not (
+ (current_character_filters and (
+ (self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or
+ (self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match )
+ ))or
+ (self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH )and any (sw .lower ()in post_title .lower ()for sw in self .skip_words_list ))
+ )):
+ top_file_name_for_history ="N/A"
+ if post_main_file_info and post_main_file_info .get ('name'):
+ top_file_name_for_history =post_main_file_info ['name']
+ elif post_attachments and post_attachments [0 ].get ('name'):
+ top_file_name_for_history =post_attachments [0 ]['name']
+
+ history_data_for_this_post ={
+ 'post_title':post_title ,'post_id':post_id ,
+ 'top_file_name':top_file_name_for_history ,
+ 'num_files':num_potential_files_in_post ,
+ 'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown",
+ 'download_location':determined_post_save_path_for_history ,
+ 'service':self .service ,'user_id':self .user_id ,
+ }
+ if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled.");
+ else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }")
+
+ if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 :
+
+ path_to_check_for_emptiness =determined_post_save_path_for_history
+ try :
+ if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ):
+ self .logger (f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness }'")
+ os .rmdir (path_to_check_for_emptiness )
+ except OSError as e_rmdir :
+ self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }")
+
+ result_tuple = (total_downloaded_this_post, total_skipped_this_post,
+ kept_original_filenames_for_log, retryable_failures_this_post,
+ permanent_failures_this_post, history_data_for_this_post,
+ None) # The 7th item is None because we already saved the temp file
+
+ # In Single PDF mode, the 7th item is the temp file path we created.
+ if self.single_pdf_mode and os.path.exists(temp_filepath):
+ result_tuple = (0, 0, [], [], [], None, temp_filepath)
+
+ self._emit_signal('worker_finished', result_tuple)
+ return # The method now returns nothing.
class DownloadThread (QThread ):
progress_signal =pyqtSignal (str )
@@ -1749,7 +1802,6 @@ class DownloadThread (QThread ):
cookie_text ="",
session_file_path=None,
session_lock=None,
- processed_ids_to_skip=None,
text_only_scope=None,
text_export_format='txt',
single_pdf_mode=False,
@@ -1808,12 +1860,11 @@ class DownloadThread (QThread ):
self .manga_global_file_counter_ref =manga_global_file_counter_ref
self.session_file_path = session_file_path
self.session_lock = session_lock
- self.processed_ids_to_skip = processed_ids_to_skip
self.history_candidates_buffer =deque (maxlen =8 )
self.text_only_scope = text_only_scope
self.text_export_format = text_export_format
- self.single_pdf_mode = single_pdf_mode
- self.project_root_dir = project_root_dir
+        self.single_pdf_mode = single_pdf_mode
+        self.project_root_dir = project_root_dir
if self .compress_images and Image is None :
self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
@@ -1838,65 +1889,37 @@ class DownloadThread (QThread ):
self .skip_current_file_flag .set ()
else :self .logger ("ℹ️ Skip file: No download active or skip flag not available for current context.")
- def run(self):
+ def run (self ):
"""
The main execution method for the single-threaded download process.
This version is corrected to handle 7 return values from the worker and
to pass the 'single_pdf_mode' setting correctly.
"""
- grand_total_downloaded_files = 0
- grand_total_skipped_files = 0
- grand_list_of_kept_original_filenames = []
- was_process_cancelled = False
+ grand_total_downloaded_files =0
+ grand_total_skipped_files =0
+ grand_list_of_kept_original_filenames =[]
+ was_process_cancelled =False
- if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED and not self.extract_links_only and self.manga_date_file_counter_ref is None:
- series_scan_dir = self.output_dir
- if self.use_subfolders :
- if self.filter_character_list_objects_initial and self.filter_character_list_objects_initial [0] and self.filter_character_list_objects_initial[0].get("name"):
- series_folder_name = clean_folder_name(self.filter_character_list_objects_initial[0]["name"])
- series_scan_dir = os.path.join(series_scan_dir, series_folder_name)
- elif self.service and self.user_id :
- creator_based_folder_name = clean_folder_name(str(self.user_id))
- series_scan_dir = os.path.join(series_scan_dir, creator_based_folder_name)
-
- highest_num = 0
- if os.path.isdir(series_scan_dir):
- self.logger(f"ℹ️ [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir}'...")
- for dirpath, _, filenames_in_dir in os.walk(series_scan_dir):
- for filename_to_check in filenames_in_dir:
- prefix_to_check = clean_filename(self.manga_date_prefix.strip()) if self.manga_date_prefix and self.manga_date_prefix.strip() else ""
- name_part_to_match = filename_to_check
- if prefix_to_check and name_part_to_match.startswith(prefix_to_check):
- name_part_to_match = name_part_to_match[len(prefix_to_check):].lstrip()
-
- base_name_no_ext = os.path.splitext(name_part_to_match)[0]
- match = re.match(r"(\d+)", base_name_no_ext)
- if match:
- highest_num = max(highest_num, int(match.group(1)))
-
- self.manga_date_file_counter_ref = [highest_num + 1, threading.Lock()]
- self.logger(f"ℹ️ [Thread] Manga Date Mode: Initialized date-based counter at {self.manga_date_file_counter_ref[0]}.")
+        # NOTE(review): the manga-counter initialization logic below has been stubbed out to `pass`
+        # in this patch — the counters will remain None; confirm this is intentional.
+ if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED and not self .extract_links_only and self .manga_date_file_counter_ref is None :
+ # ... (existing manga counter initialization logic) ...
pass
-
- if self.manga_mode_active and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING and not self.extract_links_only and self.manga_global_file_counter_ref is None:
- self.manga_global_file_counter_ref = [1, threading.Lock()]
- self.logger(f"ℹ️ [Thread] Manga Title+GlobalNum Mode: Initialized global counter at {self.manga_global_file_counter_ref[0]}.")
+ if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING and not self .extract_links_only and self .manga_global_file_counter_ref is None :
+ # ... (existing manga counter initialization logic) ...
pass
worker_signals_obj = PostProcessorSignals()
- try:
+ try :
+ # Connect signals
worker_signals_obj.progress_signal.connect(self.progress_signal)
worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal)
- worker_signals_obj.worker_finished_signal.connect(lambda result: None)
+ worker_signals_obj.worker_finished_signal.connect(lambda result: None) # Connect to dummy lambda to avoid errors
self.logger(" Starting post fetch (single-threaded download process)...")
- self.logger(" Fetching ALL available post information first. This may take a moment...")
-
- all_posts_data = []
post_generator = download_from_api(
self.api_url_input,
logger=self.logger,
@@ -1916,101 +1939,99 @@ class DownloadThread (QThread ):
if self.isInterruptionRequested():
was_process_cancelled = True
break
- all_posts_data.extend(posts_batch_data)
-
- if not was_process_cancelled:
- self.logger(f"✅ Fetching complete. Found {len(all_posts_data)} total posts. Starting download process...")
+ for individual_post_data in posts_batch_data:
+ if self.isInterruptionRequested():
+ was_process_cancelled = True
+ break
+
+ # Create the worker, now correctly passing single_pdf_mode
+ post_processing_worker = PostProcessorWorker(
+ post_data=individual_post_data,
+ download_root=self.output_dir,
+ known_names=self.known_names,
+ filter_character_list=self.filter_character_list_objects_initial,
+ dynamic_character_filter_holder=self.dynamic_filter_holder,
+ unwanted_keywords=self.unwanted_keywords,
+ filter_mode=self.filter_mode,
+ skip_zip=self.skip_zip, skip_rar=self.skip_rar,
+ use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders,
+ target_post_id_from_initial_url=self.initial_target_post_id,
+ custom_folder_name=self.custom_folder_name,
+ compress_images=self.compress_images, download_thumbnails=self.download_thumbnails,
+ service=self.service, user_id=self.user_id,
+ api_url_input=self.api_url_input,
+ pause_event=self.pause_event,
+ cancellation_event=self.cancellation_event,
+ emitter=worker_signals_obj,
+ downloaded_files=self.downloaded_files,
+ downloaded_file_hashes=self.downloaded_file_hashes,
+ downloaded_files_lock=self.downloaded_files_lock,
+ downloaded_file_hashes_lock=self.downloaded_file_hashes_lock,
+ skip_words_list=self.skip_words_list,
+ skip_words_scope=self.skip_words_scope,
+ show_external_links=self.show_external_links,
+ extract_links_only=self.extract_links_only,
+ num_file_threads=self.num_file_threads_for_worker,
+ skip_current_file_flag=self.skip_current_file_flag,
+ manga_mode_active=self.manga_mode_active,
+ manga_filename_style=self.manga_filename_style,
+ manga_date_prefix=self.manga_date_prefix,
+ char_filter_scope=self.char_filter_scope,
+ remove_from_filename_words_list=self.remove_from_filename_words_list,
+ allow_multipart_download=self.allow_multipart_download,
+ selected_cookie_file=self.selected_cookie_file,
+ app_base_dir=self.app_base_dir,
+ cookie_text=self.cookie_text,
+ override_output_dir=self.override_output_dir,
+ manga_global_file_counter_ref=self.manga_global_file_counter_ref,
+ use_cookie=self.use_cookie,
+ manga_date_file_counter_ref=self.manga_date_file_counter_ref,
+ use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder,
+ keep_in_post_duplicates=self.keep_in_post_duplicates,
+ creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
+ session_file_path=self.session_file_path,
+ session_lock=self.session_lock,
+ text_only_scope=self.text_only_scope,
+ text_export_format=self.text_export_format,
+ single_pdf_mode=self.single_pdf_mode, # <-- This is now correctly passed
+ project_root_dir=self.project_root_dir
+ )
+ try:
+ # Correctly unpack the 7 values returned from the worker
+ (dl_count, skip_count, kept_originals_this_post,
+ retryable_failures, permanent_failures,
+ history_data, temp_filepath) = post_processing_worker.process()
+
+ grand_total_downloaded_files += dl_count
+ grand_total_skipped_files += skip_count
+
+ if kept_originals_this_post:
+ grand_list_of_kept_original_filenames.extend(kept_originals_this_post)
+ if retryable_failures:
+ self.retryable_file_failed_signal.emit(retryable_failures)
+ if history_data:
+ if len(self.history_candidates_buffer) < 8:
+ self.post_processed_for_history_signal.emit(history_data)
+ if permanent_failures:
+ self.permanent_file_failed_signal.emit(permanent_failures)
+
+ # In single-threaded text mode, pass the temp file path back to the main window
+ if self.single_pdf_mode and temp_filepath:
+ self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")
- for individual_post_data in all_posts_data:
- if self.isInterruptionRequested():
- was_process_cancelled = True
+ except Exception as proc_err:
+ post_id_for_err = individual_post_data.get('id', 'N/A')
+ self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}")
+ traceback.print_exc()
+ num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0)
+ grand_total_skipped_files += num_potential_files_est
+
+ if self.skip_current_file_flag and self.skip_current_file_flag.is_set():
+ self.skip_current_file_flag.clear()
+ self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
+ self.msleep(10)
+ if was_process_cancelled:
break
-
- post_processing_worker = PostProcessorWorker(
- post_data=individual_post_data,
- download_root=self.output_dir,
- known_names=self.known_names,
- filter_character_list=self.filter_character_list_objects_initial,
- dynamic_character_filter_holder=self.dynamic_filter_holder,
- unwanted_keywords=self.unwanted_keywords,
- filter_mode=self.filter_mode,
- skip_zip=self.skip_zip, skip_rar=self.skip_rar,
- use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders,
- target_post_id_from_initial_url=self.initial_target_post_id,
- custom_folder_name=self.custom_folder_name,
- compress_images=self.compress_images, download_thumbnails=self.download_thumbnails,
- service=self.service, user_id=self.user_id,
- api_url_input=self.api_url_input,
- pause_event=self.pause_event,
- cancellation_event=self.cancellation_event,
- emitter=worker_signals_obj,
- downloaded_files=self.downloaded_files,
- downloaded_file_hashes=self.downloaded_file_hashes,
- downloaded_files_lock=self.downloaded_files_lock,
- downloaded_file_hashes_lock=self.downloaded_file_hashes_lock,
- skip_words_list=self.skip_words_list,
- skip_words_scope=self.skip_words_scope,
- show_external_links=self.show_external_links,
- extract_links_only=self.extract_links_only,
- num_file_threads=self.num_file_threads_for_worker,
- skip_current_file_flag=self.skip_current_file_flag,
- manga_mode_active=self.manga_mode_active,
- manga_filename_style=self.manga_filename_style,
- manga_date_prefix=self.manga_date_prefix,
- char_filter_scope=self.char_filter_scope,
- remove_from_filename_words_list=self.remove_from_filename_words_list,
- allow_multipart_download=self.allow_multipart_download,
- selected_cookie_file=self.selected_cookie_file,
- app_base_dir=self.app_base_dir,
- cookie_text=self.cookie_text,
- override_output_dir=self.override_output_dir,
- manga_global_file_counter_ref=self.manga_global_file_counter_ref,
- use_cookie=self.use_cookie,
- manga_date_file_counter_ref=self.manga_date_file_counter_ref,
- use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder,
- keep_in_post_duplicates=self.keep_in_post_duplicates,
- creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
- session_file_path=self.session_file_path,
- session_lock=self.session_lock,
- processed_ids_to_skip=self.processed_ids_to_skip, # <-- FIX: Pass the list to the worker
- text_only_scope=self.text_only_scope,
- text_export_format=self.text_export_format,
- single_pdf_mode=self.single_pdf_mode,
- project_root_dir=self.project_root_dir
- )
- try:
- (dl_count, skip_count, kept_originals_this_post,
- retryable_failures, permanent_failures,
- history_data, temp_filepath) = post_processing_worker.process()
-
- grand_total_downloaded_files += dl_count
- grand_total_skipped_files += skip_count
-
- if kept_originals_this_post:
- grand_list_of_kept_original_filenames.extend(kept_originals_this_post)
- if retryable_failures:
- self.retryable_file_failed_signal.emit(retryable_failures)
- if history_data:
- if len(self.history_candidates_buffer) < 8:
- self.post_processed_for_history_signal.emit(history_data)
- if permanent_failures:
- self.permanent_file_failed_signal.emit(permanent_failures)
-
- if self.single_pdf_mode and temp_filepath:
- self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")
-
- except Exception as proc_err:
- post_id_for_err = individual_post_data.get('id', 'N/A')
- self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}")
- traceback.print_exc()
- num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0)
- grand_total_skipped_files += num_potential_files_est
-
- if self.skip_current_file_flag and self.skip_current_file_flag.is_set():
- self.skip_current_file_flag.clear()
- self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
- self.msleep(10)
-
if not was_process_cancelled and not self.isInterruptionRequested():
self.logger("✅ All posts processed or end of content reached by DownloadThread.")
@@ -2019,6 +2040,7 @@ class DownloadThread (QThread ):
traceback.print_exc()
finally:
try:
+ # Disconnect signals
if worker_signals_obj:
worker_signals_obj.progress_signal.disconnect(self.progress_signal)
worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
@@ -2029,8 +2051,14 @@ class DownloadThread (QThread ):
except (TypeError, RuntimeError) as e:
self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}")
+ # Emit the final signal with all collected results
self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)
+ def receive_add_character_result (self ,result ):
+ with QMutexLocker (self .prompt_mutex ):
+ self ._add_character_response =result
+ self .logger (f" (DownloadThread) Received character prompt response: {'Yes (added/confirmed)'if result else 'No (declined/failed)'}")
+
class InterruptedError(Exception):
"""Custom exception for handling cancellations gracefully."""
pass
\ No newline at end of file
diff --git a/src/ui/dialogs/MoreOptionsDialog.py b/src/ui/dialogs/MoreOptionsDialog.py
index 1f1ec5d..2136dd5 100644
--- a/src/ui/dialogs/MoreOptionsDialog.py
+++ b/src/ui/dialogs/MoreOptionsDialog.py
@@ -23,6 +23,6 @@ class MoreOptionsDialog(QDialog):
         self.radio_button_group = QButtonGroup(self)
         self.radio_content = QRadioButton("Description/Content")
-        self.radio_comments = QRadioButton("Comments")
+        self.radio_comments = QRadioButton("Comments (Not Working)")
self.radio_button_group.addButton(self.radio_content)
self.radio_button_group.addButton(self.radio_comments)
layout.addWidget(self.radio_content)
diff --git a/src/ui/flow_layout.py b/src/ui/flow_layout.py
deleted file mode 100644
index e492ad4..0000000
--- a/src/ui/flow_layout.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# src/ui/flow_layout.py
-
-from PyQt5.QtWidgets import QLayout, QSizePolicy, QStyle
-from PyQt5.QtCore import QPoint, QRect, QSize, Qt
-
-class FlowLayout(QLayout):
- """A custom layout that arranges widgets in a flow, wrapping as necessary."""
- def __init__(self, parent=None, margin=0, spacing=-1):
- super(FlowLayout, self).__init__(parent)
-
- if parent is not None:
- self.setContentsMargins(margin, margin, margin, margin)
-
- self.setSpacing(spacing)
- self.itemList = []
-
- def __del__(self):
- item = self.takeAt(0)
- while item:
- item = self.takeAt(0)
-
- def addItem(self, item):
- self.itemList.append(item)
-
- def count(self):
- return len(self.itemList)
-
- def itemAt(self, index):
- if 0 <= index < len(self.itemList):
- return self.itemList[index]
- return None
-
- def takeAt(self, index):
- if 0 <= index < len(self.itemList):
- return self.itemList.pop(index)
- return None
-
- def expandingDirections(self):
- return Qt.Orientations(Qt.Orientation(0))
-
- def hasHeightForWidth(self):
- return True
-
- def heightForWidth(self, width):
- return self._do_layout(QRect(0, 0, width, 0), True)
-
- def setGeometry(self, rect):
- super(FlowLayout, self).setGeometry(rect)
- self._do_layout(rect, False)
-
- def sizeHint(self):
- return self.minimumSize()
-
- def minimumSize(self):
- size = QSize()
- for item in self.itemList:
- size = size.expandedTo(item.minimumSize())
-
- margin, _, _, _ = self.getContentsMargins()
- size += QSize(2 * margin, 2 * margin)
- return size
-
- def _do_layout(self, rect, test_only):
- x = rect.x()
- y = rect.y()
- line_height = 0
-
- space_x = self.spacing()
- space_y = self.spacing()
- if self.layout() is not None:
- space_x = self.spacing()
- space_y = self.spacing()
- else:
- space_x = self.spacing()
- space_y = self.spacing()
-
-
- for item in self.itemList:
- wid = item.widget()
- next_x = x + item.sizeHint().width() + space_x
- if next_x - space_x > rect.right() and line_height > 0:
- x = rect.x()
- y = y + line_height + space_y
- next_x = x + item.sizeHint().width() + space_x
- line_height = 0
-
- if not test_only:
- item.setGeometry(QRect(QPoint(x, y), item.sizeHint()))
-
- x = next_x
- line_height = max(line_height, item.sizeHint().height())
-
- return y + line_height - rect.y()
\ No newline at end of file
diff --git a/src/ui/main_window.py b/src/ui/main_window.py
index 49958d6..9ea4716 100644
--- a/src/ui/main_window.py
+++ b/src/ui/main_window.py
@@ -26,7 +26,7 @@ from PyQt5.QtWidgets import (
QScrollArea, QListWidgetItem, QSizePolicy, QProgressBar, QAbstractItemView, QFrame,
QMainWindow, QAction, QGridLayout
)
-from PyQt5.QtCore import Qt, QThread, pyqtSignal, QObject, QTimer, QSettings, QStandardPaths, QUrl, QSize, QProcess, QMutex, QMutexLocker, QCoreApplication
+from PyQt5.QtCore import Qt, QThread, pyqtSignal, QObject, QTimer, QSettings, QStandardPaths, QUrl, QSize, QProcess, QMutex, QMutexLocker
# --- Local Application Imports ---
from ..services.drive_downloader import download_mega_file as drive_download_mega_file ,download_gdrive_file ,download_dropbox_file
@@ -995,10 +995,9 @@ class DownloaderApp (QWidget ):
f"Could not automatically restart the application: {e }\n\nPlease restart it manually.")
def init_ui(self):
- from .flow_layout import FlowLayout
-
self.main_splitter = QSplitter(Qt.Horizontal)
+ # --- Use a scroll area for the left panel for consistency ---
left_scroll_area = QScrollArea()
left_scroll_area.setWidgetResizable(True)
left_scroll_area.setFrameShape(QFrame.NoFrame)
@@ -1022,7 +1021,7 @@ class DownloaderApp (QWidget ):
url_input_layout.addWidget(self.url_label_widget)
self.link_input = QLineEdit()
self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765")
- self.link_input.textChanged.connect(self.update_custom_folder_visibility)
+ self.link_input.textChanged.connect(self.update_custom_folder_visibility) # Connects the custom folder logic
url_input_layout.addWidget(self.link_input, 1)
self.empty_popup_button = QPushButton("🎨")
self.empty_popup_button.setStyleSheet("padding: 4px 6px;")
@@ -1068,7 +1067,7 @@ class DownloaderApp (QWidget ):
dir_layout.addWidget(self.dir_button)
left_layout.addLayout(dir_layout)
- # --- Filters and Custom Folder Container ---
+ # --- Filters and Custom Folder Container (from old layout) ---
self.filters_and_custom_folder_container_widget = QWidget()
filters_and_custom_folder_layout = QHBoxLayout(self.filters_and_custom_folder_container_widget)
filters_and_custom_folder_layout.setContentsMargins(0, 5, 0, 0)
@@ -1090,6 +1089,7 @@ class DownloaderApp (QWidget ):
char_input_and_button_layout.addWidget(self.char_filter_scope_toggle_button, 1)
character_filter_v_layout.addLayout(char_input_and_button_layout)
+ # --- Custom Folder Widget Definition ---
self.custom_folder_widget = QWidget()
custom_folder_v_layout = QVBoxLayout(self.custom_folder_widget)
custom_folder_v_layout.setContentsMargins(0, 0, 0, 0)
@@ -1143,100 +1143,52 @@ class DownloaderApp (QWidget ):
file_filter_layout = QVBoxLayout()
file_filter_layout.setContentsMargins(0, 10, 0, 0)
file_filter_layout.addWidget(QLabel("Filter Files:"))
-
- radio_button_flow_layout = FlowLayout()
- radio_button_flow_layout.setSpacing(10)
-
+ radio_button_layout = QHBoxLayout()
+ radio_button_layout.setSpacing(10)
self.radio_group = QButtonGroup(self)
-
- group1_widget = QWidget()
- group1_layout = QHBoxLayout(group1_widget)
- group1_layout.setContentsMargins(0, 0, 0, 0)
- group1_layout.setSpacing(15)
self.radio_all = QRadioButton("All")
self.radio_images = QRadioButton("Images/GIFs")
self.radio_videos = QRadioButton("Videos")
- self.radio_group.addButton(self.radio_all)
- self.radio_group.addButton(self.radio_images)
- self.radio_group.addButton(self.radio_videos)
- group1_layout.addWidget(self.radio_all)
- group1_layout.addWidget(self.radio_images)
- group1_layout.addWidget(self.radio_videos)
- radio_button_flow_layout.addWidget(group1_widget)
-
- group2_widget = QWidget()
- group2_layout = QHBoxLayout(group2_widget)
- group2_layout.setContentsMargins(0, 0, 0, 0)
- group2_layout.setSpacing(15)
self.radio_only_archives = QRadioButton("📦 Only Archives")
self.radio_only_audio = QRadioButton("🎧 Only Audio")
self.radio_only_links = QRadioButton("🔗 Only Links")
- self.radio_group.addButton(self.radio_only_archives)
- self.radio_group.addButton(self.radio_only_audio)
- self.radio_group.addButton(self.radio_only_links)
- group2_layout.addWidget(self.radio_only_archives)
- group2_layout.addWidget(self.radio_only_audio)
- group2_layout.addWidget(self.radio_only_links)
- radio_button_flow_layout.addWidget(group2_widget)
-
- group3_widget = QWidget()
- group3_layout = QHBoxLayout(group3_widget)
- group3_layout.setContentsMargins(0, 0, 0, 0)
- group3_layout.setSpacing(15)
- self.radio_more = QRadioButton("More")
- self.favorite_mode_checkbox = QCheckBox("⭐ Favorite Mode")
- self.radio_group.addButton(self.radio_more)
- group3_layout.addWidget(self.radio_more)
- group3_layout.addWidget(self.favorite_mode_checkbox)
- radio_button_flow_layout.addWidget(group3_widget)
+ self.radio_more = QRadioButton("More")
self.radio_all.setChecked(True)
- file_filter_layout.addLayout(radio_button_flow_layout)
+ for btn in [self.radio_all, self.radio_images, self.radio_videos, self.radio_only_archives, self.radio_only_audio, self.radio_only_links, self.radio_more]:
+ self.radio_group.addButton(btn)
+ radio_button_layout.addWidget(btn)
+ self.favorite_mode_checkbox = QCheckBox()
+ self.favorite_mode_checkbox.setChecked(False)
+ radio_button_layout.addWidget(self.favorite_mode_checkbox)
+ radio_button_layout.addStretch(1)
+ file_filter_layout.addLayout(radio_button_layout)
left_layout.addLayout(file_filter_layout)
# --- Checkboxes Group ---
checkboxes_group_layout = QVBoxLayout()
checkboxes_group_layout.setSpacing(10)
-
- checkboxes_flow_layout = FlowLayout()
- checkboxes_flow_layout.setSpacing(10)
-
- groupA_widget = QWidget()
- groupA_layout = QHBoxLayout(groupA_widget)
- groupA_layout.setContentsMargins(0,0,0,0)
- groupA_layout.setSpacing(15)
+ row1_layout = QHBoxLayout()
+ row1_layout.setSpacing(10)
self.skip_zip_checkbox = QCheckBox("Skip .zip")
self.skip_zip_checkbox.setChecked(True)
+ row1_layout.addWidget(self.skip_zip_checkbox)
self.skip_rar_checkbox = QCheckBox("Skip .rar")
self.skip_rar_checkbox.setChecked(True)
- groupA_layout.addWidget(self.skip_zip_checkbox)
- groupA_layout.addWidget(self.skip_rar_checkbox)
- checkboxes_flow_layout.addWidget(groupA_widget)
-
- groupB_widget = QWidget()
- groupB_layout = QHBoxLayout(groupB_widget)
- groupB_layout.setContentsMargins(0,0,0,0)
- groupB_layout.setSpacing(15)
+ row1_layout.addWidget(self.skip_rar_checkbox)
self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only")
+ row1_layout.addWidget(self.download_thumbnails_checkbox)
self.scan_content_images_checkbox = QCheckBox("Scan Content for Images")
self.scan_content_images_checkbox.setChecked(self.scan_content_images_setting)
- groupB_layout.addWidget(self.download_thumbnails_checkbox)
- groupB_layout.addWidget(self.scan_content_images_checkbox)
- checkboxes_flow_layout.addWidget(groupB_widget)
-
- groupC_widget = QWidget()
- groupC_layout = QHBoxLayout(groupC_widget)
- groupC_layout.setContentsMargins(0,0,0,0)
- groupC_layout.setSpacing(15)
+ row1_layout.addWidget(self.scan_content_images_checkbox)
self.compress_images_checkbox = QCheckBox("Compress to WebP")
self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).")
+ row1_layout.addWidget(self.compress_images_checkbox)
self.keep_duplicates_checkbox = QCheckBox("Keep Duplicates")
self.keep_duplicates_checkbox.setToolTip("If checked, downloads all files from a post even if they have the same name.")
- groupC_layout.addWidget(self.compress_images_checkbox)
- groupC_layout.addWidget(self.keep_duplicates_checkbox)
- checkboxes_flow_layout.addWidget(groupC_widget)
-
- checkboxes_group_layout.addLayout(checkboxes_flow_layout)
+ row1_layout.addWidget(self.keep_duplicates_checkbox)
+ row1_layout.addStretch(1)
+ checkboxes_group_layout.addLayout(row1_layout)
# --- Advanced Settings ---
advanced_settings_label = QLabel("⚙️ Advanced Settings:")
@@ -1293,47 +1245,33 @@ class DownloaderApp (QWidget ):
btn_layout = QHBoxLayout(self.standard_action_buttons_widget)
btn_layout.setContentsMargins(0, 10, 0, 0)
btn_layout.setSpacing(10)
-
self.download_btn = QPushButton("⬇️ Start Download")
self.download_btn.setStyleSheet("padding: 4px 12px; font-weight: bold;")
self.download_btn.clicked.connect(self.start_download)
- self.download_btn.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred)
-
self.pause_btn = QPushButton("⏸️ Pause Download")
self.pause_btn.setEnabled(False)
self.pause_btn.setStyleSheet("padding: 4px 12px;")
self.pause_btn.clicked.connect(self._handle_pause_resume_action)
- self.pause_btn.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred)
-
self.cancel_btn = QPushButton("❌ Cancel & Reset UI")
self.cancel_btn.setEnabled(False)
self.cancel_btn.setStyleSheet("padding: 4px 12px;")
self.cancel_btn.clicked.connect(self.cancel_download_button_action)
- self.cancel_btn.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred)
-
self.error_btn = QPushButton("Error")
self.error_btn.setToolTip("View files skipped due to errors and optionally retry them.")
self.error_btn.setStyleSheet("padding: 4px 8px;")
self.error_btn.setEnabled(True)
- self.error_btn.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred)
-
btn_layout.addWidget(self.download_btn)
btn_layout.addWidget(self.pause_btn)
btn_layout.addWidget(self.cancel_btn)
btn_layout.addWidget(self.error_btn)
-
self.favorite_action_buttons_widget = QWidget()
favorite_buttons_layout = QHBoxLayout(self.favorite_action_buttons_widget)
self.favorite_mode_artists_button = QPushButton("🖼️ Favorite Artists")
self.favorite_mode_posts_button = QPushButton("📄 Favorite Posts")
self.favorite_scope_toggle_button = QPushButton()
- self.favorite_mode_artists_button.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred)
- self.favorite_mode_posts_button.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred)
- self.favorite_scope_toggle_button.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred)
favorite_buttons_layout.addWidget(self.favorite_mode_artists_button)
favorite_buttons_layout.addWidget(self.favorite_mode_posts_button)
favorite_buttons_layout.addWidget(self.favorite_scope_toggle_button)
-
self.bottom_action_buttons_stack = QStackedWidget()
self.bottom_action_buttons_stack.addWidget(self.standard_action_buttons_widget)
self.bottom_action_buttons_stack.addWidget(self.favorite_action_buttons_widget)
@@ -1355,7 +1293,7 @@ class DownloaderApp (QWidget ):
left_layout.addLayout(known_chars_label_layout)
self.character_list = QListWidget()
self.character_list.setSelectionMode(QListWidget.ExtendedSelection)
- self.character_list.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Expanding)
+ self.character_list.setMaximumHeight(150) # Set smaller height
left_layout.addWidget(self.character_list, 1)
char_manage_layout = QHBoxLayout()
char_manage_layout.setSpacing(10)
@@ -1392,11 +1330,11 @@ class DownloaderApp (QWidget ):
char_manage_layout.addWidget(self.known_names_help_button, 0)
char_manage_layout.addWidget(self.history_button, 0)
char_manage_layout.addWidget(self.future_settings_button, 0)
- char_manage_layout.addStretch()
left_layout.addLayout(char_manage_layout)
left_layout.addStretch(0)
# --- Right Panel (Logs) ---
+ # (This part of the layout is unchanged and remains correct)
log_title_layout = QHBoxLayout()
self.progress_log_label = QLabel("📜 Progress Log:")
log_title_layout.addWidget(self.progress_log_label)
@@ -1405,7 +1343,7 @@ class DownloaderApp (QWidget ):
self.link_search_input.setPlaceholderText("Search Links...")
self.link_search_input.setVisible(False)
log_title_layout.addWidget(self.link_search_input)
- self.link_search_button = QPushButton("?")
+ self.link_search_button = QPushButton("🔍")
self.link_search_button.setVisible(False)
self.link_search_button.setFixedWidth(30)
self.link_search_button.setStyleSheet("padding: 4px 4px;")
@@ -1485,9 +1423,10 @@ class DownloaderApp (QWidget ):
right_layout.addWidget(self.file_progress_label)
# --- Final Assembly ---
- self.main_splitter.addWidget(left_scroll_area)
+ self.main_splitter.addWidget(left_scroll_area) # Use the scroll area
self.main_splitter.addWidget(right_panel_widget)
- self.main_splitter.setSizes([800, 400])
+ self.main_splitter.setStretchFactor(0, 7)
+ self.main_splitter.setStretchFactor(1, 3)
top_level_layout = QHBoxLayout(self)
top_level_layout.setContentsMargins(0, 0, 0, 0)
top_level_layout.addWidget(self.main_splitter)
@@ -1507,7 +1446,7 @@ class DownloaderApp (QWidget ):
self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked())
if hasattr(self, 'radio_group') and self.radio_group.checkedButton():
self._handle_filter_mode_change(self.radio_group.checkedButton(), True)
- self.radio_group.buttonToggled.connect(self._handle_more_options_toggled)
+        self.radio_group.buttonToggled.connect(self._handle_more_options_toggled)
self._update_manga_filename_style_button_text()
self._update_skip_scope_button_text()
@@ -2286,30 +2225,19 @@ class DownloaderApp (QWidget ):
if self .external_log_output :self .external_log_output .clear ()
self .log_signal .emit ("\n"+"="*40 +"\n🔗 External Links Log Disabled\n"+"="*40 )
+
def _handle_filter_mode_change(self, button, checked):
- if not button or not checked:
- return
-
- # --- FIX: Automatically disable multithreading for text-based modes ---
- if button == self.radio_more:
- if hasattr(self, 'use_multithreading_checkbox'):
- self.use_multithreading_checkbox.setChecked(False)
- self.use_multithreading_checkbox.setEnabled(False)
- self.log_signal.emit("ℹ️ Text extraction mode enabled. Multithreading has been disabled.")
- else:
- # Re-enable it for other modes, but respect the manga mode rule that might also disable it.
- if hasattr(self, 'use_multithreading_checkbox'):
- is_sequential_manga = (self.manga_mode_checkbox.isChecked() and
- (self.manga_filename_style == STYLE_DATE_BASED or
- self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING))
- if not is_sequential_manga:
- self.use_multithreading_checkbox.setEnabled(True)
- # --- END FIX ---
-
+ # If a button other than "More" is selected, reset the UI
if button != self.radio_more and checked:
self.radio_more.setText("More")
self.more_filter_scope = None
- self.single_pdf_setting = False
+ self.single_pdf_setting = False # Reset the setting
+ # Re-enable the checkboxes
+ if hasattr(self, 'use_multithreading_checkbox'): self.use_multithreading_checkbox.setEnabled(True)
+ if hasattr(self, 'use_subfolders_checkbox'): self.use_subfolders_checkbox.setEnabled(True)
+
+ if not button or not checked:
+ return
is_only_links =(button ==self .radio_only_links )
is_only_audio =(hasattr (self ,'radio_only_audio')and self .radio_only_audio is not None and button ==self .radio_only_audio )
@@ -2339,6 +2267,8 @@ class DownloaderApp (QWidget ):
file_download_mode_active =not is_only_links
+
+
if self .use_subfolders_checkbox :self .use_subfolders_checkbox .setEnabled (file_download_mode_active )
if self .skip_words_input :self .skip_words_input .setEnabled (file_download_mode_active )
if self .skip_scope_toggle_button :self .skip_scope_toggle_button .setEnabled (file_download_mode_active )
@@ -2366,17 +2296,22 @@ class DownloaderApp (QWidget ):
if not can_show_external_log_option :
self .external_links_checkbox .setChecked (False )
+
if is_only_links :
self .progress_log_label .setText ("📜 Extracted Links Log:")
if self .external_log_output :self .external_log_output .hide ()
if self .log_splitter :self .log_splitter .setSizes ([self .height (),0 ])
+
+
do_clear_log_in_filter_change =True
if self .mega_download_log_preserved_once and self .only_links_log_display_mode ==LOG_DISPLAY_DOWNLOAD_PROGRESS :
do_clear_log_in_filter_change =False
+
if self .main_log_output and do_clear_log_in_filter_change :
self .log_signal .emit ("INTERNAL: _handle_filter_mode_change - About to clear log.")
self .main_log_output .clear ()
self .log_signal .emit ("INTERNAL: _handle_filter_mode_change - Log cleared by _handle_filter_mode_change.")
+
if self .main_log_output :self .main_log_output .setMinimumHeight (0 )
self .log_signal .emit ("="*20 +" Mode changed to: Only Links "+"="*20 )
self ._try_process_next_external_link ()
@@ -2395,8 +2330,8 @@ class DownloaderApp (QWidget ):
else :
self .progress_log_label .setText (self ._tr ("progress_log_label_text","📜 Progress Log:"))
self .update_external_links_setting (self .external_links_checkbox .isChecked ()if self .external_links_checkbox else False )
- if button != self.radio_more:
- self .log_signal .emit (f"="*20 +f" Mode changed to: {button .text ()} "+"="*20 )
+ self .log_signal .emit (f"="*20 +f" Mode changed to: {button .text ()} "+"="*20 )
+
if is_only_links :
self ._filter_links_log ()
@@ -2427,6 +2362,7 @@ class DownloaderApp (QWidget ):
self .update_custom_folder_visibility ()
self .update_ui_for_manga_mode (self .manga_mode_checkbox .isChecked ()if self .manga_mode_checkbox else False )
+
def _filter_links_log (self ):
if not (self .radio_only_links and self .radio_only_links .isChecked ()):return
@@ -2731,43 +2667,69 @@ class DownloaderApp (QWidget ):
def _handle_more_options_toggled(self, button, checked):
"""Shows the MoreOptionsDialog when the 'More' radio button is selected."""
+
+ # This block handles when the user clicks ON the "More" button.
if button == self.radio_more and checked:
current_scope = self.more_filter_scope or MoreOptionsDialog.SCOPE_CONTENT
current_format = self.text_export_format or 'pdf'
-
- dialog = MoreOptionsDialog(self, current_scope=current_scope, current_format=current_format, single_pdf_checked=self.single_pdf_setting)
+
+ dialog = MoreOptionsDialog(
+ self,
+ current_scope=current_scope,
+ current_format=current_format,
+ single_pdf_checked=self.single_pdf_setting
+ )
if dialog.exec_() == QDialog.Accepted:
self.more_filter_scope = dialog.get_selected_scope()
self.text_export_format = dialog.get_selected_format()
self.single_pdf_setting = dialog.get_single_pdf_state()
+ # Define the variable based on the dialog's result
+ is_any_pdf_mode = (self.text_export_format == 'pdf')
+
+ # Update the radio button text to reflect the choice
scope_text = "Comments" if self.more_filter_scope == MoreOptionsDialog.SCOPE_COMMENTS else "Description"
-
format_display = f" ({self.text_export_format.upper()})"
if self.single_pdf_setting:
format_display = " (Single PDF)"
- # --- NEW: Disable checkboxes if Single PDF is active ---
- if hasattr(self, 'use_multithreading_checkbox'):
- self.use_multithreading_checkbox.setChecked(False)
- self.use_multithreading_checkbox.setEnabled(False)
- if hasattr(self, 'use_subfolders_checkbox'):
- self.use_subfolders_checkbox.setChecked(False)
- self.use_subfolders_checkbox.setEnabled(False)
- else:
- # --- NEW: Re-enable checkboxes if Single PDF is not active ---
- if hasattr(self, 'use_multithreading_checkbox'): self.use_multithreading_checkbox.setEnabled(True)
- if hasattr(self, 'use_subfolders_checkbox'): self.use_subfolders_checkbox.setEnabled(True)
-
-
self.radio_more.setText(f"{scope_text}{format_display}")
-
+
+ # --- Logic to Disable/Enable Checkboxes ---
+ # Disable multithreading for ANY PDF export
+ if hasattr(self, 'use_multithreading_checkbox'):
+ self.use_multithreading_checkbox.setEnabled(not is_any_pdf_mode)
+ if is_any_pdf_mode:
+ self.use_multithreading_checkbox.setChecked(False)
+ self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked())
+
+ # Also disable subfolders for the "Single PDF" case, as it doesn't apply
+ if hasattr(self, 'use_subfolders_checkbox'):
+ self.use_subfolders_checkbox.setEnabled(not self.single_pdf_setting)
+ if self.single_pdf_setting:
+ self.use_subfolders_checkbox.setChecked(False)
+
self.log_signal.emit(f"ℹ️ 'More' filter scope set to: {scope_text}, Format: {self.text_export_format.upper()}")
self.log_signal.emit(f"ℹ️ Single PDF setting: {'Enabled' if self.single_pdf_setting else 'Disabled'}")
+ if is_any_pdf_mode:
+ self.log_signal.emit("ℹ️ Multithreading automatically disabled for PDF export.")
else:
+ # User cancelled the dialog, so revert to the 'All' option.
self.log_signal.emit("ℹ️ 'More' filter selection cancelled. Reverting to 'All'.")
self.radio_all.setChecked(True)
+ # This block handles when the user switches AWAY from "More" to another option.
+ elif button != self.radio_more and checked:
+ self.radio_more.setText("More")
+ self.more_filter_scope = None
+ self.single_pdf_setting = False
+ # Re-enable the checkboxes when switching to any non-PDF mode
+ if hasattr(self, 'use_multithreading_checkbox'):
+ self.use_multithreading_checkbox.setEnabled(True)
+ self._update_multithreading_for_date_mode()
+ if hasattr(self, 'use_subfolders_checkbox'):
+ self.use_subfolders_checkbox.setEnabled(True)
+
def delete_selected_character (self ):
global KNOWN_NAMES
selected_items =self .character_list .selectedItems ()
@@ -3115,392 +3077,761 @@ class DownloaderApp (QWidget ):
if total_posts >0 or processed_posts >0 :
self .file_progress_label .setText ("")
- def start_download(self, direct_api_url=None, override_output_dir=None, is_restore=False):
- global KNOWN_NAMES, BackendDownloadThread, PostProcessorWorker, extract_post_info, clean_folder_name, MAX_FILE_THREADS_PER_POST_OR_WORKER
- self._clear_stale_temp_files()
- self.session_temp_files = []
+ def start_download (self ,direct_api_url =None ,override_output_dir =None, is_restore=False ):
+ global KNOWN_NAMES ,BackendDownloadThread ,PostProcessorWorker ,extract_post_info ,clean_folder_name ,MAX_FILE_THREADS_PER_POST_OR_WORKER
- if self._is_download_active():
+ self._clear_stale_temp_files()
+ self.session_temp_files = []
+
+ if self ._is_download_active ():
QMessageBox.warning(self, "Busy", "A download is already in progress.")
- return False
+ return False
- if not (self.favorite_download_queue and not self.is_processing_favorites_queue):
- self.main_log_output.clear()
- if not direct_api_url and self.favorite_download_queue and not self.is_processing_favorites_queue:
- self.log_signal.emit(f"ℹ️ Detected {len(self.favorite_download_queue)} item(s) in the queue. Starting processing...")
- self.cancellation_message_logged_this_session = False
- self._process_next_favorite_download()
- return True
- if is_restore and self.interrupted_session_data:
- api_url = self.interrupted_session_data.get("ui_settings", {}).get("api_url")
- else:
- api_url = direct_api_url if direct_api_url else self.link_input.text().strip()
+ if not direct_api_url and self .favorite_download_queue and not self .is_processing_favorites_queue :
+ self .log_signal .emit (f"ℹ️ Detected {len (self .favorite_download_queue )} item(s) in the queue. Starting processing...")
+ self .cancellation_message_logged_this_session =False
+ self ._process_next_favorite_download ()
+ return True
if not is_restore and self.interrupted_session_data:
+ self.log_signal.emit("ℹ️ New download started. Discarding previous interrupted session.")
self._clear_session_file()
self.interrupted_session_data = None
self.is_restore_pending = False
+ api_url =direct_api_url if direct_api_url else self .link_input .text ().strip ()
+ self .download_history_candidates .clear ()
+ self._update_button_states_and_connections() # Ensure buttons are updated to active state
- self.download_history_candidates.clear()
- self._update_button_states_and_connections()
- if self.favorite_mode_checkbox and self.favorite_mode_checkbox.isChecked() and not direct_api_url and not api_url:
- QMessageBox.information(self, "Favorite Mode Active", "Favorite Mode is active. Please use the 'Favorite Artists' or 'Favorite Posts' buttons to start downloads in this mode, or uncheck 'Favorite Mode' to use the URL input.")
- self.set_ui_enabled(True)
- return False
+ if self .favorite_mode_checkbox and self .favorite_mode_checkbox .isChecked ()and not direct_api_url and not api_url :
+ QMessageBox .information (self ,"Favorite Mode Active",
+ "Favorite Mode is active. Please use the 'Favorite Artists' or 'Favorite Posts' buttons to start downloads in this mode, or uncheck 'Favorite Mode' to use the URL input.")
+ self .set_ui_enabled (True )
+ return False
- main_ui_download_dir = self.dir_input.text().strip()
+ main_ui_download_dir =self .dir_input .text ().strip ()
- if not api_url and not self.favorite_download_queue:
- QMessageBox.critical(self, "Input Error", "URL is required.")
- return False
- elif not api_url and self.favorite_download_queue:
- self.log_signal.emit("ℹ️ URL input is empty, but queue has items. Processing queue...")
- self.cancellation_message_logged_this_session = False
- self._process_next_favorite_download()
- return True
+ if not api_url and not self .favorite_download_queue :
+ QMessageBox .critical (self ,"Input Error","URL is required.")
+ return False
+ elif not api_url and self .favorite_download_queue :
+ self .log_signal .emit ("ℹ️ URL input is empty, but queue has items. Processing queue...")
+ self .cancellation_message_logged_this_session =False
+ self ._process_next_favorite_download ()
+ return True
- self.cancellation_message_logged_this_session = False
- use_subfolders = self.use_subfolders_checkbox.isChecked()
- use_post_subfolders = self.use_subfolder_per_post_checkbox.isChecked()
- compress_images = self.compress_images_checkbox.isChecked()
- download_thumbnails = self.download_thumbnails_checkbox.isChecked()
- use_multithreading_enabled_by_checkbox = self.use_multithreading_checkbox.isChecked()
-
- try:
- num_threads_from_gui = int(self.thread_count_input.text().strip())
- if num_threads_from_gui < 1: num_threads_from_gui = 1
- except ValueError:
- QMessageBox.critical(self, "Thread Count Error", "Invalid number of threads. Please enter a positive number.")
- return False
+ self .cancellation_message_logged_this_session =False
+ use_subfolders =self .use_subfolders_checkbox .isChecked ()
+ use_post_subfolders =self .use_subfolder_per_post_checkbox .isChecked ()
+ compress_images =self .compress_images_checkbox .isChecked ()
+ download_thumbnails =self .download_thumbnails_checkbox .isChecked ()
- if use_multithreading_enabled_by_checkbox:
- if num_threads_from_gui > MAX_THREADS:
- hard_warning_msg = (f"You've entered a thread count ({num_threads_from_gui}) exceeding the maximum of {MAX_THREADS}.\n\n"
- "Using an extremely high number of threads can lead to:\n"
- " - Diminishing returns (no significant speed increase).\n"
- " - Increased system instability or application crashes.\n"
- " - Higher chance of being rate-limited or temporarily IP-banned by the server.\n\n"
- f"The thread count has been automatically capped to {MAX_THREADS} for stability.")
- QMessageBox.warning(self, "High Thread Count Warning", hard_warning_msg)
- num_threads_from_gui = MAX_THREADS
- self.thread_count_input.setText(str(MAX_THREADS))
- self.log_signal.emit(f"⚠️ User attempted {num_threads_from_gui} threads, capped to {MAX_THREADS}.")
- if SOFT_WARNING_THREAD_THRESHOLD < num_threads_from_gui <= MAX_THREADS:
- soft_warning_msg_box = QMessageBox(self)
- soft_warning_msg_box.setIcon(QMessageBox.Question)
- soft_warning_msg_box.setWindowTitle("Thread Count Advisory")
- soft_warning_msg_box.setText(f"You've set the thread count to {num_threads_from_gui}.\n\n"
- "While this is within the allowed limit, using a high number of threads (typically above 40-50) can sometimes lead to:\n"
- " - Increased errors or failed file downloads.\n"
- " - Connection issues with the server.\n"
- " - Higher system resource usage.\n\n"
- "For most users and connections, 10-30 threads provide a good balance.\n\n"
- f"Do you want to proceed with {num_threads_from_gui} threads, or would you like to change the value?")
- proceed_button = soft_warning_msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole)
- change_button = soft_warning_msg_box.addButton("Change Thread Value", QMessageBox.RejectRole)
- soft_warning_msg_box.setDefaultButton(proceed_button)
- soft_warning_msg_box.setEscapeButton(change_button)
- soft_warning_msg_box.exec_()
- if soft_warning_msg_box.clickedButton() == change_button:
- self.log_signal.emit(f"ℹ️ User opted to change thread count from {num_threads_from_gui} after advisory.")
- self.thread_count_input.setFocus()
- self.thread_count_input.selectAll()
- return False
+ use_multithreading_enabled_by_checkbox =self .use_multithreading_checkbox .isChecked ()
+ try :
+ num_threads_from_gui =int (self .thread_count_input .text ().strip ())
+ if num_threads_from_gui <1 :num_threads_from_gui =1
+ except ValueError :
+ QMessageBox .critical (self ,"Thread Count Error","Invalid number of threads. Please enter a positive number.")
+ return False
- raw_skip_words = self.skip_words_input.text().strip()
- skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()]
- raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else ""
- allow_multipart = self.allow_multipart_download_setting
- remove_from_filename_words_list = [word.strip() for word in raw_remove_filename_words.split(',') if word.strip()]
- scan_content_for_images = self.scan_content_images_checkbox.isChecked() if hasattr(self, 'scan_content_images_checkbox') else False
- use_cookie_from_checkbox = self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False
- app_base_dir_for_cookies = os.path.dirname(self.config_file)
- cookie_text_from_input = self.cookie_text_input.text().strip() if hasattr(self, 'cookie_text_input') and use_cookie_from_checkbox else ""
- use_cookie_for_this_run = use_cookie_from_checkbox
- selected_cookie_file_path_for_backend = self.selected_cookie_filepath if use_cookie_from_checkbox and self.selected_cookie_filepath else None
+ if use_multithreading_enabled_by_checkbox :
+ if num_threads_from_gui >MAX_THREADS :
+ hard_warning_msg =(
+ f"You've entered a thread count ({num_threads_from_gui }) exceeding the maximum of {MAX_THREADS }.\n\n"
+ "Using an extremely high number of threads can lead to:\n"
+ " - Diminishing returns (no significant speed increase).\n"
+ " - Increased system instability or application crashes.\n"
+ " - Higher chance of being rate-limited or temporarily IP-banned by the server.\n\n"
+ f"The thread count has been automatically capped to {MAX_THREADS } for stability."
+ )
+ QMessageBox .warning (self ,"High Thread Count Warning",hard_warning_msg )
+ num_threads_from_gui =MAX_THREADS
+ self .thread_count_input .setText (str (MAX_THREADS ))
+ self .log_signal .emit (f"⚠️ User attempted {num_threads_from_gui } threads, capped to {MAX_THREADS }.")
+ if SOFT_WARNING_THREAD_THRESHOLD
tags found. Falling back to basic HTML cleaning for the whole block.")
+ text_with_br = re.sub(r'
', '\n', raw_text_content, flags=re.IGNORECASE)
+ cleaned_text = re.sub(r'<.*?>', '', text_with_br)
+ else:
+ cleaned_paragraphs_list = []
+ for p_content in html_paragraphs:
+ p_with_br = re.sub(r'
', '\n', p_content, flags=re.IGNORECASE)
+ p_cleaned = re.sub(r'<.*?>', '', p_with_br)
+ p_final = html.unescape(p_cleaned).strip()
+ if p_final:
+ cleaned_paragraphs_list.append(p_final)
+ cleaned_text = '\n\n'.join(cleaned_paragraphs_list)
+ cleaned_text = cleaned_text.replace('…', '...')
+
+ # --- Logic for Single PDF Mode (File-based) ---
+ if self.single_pdf_mode:
+ if not cleaned_text:
+ return 0, 0, [], [], [], None, None
+
+ content_data = {
+ 'title': post_title,
+ 'content': cleaned_text,
+ 'published': self.post.get('published') or self.post.get('added')
+ }
+ temp_dir = os.path.join(self.app_base_dir, "appdata")
+ os.makedirs(temp_dir, exist_ok=True)
+ temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json"
+ temp_filepath = os.path.join(temp_dir, temp_filename)
+
+ try:
+ with open(temp_filepath, 'w', encoding='utf-8') as f:
+ json.dump(content_data, f, indent=2)
+ self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.")
+ return 0, 0, [], [], [], None, temp_filepath
+ except Exception as e:
+ self.logger(f" ❌ Failed to write temporary file for single PDF: {e}")
+ return 0, 0, [], [], [], None, None
+
+ # --- Logic for Individual File Saving ---
+ else:
+ file_extension = self.text_export_format
+ txt_filename = clean_filename(post_title) + f".{file_extension}"
+ final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)
+
+ try:
+ os.makedirs(determined_post_save_path_for_history, exist_ok=True)
+ base, ext = os.path.splitext(final_save_path)
+ counter = 1
+ while os.path.exists(final_save_path):
+ final_save_path = f"{base}_{counter}{ext}"
+ counter += 1
+
+ if file_extension == 'pdf':
+ if FPDF:
+ self.logger(f" Converting to PDF...")
+ pdf = PDF()
+ font_path = ""
+ if self.project_root_dir:
+ font_path = os.path.join(self.project_root_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf')
+ try:
+ if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}")
+ pdf.add_font('DejaVu', '', font_path, uni=True)
+ pdf.set_font('DejaVu', '', 12)
+ except Exception as font_error:
+ self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.")
+ pdf.set_font('Arial', '', 12)
+ pdf.add_page()
+ pdf.multi_cell(0, 5, cleaned_text)
+ pdf.output(final_save_path)
+ else:
+ self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
+ final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
+ with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
+
+ elif file_extension == 'docx':
+ if Document:
+ self.logger(f" Converting to DOCX...")
+ document = Document()
+ document.add_paragraph(cleaned_text)
+ document.save(final_save_path)
+ else:
+ self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.")
+ final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
+ with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
+
+ else: # Default to TXT
+ with open(final_save_path, 'w', encoding='utf-8') as f:
+ f.write(cleaned_text)
+
+ self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
+ return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None
+ except Exception as e:
+ self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}")
+ return 0, num_potential_files_in_post, [], [], [], None, None
+
+ if not self .extract_links_only and self .use_subfolders and self .skip_words_list :
+ if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
+ for folder_name_to_check in base_folder_names_for_post_content :
+ if not folder_name_to_check :continue
+ if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ):
+ matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word")
+ self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.")
+ return 0 ,num_potential_files_in_post ,[],[],[],None, None
+ if (self .show_external_links or self .extract_links_only )and post_content_html :
+ if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
+ try :
+ mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b')
+ unique_links_data ={}
+ for match in link_pattern .finditer (post_content_html ):
+ link_url =match .group (1 ).strip ()
+ link_url =html .unescape (link_url )
+ link_inner_text =match .group (2 )
+ if not any (ext in link_url .lower ()for ext in ['.css','.js','.ico','.xml','.svg'])and not link_url .startswith ('javascript:')and link_url not in unique_links_data :
+ clean_link_text =re .sub (r'<.*?>','',link_inner_text )
+ clean_link_text =html .unescape (clean_link_text ).strip ()
+ display_text =clean_link_text if clean_link_text else "[Link]"
+ unique_links_data [link_url ]=display_text
+ links_emitted_count =0
+ scraped_platforms ={'kemono','coomer','patreon'}
+ for link_url ,link_text in unique_links_data .items ():
+ platform =get_link_platform (link_url )
+ decryption_key_found =""
+ if platform =='mega':
+ parsed_mega_url =urlparse (link_url )
+ if parsed_mega_url .fragment :
+ potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ]
+ if mega_key_pattern .fullmatch (potential_key_from_fragment ):
+ decryption_key_found =potential_key_from_fragment
+
+ if not decryption_key_found and link_text :
+ key_match_in_text =mega_key_pattern .search (link_text )
+ if key_match_in_text :
+ decryption_key_found =key_match_in_text .group (1 )
+ if not decryption_key_found and self .extract_links_only and post_content_html :
+ key_match_in_content =mega_key_pattern .search (strip_html_tags (post_content_html ))
+ if key_match_in_content :
+ decryption_key_found =key_match_in_content .group (1 )
+ if platform not in scraped_platforms :
+ self ._emit_signal ('external_link',post_title ,link_text ,link_url ,platform ,decryption_key_found or "")
+ links_emitted_count +=1
+ if links_emitted_count >0 :self .logger (f" 🔗 Found {links_emitted_count } potential external link(s) in post content.")
+ except Exception as e :self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}")
+ if self .extract_links_only :
+ self .logger (f" Extract Links Only mode: Finished processing post {post_id } for links.")
+ return 0 ,0 ,[],[],[],None
+ all_files_from_post_api =[]
+ api_file_domain =urlparse (self .api_url_input ).netloc
+ if not api_file_domain or not any (d in api_file_domain .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']):
+ api_file_domain ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party"
+ if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'):
+ file_path =post_main_file_info ['path'].lstrip ('/')
+ original_api_name =post_main_file_info .get ('name')or os .path .basename (file_path )
+ if original_api_name :
+ all_files_from_post_api .append ({
+ 'url':f"https://{api_file_domain }{file_path }"if file_path .startswith ('/')else f"https://{api_file_domain }/data/{file_path }",
+ 'name':original_api_name ,
+ '_original_name_for_log':original_api_name ,
+ '_is_thumbnail':is_image (original_api_name )
+ })
+ else :self .logger (f" ⚠️ Skipping main file for post {post_id }: Missing name (Path: {file_path })")
+ for idx ,att_info in enumerate (post_attachments ):
+ if isinstance (att_info ,dict )and att_info .get ('path'):
+ att_path =att_info ['path'].lstrip ('/')
+ original_api_att_name =att_info .get ('name')or os .path .basename (att_path )
+ if original_api_att_name :
+ all_files_from_post_api .append ({
+ 'url':f"https://{api_file_domain }{att_path }"if att_path .startswith ('/')else f"https://{api_file_domain }/data/{att_path }",
+ 'name':original_api_att_name ,
+ '_original_name_for_log':original_api_att_name ,
+ '_is_thumbnail':is_image (original_api_att_name )
+ })
+ else :self .logger (f" ⚠️ Skipping attachment {idx +1 } for post {post_id }: Missing name (Path: {att_path })")
+ else :self .logger (f" ⚠️ Skipping invalid attachment {idx +1 } for post {post_id }: {str (att_info )[:100 ]}")
+ if self .scan_content_for_images and post_content_html and not self .extract_links_only :
+ self .logger (f" Scanning post content for additional image URLs (Post ID: {post_id })...")
+ parsed_input_url =urlparse (self .api_url_input )
+ base_url_for_relative_paths =f"{parsed_input_url .scheme }://{parsed_input_url .netloc }"
+ img_ext_pattern ="|".join (ext .lstrip ('.')for ext in IMAGE_EXTENSIONS )
+ direct_url_pattern_str =r"""(?i)\b(https?://[^\s"'<>\[\]\{\}\|\^\\^~\[\]`]+\.(?:"""+img_ext_pattern +r"""))\b"""
+ img_tag_src_pattern_str =r"""]*?src\s*=\s*["']([^"']+)["']"""
+ found_image_sources =set ()
+ for direct_url_match in re .finditer (direct_url_pattern_str ,post_content_html ):
+ found_image_sources .add (direct_url_match .group (1 ))
+ for img_tag_match in re .finditer (img_tag_src_pattern_str ,post_content_html ,re .IGNORECASE ):
+ src_attr =img_tag_match .group (1 ).strip ()
+ src_attr =html .unescape (src_attr )
+ if not src_attr :continue
+ resolved_src_url =""
+ if src_attr .startswith (('http://','https://')):
+ resolved_src_url =src_attr
+ elif src_attr .startswith ('//'):
+ resolved_src_url =f"{parsed_input_url .scheme }:{src_attr }"
+ elif src_attr .startswith ('/'):
+ resolved_src_url =f"{base_url_for_relative_paths }{src_attr }"
+ if resolved_src_url :
+ parsed_resolved_url =urlparse (resolved_src_url )
+ if any (parsed_resolved_url .path .lower ().endswith (ext )for ext in IMAGE_EXTENSIONS ):
+ found_image_sources .add (resolved_src_url )
+ if found_image_sources :
+ self .logger (f" Found {len (found_image_sources )} potential image URLs/sources in content.")
+ existing_urls_in_api_list ={f_info ['url']for f_info in all_files_from_post_api }
+ for found_url in found_image_sources :
+ if self .check_cancel ():break
+ if found_url in existing_urls_in_api_list :
+ self .logger (f" Skipping URL from content (already in API list or previously added from content): {found_url [:70 ]}...")
+ continue
+ try :
+ parsed_found_url =urlparse (found_url )
+ url_filename =os .path .basename (parsed_found_url .path )
+ if not url_filename or not is_image (url_filename ):
+ self .logger (f" Skipping URL from content (no filename part or not an image extension): {found_url [:70 ]}...")
+ continue
+ self .logger (f" Adding image from content: {url_filename } (URL: {found_url [:70 ]}...)")
+ all_files_from_post_api .append ({
+ 'url':found_url ,
+ 'name':url_filename ,
+ '_original_name_for_log':url_filename ,
+ '_is_thumbnail':False ,
+ '_from_content_scan':True
+ })
+ existing_urls_in_api_list .add (found_url )
+ except Exception as e_url_parse :
+ self .logger (f" Error processing URL from content '{found_url [:70 ]}...': {e_url_parse }")
+ else :
+ self .logger (f" No additional image URLs found in post content scan for post {post_id }.")
+ if self .download_thumbnails :
+ if self .scan_content_for_images :
+ self .logger (f" Mode: 'Download Thumbnails Only' + 'Scan Content for Images' active. Prioritizing images from content scan for post {post_id }.")
+ all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_from_content_scan')]
+ if not all_files_from_post_api :
+ self .logger (f" -> No images found via content scan for post {post_id } in this combined mode.")
+ return 0 ,0 ,[],[],[],None
+ else :
+ self .logger (f" Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id }.")
+ all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_is_thumbnail')]
+ if not all_files_from_post_api :
+ self .logger (f" -> No API image thumbnails found for post {post_id } in thumbnail-only mode.")
+ return 0 ,0 ,[],[],[],None
+ if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED :
+ def natural_sort_key_for_files (file_api_info ):
+ name =file_api_info .get ('_original_name_for_log','').lower ()
+ return [int (text )if text .isdigit ()else text for text in re .split ('([0-9]+)',name )]
+ all_files_from_post_api .sort (key =natural_sort_key_for_files )
+ self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.")
+ if not all_files_from_post_api :
+ self .logger (f" No files found to download for post {post_id }.")
+ return 0 ,0 ,[],[],[],None
+ files_to_download_info_list =[]
+ processed_original_filenames_in_this_post =set ()
+
+ if self.keep_in_post_duplicates:
+ # If we keep duplicates, just add every file to the list to be processed.
+ # The downstream hash check and rename-on-collision logic will handle them.
+ files_to_download_info_list.extend(all_files_from_post_api)
+ self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.")
+ else:
+ # This is the original logic that skips duplicates by name within a post.
+ for file_info in all_files_from_post_api:
+ current_api_original_filename = file_info.get('_original_name_for_log')
+ if current_api_original_filename in processed_original_filenames_in_this_post:
+ self.logger(f" -> Skip Duplicate Original Name (within post {post_id}): '{current_api_original_filename}' already processed/listed for this post.")
+ total_skipped_this_post += 1
+ else:
+ files_to_download_info_list.append(file_info)
+ if current_api_original_filename:
+ processed_original_filenames_in_this_post.add(current_api_original_filename)
+
+ if not files_to_download_info_list:
+
+ self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.")
+ return 0 ,total_skipped_this_post ,[],[],[],None
+
+ self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.")
+ with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool :
+ futures_list =[]
+ for file_idx ,file_info_to_dl in enumerate (files_to_download_info_list ):
+ if self ._check_pause (f"File processing loop for post {post_id }, file {file_idx }"):break
+ if self .check_cancel ():break
+ current_api_original_filename =file_info_to_dl .get ('_original_name_for_log')
+ file_is_candidate_by_char_filter_scope =False
+ char_filter_info_that_matched_file =None
+ if not current_character_filters :
+ file_is_candidate_by_char_filter_scope =True
+ else :
+ if self .char_filter_scope ==CHAR_SCOPE_FILES :
+ for filter_item_obj in current_character_filters :
+ terms_to_check_for_file =list (filter_item_obj ["aliases"])
+ if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file :
+ terms_to_check_for_file .append (filter_item_obj ["name"])
+ unique_terms_for_file_check =list (set (terms_to_check_for_file ))
+ for term_to_match in unique_terms_for_file_check :
+ if is_filename_match_for_character (current_api_original_filename ,term_to_match ):
+ file_is_candidate_by_char_filter_scope =True
+ char_filter_info_that_matched_file =filter_item_obj
+ self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Files.")
+ break
+ if file_is_candidate_by_char_filter_scope :break
+ elif self .char_filter_scope ==CHAR_SCOPE_TITLE :
+ if post_is_candidate_by_title_char_match :
+ file_is_candidate_by_char_filter_scope =True
+ char_filter_info_that_matched_file =char_filter_that_matched_title
+ self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Title.")
+ elif self .char_filter_scope ==CHAR_SCOPE_BOTH :
+ if post_is_candidate_by_title_char_match :
+ file_is_candidate_by_char_filter_scope =True
+ char_filter_info_that_matched_file =char_filter_that_matched_title
+ self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Both (Title part).")
+ else :
+ for filter_item_obj_both_file in current_character_filters :
+ terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"])
+ if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both :
+ terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"])
+ unique_terms_for_file_both_check =list (set (terms_to_check_for_file_both ))
+ for term_to_match in unique_terms_for_file_both_check :
+ if is_filename_match_for_character (current_api_original_filename ,term_to_match ):
+ file_is_candidate_by_char_filter_scope =True
+ char_filter_info_that_matched_file =filter_item_obj_both_file
+ self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Both (File part).")
+ break
+ if file_is_candidate_by_char_filter_scope :break
+ elif self .char_filter_scope ==CHAR_SCOPE_COMMENTS :
+ if post_is_candidate_by_file_char_match_in_comment_scope :
+ file_is_candidate_by_char_filter_scope =True
+ char_filter_info_that_matched_file =char_filter_that_matched_file_in_comment_scope
+ self .logger (f" File '{current_api_original_filename }' is candidate because a file in this post matched char filter (Overall Scope: Comments).")
+ elif post_is_candidate_by_comment_char_match :
+ file_is_candidate_by_char_filter_scope =True
+ char_filter_info_that_matched_file =char_filter_that_matched_comment
+ self .logger (f" File '{current_api_original_filename }' is candidate because post comments matched char filter (Overall Scope: Comments).")
+ if not file_is_candidate_by_char_filter_scope :
+ self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.")
+ total_skipped_this_post +=1
+ continue
+
+
+ target_base_folders_for_this_file_iteration =[]
+
+ if current_character_filters :
+ char_title_subfolder_name =None
+ if self .target_post_id_from_initial_url and self .custom_folder_name :
+ char_title_subfolder_name =self .custom_folder_name
+ elif char_filter_info_that_matched_file :
+ char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"])
+ elif char_filter_that_matched_title :
+ char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"])
+ elif char_filter_that_matched_comment :
+ char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"])
+ if char_title_subfolder_name :
+ target_base_folders_for_this_file_iteration .append (char_title_subfolder_name )
+ else :
+ self .logger (f"⚠️ File '{current_api_original_filename }' candidate by char filter, but no folder name derived. Using post title.")
+ target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title ))
+ else :
+ if base_folder_names_for_post_content :
+ target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content )
+ else :
+ target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title ))
+
+ if not target_base_folders_for_this_file_iteration :
+ target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content"))
+
+ for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration :
+ current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root
+ if self .use_subfolders and target_base_folder_name_for_instance :
+ current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance )
+ if self .use_post_subfolders :
+
+ current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name )
+
+ manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None
+ manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None
+
+
+ folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title )
+
+ futures_list .append (file_pool .submit (
+ self ._download_single_file ,
+ file_info =file_info_to_dl ,
+ target_folder_path =current_path_for_file_instance ,
+ headers =headers ,original_post_id_for_log =post_id ,skip_event =self .skip_current_file_flag ,
+ post_title =post_title ,manga_date_file_counter_ref =manga_date_counter_to_pass ,
+ manga_global_file_counter_ref =manga_global_counter_to_pass ,folder_context_name_for_history =folder_context_for_file ,
+ file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list )
+ ))
+
+ for future in as_completed (futures_list ):
+ if self .check_cancel ():
+ for f_to_cancel in futures_list :
+ if not f_to_cancel .done ():
+ f_to_cancel .cancel ()
+ break
+ try :
+ dl_count ,skip_count ,actual_filename_saved ,original_kept_flag ,status ,details_for_dialog_or_retry =future .result ()
+ total_downloaded_this_post +=dl_count
+ total_skipped_this_post +=skip_count
+ if original_kept_flag and dl_count >0 and actual_filename_saved :
+ kept_original_filenames_for_log .append (actual_filename_saved )
+ if status ==FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER and details_for_dialog_or_retry :
+ retryable_failures_this_post .append (details_for_dialog_or_retry )
+ elif status ==FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION and details_for_dialog_or_retry :
+ permanent_failures_this_post .append (details_for_dialog_or_retry )
+ except CancelledError :
+ self .logger (f" File download task for post {post_id } was cancelled.")
+ total_skipped_this_post +=1
+ except Exception as exc_f :
+ self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }")
+ total_skipped_this_post +=1
+ self ._emit_signal ('file_progress',"",None )
+
+ # After a post's files are all processed, update the session file to mark this post as done.
+ if self.session_file_path and self.session_lock:
+ try:
+ with self.session_lock:
+ if os.path.exists(self.session_file_path): # Only update if the session file exists
+ # Read current state
+ with open(self.session_file_path, 'r', encoding='utf-8') as f:
+ session_data = json.load(f)
+
+ if 'download_state' not in session_data:
+ session_data['download_state'] = {}
+
+ # Add processed ID
+ if not isinstance(session_data['download_state'].get('processed_post_ids'), list):
+ session_data['download_state']['processed_post_ids'] = []
+ session_data['download_state']['processed_post_ids'].append(self.post.get('id'))
+
+ # Add any permanent failures from this worker to the session file
+ if permanent_failures_this_post:
+ if not isinstance(session_data['download_state'].get('permanently_failed_files'), list):
+ session_data['download_state']['permanently_failed_files'] = []
+ # To avoid duplicates if the same post is somehow re-processed
+ existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']}
+ for failure in permanent_failures_this_post:
+ if failure.get('file_info', {}).get('url') not in existing_failed_urls:
+ session_data['download_state']['permanently_failed_files'].append(failure)
+
+ # Write to temp file and then atomically replace
+ temp_file_path = self.session_file_path + ".tmp"
+ with open(temp_file_path, 'w', encoding='utf-8') as f_tmp:
+ json.dump(session_data, f_tmp, indent=2)
+ os.replace(temp_file_path, self.session_file_path)
+ except Exception as e:
+ self.logger(f"⚠️ Could not update session file for post {post_id}: {e}")
+
+ if not self .extract_links_only and (total_downloaded_this_post >0 or not (
+ (current_character_filters and (
+ (self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or
+ (self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match )
+ ))or
+ (self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH )and any (sw .lower ()in post_title .lower ()for sw in self .skip_words_list ))
+ )):
+ top_file_name_for_history ="N/A"
+ if post_main_file_info and post_main_file_info .get ('name'):
+ top_file_name_for_history =post_main_file_info ['name']
+ elif post_attachments and post_attachments [0 ].get ('name'):
+ top_file_name_for_history =post_attachments [0 ]['name']
+
+ history_data_for_this_post ={
+ 'post_title':post_title ,'post_id':post_id ,
+ 'top_file_name':top_file_name_for_history ,
+ 'num_files':num_potential_files_in_post ,
+ 'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown",
+ 'download_location':determined_post_save_path_for_history ,
+ 'service':self .service ,'user_id':self .user_id ,
+ }
+ if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled.");
+ else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }")
+
+ if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 :
+
+ path_to_check_for_emptiness =determined_post_save_path_for_history
+ try :
+ if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ):
+ self .logger (f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness }'")
+ os .rmdir (path_to_check_for_emptiness )
+ except OSError as e_rmdir :
+ self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }")
+
+ result_tuple = (total_downloaded_this_post, total_skipped_this_post,
+ kept_original_filenames_for_log, retryable_failures_this_post,
+ permanent_failures_this_post, history_data_for_this_post,
+ None) # The 7th item is None because we already saved the temp file
+
+ # In Single PDF mode, the 7th item is the temp file path we created.
+ if self.single_pdf_mode and os.path.exists(temp_filepath):
+ result_tuple = (0, 0, [], [], [], None, temp_filepath)
+
+ self._emit_signal('worker_finished', result_tuple)
+ return # The method now returns nothing.
+
+class DownloadThread (QThread ):
+ """Single-threaded download driver.
+
+ Fetches post batches via ``download_from_api`` and processes each post
+ sequentially with a ``PostProcessorWorker``, relaying the worker's
+ signals to the main window. Runs on its own QThread so the UI stays
+ responsive; cancellation and pause are coordinated through the shared
+ ``cancellation_event`` / ``pause_event`` threading primitives supplied
+ by the caller.
+ """
+ # --- Signals consumed by the main window ---
+ progress_signal =pyqtSignal (str )  # free-form log line for the UI log pane
+ add_character_prompt_signal =pyqtSignal (str )
+ file_download_status_signal =pyqtSignal (bool )
+ finished_signal =pyqtSignal (int ,int ,bool ,list )  # (downloaded, skipped, was_cancelled, kept_original_names)
+ external_link_signal =pyqtSignal (str ,str ,str ,str ,str )
+ file_successfully_downloaded_signal =pyqtSignal (dict )
+ file_progress_signal =pyqtSignal (str ,object )
+ retryable_file_failed_signal =pyqtSignal (list )
+ missed_character_post_signal =pyqtSignal (str ,str )
+ post_processed_for_history_signal =pyqtSignal (dict )
+ final_history_entries_signal =pyqtSignal (list )
+ permanent_file_failed_signal =pyqtSignal (list )
+ def __init__ (self ,api_url_input ,output_dir ,known_names_copy ,
+ cancellation_event ,
+ pause_event ,filter_character_list =None ,dynamic_character_filter_holder =None ,
+ filter_mode ='all',skip_zip =True ,skip_rar =True ,
+ use_subfolders =True ,use_post_subfolders =False ,custom_folder_name =None ,compress_images =False ,
+ download_thumbnails =False ,service =None ,user_id =None ,
+ downloaded_files =None ,downloaded_file_hashes =None ,downloaded_files_lock =None ,downloaded_file_hashes_lock =None ,
+ skip_words_list =None ,
+ skip_words_scope =SKIP_SCOPE_FILES ,
+ show_external_links =False ,
+ extract_links_only =False ,
+ num_file_threads_for_worker =1 ,
+ skip_current_file_flag =None ,
+ start_page =None ,end_page =None ,
+ target_post_id_from_initial_url =None ,
+ manga_mode_active =False ,
+ unwanted_keywords =None ,
+ manga_filename_style =STYLE_POST_TITLE ,
+ char_filter_scope =CHAR_SCOPE_FILES ,
+ remove_from_filename_words_list =None ,
+ manga_date_prefix =MANGA_DATE_PREFIX_DEFAULT ,
+ allow_multipart_download =True ,
+ selected_cookie_file =None ,
+ override_output_dir =None ,
+ app_base_dir =None ,
+ manga_date_file_counter_ref =None ,
+ manga_global_file_counter_ref =None ,
+ use_cookie =False ,
+ scan_content_for_images =False ,
+ creator_download_folder_ignore_words =None ,
+ use_date_prefix_for_subfolder=False,
+ keep_in_post_duplicates=False,
+ cookie_text ="",
+ session_file_path=None,
+ session_lock=None,
+ text_only_scope=None,
+ text_export_format='txt',
+ single_pdf_mode=False,
+ project_root_dir=None,
+ ):
+ """Capture the full download configuration on ``self``.
+
+ Shared mutable state (``downloaded_files``/``downloaded_file_hashes``
+ and their locks, the manga counter refs, ``session_lock``) is stored
+ by reference and shared with the main window / other workers, not
+ copied. ``known_names_copy`` alone is defensively copied below.
+ """
+ super ().__init__ ()
+ self .api_url_input =api_url_input
+ self .output_dir =output_dir
+ self .known_names =list (known_names_copy )
+ self .cancellation_event =cancellation_event
+ self .pause_event =pause_event
+ self .skip_current_file_flag =skip_current_file_flag
+ self .initial_target_post_id =target_post_id_from_initial_url
+ self .filter_character_list_objects_initial =filter_character_list if filter_character_list else []
+ self .dynamic_filter_holder =dynamic_character_filter_holder
+ self .filter_mode =filter_mode
+ self .skip_zip =skip_zip
+ self .skip_rar =skip_rar
+ self .use_subfolders =use_subfolders
+ self .use_post_subfolders =use_post_subfolders
+ self .custom_folder_name =custom_folder_name
+ self .compress_images =compress_images
+ self .download_thumbnails =download_thumbnails
+ self .service =service
+ self .user_id =user_id
+ self .skip_words_list =skip_words_list if skip_words_list is not None else []
+ self .skip_words_scope =skip_words_scope
+ self .downloaded_files =downloaded_files
+ self .downloaded_files_lock =downloaded_files_lock
+ self .downloaded_file_hashes =downloaded_file_hashes
+ self .downloaded_file_hashes_lock =downloaded_file_hashes_lock
+ self ._add_character_response =None
+ # Guards _add_character_response, which is written from the GUI thread
+ # via receive_add_character_result().
+ self .prompt_mutex =QMutex ()
+ self .show_external_links =show_external_links
+ self .extract_links_only =extract_links_only
+ self .num_file_threads_for_worker =num_file_threads_for_worker
+ self .start_page =start_page
+ self .end_page =end_page
+ self .manga_mode_active =manga_mode_active
+ self .unwanted_keywords =unwanted_keywords if unwanted_keywords is not None else {'spicy','hd','nsfw','4k','preview','teaser','clip'}
+ self .manga_filename_style =manga_filename_style
+ self .char_filter_scope =char_filter_scope
+ self .remove_from_filename_words_list =remove_from_filename_words_list
+ self .manga_date_prefix =manga_date_prefix
+ self .allow_multipart_download =allow_multipart_download
+ self .selected_cookie_file =selected_cookie_file
+ self .app_base_dir =app_base_dir
+ self .cookie_text =cookie_text
+ self .use_cookie =use_cookie
+ self .override_output_dir =override_output_dir
+ self .manga_date_file_counter_ref =manga_date_file_counter_ref
+ self .scan_content_for_images =scan_content_for_images
+ self .creator_download_folder_ignore_words =creator_download_folder_ignore_words
+ self.use_date_prefix_for_subfolder = use_date_prefix_for_subfolder
+ self.keep_in_post_duplicates = keep_in_post_duplicates
+ self .manga_global_file_counter_ref =manga_global_file_counter_ref
+ self.session_file_path = session_file_path
+ self.session_lock = session_lock
+ # NOTE(review): nothing in this class ever appends to this buffer, so
+ # the len()<8 guard in run() is currently always true — confirm intent.
+ self.history_candidates_buffer =deque (maxlen =8 )
+ self.text_only_scope = text_only_scope
+ self.text_export_format = text_export_format
+ self.single_pdf_mode = single_pdf_mode  # aggregate extracted text into one PDF at the end
+ self.project_root_dir = project_root_dir
+
+ # NOTE(review): ``Image`` is not among this module's visible imports —
+ # presumably ``from PIL import Image`` (guarded) at file top; confirm,
+ # otherwise this check raises NameError instead of degrading gracefully.
+ if self .compress_images and Image is None :
+ self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
+ self .compress_images =False
+ def logger (self ,message ):
+ """Forward a log message to the UI via progress_signal."""
+ self .progress_signal .emit (str (message ))
+ def isInterruptionRequested (self ):
+ # Combine the app-wide cancellation event with Qt's own interruption flag.
+ return self .cancellation_event .is_set ()or super ().isInterruptionRequested ()
+ def _check_pause_self (self ,context_message ="DownloadThread operation"):
+ """Block while the pause event is set; return True if cancelled while paused."""
+ if self .pause_event and self .pause_event .is_set ():
+ self .logger (f" {context_message } paused...")
+ while self .pause_event .is_set ():
+ if self .isInterruptionRequested ():
+ self .logger (f" {context_message } cancelled while paused.")
+ return True
+ time .sleep (0.5 )
+ if not self .isInterruptionRequested ():self .logger (f" {context_message } resumed.")
+ return False
+ def skip_file (self ):
+ """Request that the file currently downloading be skipped (single-thread mode)."""
+ if self .isRunning ()and self .skip_current_file_flag :
+ self .logger ("⏭️ Skip requested for current file (single-thread mode).")
+ self .skip_current_file_flag .set ()
+ else :self .logger ("ℹ️ Skip file: No download active or skip flag not available for current context.")
+
+ def run (self ):
+ """
+ Main execution method for the single-threaded download process.
+
+ Streams post batches from download_from_api, runs one
+ PostProcessorWorker per post synchronously, aggregates totals, and
+ always emits finished_signal from the finally block.
+ """
+ grand_total_downloaded_files =0
+ grand_total_skipped_files =0
+ grand_list_of_kept_original_filenames =[]
+ was_process_cancelled =False
+
+ # NOTE(review): both manga-mode counter initializations below are
+ # stubbed out (`pass`) — confirm the shared counter refs are expected
+ # to be supplied by the caller instead of being initialized here.
+ if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED and not self .extract_links_only and self .manga_date_file_counter_ref is None :
+ # counter initialization intentionally absent here (see note above)
+ pass
+ if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING and not self .extract_links_only and self .manga_global_file_counter_ref is None :
+ # counter initialization intentionally absent here (see note above)
+ pass
+
+ worker_signals_obj = PostProcessorSignals()
+ try :
+ # Relay the worker's signals straight to this thread's own signals.
+ worker_signals_obj.progress_signal.connect(self.progress_signal)
+ worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
+ worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
+ worker_signals_obj.external_link_signal.connect(self.external_link_signal)
+ worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
+ worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal)
+ worker_signals_obj.worker_finished_signal.connect(lambda result: None) # Connect to dummy lambda to avoid errors
+
+ self.logger(" Starting post fetch (single-threaded download process)...")
+ post_generator = download_from_api(
+ self.api_url_input,
+ logger=self.logger,
+ start_page=self.start_page,
+ end_page=self.end_page,
+ manga_mode=self.manga_mode_active,
+ cancellation_event=self.cancellation_event,
+ pause_event=self.pause_event,
+ use_cookie=self.use_cookie,
+ cookie_text=self.cookie_text,
+ selected_cookie_file=self.selected_cookie_file,
+ app_base_dir=self.app_base_dir,
+ manga_filename_style_for_sort_check=self.manga_filename_style if self.manga_mode_active else None
+ )
+
+ for posts_batch_data in post_generator:
+ if self.isInterruptionRequested():
+ was_process_cancelled = True
+ break
+ for individual_post_data in posts_batch_data:
+ if self.isInterruptionRequested():
+ was_process_cancelled = True
+ break
+
+ # One worker per post, run synchronously on this thread.
+ post_processing_worker = PostProcessorWorker(
+ post_data=individual_post_data,
+ download_root=self.output_dir,
+ known_names=self.known_names,
+ filter_character_list=self.filter_character_list_objects_initial,
+ dynamic_character_filter_holder=self.dynamic_filter_holder,
+ unwanted_keywords=self.unwanted_keywords,
+ filter_mode=self.filter_mode,
+ skip_zip=self.skip_zip, skip_rar=self.skip_rar,
+ use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders,
+ target_post_id_from_initial_url=self.initial_target_post_id,
+ custom_folder_name=self.custom_folder_name,
+ compress_images=self.compress_images, download_thumbnails=self.download_thumbnails,
+ service=self.service, user_id=self.user_id,
+ api_url_input=self.api_url_input,
+ pause_event=self.pause_event,
+ cancellation_event=self.cancellation_event,
+ emitter=worker_signals_obj,
+ downloaded_files=self.downloaded_files,
+ downloaded_file_hashes=self.downloaded_file_hashes,
+ downloaded_files_lock=self.downloaded_files_lock,
+ downloaded_file_hashes_lock=self.downloaded_file_hashes_lock,
+ skip_words_list=self.skip_words_list,
+ skip_words_scope=self.skip_words_scope,
+ show_external_links=self.show_external_links,
+ extract_links_only=self.extract_links_only,
+ num_file_threads=self.num_file_threads_for_worker,
+ skip_current_file_flag=self.skip_current_file_flag,
+ manga_mode_active=self.manga_mode_active,
+ manga_filename_style=self.manga_filename_style,
+ manga_date_prefix=self.manga_date_prefix,
+ char_filter_scope=self.char_filter_scope,
+ remove_from_filename_words_list=self.remove_from_filename_words_list,
+ allow_multipart_download=self.allow_multipart_download,
+ selected_cookie_file=self.selected_cookie_file,
+ app_base_dir=self.app_base_dir,
+ cookie_text=self.cookie_text,
+ override_output_dir=self.override_output_dir,
+ manga_global_file_counter_ref=self.manga_global_file_counter_ref,
+ use_cookie=self.use_cookie,
+ manga_date_file_counter_ref=self.manga_date_file_counter_ref,
+ use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder,
+ keep_in_post_duplicates=self.keep_in_post_duplicates,
+ creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
+ session_file_path=self.session_file_path,
+ session_lock=self.session_lock,
+ text_only_scope=self.text_only_scope,
+ text_export_format=self.text_export_format,
+ single_pdf_mode=self.single_pdf_mode,
+ project_root_dir=self.project_root_dir
+ )
+ try:
+ # Unpack the 7-tuple returned from the worker.
+ # NOTE(review): elsewhere in this file the worker appears to
+ # emit its result via worker_finished_signal and return None
+ # ("The method now returns nothing.") — confirm process()
+ # really returns the tuple here, otherwise this unpack raises
+ # TypeError for every post.
+ (dl_count, skip_count, kept_originals_this_post,
+ retryable_failures, permanent_failures,
+ history_data, temp_filepath) = post_processing_worker.process()
+
+ grand_total_downloaded_files += dl_count
+ grand_total_skipped_files += skip_count
+
+ if kept_originals_this_post:
+ grand_list_of_kept_original_filenames.extend(kept_originals_this_post)
+ if retryable_failures:
+ self.retryable_file_failed_signal.emit(retryable_failures)
+ if history_data:
+ # NOTE(review): history_candidates_buffer is never appended
+ # to, so this guard is currently always true — confirm.
+ if len(self.history_candidates_buffer) < 8:
+ self.post_processed_for_history_signal.emit(history_data)
+ if permanent_failures:
+ self.permanent_file_failed_signal.emit(permanent_failures)
+
+ # In single-threaded text mode, pass the temp file path back to
+ # the main window; presumably its log handler parses this
+ # "TEMP_FILE_PATH:" sentinel prefix — keep the format in sync.
+ if self.single_pdf_mode and temp_filepath:
+ self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")
+
+ except Exception as proc_err:
+ post_id_for_err = individual_post_data.get('id', 'N/A')
+ self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}")
+ traceback.print_exc()
+ # Best-effort estimate of how many files this post would have had.
+ num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0)
+ grand_total_skipped_files += num_potential_files_est
+
+ # The skip flag applies to a single file; clear it so it does not
+ # leak into the next post's downloads.
+ if self.skip_current_file_flag and self.skip_current_file_flag.is_set():
+ self.skip_current_file_flag.clear()
+ self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
+ self.msleep(10)
+ if was_process_cancelled:
+ break
+ if not was_process_cancelled and not self.isInterruptionRequested():
+ self.logger("✅ All posts processed or end of content reached by DownloadThread.")
+
+ except Exception as main_thread_err:
+ self.logger(f"\n❌ Critical error within DownloadThread run loop: {main_thread_err}")
+ traceback.print_exc()
+ finally:
+ try:
+ # Disconnect relays so the shared signals object cannot emit into
+ # this thread after it finishes.
+ if worker_signals_obj:
+ worker_signals_obj.progress_signal.disconnect(self.progress_signal)
+ worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
+ worker_signals_obj.external_link_signal.disconnect(self.external_link_signal)
+ worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
+ worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal)
+ worker_signals_obj.file_successfully_downloaded_signal.disconnect(self.file_successfully_downloaded_signal)
+ except (TypeError, RuntimeError) as e:
+ # disconnect() raises if a connection was never made (e.g. an
+ # early failure above) — informational only.
+ self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}")
+
+ # Always emit the final signal with all collected results.
+ self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)
+
+ def receive_add_character_result (self ,result ):
+ """Store the GUI's yes/no answer to the add-character prompt (thread-safe)."""
+ with QMutexLocker (self .prompt_mutex ):
+ self ._add_character_response =result
+ self .logger (f" (DownloadThread) Received character prompt response: {'Yes (added/confirmed)'if result else 'No (declined/failed)'}")
+
+# NOTE(review): this shadows Python's builtin ``InterruptedError`` (the
+# OSError subclass raised for EINTR). Any ``except InterruptedError`` in this
+# module after this point catches only this class, not the builtin — consider
+# renaming (e.g. DownloadInterruptedError) in a follow-up, since renaming here
+# would change the public interface.
+class InterruptedError(Exception):
+ """Custom exception for handling cancellations gracefully."""
+ pass
\ No newline at end of file