diff --git a/src/core/manager.py b/src/core/manager.py
index 0a30277..5f60ed5 100644
--- a/src/core/manager.py
+++ b/src/core/manager.py
@@ -3,7 +3,7 @@
 import time
 import os
 import json
 import traceback
-from concurrent.futures import ThreadPoolExecutor, as_completed, Future
+from concurrent.futures import ThreadPoolExecutor, as_completed, Future, CancelledError
 from .api_client import download_from_api
 from .workers import PostProcessorWorker
 from ..config.constants import (
@@ -113,6 +113,29 @@ class DownloadManager:
         self.is_running = False  # Allow another session to start if needed
         self.progress_queue.put({'type': 'handoff_to_single_thread', 'payload': (config,)})
 
+    def _get_proxies_from_config(self, config):
+        """Constructs the proxy dictionary from the config."""
+        if not config.get('proxy_enabled'):
+            return None
+
+        host = config.get('proxy_host')
+        port = config.get('proxy_port')
+        if not host or not port:
+            return None
+
+        proxy_str = f"http://{host}:{port}"
+
+        # Add auth if provided
+        user = config.get('proxy_username')
+        password = config.get('proxy_password')
+        if user and password:
+            proxy_str = f"http://{user}:{password}@{host}:{port}"
+
+        return {
+            "http": proxy_str,
+            "https": proxy_str
+        }
+
     def _fetch_and_queue_posts_for_pool(self, config, restore_data, creator_profile_data):
         """
         Fetches posts from the API in batches and submits them as tasks to a thread pool.
         """
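Reviewer note: for clarity, here is what this helper produces for a sample config (hypothetical values; the keys mirror the ones read above). The returned mapping is the format `requests` expects for its `proxies` argument:

```python
# Hypothetical config for illustration only — not part of the patch.
config = {
    'proxy_enabled': True,
    'proxy_host': '127.0.0.1',
    'proxy_port': '8080',
    'proxy_username': 'user',
    'proxy_password': 'secret',
}

# _get_proxies_from_config(config) would return:
# {
#     "http": "http://user:secret@127.0.0.1:8080",
#     "https": "http://user:secret@127.0.0.1:8080",
# }
# Both keys point at the same HTTP proxy URL; requests routes
# https:// traffic through it via CONNECT tunneling.
```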
@@ -126,6 +149,9 @@ class DownloadManager:
         session_processed_ids = set(restore_data.get('processed_post_ids', [])) if restore_data else set()
         profile_processed_ids = set(creator_profile_data.get('processed_post_ids', []))
         processed_ids = session_processed_ids.union(profile_processed_ids)
+
+        # Build the proxy dict once for the whole session
+        proxies = self._get_proxies_from_config(config)
 
         if restore_data and 'all_posts_data' in restore_data:
             # This logic for session restore remains as it relies on a pre-fetched list
             for post_data in posts_to_process:
                 if self.cancellation_event.is_set():
                     break
-                worker = PostProcessorWorker(post_data, config, self.progress_queue)
+
+                worker_args = self._map_config_to_worker_args(post_data, config)
+                # Inject proxies explicitly, in case the mapping helper did not set them
+                worker_args['proxies'] = proxies
+
+                worker = PostProcessorWorker(**worker_args)
                 future = self.thread_pool.submit(worker.process)
                 future.add_done_callback(self._handle_future_result)
                 self.active_futures.append(future)
         else:
             # --- Streaming Logic ---
+            if proxies:
+                self._log(f"   🌐 Using Proxy: {config.get('proxy_host')}:{config.get('proxy_port')}")
+
             post_generator = download_from_api(
                 api_url_input=config['api_url'],
                 logger=self._log,
@@ -156,7 +190,8 @@ class DownloadManager:
                 end_page=config.get('end_page'),
                 cancellation_event=self.cancellation_event,
                 pause_event=self.pause_event,
-                cookies_dict=None # Cookie handling handled inside client if needed, or update if passed
+                cookies_dict=None,  # Cookie handling is done inside the client if needed
+                proxies=proxies  # <--- NEW: Pass proxies to API client
             )
 
             for post_batch in post_generator:
@@ -169,23 +204,16 @@ class DownloadManager:
                 if self.cancellation_event.is_set():
                     break
 
                 new_posts_batch = [p for p in post_batch if p.get('id') not in processed_ids]
 
                 if not new_posts_batch:
-                    # Log skipped count for UI feedback if needed, already handled in api_client usually
                     continue
 
                 # Update total posts dynamically as we find them
                 self.total_posts += len(new_posts_batch)
-                # Note: total_posts in streaming is a "running total of found posts", not absolute total
 
                 for post_data in new_posts_batch:
                     if self.cancellation_event.is_set():
                         break
-                    # Pass explicit args or config to worker
-                    # Ideally PostProcessorWorker should accept the whole config dict or mapped args
-                    # For now assuming PostProcessorWorker takes (post_data, config_dict, queue)
-                    # OR we map the config to the args expected by PostProcessorWorker.__init__
-
-                    # MAPPING CONFIG TO WORKER ARGS (Safe wrapper)
+                    # MAPPING CONFIG TO WORKER ARGS
                     worker_args = self._map_config_to_worker_args(post_data, config)
 
                     worker = PostProcessorWorker(**worker_args)
@@ -193,7 +221,7 @@ class DownloadManager:
                     future.add_done_callback(self._handle_future_result)
                     self.active_futures.append(future)
 
-                # Small sleep to prevent UI freeze if batches are huge and instant
+                # Small sleep to prevent UI freeze
                 time.sleep(0.01)
 
         except Exception as e:
@@ -205,6 +233,9 @@ class DownloadManager:
 
     def _map_config_to_worker_args(self, post_data, config):
         """Helper to map the flat config dict to PostProcessorWorker arguments."""
+        # Build the proxy dict for this worker
+        proxies = self._get_proxies_from_config(config)
+
         # This mirrors the arguments in workers.py PostProcessorWorker.__init__
         return {
             'post_data': post_data,
@@ -221,29 +252,27 @@ class DownloadManager:
             'custom_folder_name': config.get('custom_folder_name'),
             'compress_images': config.get('compress_images'),
             'download_thumbnails': config.get('download_thumbnails'),
-            'service': config.get('service') or 'unknown', # extracted elsewhere
+            'service': config.get('service') or 'unknown',
             'user_id': config.get('user_id') or 'unknown',
             'pause_event': self.pause_event,
             'api_url_input': config.get('api_url'),
             'cancellation_event': self.cancellation_event,
-            'downloaded_files': None, # Managed per worker or global if passed
+            'downloaded_files': None,
             'downloaded_file_hashes': None,
             'downloaded_files_lock': None,
             'downloaded_file_hashes_lock': None,
-            # Add other necessary fields from config...
             'manga_mode_active': config.get('manga_mode_active'),
             'manga_filename_style': config.get('manga_filename_style'),
-            'manga_custom_filename_format': config.get('custom_manga_filename_format', "{published} {title}"), # Pass custom format
+            'manga_custom_filename_format': config.get('custom_manga_filename_format', "{published} {title}"),
             'manga_custom_date_format': config.get('manga_custom_date_format', "YYYY-MM-DD"),
             'use_multithreading': config.get('use_multithreading', True),
-            # Ensure defaults for others
+            'proxies': proxies,  # <--- NEW: Pass proxies to worker
         }
 
     def _setup_creator_profile(self, config):
         """Prepares the path and loads data for the current creator's profile."""
         # Extract name logic here or assume config has it
-        # ... (Same as your existing code)
-        self.current_creator_name_for_profile = "Unknown" # Placeholder
+        self.current_creator_name_for_profile = "Unknown"  # TODO: extract the creator name from the URL or config when available
 
         return {}
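Reviewer note: one behavior worth keeping in mind (a general `requests` fact, not something this patch changes): when `proxies` ends up as `None` — the disabled case above — `requests` still honors `HTTP_PROXY`/`HTTPS_PROXY` from the environment. A minimal sketch of forcing a direct connection, should that ever matter:

```python
import requests

session = requests.Session()
session.trust_env = False  # ignore HTTP_PROXY/HTTPS_PROXY from the environment

# With trust_env disabled, this request goes out directly even if the
# environment defines proxy variables.
resp = session.get("https://example.com", timeout=10)
```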
diff --git a/src/core/workers.py b/src/core/workers.py
index d489cc8..290bc44 100644
--- a/src/core/workers.py
+++ b/src/core/workers.py
@@ -133,7 +133,8 @@ class PostProcessorWorker:
         sfp_threshold=None,
         handle_unknown_mode=False,
         creator_name_cache=None,
-        add_info_in_pdf=False
+        add_info_in_pdf=False,
+        proxies=None
     ):
         self.post = post_data
@@ -208,9 +209,8 @@ class PostProcessorWorker:
         self.sfp_threshold = sfp_threshold
         self.handle_unknown_mode = handle_unknown_mode
         self.creator_name_cache = creator_name_cache
-        #-- New assign --
         self.add_info_in_pdf = add_info_in_pdf
-        #-- New assign --
+        self.proxies = proxies
 
         if self.compress_images and Image is None:
@@ -263,7 +263,7 @@ class PostProcessorWorker:
         new_url = parsed_url._replace(netloc=new_domain).geturl()
 
         try:
-            with requests.head(new_url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=5, allow_redirects=True) as resp:
+            with requests.head(new_url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=5, allow_redirects=True, proxies=self.proxies) as resp:
                 if resp.status_code == 200:
                     return new_url
         except requests.RequestException:
@@ -338,7 +338,8 @@ class PostProcessorWorker:
         api_original_filename_for_size_check = file_info.get('_original_name_for_log', file_info.get('name'))
         try:
-            # Use a stream=True HEAD request to get headers without downloading the body
-            with requests.head(file_url, headers=file_download_headers, timeout=15, cookies=cookies_to_use_for_file, allow_redirects=True) as head_response:
+            # Use a HEAD request to read the headers without downloading the body
+            with requests.head(file_url, headers=file_download_headers, timeout=15, cookies=cookies_to_use_for_file, allow_redirects=True, proxies=self.proxies) as head_response:
+                head_response.raise_for_status()
                 content_length = head_response.headers.get('Content-Length')
                 if content_length:
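Reviewer note: as a standalone illustration of the HEAD-based size check used above (hypothetical function name and URL; only `requests` is assumed):

```python
import requests

def get_remote_size(url, proxies=None):
    """Return the Content-Length reported for `url`, or None if absent.

    HEAD responses carry headers only, so nothing is downloaded.
    """
    with requests.head(url, timeout=15, allow_redirects=True, proxies=proxies) as resp:
        resp.raise_for_status()  # surface 4xx/5xx before trusting the headers
        content_length = resp.headers.get('Content-Length')
        return int(content_length) if content_length else None

# Example call with a hypothetical local proxy:
# size = get_remote_size("https://example.com/file.bin",
#                        proxies={"http": "http://127.0.0.1:8080",
#                                 "https": "http://127.0.0.1:8080"})
```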
@@ -672,7 +673,7 @@ class PostProcessorWorker:
 
         current_url_to_try = file_url
 
-        response = requests.get(current_url_to_try, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file)
+        response = requests.get(current_url_to_try, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file, proxies=self.proxies)
 
         if response.status_code == 403 and ('kemono.' in current_url_to_try or 'coomer.' in current_url_to_try):
             self.logger(f"   ⚠️ Got 403 Forbidden for '{api_original_filename}'. Attempting subdomain rotation...")
@@ -681,8 +682,7 @@ class PostProcessorWorker:
                 self.logger(f"   Retrying with new URL: {new_url}")
                 file_url = new_url
                 response.close()  # Close the old response
-                response = requests.get(new_url, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file)
-
+                response = requests.get(new_url, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file, proxies=self.proxies)
         response.raise_for_status()
 
         # --- REVISED AND MOVED SIZE CHECK LOGIC ---
@@ -1104,8 +1104,8 @@ class PostProcessorWorker:
             'Referer': creator_page_url,
             'Accept': 'text/css'
         }
-        cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
-        full_post_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
+        cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
+        full_post_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies, proxies=self.proxies)
         if full_post_data:
             self.logger("   ✅ Full post data fetched successfully.")
             self.post = full_post_data
@@ -1306,13 +1306,17 @@ class PostProcessorWorker:
             if not any(d in api_domain_for_comments.lower() for d in ['kemono.su', 'kemono.party', 'kemono.cr', 'coomer.su', 'coomer.party', 'coomer.st']):
                 self.logger(f"⚠️ Unrecognized domain '{api_domain_for_comments}' for comment API. Defaulting based on service.")
                 api_domain_for_comments = "kemono.cr" if "kemono" in self.service.lower() else "coomer.st"
+
+            # Fetch comments
             comments_data = fetch_post_comments(
                 api_domain_for_comments, self.service, self.user_id, post_id,
                 headers, self.logger, self.cancellation_event, self.pause_event,
                 cookies_dict=prepare_cookies_for_request(
                     self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger
-                )
+                ),
+                proxies=self.proxies
             )
+
             if comments_data:
                 self.logger(f"   Fetched {len(comments_data)} comments for post {post_id}.")
                 for comment_item_idx, comment_item in enumerate(comments_data):
@@ -1339,8 +1343,8 @@ class PostProcessorWorker:
             except RuntimeError as e_fetch_comment:
                 self.logger(f"   ⚠️ Error fetching or processing comments for post {post_id}: {e_fetch_comment}")
             except Exception as e_generic_comment:
-                self.logger(f"   ❌ Unexpected error during comment processing for post {post_id}: {e_generic_comment}\n{traceback.format_exc(limit=2)}")
-                self.logger(f"   [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match}")
+                self.logger(f"   ❌ Unexpected error during comment processing for post {post_id}: {e_generic_comment}\n{traceback.format_exc(limit=2)}")
+
         else:
             self.logger(f"   [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id}' because a file match already made it a candidate.")
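Reviewer note: to make the 403-handling flow above easier to follow in isolation, here is a condensed sketch of the retry pattern (function and helper names are illustrative, not the module's real API):

```python
import requests

def get_with_rotation(url, try_next_subdomain, proxies=None):
    """Illustrative sketch: GET a file URL, rotating to an alternate
    subdomain once if the first attempt is rejected with 403."""
    response = requests.get(url, timeout=(30, 300), stream=True, proxies=proxies)
    if response.status_code == 403:
        new_url = try_next_subdomain(url)  # hypothetical helper probing 'n*' hosts
        if new_url:
            response.close()  # release the rejected connection first
            response = requests.get(new_url, timeout=(30, 300), stream=True, proxies=proxies)
    response.raise_for_status()  # anything still not 2xx is a real failure
    return response
```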
@@ -2327,9 +2331,10 @@ class DownloadThread(QThread):
         manga_custom_filename_format="{published} {title}",
         manga_custom_date_format="YYYY-MM-DD",
         sfp_threshold=None,
-        creator_name_cache=None
-
+        creator_name_cache=None,
+        proxies=None
     ):
+
         super().__init__()
         self.api_url_input = api_url_input
         self.output_dir = output_dir
@@ -2404,6 +2409,7 @@ class DownloadThread(QThread):
         self.domain_override = domain_override
         self.sfp_threshold = sfp_threshold
         self.creator_name_cache = creator_name_cache
+        self.proxies = proxies
 
         if self.compress_images and Image is None:
             self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
@@ -2437,6 +2443,7 @@ class DownloadThread(QThread):
         self.logger("   Starting post fetch (single-threaded download process)...")
 
+        # 'proxies' must be passed to download_from_api exactly once
         post_generator = download_from_api(
             self.api_url_input,
             logger=self.logger,
@@ -2451,7 +2458,8 @@ class DownloadThread(QThread):
             app_base_dir=self.app_base_dir,
             manga_filename_style_for_sort_check=self.manga_filename_style if self.manga_mode_active else None,
             processed_post_ids=self.processed_post_ids_set,
-            fetch_all_first=self.fetch_first
+            fetch_all_first=self.fetch_first,
+            proxies=self.proxies
         )
 
         for posts_batch_data in post_generator:
@@ -2464,6 +2472,7 @@ class DownloadThread(QThread):
                 was_process_cancelled = True
                 break
 
+            # 'proxies' must be included in this worker-args dictionary
            worker_args = {
                 'post_data': individual_post_data,
                 'emitter': worker_signals_obj,
@@ -2532,7 +2541,8 @@ class DownloadThread(QThread):
                 'archive_only_mode': self.archive_only_mode,
                 'manga_custom_filename_format': self.manga_custom_filename_format,
                 'manga_custom_date_format': self.manga_custom_date_format,
-                'sfp_threshold': self.sfp_threshold
+                'sfp_threshold': self.sfp_threshold,
+                'proxies': self.proxies
             }
 
             post_processing_worker = PostProcessorWorker(**worker_args)
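Reviewer note: this patch assumes `download_from_api` and the fetch helpers in `api_client.py` accept the new `proxies` keyword; those changes are not shown in this diff. A minimal sketch of what that plumbing might look like, with the generator simply forwarding the dict to each `requests` call (paging parameter and structure are assumptions, not the client's real implementation):

```python
import requests

def download_from_api(api_url_input, logger=print, proxies=None, **kwargs):
    """Illustrative sketch only — the real generator in api_client.py also
    handles cookies, pausing, cancellation, and page ranges."""
    offset = 0
    while True:
        # 'o' is a hypothetical offset parameter used here for illustration.
        resp = requests.get(api_url_input, params={'o': offset},
                            timeout=30, proxies=proxies)
        resp.raise_for_status()
        batch = resp.json()
        if not batch:
            break  # no more posts
        yield batch
        offset += len(batch)
```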
diff --git a/src/ui/main_window.py b/src/ui/main_window.py
index d51a6f4..eb047cc 100644
--- a/src/ui/main_window.py
+++ b/src/ui/main_window.py
@@ -843,7 +843,20 @@ class DownloaderApp (QWidget ):
         settings['add_info_in_pdf'] = self.add_info_in_pdf_setting  # Save to settings dict
         settings['keep_duplicates_mode'] = self.keep_duplicates_mode
         settings['keep_duplicates_limit'] = self.keep_duplicates_limit
-
+
+        settings['proxy_enabled'] = self.settings.value(PROXY_ENABLED_KEY, False, type=bool)
+        settings['proxy_host'] = self.settings.value(PROXY_HOST_KEY, "", type=str)
+        settings['proxy_port'] = self.settings.value(PROXY_PORT_KEY, "", type=str)
+        settings['proxy_username'] = self.settings.value(PROXY_USERNAME_KEY, "", type=str)
+        settings['proxy_password'] = self.settings.value(PROXY_PASSWORD_KEY, "", type=str)
+
+        settings['proxies'] = None
+        if settings['proxy_enabled'] and settings['proxy_host'] and settings['proxy_port']:
+            proxy_str = f"http://{settings['proxy_host']}:{settings['proxy_port']}"
+            if settings['proxy_username'] and settings['proxy_password']:
+                proxy_str = f"http://{settings['proxy_username']}:{settings['proxy_password']}@{settings['proxy_host']}:{settings['proxy_port']}"
+            settings['proxies'] = {'http': proxy_str, 'https': proxy_str}
+
         return settings
@@ -4627,6 +4640,14 @@ class DownloaderApp (QWidget ):
         if should_use_multithreading_for_posts:
             log_messages.append(f"   Number of Post Worker Threads: {effective_num_post_workers}")
 
+        proxy_enabled_log = self.settings.value(PROXY_ENABLED_KEY, False, type=bool)
+        if proxy_enabled_log:
+            p_host = self.settings.value(PROXY_HOST_KEY, "")
+            p_port = self.settings.value(PROXY_PORT_KEY, "")
+            log_messages.append(f"   Proxy: Enabled ({p_host}:{p_port})")
+        else:
+            log_messages.append("   Proxy: Disabled")
+
         if domain_override_command:
             self.log_signal.emit(f"ℹ️ Domain Override Active: Will probe for the correct 'n*' subdomain on the '.{domain_override_command}' domain for each file.")
@@ -4639,7 +4660,7 @@ class DownloaderApp (QWidget ):
         self.set_ui_enabled(False)
 
         from src.config.constants import FOLDER_NAME_STOP_WORDS
-
+        current_proxies = self._get_current_ui_settings_as_dict().get('proxies')
         args_template = {
             'api_url_input': api_url,
             'download_root': effective_output_dir_for_run,
@@ -4715,7 +4736,8 @@ class DownloaderApp (QWidget ):
             'fetch_first': fetch_first_enabled,
             'sfp_threshold': download_commands.get('sfp_threshold'),
             'handle_unknown_mode': handle_unknown_command,
-            'add_info_in_pdf': self.add_info_in_pdf_setting,
+            'add_info_in_pdf': self.add_info_in_pdf_setting,
+            'proxies': current_proxies
         }
 
         args_template['override_output_dir'] = override_output_dir
@@ -4741,7 +4763,8 @@ class DownloaderApp (QWidget ):
             'app_base_dir': app_base_dir_for_cookies,
             'manga_filename_style_for_sort_check': self.manga_filename_style,
             'processed_post_ids': processed_post_ids_for_this_run,
-            'fetch_all_first': True
+            'fetch_all_first': True,
+            'proxies': self._get_current_ui_settings_as_dict().get('proxies')
         }
 
         self.download_thread = threading.Thread(target=self._run_fetch_only_thread, args=(fetch_thread_args,), daemon=True)
@@ -5097,8 +5120,7 @@ class DownloaderApp (QWidget ):
         ppw_expected_keys = list(PostProcessorWorker.__init__.__code__.co_varnames)[1:]
 
-        # 1. Define all LIVE RUNTIME arguments.
-        # These are taken from the current app state and are the same for all workers.
+        current_proxies = self._get_current_ui_settings_as_dict().get('proxies')
         live_runtime_args = {
             'emitter': self.worker_to_gui_queue,
             'creator_name_cache': self.creator_name_cache,
@@ -5128,7 +5150,8 @@ class DownloaderApp (QWidget ):
             'use_cookie': self.use_cookie_checkbox.isChecked(),
             'cookie_text': self.cookie_text_input.text(),
             'selected_cookie_file': self.selected_cookie_filepath,
-            'add_info_in_pdf': self.add_info_in_pdf_setting,
+            'add_info_in_pdf': self.add_info_in_pdf_setting,
+            'proxies': current_proxies,
         }
 
         # 2. Define DEFAULTS for all settings that *should* be in the profile.
@@ -5364,6 +5387,19 @@ class DownloaderApp (QWidget ):
         self._update_manga_filename_style_button_text()
         self._update_multipart_toggle_button_text()
 
+        if 'proxy_enabled' in settings:
+            self.settings.setValue(PROXY_ENABLED_KEY, settings['proxy_enabled'])
+        if 'proxy_host' in settings:
+            self.settings.setValue(PROXY_HOST_KEY, settings['proxy_host'])
+        if 'proxy_port' in settings:
+            self.settings.setValue(PROXY_PORT_KEY, settings['proxy_port'])
+        if 'proxy_username' in settings:
+            self.settings.setValue(PROXY_USERNAME_KEY, settings['proxy_username'])
+        if 'proxy_password' in settings:
+            self.settings.setValue(PROXY_PASSWORD_KEY, settings['proxy_password'])
+
+        self.settings.sync()
+
     def start_multi_threaded_download(self, num_post_workers, **kwargs):
         """
         Initializes and starts the multi-threaded download process.
@@ -5424,7 +5460,8 @@ class DownloaderApp (QWidget ):
             app_base_dir=worker_args_template.get('app_base_dir'),
             manga_filename_style_for_sort_check=worker_args_template.get('manga_filename_style'),
             processed_post_ids=worker_args_template.get('processed_post_ids', []),
-            fetch_all_first=worker_args_template.get('fetch_first', False)
+            fetch_all_first=worker_args_template.get('fetch_first', False),
+            proxies=worker_args_template.get('proxies')
         )
 
         ppw_expected_keys = list(PostProcessorWorker.__init__.__code__.co_varnames)[1:]
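Reviewer note: for anyone unfamiliar with QSettings, this is roughly how the persisted proxy fields round-trip (a standalone sketch with hypothetical key strings and org/app names; the real code uses the PROXY_*_KEY constants and the app's own QSettings instance):

```python
from PyQt5.QtCore import QSettings  # assuming PyQt5, matching the type= kwarg used above

settings = QSettings("MyOrg", "MyDownloader")  # hypothetical org/app names

# Write the fields the dialog collects...
settings.setValue("proxy/enabled", True)
settings.setValue("proxy/host", "127.0.0.1")
settings.setValue("proxy/port", "8080")
settings.sync()  # flush to disk immediately, as the patch does

# ...and read them back with explicit types, since QSettings stores
# values as strings in some backends (e.g. INI files).
enabled = settings.value("proxy/enabled", False, type=bool)
host = settings.value("proxy/host", "", type=str)
port = settings.value("proxy/port", "", type=str)
```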