diff --git a/src/core/workers.py b/src/core/workers.py
index 23a82d8..79f80e9 100644
--- a/src/core/workers.py
+++ b/src/core/workers.py
@@ -237,6 +237,11 @@ class PostProcessorWorker:
         if self.check_cancel() or (skip_event and skip_event.is_set()):
             return 0, 1, "", False, FILE_DOWNLOAD_STATUS_SKIPPED, None
 
+        file_download_headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
+            'Referer': post_page_url
+        }
+
         file_url = file_info.get('url')
         cookies_to_use_for_file = None
         if self.use_cookie:
@@ -255,29 +260,23 @@ class PostProcessorWorker:
             self.logger(f" -> Skip File (Keyword in Original Name '{skip_word}'): '{api_original_filename}'. Scope: {self.skip_words_scope}")
             return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
 
-        cleaned_original_api_filename = clean_filename(api_original_filename)
+        cleaned_original_api_filename = robust_clean_name(api_original_filename)
         original_filename_cleaned_base, original_ext = os.path.splitext(cleaned_original_api_filename)
         if not original_ext.startswith('.'):
             original_ext = '.' + original_ext if original_ext else ''
 
         if self.manga_mode_active:
             if self.manga_filename_style == STYLE_ORIGINAL_NAME:
-                # Get the post's publication or added date
                 published_date_str = self.post.get('published')
                 added_date_str = self.post.get('added')
-                formatted_date_str = "nodate" # Fallback if no date is found
-
+                formatted_date_str = "nodate"
                 date_to_use_str = published_date_str or added_date_str
-
                 if date_to_use_str:
                     try:
-                        # Extract just the YYYY-MM-DD part from the timestamp
                         formatted_date_str = date_to_use_str.split('T')[0]
                     except Exception:
                         self.logger(f" ⚠️ Could not parse date '{date_to_use_str}'. Using 'nodate' prefix.")
                 else:
                     self.logger(f" ⚠️ Post ID {original_post_id_for_log} has no date. Using 'nodate' prefix.")
-
-                # Combine the date with the cleaned original filename
                 filename_to_save_in_main_path = f"{formatted_date_str}_{cleaned_original_api_filename}"
                 was_original_name_kept_flag = True
             elif self.manga_filename_style == STYLE_POST_TITLE:
@@ -303,7 +302,7 @@ class PostProcessorWorker:
                         manga_date_file_counter_ref[0] += 1
                     base_numbered_name = f"{counter_val_for_filename:03d}"
                     if self.manga_date_prefix and self.manga_date_prefix.strip():
-                        cleaned_prefix = clean_filename(self.manga_date_prefix.strip())
+                        cleaned_prefix = robust_clean_name(self.manga_date_prefix.strip())
                         if cleaned_prefix:
                             filename_to_save_in_main_path = f"{cleaned_prefix} {base_numbered_name}{original_ext}"
                         else:
@@ -320,7 +319,7 @@ class PostProcessorWorker:
                 with counter_lock:
                     counter_val_for_filename = manga_global_file_counter_ref[0]
                     manga_global_file_counter_ref[0] += 1
-                cleaned_post_title_base_for_global = clean_filename(post_title.strip() if post_title and post_title.strip() else "post")
+                cleaned_post_title_base_for_global = robust_clean_name(post_title.strip() if post_title and post_title.strip() else "post")
                 filename_to_save_in_main_path = f"{cleaned_post_title_base_for_global}_{counter_val_for_filename:03d}{original_ext}"
             else:
                 self.logger(f"⚠️ Manga Title+GlobalNum Mode: Counter ref not provided or malformed for '{api_original_filename}'. Using original. Ref: {manga_global_file_counter_ref}")
@@ -353,7 +352,7 @@ class PostProcessorWorker:
                 if post_title and post_title.strip():
                     temp_cleaned_title = robust_clean_name(post_title.strip())
-                    if not temp_cleaned_title or temp_cleaned_title.startswith("untitled_file"):
+                    if not temp_cleaned_title or temp_cleaned_title.startswith("untitled_folder"):
                         self.logger(f"⚠️ Manga mode (Date+PostTitle Style): Post title for post {original_post_id_for_log} ('{post_title}') was empty or generic after cleaning. Using 'post' as title part.")
                         cleaned_post_title_for_filename = "post"
                     else:
@@ -436,7 +435,6 @@ class PostProcessorWorker:
         final_save_path_check = os.path.join(target_folder_path, filename_to_save_in_main_path)
         if os.path.exists(final_save_path_check):
             try:
-                # Use a HEAD request to get the expected size without downloading the body
                 with requests.head(file_url, headers=file_download_headers, timeout=15, cookies=cookies_to_use_for_file, allow_redirects=True) as head_response:
                     head_response.raise_for_status()
                     expected_size = int(head_response.headers.get('Content-Length', -1))
@@ -445,31 +443,21 @@ class PostProcessorWorker:
                 if expected_size != -1 and actual_size == expected_size:
                     self.logger(f" -> Skip (File Exists & Complete): '{filename_to_save_in_main_path}' is already on disk with the correct size.")
-
-                    # We still need to add its hash to the session to prevent duplicates in other modes
-                    # This is a quick hash calculation for the already existing file
                     try:
                         md5_hasher = hashlib.md5()
                         with open(final_save_path_check, 'rb') as f_verify:
                             for chunk in iter(lambda: f_verify.read(8192), b""):
                                 md5_hasher.update(chunk)
-
                         with self.downloaded_hash_counts_lock:
                             self.downloaded_hash_counts[md5_hasher.hexdigest()] += 1
                     except Exception as hash_exc:
                         self.logger(f" ⚠️ Could not hash existing file '{filename_to_save_in_main_path}' for session: {hash_exc}")
-
                     return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
                 else:
                     self.logger(f" ⚠️ File '{filename_to_save_in_main_path}' exists but is incomplete (Expected: {expected_size}, Actual: {actual_size}). Re-downloading.")
-
             except requests.RequestException as e:
                 self.logger(f" ⚠️ Could not verify size of existing file '{filename_to_save_in_main_path}': {e}. Proceeding with download.")
 
-        file_download_headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
-            'Referer': post_page_url
-        }
-
+
         retry_delay = 5
         downloaded_size_bytes = 0
         calculated_file_hash = None
@@ -494,7 +482,6 @@ class PostProcessorWorker:
                     response.raise_for_status()
                     total_size_bytes = int(response.headers.get('Content-Length', 0))
 
-                    # Use the dedicated parts count from the dialog, not the main thread count
                     num_parts_for_file = min(self.multipart_parts_count, MAX_PARTS_FOR_MULTIPART_DOWNLOAD)
 
                     file_is_eligible_by_scope = False
@@ -532,7 +519,7 @@ class PostProcessorWorker:
                             download_successful_flag = True
                             downloaded_size_bytes = mp_bytes
                             calculated_file_hash = mp_hash
-                            downloaded_part_file_path = mp_save_path_for_unique_part_stem_arg + ".part"
+                            downloaded_part_file_path = mp_save_path_for_unique_part_stem_arg
                             if mp_file_handle: mp_file_handle.close()
                             break
                         else:
@@ -683,26 +670,22 @@ class PostProcessorWorker:
             self.logger(f" 🔄 Compressing '{api_original_filename}' to WebP...")
             try:
                 with Image.open(downloaded_part_file_path) as img:
-                    # Convert to RGB to avoid issues with paletted images or alpha channels in WebP
                     if img.mode not in ('RGB', 'RGBA'):
                         img = img.convert('RGBA')
 
-                    # Use an in-memory buffer to save the compressed image
                     output_buffer = BytesIO()
                     img.save(output_buffer, format='WebP', quality=85)
 
-                    # This buffer now holds the compressed data
                     data_to_write_io = output_buffer
 
-                    # Update the filename to use the .webp extension
                     base, _ = os.path.splitext(filename_to_save_in_main_path)
                     filename_to_save_in_main_path = f"{base}.webp"
                     self.logger(f" ✅ Compression successful. New size: {len(data_to_write_io.getvalue()) / (1024*1024):.2f} MB")
             except Exception as e_compress:
                 self.logger(f" ⚠️ Failed to compress '{api_original_filename}': {e_compress}. Saving original file instead.")
-                data_to_write_io = None # Ensure we fall back to saving the original
-
+                data_to_write_io = None
+
         effective_save_folder = target_folder_path
         base_name, extension = os.path.splitext(filename_to_save_in_main_path)
         counter = 1
@@ -719,17 +702,14 @@ class PostProcessorWorker:
         try:
             if data_to_write_io:
-                # Write the compressed data from the in-memory buffer
                 with open(final_save_path, 'wb') as f_out:
                     f_out.write(data_to_write_io.getvalue())
-                # Clean up the original downloaded part file
                 if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
                     try:
                         os.remove(downloaded_part_file_path)
                     except OSError as e_rem:
                         self.logger(f" -> Failed to remove .part after compression: {e_rem}")
             else:
-                # No compression was done, just rename the original file
                 if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
                     time.sleep(0.1)
                     os.rename(downloaded_part_file_path, final_save_path)
diff --git a/src/ui/main_window.py b/src/ui/main_window.py
index 0d5d1c6..ee0acda 100644
--- a/src/ui/main_window.py
+++ b/src/ui/main_window.py
@@ -4555,44 +4555,51 @@ class DownloaderApp (QWidget ):
             self .active_retry_futures_map [future ]=job_details
             self .active_retry_futures .append (future )
 
-    def _execute_single_file_retry (self ,job_details ,common_args ):
-        """Executes a single file download retry attempt."""
-        dummy_post_data ={'id':job_details ['original_post_id_for_log'],'title':job_details ['post_title']}
+    def _execute_single_file_retry(self, job_details, common_args):
+        """
+        Executes a single file download retry attempt. This function is called by the retry thread pool.
+        """
+        # This worker is temporary and only for this retry task.
+        # It needs dummy post data to initialize.
+        dummy_post_data = {'id': job_details['original_post_id_for_log'], 'title': job_details['post_title']}
 
-        ppw_init_args ={
-        **common_args ,
-        'post_data':dummy_post_data ,
-        'service':job_details .get ('service','unknown_service'),
-        'user_id':job_details .get ('user_id','unknown_user'),
-        'api_url_input':job_details .get ('api_url_input',''),
-        'manga_mode_active':job_details .get ('manga_mode_active_for_file',False ),
-        'manga_filename_style':job_details .get ('manga_filename_style_for_file',STYLE_POST_TITLE ),
-        'scan_content_for_images':common_args .get ('scan_content_for_images',False ),
-        'use_cookie':common_args .get ('use_cookie',False ),
-        'cookie_text':common_args .get ('cookie_text',""),
-        'selected_cookie_file':common_args .get ('selected_cookie_file',None ),
-        'app_base_dir':common_args .get ('app_base_dir',None ),
+        # Reconstruct the post_page_url, which is needed by the download function
+        service = job_details.get('service', 'unknown_service')
+        user_id = job_details.get('user_id', 'unknown_user')
+        post_id = job_details.get('original_post_id_for_log', 'unknown_id')
+        api_url_input = job_details.get('api_url_input', '')
+        parsed_api_url = urlparse(api_url_input)
+        api_domain = parsed_api_url.netloc if parsed_api_url.netloc else self._get_domain_for_service(service)
+        post_page_url = f"https://{api_domain}/{service}/user/{user_id}/post/{post_id}"
+
+        # Prepare all arguments for the PostProcessorWorker
+        ppw_init_args = {
+            **common_args,
+            'post_data': dummy_post_data,
+            'service': service,
+            'user_id': user_id,
+            'api_url_input': api_url_input
         }
-        worker =PostProcessorWorker (**ppw_init_args )
-        dl_count ,skip_count ,filename_saved ,original_kept ,status ,_ =worker ._download_single_file (
-        file_info =job_details ['file_info'],
-        target_folder_path =job_details ['target_folder_path'],
-        headers =job_details ['headers'],
-        original_post_id_for_log =job_details ['original_post_id_for_log'],
-        skip_event =None ,
-        post_title =job_details ['post_title'],
-        file_index_in_post =job_details ['file_index_in_post'],
-        num_files_in_this_post =job_details ['num_files_in_this_post'],
-        forced_filename_override =job_details .get ('forced_filename_override')
+        worker = PostProcessorWorker(**ppw_init_args)
+
+        # Call the download method with the corrected arguments
+        dl_count, skip_count, filename_saved, original_kept, status, _ = worker._download_single_file(
+            file_info=job_details['file_info'],
+            target_folder_path=job_details['target_folder_path'],
+            post_page_url=post_page_url,  # Using the correct argument
+            original_post_id_for_log=job_details['original_post_id_for_log'],
+            skip_event=None,
+            post_title=job_details['post_title'],
+            file_index_in_post=job_details['file_index_in_post'],
+            num_files_in_this_post=job_details['num_files_in_this_post'],
+            forced_filename_override=job_details.get('forced_filename_override')
         )
+        is_successful_download = (status == FILE_DOWNLOAD_STATUS_SUCCESS)
+        is_resolved_as_skipped = (status == FILE_DOWNLOAD_STATUS_SKIPPED)
 
-
-        is_successful_download =(status ==FILE_DOWNLOAD_STATUS_SUCCESS )
-        is_resolved_as_skipped =(status ==FILE_DOWNLOAD_STATUS_SKIPPED )
-
-        return is_successful_download or is_resolved_as_skipped
+        return is_successful_download or is_resolved_as_skipped
 
     def _handle_retry_future_result (self ,future ):
         self .processed_retry_count +=1