From b191776f6537018412aea4035ec6ee2828b10bc2 Mon Sep 17 00:00:00 2001 From: Yuvi9587 <114073886+Yuvi9587@users.noreply.github.com> Date: Mon, 14 Jul 2025 08:19:58 -0700 Subject: [PATCH] Commit --- src/core/workers.py | 1856 ++++++++++++++++++++--------------------- src/ui/flow_layout.py | 93 +++ src/ui/main_window.py | 1217 ++++++++++----------------- 3 files changed, 1435 insertions(+), 1731 deletions(-) create mode 100644 src/ui/flow_layout.py diff --git a/src/core/workers.py b/src/core/workers.py index bc93876..f2612c9 100644 --- a/src/core/workers.py +++ b/src/core/workers.py @@ -102,6 +102,7 @@ class PostProcessorWorker: keep_in_post_duplicates=False, session_file_path=None, session_lock=None, + processed_ids_to_skip=None, text_only_scope=None, text_export_format='txt', single_pdf_mode=False, @@ -159,6 +160,7 @@ class PostProcessorWorker: self.keep_in_post_duplicates = keep_in_post_duplicates self.session_file_path = session_file_path self.session_lock = session_lock + self.processed_ids_to_skip = processed_ids_to_skip self.text_only_scope = text_only_scope self.text_export_format = text_export_format self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE @@ -370,10 +372,10 @@ class PostProcessorWorker: filename_to_save_in_main_path =cleaned_original_api_filename was_original_name_kept_flag =False - - if self .remove_from_filename_words_list and filename_to_save_in_main_path : - + # Store the name before this specific modification, so we can revert if it gets destroyed. + name_before_word_removal = filename_to_save_in_main_path + base_name_for_removal ,ext_for_removal =os .path .splitext (filename_to_save_in_main_path ) modified_base_name =base_name_for_removal for word_to_remove in self .remove_from_filename_words_list : @@ -383,12 +385,13 @@ class PostProcessorWorker: modified_base_name =re .sub (r'[_.\s-]+',' ',modified_base_name ) modified_base_name =re .sub (r'\s+',' ',modified_base_name ) modified_base_name =modified_base_name .strip () + if modified_base_name and modified_base_name !=ext_for_removal .lstrip ('.'): filename_to_save_in_main_path =modified_base_name +ext_for_removal else : - filename_to_save_in_main_path =base_name_for_removal +ext_for_removal - - + # If the name was stripped to nothing, revert to the name from before this block. + self.logger(f" ⚠️ Filename was empty after removing words. Reverting to '{name_before_word_removal}'.") + filename_to_save_in_main_path = name_before_word_removal if not self .download_thumbnails : @@ -883,874 +886,819 @@ class PostProcessorWorker: if data_to_write_io and hasattr (data_to_write_io ,'close'): data_to_write_io .close () - def process (self ): - if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None - if self .check_cancel ():return 0 ,0 ,[],[],[],None, None - current_character_filters =self ._get_current_character_filters () - kept_original_filenames_for_log =[] - retryable_failures_this_post =[] - permanent_failures_this_post =[] - total_downloaded_this_post =0 - total_skipped_this_post =0 - history_data_for_this_post =None + def process(self): + # --- FIX START: This entire method is now wrapped in a try...finally block --- + # to ensure it always reports completion back to the main window. + + # Initialize result values to safe defaults for failure cases. + total_downloaded_this_post = 0 + total_skipped_this_post = 0 + kept_original_filenames_for_log = [] + retryable_failures_this_post = [] + permanent_failures_this_post = [] + history_data_for_this_post = None temp_filepath_for_return = None - parsed_api_url =urlparse (self .api_url_input ) - referer_url =f"https://{parsed_api_url .netloc }/" - headers ={'User-Agent':'Mozilla/5.0','Referer':referer_url ,'Accept':'*/*'} - link_pattern =re .compile (r"""]*>(.*?)""", - re .IGNORECASE |re .DOTALL ) - post_data =self .post - post_title =post_data .get ('title','')or 'untitled_post' - post_id =post_data .get ('id','unknown_id') - post_main_file_info =post_data .get ('file') - post_attachments =post_data .get ('attachments',[]) + try: + post_id_for_skip_check = self.post.get('id') + if self.processed_ids_to_skip and post_id_for_skip_check in self.processed_ids_to_skip: + self.logger(f" -> Skipping Post {post_id_for_skip_check} (already processed in previous session).") + # We must emit 'worker_finished' so the main UI can count this as a completed (skipped) task. + num_potential_files_in_post = len(self.post.get('attachments', [])) + (1 if self.post.get('file') else 0) + total_skipped_this_post = num_potential_files_in_post + # The rest of the result tuple can be empty defaults + result_tuple = (0, total_skipped_this_post, [], [], [], None, None) + self._emit_signal('worker_finished', result_tuple) + return result_tuple - effective_unwanted_keywords_for_folder_naming =self .unwanted_keywords .copy () - is_full_creator_download_no_char_filter =not self .target_post_id_from_initial_url and not current_character_filters - if is_full_creator_download_no_char_filter and self .creator_download_folder_ignore_words : - self .logger (f" Applying creator download specific folder ignore words ({len (self .creator_download_folder_ignore_words )} words).") - effective_unwanted_keywords_for_folder_naming .update (self .creator_download_folder_ignore_words ) + # ALL OF THE ORIGINAL LOGIC OF THE `process` METHOD GOES HERE + if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None + if self .check_cancel ():return 0 ,0 ,[],[],[],None, None + current_character_filters =self ._get_current_character_filters () + + parsed_api_url =urlparse (self .api_url_input ) + referer_url =f"https://{parsed_api_url .netloc }/" + headers ={'User-Agent':'Mozilla/5.0','Referer':referer_url ,'Accept':'*/*'} + link_pattern =re .compile (r"""]*>(.*?)""", + re .IGNORECASE |re .DOTALL ) + post_data =self .post + post_title =post_data .get ('title','')or 'untitled_post' + post_id =post_data .get ('id','unknown_id') + post_main_file_info =post_data .get ('file') + post_attachments =post_data .get ('attachments',[]) - post_content_html =post_data .get ('content','') - self .logger (f"\n--- Processing Post {post_id } ('{post_title [:50 ]}...') (Thread: {threading .current_thread ().name }) ---") - num_potential_files_in_post =len (post_attachments or [])+(1 if post_main_file_info and post_main_file_info .get ('path')else 0 ) - post_is_candidate_by_title_char_match =False - char_filter_that_matched_title =None - post_is_candidate_by_comment_char_match =False - post_is_candidate_by_file_char_match_in_comment_scope =False - char_filter_that_matched_file_in_comment_scope =None - char_filter_that_matched_comment =None - if current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH ): - if self ._check_pause (f"Character title filter for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - for idx ,filter_item_obj in enumerate (current_character_filters ): - if self .check_cancel ():break - terms_to_check_for_title =list (filter_item_obj ["aliases"]) - if filter_item_obj ["is_group"]: - if filter_item_obj ["name"]not in terms_to_check_for_title : - terms_to_check_for_title .append (filter_item_obj ["name"]) - unique_terms_for_title_check =list (set (terms_to_check_for_title )) - for term_to_match in unique_terms_for_title_check : - match_found_for_term =is_title_match_for_character (post_title ,term_to_match ) - if match_found_for_term : - post_is_candidate_by_title_char_match =True - char_filter_that_matched_title =filter_item_obj - self .logger (f" Post title matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}', Scope: {self .char_filter_scope }). Post is candidate.") - break - if post_is_candidate_by_title_char_match :break - all_files_from_post_api_for_char_check =[] - api_file_domain_for_char_check =urlparse (self .api_url_input ).netloc - if not api_file_domain_for_char_check or not any (d in api_file_domain_for_char_check .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): - api_file_domain_for_char_check ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" - if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'): - original_api_name =post_main_file_info .get ('name')or os .path .basename (post_main_file_info ['path'].lstrip ('/')) - if original_api_name : - all_files_from_post_api_for_char_check .append ({'_original_name_for_log':original_api_name }) - for att_info in post_attachments : - if isinstance (att_info ,dict )and att_info .get ('path'): - original_api_att_name =att_info .get ('name')or os .path .basename (att_info ['path'].lstrip ('/')) - if original_api_att_name : - all_files_from_post_api_for_char_check .append ({'_original_name_for_log':original_api_att_name }) - if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS : - self .logger (f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id }'.") - if self ._check_pause (f"File check (comments scope) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - for file_info_item in all_files_from_post_api_for_char_check : - if self .check_cancel ():break - current_api_original_filename_for_check =file_info_item .get ('_original_name_for_log') - if not current_api_original_filename_for_check :continue - for filter_item_obj in current_character_filters : - terms_to_check =list (filter_item_obj ["aliases"]) - if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check : - terms_to_check .append (filter_item_obj ["name"]) - for term_to_match in terms_to_check : - if is_filename_match_for_character (current_api_original_filename_for_check ,term_to_match ): - post_is_candidate_by_file_char_match_in_comment_scope =True - char_filter_that_matched_file_in_comment_scope =filter_item_obj - self .logger (f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check }' matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}'). Post is candidate.") + effective_unwanted_keywords_for_folder_naming =self .unwanted_keywords .copy () + is_full_creator_download_no_char_filter =not self .target_post_id_from_initial_url and not current_character_filters + if is_full_creator_download_no_char_filter and self .creator_download_folder_ignore_words : + self .logger (f" Applying creator download specific folder ignore words ({len (self .creator_download_folder_ignore_words )} words).") + effective_unwanted_keywords_for_folder_naming .update (self .creator_download_folder_ignore_words ) + + post_content_html =post_data .get ('content','') + self .logger (f"\n--- Processing Post {post_id } ('{post_title [:50 ]}...') (Thread: {threading .current_thread ().name }) ---") + num_potential_files_in_post =len (post_attachments or [])+(1 if post_main_file_info and post_main_file_info .get ('path')else 0 ) + post_is_candidate_by_title_char_match =False + char_filter_that_matched_title =None + post_is_candidate_by_comment_char_match =False + post_is_candidate_by_file_char_match_in_comment_scope =False + char_filter_that_matched_file_in_comment_scope =None + char_filter_that_matched_comment =None + if current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH ): + if self ._check_pause (f"Character title filter for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + for idx ,filter_item_obj in enumerate (current_character_filters ): + if self .check_cancel ():break + terms_to_check_for_title =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]: + if filter_item_obj ["name"]not in terms_to_check_for_title : + terms_to_check_for_title .append (filter_item_obj ["name"]) + unique_terms_for_title_check =list (set (terms_to_check_for_title )) + for term_to_match in unique_terms_for_title_check : + match_found_for_term =is_title_match_for_character (post_title ,term_to_match ) + if match_found_for_term : + post_is_candidate_by_title_char_match =True + char_filter_that_matched_title =filter_item_obj + self .logger (f" Post title matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}', Scope: {self .char_filter_scope }). Post is candidate.") break + if post_is_candidate_by_title_char_match :break + all_files_from_post_api_for_char_check =[] + api_file_domain_for_char_check =urlparse (self .api_url_input ).netloc + if not api_file_domain_for_char_check or not any (d in api_file_domain_for_char_check .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): + api_file_domain_for_char_check ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" + if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'): + original_api_name =post_main_file_info .get ('name')or os .path .basename (post_main_file_info ['path'].lstrip ('/')) + if original_api_name : + all_files_from_post_api_for_char_check .append ({'_original_name_for_log':original_api_name }) + for att_info in post_attachments : + if isinstance (att_info ,dict )and att_info .get ('path'): + original_api_att_name =att_info .get ('name')or os .path .basename (att_info ['path'].lstrip ('/')) + if original_api_att_name : + all_files_from_post_api_for_char_check .append ({'_original_name_for_log':original_api_att_name }) + if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS : + self .logger (f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id }'.") + if self ._check_pause (f"File check (comments scope) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + for file_info_item in all_files_from_post_api_for_char_check : + if self .check_cancel ():break + current_api_original_filename_for_check =file_info_item .get ('_original_name_for_log') + if not current_api_original_filename_for_check :continue + for filter_item_obj in current_character_filters : + terms_to_check =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check : + terms_to_check .append (filter_item_obj ["name"]) + for term_to_match in terms_to_check : + if is_filename_match_for_character (current_api_original_filename_for_check ,term_to_match ): + post_is_candidate_by_file_char_match_in_comment_scope =True + char_filter_that_matched_file_in_comment_scope =filter_item_obj + self .logger (f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check }' matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}'). Post is candidate.") + break + if post_is_candidate_by_file_char_match_in_comment_scope :break if post_is_candidate_by_file_char_match_in_comment_scope :break - if post_is_candidate_by_file_char_match_in_comment_scope :break - self .logger (f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope }") - if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS : - if not post_is_candidate_by_file_char_match_in_comment_scope : - if self ._check_pause (f"Comment check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - self .logger (f" [Char Scope: Comments] Phase 2: No file match found. Checking post comments for post ID '{post_id }'.") - try : - parsed_input_url_for_comments =urlparse (self .api_url_input ) - api_domain_for_comments =parsed_input_url_for_comments .netloc - if not any (d in api_domain_for_comments .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): - self .logger (f"⚠️ Unrecognized domain '{api_domain_for_comments }' for comment API. Defaulting based on service.") - api_domain_for_comments ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" - comments_data =fetch_post_comments ( - api_domain_for_comments ,self .service ,self .user_id ,post_id , - headers ,self .logger ,self .cancellation_event ,self .pause_event , - cookies_dict =prepare_cookies_for_request ( - self .use_cookie ,self .cookie_text ,self .selected_cookie_file ,self .app_base_dir ,self .logger - ) - ) - if comments_data : - self .logger (f" Fetched {len (comments_data )} comments for post {post_id }.") - for comment_item_idx ,comment_item in enumerate (comments_data ): - if self .check_cancel ():break - raw_comment_content =comment_item .get ('content','') - if not raw_comment_content :continue - cleaned_comment_text =strip_html_tags (raw_comment_content ) - if not cleaned_comment_text .strip ():continue - for filter_item_obj in current_character_filters : - terms_to_check_comment =list (filter_item_obj ["aliases"]) - if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_comment : - terms_to_check_comment .append (filter_item_obj ["name"]) - for term_to_match_comment in terms_to_check_comment : - if is_title_match_for_character (cleaned_comment_text ,term_to_match_comment ): - post_is_candidate_by_comment_char_match =True - char_filter_that_matched_comment =filter_item_obj - self .logger (f" Match Found (Comment in Comments Scope): Comment in post {post_id } matches char filter term '{term_to_match_comment }' (from group/name '{filter_item_obj ['name']}'). Post is candidate.") - self .logger (f" Matching comment (first 100 chars): '{cleaned_comment_text [:100 ]}...'") - break + self .logger (f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope }") + if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS : + if not post_is_candidate_by_file_char_match_in_comment_scope : + if self ._check_pause (f"Comment check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + self .logger (f" [Char Scope: Comments] Phase 2: No file match found. Checking post comments for post ID '{post_id }'.") + try : + parsed_input_url_for_comments =urlparse (self .api_url_input ) + api_domain_for_comments =parsed_input_url_for_comments .netloc + if not any (d in api_domain_for_comments .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): + self .logger (f"⚠️ Unrecognized domain '{api_domain_for_comments }' for comment API. Defaulting based on service.") + api_domain_for_comments ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" + comments_data =fetch_post_comments ( + api_domain_for_comments ,self .service ,self .user_id ,post_id , + headers ,self .logger ,self .cancellation_event ,self .pause_event , + cookies_dict =prepare_cookies_for_request ( + self .use_cookie ,self .cookie_text ,self .selected_cookie_file ,self .app_base_dir ,self .logger + ) + ) + if comments_data : + self .logger (f" Fetched {len (comments_data )} comments for post {post_id }.") + for comment_item_idx ,comment_item in enumerate (comments_data ): + if self .check_cancel ():break + raw_comment_content =comment_item .get ('content','') + if not raw_comment_content :continue + cleaned_comment_text =strip_html_tags (raw_comment_content ) + if not cleaned_comment_text .strip ():continue + for filter_item_obj in current_character_filters : + terms_to_check_comment =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_comment : + terms_to_check_comment .append (filter_item_obj ["name"]) + for term_to_match_comment in terms_to_check_comment : + if is_title_match_for_character (cleaned_comment_text ,term_to_match_comment ): + post_is_candidate_by_comment_char_match =True + char_filter_that_matched_comment =filter_item_obj + self .logger (f" Match Found (Comment in Comments Scope): Comment in post {post_id } matches char filter term '{term_to_match_comment }' (from group/name '{filter_item_obj ['name']}'). Post is candidate.") + self .logger (f" Matching comment (first 100 chars): '{cleaned_comment_text [:100 ]}...'") + break + if post_is_candidate_by_comment_char_match :break if post_is_candidate_by_comment_char_match :break - if post_is_candidate_by_comment_char_match :break - else : - self .logger (f" No comments found or fetched for post {post_id } to check against character filters.") - except RuntimeError as e_fetch_comment : - self .logger (f" ⚠️ Error fetching or processing comments for post {post_id }: {e_fetch_comment }") - except Exception as e_generic_comment : - self .logger (f" ❌ Unexpected error during comment processing for post {post_id }: {e_generic_comment }\n{traceback .format_exc (limit =2 )}") - self .logger (f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match }") - else : - self .logger (f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id }' because a file match already made it a candidate.") - if current_character_filters : - if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match : - self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.") - self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter") - return 0 ,num_potential_files_in_post ,[],[],[],None, None - if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match : - self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'") - if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'): - self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)") - return 0 ,num_potential_files_in_post ,[],[],[],None, None - if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ): - if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - post_title_lower =post_title .lower () - for skip_word in self .skip_words_list : - if skip_word .lower ()in post_title_lower : - self .logger (f" -> Skip Post (Keyword in Title '{skip_word }'): '{post_title [:50 ]}...'. Scope: {self .skip_words_scope }") - return 0 ,num_potential_files_in_post ,[],[],[],None, None - if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match : - self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.") - self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)") - return 0 ,num_potential_files_in_post ,[],[],[],None, None - if not isinstance (post_attachments ,list ): - self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). Skipping attachments.") - post_attachments =[] - base_folder_names_for_post_content =[] - determined_post_save_path_for_history =self .override_output_dir if self .override_output_dir else self .download_root - if not self .extract_links_only and self .use_subfolders : - if self ._check_pause (f"Subfolder determination for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - primary_char_filter_for_folder =None - log_reason_for_folder ="" - if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment : - if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope : - primary_char_filter_for_folder =char_filter_that_matched_file_in_comment_scope - log_reason_for_folder ="Matched char filter in filename (Comments scope)" - elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment : - primary_char_filter_for_folder =char_filter_that_matched_comment - log_reason_for_folder ="Matched char filter in comments (Comments scope, no file match)" - elif (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and char_filter_that_matched_title : - primary_char_filter_for_folder =char_filter_that_matched_title - log_reason_for_folder ="Matched char filter in title" - if primary_char_filter_for_folder : - base_folder_names_for_post_content =[clean_folder_name (primary_char_filter_for_folder ["name"])] - cleaned_primary_folder_name =clean_folder_name (primary_char_filter_for_folder ["name"]) - if cleaned_primary_folder_name .lower ()in effective_unwanted_keywords_for_folder_naming and cleaned_primary_folder_name .lower ()!="untitled_folder": - self .logger (f" ⚠️ Primary char filter folder name '{cleaned_primary_folder_name }' is in ignore list. Using generic name.") - base_folder_names_for_post_content =["Generic Post Content"] - else : - base_folder_names_for_post_content =[cleaned_primary_folder_name ] - self .logger (f" Base folder name(s) for post content ({log_reason_for_folder }): {', '.join (base_folder_names_for_post_content )}") - elif not current_character_filters : - - derived_folders_from_title_via_known_txt =match_folders_from_title ( - post_title , - self .known_names , - effective_unwanted_keywords_for_folder_naming - ) - - valid_derived_folders_from_title_known_txt =[ - name for name in derived_folders_from_title_via_known_txt - if name and name .strip ()and name .lower ()!="untitled_folder" - ] - - if valid_derived_folders_from_title_known_txt : - base_folder_names_for_post_content .extend (valid_derived_folders_from_title_known_txt ) - self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Post Title): {', '.join (base_folder_names_for_post_content )}") - else : - - - - - candidate_name_from_title_basic_clean =extract_folder_name_from_title ( - post_title , - FOLDER_NAME_STOP_WORDS - ) - - title_is_only_creator_ignored_words =False - if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words : - - candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()} - if candidate_title_words and candidate_title_words .issubset (self .creator_download_folder_ignore_words ): - title_is_only_creator_ignored_words =True - self .logger (f" Title-derived name '{candidate_name_from_title_basic_clean }' consists only of creator-specific ignore words.") - - if title_is_only_creator_ignored_words : - - self .logger (f" Attempting Known.txt match on filenames as title was poor ('{candidate_name_from_title_basic_clean }').") - - filenames_to_check =[ - f_info ['_original_name_for_log']for f_info in all_files_from_post_api_for_char_check - if f_info .get ('_original_name_for_log') - ] - - derived_folders_from_filenames_known_txt =set () - if filenames_to_check : - for fname in filenames_to_check : - matches =match_folders_from_title ( - fname , - self .known_names , - effective_unwanted_keywords_for_folder_naming - ) - for m in matches : - if m and m .strip ()and m .lower ()!="untitled_folder": - derived_folders_from_filenames_known_txt .add (m ) - - if derived_folders_from_filenames_known_txt : - base_folder_names_for_post_content .extend (list (derived_folders_from_filenames_known_txt )) - self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Filenames): {', '.join (base_folder_names_for_post_content )}") else : - final_title_extract =extract_folder_name_from_title ( + self .logger (f" No comments found or fetched for post {post_id } to check against character filters.") + except RuntimeError as e_fetch_comment : + self .logger (f" ⚠️ Error fetching or processing comments for post {post_id }: {e_fetch_comment }") + except Exception as e_generic_comment : + self .logger (f" ❌ Unexpected error during comment processing for post {post_id }: {e_generic_comment }\n{traceback .format_exc (limit =2 )}") + self .logger (f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match }") + else : + self .logger (f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id }' because a file match already made it a candidate.") + if current_character_filters : + if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match : + self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.") + self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match : + self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'") + if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'): + self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ): + if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + post_title_lower =post_title .lower () + for skip_word in self .skip_words_list : + if skip_word .lower ()in post_title_lower : + self .logger (f" -> Skip Post (Keyword in Title '{skip_word }'): '{post_title [:50 ]}...'. Scope: {self .skip_words_scope }") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match : + self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.") + self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if not isinstance (post_attachments ,list ): + self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). Skipping attachments.") + post_attachments =[] + base_folder_names_for_post_content =[] + determined_post_save_path_for_history =self .override_output_dir if self .override_output_dir else self .download_root + if not self .extract_links_only and self .use_subfolders : + if self ._check_pause (f"Subfolder determination for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + primary_char_filter_for_folder =None + log_reason_for_folder ="" + if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment : + if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope : + primary_char_filter_for_folder =char_filter_that_matched_file_in_comment_scope + log_reason_for_folder ="Matched char filter in filename (Comments scope)" + elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment : + primary_char_filter_for_folder =char_filter_that_matched_comment + log_reason_for_folder ="Matched char filter in comments (Comments scope, no file match)" + elif (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and char_filter_that_matched_title : + primary_char_filter_for_folder =char_filter_that_matched_title + log_reason_for_folder ="Matched char filter in title" + if primary_char_filter_for_folder : + base_folder_names_for_post_content =[clean_folder_name (primary_char_filter_for_folder ["name"])] + cleaned_primary_folder_name =clean_folder_name (primary_char_filter_for_folder ["name"]) + if cleaned_primary_folder_name .lower ()in effective_unwanted_keywords_for_folder_naming and cleaned_primary_folder_name .lower ()!="untitled_folder": + self .logger (f" ⚠️ Primary char filter folder name '{cleaned_primary_folder_name }' is in ignore list. Using generic name.") + base_folder_names_for_post_content =["Generic Post Content"] + else : + base_folder_names_for_post_content =[cleaned_primary_folder_name ] + self .logger (f" Base folder name(s) for post content ({log_reason_for_folder }): {', '.join (base_folder_names_for_post_content )}") + elif not current_character_filters : + derived_folders_from_title_via_known_txt =match_folders_from_title ( + post_title , + self .known_names , + effective_unwanted_keywords_for_folder_naming + ) + valid_derived_folders_from_title_known_txt =[ + name for name in derived_folders_from_title_via_known_txt + if name and name .strip ()and name .lower ()!="untitled_folder" + ] + if valid_derived_folders_from_title_known_txt : + base_folder_names_for_post_content .extend (valid_derived_folders_from_title_known_txt ) + self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Post Title): {', '.join (base_folder_names_for_post_content )}") + else : + candidate_name_from_title_basic_clean =extract_folder_name_from_title ( + post_title , + FOLDER_NAME_STOP_WORDS + ) + title_is_only_creator_ignored_words =False + if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words : + candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()} + if candidate_title_words and candidate_title_words .issubset (self .creator_download_folder_ignore_words ): + title_is_only_creator_ignored_words =True + self .logger (f" Title-derived name '{candidate_name_from_title_basic_clean }' consists only of creator-specific ignore words.") + if title_is_only_creator_ignored_words : + self .logger (f" Attempting Known.txt match on filenames as title was poor ('{candidate_name_from_title_basic_clean }').") + filenames_to_check =[ + f_info ['_original_name_for_log']for f_info in all_files_from_post_api_for_char_check + if f_info .get ('_original_name_for_log') + ] + derived_folders_from_filenames_known_txt =set () + if filenames_to_check : + for fname in filenames_to_check : + matches =match_folders_from_title ( + fname , + self .known_names , + effective_unwanted_keywords_for_folder_naming + ) + for m in matches : + if m and m .strip ()and m .lower ()!="untitled_folder": + derived_folders_from_filenames_known_txt .add (m ) + if derived_folders_from_filenames_known_txt : + base_folder_names_for_post_content .extend (list (derived_folders_from_filenames_known_txt )) + self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Filenames): {', '.join (base_folder_names_for_post_content )}") + else : + final_title_extract =extract_folder_name_from_title ( + post_title ,effective_unwanted_keywords_for_folder_naming + ) + base_folder_names_for_post_content .append (final_title_extract ) + self .logger (f" No Known.txt match from filenames. Using title-derived name (with full ignore list): '{final_title_extract }'") + else : + extracted_name_from_title_full_ignore =extract_folder_name_from_title ( post_title ,effective_unwanted_keywords_for_folder_naming ) - base_folder_names_for_post_content .append (final_title_extract ) - self .logger (f" No Known.txt match from filenames. Using title-derived name (with full ignore list): '{final_title_extract }'") - else : - extracted_name_from_title_full_ignore =extract_folder_name_from_title ( - post_title ,effective_unwanted_keywords_for_folder_naming - ) - base_folder_names_for_post_content .append (extracted_name_from_title_full_ignore ) - self .logger (f" Base folder name(s) for post content (Generic title parsing - title not solely creator-ignored words): {', '.join (base_folder_names_for_post_content )}") - - base_folder_names_for_post_content =[ - name for name in base_folder_names_for_post_content if name and name .strip () - ] - if not base_folder_names_for_post_content : - final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content") - base_folder_names_for_post_content =[final_fallback_name ] - self .logger (f" Ultimate fallback folder name: {final_fallback_name }") - - if base_folder_names_for_post_content : - determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ]) - - if not self .extract_links_only and self .use_post_subfolders : - cleaned_post_title_for_sub =clean_folder_name (post_title ) - post_id_for_fallback =self .post .get ('id','unknown_id') - - - if not cleaned_post_title_for_sub or cleaned_post_title_for_sub =="untitled_folder": - self .logger (f" ⚠️ Post title '{post_title }' resulted in a generic subfolder name. Using 'post_{post_id_for_fallback }' as base.") - original_cleaned_post_title_for_sub =f"post_{post_id_for_fallback }" - else : - original_cleaned_post_title_for_sub =cleaned_post_title_for_sub - - if self.use_date_prefix_for_subfolder: - # Prioritize 'published' date, fall back to 'added' date - published_date_str = self.post.get('published') or self.post.get('added') - if published_date_str: - try: - # Extract just the date part (YYYY-MM-DD) - date_prefix = published_date_str.split('T')[0] - # Prepend the date to the folder name - original_cleaned_post_title_for_sub = f"{date_prefix} {original_cleaned_post_title_for_sub}" - self.logger(f" ℹ️ Applying date prefix to subfolder: '{original_cleaned_post_title_for_sub}'") - except Exception as e: - self.logger(f" ⚠️ Could not parse date '{published_date_str}' for prefix. Using original name. Error: {e}") - else: - self.logger(" ⚠️ 'Date Prefix' is checked, but post has no 'published' or 'added' date. Omitting prefix.") - - base_path_for_post_subfolder =determined_post_save_path_for_history - - suffix_counter =0 - final_post_subfolder_name ="" - - while True : - if suffix_counter ==0 : - name_candidate =original_cleaned_post_title_for_sub + base_folder_names_for_post_content .append (extracted_name_from_title_full_ignore ) + self .logger (f" Base folder name(s) for post content (Generic title parsing - title not solely creator-ignored words): {', '.join (base_folder_names_for_post_content )}") + base_folder_names_for_post_content =[ + name for name in base_folder_names_for_post_content if name and name .strip () + ] + if not base_folder_names_for_post_content : + final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content") + base_folder_names_for_post_content =[final_fallback_name ] + self .logger (f" Ultimate fallback folder name: {final_fallback_name }") + if base_folder_names_for_post_content : + determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ]) + if not self .extract_links_only and self .use_post_subfolders : + cleaned_post_title_for_sub =clean_folder_name (post_title ) + post_id_for_fallback =self .post .get ('id','unknown_id') + if not cleaned_post_title_for_sub or cleaned_post_title_for_sub =="untitled_folder": + self .logger (f" ⚠️ Post title '{post_title }' resulted in a generic subfolder name. Using 'post_{post_id_for_fallback }' as base.") + original_cleaned_post_title_for_sub =f"post_{post_id_for_fallback }" else : - name_candidate =f"{original_cleaned_post_title_for_sub }_{suffix_counter }" - - potential_post_subfolder_path =os .path .join (base_path_for_post_subfolder ,name_candidate ) - - try : - os .makedirs (potential_post_subfolder_path ,exist_ok =False ) - final_post_subfolder_name =name_candidate - if suffix_counter >0 : - self .logger (f" Post subfolder name conflict: Using '{final_post_subfolder_name }' instead of '{original_cleaned_post_title_for_sub }' to avoid mixing posts.") - break - except FileExistsError : - suffix_counter +=1 - if suffix_counter >100 : - self .logger (f" ⚠️ Exceeded 100 attempts to find unique subfolder name for '{original_cleaned_post_title_for_sub }'. Using UUID.") - final_post_subfolder_name =f"{original_cleaned_post_title_for_sub }_{uuid .uuid4 ().hex [:8 ]}" - os .makedirs (os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name ),exist_ok =True ) + original_cleaned_post_title_for_sub =cleaned_post_title_for_sub + if self.use_date_prefix_for_subfolder: + published_date_str = self.post.get('published') or self.post.get('added') + if published_date_str: + try: + date_prefix = published_date_str.split('T')[0] + original_cleaned_post_title_for_sub = f"{date_prefix} {original_cleaned_post_title_for_sub}" + self.logger(f" ℹ️ Applying date prefix to subfolder: '{original_cleaned_post_title_for_sub}'") + except Exception as e: + self.logger(f" ⚠️ Could not parse date '{published_date_str}' for prefix. Using original name. Error: {e}") + else: + self.logger(" ⚠️ 'Date Prefix' is checked, but post has no 'published' or 'added' date. Omitting prefix.") + base_path_for_post_subfolder =determined_post_save_path_for_history + suffix_counter =0 + final_post_subfolder_name ="" + while True : + if suffix_counter ==0 : + name_candidate =original_cleaned_post_title_for_sub + else : + name_candidate =f"{original_cleaned_post_title_for_sub }_{suffix_counter }" + potential_post_subfolder_path =os .path .join (base_path_for_post_subfolder ,name_candidate ) + try : + os .makedirs (potential_post_subfolder_path ,exist_ok =False ) + final_post_subfolder_name =name_candidate + if suffix_counter >0 : + self .logger (f" Post subfolder name conflict: Using '{final_post_subfolder_name }' instead of '{original_cleaned_post_title_for_sub }' to avoid mixing posts.") break - except OSError as e_mkdir : - self .logger (f" ❌ Error creating directory '{potential_post_subfolder_path }': {e_mkdir }. Files for this post might be saved in parent or fail.") - final_post_subfolder_name =original_cleaned_post_title_for_sub - break + except FileExistsError : + suffix_counter +=1 + if suffix_counter >100 : + self .logger (f" ⚠️ Exceeded 100 attempts to find unique subfolder name for '{original_cleaned_post_title_for_sub }'. Using UUID.") + final_post_subfolder_name =f"{original_cleaned_post_title_for_sub }_{uuid .uuid4 ().hex [:8 ]}" + os .makedirs (os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name ),exist_ok =True ) + break + except OSError as e_mkdir : + self .logger (f" ❌ Error creating directory '{potential_post_subfolder_path }': {e_mkdir }. Files for this post might be saved in parent or fail.") + final_post_subfolder_name =original_cleaned_post_title_for_sub + break + determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name ) - determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name ) - if self.filter_mode == 'text_only' and not self.extract_links_only: - self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})") - - # --- Apply Title-based filters to ensure post is a candidate --- - post_title_lower = post_title.lower() - if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH): - for skip_word in self.skip_words_list: - if skip_word.lower() in post_title_lower: - self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.") - return 0, num_potential_files_in_post, [], [], [], None, None - - if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope: - self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.") - return 0, num_potential_files_in_post, [], [], [], None, None - - # --- Get the text content based on scope --- - raw_text_content = "" - final_post_data = post_data - - # Fetch full post data if content is missing and scope is 'content' - if self.text_only_scope == 'content' and 'content' not in final_post_data: - self.logger(f" Post {post_id} is missing 'content' field, fetching full data...") - parsed_url = urlparse(self.api_url_input) - api_domain = parsed_url.netloc - cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain) - - from .api_client import fetch_single_post_data # Local import to avoid circular dependency issues - full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies) - if full_data: - final_post_data = full_data - - if self.text_only_scope == 'content': - raw_text_content = final_post_data.get('content', '') - elif self.text_only_scope == 'comments': - try: + if self.filter_mode == 'text_only' and not self.extract_links_only: + self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})") + post_title_lower = post_title.lower() + if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH): + for skip_word in self.skip_words_list: + if skip_word.lower() in post_title_lower: + self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.") + return 0, num_potential_files_in_post, [], [], [], None, None + if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope: + self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.") + return 0, num_potential_files_in_post, [], [], [], None, None + raw_text_content = "" + final_post_data = post_data + if self.text_only_scope == 'content' and 'content' not in final_post_data: + self.logger(f" Post {post_id} is missing 'content' field, fetching full data...") parsed_url = urlparse(self.api_url_input) api_domain = parsed_url.netloc - comments_data = fetch_post_comments(api_domain, self.service, self.user_id, post_id, headers, self.logger, self.cancellation_event, self.pause_event) - if comments_data: - comment_texts = [] - for comment in comments_data: - user = comment.get('user', {}).get('name', 'Unknown User') - timestamp = comment.get('updated', 'No Date') - body = strip_html_tags(comment.get('content', '')) - comment_texts.append(f"--- Comment by {user} on {timestamp} ---\n{body}\n") - raw_text_content = "\n".join(comment_texts) - except Exception as e: - self.logger(f" ❌ Error fetching comments for text-only mode: {e}") - - if not raw_text_content or not raw_text_content.strip(): - self.logger(" -> Skip Saving Text: No content/comments found or fetched.") - return 0, num_potential_files_in_post, [], [], [], None, None - - # --- Robust HTML-to-TEXT Conversion --- - paragraph_pattern = re.compile(r'(.*?)

', re.IGNORECASE | re.DOTALL) - html_paragraphs = paragraph_pattern.findall(raw_text_content) - cleaned_text = "" - if not html_paragraphs: - self.logger(" ⚠️ No

tags found. Falling back to basic HTML cleaning for the whole block.") - text_with_br = re.sub(r'', '\n', raw_text_content, flags=re.IGNORECASE) - cleaned_text = re.sub(r'<.*?>', '', text_with_br) - else: - cleaned_paragraphs_list = [] - for p_content in html_paragraphs: - p_with_br = re.sub(r'', '\n', p_content, flags=re.IGNORECASE) - p_cleaned = re.sub(r'<.*?>', '', p_with_br) - p_final = html.unescape(p_cleaned).strip() - if p_final: - cleaned_paragraphs_list.append(p_final) - cleaned_text = '\n\n'.join(cleaned_paragraphs_list) - cleaned_text = cleaned_text.replace('…', '...') - - # --- Logic for Single PDF Mode (File-based) --- - if self.single_pdf_mode: - if not cleaned_text: - return 0, 0, [], [], [], None, None - - content_data = { - 'title': post_title, - 'content': cleaned_text, - 'published': self.post.get('published') or self.post.get('added') - } - temp_dir = os.path.join(self.app_base_dir, "appdata") - os.makedirs(temp_dir, exist_ok=True) - temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json" - temp_filepath = os.path.join(temp_dir, temp_filename) - - try: - with open(temp_filepath, 'w', encoding='utf-8') as f: - json.dump(content_data, f, indent=2) - self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.") - self._emit_signal('worker_finished', (0, 0, [], [], [], None, temp_filepath)) # <--- CHANGE THIS - return (0, 0, [], [], [], None, temp_filepath) - except Exception as e: - self.logger(f" ❌ Failed to write temporary file for single PDF: {e}") - self._emit_signal('worker_finished', (0, 0, [], [], [], [], None)) - return (0, 0, [], [], [], [], None) - - # --- Logic for Individual File Saving --- - else: - file_extension = self.text_export_format - txt_filename = clean_filename(post_title) + f".{file_extension}" - final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename) - - try: - os.makedirs(determined_post_save_path_for_history, exist_ok=True) - base, ext = os.path.splitext(final_save_path) - counter = 1 - while os.path.exists(final_save_path): - final_save_path = f"{base}_{counter}{ext}" - counter += 1 - - if file_extension == 'pdf': - if FPDF: - self.logger(f" Converting to PDF...") - pdf = PDF() - font_path = "" - if self.project_root_dir: - font_path = os.path.join(self.project_root_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf') - try: - if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}") - pdf.add_font('DejaVu', '', font_path, uni=True) - pdf.set_font('DejaVu', '', 12) - except Exception as font_error: - self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.") - pdf.set_font('Arial', '', 12) - pdf.add_page() - pdf.multi_cell(0, 5, cleaned_text) - pdf.output(final_save_path) - else: - self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.") - final_save_path = os.path.splitext(final_save_path)[0] + ".txt" - with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text) - - elif file_extension == 'docx': - if Document: - self.logger(f" Converting to DOCX...") - document = Document() - document.add_paragraph(cleaned_text) - document.save(final_save_path) - else: - self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.") - final_save_path = os.path.splitext(final_save_path)[0] + ".txt" - with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text) - - else: # Default to TXT - with open(final_save_path, 'w', encoding='utf-8') as f: - f.write(cleaned_text) - - self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'") - return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None - except Exception as e: - self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}") + cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain) + from .api_client import fetch_single_post_data + full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies) + if full_data: + final_post_data = full_data + if self.text_only_scope == 'content': + raw_text_content = final_post_data.get('content', '') + elif self.text_only_scope == 'comments': + try: + parsed_url = urlparse(self.api_url_input) + api_domain = parsed_url.netloc + comments_data = fetch_post_comments(api_domain, self.service, self.user_id, post_id, headers, self.logger, self.cancellation_event, self.pause_event) + if comments_data: + comment_texts = [] + for comment in comments_data: + user = comment.get('user', {}).get('name', 'Unknown User') + timestamp = comment.get('updated', 'No Date') + body = strip_html_tags(comment.get('content', '')) + comment_texts.append(f"--- Comment by {user} on {timestamp} ---\n{body}\n") + raw_text_content = "\n".join(comment_texts) + except Exception as e: + self.logger(f" ❌ Error fetching comments for text-only mode: {e}") + if not raw_text_content or not raw_text_content.strip(): + self.logger(" -> Skip Saving Text: No content/comments found or fetched.") return 0, num_potential_files_in_post, [], [], [], None, None - - if not self .extract_links_only and self .use_subfolders and self .skip_words_list : - if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - for folder_name_to_check in base_folder_names_for_post_content : - if not folder_name_to_check :continue - if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ): - matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word") - self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.") - return 0 ,num_potential_files_in_post ,[],[],[],None, None - if (self .show_external_links or self .extract_links_only )and post_content_html : - if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None - try : - mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b') - unique_links_data ={} - for match in link_pattern .finditer (post_content_html ): - link_url =match .group (1 ).strip () - link_url =html .unescape (link_url ) - link_inner_text =match .group (2 ) - if not any (ext in link_url .lower ()for ext in ['.css','.js','.ico','.xml','.svg'])and not link_url .startswith ('javascript:')and link_url not in unique_links_data : - clean_link_text =re .sub (r'<.*?>','',link_inner_text ) - clean_link_text =html .unescape (clean_link_text ).strip () - display_text =clean_link_text if clean_link_text else "[Link]" - unique_links_data [link_url ]=display_text - links_emitted_count =0 - scraped_platforms ={'kemono','coomer','patreon'} - for link_url ,link_text in unique_links_data .items (): - platform =get_link_platform (link_url ) - decryption_key_found ="" - if platform =='mega': - parsed_mega_url =urlparse (link_url ) - if parsed_mega_url .fragment : - potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ] - if mega_key_pattern .fullmatch (potential_key_from_fragment ): - decryption_key_found =potential_key_from_fragment - - if not decryption_key_found and link_text : - key_match_in_text =mega_key_pattern .search (link_text ) - if key_match_in_text : - decryption_key_found =key_match_in_text .group (1 ) - if not decryption_key_found and self .extract_links_only and post_content_html : - key_match_in_content =mega_key_pattern .search (strip_html_tags (post_content_html )) - if key_match_in_content : - decryption_key_found =key_match_in_content .group (1 ) - if platform not in scraped_platforms : - self ._emit_signal ('external_link',post_title ,link_text ,link_url ,platform ,decryption_key_found or "") - links_emitted_count +=1 - if links_emitted_count >0 :self .logger (f" 🔗 Found {links_emitted_count } potential external link(s) in post content.") - except Exception as e :self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}") - if self .extract_links_only : - self .logger (f" Extract Links Only mode: Finished processing post {post_id } for links.") - return 0 ,0 ,[],[],[],None - all_files_from_post_api =[] - api_file_domain =urlparse (self .api_url_input ).netloc - if not api_file_domain or not any (d in api_file_domain .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): - api_file_domain ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" - if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'): - file_path =post_main_file_info ['path'].lstrip ('/') - original_api_name =post_main_file_info .get ('name')or os .path .basename (file_path ) - if original_api_name : - all_files_from_post_api .append ({ - 'url':f"https://{api_file_domain }{file_path }"if file_path .startswith ('/')else f"https://{api_file_domain }/data/{file_path }", - 'name':original_api_name , - '_original_name_for_log':original_api_name , - '_is_thumbnail':is_image (original_api_name ) - }) - else :self .logger (f" ⚠️ Skipping main file for post {post_id }: Missing name (Path: {file_path })") - for idx ,att_info in enumerate (post_attachments ): - if isinstance (att_info ,dict )and att_info .get ('path'): - att_path =att_info ['path'].lstrip ('/') - original_api_att_name =att_info .get ('name')or os .path .basename (att_path ) - if original_api_att_name : - all_files_from_post_api .append ({ - 'url':f"https://{api_file_domain }{att_path }"if att_path .startswith ('/')else f"https://{api_file_domain }/data/{att_path }", - 'name':original_api_att_name , - '_original_name_for_log':original_api_att_name , - '_is_thumbnail':is_image (original_api_att_name ) - }) - else :self .logger (f" ⚠️ Skipping attachment {idx +1 } for post {post_id }: Missing name (Path: {att_path })") - else :self .logger (f" ⚠️ Skipping invalid attachment {idx +1 } for post {post_id }: {str (att_info )[:100 ]}") - if self .scan_content_for_images and post_content_html and not self .extract_links_only : - self .logger (f" Scanning post content for additional image URLs (Post ID: {post_id })...") - parsed_input_url =urlparse (self .api_url_input ) - base_url_for_relative_paths =f"{parsed_input_url .scheme }://{parsed_input_url .netloc }" - img_ext_pattern ="|".join (ext .lstrip ('.')for ext in IMAGE_EXTENSIONS ) - direct_url_pattern_str =r"""(?i)\b(https?://[^\s"'<>\[\]\{\}\|\^\\^~\[\]`]+\.(?:"""+img_ext_pattern +r"""))\b""" - img_tag_src_pattern_str =r"""]*?src\s*=\s*["']([^"']+)["']""" - found_image_sources =set () - for direct_url_match in re .finditer (direct_url_pattern_str ,post_content_html ): - found_image_sources .add (direct_url_match .group (1 )) - for img_tag_match in re .finditer (img_tag_src_pattern_str ,post_content_html ,re .IGNORECASE ): - src_attr =img_tag_match .group (1 ).strip () - src_attr =html .unescape (src_attr ) - if not src_attr :continue - resolved_src_url ="" - if src_attr .startswith (('http://','https://')): - resolved_src_url =src_attr - elif src_attr .startswith ('//'): - resolved_src_url =f"{parsed_input_url .scheme }:{src_attr }" - elif src_attr .startswith ('/'): - resolved_src_url =f"{base_url_for_relative_paths }{src_attr }" - if resolved_src_url : - parsed_resolved_url =urlparse (resolved_src_url ) - if any (parsed_resolved_url .path .lower ().endswith (ext )for ext in IMAGE_EXTENSIONS ): - found_image_sources .add (resolved_src_url ) - if found_image_sources : - self .logger (f" Found {len (found_image_sources )} potential image URLs/sources in content.") - existing_urls_in_api_list ={f_info ['url']for f_info in all_files_from_post_api } - for found_url in found_image_sources : - if self .check_cancel ():break - if found_url in existing_urls_in_api_list : - self .logger (f" Skipping URL from content (already in API list or previously added from content): {found_url [:70 ]}...") - continue - try : - parsed_found_url =urlparse (found_url ) - url_filename =os .path .basename (parsed_found_url .path ) - if not url_filename or not is_image (url_filename ): - self .logger (f" Skipping URL from content (no filename part or not an image extension): {found_url [:70 ]}...") - continue - self .logger (f" Adding image from content: {url_filename } (URL: {found_url [:70 ]}...)") - all_files_from_post_api .append ({ - 'url':found_url , - 'name':url_filename , - '_original_name_for_log':url_filename , - '_is_thumbnail':False , - '_from_content_scan':True - }) - existing_urls_in_api_list .add (found_url ) - except Exception as e_url_parse : - self .logger (f" Error processing URL from content '{found_url [:70 ]}...': {e_url_parse }") - else : - self .logger (f" No additional image URLs found in post content scan for post {post_id }.") - if self .download_thumbnails : - if self .scan_content_for_images : - self .logger (f" Mode: 'Download Thumbnails Only' + 'Scan Content for Images' active. Prioritizing images from content scan for post {post_id }.") - all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_from_content_scan')] - if not all_files_from_post_api : - self .logger (f" -> No images found via content scan for post {post_id } in this combined mode.") - return 0 ,0 ,[],[],[],None - else : - self .logger (f" Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id }.") - all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_is_thumbnail')] - if not all_files_from_post_api : - self .logger (f" -> No API image thumbnails found for post {post_id } in thumbnail-only mode.") - return 0 ,0 ,[],[],[],None - if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED : - def natural_sort_key_for_files (file_api_info ): - name =file_api_info .get ('_original_name_for_log','').lower () - return [int (text )if text .isdigit ()else text for text in re .split ('([0-9]+)',name )] - all_files_from_post_api .sort (key =natural_sort_key_for_files ) - self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.") - if not all_files_from_post_api : - self .logger (f" No files found to download for post {post_id }.") - return 0 ,0 ,[],[],[],None - files_to_download_info_list =[] - processed_original_filenames_in_this_post =set () - - if self.keep_in_post_duplicates: - # If we keep duplicates, just add every file to the list to be processed. - # The downstream hash check and rename-on-collision logic will handle them. - files_to_download_info_list.extend(all_files_from_post_api) - self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.") - else: - # This is the original logic that skips duplicates by name within a post. - for file_info in all_files_from_post_api: - current_api_original_filename = file_info.get('_original_name_for_log') - if current_api_original_filename in processed_original_filenames_in_this_post: - self.logger(f" -> Skip Duplicate Original Name (within post {post_id}): '{current_api_original_filename}' already processed/listed for this post.") - total_skipped_this_post += 1 + paragraph_pattern = re.compile(r'(.*?)

', re.IGNORECASE | re.DOTALL) + html_paragraphs = paragraph_pattern.findall(raw_text_content) + cleaned_text = "" + if not html_paragraphs: + self.logger(" ⚠️ No

tags found. Falling back to basic HTML cleaning for the whole block.") + text_with_br = re.sub(r'', '\n', raw_text_content, flags=re.IGNORECASE) + cleaned_text = re.sub(r'<.*?>', '', text_with_br) else: - files_to_download_info_list.append(file_info) - if current_api_original_filename: - processed_original_filenames_in_this_post.add(current_api_original_filename) - - if not files_to_download_info_list: - - self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.") - return 0 ,total_skipped_this_post ,[],[],[],None - - self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.") - with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool : - futures_list =[] - for file_idx ,file_info_to_dl in enumerate (files_to_download_info_list ): - if self ._check_pause (f"File processing loop for post {post_id }, file {file_idx }"):break - if self .check_cancel ():break - current_api_original_filename =file_info_to_dl .get ('_original_name_for_log') - file_is_candidate_by_char_filter_scope =False - char_filter_info_that_matched_file =None - if not current_character_filters : - file_is_candidate_by_char_filter_scope =True + cleaned_paragraphs_list = [] + for p_content in html_paragraphs: + p_with_br = re.sub(r'', '\n', p_content, flags=re.IGNORECASE) + p_cleaned = re.sub(r'<.*?>', '', p_with_br) + p_final = html.unescape(p_cleaned).strip() + if p_final: + cleaned_paragraphs_list.append(p_final) + cleaned_text = '\n\n'.join(cleaned_paragraphs_list) + cleaned_text = cleaned_text.replace('…', '...') + if self.single_pdf_mode: + if not cleaned_text: + return 0, 0, [], [], [], None, None + content_data = { + 'title': post_title, + 'content': cleaned_text, + 'published': self.post.get('published') or self.post.get('added') + } + temp_dir = os.path.join(self.app_base_dir, "appdata") + os.makedirs(temp_dir, exist_ok=True) + temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json" + temp_filepath = os.path.join(temp_dir, temp_filename) + try: + with open(temp_filepath, 'w', encoding='utf-8') as f: + json.dump(content_data, f, indent=2) + self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.") + self._emit_signal('worker_finished', (0, 0, [], [], [], None, temp_filepath)) + return (0, 0, [], [], [], None, temp_filepath) + except Exception as e: + self.logger(f" ❌ Failed to write temporary file for single PDF: {e}") + self._emit_signal('worker_finished', (0, 0, [], [], [], [], None)) + return (0, 0, [], [], [], [], None) + else: + file_extension = self.text_export_format + txt_filename = clean_filename(post_title) + f".{file_extension}" + final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename) + try: + os.makedirs(determined_post_save_path_for_history, exist_ok=True) + base, ext = os.path.splitext(final_save_path) + counter = 1 + while os.path.exists(final_save_path): + final_save_path = f"{base}_{counter}{ext}" + counter += 1 + if file_extension == 'pdf': + if FPDF: + self.logger(f" Converting to PDF...") + pdf = PDF() + font_path = "" + if self.project_root_dir: + font_path = os.path.join(self.project_root_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf') + try: + if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}") + pdf.add_font('DejaVu', '', font_path, uni=True) + pdf.set_font('DejaVu', '', 12) + except Exception as font_error: + self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.") + pdf.set_font('Arial', '', 12) + pdf.add_page() + pdf.multi_cell(0, 5, cleaned_text) + pdf.output(final_save_path) + else: + self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.") + final_save_path = os.path.splitext(final_save_path)[0] + ".txt" + with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text) + elif file_extension == 'docx': + if Document: + self.logger(f" Converting to DOCX...") + document = Document() + document.add_paragraph(cleaned_text) + document.save(final_save_path) + else: + self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.") + final_save_path = os.path.splitext(final_save_path)[0] + ".txt" + with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text) + else: + with open(final_save_path, 'w', encoding='utf-8') as f: + f.write(cleaned_text) + self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'") + return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None + except Exception as e: + self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}") + return 0, num_potential_files_in_post, [], [], [], None, None + if not self .extract_links_only and self .use_subfolders and self .skip_words_list : + if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + for folder_name_to_check in base_folder_names_for_post_content : + if not folder_name_to_check :continue + if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ): + matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word") + self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.") + return 0 ,num_potential_files_in_post ,[],[],[],None, None + if (self .show_external_links or self .extract_links_only )and post_content_html : + if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None + try : + mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b') + unique_links_data ={} + for match in link_pattern .finditer (post_content_html ): + link_url =match .group (1 ).strip () + link_url =html .unescape (link_url ) + link_inner_text =match .group (2 ) + if not any (ext in link_url .lower ()for ext in ['.css','.js','.ico','.xml','.svg'])and not link_url .startswith ('javascript:')and link_url not in unique_links_data : + clean_link_text =re .sub (r'<.*?>','',link_inner_text ) + clean_link_text =html .unescape (clean_link_text ).strip () + display_text =clean_link_text if clean_link_text else "[Link]" + unique_links_data [link_url ]=display_text + links_emitted_count =0 + scraped_platforms ={'kemono','coomer','patreon'} + for link_url ,link_text in unique_links_data .items (): + platform =get_link_platform (link_url ) + decryption_key_found ="" + if platform =='mega': + parsed_mega_url =urlparse (link_url ) + if parsed_mega_url .fragment : + potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ] + if mega_key_pattern .fullmatch (potential_key_from_fragment ): + decryption_key_found =potential_key_from_fragment + if not decryption_key_found and link_text : + key_match_in_text =mega_key_pattern .search (link_text ) + if key_match_in_text : + decryption_key_found =key_match_in_text .group (1 ) + if not decryption_key_found and self .extract_links_only and post_content_html : + key_match_in_content =mega_key_pattern .search (strip_html_tags (post_content_html )) + if key_match_in_content : + decryption_key_found =key_match_in_content .group (1 ) + if platform not in scraped_platforms : + self ._emit_signal ('external_link',post_title ,link_text ,link_url ,platform ,decryption_key_found or "") + links_emitted_count +=1 + if links_emitted_count >0 :self .logger (f" 🔗 Found {links_emitted_count } potential external link(s) in post content.") + except Exception as e :self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}") + if self .extract_links_only : + self .logger (f" Extract Links Only mode: Finished processing post {post_id } for links.") + return 0 ,0 ,[],[],[],None + all_files_from_post_api =[] + api_file_domain =urlparse (self .api_url_input ).netloc + if not api_file_domain or not any (d in api_file_domain .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): + api_file_domain ="kemono.su"if "kemono"in self .service .lower ()else "coomer.party" + if post_main_file_info and isinstance (post_main_file_info ,dict )and post_main_file_info .get ('path'): + file_path =post_main_file_info ['path'].lstrip ('/') + original_api_name =post_main_file_info .get ('name')or os .path .basename (file_path ) + if original_api_name : + all_files_from_post_api .append ({ + 'url':f"https://{api_file_domain }{file_path }"if file_path .startswith ('/')else f"https://{api_file_domain }/data/{file_path }", + 'name':original_api_name , + '_original_name_for_log':original_api_name , + '_is_thumbnail':is_image (original_api_name ) + }) + else :self .logger (f" ⚠️ Skipping main file for post {post_id }: Missing name (Path: {file_path })") + for idx ,att_info in enumerate (post_attachments ): + if isinstance (att_info ,dict )and att_info .get ('path'): + att_path =att_info ['path'].lstrip ('/') + original_api_att_name =att_info .get ('name')or os .path .basename (att_path ) + if original_api_att_name : + all_files_from_post_api .append ({ + 'url':f"https://{api_file_domain }{att_path }"if att_path .startswith ('/')else f"https://{api_file_domain }/data/{att_path }", + 'name':original_api_att_name , + '_original_name_for_log':original_api_att_name , + '_is_thumbnail':is_image (original_api_att_name ) + }) + else :self .logger (f" ⚠️ Skipping attachment {idx +1 } for post {post_id }: Missing name (Path: {att_path })") + else :self .logger (f" ⚠️ Skipping invalid attachment {idx +1 } for post {post_id }: {str (att_info )[:100 ]}") + if self .scan_content_for_images and post_content_html and not self .extract_links_only : + self .logger (f" Scanning post content for additional image URLs (Post ID: {post_id })...") + parsed_input_url =urlparse (self .api_url_input ) + base_url_for_relative_paths =f"{parsed_input_url .scheme }://{parsed_input_url .netloc }" + img_ext_pattern ="|".join (ext .lstrip ('.')for ext in IMAGE_EXTENSIONS ) + direct_url_pattern_str =r"""(?i)\b(https?://[^\s"'<>\[\]\{\}\|\^\\^~\[\]`]+\.(?:"""+img_ext_pattern +r"""))\b""" + img_tag_src_pattern_str =r"""]*?src\s*=\s*["']([^"']+)["']""" + found_image_sources =set () + for direct_url_match in re .finditer (direct_url_pattern_str ,post_content_html ): + found_image_sources .add (direct_url_match .group (1 )) + for img_tag_match in re .finditer (img_tag_src_pattern_str ,post_content_html ,re .IGNORECASE ): + src_attr =img_tag_match .group (1 ).strip () + src_attr =html .unescape (src_attr ) + if not src_attr :continue + resolved_src_url ="" + if src_attr .startswith (('http://','https://')): + resolved_src_url =src_attr + elif src_attr .startswith ('//'): + resolved_src_url =f"{parsed_input_url .scheme }:{src_attr }" + elif src_attr .startswith ('/'): + resolved_src_url =f"{base_url_for_relative_paths }{src_attr }" + if resolved_src_url : + parsed_resolved_url =urlparse (resolved_src_url ) + if any (parsed_resolved_url .path .lower ().endswith (ext )for ext in IMAGE_EXTENSIONS ): + found_image_sources .add (resolved_src_url ) + if found_image_sources : + self .logger (f" Found {len (found_image_sources )} potential image URLs/sources in content.") + existing_urls_in_api_list ={f_info ['url']for f_info in all_files_from_post_api } + for found_url in found_image_sources : + if self .check_cancel ():break + if found_url in existing_urls_in_api_list : + self .logger (f" Skipping URL from content (already in API list or previously added from content): {found_url [:70 ]}...") + continue + try : + parsed_found_url =urlparse (found_url ) + url_filename =os .path .basename (parsed_found_url .path ) + if not url_filename or not is_image (url_filename ): + self .logger (f" Skipping URL from content (no filename part or not an image extension): {found_url [:70 ]}...") + continue + self .logger (f" Adding image from content: {url_filename } (URL: {found_url [:70 ]}...)") + all_files_from_post_api .append ({ + 'url':found_url , + 'name':url_filename , + '_original_name_for_log':url_filename , + '_is_thumbnail':False , + '_from_content_scan':True + }) + existing_urls_in_api_list .add (found_url ) + except Exception as e_url_parse : + self .logger (f" Error processing URL from content '{found_url [:70 ]}...': {e_url_parse }") else : - if self .char_filter_scope ==CHAR_SCOPE_FILES : - for filter_item_obj in current_character_filters : - terms_to_check_for_file =list (filter_item_obj ["aliases"]) - if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file : - terms_to_check_for_file .append (filter_item_obj ["name"]) - unique_terms_for_file_check =list (set (terms_to_check_for_file )) - for term_to_match in unique_terms_for_file_check : - if is_filename_match_for_character (current_api_original_filename ,term_to_match ): - file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =filter_item_obj - self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Files.") - break - if file_is_candidate_by_char_filter_scope :break - elif self .char_filter_scope ==CHAR_SCOPE_TITLE : - if post_is_candidate_by_title_char_match : - file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =char_filter_that_matched_title - self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Title.") - elif self .char_filter_scope ==CHAR_SCOPE_BOTH : - if post_is_candidate_by_title_char_match : - file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =char_filter_that_matched_title - self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Both (Title part).") - else : - for filter_item_obj_both_file in current_character_filters : - terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"]) - if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both : - terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"]) - unique_terms_for_file_both_check =list (set (terms_to_check_for_file_both )) - for term_to_match in unique_terms_for_file_both_check : + self .logger (f" No additional image URLs found in post content scan for post {post_id }.") + if self .download_thumbnails : + if self .scan_content_for_images : + self .logger (f" Mode: 'Download Thumbnails Only' + 'Scan Content for Images' active. Prioritizing images from content scan for post {post_id }.") + all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_from_content_scan')] + if not all_files_from_post_api : + self .logger (f" -> No images found via content scan for post {post_id } in this combined mode.") + return 0 ,0 ,[],[],[],None + else : + self .logger (f" Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id }.") + all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_is_thumbnail')] + if not all_files_from_post_api : + self .logger (f" -> No API image thumbnails found for post {post_id } in thumbnail-only mode.") + return 0 ,0 ,[],[],[],None + if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED : + def natural_sort_key_for_files (file_api_info ): + name =file_api_info .get ('_original_name_for_log','').lower () + return [int (text )if text .isdigit ()else text for text in re .split ('([0-9]+)',name )] + all_files_from_post_api .sort (key =natural_sort_key_for_files ) + self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.") + if not all_files_from_post_api : + self .logger (f" No files found to download for post {post_id }.") + return 0 ,0 ,[],[],[],None + files_to_download_info_list =[] + processed_original_filenames_in_this_post =set () + if self.keep_in_post_duplicates: + files_to_download_info_list.extend(all_files_from_post_api) + self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.") + else: + for file_info in all_files_from_post_api: + current_api_original_filename = file_info.get('_original_name_for_log') + if current_api_original_filename in processed_original_filenames_in_this_post: + self.logger(f" -> Skip Duplicate Original Name (within post {post_id}): '{current_api_original_filename}' already processed/listed for this post.") + total_skipped_this_post += 1 + else: + files_to_download_info_list.append(file_info) + if current_api_original_filename: + processed_original_filenames_in_this_post.add(current_api_original_filename) + if not files_to_download_info_list: + self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.") + return 0 ,total_skipped_this_post ,[],[],[],None + self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.") + with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool : + futures_list =[] + for file_idx ,file_info_to_dl in enumerate (files_to_download_info_list ): + if self ._check_pause (f"File processing loop for post {post_id }, file {file_idx }"):break + if self .check_cancel ():break + current_api_original_filename =file_info_to_dl .get ('_original_name_for_log') + file_is_candidate_by_char_filter_scope =False + char_filter_info_that_matched_file =None + if not current_character_filters : + file_is_candidate_by_char_filter_scope =True + else : + if self .char_filter_scope ==CHAR_SCOPE_FILES : + for filter_item_obj in current_character_filters : + terms_to_check_for_file =list (filter_item_obj ["aliases"]) + if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file : + terms_to_check_for_file .append (filter_item_obj ["name"]) + unique_terms_for_file_check =list (set (terms_to_check_for_file )) + for term_to_match in unique_terms_for_file_check : if is_filename_match_for_character (current_api_original_filename ,term_to_match ): file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =filter_item_obj_both_file - self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Both (File part).") + char_filter_info_that_matched_file =filter_item_obj + self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Files.") break if file_is_candidate_by_char_filter_scope :break - elif self .char_filter_scope ==CHAR_SCOPE_COMMENTS : - if post_is_candidate_by_file_char_match_in_comment_scope : - file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =char_filter_that_matched_file_in_comment_scope - self .logger (f" File '{current_api_original_filename }' is candidate because a file in this post matched char filter (Overall Scope: Comments).") - elif post_is_candidate_by_comment_char_match : - file_is_candidate_by_char_filter_scope =True - char_filter_info_that_matched_file =char_filter_that_matched_comment - self .logger (f" File '{current_api_original_filename }' is candidate because post comments matched char filter (Overall Scope: Comments).") - if not file_is_candidate_by_char_filter_scope : - self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.") - total_skipped_this_post +=1 - continue - - - target_base_folders_for_this_file_iteration =[] - - if current_character_filters : - char_title_subfolder_name =None - if self .target_post_id_from_initial_url and self .custom_folder_name : - char_title_subfolder_name =self .custom_folder_name - elif char_filter_info_that_matched_file : - char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"]) - elif char_filter_that_matched_title : - char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"]) - elif char_filter_that_matched_comment : - char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"]) - if char_title_subfolder_name : - target_base_folders_for_this_file_iteration .append (char_title_subfolder_name ) + elif self .char_filter_scope ==CHAR_SCOPE_TITLE : + if post_is_candidate_by_title_char_match : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_title + self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Title.") + elif self .char_filter_scope ==CHAR_SCOPE_BOTH : + if post_is_candidate_by_title_char_match : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_title + self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Both (Title part).") + else : + for filter_item_obj_both_file in current_character_filters : + terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"]) + if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both : + terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"]) + unique_terms_for_file_both_check =list (set (terms_to_check_for_file_both )) + for term_to_match in unique_terms_for_file_both_check : + if is_filename_match_for_character (current_api_original_filename ,term_to_match ): + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =filter_item_obj_both_file + self .logger (f" File '{current_api_original_filename }' matches char filter term '{term_to_match }' (from '{filter_item_obj ['name']}'). Scope: Both (File part).") + break + if file_is_candidate_by_char_filter_scope :break + elif self .char_filter_scope ==CHAR_SCOPE_COMMENTS : + if post_is_candidate_by_file_char_match_in_comment_scope : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_file_in_comment_scope + self .logger (f" File '{current_api_original_filename }' is candidate because a file in this post matched char filter (Overall Scope: Comments).") + elif post_is_candidate_by_comment_char_match : + file_is_candidate_by_char_filter_scope =True + char_filter_info_that_matched_file =char_filter_that_matched_comment + self .logger (f" File '{current_api_original_filename }' is candidate because post comments matched char filter (Overall Scope: Comments).") + if not file_is_candidate_by_char_filter_scope : + self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.") + total_skipped_this_post +=1 + continue + target_base_folders_for_this_file_iteration =[] + if current_character_filters : + char_title_subfolder_name =None + if self .target_post_id_from_initial_url and self .custom_folder_name : + char_title_subfolder_name =self .custom_folder_name + elif char_filter_info_that_matched_file : + char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"]) + elif char_filter_that_matched_title : + char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"]) + elif char_filter_that_matched_comment : + char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"]) + if char_title_subfolder_name : + target_base_folders_for_this_file_iteration .append (char_title_subfolder_name ) + else : + self .logger (f"⚠️ File '{current_api_original_filename }' candidate by char filter, but no folder name derived. Using post title.") + target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title )) else : - self .logger (f"⚠️ File '{current_api_original_filename }' candidate by char filter, but no folder name derived. Using post title.") - target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title )) - else : - if base_folder_names_for_post_content : - target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content ) - else : - target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title )) - - if not target_base_folders_for_this_file_iteration : - target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content")) - - for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration : - current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root - if self .use_subfolders and target_base_folder_name_for_instance : - current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance ) - if self .use_post_subfolders : - - current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name ) - - manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None - manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None - - - folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title ) - - futures_list .append (file_pool .submit ( - self ._download_single_file , - file_info =file_info_to_dl , - target_folder_path =current_path_for_file_instance , - headers =headers ,original_post_id_for_log =post_id ,skip_event =self .skip_current_file_flag , - post_title =post_title ,manga_date_file_counter_ref =manga_date_counter_to_pass , - manga_global_file_counter_ref =manga_global_counter_to_pass ,folder_context_name_for_history =folder_context_for_file , - file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list ) - )) - - for future in as_completed (futures_list ): - if self .check_cancel (): - for f_to_cancel in futures_list : - if not f_to_cancel .done (): - f_to_cancel .cancel () - break + if base_folder_names_for_post_content : + target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content ) + else : + target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title )) + if not target_base_folders_for_this_file_iteration : + target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content")) + for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration : + current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root + if self .use_subfolders and target_base_folder_name_for_instance : + current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance ) + if self .use_post_subfolders : + current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name ) + manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None + manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None + folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title ) + futures_list .append (file_pool .submit ( + self ._download_single_file , + file_info =file_info_to_dl , + target_folder_path =current_path_for_file_instance , + headers =headers ,original_post_id_for_log =post_id ,skip_event =self .skip_current_file_flag , + post_title =post_title ,manga_date_file_counter_ref =manga_date_counter_to_pass , + manga_global_file_counter_ref =manga_global_counter_to_pass ,folder_context_name_for_history =folder_context_for_file , + file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list ) + )) + for future in as_completed (futures_list ): + if self .check_cancel (): + for f_to_cancel in futures_list : + if not f_to_cancel .done (): + f_to_cancel .cancel () + break + try : + dl_count ,skip_count ,actual_filename_saved ,original_kept_flag ,status ,details_for_dialog_or_retry =future .result () + total_downloaded_this_post +=dl_count + total_skipped_this_post +=skip_count + if original_kept_flag and dl_count >0 and actual_filename_saved : + kept_original_filenames_for_log .append (actual_filename_saved ) + if status ==FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER and details_for_dialog_or_retry : + retryable_failures_this_post .append (details_for_dialog_or_retry ) + elif status ==FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION and details_for_dialog_or_retry : + permanent_failures_this_post .append (details_for_dialog_or_retry ) + except CancelledError : + self .logger (f" File download task for post {post_id } was cancelled.") + total_skipped_this_post +=1 + except Exception as exc_f : + self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }") + total_skipped_this_post +=1 + self ._emit_signal ('file_progress',"",None ) + if self.session_file_path and self.session_lock: + try: + with self.session_lock: + if os.path.exists(self.session_file_path): + with open(self.session_file_path, 'r', encoding='utf-8') as f: + session_data = json.load(f) + if 'download_state' not in session_data: + session_data['download_state'] = {} + if not isinstance(session_data['download_state'].get('processed_post_ids'), list): + session_data['download_state']['processed_post_ids'] = [] + session_data['download_state']['processed_post_ids'].append(self.post.get('id')) + if permanent_failures_this_post: + if not isinstance(session_data['download_state'].get('permanently_failed_files'), list): + session_data['download_state']['permanently_failed_files'] = [] + existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']} + for failure in permanent_failures_this_post: + if failure.get('file_info', {}).get('url') not in existing_failed_urls: + session_data['download_state']['permanently_failed_files'].append(failure) + temp_file_path = self.session_file_path + ".tmp" + with open(temp_file_path, 'w', encoding='utf-8') as f_tmp: + json.dump(session_data, f_tmp, indent=2) + os.replace(temp_file_path, self.session_file_path) + except Exception as e: + self.logger(f"⚠️ Could not update session file for post {post_id}: {e}") + if not self .extract_links_only and (total_downloaded_this_post >0 or not ( + (current_character_filters and ( + (self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or + (self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match ) + ))or + (self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH )and any (sw .lower ()in post_title .lower ()for sw in self .skip_words_list )) + )): + top_file_name_for_history ="N/A" + if post_main_file_info and post_main_file_info .get ('name'): + top_file_name_for_history =post_main_file_info ['name'] + elif post_attachments and post_attachments [0 ].get ('name'): + top_file_name_for_history =post_attachments [0 ]['name'] + history_data_for_this_post ={ + 'post_title':post_title ,'post_id':post_id , + 'top_file_name':top_file_name_for_history , + 'num_files':num_potential_files_in_post , + 'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown", + 'download_location':determined_post_save_path_for_history , + 'service':self .service ,'user_id':self .user_id , + } + if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled."); + else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }") + if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 : + path_to_check_for_emptiness =determined_post_save_path_for_history try : - dl_count ,skip_count ,actual_filename_saved ,original_kept_flag ,status ,details_for_dialog_or_retry =future .result () - total_downloaded_this_post +=dl_count - total_skipped_this_post +=skip_count - if original_kept_flag and dl_count >0 and actual_filename_saved : - kept_original_filenames_for_log .append (actual_filename_saved ) - if status ==FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER and details_for_dialog_or_retry : - retryable_failures_this_post .append (details_for_dialog_or_retry ) - elif status ==FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION and details_for_dialog_or_retry : - permanent_failures_this_post .append (details_for_dialog_or_retry ) - except CancelledError : - self .logger (f" File download task for post {post_id } was cancelled.") - total_skipped_this_post +=1 - except Exception as exc_f : - self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }") - total_skipped_this_post +=1 - self ._emit_signal ('file_progress',"",None ) + if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ): + self .logger (f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness }'") + os .rmdir (path_to_check_for_emptiness ) + except OSError as e_rmdir : + self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }") + + except Exception as e: + post_id = self.post.get('id', 'N/A') + # Log the unexpected crash of the worker + self.logger(f"❌ CRITICAL WORKER FAILURE on Post ID {post_id}: {e}\n{traceback.format_exc(limit=4)}") + # Ensure the number of skipped files reflects the total potential files in the post, + # as none of them were processed successfully. + num_potential_files_in_post = len(self.post.get('attachments', [])) + (1 if self.post.get('file') else 0) + total_skipped_this_post = num_potential_files_in_post + total_downloaded_this_post = 0 - # After a post's files are all processed, update the session file to mark this post as done. - if self.session_file_path and self.session_lock: - try: - with self.session_lock: - if os.path.exists(self.session_file_path): # Only update if the session file exists - # Read current state - with open(self.session_file_path, 'r', encoding='utf-8') as f: - session_data = json.load(f) - - if 'download_state' not in session_data: - session_data['download_state'] = {} - - # Add processed ID - if not isinstance(session_data['download_state'].get('processed_post_ids'), list): - session_data['download_state']['processed_post_ids'] = [] - session_data['download_state']['processed_post_ids'].append(self.post.get('id')) - - # Add any permanent failures from this worker to the session file - if permanent_failures_this_post: - if not isinstance(session_data['download_state'].get('permanently_failed_files'), list): - session_data['download_state']['permanently_failed_files'] = [] - # To avoid duplicates if the same post is somehow re-processed - existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']} - for failure in permanent_failures_this_post: - if failure.get('file_info', {}).get('url') not in existing_failed_urls: - session_data['download_state']['permanently_failed_files'].append(failure) - - # Write to temp file and then atomically replace - temp_file_path = self.session_file_path + ".tmp" - with open(temp_file_path, 'w', encoding='utf-8') as f_tmp: - json.dump(session_data, f_tmp, indent=2) - os.replace(temp_file_path, self.session_file_path) - except Exception as e: - self.logger(f"⚠️ Could not update session file for post {post_id}: {e}") - - if not self .extract_links_only and (total_downloaded_this_post >0 or not ( - (current_character_filters and ( - (self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or - (self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match ) - ))or - (self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH )and any (sw .lower ()in post_title .lower ()for sw in self .skip_words_list )) - )): - top_file_name_for_history ="N/A" - if post_main_file_info and post_main_file_info .get ('name'): - top_file_name_for_history =post_main_file_info ['name'] - elif post_attachments and post_attachments [0 ].get ('name'): - top_file_name_for_history =post_attachments [0 ]['name'] - - history_data_for_this_post ={ - 'post_title':post_title ,'post_id':post_id , - 'top_file_name':top_file_name_for_history , - 'num_files':num_potential_files_in_post , - 'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown", - 'download_location':determined_post_save_path_for_history , - 'service':self .service ,'user_id':self .user_id , - } - if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled."); - else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }") - - if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 : - - path_to_check_for_emptiness =determined_post_save_path_for_history - try : - if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ): - self .logger (f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness }'") - os .rmdir (path_to_check_for_emptiness ) - except OSError as e_rmdir : - self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }") - - result_tuple = (total_downloaded_this_post, total_skipped_this_post, - kept_original_filenames_for_log, retryable_failures_this_post, - permanent_failures_this_post, history_data_for_this_post, - None) # The 7th item is None because we already saved the temp file - - self._emit_signal('worker_finished', result_tuple) + finally: + # This 'finally' block ensures that the worker ALWAYS reports back, + # preventing the main UI from getting stuck. + result_tuple = (total_downloaded_this_post, total_skipped_this_post, + kept_original_filenames_for_log, retryable_failures_this_post, + permanent_failures_this_post, history_data_for_this_post, + temp_filepath_for_return) + self._emit_signal('worker_finished', result_tuple) + return result_tuple class DownloadThread (QThread ): @@ -1801,6 +1749,7 @@ class DownloadThread (QThread ): cookie_text ="", session_file_path=None, session_lock=None, + processed_ids_to_skip=None, text_only_scope=None, text_export_format='txt', single_pdf_mode=False, @@ -1859,11 +1808,12 @@ class DownloadThread (QThread ): self .manga_global_file_counter_ref =manga_global_file_counter_ref self.session_file_path = session_file_path self.session_lock = session_lock + self.processed_ids_to_skip = processed_ids_to_skip self.history_candidates_buffer =deque (maxlen =8 ) self.text_only_scope = text_only_scope self.text_export_format = text_export_format - self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE - self.project_root_dir = project_root_dir # Add this assignment + self.single_pdf_mode = single_pdf_mode + self.project_root_dir = project_root_dir if self .compress_images and Image is None : self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") @@ -1899,9 +1849,7 @@ class DownloadThread (QThread ): grand_list_of_kept_original_filenames = [] was_process_cancelled = False - # This block for initializing manga mode counters remains unchanged if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED and not self.extract_links_only and self.manga_date_file_counter_ref is None: - # Determine the directory to scan for existing numbered files series_scan_dir = self.output_dir if self.use_subfolders : if self.filter_character_list_objects_initial and self.filter_character_list_objects_initial [0] and self.filter_character_list_objects_initial[0].get("name"): @@ -1916,41 +1864,39 @@ class DownloadThread (QThread ): self.logger(f"ℹ️ [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir}'...") for dirpath, _, filenames_in_dir in os.walk(series_scan_dir): for filename_to_check in filenames_in_dir: - # Check for an optional prefix defined by the user prefix_to_check = clean_filename(self.manga_date_prefix.strip()) if self.manga_date_prefix and self.manga_date_prefix.strip() else "" name_part_to_match = filename_to_check if prefix_to_check and name_part_to_match.startswith(prefix_to_check): name_part_to_match = name_part_to_match[len(prefix_to_check):].lstrip() - # Use regex to find the number at the start of the filename base_name_no_ext = os.path.splitext(name_part_to_match)[0] match = re.match(r"(\d+)", base_name_no_ext) if match: highest_num = max(highest_num, int(match.group(1))) - # Initialize the shared counter to the next number, protected by a thread lock self.manga_date_file_counter_ref = [highest_num + 1, threading.Lock()] self.logger(f"ℹ️ [Thread] Manga Date Mode: Initialized date-based counter at {self.manga_date_file_counter_ref[0]}.") pass if self.manga_mode_active and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING and not self.extract_links_only and self.manga_global_file_counter_ref is None: - # Initialize the shared counter at 1, protected by a thread lock self.manga_global_file_counter_ref = [1, threading.Lock()] self.logger(f"ℹ️ [Thread] Manga Title+GlobalNum Mode: Initialized global counter at {self.manga_global_file_counter_ref[0]}.") pass worker_signals_obj = PostProcessorSignals() try: - # Connect signals worker_signals_obj.progress_signal.connect(self.progress_signal) worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal) worker_signals_obj.file_progress_signal.connect(self.file_progress_signal) worker_signals_obj.external_link_signal.connect(self.external_link_signal) worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal) worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal) - worker_signals_obj.worker_finished_signal.connect(lambda result: None) # Connect to dummy lambda to avoid errors + worker_signals_obj.worker_finished_signal.connect(lambda result: None) self.logger(" Starting post fetch (single-threaded download process)...") + self.logger(" Fetching ALL available post information first. This may take a moment...") + + all_posts_data = [] post_generator = download_from_api( self.api_url_input, logger=self.logger, @@ -1970,99 +1916,101 @@ class DownloadThread (QThread ): if self.isInterruptionRequested(): was_process_cancelled = True break - for individual_post_data in posts_batch_data: - if self.isInterruptionRequested(): - was_process_cancelled = True - break - - # Create the worker, now correctly passing single_pdf_mode - post_processing_worker = PostProcessorWorker( - post_data=individual_post_data, - download_root=self.output_dir, - known_names=self.known_names, - filter_character_list=self.filter_character_list_objects_initial, - dynamic_character_filter_holder=self.dynamic_filter_holder, - unwanted_keywords=self.unwanted_keywords, - filter_mode=self.filter_mode, - skip_zip=self.skip_zip, skip_rar=self.skip_rar, - use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders, - target_post_id_from_initial_url=self.initial_target_post_id, - custom_folder_name=self.custom_folder_name, - compress_images=self.compress_images, download_thumbnails=self.download_thumbnails, - service=self.service, user_id=self.user_id, - api_url_input=self.api_url_input, - pause_event=self.pause_event, - cancellation_event=self.cancellation_event, - emitter=worker_signals_obj, - downloaded_files=self.downloaded_files, - downloaded_file_hashes=self.downloaded_file_hashes, - downloaded_files_lock=self.downloaded_files_lock, - downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, - skip_words_list=self.skip_words_list, - skip_words_scope=self.skip_words_scope, - show_external_links=self.show_external_links, - extract_links_only=self.extract_links_only, - num_file_threads=self.num_file_threads_for_worker, - skip_current_file_flag=self.skip_current_file_flag, - manga_mode_active=self.manga_mode_active, - manga_filename_style=self.manga_filename_style, - manga_date_prefix=self.manga_date_prefix, - char_filter_scope=self.char_filter_scope, - remove_from_filename_words_list=self.remove_from_filename_words_list, - allow_multipart_download=self.allow_multipart_download, - selected_cookie_file=self.selected_cookie_file, - app_base_dir=self.app_base_dir, - cookie_text=self.cookie_text, - override_output_dir=self.override_output_dir, - manga_global_file_counter_ref=self.manga_global_file_counter_ref, - use_cookie=self.use_cookie, - manga_date_file_counter_ref=self.manga_date_file_counter_ref, - use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder, - keep_in_post_duplicates=self.keep_in_post_duplicates, - creator_download_folder_ignore_words=self.creator_download_folder_ignore_words, - session_file_path=self.session_file_path, - session_lock=self.session_lock, - text_only_scope=self.text_only_scope, - text_export_format=self.text_export_format, - single_pdf_mode=self.single_pdf_mode, # <-- This is now correctly passed - project_root_dir=self.project_root_dir - ) - try: - # Correctly unpack the 7 values returned from the worker - (dl_count, skip_count, kept_originals_this_post, - retryable_failures, permanent_failures, - history_data, temp_filepath) = post_processing_worker.process() - - grand_total_downloaded_files += dl_count - grand_total_skipped_files += skip_count - - if kept_originals_this_post: - grand_list_of_kept_original_filenames.extend(kept_originals_this_post) - if retryable_failures: - self.retryable_file_failed_signal.emit(retryable_failures) - if history_data: - if len(self.history_candidates_buffer) < 8: - self.post_processed_for_history_signal.emit(history_data) - if permanent_failures: - self.permanent_file_failed_signal.emit(permanent_failures) - - # In single-threaded text mode, pass the temp file path back to the main window - if self.single_pdf_mode and temp_filepath: - self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}") + all_posts_data.extend(posts_batch_data) + + if not was_process_cancelled: + self.logger(f"✅ Fetching complete. Found {len(all_posts_data)} total posts. Starting download process...") - except Exception as proc_err: - post_id_for_err = individual_post_data.get('id', 'N/A') - self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}") - traceback.print_exc() - num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0) - grand_total_skipped_files += num_potential_files_est - - if self.skip_current_file_flag and self.skip_current_file_flag.is_set(): - self.skip_current_file_flag.clear() - self.logger(" Skip current file flag was processed and cleared by DownloadThread.") - self.msleep(10) - if was_process_cancelled: + for individual_post_data in all_posts_data: + if self.isInterruptionRequested(): + was_process_cancelled = True break + + post_processing_worker = PostProcessorWorker( + post_data=individual_post_data, + download_root=self.output_dir, + known_names=self.known_names, + filter_character_list=self.filter_character_list_objects_initial, + dynamic_character_filter_holder=self.dynamic_filter_holder, + unwanted_keywords=self.unwanted_keywords, + filter_mode=self.filter_mode, + skip_zip=self.skip_zip, skip_rar=self.skip_rar, + use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders, + target_post_id_from_initial_url=self.initial_target_post_id, + custom_folder_name=self.custom_folder_name, + compress_images=self.compress_images, download_thumbnails=self.download_thumbnails, + service=self.service, user_id=self.user_id, + api_url_input=self.api_url_input, + pause_event=self.pause_event, + cancellation_event=self.cancellation_event, + emitter=worker_signals_obj, + downloaded_files=self.downloaded_files, + downloaded_file_hashes=self.downloaded_file_hashes, + downloaded_files_lock=self.downloaded_files_lock, + downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, + skip_words_list=self.skip_words_list, + skip_words_scope=self.skip_words_scope, + show_external_links=self.show_external_links, + extract_links_only=self.extract_links_only, + num_file_threads=self.num_file_threads_for_worker, + skip_current_file_flag=self.skip_current_file_flag, + manga_mode_active=self.manga_mode_active, + manga_filename_style=self.manga_filename_style, + manga_date_prefix=self.manga_date_prefix, + char_filter_scope=self.char_filter_scope, + remove_from_filename_words_list=self.remove_from_filename_words_list, + allow_multipart_download=self.allow_multipart_download, + selected_cookie_file=self.selected_cookie_file, + app_base_dir=self.app_base_dir, + cookie_text=self.cookie_text, + override_output_dir=self.override_output_dir, + manga_global_file_counter_ref=self.manga_global_file_counter_ref, + use_cookie=self.use_cookie, + manga_date_file_counter_ref=self.manga_date_file_counter_ref, + use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder, + keep_in_post_duplicates=self.keep_in_post_duplicates, + creator_download_folder_ignore_words=self.creator_download_folder_ignore_words, + session_file_path=self.session_file_path, + session_lock=self.session_lock, + processed_ids_to_skip=self.processed_ids_to_skip, # <-- FIX: Pass the list to the worker + text_only_scope=self.text_only_scope, + text_export_format=self.text_export_format, + single_pdf_mode=self.single_pdf_mode, + project_root_dir=self.project_root_dir + ) + try: + (dl_count, skip_count, kept_originals_this_post, + retryable_failures, permanent_failures, + history_data, temp_filepath) = post_processing_worker.process() + + grand_total_downloaded_files += dl_count + grand_total_skipped_files += skip_count + + if kept_originals_this_post: + grand_list_of_kept_original_filenames.extend(kept_originals_this_post) + if retryable_failures: + self.retryable_file_failed_signal.emit(retryable_failures) + if history_data: + if len(self.history_candidates_buffer) < 8: + self.post_processed_for_history_signal.emit(history_data) + if permanent_failures: + self.permanent_file_failed_signal.emit(permanent_failures) + + if self.single_pdf_mode and temp_filepath: + self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}") + + except Exception as proc_err: + post_id_for_err = individual_post_data.get('id', 'N/A') + self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}") + traceback.print_exc() + num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0) + grand_total_skipped_files += num_potential_files_est + + if self.skip_current_file_flag and self.skip_current_file_flag.is_set(): + self.skip_current_file_flag.clear() + self.logger(" Skip current file flag was processed and cleared by DownloadThread.") + self.msleep(10) + if not was_process_cancelled and not self.isInterruptionRequested(): self.logger("✅ All posts processed or end of content reached by DownloadThread.") @@ -2071,7 +2019,6 @@ class DownloadThread (QThread ): traceback.print_exc() finally: try: - # Disconnect signals if worker_signals_obj: worker_signals_obj.progress_signal.disconnect(self.progress_signal) worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal) @@ -2082,15 +2029,8 @@ class DownloadThread (QThread ): except (TypeError, RuntimeError) as e: self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}") - # Emit the final signal with all collected results self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames) - - def receive_add_character_result (self ,result ): - with QMutexLocker (self .prompt_mutex ): - self ._add_character_response =result - self .logger (f" (DownloadThread) Received character prompt response: {'Yes (added/confirmed)'if result else 'No (declined/failed)'}") - class InterruptedError(Exception): """Custom exception for handling cancellations gracefully.""" pass \ No newline at end of file diff --git a/src/ui/flow_layout.py b/src/ui/flow_layout.py new file mode 100644 index 0000000..e492ad4 --- /dev/null +++ b/src/ui/flow_layout.py @@ -0,0 +1,93 @@ +# src/ui/flow_layout.py + +from PyQt5.QtWidgets import QLayout, QSizePolicy, QStyle +from PyQt5.QtCore import QPoint, QRect, QSize, Qt + +class FlowLayout(QLayout): + """A custom layout that arranges widgets in a flow, wrapping as necessary.""" + def __init__(self, parent=None, margin=0, spacing=-1): + super(FlowLayout, self).__init__(parent) + + if parent is not None: + self.setContentsMargins(margin, margin, margin, margin) + + self.setSpacing(spacing) + self.itemList = [] + + def __del__(self): + item = self.takeAt(0) + while item: + item = self.takeAt(0) + + def addItem(self, item): + self.itemList.append(item) + + def count(self): + return len(self.itemList) + + def itemAt(self, index): + if 0 <= index < len(self.itemList): + return self.itemList[index] + return None + + def takeAt(self, index): + if 0 <= index < len(self.itemList): + return self.itemList.pop(index) + return None + + def expandingDirections(self): + return Qt.Orientations(Qt.Orientation(0)) + + def hasHeightForWidth(self): + return True + + def heightForWidth(self, width): + return self._do_layout(QRect(0, 0, width, 0), True) + + def setGeometry(self, rect): + super(FlowLayout, self).setGeometry(rect) + self._do_layout(rect, False) + + def sizeHint(self): + return self.minimumSize() + + def minimumSize(self): + size = QSize() + for item in self.itemList: + size = size.expandedTo(item.minimumSize()) + + margin, _, _, _ = self.getContentsMargins() + size += QSize(2 * margin, 2 * margin) + return size + + def _do_layout(self, rect, test_only): + x = rect.x() + y = rect.y() + line_height = 0 + + space_x = self.spacing() + space_y = self.spacing() + if self.layout() is not None: + space_x = self.spacing() + space_y = self.spacing() + else: + space_x = self.spacing() + space_y = self.spacing() + + + for item in self.itemList: + wid = item.widget() + next_x = x + item.sizeHint().width() + space_x + if next_x - space_x > rect.right() and line_height > 0: + x = rect.x() + y = y + line_height + space_y + next_x = x + item.sizeHint().width() + space_x + line_height = 0 + + if not test_only: + item.setGeometry(QRect(QPoint(x, y), item.sizeHint())) + + x = next_x + line_height = max(line_height, item.sizeHint().height()) + + return y + line_height - rect.y() \ No newline at end of file diff --git a/src/ui/main_window.py b/src/ui/main_window.py index 1bb5864..e5adf00 100644 --- a/src/ui/main_window.py +++ b/src/ui/main_window.py @@ -995,9 +995,11 @@ class DownloaderApp (QWidget ): f"Could not automatically restart the application: {e }\n\nPlease restart it manually.") def init_ui(self): + # --- FIX: Import the new FlowLayout class --- + from .flow_layout import FlowLayout + self.main_splitter = QSplitter(Qt.Horizontal) - # --- Use a scroll area for the left panel for consistency --- left_scroll_area = QScrollArea() left_scroll_area.setWidgetResizable(True) left_scroll_area.setFrameShape(QFrame.NoFrame) @@ -1021,7 +1023,7 @@ class DownloaderApp (QWidget ): url_input_layout.addWidget(self.url_label_widget) self.link_input = QLineEdit() self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765") - self.link_input.textChanged.connect(self.update_custom_folder_visibility) # Connects the custom folder logic + self.link_input.textChanged.connect(self.update_custom_folder_visibility) url_input_layout.addWidget(self.link_input, 1) self.empty_popup_button = QPushButton("🎨") self.empty_popup_button.setStyleSheet("padding: 4px 6px;") @@ -1067,7 +1069,7 @@ class DownloaderApp (QWidget ): dir_layout.addWidget(self.dir_button) left_layout.addLayout(dir_layout) - # --- Filters and Custom Folder Container (from old layout) --- + # --- Filters and Custom Folder Container --- self.filters_and_custom_folder_container_widget = QWidget() filters_and_custom_folder_layout = QHBoxLayout(self.filters_and_custom_folder_container_widget) filters_and_custom_folder_layout.setContentsMargins(0, 5, 0, 0) @@ -1089,7 +1091,6 @@ class DownloaderApp (QWidget ): char_input_and_button_layout.addWidget(self.char_filter_scope_toggle_button, 1) character_filter_v_layout.addLayout(char_input_and_button_layout) - # --- Custom Folder Widget Definition --- self.custom_folder_widget = QWidget() custom_folder_v_layout = QVBoxLayout(self.custom_folder_widget) custom_folder_v_layout.setContentsMargins(0, 0, 0, 0) @@ -1143,8 +1144,11 @@ class DownloaderApp (QWidget ): file_filter_layout = QVBoxLayout() file_filter_layout.setContentsMargins(0, 10, 0, 0) file_filter_layout.addWidget(QLabel("Filter Files:")) - radio_button_layout = QHBoxLayout() - radio_button_layout.setSpacing(10) + + # --- FIX: Use FlowLayout for responsive radio buttons --- + radio_button_layout = FlowLayout() + radio_button_layout.setSpacing(15) # Add more horizontal space + self.radio_group = QButtonGroup(self) self.radio_all = QRadioButton("All") self.radio_images = QRadioButton("Images/GIFs") @@ -1153,42 +1157,57 @@ class DownloaderApp (QWidget ): self.radio_only_audio = QRadioButton("🎧 Only Audio") self.radio_only_links = QRadioButton("🔗 Only Links") self.radio_more = QRadioButton("More") - + self.favorite_mode_checkbox = QCheckBox("⭐ Favorite Mode") self.radio_all.setChecked(True) - for btn in [self.radio_all, self.radio_images, self.radio_videos, self.radio_only_archives, self.radio_only_audio, self.radio_only_links, self.radio_more]: + + radio_buttons = [ + self.radio_all, self.radio_images, self.radio_videos, self.radio_only_archives, + self.radio_only_audio, self.radio_only_links, self.radio_more + ] + + for btn in radio_buttons: self.radio_group.addButton(btn) radio_button_layout.addWidget(btn) - self.favorite_mode_checkbox = QCheckBox() - self.favorite_mode_checkbox.setChecked(False) - radio_button_layout.addWidget(self.favorite_mode_checkbox) - radio_button_layout.addStretch(1) + + radio_button_layout.addWidget(self.favorite_mode_checkbox) # Add checkbox to the flow file_filter_layout.addLayout(radio_button_layout) + # --- FIX END --- + left_layout.addLayout(file_filter_layout) # --- Checkboxes Group --- checkboxes_group_layout = QVBoxLayout() checkboxes_group_layout.setSpacing(10) - row1_layout = QHBoxLayout() - row1_layout.setSpacing(10) + + # --- FIX: Replace original row1_layout with a FlowLayout --- + checkboxes_flow_layout = FlowLayout() + checkboxes_flow_layout.setSpacing(15) + self.skip_zip_checkbox = QCheckBox("Skip .zip") self.skip_zip_checkbox.setChecked(True) - row1_layout.addWidget(self.skip_zip_checkbox) + checkboxes_flow_layout.addWidget(self.skip_zip_checkbox) + self.skip_rar_checkbox = QCheckBox("Skip .rar") self.skip_rar_checkbox.setChecked(True) - row1_layout.addWidget(self.skip_rar_checkbox) + checkboxes_flow_layout.addWidget(self.skip_rar_checkbox) + self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only") - row1_layout.addWidget(self.download_thumbnails_checkbox) + checkboxes_flow_layout.addWidget(self.download_thumbnails_checkbox) + self.scan_content_images_checkbox = QCheckBox("Scan Content for Images") self.scan_content_images_checkbox.setChecked(self.scan_content_images_setting) - row1_layout.addWidget(self.scan_content_images_checkbox) + checkboxes_flow_layout.addWidget(self.scan_content_images_checkbox) + self.compress_images_checkbox = QCheckBox("Compress to WebP") self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).") - row1_layout.addWidget(self.compress_images_checkbox) + checkboxes_flow_layout.addWidget(self.compress_images_checkbox) + self.keep_duplicates_checkbox = QCheckBox("Keep Duplicates") self.keep_duplicates_checkbox.setToolTip("If checked, downloads all files from a post even if they have the same name.") - row1_layout.addWidget(self.keep_duplicates_checkbox) - row1_layout.addStretch(1) - checkboxes_group_layout.addLayout(row1_layout) + checkboxes_flow_layout.addWidget(self.keep_duplicates_checkbox) + + checkboxes_group_layout.addLayout(checkboxes_flow_layout) + # --- FIX END --- # --- Advanced Settings --- advanced_settings_label = QLabel("⚙️ Advanced Settings:") @@ -1293,7 +1312,7 @@ class DownloaderApp (QWidget ): left_layout.addLayout(known_chars_label_layout) self.character_list = QListWidget() self.character_list.setSelectionMode(QListWidget.ExtendedSelection) - self.character_list.setMaximumHeight(150) # Set smaller height + self.character_list.setMaximumHeight(150) left_layout.addWidget(self.character_list, 1) char_manage_layout = QHBoxLayout() char_manage_layout.setSpacing(10) @@ -1334,7 +1353,6 @@ class DownloaderApp (QWidget ): left_layout.addStretch(0) # --- Right Panel (Logs) --- - # (This part of the layout is unchanged and remains correct) log_title_layout = QHBoxLayout() self.progress_log_label = QLabel("📜 Progress Log:") log_title_layout.addWidget(self.progress_log_label) @@ -1423,7 +1441,7 @@ class DownloaderApp (QWidget ): right_layout.addWidget(self.file_progress_label) # --- Final Assembly --- - self.main_splitter.addWidget(left_scroll_area) # Use the scroll area + self.main_splitter.addWidget(left_scroll_area) self.main_splitter.addWidget(right_panel_widget) self.main_splitter.setStretchFactor(0, 7) self.main_splitter.setStretchFactor(1, 3) @@ -1446,7 +1464,7 @@ class DownloaderApp (QWidget ): self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) if hasattr(self, 'radio_group') and self.radio_group.checkedButton(): self._handle_filter_mode_change(self.radio_group.checkedButton(), True) - self.radio_group.buttonToggled.connect(self._handle_more_options_toggled) # Add this line + self.radio_group.buttonToggled.connect(self._handle_more_options_toggled) self._update_manga_filename_style_button_text() self._update_skip_scope_button_text() @@ -2225,19 +2243,30 @@ class DownloaderApp (QWidget ): if self .external_log_output :self .external_log_output .clear () self .log_signal .emit ("\n"+"="*40 +"\n🔗 External Links Log Disabled\n"+"="*40 ) - def _handle_filter_mode_change(self, button, checked): - # If a button other than "More" is selected, reset the UI + if not button or not checked: + return + + # --- FIX: Automatically disable multithreading for text-based modes --- + if button == self.radio_more: + if hasattr(self, 'use_multithreading_checkbox'): + self.use_multithreading_checkbox.setChecked(False) + self.use_multithreading_checkbox.setEnabled(False) + self.log_signal.emit("ℹ️ Text extraction mode enabled. Multithreading has been disabled.") + else: + # Re-enable it for other modes, but respect the manga mode rule that might also disable it. + if hasattr(self, 'use_multithreading_checkbox'): + is_sequential_manga = (self.manga_mode_checkbox.isChecked() and + (self.manga_filename_style == STYLE_DATE_BASED or + self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING)) + if not is_sequential_manga: + self.use_multithreading_checkbox.setEnabled(True) + # --- END FIX --- + if button != self.radio_more and checked: self.radio_more.setText("More") self.more_filter_scope = None - self.single_pdf_setting = False # Reset the setting - # Re-enable the checkboxes - if hasattr(self, 'use_multithreading_checkbox'): self.use_multithreading_checkbox.setEnabled(True) - if hasattr(self, 'use_subfolders_checkbox'): self.use_subfolders_checkbox.setEnabled(True) - - if not button or not checked: - return + self.single_pdf_setting = False is_only_links =(button ==self .radio_only_links ) is_only_audio =(hasattr (self ,'radio_only_audio')and self .radio_only_audio is not None and button ==self .radio_only_audio ) @@ -2267,8 +2296,6 @@ class DownloaderApp (QWidget ): file_download_mode_active =not is_only_links - - if self .use_subfolders_checkbox :self .use_subfolders_checkbox .setEnabled (file_download_mode_active ) if self .skip_words_input :self .skip_words_input .setEnabled (file_download_mode_active ) if self .skip_scope_toggle_button :self .skip_scope_toggle_button .setEnabled (file_download_mode_active ) @@ -2296,22 +2323,17 @@ class DownloaderApp (QWidget ): if not can_show_external_log_option : self .external_links_checkbox .setChecked (False ) - if is_only_links : self .progress_log_label .setText ("📜 Extracted Links Log:") if self .external_log_output :self .external_log_output .hide () if self .log_splitter :self .log_splitter .setSizes ([self .height (),0 ]) - - do_clear_log_in_filter_change =True if self .mega_download_log_preserved_once and self .only_links_log_display_mode ==LOG_DISPLAY_DOWNLOAD_PROGRESS : do_clear_log_in_filter_change =False - if self .main_log_output and do_clear_log_in_filter_change : self .log_signal .emit ("INTERNAL: _handle_filter_mode_change - About to clear log.") self .main_log_output .clear () self .log_signal .emit ("INTERNAL: _handle_filter_mode_change - Log cleared by _handle_filter_mode_change.") - if self .main_log_output :self .main_log_output .setMinimumHeight (0 ) self .log_signal .emit ("="*20 +" Mode changed to: Only Links "+"="*20 ) self ._try_process_next_external_link () @@ -2330,8 +2352,8 @@ class DownloaderApp (QWidget ): else : self .progress_log_label .setText (self ._tr ("progress_log_label_text","📜 Progress Log:")) self .update_external_links_setting (self .external_links_checkbox .isChecked ()if self .external_links_checkbox else False ) - self .log_signal .emit (f"="*20 +f" Mode changed to: {button .text ()} "+"="*20 ) - + if button != self.radio_more: + self .log_signal .emit (f"="*20 +f" Mode changed to: {button .text ()} "+"="*20 ) if is_only_links : self ._filter_links_log () @@ -2362,7 +2384,6 @@ class DownloaderApp (QWidget ): self .update_custom_folder_visibility () self .update_ui_for_manga_mode (self .manga_mode_checkbox .isChecked ()if self .manga_mode_checkbox else False ) - def _filter_links_log (self ): if not (self .radio_only_links and self .radio_only_links .isChecked ()):return @@ -3051,761 +3072,392 @@ class DownloaderApp (QWidget ): if total_posts >0 or processed_posts >0 : self .file_progress_label .setText ("") + def start_download(self, direct_api_url=None, override_output_dir=None, is_restore=False): + global KNOWN_NAMES, BackendDownloadThread, PostProcessorWorker, extract_post_info, clean_folder_name, MAX_FILE_THREADS_PER_POST_OR_WORKER - def start_download (self ,direct_api_url =None ,override_output_dir =None, is_restore=False ): - global KNOWN_NAMES ,BackendDownloadThread ,PostProcessorWorker ,extract_post_info ,clean_folder_name ,MAX_FILE_THREADS_PER_POST_OR_WORKER + self._clear_stale_temp_files() + self.session_temp_files = [] - self._clear_stale_temp_files() - self.session_temp_files = [] - - if self ._is_download_active (): + if self._is_download_active(): QMessageBox.warning(self, "Busy", "A download is already in progress.") - return False + return False + if not (self.favorite_download_queue and not self.is_processing_favorites_queue): + self.main_log_output.clear() + if not direct_api_url and self.favorite_download_queue and not self.is_processing_favorites_queue: + self.log_signal.emit(f"ℹ️ Detected {len(self.favorite_download_queue)} item(s) in the queue. Starting processing...") + self.cancellation_message_logged_this_session = False + self._process_next_favorite_download() + return True - if not direct_api_url and self .favorite_download_queue and not self .is_processing_favorites_queue : - self .log_signal .emit (f"ℹ️ Detected {len (self .favorite_download_queue )} item(s) in the queue. Starting processing...") - self .cancellation_message_logged_this_session =False - self ._process_next_favorite_download () - return True + if is_restore and self.interrupted_session_data: + api_url = self.interrupted_session_data.get("ui_settings", {}).get("api_url") + else: + api_url = direct_api_url if direct_api_url else self.link_input.text().strip() if not is_restore and self.interrupted_session_data: - self.log_signal.emit("ℹ️ New download started. Discarding previous interrupted session.") self._clear_session_file() self.interrupted_session_data = None self.is_restore_pending = False - api_url =direct_api_url if direct_api_url else self .link_input .text ().strip () - self .download_history_candidates .clear () - self._update_button_states_and_connections() # Ensure buttons are updated to active state + self.download_history_candidates.clear() + self._update_button_states_and_connections() - if self .favorite_mode_checkbox and self .favorite_mode_checkbox .isChecked ()and not direct_api_url and not api_url : - QMessageBox .information (self ,"Favorite Mode Active", - "Favorite Mode is active. Please use the 'Favorite Artists' or 'Favorite Posts' buttons to start downloads in this mode, or uncheck 'Favorite Mode' to use the URL input.") - self .set_ui_enabled (True ) - return False + if self.favorite_mode_checkbox and self.favorite_mode_checkbox.isChecked() and not direct_api_url and not api_url: + QMessageBox.information(self, "Favorite Mode Active", "Favorite Mode is active. Please use the 'Favorite Artists' or 'Favorite Posts' buttons to start downloads in this mode, or uncheck 'Favorite Mode' to use the URL input.") + self.set_ui_enabled(True) + return False - main_ui_download_dir =self .dir_input .text ().strip () + main_ui_download_dir = self.dir_input.text().strip() - if not api_url and not self .favorite_download_queue : - QMessageBox .critical (self ,"Input Error","URL is required.") - return False - elif not api_url and self .favorite_download_queue : - self .log_signal .emit ("ℹ️ URL input is empty, but queue has items. Processing queue...") - self .cancellation_message_logged_this_session =False - self ._process_next_favorite_download () - return True + if not api_url and not self.favorite_download_queue: + QMessageBox.critical(self, "Input Error", "URL is required.") + return False + elif not api_url and self.favorite_download_queue: + self.log_signal.emit("ℹ️ URL input is empty, but queue has items. Processing queue...") + self.cancellation_message_logged_this_session = False + self._process_next_favorite_download() + return True - self .cancellation_message_logged_this_session =False - use_subfolders =self .use_subfolders_checkbox .isChecked () - use_post_subfolders =self .use_subfolder_per_post_checkbox .isChecked () - compress_images =self .compress_images_checkbox .isChecked () - download_thumbnails =self .download_thumbnails_checkbox .isChecked () + self.cancellation_message_logged_this_session = False + use_subfolders = self.use_subfolders_checkbox.isChecked() + use_post_subfolders = self.use_subfolder_per_post_checkbox.isChecked() + compress_images = self.compress_images_checkbox.isChecked() + download_thumbnails = self.download_thumbnails_checkbox.isChecked() + use_multithreading_enabled_by_checkbox = self.use_multithreading_checkbox.isChecked() + + try: + num_threads_from_gui = int(self.thread_count_input.text().strip()) + if num_threads_from_gui < 1: num_threads_from_gui = 1 + except ValueError: + QMessageBox.critical(self, "Thread Count Error", "Invalid number of threads. Please enter a positive number.") + return False - use_multithreading_enabled_by_checkbox =self .use_multithreading_checkbox .isChecked () - try : - num_threads_from_gui =int (self .thread_count_input .text ().strip ()) - if num_threads_from_gui <1 :num_threads_from_gui =1 - except ValueError : - QMessageBox .critical (self ,"Thread Count Error","Invalid number of threads. Please enter a positive number.") - return False + if use_multithreading_enabled_by_checkbox: + if num_threads_from_gui > MAX_THREADS: + hard_warning_msg = (f"You've entered a thread count ({num_threads_from_gui}) exceeding the maximum of {MAX_THREADS}.\n\n" + "Using an extremely high number of threads can lead to:\n" + " - Diminishing returns (no significant speed increase).\n" + " - Increased system instability or application crashes.\n" + " - Higher chance of being rate-limited or temporarily IP-banned by the server.\n\n" + f"The thread count has been automatically capped to {MAX_THREADS} for stability.") + QMessageBox.warning(self, "High Thread Count Warning", hard_warning_msg) + num_threads_from_gui = MAX_THREADS + self.thread_count_input.setText(str(MAX_THREADS)) + self.log_signal.emit(f"⚠️ User attempted {num_threads_from_gui} threads, capped to {MAX_THREADS}.") + if SOFT_WARNING_THREAD_THRESHOLD < num_threads_from_gui <= MAX_THREADS: + soft_warning_msg_box = QMessageBox(self) + soft_warning_msg_box.setIcon(QMessageBox.Question) + soft_warning_msg_box.setWindowTitle("Thread Count Advisory") + soft_warning_msg_box.setText(f"You've set the thread count to {num_threads_from_gui}.\n\n" + "While this is within the allowed limit, using a high number of threads (typically above 40-50) can sometimes lead to:\n" + " - Increased errors or failed file downloads.\n" + " - Connection issues with the server.\n" + " - Higher system resource usage.\n\n" + "For most users and connections, 10-30 threads provide a good balance.\n\n" + f"Do you want to proceed with {num_threads_from_gui} threads, or would you like to change the value?") + proceed_button = soft_warning_msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole) + change_button = soft_warning_msg_box.addButton("Change Thread Value", QMessageBox.RejectRole) + soft_warning_msg_box.setDefaultButton(proceed_button) + soft_warning_msg_box.setEscapeButton(change_button) + soft_warning_msg_box.exec_() + if soft_warning_msg_box.clickedButton() == change_button: + self.log_signal.emit(f"ℹ️ User opted to change thread count from {num_threads_from_gui} after advisory.") + self.thread_count_input.setFocus() + self.thread_count_input.selectAll() + return False - if use_multithreading_enabled_by_checkbox : - if num_threads_from_gui >MAX_THREADS : - hard_warning_msg =( - f"You've entered a thread count ({num_threads_from_gui }) exceeding the maximum of {MAX_THREADS }.\n\n" - "Using an extremely high number of threads can lead to:\n" - " - Diminishing returns (no significant speed increase).\n" - " - Increased system instability or application crashes.\n" - " - Higher chance of being rate-limited or temporarily IP-banned by the server.\n\n" - f"The thread count has been automatically capped to {MAX_THREADS } for stability." - ) - QMessageBox .warning (self ,"High Thread Count Warning",hard_warning_msg ) - num_threads_from_gui =MAX_THREADS - self .thread_count_input .setText (str (MAX_THREADS )) - self .log_signal .emit (f"⚠️ User attempted {num_threads_from_gui } threads, capped to {MAX_THREADS }.") - if SOFT_WARNING_THREAD_THRESHOLD MAX_THREADS : - hard_warning_msg =( - f"You've entered a thread count ({num_threads_from_gui }) exceeding the maximum of {MAX_THREADS }.\n\n" - "Using an extremely high number of threads can lead to:\n" - " - Diminishing returns (no significant speed increase).\n" - " - Increased system instability or application crashes.\n" - " - Higher chance of being rate-limited or temporarily IP-banned by the server.\n\n" - f"The thread count has been automatically capped to {MAX_THREADS } for stability." - ) - QMessageBox .warning (self ,"High Thread Count Warning",hard_warning_msg ) - num_threads_from_gui =MAX_THREADS - self .thread_count_input .setText (str (MAX_THREADS )) - self .log_signal .emit (f"⚠️ User attempted {num_threads_from_gui } threads, capped to {MAX_THREADS }.") - if SOFT_WARNING_THREAD_THRESHOLD end_page :raise ValueError ("Start page cannot be greater than end page.") - - if manga_mode and start_page and end_page : - msg_box =QMessageBox (self ) - msg_box .setIcon (QMessageBox .Warning ) - msg_box .setWindowTitle ("Manga Mode & Page Range Warning") - msg_box .setText ( - "You have enabled Manga/Comic Mode and also specified a Page Range.\n\n" - "Manga Mode processes posts from oldest to newest across all available pages by default.\n" - "If you use a page range, you might miss parts of the manga/comic if it starts before your 'Start Page' or continues after your 'End Page'.\n\n" - "However, if you are certain the content you want is entirely within this page range (e.g., a short series, or you know the specific pages for a volume), then proceeding is okay.\n\n" - "Do you want to proceed with this page range in Manga Mode?" - ) - proceed_button =msg_box .addButton ("Proceed Anyway",QMessageBox .AcceptRole ) - cancel_button =msg_box .addButton ("Cancel Download",QMessageBox .RejectRole ) - msg_box .setDefaultButton (proceed_button ) - msg_box .setEscapeButton (cancel_button ) - msg_box .exec_ () - - if msg_box .clickedButton ()==cancel_button : - self .log_signal .emit ("❌ Download cancelled by user due to Manga Mode & Page Range warning.") - return False - except ValueError as e : - QMessageBox .critical (self ,"Page Range Error",f"Invalid page range: {e }") - return False - self .external_link_queue .clear ();self .extracted_links_cache =[];self ._is_processing_external_link_queue =False ;self ._current_link_post_title =None - - raw_character_filters_text =self .character_input .text ().strip () - parsed_character_filter_objects =self ._parse_character_filters (raw_character_filters_text ) - - actual_filters_to_use_for_run =[] - - needs_folder_naming_validation =(use_subfolders or manga_mode )and not extract_links_only - - if parsed_character_filter_objects : - actual_filters_to_use_for_run =parsed_character_filter_objects - - if not extract_links_only : - self .log_signal .emit (f"ℹ️ Using character filters for matching: {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") - - filter_objects_to_potentially_add_to_known_list =[] - for filter_item_obj in parsed_character_filter_objects : - item_primary_name =filter_item_obj ["name"] - cleaned_name_test =clean_folder_name (item_primary_name ) - if needs_folder_naming_validation and not cleaned_name_test : - QMessageBox .warning (self ,"Invalid Filter Name for Folder",f"Filter name '{item_primary_name }' is invalid for a folder and will be skipped for Known.txt interaction.") - self .log_signal .emit (f"⚠️ Skipping invalid filter for Known.txt interaction: '{item_primary_name }'") - continue - - an_alias_is_already_known =False - if any (kn_entry ["name"].lower ()==item_primary_name .lower ()for kn_entry in KNOWN_NAMES ): - an_alias_is_already_known =True - elif filter_item_obj ["is_group"]and needs_folder_naming_validation : - for alias_in_filter_obj in filter_item_obj ["aliases"]: - if any (kn_entry ["name"].lower ()==alias_in_filter_obj .lower ()or alias_in_filter_obj .lower ()in [a .lower ()for a in kn_entry ["aliases"]]for kn_entry in KNOWN_NAMES ): - an_alias_is_already_known =True ;break - - if an_alias_is_already_known and filter_item_obj ["is_group"]: - self .log_signal .emit (f"ℹ️ An alias from group '{item_primary_name }' is already known. Group will not be prompted for Known.txt addition.") - - should_prompt_to_add_to_known_list =( - needs_folder_naming_validation and not manga_mode and - not any (kn_entry ["name"].lower ()==item_primary_name .lower ()for kn_entry in KNOWN_NAMES )and - not an_alias_is_already_known - ) - if should_prompt_to_add_to_known_list : - if not any (obj_to_add ["name"].lower ()==item_primary_name .lower ()for obj_to_add in filter_objects_to_potentially_add_to_known_list ): - filter_objects_to_potentially_add_to_known_list .append (filter_item_obj ) - elif manga_mode and needs_folder_naming_validation and item_primary_name .lower ()not in {kn_entry ["name"].lower ()for kn_entry in KNOWN_NAMES }and not an_alias_is_already_known : - self .log_signal .emit (f"ℹ️ Manga Mode: Using filter '{item_primary_name }' for this session without adding to Known Names.") - - if filter_objects_to_potentially_add_to_known_list : - confirm_dialog =ConfirmAddAllDialog (filter_objects_to_potentially_add_to_known_list ,self ,self ) - dialog_result =confirm_dialog .exec_ () - - if dialog_result ==CONFIRM_ADD_ALL_CANCEL_DOWNLOAD : - self .log_signal .emit ("❌ Download cancelled by user at new name confirmation stage.") - return False - elif isinstance (dialog_result ,list ): - if dialog_result : - self .log_signal .emit (f"ℹ️ User chose to add {len (dialog_result )} new entry/entries to Known.txt.") - for filter_obj_to_add in dialog_result : - if filter_obj_to_add .get ("components_are_distinct_for_known_txt"): - self .log_signal .emit (f" Processing group '{filter_obj_to_add ['name']}' to add its components individually to Known.txt.") - for alias_component in filter_obj_to_add ["aliases"]: - self .add_new_character ( - name_to_add =alias_component , - is_group_to_add =False , - aliases_to_add =[alias_component ], - suppress_similarity_prompt =True - ) - else : - self .add_new_character ( - name_to_add =filter_obj_to_add ["name"], - is_group_to_add =filter_obj_to_add ["is_group"], - aliases_to_add =filter_obj_to_add ["aliases"], - suppress_similarity_prompt =True - ) - else : - self .log_signal .emit ("ℹ️ User confirmed adding, but no names were selected in the dialog. No new names added to Known.txt.") - elif dialog_result ==CONFIRM_ADD_ALL_SKIP_ADDING : - self .log_signal .emit ("ℹ️ User chose not to add new names to Known.txt for this session.") - else : - self .log_signal .emit (f"ℹ️ Using character filters for link extraction: {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") - - self .dynamic_character_filter_holder .set_filters (actual_filters_to_use_for_run ) - - creator_folder_ignore_words_for_run =None - character_filters_are_empty =not actual_filters_to_use_for_run - if is_full_creator_download and character_filters_are_empty : - creator_folder_ignore_words_for_run =CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS - log_messages .append (f" Creator Download (No Char Filter): Applying default folder name ignore list ({len (creator_folder_ignore_words_for_run )} words).") - - custom_folder_name_cleaned =None - if use_subfolders and post_id_from_url and self .custom_folder_widget and self .custom_folder_widget .isVisible ()and not extract_links_only : - raw_custom_name =self .custom_folder_input .text ().strip () - if raw_custom_name : - cleaned_custom =clean_folder_name (raw_custom_name ) - if cleaned_custom :custom_folder_name_cleaned =cleaned_custom - else :self .log_signal .emit (f"⚠️ Invalid custom folder name ignored: '{raw_custom_name }' (resulted in empty string after cleaning).") - - - self .main_log_output .clear () - if extract_links_only :self .main_log_output .append ("🔗 Extracting Links..."); - elif backend_filter_mode =='archive':self .main_log_output .append ("📦 Downloading Archives Only...") - - if self .external_log_output :self .external_log_output .clear () - if self .show_external_links and not extract_links_only and backend_filter_mode !='archive': - self .external_log_output .append ("🔗 External Links Found:") - - self .file_progress_label .setText ("");self .cancellation_event .clear ();self .active_futures =[] - self .total_posts_to_process =0 ;self .processed_posts_count =0 ;self .download_counter =0 ;self .skip_counter =0 - self .progress_label .setText (self ._tr ("progress_initializing_text","Progress: Initializing...")) - - self .retryable_failed_files_info .clear () - self .permanently_failed_files_for_dialog .clear () - self._update_error_button_count() - - manga_date_file_counter_ref_for_thread =None - if manga_mode and self .manga_filename_style ==STYLE_DATE_BASED and not extract_links_only : - manga_date_file_counter_ref_for_thread =None - self .log_signal .emit (f"ℹ️ Manga Date Mode: File counter will be initialized by the download thread.") - - manga_global_file_counter_ref_for_thread =None - if manga_mode and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING and not extract_links_only : - manga_global_file_counter_ref_for_thread =None - self .log_signal .emit (f"ℹ️ Manga Title+GlobalNum Mode: File counter will be initialized by the download thread (starts at 1).") - - effective_num_post_workers =1 - - effective_num_file_threads_per_worker =1 - - if post_id_from_url : - if use_multithreading_enabled_by_checkbox : - effective_num_file_threads_per_worker =max (1 ,min (num_threads_from_gui ,MAX_FILE_THREADS_PER_POST_OR_WORKER )) - else : - if manga_mode and self .manga_filename_style ==STYLE_DATE_BASED : - effective_num_post_workers =1 - elif manga_mode and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING : - effective_num_post_workers =1 - effective_num_file_threads_per_worker =1 - elif use_multithreading_enabled_by_checkbox : - effective_num_post_workers =max (1 ,min (num_threads_from_gui ,MAX_THREADS )) - effective_num_file_threads_per_worker =1 - - if not extract_links_only :log_messages .append (f" Save Location: {effective_output_dir_for_run }") - - if post_id_from_url : - log_messages .append (f" Mode: Single Post") - log_messages .append (f" ↳ File Downloads: Up to {effective_num_file_threads_per_worker } concurrent file(s)") - else : - log_messages .append (f" Mode: Creator Feed") - log_messages .append (f" Post Processing: {'Multi-threaded ('+str (effective_num_post_workers )+' workers)'if effective_num_post_workers >1 else 'Single-threaded (1 worker)'}") - log_messages .append (f" ↳ File Downloads per Worker: Up to {effective_num_file_threads_per_worker } concurrent file(s)") - pr_log ="All" - if start_page or end_page : - pr_log =f"{f'From {start_page } 'if start_page else ''}{'to 'if start_page and end_page else ''}{f'{end_page }'if end_page else (f'Up to {end_page }'if end_page else (f'From {start_page }'if start_page else 'Specific Range'))}".strip () - - if manga_mode : - log_messages .append (f" Page Range: {pr_log if pr_log else 'All'} (Manga Mode - Oldest Posts Processed First within range)") - else : - log_messages .append (f" Page Range: {pr_log if pr_log else 'All'}") - - - if not extract_links_only : - log_messages .append (f" Subfolders: {'Enabled'if use_subfolders else 'Disabled'}") - if use_subfolders and self.use_subfolder_per_post_checkbox.isChecked(): - use_date_prefix = self.date_prefix_checkbox.isChecked() if hasattr(self, 'date_prefix_checkbox') else False - log_messages.append(f" ↳ Date Prefix for Post Subfolders: {'Enabled' if use_date_prefix else 'Disabled'}") - if use_subfolders : - if custom_folder_name_cleaned :log_messages .append (f" Custom Folder (Post): '{custom_folder_name_cleaned }'") - if actual_filters_to_use_for_run : - log_messages .append (f" Character Filters: {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") - log_messages .append (f" ↳ Char Filter Scope: {current_char_filter_scope .capitalize ()}") - elif use_subfolders : - log_messages .append (f" Folder Naming: Automatic (based on title/known names)") - + dialog_exec_result = cookie_dialog.exec_() + if cookie_dialog.user_choice == CookieHelpDialog.CHOICE_PROCEED_WITHOUT_COOKIES and dialog_exec_result == QDialog.Accepted: + self.log_signal.emit("ℹ️ User chose to download without cookies for this session.") + use_cookie_for_this_run = False + elif cookie_dialog.user_choice == CookieHelpDialog.CHOICE_CANCEL_DOWNLOAD or dialog_exec_result == QDialog.Rejected: + self.log_signal.emit("❌ Download cancelled by user at cookie prompt.") + return False + else: + self.log_signal.emit("⚠️ Cookie dialog closed or unexpected choice. Aborting download.") + return False + + current_skip_words_scope = self.get_skip_words_scope() + manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False + extract_links_only = (self.radio_only_links and self.radio_only_links.isChecked()) + backend_filter_mode = self.get_filter_mode() + text_only_scope_for_run = self.more_filter_scope if backend_filter_mode == 'text_only' else None + export_format_for_run = self.text_export_format if backend_filter_mode == 'text_only' else 'txt' + checked_radio_button = self.radio_group.checkedButton() + user_selected_filter_text = checked_radio_button.text() if checked_radio_button else "All" + effective_output_dir_for_run = "" + + if selected_cookie_file_path_for_backend: + cookie_text_from_input = "" + + if backend_filter_mode == 'archive': + effective_skip_zip = False + effective_skip_rar = False + else: + effective_skip_zip = self.skip_zip_checkbox.isChecked() + effective_skip_rar = self.skip_rar_checkbox.isChecked() + if backend_filter_mode == 'audio': + effective_skip_zip = self.skip_zip_checkbox.isChecked() + effective_skip_rar = self.skip_rar_checkbox.isChecked() + + if not api_url: + QMessageBox.critical(self, "Input Error", "URL is required.") + return False + + if override_output_dir: + if not main_ui_download_dir: + QMessageBox.critical(self, "Configuration Error", "The main 'Download Location' must be set in the UI before downloading favorites with 'Artist Folders' scope.") + if self.is_processing_favorites_queue: self.log_signal.emit(f"❌ Favorite download for '{api_url}' skipped: Main download directory not set.") + return False + if not os.path.isdir(main_ui_download_dir): + QMessageBox.critical(self, "Directory Error", f"The main 'Download Location' ('{main_ui_download_dir}') does not exist or is not a directory. Please set a valid one for 'Artist Folders' scope.") + if self.is_processing_favorites_queue: self.log_signal.emit(f"❌ Favorite download for '{api_url}' skipped: Main download directory invalid.") + return False + effective_output_dir_for_run = os.path.normpath(override_output_dir) + else: + if not extract_links_only and not main_ui_download_dir: + QMessageBox.critical(self, "Input Error", "Download Directory is required when not in 'Only Links' mode.") + return False + if not extract_links_only and main_ui_download_dir and not os.path.isdir(main_ui_download_dir): + reply = QMessageBox.question(self, "Create Directory?", f"The directory '{main_ui_download_dir}' does not exist.\nCreate it now?", QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) + if reply == QMessageBox.Yes: + try: + os.makedirs(main_ui_download_dir, exist_ok=True) + except Exception as e: + QMessageBox.critical(self, "Directory Error", f"Could not create directory: {e}") + return False + else: + self.log_signal.emit("❌ Download cancelled: Output directory does not exist and was not created.") + return False + effective_output_dir_for_run = os.path.normpath(main_ui_download_dir) + + service, user_id, post_id_from_url = extract_post_info(api_url) + if not service or not user_id: + QMessageBox.critical(self, "Input Error", "Invalid or unsupported URL format.") + return False + + is_resumable_download = not post_id_from_url + processed_ids_to_skip = [] + if is_restore and self.interrupted_session_data: + processed_ids_to_skip = self.interrupted_session_data.get("download_state", {}).get("processed_post_ids", []) + + if is_resumable_download and not is_restore: + initial_session_data = { + "ui_settings": self._get_current_ui_settings_as_dict(api_url_override=api_url, output_dir_override=effective_output_dir_for_run), + "download_state": {"processed_post_ids": [], "permanently_failed_files": []} + } + self._save_session_file(initial_session_data) + + if compress_images and Image is None: + QMessageBox.warning(self, "Missing Dependency", "Pillow library (for image compression) not found. Compression will be disabled.") + compress_images = False; self.compress_images_checkbox.setChecked(False) + + log_messages = ["="*40, f"🚀 Starting {'Link Extraction' if extract_links_only else 'Download'} @ {time.strftime('%Y-%m-%d %H:%M:%S')}", f" URL: {api_url}"] + if is_restore: + log_messages.insert(1, "🔄 RESTORING INTERRUPTED SESSION 🔄") + if processed_ids_to_skip: + log_messages.append(f" Will skip {len(processed_ids_to_skip)} posts already processed.") + + current_char_filter_scope = self.get_char_filter_scope() + manga_mode = manga_mode_is_checked and not post_id_from_url + + start_page_str, end_page_str = self.start_page_input.text().strip(), self.end_page_input.text().strip() + start_page, end_page = None, None + if not post_id_from_url: + try: + if start_page_str: start_page = int(start_page_str) + if end_page_str: end_page = int(end_page_str) + except ValueError as e: + QMessageBox.critical(self, "Page Range Error", f"Invalid page range: {e}") + return False + + effective_num_post_workers = 1 + effective_num_file_threads_per_worker = 1 + if use_multithreading_enabled_by_checkbox: + if post_id_from_url: + effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER)) + else: + effective_num_post_workers = max(1, min(num_threads_from_gui, MAX_THREADS)) + + if not extract_links_only: log_messages.append(f" Save Location: {effective_output_dir_for_run}") + + if post_id_from_url: + log_messages.append(f" Mode: Single Post") + log_messages.append(f" ↳ File Downloads: Up to {effective_num_file_threads_per_worker} concurrent file(s)") + else: + log_messages.append(f" Mode: Creator Feed") + log_messages.append(f" Post Processing: {'Multi-threaded ('+str(effective_num_post_workers)+' workers)' if use_multithreading_enabled_by_checkbox else 'Single-threaded (1 worker)'}") + pr_log = "All" + if start_page or end_page: + pr_log = f"{f'From {start_page} ' if start_page else ''}{'to ' if start_page and end_page else ''}{f'{end_page}' if end_page else (f'Up to {end_page}' if end_page else (f'From {start_page}' if start_page else 'Specific Range'))}".strip() + log_messages.append(f" Page Range: {pr_log}") + + if not extract_links_only: + log_messages.append(f" Subfolders: {'Enabled' if use_subfolders else 'Disabled'}") + if use_subfolders and use_post_subfolders: + log_messages.append(f" ↳ Date Prefix for Post Subfolders: {'Enabled' if self.date_prefix_checkbox.isChecked() else 'Disabled'}") + + raw_character_filters_text = self.character_input.text().strip() + parsed_character_filter_objects = self._parse_character_filters(raw_character_filters_text) + if parsed_character_filter_objects: + log_messages.append(f" Character Filters: {', '.join(item['name'] for item in parsed_character_filter_objects)}") + log_messages.append(f" ↳ Char Filter Scope: {current_char_filter_scope.capitalize()}") + elif use_subfolders: + log_messages.append(f" Folder Naming: Automatic (based on title/known names)") keep_duplicates = self.keep_duplicates_checkbox.isChecked() if hasattr(self, 'keep_duplicates_checkbox') else False log_messages.extend([ f" File Type Filter: {user_selected_filter_text} (Backend processing as: {backend_filter_mode})", f" Keep In-Post Duplicates: {'Enabled' if keep_duplicates else 'Disabled'}", f" Skip Archives: {'.zip' if effective_skip_zip else ''}{', ' if effective_skip_zip and effective_skip_rar else ''}{'.rar' if effective_skip_rar else ''}{'None (Archive Mode)' if backend_filter_mode == 'archive' else ('None' if not (effective_skip_zip or effective_skip_rar) else '')}", - f" Skip Words Scope: {current_skip_words_scope .capitalize ()}", - f" Remove Words from Filename: {', '.join (remove_from_filename_words_list )if remove_from_filename_words_list else 'None'}", - f" Compress Images: {'Enabled'if compress_images else 'Disabled'}", - f" Thumbnails Only: {'Enabled'if download_thumbnails else 'Disabled'}" + f" Skip Words Scope: {current_skip_words_scope.capitalize()}", + f" Remove Words from Filename: {', '.join(remove_from_filename_words_list) if remove_from_filename_words_list else 'None'}", + f" Compress Images: {'Enabled' if compress_images else 'Disabled'}", + f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}" ]) - log_messages .append (f" Scan Post Content for Images: {'Enabled'if scan_content_for_images else 'Disabled'}") - else : - log_messages .append (f" Mode: Extracting Links Only") + log_messages.append(f" Scan Post Content for Images: {'Enabled' if scan_content_for_images else 'Disabled'}") + else: + log_messages.append(f" Mode: Extracting Links Only") - log_messages .append (f" Show External Links: {'Enabled'if self .show_external_links and not extract_links_only and backend_filter_mode !='archive'else 'Disabled'}") + log_messages.append(f" Show External Links: {'Enabled' if self.show_external_links and not extract_links_only and backend_filter_mode != 'archive' else 'Disabled'}") + if manga_mode: + log_messages.append(f" Manga Mode: Enabled") + log_messages.append(f" ↳ Manga Filename Style: {self.manga_filename_style}") + + log_messages.append(f" Use Cookie ('cookies.txt'): {'Enabled' if use_cookie_from_checkbox else 'Disabled'}") + if use_cookie_from_checkbox and cookie_text_from_input: + log_messages.append(f" ↳ Cookie Text Provided: Yes (length: {len(cookie_text_from_input)})") + elif use_cookie_from_checkbox and selected_cookie_file_path_for_backend: + log_messages.append(f" ↳ Cookie File Selected: {os.path.basename(selected_cookie_file_path_for_backend)}") + + should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url + log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading_for_posts else 'Single-threaded (posts)'}") + if should_use_multithreading_for_posts: + log_messages.append(f" Number of Post Worker Threads: {num_threads_from_gui}") + + log_messages.append("="*40) - if manga_mode : - log_messages .append (f" Manga Mode (File Renaming by Post Title): Enabled") - log_messages .append (f" ↳ Manga Filename Style: {'Post Title Based'if self .manga_filename_style ==STYLE_POST_TITLE else 'Original File Name'}") - if actual_filters_to_use_for_run : - log_messages .append (f" ↳ Manga Character Filter (for naming/folder): {', '.join (item ['name']for item in actual_filters_to_use_for_run )}") - log_messages .append (f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).") - - log_messages .append (f" Use Cookie ('cookies.txt'): {'Enabled'if use_cookie_from_checkbox else 'Disabled'}") - if use_cookie_from_checkbox and cookie_text_from_input : - log_messages .append (f" ↳ Cookie Text Provided: Yes (length: {len (cookie_text_from_input )})") - elif use_cookie_from_checkbox and selected_cookie_file_path_for_backend : - log_messages .append (f" ↳ Cookie File Selected: {os .path .basename (selected_cookie_file_path_for_backend )}") - should_use_multithreading_for_posts =use_multithreading_enabled_by_checkbox and not post_id_from_url - if manga_mode and (self .manga_filename_style ==STYLE_DATE_BASED or self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING )and not post_id_from_url : - enforced_by_style ="Date Mode"if self .manga_filename_style ==STYLE_DATE_BASED else "Title+GlobalNum Mode" - should_use_multithreading_for_posts =False - log_messages .append (f" Threading: Single-threaded (posts) - Enforced by Manga {enforced_by_style } (Actual workers: {effective_num_post_workers if effective_num_post_workers >1 else 1 })") - else : - log_messages .append (f" Threading: {'Multi-threaded (posts)'if should_use_multithreading_for_posts else 'Single-threaded (posts)'}") - if should_use_multithreading_for_posts : - log_messages .append (f" Number of Post Worker Threads: {effective_num_post_workers }") - log_messages .append ("="*40 ) - for msg in log_messages :self .log_signal .emit (msg ) - - self .set_ui_enabled (False ) + for msg in log_messages: + self.log_signal.emit(msg) + self.set_ui_enabled(False) from src.config.constants import FOLDER_NAME_STOP_WORDS + + manga_date_file_counter_ref_for_thread = None + if manga_mode and self.manga_filename_style == STYLE_DATE_BASED and not extract_links_only: + manga_date_file_counter_ref_for_thread = None + manga_global_file_counter_ref_for_thread = None + if manga_mode and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING and not extract_links_only: + manga_global_file_counter_ref_for_thread = None - args_template ={ - 'api_url_input':api_url , - 'download_root':effective_output_dir_for_run , - 'output_dir':effective_output_dir_for_run , - 'known_names':list (KNOWN_NAMES ), - 'known_names_copy':list (KNOWN_NAMES ), - 'filter_character_list':actual_filters_to_use_for_run , - 'filter_mode':backend_filter_mode , - 'text_only_scope': text_only_scope_for_run, - 'text_export_format': export_format_for_run, - 'single_pdf_mode': self.single_pdf_setting, - 'skip_zip':effective_skip_zip , - 'skip_rar':effective_skip_rar , - 'use_subfolders':use_subfolders , - 'use_post_subfolders':use_post_subfolders , - 'compress_images':compress_images , - 'download_thumbnails':download_thumbnails , - 'service':service , - 'user_id':user_id , - 'downloaded_files':self .downloaded_files , - 'downloaded_files_lock':self .downloaded_files_lock , - 'downloaded_file_hashes':self .downloaded_file_hashes , - 'downloaded_file_hashes_lock':self .downloaded_file_hashes_lock , - 'skip_words_list':skip_words_list , - 'skip_words_scope':current_skip_words_scope , - 'remove_from_filename_words_list':remove_from_filename_words_list , - 'char_filter_scope':current_char_filter_scope , - 'show_external_links':self .show_external_links , - 'extract_links_only':extract_links_only , - 'start_page':start_page , - 'end_page':end_page , - 'target_post_id_from_initial_url':post_id_from_url , - 'custom_folder_name':custom_folder_name_cleaned , - 'manga_mode_active':manga_mode , - 'unwanted_keywords':FOLDER_NAME_STOP_WORDS , - 'cancellation_event':self .cancellation_event , - 'manga_date_prefix':manga_date_prefix_text , - 'dynamic_character_filter_holder':self .dynamic_character_filter_holder , - 'pause_event':self .pause_event , - 'scan_content_for_images':scan_content_for_images , - 'manga_filename_style':self .manga_filename_style , - 'num_file_threads_for_worker':effective_num_file_threads_per_worker , - 'manga_date_file_counter_ref':manga_date_file_counter_ref_for_thread , - 'allow_multipart_download':allow_multipart , - 'cookie_text':cookie_text_from_input , - 'selected_cookie_file':selected_cookie_file_path_for_backend , - 'manga_global_file_counter_ref':manga_global_file_counter_ref_for_thread , - 'app_base_dir':app_base_dir_for_cookies , - 'project_root_dir': self.app_base_dir, - 'use_cookie':use_cookie_for_this_run , - 'session_file_path': self.session_file_path, - 'session_lock': self.session_lock, - 'creator_download_folder_ignore_words':creator_folder_ignore_words_for_run , - 'use_date_prefix_for_subfolder': self.date_prefix_checkbox.isChecked() if hasattr(self, 'date_prefix_checkbox') else False, - 'keep_in_post_duplicates': self.keep_duplicates_checkbox.isChecked() if hasattr(self, 'keep_duplicates_checkbox') else False, - 'skip_current_file_flag': None, + creator_folder_ignore_words_for_run = None + if not post_id_from_url and not self._parse_character_filters(self.character_input.text().strip()): + from src.config.constants import CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS + creator_folder_ignore_words_for_run = CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS + + args_template = { + 'processed_ids_to_skip': processed_ids_to_skip, + 'api_url_input': api_url, + 'output_dir': effective_output_dir_for_run, + 'download_root': effective_output_dir_for_run, + 'known_names': list(KNOWN_NAMES), + 'known_names_copy': list(KNOWN_NAMES), + 'filter_character_list': self._parse_character_filters(self.character_input.text().strip()), + 'filter_mode': backend_filter_mode, + 'text_only_scope': text_only_scope_for_run, + 'text_export_format': export_format_for_run, + 'single_pdf_mode': self.single_pdf_setting, + 'skip_zip': effective_skip_zip, + 'skip_rar': effective_skip_rar, + 'use_subfolders': use_subfolders, + 'use_post_subfolders': use_post_subfolders, + 'compress_images': compress_images, + 'download_thumbnails': download_thumbnails, + 'service': service, + 'user_id': user_id, + 'downloaded_files': self.downloaded_files, + 'downloaded_files_lock': self.downloaded_files_lock, + 'downloaded_file_hashes': self.downloaded_file_hashes, + 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, + 'skip_words_list': skip_words_list, + 'skip_words_scope': current_skip_words_scope, + 'remove_from_filename_words_list': remove_from_filename_words_list, + 'char_filter_scope': self.get_char_filter_scope(), + 'show_external_links': self.show_external_links, + 'extract_links_only': extract_links_only, + 'start_page': start_page, + 'end_page': end_page, + 'target_post_id_from_initial_url': post_id_from_url, + 'custom_folder_name': self.custom_folder_input.text().strip(), + 'manga_mode_active': manga_mode, + 'unwanted_keywords': FOLDER_NAME_STOP_WORDS, + 'cancellation_event': self.cancellation_event, + 'manga_date_prefix': self.manga_date_prefix_input.text().strip() if hasattr(self, 'manga_date_prefix_input') else "", + 'dynamic_character_filter_holder': self.dynamic_character_filter_holder, + 'pause_event': self.pause_event, + 'scan_content_for_images': scan_content_for_images, + 'manga_filename_style': self.manga_filename_style, + 'allow_multipart_download': allow_multipart, + 'cookie_text': cookie_text_from_input, + 'selected_cookie_file': selected_cookie_file_path_for_backend, + 'app_base_dir': app_base_dir_for_cookies, + 'project_root_dir': self.app_base_dir, + 'use_cookie': use_cookie_for_this_run, + 'session_file_path': self.session_file_path, + 'session_lock': self.session_lock, + 'use_date_prefix_for_subfolder': self.date_prefix_checkbox.isChecked() if hasattr(self, 'date_prefix_checkbox') else False, + 'keep_in_post_duplicates': self.keep_duplicates_checkbox.isChecked() if hasattr(self, 'keep_duplicates_checkbox') else False, + 'skip_current_file_flag': None, + 'override_output_dir': override_output_dir, + 'manga_date_file_counter_ref': manga_date_file_counter_ref_for_thread, + 'manga_global_file_counter_ref': manga_global_file_counter_ref_for_thread, + 'creator_download_folder_ignore_words': creator_folder_ignore_words_for_run, } - args_template ['override_output_dir']=override_output_dir - try : - if should_use_multithreading_for_posts : - self .log_signal .emit (f" Initializing multi-threaded {current_mode_log_text .lower ()} with {effective_num_post_workers } post workers...") - args_template ['emitter']=self .worker_to_gui_queue - self .start_multi_threaded_download (num_post_workers =effective_num_post_workers ,**args_template ) - else : - self .log_signal .emit (f" Initializing single-threaded {'link extraction'if extract_links_only else 'download'}...") - dt_expected_keys =[ - 'api_url_input','output_dir','known_names_copy','cancellation_event', - 'filter_character_list','filter_mode','skip_zip','skip_rar', - 'use_subfolders','use_post_subfolders','custom_folder_name', - 'compress_images','download_thumbnails','service','user_id', - 'downloaded_files','downloaded_file_hashes','pause_event','remove_from_filename_words_list', - 'downloaded_files_lock','downloaded_file_hashes_lock','dynamic_character_filter_holder', 'session_file_path', - 'session_lock', - 'skip_words_list','skip_words_scope','char_filter_scope', - 'show_external_links','extract_links_only','num_file_threads_for_worker', - 'start_page','end_page','target_post_id_from_initial_url', - 'manga_date_file_counter_ref', - 'manga_global_file_counter_ref','manga_date_prefix', - 'manga_mode_active','unwanted_keywords','manga_filename_style','scan_content_for_images', - 'allow_multipart_download','use_cookie','cookie_text','app_base_dir','selected_cookie_file','override_output_dir','project_root_dir', - 'text_only_scope', - 'single_pdf_mode' - ] - args_template ['skip_current_file_flag']=None - single_thread_args ={key :args_template [key ]for key in dt_expected_keys if key in args_template } - self .start_single_threaded_download (**single_thread_args ) - except Exception as e : - self._update_button_states_and_connections() # Re-enable UI if start fails - self .log_signal .emit (f"❌ CRITICAL ERROR preparing download: {e }\n{traceback .format_exc ()}") - QMessageBox .critical (self ,"Start Error",f"Failed to start process:\n{e }") - self .download_finished (0 ,0 ,False ,[]) - if self .pause_event :self .pause_event .clear () - self .is_paused =False - return True + try: + if should_use_multithreading_for_posts: + self.log_signal.emit(f" Initializing multi-threaded download...") + args_template['emitter'] = self.worker_to_gui_queue + self.start_multi_threaded_download(num_post_workers=num_threads_from_gui, **args_template) + else: + self.log_signal.emit(f" Initializing single-threaded download...") + single_thread_args = args_template.copy() + if 'download_root' in single_thread_args: + del single_thread_args['download_root'] + if 'emitter' in single_thread_args: + del single_thread_args['emitter'] + if 'known_names' in single_thread_args: + del single_thread_args['known_names'] + single_thread_args['num_file_threads_for_worker'] = effective_num_file_threads_per_worker + self.start_single_threaded_download(**single_thread_args) + except Exception as e: + self._update_button_states_and_connections() + self.log_signal.emit(f"❌ CRITICAL ERROR preparing download: {e}\n{traceback.format_exc()}") + QMessageBox.critical(self, "Start Error", f"Failed to start process:\n{e}") + self.download_finished(0, 0, False, []) + if self.pause_event: self.pause_event.clear() + self.is_paused = False + return True def restore_download(self): """Initiates the download restoration process.""" @@ -3997,12 +3649,15 @@ class DownloaderApp (QWidget ): def _fetch_and_queue_posts(self, api_url_input_for_fetcher, worker_args_template, num_post_workers): """ - Fetches post data and submits tasks to the pool. It does NOT wait for completion. + Fetches all post data first and then submits tasks to the pool. """ global PostProcessorWorker, download_from_api try: - # This section remains the same as before + # --- CHANGE START: Fetch all posts into a list before queuing --- + self.log_signal.emit("[Fetcher] Fetching ALL available post information first. This may take a moment for large creators...") + + all_posts = [] post_generator = download_from_api( api_url_input_for_fetcher, logger=lambda msg: self.log_signal.emit(f"[Fetcher] {msg}"), @@ -4017,7 +3672,26 @@ class DownloaderApp (QWidget ): app_base_dir=worker_args_template.get('app_base_dir'), manga_filename_style_for_sort_check=worker_args_template.get('manga_filename_style') ) + + # Consume the entire generator to get all posts + for posts_batch in post_generator: + if self.cancellation_event.is_set(): + break + if isinstance(posts_batch, list): + all_posts.extend(posts_batch) + if self.cancellation_event.is_set(): + self.log_signal.emit("[Fetcher] Post fetching was cancelled.") + # The 'finally' block will handle the rest + return + + self.log_signal.emit(f"[Fetcher] ✅ Fetching complete. Found {len(all_posts)} total posts. Now queuing for download...") + + # Set the total count once at the end of fetching + self.total_posts_to_process = len(all_posts) + self.overall_progress_signal.emit(self.total_posts_to_process, 0) + + # Now submit all the collected posts to the worker pool ppw_expected_keys = [ 'post_data','download_root','known_names','filter_character_list','unwanted_keywords', 'filter_mode','skip_zip','skip_rar','use_subfolders','use_post_subfolders', @@ -4038,14 +3712,11 @@ class DownloaderApp (QWidget ): num_file_dl_threads_for_each_worker = worker_args_template.get('num_file_threads_for_worker', 1) emitter_for_worker = worker_args_template.get('emitter') - for posts_batch in post_generator: - if self.cancellation_event.is_set(): + for post_data_item in all_posts: + if self.cancellation_event.is_set(): break - if isinstance(posts_batch, list) and posts_batch: - for post_data_item in posts_batch: - self._submit_post_to_worker_pool(post_data_item, worker_args_template, num_file_dl_threads_for_each_worker, emitter_for_worker, ppw_expected_keys, {}) - self.total_posts_to_process += len(posts_batch) - self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count) + self._submit_post_to_worker_pool(post_data_item, worker_args_template, num_file_dl_threads_for_each_worker, emitter_for_worker, ppw_expected_keys, {}) + # --- CHANGE END --- except Exception as e: self.log_signal.emit(f"❌ Error during post fetching: {e}\n{traceback.format_exc(limit=2)}")