mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
Commit
This commit is contained in:
5517
main_window.py
Normal file
5517
main_window.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -102,7 +102,6 @@ class PostProcessorWorker:
|
|||||||
keep_in_post_duplicates=False,
|
keep_in_post_duplicates=False,
|
||||||
session_file_path=None,
|
session_file_path=None,
|
||||||
session_lock=None,
|
session_lock=None,
|
||||||
processed_ids_to_skip=None,
|
|
||||||
text_only_scope=None,
|
text_only_scope=None,
|
||||||
text_export_format='txt',
|
text_export_format='txt',
|
||||||
single_pdf_mode=False,
|
single_pdf_mode=False,
|
||||||
@@ -160,7 +159,6 @@ class PostProcessorWorker:
|
|||||||
self.keep_in_post_duplicates = keep_in_post_duplicates
|
self.keep_in_post_duplicates = keep_in_post_duplicates
|
||||||
self.session_file_path = session_file_path
|
self.session_file_path = session_file_path
|
||||||
self.session_lock = session_lock
|
self.session_lock = session_lock
|
||||||
self.processed_ids_to_skip = processed_ids_to_skip
|
|
||||||
self.text_only_scope = text_only_scope
|
self.text_only_scope = text_only_scope
|
||||||
self.text_export_format = text_export_format
|
self.text_export_format = text_export_format
|
||||||
self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE
|
self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE
|
||||||
@@ -372,9 +370,9 @@ class PostProcessorWorker:
|
|||||||
filename_to_save_in_main_path =cleaned_original_api_filename
|
filename_to_save_in_main_path =cleaned_original_api_filename
|
||||||
was_original_name_kept_flag =False
|
was_original_name_kept_flag =False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if self .remove_from_filename_words_list and filename_to_save_in_main_path :
|
if self .remove_from_filename_words_list and filename_to_save_in_main_path :
|
||||||
# Store the name before this specific modification, so we can revert if it gets destroyed.
|
|
||||||
name_before_word_removal = filename_to_save_in_main_path
|
|
||||||
|
|
||||||
base_name_for_removal ,ext_for_removal =os .path .splitext (filename_to_save_in_main_path )
|
base_name_for_removal ,ext_for_removal =os .path .splitext (filename_to_save_in_main_path )
|
||||||
modified_base_name =base_name_for_removal
|
modified_base_name =base_name_for_removal
|
||||||
@@ -385,13 +383,12 @@ class PostProcessorWorker:
|
|||||||
modified_base_name =re .sub (r'[_.\s-]+',' ',modified_base_name )
|
modified_base_name =re .sub (r'[_.\s-]+',' ',modified_base_name )
|
||||||
modified_base_name =re .sub (r'\s+',' ',modified_base_name )
|
modified_base_name =re .sub (r'\s+',' ',modified_base_name )
|
||||||
modified_base_name =modified_base_name .strip ()
|
modified_base_name =modified_base_name .strip ()
|
||||||
|
|
||||||
if modified_base_name and modified_base_name !=ext_for_removal .lstrip ('.'):
|
if modified_base_name and modified_base_name !=ext_for_removal .lstrip ('.'):
|
||||||
filename_to_save_in_main_path =modified_base_name +ext_for_removal
|
filename_to_save_in_main_path =modified_base_name +ext_for_removal
|
||||||
else :
|
else :
|
||||||
# If the name was stripped to nothing, revert to the name from before this block.
|
filename_to_save_in_main_path =base_name_for_removal +ext_for_removal
|
||||||
self.logger(f" ⚠️ Filename was empty after removing words. Reverting to '{name_before_word_removal}'.")
|
|
||||||
filename_to_save_in_main_path = name_before_word_removal
|
|
||||||
|
|
||||||
if not self .download_thumbnails :
|
if not self .download_thumbnails :
|
||||||
|
|
||||||
@@ -887,34 +884,15 @@ class PostProcessorWorker:
|
|||||||
data_to_write_io .close ()
|
data_to_write_io .close ()
|
||||||
|
|
||||||
def process (self ):
|
def process (self ):
|
||||||
# --- FIX START: This entire method is now wrapped in a try...finally block ---
|
|
||||||
# to ensure it always reports completion back to the main window.
|
|
||||||
|
|
||||||
# Initialize result values to safe defaults for failure cases.
|
|
||||||
total_downloaded_this_post = 0
|
|
||||||
total_skipped_this_post = 0
|
|
||||||
kept_original_filenames_for_log = []
|
|
||||||
retryable_failures_this_post = []
|
|
||||||
permanent_failures_this_post = []
|
|
||||||
history_data_for_this_post = None
|
|
||||||
temp_filepath_for_return = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
post_id_for_skip_check = self.post.get('id')
|
|
||||||
if self.processed_ids_to_skip and post_id_for_skip_check in self.processed_ids_to_skip:
|
|
||||||
self.logger(f" -> Skipping Post {post_id_for_skip_check} (already processed in previous session).")
|
|
||||||
# We must emit 'worker_finished' so the main UI can count this as a completed (skipped) task.
|
|
||||||
num_potential_files_in_post = len(self.post.get('attachments', [])) + (1 if self.post.get('file') else 0)
|
|
||||||
total_skipped_this_post = num_potential_files_in_post
|
|
||||||
# The rest of the result tuple can be empty defaults
|
|
||||||
result_tuple = (0, total_skipped_this_post, [], [], [], None, None)
|
|
||||||
self._emit_signal('worker_finished', result_tuple)
|
|
||||||
return result_tuple
|
|
||||||
|
|
||||||
# ALL OF THE ORIGINAL LOGIC OF THE `process` METHOD GOES HERE
|
|
||||||
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None
|
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None
|
||||||
if self .check_cancel ():return 0 ,0 ,[],[],[],None, None
|
if self .check_cancel ():return 0 ,0 ,[],[],[],None, None
|
||||||
current_character_filters =self ._get_current_character_filters ()
|
current_character_filters =self ._get_current_character_filters ()
|
||||||
|
kept_original_filenames_for_log =[]
|
||||||
|
retryable_failures_this_post =[]
|
||||||
|
permanent_failures_this_post =[]
|
||||||
|
total_downloaded_this_post =0
|
||||||
|
total_skipped_this_post =0
|
||||||
|
history_data_for_this_post =None
|
||||||
|
|
||||||
parsed_api_url =urlparse (self .api_url_input )
|
parsed_api_url =urlparse (self .api_url_input )
|
||||||
referer_url =f"https://{parsed_api_url .netloc }/"
|
referer_url =f"https://{parsed_api_url .netloc }/"
|
||||||
@@ -1089,35 +1067,48 @@ class PostProcessorWorker:
|
|||||||
base_folder_names_for_post_content =[cleaned_primary_folder_name ]
|
base_folder_names_for_post_content =[cleaned_primary_folder_name ]
|
||||||
self .logger (f" Base folder name(s) for post content ({log_reason_for_folder }): {', '.join (base_folder_names_for_post_content )}")
|
self .logger (f" Base folder name(s) for post content ({log_reason_for_folder }): {', '.join (base_folder_names_for_post_content )}")
|
||||||
elif not current_character_filters :
|
elif not current_character_filters :
|
||||||
|
|
||||||
derived_folders_from_title_via_known_txt =match_folders_from_title (
|
derived_folders_from_title_via_known_txt =match_folders_from_title (
|
||||||
post_title ,
|
post_title ,
|
||||||
self .known_names ,
|
self .known_names ,
|
||||||
effective_unwanted_keywords_for_folder_naming
|
effective_unwanted_keywords_for_folder_naming
|
||||||
)
|
)
|
||||||
|
|
||||||
valid_derived_folders_from_title_known_txt =[
|
valid_derived_folders_from_title_known_txt =[
|
||||||
name for name in derived_folders_from_title_via_known_txt
|
name for name in derived_folders_from_title_via_known_txt
|
||||||
if name and name .strip ()and name .lower ()!="untitled_folder"
|
if name and name .strip ()and name .lower ()!="untitled_folder"
|
||||||
]
|
]
|
||||||
|
|
||||||
if valid_derived_folders_from_title_known_txt :
|
if valid_derived_folders_from_title_known_txt :
|
||||||
base_folder_names_for_post_content .extend (valid_derived_folders_from_title_known_txt )
|
base_folder_names_for_post_content .extend (valid_derived_folders_from_title_known_txt )
|
||||||
self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Post Title): {', '.join (base_folder_names_for_post_content )}")
|
self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Post Title): {', '.join (base_folder_names_for_post_content )}")
|
||||||
else :
|
else :
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
candidate_name_from_title_basic_clean =extract_folder_name_from_title (
|
candidate_name_from_title_basic_clean =extract_folder_name_from_title (
|
||||||
post_title ,
|
post_title ,
|
||||||
FOLDER_NAME_STOP_WORDS
|
FOLDER_NAME_STOP_WORDS
|
||||||
)
|
)
|
||||||
|
|
||||||
title_is_only_creator_ignored_words =False
|
title_is_only_creator_ignored_words =False
|
||||||
if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words :
|
if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words :
|
||||||
|
|
||||||
candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()}
|
candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()}
|
||||||
if candidate_title_words and candidate_title_words .issubset (self .creator_download_folder_ignore_words ):
|
if candidate_title_words and candidate_title_words .issubset (self .creator_download_folder_ignore_words ):
|
||||||
title_is_only_creator_ignored_words =True
|
title_is_only_creator_ignored_words =True
|
||||||
self .logger (f" Title-derived name '{candidate_name_from_title_basic_clean }' consists only of creator-specific ignore words.")
|
self .logger (f" Title-derived name '{candidate_name_from_title_basic_clean }' consists only of creator-specific ignore words.")
|
||||||
|
|
||||||
if title_is_only_creator_ignored_words :
|
if title_is_only_creator_ignored_words :
|
||||||
|
|
||||||
self .logger (f" Attempting Known.txt match on filenames as title was poor ('{candidate_name_from_title_basic_clean }').")
|
self .logger (f" Attempting Known.txt match on filenames as title was poor ('{candidate_name_from_title_basic_clean }').")
|
||||||
|
|
||||||
filenames_to_check =[
|
filenames_to_check =[
|
||||||
f_info ['_original_name_for_log']for f_info in all_files_from_post_api_for_char_check
|
f_info ['_original_name_for_log']for f_info in all_files_from_post_api_for_char_check
|
||||||
if f_info .get ('_original_name_for_log')
|
if f_info .get ('_original_name_for_log')
|
||||||
]
|
]
|
||||||
|
|
||||||
derived_folders_from_filenames_known_txt =set ()
|
derived_folders_from_filenames_known_txt =set ()
|
||||||
if filenames_to_check :
|
if filenames_to_check :
|
||||||
for fname in filenames_to_check :
|
for fname in filenames_to_check :
|
||||||
@@ -1129,6 +1120,7 @@ class PostProcessorWorker:
|
|||||||
for m in matches :
|
for m in matches :
|
||||||
if m and m .strip ()and m .lower ()!="untitled_folder":
|
if m and m .strip ()and m .lower ()!="untitled_folder":
|
||||||
derived_folders_from_filenames_known_txt .add (m )
|
derived_folders_from_filenames_known_txt .add (m )
|
||||||
|
|
||||||
if derived_folders_from_filenames_known_txt :
|
if derived_folders_from_filenames_known_txt :
|
||||||
base_folder_names_for_post_content .extend (list (derived_folders_from_filenames_known_txt ))
|
base_folder_names_for_post_content .extend (list (derived_folders_from_filenames_known_txt ))
|
||||||
self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Filenames): {', '.join (base_folder_names_for_post_content )}")
|
self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Filenames): {', '.join (base_folder_names_for_post_content )}")
|
||||||
@@ -1144,6 +1136,7 @@ class PostProcessorWorker:
|
|||||||
)
|
)
|
||||||
base_folder_names_for_post_content .append (extracted_name_from_title_full_ignore )
|
base_folder_names_for_post_content .append (extracted_name_from_title_full_ignore )
|
||||||
self .logger (f" Base folder name(s) for post content (Generic title parsing - title not solely creator-ignored words): {', '.join (base_folder_names_for_post_content )}")
|
self .logger (f" Base folder name(s) for post content (Generic title parsing - title not solely creator-ignored words): {', '.join (base_folder_names_for_post_content )}")
|
||||||
|
|
||||||
base_folder_names_for_post_content =[
|
base_folder_names_for_post_content =[
|
||||||
name for name in base_folder_names_for_post_content if name and name .strip ()
|
name for name in base_folder_names_for_post_content if name and name .strip ()
|
||||||
]
|
]
|
||||||
@@ -1151,36 +1144,49 @@ class PostProcessorWorker:
|
|||||||
final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content")
|
final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content")
|
||||||
base_folder_names_for_post_content =[final_fallback_name ]
|
base_folder_names_for_post_content =[final_fallback_name ]
|
||||||
self .logger (f" Ultimate fallback folder name: {final_fallback_name }")
|
self .logger (f" Ultimate fallback folder name: {final_fallback_name }")
|
||||||
|
|
||||||
if base_folder_names_for_post_content :
|
if base_folder_names_for_post_content :
|
||||||
determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ])
|
determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ])
|
||||||
|
|
||||||
if not self .extract_links_only and self .use_post_subfolders :
|
if not self .extract_links_only and self .use_post_subfolders :
|
||||||
cleaned_post_title_for_sub =clean_folder_name (post_title )
|
cleaned_post_title_for_sub =clean_folder_name (post_title )
|
||||||
post_id_for_fallback =self .post .get ('id','unknown_id')
|
post_id_for_fallback =self .post .get ('id','unknown_id')
|
||||||
|
|
||||||
|
|
||||||
if not cleaned_post_title_for_sub or cleaned_post_title_for_sub =="untitled_folder":
|
if not cleaned_post_title_for_sub or cleaned_post_title_for_sub =="untitled_folder":
|
||||||
self .logger (f" ⚠️ Post title '{post_title }' resulted in a generic subfolder name. Using 'post_{post_id_for_fallback }' as base.")
|
self .logger (f" ⚠️ Post title '{post_title }' resulted in a generic subfolder name. Using 'post_{post_id_for_fallback }' as base.")
|
||||||
original_cleaned_post_title_for_sub =f"post_{post_id_for_fallback }"
|
original_cleaned_post_title_for_sub =f"post_{post_id_for_fallback }"
|
||||||
else :
|
else :
|
||||||
original_cleaned_post_title_for_sub =cleaned_post_title_for_sub
|
original_cleaned_post_title_for_sub =cleaned_post_title_for_sub
|
||||||
|
|
||||||
if self.use_date_prefix_for_subfolder:
|
if self.use_date_prefix_for_subfolder:
|
||||||
|
# Prioritize 'published' date, fall back to 'added' date
|
||||||
published_date_str = self.post.get('published') or self.post.get('added')
|
published_date_str = self.post.get('published') or self.post.get('added')
|
||||||
if published_date_str:
|
if published_date_str:
|
||||||
try:
|
try:
|
||||||
|
# Extract just the date part (YYYY-MM-DD)
|
||||||
date_prefix = published_date_str.split('T')[0]
|
date_prefix = published_date_str.split('T')[0]
|
||||||
|
# Prepend the date to the folder name
|
||||||
original_cleaned_post_title_for_sub = f"{date_prefix} {original_cleaned_post_title_for_sub}"
|
original_cleaned_post_title_for_sub = f"{date_prefix} {original_cleaned_post_title_for_sub}"
|
||||||
self.logger(f" ℹ️ Applying date prefix to subfolder: '{original_cleaned_post_title_for_sub}'")
|
self.logger(f" ℹ️ Applying date prefix to subfolder: '{original_cleaned_post_title_for_sub}'")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger(f" ⚠️ Could not parse date '{published_date_str}' for prefix. Using original name. Error: {e}")
|
self.logger(f" ⚠️ Could not parse date '{published_date_str}' for prefix. Using original name. Error: {e}")
|
||||||
else:
|
else:
|
||||||
self.logger(" ⚠️ 'Date Prefix' is checked, but post has no 'published' or 'added' date. Omitting prefix.")
|
self.logger(" ⚠️ 'Date Prefix' is checked, but post has no 'published' or 'added' date. Omitting prefix.")
|
||||||
|
|
||||||
base_path_for_post_subfolder =determined_post_save_path_for_history
|
base_path_for_post_subfolder =determined_post_save_path_for_history
|
||||||
|
|
||||||
suffix_counter =0
|
suffix_counter =0
|
||||||
final_post_subfolder_name =""
|
final_post_subfolder_name =""
|
||||||
|
|
||||||
while True :
|
while True :
|
||||||
if suffix_counter ==0 :
|
if suffix_counter ==0 :
|
||||||
name_candidate =original_cleaned_post_title_for_sub
|
name_candidate =original_cleaned_post_title_for_sub
|
||||||
else :
|
else :
|
||||||
name_candidate =f"{original_cleaned_post_title_for_sub }_{suffix_counter }"
|
name_candidate =f"{original_cleaned_post_title_for_sub }_{suffix_counter }"
|
||||||
|
|
||||||
potential_post_subfolder_path =os .path .join (base_path_for_post_subfolder ,name_candidate )
|
potential_post_subfolder_path =os .path .join (base_path_for_post_subfolder ,name_candidate )
|
||||||
|
|
||||||
try :
|
try :
|
||||||
os .makedirs (potential_post_subfolder_path ,exist_ok =False )
|
os .makedirs (potential_post_subfolder_path ,exist_ok =False )
|
||||||
final_post_subfolder_name =name_candidate
|
final_post_subfolder_name =name_candidate
|
||||||
@@ -1198,30 +1204,39 @@ class PostProcessorWorker:
|
|||||||
self .logger (f" ❌ Error creating directory '{potential_post_subfolder_path }': {e_mkdir }. Files for this post might be saved in parent or fail.")
|
self .logger (f" ❌ Error creating directory '{potential_post_subfolder_path }': {e_mkdir }. Files for this post might be saved in parent or fail.")
|
||||||
final_post_subfolder_name =original_cleaned_post_title_for_sub
|
final_post_subfolder_name =original_cleaned_post_title_for_sub
|
||||||
break
|
break
|
||||||
determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name )
|
|
||||||
|
|
||||||
|
determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name )
|
||||||
if self.filter_mode == 'text_only' and not self.extract_links_only:
|
if self.filter_mode == 'text_only' and not self.extract_links_only:
|
||||||
self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})")
|
self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})")
|
||||||
|
|
||||||
|
# --- Apply Title-based filters to ensure post is a candidate ---
|
||||||
post_title_lower = post_title.lower()
|
post_title_lower = post_title.lower()
|
||||||
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
|
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
|
||||||
for skip_word in self.skip_words_list:
|
for skip_word in self.skip_words_list:
|
||||||
if skip_word.lower() in post_title_lower:
|
if skip_word.lower() in post_title_lower:
|
||||||
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.")
|
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.")
|
||||||
return 0, num_potential_files_in_post, [], [], [], None, None
|
return 0, num_potential_files_in_post, [], [], [], None, None
|
||||||
|
|
||||||
if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope:
|
if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope:
|
||||||
self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.")
|
self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.")
|
||||||
return 0, num_potential_files_in_post, [], [], [], None, None
|
return 0, num_potential_files_in_post, [], [], [], None, None
|
||||||
|
|
||||||
|
# --- Get the text content based on scope ---
|
||||||
raw_text_content = ""
|
raw_text_content = ""
|
||||||
final_post_data = post_data
|
final_post_data = post_data
|
||||||
|
|
||||||
|
# Fetch full post data if content is missing and scope is 'content'
|
||||||
if self.text_only_scope == 'content' and 'content' not in final_post_data:
|
if self.text_only_scope == 'content' and 'content' not in final_post_data:
|
||||||
self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
|
self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
|
||||||
parsed_url = urlparse(self.api_url_input)
|
parsed_url = urlparse(self.api_url_input)
|
||||||
api_domain = parsed_url.netloc
|
api_domain = parsed_url.netloc
|
||||||
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
|
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
|
||||||
from .api_client import fetch_single_post_data
|
|
||||||
|
from .api_client import fetch_single_post_data # Local import to avoid circular dependency issues
|
||||||
full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
|
full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
|
||||||
if full_data:
|
if full_data:
|
||||||
final_post_data = full_data
|
final_post_data = full_data
|
||||||
|
|
||||||
if self.text_only_scope == 'content':
|
if self.text_only_scope == 'content':
|
||||||
raw_text_content = final_post_data.get('content', '')
|
raw_text_content = final_post_data.get('content', '')
|
||||||
elif self.text_only_scope == 'comments':
|
elif self.text_only_scope == 'comments':
|
||||||
@@ -1239,9 +1254,12 @@ class PostProcessorWorker:
|
|||||||
raw_text_content = "\n".join(comment_texts)
|
raw_text_content = "\n".join(comment_texts)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger(f" ❌ Error fetching comments for text-only mode: {e}")
|
self.logger(f" ❌ Error fetching comments for text-only mode: {e}")
|
||||||
|
|
||||||
if not raw_text_content or not raw_text_content.strip():
|
if not raw_text_content or not raw_text_content.strip():
|
||||||
self.logger(" -> Skip Saving Text: No content/comments found or fetched.")
|
self.logger(" -> Skip Saving Text: No content/comments found or fetched.")
|
||||||
return 0, num_potential_files_in_post, [], [], [], None, None
|
return 0, num_potential_files_in_post, [], [], [], None, None
|
||||||
|
|
||||||
|
# --- Robust HTML-to-TEXT Conversion ---
|
||||||
paragraph_pattern = re.compile(r'<p.*?>(.*?)</p>', re.IGNORECASE | re.DOTALL)
|
paragraph_pattern = re.compile(r'<p.*?>(.*?)</p>', re.IGNORECASE | re.DOTALL)
|
||||||
html_paragraphs = paragraph_pattern.findall(raw_text_content)
|
html_paragraphs = paragraph_pattern.findall(raw_text_content)
|
||||||
cleaned_text = ""
|
cleaned_text = ""
|
||||||
@@ -1259,9 +1277,12 @@ class PostProcessorWorker:
|
|||||||
cleaned_paragraphs_list.append(p_final)
|
cleaned_paragraphs_list.append(p_final)
|
||||||
cleaned_text = '\n\n'.join(cleaned_paragraphs_list)
|
cleaned_text = '\n\n'.join(cleaned_paragraphs_list)
|
||||||
cleaned_text = cleaned_text.replace('…', '...')
|
cleaned_text = cleaned_text.replace('…', '...')
|
||||||
|
|
||||||
|
# --- Logic for Single PDF Mode (File-based) ---
|
||||||
if self.single_pdf_mode:
|
if self.single_pdf_mode:
|
||||||
if not cleaned_text:
|
if not cleaned_text:
|
||||||
return 0, 0, [], [], [], None, None
|
return 0, 0, [], [], [], None, None
|
||||||
|
|
||||||
content_data = {
|
content_data = {
|
||||||
'title': post_title,
|
'title': post_title,
|
||||||
'content': cleaned_text,
|
'content': cleaned_text,
|
||||||
@@ -1271,20 +1292,22 @@ class PostProcessorWorker:
|
|||||||
os.makedirs(temp_dir, exist_ok=True)
|
os.makedirs(temp_dir, exist_ok=True)
|
||||||
temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json"
|
temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json"
|
||||||
temp_filepath = os.path.join(temp_dir, temp_filename)
|
temp_filepath = os.path.join(temp_dir, temp_filename)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(temp_filepath, 'w', encoding='utf-8') as f:
|
with open(temp_filepath, 'w', encoding='utf-8') as f:
|
||||||
json.dump(content_data, f, indent=2)
|
json.dump(content_data, f, indent=2)
|
||||||
self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.")
|
self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.")
|
||||||
self._emit_signal('worker_finished', (0, 0, [], [], [], None, temp_filepath))
|
return 0, 0, [], [], [], None, temp_filepath
|
||||||
return (0, 0, [], [], [], None, temp_filepath)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger(f" ❌ Failed to write temporary file for single PDF: {e}")
|
self.logger(f" ❌ Failed to write temporary file for single PDF: {e}")
|
||||||
self._emit_signal('worker_finished', (0, 0, [], [], [], [], None))
|
return 0, 0, [], [], [], None, None
|
||||||
return (0, 0, [], [], [], [], None)
|
|
||||||
|
# --- Logic for Individual File Saving ---
|
||||||
else:
|
else:
|
||||||
file_extension = self.text_export_format
|
file_extension = self.text_export_format
|
||||||
txt_filename = clean_filename(post_title) + f".{file_extension}"
|
txt_filename = clean_filename(post_title) + f".{file_extension}"
|
||||||
final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)
|
final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
os.makedirs(determined_post_save_path_for_history, exist_ok=True)
|
os.makedirs(determined_post_save_path_for_history, exist_ok=True)
|
||||||
base, ext = os.path.splitext(final_save_path)
|
base, ext = os.path.splitext(final_save_path)
|
||||||
@@ -1292,6 +1315,7 @@ class PostProcessorWorker:
|
|||||||
while os.path.exists(final_save_path):
|
while os.path.exists(final_save_path):
|
||||||
final_save_path = f"{base}_{counter}{ext}"
|
final_save_path = f"{base}_{counter}{ext}"
|
||||||
counter += 1
|
counter += 1
|
||||||
|
|
||||||
if file_extension == 'pdf':
|
if file_extension == 'pdf':
|
||||||
if FPDF:
|
if FPDF:
|
||||||
self.logger(f" Converting to PDF...")
|
self.logger(f" Converting to PDF...")
|
||||||
@@ -1313,6 +1337,7 @@ class PostProcessorWorker:
|
|||||||
self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
|
self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
|
||||||
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
|
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
|
||||||
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
|
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
|
||||||
|
|
||||||
elif file_extension == 'docx':
|
elif file_extension == 'docx':
|
||||||
if Document:
|
if Document:
|
||||||
self.logger(f" Converting to DOCX...")
|
self.logger(f" Converting to DOCX...")
|
||||||
@@ -1323,14 +1348,17 @@ class PostProcessorWorker:
|
|||||||
self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.")
|
self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.")
|
||||||
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
|
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
|
||||||
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
|
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
|
||||||
else:
|
|
||||||
|
else: # Default to TXT
|
||||||
with open(final_save_path, 'w', encoding='utf-8') as f:
|
with open(final_save_path, 'w', encoding='utf-8') as f:
|
||||||
f.write(cleaned_text)
|
f.write(cleaned_text)
|
||||||
|
|
||||||
self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
|
self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
|
||||||
return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None
|
return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}")
|
self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}")
|
||||||
return 0, num_potential_files_in_post, [], [], [], None, None
|
return 0, num_potential_files_in_post, [], [], [], None, None
|
||||||
|
|
||||||
if not self .extract_links_only and self .use_subfolders and self .skip_words_list :
|
if not self .extract_links_only and self .use_subfolders and self .skip_words_list :
|
||||||
if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
||||||
for folder_name_to_check in base_folder_names_for_post_content :
|
for folder_name_to_check in base_folder_names_for_post_content :
|
||||||
@@ -1364,6 +1392,7 @@ class PostProcessorWorker:
|
|||||||
potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ]
|
potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ]
|
||||||
if mega_key_pattern .fullmatch (potential_key_from_fragment ):
|
if mega_key_pattern .fullmatch (potential_key_from_fragment ):
|
||||||
decryption_key_found =potential_key_from_fragment
|
decryption_key_found =potential_key_from_fragment
|
||||||
|
|
||||||
if not decryption_key_found and link_text :
|
if not decryption_key_found and link_text :
|
||||||
key_match_in_text =mega_key_pattern .search (link_text )
|
key_match_in_text =mega_key_pattern .search (link_text )
|
||||||
if key_match_in_text :
|
if key_match_in_text :
|
||||||
@@ -1484,10 +1513,14 @@ class PostProcessorWorker:
|
|||||||
return 0 ,0 ,[],[],[],None
|
return 0 ,0 ,[],[],[],None
|
||||||
files_to_download_info_list =[]
|
files_to_download_info_list =[]
|
||||||
processed_original_filenames_in_this_post =set ()
|
processed_original_filenames_in_this_post =set ()
|
||||||
|
|
||||||
if self.keep_in_post_duplicates:
|
if self.keep_in_post_duplicates:
|
||||||
|
# If we keep duplicates, just add every file to the list to be processed.
|
||||||
|
# The downstream hash check and rename-on-collision logic will handle them.
|
||||||
files_to_download_info_list.extend(all_files_from_post_api)
|
files_to_download_info_list.extend(all_files_from_post_api)
|
||||||
self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.")
|
self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.")
|
||||||
else:
|
else:
|
||||||
|
# This is the original logic that skips duplicates by name within a post.
|
||||||
for file_info in all_files_from_post_api:
|
for file_info in all_files_from_post_api:
|
||||||
current_api_original_filename = file_info.get('_original_name_for_log')
|
current_api_original_filename = file_info.get('_original_name_for_log')
|
||||||
if current_api_original_filename in processed_original_filenames_in_this_post:
|
if current_api_original_filename in processed_original_filenames_in_this_post:
|
||||||
@@ -1497,9 +1530,12 @@ class PostProcessorWorker:
|
|||||||
files_to_download_info_list.append(file_info)
|
files_to_download_info_list.append(file_info)
|
||||||
if current_api_original_filename:
|
if current_api_original_filename:
|
||||||
processed_original_filenames_in_this_post.add(current_api_original_filename)
|
processed_original_filenames_in_this_post.add(current_api_original_filename)
|
||||||
|
|
||||||
if not files_to_download_info_list:
|
if not files_to_download_info_list:
|
||||||
|
|
||||||
self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.")
|
self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.")
|
||||||
return 0 ,total_skipped_this_post ,[],[],[],None
|
return 0 ,total_skipped_this_post ,[],[],[],None
|
||||||
|
|
||||||
self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.")
|
self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.")
|
||||||
with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool :
|
with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool :
|
||||||
futures_list =[]
|
futures_list =[]
|
||||||
@@ -1561,7 +1597,10 @@ class PostProcessorWorker:
|
|||||||
self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.")
|
self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.")
|
||||||
total_skipped_this_post +=1
|
total_skipped_this_post +=1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
target_base_folders_for_this_file_iteration =[]
|
target_base_folders_for_this_file_iteration =[]
|
||||||
|
|
||||||
if current_character_filters :
|
if current_character_filters :
|
||||||
char_title_subfolder_name =None
|
char_title_subfolder_name =None
|
||||||
if self .target_post_id_from_initial_url and self .custom_folder_name :
|
if self .target_post_id_from_initial_url and self .custom_folder_name :
|
||||||
@@ -1582,17 +1621,24 @@ class PostProcessorWorker:
|
|||||||
target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content )
|
target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content )
|
||||||
else :
|
else :
|
||||||
target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title ))
|
target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title ))
|
||||||
|
|
||||||
if not target_base_folders_for_this_file_iteration :
|
if not target_base_folders_for_this_file_iteration :
|
||||||
target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content"))
|
target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content"))
|
||||||
|
|
||||||
for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration :
|
for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration :
|
||||||
current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root
|
current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root
|
||||||
if self .use_subfolders and target_base_folder_name_for_instance :
|
if self .use_subfolders and target_base_folder_name_for_instance :
|
||||||
current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance )
|
current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance )
|
||||||
if self .use_post_subfolders :
|
if self .use_post_subfolders :
|
||||||
|
|
||||||
current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name )
|
current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name )
|
||||||
|
|
||||||
manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None
|
manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None
|
||||||
manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None
|
manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None
|
||||||
|
|
||||||
|
|
||||||
folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title )
|
folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title )
|
||||||
|
|
||||||
futures_list .append (file_pool .submit (
|
futures_list .append (file_pool .submit (
|
||||||
self ._download_single_file ,
|
self ._download_single_file ,
|
||||||
file_info =file_info_to_dl ,
|
file_info =file_info_to_dl ,
|
||||||
@@ -1602,6 +1648,7 @@ class PostProcessorWorker:
|
|||||||
manga_global_file_counter_ref =manga_global_counter_to_pass ,folder_context_name_for_history =folder_context_for_file ,
|
manga_global_file_counter_ref =manga_global_counter_to_pass ,folder_context_name_for_history =folder_context_for_file ,
|
||||||
file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list )
|
file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list )
|
||||||
))
|
))
|
||||||
|
|
||||||
for future in as_completed (futures_list ):
|
for future in as_completed (futures_list ):
|
||||||
if self .check_cancel ():
|
if self .check_cancel ():
|
||||||
for f_to_cancel in futures_list :
|
for f_to_cancel in futures_list :
|
||||||
@@ -1625,30 +1672,42 @@ class PostProcessorWorker:
|
|||||||
self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }")
|
self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }")
|
||||||
total_skipped_this_post +=1
|
total_skipped_this_post +=1
|
||||||
self ._emit_signal ('file_progress',"",None )
|
self ._emit_signal ('file_progress',"",None )
|
||||||
|
|
||||||
|
# After a post's files are all processed, update the session file to mark this post as done.
|
||||||
if self.session_file_path and self.session_lock:
|
if self.session_file_path and self.session_lock:
|
||||||
try:
|
try:
|
||||||
with self.session_lock:
|
with self.session_lock:
|
||||||
if os.path.exists(self.session_file_path):
|
if os.path.exists(self.session_file_path): # Only update if the session file exists
|
||||||
|
# Read current state
|
||||||
with open(self.session_file_path, 'r', encoding='utf-8') as f:
|
with open(self.session_file_path, 'r', encoding='utf-8') as f:
|
||||||
session_data = json.load(f)
|
session_data = json.load(f)
|
||||||
|
|
||||||
if 'download_state' not in session_data:
|
if 'download_state' not in session_data:
|
||||||
session_data['download_state'] = {}
|
session_data['download_state'] = {}
|
||||||
|
|
||||||
|
# Add processed ID
|
||||||
if not isinstance(session_data['download_state'].get('processed_post_ids'), list):
|
if not isinstance(session_data['download_state'].get('processed_post_ids'), list):
|
||||||
session_data['download_state']['processed_post_ids'] = []
|
session_data['download_state']['processed_post_ids'] = []
|
||||||
session_data['download_state']['processed_post_ids'].append(self.post.get('id'))
|
session_data['download_state']['processed_post_ids'].append(self.post.get('id'))
|
||||||
|
|
||||||
|
# Add any permanent failures from this worker to the session file
|
||||||
if permanent_failures_this_post:
|
if permanent_failures_this_post:
|
||||||
if not isinstance(session_data['download_state'].get('permanently_failed_files'), list):
|
if not isinstance(session_data['download_state'].get('permanently_failed_files'), list):
|
||||||
session_data['download_state']['permanently_failed_files'] = []
|
session_data['download_state']['permanently_failed_files'] = []
|
||||||
|
# To avoid duplicates if the same post is somehow re-processed
|
||||||
existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']}
|
existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']}
|
||||||
for failure in permanent_failures_this_post:
|
for failure in permanent_failures_this_post:
|
||||||
if failure.get('file_info', {}).get('url') not in existing_failed_urls:
|
if failure.get('file_info', {}).get('url') not in existing_failed_urls:
|
||||||
session_data['download_state']['permanently_failed_files'].append(failure)
|
session_data['download_state']['permanently_failed_files'].append(failure)
|
||||||
|
|
||||||
|
# Write to temp file and then atomically replace
|
||||||
temp_file_path = self.session_file_path + ".tmp"
|
temp_file_path = self.session_file_path + ".tmp"
|
||||||
with open(temp_file_path, 'w', encoding='utf-8') as f_tmp:
|
with open(temp_file_path, 'w', encoding='utf-8') as f_tmp:
|
||||||
json.dump(session_data, f_tmp, indent=2)
|
json.dump(session_data, f_tmp, indent=2)
|
||||||
os.replace(temp_file_path, self.session_file_path)
|
os.replace(temp_file_path, self.session_file_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger(f"⚠️ Could not update session file for post {post_id}: {e}")
|
self.logger(f"⚠️ Could not update session file for post {post_id}: {e}")
|
||||||
|
|
||||||
if not self .extract_links_only and (total_downloaded_this_post >0 or not (
|
if not self .extract_links_only and (total_downloaded_this_post >0 or not (
|
||||||
(current_character_filters and (
|
(current_character_filters and (
|
||||||
(self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or
|
(self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or
|
||||||
@@ -1661,6 +1720,7 @@ class PostProcessorWorker:
|
|||||||
top_file_name_for_history =post_main_file_info ['name']
|
top_file_name_for_history =post_main_file_info ['name']
|
||||||
elif post_attachments and post_attachments [0 ].get ('name'):
|
elif post_attachments and post_attachments [0 ].get ('name'):
|
||||||
top_file_name_for_history =post_attachments [0 ]['name']
|
top_file_name_for_history =post_attachments [0 ]['name']
|
||||||
|
|
||||||
history_data_for_this_post ={
|
history_data_for_this_post ={
|
||||||
'post_title':post_title ,'post_id':post_id ,
|
'post_title':post_title ,'post_id':post_id ,
|
||||||
'top_file_name':top_file_name_for_history ,
|
'top_file_name':top_file_name_for_history ,
|
||||||
@@ -1671,7 +1731,9 @@ class PostProcessorWorker:
|
|||||||
}
|
}
|
||||||
if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled.");
|
if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled.");
|
||||||
else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }")
|
else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }")
|
||||||
|
|
||||||
if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 :
|
if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 :
|
||||||
|
|
||||||
path_to_check_for_emptiness =determined_post_save_path_for_history
|
path_to_check_for_emptiness =determined_post_save_path_for_history
|
||||||
try :
|
try :
|
||||||
if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ):
|
if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ):
|
||||||
@@ -1680,26 +1742,17 @@ class PostProcessorWorker:
|
|||||||
except OSError as e_rmdir :
|
except OSError as e_rmdir :
|
||||||
self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }")
|
self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }")
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
post_id = self.post.get('id', 'N/A')
|
|
||||||
# Log the unexpected crash of the worker
|
|
||||||
self.logger(f"❌ CRITICAL WORKER FAILURE on Post ID {post_id}: {e}\n{traceback.format_exc(limit=4)}")
|
|
||||||
# Ensure the number of skipped files reflects the total potential files in the post,
|
|
||||||
# as none of them were processed successfully.
|
|
||||||
num_potential_files_in_post = len(self.post.get('attachments', [])) + (1 if self.post.get('file') else 0)
|
|
||||||
total_skipped_this_post = num_potential_files_in_post
|
|
||||||
total_downloaded_this_post = 0
|
|
||||||
|
|
||||||
finally:
|
|
||||||
# This 'finally' block ensures that the worker ALWAYS reports back,
|
|
||||||
# preventing the main UI from getting stuck.
|
|
||||||
result_tuple = (total_downloaded_this_post, total_skipped_this_post,
|
result_tuple = (total_downloaded_this_post, total_skipped_this_post,
|
||||||
kept_original_filenames_for_log, retryable_failures_this_post,
|
kept_original_filenames_for_log, retryable_failures_this_post,
|
||||||
permanent_failures_this_post, history_data_for_this_post,
|
permanent_failures_this_post, history_data_for_this_post,
|
||||||
temp_filepath_for_return)
|
None) # The 7th item is None because we already saved the temp file
|
||||||
self._emit_signal('worker_finished', result_tuple)
|
|
||||||
|
|
||||||
return result_tuple
|
# In Single PDF mode, the 7th item is the temp file path we created.
|
||||||
|
if self.single_pdf_mode and os.path.exists(temp_filepath):
|
||||||
|
result_tuple = (0, 0, [], [], [], None, temp_filepath)
|
||||||
|
|
||||||
|
self._emit_signal('worker_finished', result_tuple)
|
||||||
|
return # The method now returns nothing.
|
||||||
|
|
||||||
class DownloadThread (QThread ):
|
class DownloadThread (QThread ):
|
||||||
progress_signal =pyqtSignal (str )
|
progress_signal =pyqtSignal (str )
|
||||||
@@ -1749,7 +1802,6 @@ class DownloadThread (QThread ):
|
|||||||
cookie_text ="",
|
cookie_text ="",
|
||||||
session_file_path=None,
|
session_file_path=None,
|
||||||
session_lock=None,
|
session_lock=None,
|
||||||
processed_ids_to_skip=None,
|
|
||||||
text_only_scope=None,
|
text_only_scope=None,
|
||||||
text_export_format='txt',
|
text_export_format='txt',
|
||||||
single_pdf_mode=False,
|
single_pdf_mode=False,
|
||||||
@@ -1808,12 +1860,11 @@ class DownloadThread (QThread ):
|
|||||||
self .manga_global_file_counter_ref =manga_global_file_counter_ref
|
self .manga_global_file_counter_ref =manga_global_file_counter_ref
|
||||||
self.session_file_path = session_file_path
|
self.session_file_path = session_file_path
|
||||||
self.session_lock = session_lock
|
self.session_lock = session_lock
|
||||||
self.processed_ids_to_skip = processed_ids_to_skip
|
|
||||||
self.history_candidates_buffer =deque (maxlen =8 )
|
self.history_candidates_buffer =deque (maxlen =8 )
|
||||||
self.text_only_scope = text_only_scope
|
self.text_only_scope = text_only_scope
|
||||||
self.text_export_format = text_export_format
|
self.text_export_format = text_export_format
|
||||||
self.single_pdf_mode = single_pdf_mode
|
self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE
|
||||||
self.project_root_dir = project_root_dir
|
self.project_root_dir = project_root_dir # Add this assignment
|
||||||
|
|
||||||
if self .compress_images and Image is None :
|
if self .compress_images and Image is None :
|
||||||
self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
|
self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
|
||||||
@@ -1849,54 +1900,26 @@ class DownloadThread (QThread ):
|
|||||||
grand_list_of_kept_original_filenames =[]
|
grand_list_of_kept_original_filenames =[]
|
||||||
was_process_cancelled =False
|
was_process_cancelled =False
|
||||||
|
|
||||||
|
# This block for initializing manga mode counters remains unchanged
|
||||||
if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED and not self .extract_links_only and self .manga_date_file_counter_ref is None :
|
if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED and not self .extract_links_only and self .manga_date_file_counter_ref is None :
|
||||||
series_scan_dir = self.output_dir
|
# ... (existing manga counter initialization logic) ...
|
||||||
if self.use_subfolders :
|
|
||||||
if self.filter_character_list_objects_initial and self.filter_character_list_objects_initial [0] and self.filter_character_list_objects_initial[0].get("name"):
|
|
||||||
series_folder_name = clean_folder_name(self.filter_character_list_objects_initial[0]["name"])
|
|
||||||
series_scan_dir = os.path.join(series_scan_dir, series_folder_name)
|
|
||||||
elif self.service and self.user_id :
|
|
||||||
creator_based_folder_name = clean_folder_name(str(self.user_id))
|
|
||||||
series_scan_dir = os.path.join(series_scan_dir, creator_based_folder_name)
|
|
||||||
|
|
||||||
highest_num = 0
|
|
||||||
if os.path.isdir(series_scan_dir):
|
|
||||||
self.logger(f"ℹ️ [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir}'...")
|
|
||||||
for dirpath, _, filenames_in_dir in os.walk(series_scan_dir):
|
|
||||||
for filename_to_check in filenames_in_dir:
|
|
||||||
prefix_to_check = clean_filename(self.manga_date_prefix.strip()) if self.manga_date_prefix and self.manga_date_prefix.strip() else ""
|
|
||||||
name_part_to_match = filename_to_check
|
|
||||||
if prefix_to_check and name_part_to_match.startswith(prefix_to_check):
|
|
||||||
name_part_to_match = name_part_to_match[len(prefix_to_check):].lstrip()
|
|
||||||
|
|
||||||
base_name_no_ext = os.path.splitext(name_part_to_match)[0]
|
|
||||||
match = re.match(r"(\d+)", base_name_no_ext)
|
|
||||||
if match:
|
|
||||||
highest_num = max(highest_num, int(match.group(1)))
|
|
||||||
|
|
||||||
self.manga_date_file_counter_ref = [highest_num + 1, threading.Lock()]
|
|
||||||
self.logger(f"ℹ️ [Thread] Manga Date Mode: Initialized date-based counter at {self.manga_date_file_counter_ref[0]}.")
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING and not self .extract_links_only and self .manga_global_file_counter_ref is None :
|
if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING and not self .extract_links_only and self .manga_global_file_counter_ref is None :
|
||||||
self.manga_global_file_counter_ref = [1, threading.Lock()]
|
# ... (existing manga counter initialization logic) ...
|
||||||
self.logger(f"ℹ️ [Thread] Manga Title+GlobalNum Mode: Initialized global counter at {self.manga_global_file_counter_ref[0]}.")
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
worker_signals_obj = PostProcessorSignals()
|
worker_signals_obj = PostProcessorSignals()
|
||||||
try :
|
try :
|
||||||
|
# Connect signals
|
||||||
worker_signals_obj.progress_signal.connect(self.progress_signal)
|
worker_signals_obj.progress_signal.connect(self.progress_signal)
|
||||||
worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
|
worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
|
||||||
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
|
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
|
||||||
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
|
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
|
||||||
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
|
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
|
||||||
worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal)
|
worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal)
|
||||||
worker_signals_obj.worker_finished_signal.connect(lambda result: None)
|
worker_signals_obj.worker_finished_signal.connect(lambda result: None) # Connect to dummy lambda to avoid errors
|
||||||
|
|
||||||
self.logger(" Starting post fetch (single-threaded download process)...")
|
self.logger(" Starting post fetch (single-threaded download process)...")
|
||||||
self.logger(" Fetching ALL available post information first. This may take a moment...")
|
|
||||||
|
|
||||||
all_posts_data = []
|
|
||||||
post_generator = download_from_api(
|
post_generator = download_from_api(
|
||||||
self.api_url_input,
|
self.api_url_input,
|
||||||
logger=self.logger,
|
logger=self.logger,
|
||||||
@@ -1916,16 +1939,12 @@ class DownloadThread (QThread ):
|
|||||||
if self.isInterruptionRequested():
|
if self.isInterruptionRequested():
|
||||||
was_process_cancelled = True
|
was_process_cancelled = True
|
||||||
break
|
break
|
||||||
all_posts_data.extend(posts_batch_data)
|
for individual_post_data in posts_batch_data:
|
||||||
|
|
||||||
if not was_process_cancelled:
|
|
||||||
self.logger(f"✅ Fetching complete. Found {len(all_posts_data)} total posts. Starting download process...")
|
|
||||||
|
|
||||||
for individual_post_data in all_posts_data:
|
|
||||||
if self.isInterruptionRequested():
|
if self.isInterruptionRequested():
|
||||||
was_process_cancelled = True
|
was_process_cancelled = True
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Create the worker, now correctly passing single_pdf_mode
|
||||||
post_processing_worker = PostProcessorWorker(
|
post_processing_worker = PostProcessorWorker(
|
||||||
post_data=individual_post_data,
|
post_data=individual_post_data,
|
||||||
download_root=self.output_dir,
|
download_root=self.output_dir,
|
||||||
@@ -1972,13 +1991,13 @@ class DownloadThread (QThread ):
|
|||||||
creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
|
creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
|
||||||
session_file_path=self.session_file_path,
|
session_file_path=self.session_file_path,
|
||||||
session_lock=self.session_lock,
|
session_lock=self.session_lock,
|
||||||
processed_ids_to_skip=self.processed_ids_to_skip, # <-- FIX: Pass the list to the worker
|
|
||||||
text_only_scope=self.text_only_scope,
|
text_only_scope=self.text_only_scope,
|
||||||
text_export_format=self.text_export_format,
|
text_export_format=self.text_export_format,
|
||||||
single_pdf_mode=self.single_pdf_mode,
|
single_pdf_mode=self.single_pdf_mode, # <-- This is now correctly passed
|
||||||
project_root_dir=self.project_root_dir
|
project_root_dir=self.project_root_dir
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
|
# Correctly unpack the 7 values returned from the worker
|
||||||
(dl_count, skip_count, kept_originals_this_post,
|
(dl_count, skip_count, kept_originals_this_post,
|
||||||
retryable_failures, permanent_failures,
|
retryable_failures, permanent_failures,
|
||||||
history_data, temp_filepath) = post_processing_worker.process()
|
history_data, temp_filepath) = post_processing_worker.process()
|
||||||
@@ -1996,6 +2015,7 @@ class DownloadThread (QThread ):
|
|||||||
if permanent_failures:
|
if permanent_failures:
|
||||||
self.permanent_file_failed_signal.emit(permanent_failures)
|
self.permanent_file_failed_signal.emit(permanent_failures)
|
||||||
|
|
||||||
|
# In single-threaded text mode, pass the temp file path back to the main window
|
||||||
if self.single_pdf_mode and temp_filepath:
|
if self.single_pdf_mode and temp_filepath:
|
||||||
self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")
|
self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")
|
||||||
|
|
||||||
@@ -2010,7 +2030,8 @@ class DownloadThread (QThread ):
|
|||||||
self.skip_current_file_flag.clear()
|
self.skip_current_file_flag.clear()
|
||||||
self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
|
self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
|
||||||
self.msleep(10)
|
self.msleep(10)
|
||||||
|
if was_process_cancelled:
|
||||||
|
break
|
||||||
if not was_process_cancelled and not self.isInterruptionRequested():
|
if not was_process_cancelled and not self.isInterruptionRequested():
|
||||||
self.logger("✅ All posts processed or end of content reached by DownloadThread.")
|
self.logger("✅ All posts processed or end of content reached by DownloadThread.")
|
||||||
|
|
||||||
@@ -2019,6 +2040,7 @@ class DownloadThread (QThread ):
|
|||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
finally:
|
finally:
|
||||||
try:
|
try:
|
||||||
|
# Disconnect signals
|
||||||
if worker_signals_obj:
|
if worker_signals_obj:
|
||||||
worker_signals_obj.progress_signal.disconnect(self.progress_signal)
|
worker_signals_obj.progress_signal.disconnect(self.progress_signal)
|
||||||
worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
|
worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
|
||||||
@@ -2029,8 +2051,14 @@ class DownloadThread (QThread ):
|
|||||||
except (TypeError, RuntimeError) as e:
|
except (TypeError, RuntimeError) as e:
|
||||||
self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}")
|
self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}")
|
||||||
|
|
||||||
|
# Emit the final signal with all collected results
|
||||||
self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)
|
self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)
|
||||||
|
|
||||||
|
def receive_add_character_result (self ,result ):
|
||||||
|
with QMutexLocker (self .prompt_mutex ):
|
||||||
|
self ._add_character_response =result
|
||||||
|
self .logger (f" (DownloadThread) Received character prompt response: {'Yes (added/confirmed)'if result else 'No (declined/failed)'}")
|
||||||
|
|
||||||
class InterruptedError(Exception):
|
class InterruptedError(Exception):
|
||||||
"""Custom exception for handling cancellations gracefully."""
|
"""Custom exception for handling cancellations gracefully."""
|
||||||
pass
|
pass
|
||||||
@@ -23,6 +23,7 @@ class MoreOptionsDialog(QDialog):
|
|||||||
self.radio_button_group = QButtonGroup(self)
|
self.radio_button_group = QButtonGroup(self)
|
||||||
self.radio_content = QRadioButton("Description/Content")
|
self.radio_content = QRadioButton("Description/Content")
|
||||||
self.radio_comments = QRadioButton("Comments")
|
self.radio_comments = QRadioButton("Comments")
|
||||||
|
self.radio_comments = QRadioButton("Comments (Not Working)")
|
||||||
self.radio_button_group.addButton(self.radio_content)
|
self.radio_button_group.addButton(self.radio_content)
|
||||||
self.radio_button_group.addButton(self.radio_comments)
|
self.radio_button_group.addButton(self.radio_comments)
|
||||||
layout.addWidget(self.radio_content)
|
layout.addWidget(self.radio_content)
|
||||||
|
|||||||
@@ -1,93 +0,0 @@
|
|||||||
# src/ui/flow_layout.py
|
|
||||||
|
|
||||||
from PyQt5.QtWidgets import QLayout, QSizePolicy, QStyle
|
|
||||||
from PyQt5.QtCore import QPoint, QRect, QSize, Qt
|
|
||||||
|
|
||||||
class FlowLayout(QLayout):
|
|
||||||
"""A custom layout that arranges widgets in a flow, wrapping as necessary."""
|
|
||||||
def __init__(self, parent=None, margin=0, spacing=-1):
|
|
||||||
super(FlowLayout, self).__init__(parent)
|
|
||||||
|
|
||||||
if parent is not None:
|
|
||||||
self.setContentsMargins(margin, margin, margin, margin)
|
|
||||||
|
|
||||||
self.setSpacing(spacing)
|
|
||||||
self.itemList = []
|
|
||||||
|
|
||||||
def __del__(self):
|
|
||||||
item = self.takeAt(0)
|
|
||||||
while item:
|
|
||||||
item = self.takeAt(0)
|
|
||||||
|
|
||||||
def addItem(self, item):
|
|
||||||
self.itemList.append(item)
|
|
||||||
|
|
||||||
def count(self):
|
|
||||||
return len(self.itemList)
|
|
||||||
|
|
||||||
def itemAt(self, index):
|
|
||||||
if 0 <= index < len(self.itemList):
|
|
||||||
return self.itemList[index]
|
|
||||||
return None
|
|
||||||
|
|
||||||
def takeAt(self, index):
|
|
||||||
if 0 <= index < len(self.itemList):
|
|
||||||
return self.itemList.pop(index)
|
|
||||||
return None
|
|
||||||
|
|
||||||
def expandingDirections(self):
|
|
||||||
return Qt.Orientations(Qt.Orientation(0))
|
|
||||||
|
|
||||||
def hasHeightForWidth(self):
|
|
||||||
return True
|
|
||||||
|
|
||||||
def heightForWidth(self, width):
|
|
||||||
return self._do_layout(QRect(0, 0, width, 0), True)
|
|
||||||
|
|
||||||
def setGeometry(self, rect):
|
|
||||||
super(FlowLayout, self).setGeometry(rect)
|
|
||||||
self._do_layout(rect, False)
|
|
||||||
|
|
||||||
def sizeHint(self):
|
|
||||||
return self.minimumSize()
|
|
||||||
|
|
||||||
def minimumSize(self):
|
|
||||||
size = QSize()
|
|
||||||
for item in self.itemList:
|
|
||||||
size = size.expandedTo(item.minimumSize())
|
|
||||||
|
|
||||||
margin, _, _, _ = self.getContentsMargins()
|
|
||||||
size += QSize(2 * margin, 2 * margin)
|
|
||||||
return size
|
|
||||||
|
|
||||||
def _do_layout(self, rect, test_only):
|
|
||||||
x = rect.x()
|
|
||||||
y = rect.y()
|
|
||||||
line_height = 0
|
|
||||||
|
|
||||||
space_x = self.spacing()
|
|
||||||
space_y = self.spacing()
|
|
||||||
if self.layout() is not None:
|
|
||||||
space_x = self.spacing()
|
|
||||||
space_y = self.spacing()
|
|
||||||
else:
|
|
||||||
space_x = self.spacing()
|
|
||||||
space_y = self.spacing()
|
|
||||||
|
|
||||||
|
|
||||||
for item in self.itemList:
|
|
||||||
wid = item.widget()
|
|
||||||
next_x = x + item.sizeHint().width() + space_x
|
|
||||||
if next_x - space_x > rect.right() and line_height > 0:
|
|
||||||
x = rect.x()
|
|
||||||
y = y + line_height + space_y
|
|
||||||
next_x = x + item.sizeHint().width() + space_x
|
|
||||||
line_height = 0
|
|
||||||
|
|
||||||
if not test_only:
|
|
||||||
item.setGeometry(QRect(QPoint(x, y), item.sizeHint()))
|
|
||||||
|
|
||||||
x = next_x
|
|
||||||
line_height = max(line_height, item.sizeHint().height())
|
|
||||||
|
|
||||||
return y + line_height - rect.y()
|
|
||||||
File diff suppressed because it is too large
Load Diff
2064
workers.py
Normal file
2064
workers.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user