mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
Commit
This commit is contained in:
@@ -102,6 +102,7 @@ class PostProcessorWorker:
|
|||||||
keep_in_post_duplicates=False,
|
keep_in_post_duplicates=False,
|
||||||
session_file_path=None,
|
session_file_path=None,
|
||||||
session_lock=None,
|
session_lock=None,
|
||||||
|
processed_ids_to_skip=None,
|
||||||
text_only_scope=None,
|
text_only_scope=None,
|
||||||
text_export_format='txt',
|
text_export_format='txt',
|
||||||
single_pdf_mode=False,
|
single_pdf_mode=False,
|
||||||
@@ -159,6 +160,7 @@ class PostProcessorWorker:
|
|||||||
self.keep_in_post_duplicates = keep_in_post_duplicates
|
self.keep_in_post_duplicates = keep_in_post_duplicates
|
||||||
self.session_file_path = session_file_path
|
self.session_file_path = session_file_path
|
||||||
self.session_lock = session_lock
|
self.session_lock = session_lock
|
||||||
|
self.processed_ids_to_skip = processed_ids_to_skip
|
||||||
self.text_only_scope = text_only_scope
|
self.text_only_scope = text_only_scope
|
||||||
self.text_export_format = text_export_format
|
self.text_export_format = text_export_format
|
||||||
self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE
|
self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE
|
||||||
@@ -370,9 +372,9 @@ class PostProcessorWorker:
|
|||||||
filename_to_save_in_main_path =cleaned_original_api_filename
|
filename_to_save_in_main_path =cleaned_original_api_filename
|
||||||
was_original_name_kept_flag =False
|
was_original_name_kept_flag =False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if self .remove_from_filename_words_list and filename_to_save_in_main_path :
|
if self .remove_from_filename_words_list and filename_to_save_in_main_path :
|
||||||
|
# Store the name before this specific modification, so we can revert if it gets destroyed.
|
||||||
|
name_before_word_removal = filename_to_save_in_main_path
|
||||||
|
|
||||||
base_name_for_removal ,ext_for_removal =os .path .splitext (filename_to_save_in_main_path )
|
base_name_for_removal ,ext_for_removal =os .path .splitext (filename_to_save_in_main_path )
|
||||||
modified_base_name =base_name_for_removal
|
modified_base_name =base_name_for_removal
|
||||||
@@ -383,12 +385,13 @@ class PostProcessorWorker:
|
|||||||
modified_base_name =re .sub (r'[_.\s-]+',' ',modified_base_name )
|
modified_base_name =re .sub (r'[_.\s-]+',' ',modified_base_name )
|
||||||
modified_base_name =re .sub (r'\s+',' ',modified_base_name )
|
modified_base_name =re .sub (r'\s+',' ',modified_base_name )
|
||||||
modified_base_name =modified_base_name .strip ()
|
modified_base_name =modified_base_name .strip ()
|
||||||
|
|
||||||
if modified_base_name and modified_base_name !=ext_for_removal .lstrip ('.'):
|
if modified_base_name and modified_base_name !=ext_for_removal .lstrip ('.'):
|
||||||
filename_to_save_in_main_path =modified_base_name +ext_for_removal
|
filename_to_save_in_main_path =modified_base_name +ext_for_removal
|
||||||
else :
|
else :
|
||||||
filename_to_save_in_main_path =base_name_for_removal +ext_for_removal
|
# If the name was stripped to nothing, revert to the name from before this block.
|
||||||
|
self.logger(f" ⚠️ Filename was empty after removing words. Reverting to '{name_before_word_removal}'.")
|
||||||
|
filename_to_save_in_main_path = name_before_word_removal
|
||||||
|
|
||||||
if not self .download_thumbnails :
|
if not self .download_thumbnails :
|
||||||
|
|
||||||
@@ -883,17 +886,35 @@ class PostProcessorWorker:
|
|||||||
if data_to_write_io and hasattr (data_to_write_io ,'close'):
|
if data_to_write_io and hasattr (data_to_write_io ,'close'):
|
||||||
data_to_write_io .close ()
|
data_to_write_io .close ()
|
||||||
|
|
||||||
def process (self ):
|
def process(self):
|
||||||
|
# --- FIX START: This entire method is now wrapped in a try...finally block ---
|
||||||
|
# to ensure it always reports completion back to the main window.
|
||||||
|
|
||||||
|
# Initialize result values to safe defaults for failure cases.
|
||||||
|
total_downloaded_this_post = 0
|
||||||
|
total_skipped_this_post = 0
|
||||||
|
kept_original_filenames_for_log = []
|
||||||
|
retryable_failures_this_post = []
|
||||||
|
permanent_failures_this_post = []
|
||||||
|
history_data_for_this_post = None
|
||||||
|
temp_filepath_for_return = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
post_id_for_skip_check = self.post.get('id')
|
||||||
|
if self.processed_ids_to_skip and post_id_for_skip_check in self.processed_ids_to_skip:
|
||||||
|
self.logger(f" -> Skipping Post {post_id_for_skip_check} (already processed in previous session).")
|
||||||
|
# We must emit 'worker_finished' so the main UI can count this as a completed (skipped) task.
|
||||||
|
num_potential_files_in_post = len(self.post.get('attachments', [])) + (1 if self.post.get('file') else 0)
|
||||||
|
total_skipped_this_post = num_potential_files_in_post
|
||||||
|
# The rest of the result tuple can be empty defaults
|
||||||
|
result_tuple = (0, total_skipped_this_post, [], [], [], None, None)
|
||||||
|
self._emit_signal('worker_finished', result_tuple)
|
||||||
|
return result_tuple
|
||||||
|
|
||||||
|
# ALL OF THE ORIGINAL LOGIC OF THE `process` METHOD GOES HERE
|
||||||
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None
|
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None
|
||||||
if self .check_cancel ():return 0 ,0 ,[],[],[],None, None
|
if self .check_cancel ():return 0 ,0 ,[],[],[],None, None
|
||||||
current_character_filters =self ._get_current_character_filters ()
|
current_character_filters =self ._get_current_character_filters ()
|
||||||
kept_original_filenames_for_log =[]
|
|
||||||
retryable_failures_this_post =[]
|
|
||||||
permanent_failures_this_post =[]
|
|
||||||
total_downloaded_this_post =0
|
|
||||||
total_skipped_this_post =0
|
|
||||||
history_data_for_this_post =None
|
|
||||||
temp_filepath_for_return = None
|
|
||||||
|
|
||||||
parsed_api_url =urlparse (self .api_url_input )
|
parsed_api_url =urlparse (self .api_url_input )
|
||||||
referer_url =f"https://{parsed_api_url .netloc }/"
|
referer_url =f"https://{parsed_api_url .netloc }/"
|
||||||
@@ -1068,48 +1089,35 @@ class PostProcessorWorker:
|
|||||||
base_folder_names_for_post_content =[cleaned_primary_folder_name ]
|
base_folder_names_for_post_content =[cleaned_primary_folder_name ]
|
||||||
self .logger (f" Base folder name(s) for post content ({log_reason_for_folder }): {', '.join (base_folder_names_for_post_content )}")
|
self .logger (f" Base folder name(s) for post content ({log_reason_for_folder }): {', '.join (base_folder_names_for_post_content )}")
|
||||||
elif not current_character_filters :
|
elif not current_character_filters :
|
||||||
|
|
||||||
derived_folders_from_title_via_known_txt =match_folders_from_title (
|
derived_folders_from_title_via_known_txt =match_folders_from_title (
|
||||||
post_title ,
|
post_title ,
|
||||||
self .known_names ,
|
self .known_names ,
|
||||||
effective_unwanted_keywords_for_folder_naming
|
effective_unwanted_keywords_for_folder_naming
|
||||||
)
|
)
|
||||||
|
|
||||||
valid_derived_folders_from_title_known_txt =[
|
valid_derived_folders_from_title_known_txt =[
|
||||||
name for name in derived_folders_from_title_via_known_txt
|
name for name in derived_folders_from_title_via_known_txt
|
||||||
if name and name .strip ()and name .lower ()!="untitled_folder"
|
if name and name .strip ()and name .lower ()!="untitled_folder"
|
||||||
]
|
]
|
||||||
|
|
||||||
if valid_derived_folders_from_title_known_txt :
|
if valid_derived_folders_from_title_known_txt :
|
||||||
base_folder_names_for_post_content .extend (valid_derived_folders_from_title_known_txt )
|
base_folder_names_for_post_content .extend (valid_derived_folders_from_title_known_txt )
|
||||||
self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Post Title): {', '.join (base_folder_names_for_post_content )}")
|
self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Post Title): {', '.join (base_folder_names_for_post_content )}")
|
||||||
else :
|
else :
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
candidate_name_from_title_basic_clean =extract_folder_name_from_title (
|
candidate_name_from_title_basic_clean =extract_folder_name_from_title (
|
||||||
post_title ,
|
post_title ,
|
||||||
FOLDER_NAME_STOP_WORDS
|
FOLDER_NAME_STOP_WORDS
|
||||||
)
|
)
|
||||||
|
|
||||||
title_is_only_creator_ignored_words =False
|
title_is_only_creator_ignored_words =False
|
||||||
if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words :
|
if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words :
|
||||||
|
|
||||||
candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()}
|
candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()}
|
||||||
if candidate_title_words and candidate_title_words .issubset (self .creator_download_folder_ignore_words ):
|
if candidate_title_words and candidate_title_words .issubset (self .creator_download_folder_ignore_words ):
|
||||||
title_is_only_creator_ignored_words =True
|
title_is_only_creator_ignored_words =True
|
||||||
self .logger (f" Title-derived name '{candidate_name_from_title_basic_clean }' consists only of creator-specific ignore words.")
|
self .logger (f" Title-derived name '{candidate_name_from_title_basic_clean }' consists only of creator-specific ignore words.")
|
||||||
|
|
||||||
if title_is_only_creator_ignored_words :
|
if title_is_only_creator_ignored_words :
|
||||||
|
|
||||||
self .logger (f" Attempting Known.txt match on filenames as title was poor ('{candidate_name_from_title_basic_clean }').")
|
self .logger (f" Attempting Known.txt match on filenames as title was poor ('{candidate_name_from_title_basic_clean }').")
|
||||||
|
|
||||||
filenames_to_check =[
|
filenames_to_check =[
|
||||||
f_info ['_original_name_for_log']for f_info in all_files_from_post_api_for_char_check
|
f_info ['_original_name_for_log']for f_info in all_files_from_post_api_for_char_check
|
||||||
if f_info .get ('_original_name_for_log')
|
if f_info .get ('_original_name_for_log')
|
||||||
]
|
]
|
||||||
|
|
||||||
derived_folders_from_filenames_known_txt =set ()
|
derived_folders_from_filenames_known_txt =set ()
|
||||||
if filenames_to_check :
|
if filenames_to_check :
|
||||||
for fname in filenames_to_check :
|
for fname in filenames_to_check :
|
||||||
@@ -1121,7 +1129,6 @@ class PostProcessorWorker:
|
|||||||
for m in matches :
|
for m in matches :
|
||||||
if m and m .strip ()and m .lower ()!="untitled_folder":
|
if m and m .strip ()and m .lower ()!="untitled_folder":
|
||||||
derived_folders_from_filenames_known_txt .add (m )
|
derived_folders_from_filenames_known_txt .add (m )
|
||||||
|
|
||||||
if derived_folders_from_filenames_known_txt :
|
if derived_folders_from_filenames_known_txt :
|
||||||
base_folder_names_for_post_content .extend (list (derived_folders_from_filenames_known_txt ))
|
base_folder_names_for_post_content .extend (list (derived_folders_from_filenames_known_txt ))
|
||||||
self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Filenames): {', '.join (base_folder_names_for_post_content )}")
|
self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Filenames): {', '.join (base_folder_names_for_post_content )}")
|
||||||
@@ -1137,7 +1144,6 @@ class PostProcessorWorker:
|
|||||||
)
|
)
|
||||||
base_folder_names_for_post_content .append (extracted_name_from_title_full_ignore )
|
base_folder_names_for_post_content .append (extracted_name_from_title_full_ignore )
|
||||||
self .logger (f" Base folder name(s) for post content (Generic title parsing - title not solely creator-ignored words): {', '.join (base_folder_names_for_post_content )}")
|
self .logger (f" Base folder name(s) for post content (Generic title parsing - title not solely creator-ignored words): {', '.join (base_folder_names_for_post_content )}")
|
||||||
|
|
||||||
base_folder_names_for_post_content =[
|
base_folder_names_for_post_content =[
|
||||||
name for name in base_folder_names_for_post_content if name and name .strip ()
|
name for name in base_folder_names_for_post_content if name and name .strip ()
|
||||||
]
|
]
|
||||||
@@ -1145,49 +1151,36 @@ class PostProcessorWorker:
|
|||||||
final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content")
|
final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content")
|
||||||
base_folder_names_for_post_content =[final_fallback_name ]
|
base_folder_names_for_post_content =[final_fallback_name ]
|
||||||
self .logger (f" Ultimate fallback folder name: {final_fallback_name }")
|
self .logger (f" Ultimate fallback folder name: {final_fallback_name }")
|
||||||
|
|
||||||
if base_folder_names_for_post_content :
|
if base_folder_names_for_post_content :
|
||||||
determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ])
|
determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ])
|
||||||
|
|
||||||
if not self .extract_links_only and self .use_post_subfolders :
|
if not self .extract_links_only and self .use_post_subfolders :
|
||||||
cleaned_post_title_for_sub =clean_folder_name (post_title )
|
cleaned_post_title_for_sub =clean_folder_name (post_title )
|
||||||
post_id_for_fallback =self .post .get ('id','unknown_id')
|
post_id_for_fallback =self .post .get ('id','unknown_id')
|
||||||
|
|
||||||
|
|
||||||
if not cleaned_post_title_for_sub or cleaned_post_title_for_sub =="untitled_folder":
|
if not cleaned_post_title_for_sub or cleaned_post_title_for_sub =="untitled_folder":
|
||||||
self .logger (f" ⚠️ Post title '{post_title }' resulted in a generic subfolder name. Using 'post_{post_id_for_fallback }' as base.")
|
self .logger (f" ⚠️ Post title '{post_title }' resulted in a generic subfolder name. Using 'post_{post_id_for_fallback }' as base.")
|
||||||
original_cleaned_post_title_for_sub =f"post_{post_id_for_fallback }"
|
original_cleaned_post_title_for_sub =f"post_{post_id_for_fallback }"
|
||||||
else :
|
else :
|
||||||
original_cleaned_post_title_for_sub =cleaned_post_title_for_sub
|
original_cleaned_post_title_for_sub =cleaned_post_title_for_sub
|
||||||
|
|
||||||
if self.use_date_prefix_for_subfolder:
|
if self.use_date_prefix_for_subfolder:
|
||||||
# Prioritize 'published' date, fall back to 'added' date
|
|
||||||
published_date_str = self.post.get('published') or self.post.get('added')
|
published_date_str = self.post.get('published') or self.post.get('added')
|
||||||
if published_date_str:
|
if published_date_str:
|
||||||
try:
|
try:
|
||||||
# Extract just the date part (YYYY-MM-DD)
|
|
||||||
date_prefix = published_date_str.split('T')[0]
|
date_prefix = published_date_str.split('T')[0]
|
||||||
# Prepend the date to the folder name
|
|
||||||
original_cleaned_post_title_for_sub = f"{date_prefix} {original_cleaned_post_title_for_sub}"
|
original_cleaned_post_title_for_sub = f"{date_prefix} {original_cleaned_post_title_for_sub}"
|
||||||
self.logger(f" ℹ️ Applying date prefix to subfolder: '{original_cleaned_post_title_for_sub}'")
|
self.logger(f" ℹ️ Applying date prefix to subfolder: '{original_cleaned_post_title_for_sub}'")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger(f" ⚠️ Could not parse date '{published_date_str}' for prefix. Using original name. Error: {e}")
|
self.logger(f" ⚠️ Could not parse date '{published_date_str}' for prefix. Using original name. Error: {e}")
|
||||||
else:
|
else:
|
||||||
self.logger(" ⚠️ 'Date Prefix' is checked, but post has no 'published' or 'added' date. Omitting prefix.")
|
self.logger(" ⚠️ 'Date Prefix' is checked, but post has no 'published' or 'added' date. Omitting prefix.")
|
||||||
|
|
||||||
base_path_for_post_subfolder =determined_post_save_path_for_history
|
base_path_for_post_subfolder =determined_post_save_path_for_history
|
||||||
|
|
||||||
suffix_counter =0
|
suffix_counter =0
|
||||||
final_post_subfolder_name =""
|
final_post_subfolder_name =""
|
||||||
|
|
||||||
while True :
|
while True :
|
||||||
if suffix_counter ==0 :
|
if suffix_counter ==0 :
|
||||||
name_candidate =original_cleaned_post_title_for_sub
|
name_candidate =original_cleaned_post_title_for_sub
|
||||||
else :
|
else :
|
||||||
name_candidate =f"{original_cleaned_post_title_for_sub }_{suffix_counter }"
|
name_candidate =f"{original_cleaned_post_title_for_sub }_{suffix_counter }"
|
||||||
|
|
||||||
potential_post_subfolder_path =os .path .join (base_path_for_post_subfolder ,name_candidate )
|
potential_post_subfolder_path =os .path .join (base_path_for_post_subfolder ,name_candidate )
|
||||||
|
|
||||||
try :
|
try :
|
||||||
os .makedirs (potential_post_subfolder_path ,exist_ok =False )
|
os .makedirs (potential_post_subfolder_path ,exist_ok =False )
|
||||||
final_post_subfolder_name =name_candidate
|
final_post_subfolder_name =name_candidate
|
||||||
@@ -1205,39 +1198,30 @@ class PostProcessorWorker:
|
|||||||
self .logger (f" ❌ Error creating directory '{potential_post_subfolder_path }': {e_mkdir }. Files for this post might be saved in parent or fail.")
|
self .logger (f" ❌ Error creating directory '{potential_post_subfolder_path }': {e_mkdir }. Files for this post might be saved in parent or fail.")
|
||||||
final_post_subfolder_name =original_cleaned_post_title_for_sub
|
final_post_subfolder_name =original_cleaned_post_title_for_sub
|
||||||
break
|
break
|
||||||
|
|
||||||
determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name )
|
determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name )
|
||||||
|
|
||||||
if self.filter_mode == 'text_only' and not self.extract_links_only:
|
if self.filter_mode == 'text_only' and not self.extract_links_only:
|
||||||
self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})")
|
self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})")
|
||||||
|
|
||||||
# --- Apply Title-based filters to ensure post is a candidate ---
|
|
||||||
post_title_lower = post_title.lower()
|
post_title_lower = post_title.lower()
|
||||||
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
|
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
|
||||||
for skip_word in self.skip_words_list:
|
for skip_word in self.skip_words_list:
|
||||||
if skip_word.lower() in post_title_lower:
|
if skip_word.lower() in post_title_lower:
|
||||||
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.")
|
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.")
|
||||||
return 0, num_potential_files_in_post, [], [], [], None, None
|
return 0, num_potential_files_in_post, [], [], [], None, None
|
||||||
|
|
||||||
if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope:
|
if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope:
|
||||||
self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.")
|
self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.")
|
||||||
return 0, num_potential_files_in_post, [], [], [], None, None
|
return 0, num_potential_files_in_post, [], [], [], None, None
|
||||||
|
|
||||||
# --- Get the text content based on scope ---
|
|
||||||
raw_text_content = ""
|
raw_text_content = ""
|
||||||
final_post_data = post_data
|
final_post_data = post_data
|
||||||
|
|
||||||
# Fetch full post data if content is missing and scope is 'content'
|
|
||||||
if self.text_only_scope == 'content' and 'content' not in final_post_data:
|
if self.text_only_scope == 'content' and 'content' not in final_post_data:
|
||||||
self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
|
self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
|
||||||
parsed_url = urlparse(self.api_url_input)
|
parsed_url = urlparse(self.api_url_input)
|
||||||
api_domain = parsed_url.netloc
|
api_domain = parsed_url.netloc
|
||||||
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
|
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
|
||||||
|
from .api_client import fetch_single_post_data
|
||||||
from .api_client import fetch_single_post_data # Local import to avoid circular dependency issues
|
|
||||||
full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
|
full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
|
||||||
if full_data:
|
if full_data:
|
||||||
final_post_data = full_data
|
final_post_data = full_data
|
||||||
|
|
||||||
if self.text_only_scope == 'content':
|
if self.text_only_scope == 'content':
|
||||||
raw_text_content = final_post_data.get('content', '')
|
raw_text_content = final_post_data.get('content', '')
|
||||||
elif self.text_only_scope == 'comments':
|
elif self.text_only_scope == 'comments':
|
||||||
@@ -1255,12 +1239,9 @@ class PostProcessorWorker:
|
|||||||
raw_text_content = "\n".join(comment_texts)
|
raw_text_content = "\n".join(comment_texts)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger(f" ❌ Error fetching comments for text-only mode: {e}")
|
self.logger(f" ❌ Error fetching comments for text-only mode: {e}")
|
||||||
|
|
||||||
if not raw_text_content or not raw_text_content.strip():
|
if not raw_text_content or not raw_text_content.strip():
|
||||||
self.logger(" -> Skip Saving Text: No content/comments found or fetched.")
|
self.logger(" -> Skip Saving Text: No content/comments found or fetched.")
|
||||||
return 0, num_potential_files_in_post, [], [], [], None, None
|
return 0, num_potential_files_in_post, [], [], [], None, None
|
||||||
|
|
||||||
# --- Robust HTML-to-TEXT Conversion ---
|
|
||||||
paragraph_pattern = re.compile(r'<p.*?>(.*?)</p>', re.IGNORECASE | re.DOTALL)
|
paragraph_pattern = re.compile(r'<p.*?>(.*?)</p>', re.IGNORECASE | re.DOTALL)
|
||||||
html_paragraphs = paragraph_pattern.findall(raw_text_content)
|
html_paragraphs = paragraph_pattern.findall(raw_text_content)
|
||||||
cleaned_text = ""
|
cleaned_text = ""
|
||||||
@@ -1278,12 +1259,9 @@ class PostProcessorWorker:
|
|||||||
cleaned_paragraphs_list.append(p_final)
|
cleaned_paragraphs_list.append(p_final)
|
||||||
cleaned_text = '\n\n'.join(cleaned_paragraphs_list)
|
cleaned_text = '\n\n'.join(cleaned_paragraphs_list)
|
||||||
cleaned_text = cleaned_text.replace('…', '...')
|
cleaned_text = cleaned_text.replace('…', '...')
|
||||||
|
|
||||||
# --- Logic for Single PDF Mode (File-based) ---
|
|
||||||
if self.single_pdf_mode:
|
if self.single_pdf_mode:
|
||||||
if not cleaned_text:
|
if not cleaned_text:
|
||||||
return 0, 0, [], [], [], None, None
|
return 0, 0, [], [], [], None, None
|
||||||
|
|
||||||
content_data = {
|
content_data = {
|
||||||
'title': post_title,
|
'title': post_title,
|
||||||
'content': cleaned_text,
|
'content': cleaned_text,
|
||||||
@@ -1293,24 +1271,20 @@ class PostProcessorWorker:
|
|||||||
os.makedirs(temp_dir, exist_ok=True)
|
os.makedirs(temp_dir, exist_ok=True)
|
||||||
temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json"
|
temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json"
|
||||||
temp_filepath = os.path.join(temp_dir, temp_filename)
|
temp_filepath = os.path.join(temp_dir, temp_filename)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(temp_filepath, 'w', encoding='utf-8') as f:
|
with open(temp_filepath, 'w', encoding='utf-8') as f:
|
||||||
json.dump(content_data, f, indent=2)
|
json.dump(content_data, f, indent=2)
|
||||||
self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.")
|
self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.")
|
||||||
self._emit_signal('worker_finished', (0, 0, [], [], [], None, temp_filepath)) # <--- CHANGE THIS
|
self._emit_signal('worker_finished', (0, 0, [], [], [], None, temp_filepath))
|
||||||
return (0, 0, [], [], [], None, temp_filepath)
|
return (0, 0, [], [], [], None, temp_filepath)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger(f" ❌ Failed to write temporary file for single PDF: {e}")
|
self.logger(f" ❌ Failed to write temporary file for single PDF: {e}")
|
||||||
self._emit_signal('worker_finished', (0, 0, [], [], [], [], None))
|
self._emit_signal('worker_finished', (0, 0, [], [], [], [], None))
|
||||||
return (0, 0, [], [], [], [], None)
|
return (0, 0, [], [], [], [], None)
|
||||||
|
|
||||||
# --- Logic for Individual File Saving ---
|
|
||||||
else:
|
else:
|
||||||
file_extension = self.text_export_format
|
file_extension = self.text_export_format
|
||||||
txt_filename = clean_filename(post_title) + f".{file_extension}"
|
txt_filename = clean_filename(post_title) + f".{file_extension}"
|
||||||
final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)
|
final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
os.makedirs(determined_post_save_path_for_history, exist_ok=True)
|
os.makedirs(determined_post_save_path_for_history, exist_ok=True)
|
||||||
base, ext = os.path.splitext(final_save_path)
|
base, ext = os.path.splitext(final_save_path)
|
||||||
@@ -1318,7 +1292,6 @@ class PostProcessorWorker:
|
|||||||
while os.path.exists(final_save_path):
|
while os.path.exists(final_save_path):
|
||||||
final_save_path = f"{base}_{counter}{ext}"
|
final_save_path = f"{base}_{counter}{ext}"
|
||||||
counter += 1
|
counter += 1
|
||||||
|
|
||||||
if file_extension == 'pdf':
|
if file_extension == 'pdf':
|
||||||
if FPDF:
|
if FPDF:
|
||||||
self.logger(f" Converting to PDF...")
|
self.logger(f" Converting to PDF...")
|
||||||
@@ -1340,7 +1313,6 @@ class PostProcessorWorker:
|
|||||||
self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
|
self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
|
||||||
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
|
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
|
||||||
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
|
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
|
||||||
|
|
||||||
elif file_extension == 'docx':
|
elif file_extension == 'docx':
|
||||||
if Document:
|
if Document:
|
||||||
self.logger(f" Converting to DOCX...")
|
self.logger(f" Converting to DOCX...")
|
||||||
@@ -1351,17 +1323,14 @@ class PostProcessorWorker:
|
|||||||
self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.")
|
self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.")
|
||||||
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
|
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
|
||||||
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
|
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
|
||||||
|
else:
|
||||||
else: # Default to TXT
|
|
||||||
with open(final_save_path, 'w', encoding='utf-8') as f:
|
with open(final_save_path, 'w', encoding='utf-8') as f:
|
||||||
f.write(cleaned_text)
|
f.write(cleaned_text)
|
||||||
|
|
||||||
self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
|
self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
|
||||||
return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None
|
return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}")
|
self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}")
|
||||||
return 0, num_potential_files_in_post, [], [], [], None, None
|
return 0, num_potential_files_in_post, [], [], [], None, None
|
||||||
|
|
||||||
if not self .extract_links_only and self .use_subfolders and self .skip_words_list :
|
if not self .extract_links_only and self .use_subfolders and self .skip_words_list :
|
||||||
if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
||||||
for folder_name_to_check in base_folder_names_for_post_content :
|
for folder_name_to_check in base_folder_names_for_post_content :
|
||||||
@@ -1395,7 +1364,6 @@ class PostProcessorWorker:
|
|||||||
potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ]
|
potential_key_from_fragment =parsed_mega_url .fragment .split ('!')[-1 ]
|
||||||
if mega_key_pattern .fullmatch (potential_key_from_fragment ):
|
if mega_key_pattern .fullmatch (potential_key_from_fragment ):
|
||||||
decryption_key_found =potential_key_from_fragment
|
decryption_key_found =potential_key_from_fragment
|
||||||
|
|
||||||
if not decryption_key_found and link_text :
|
if not decryption_key_found and link_text :
|
||||||
key_match_in_text =mega_key_pattern .search (link_text )
|
key_match_in_text =mega_key_pattern .search (link_text )
|
||||||
if key_match_in_text :
|
if key_match_in_text :
|
||||||
@@ -1516,14 +1484,10 @@ class PostProcessorWorker:
|
|||||||
return 0 ,0 ,[],[],[],None
|
return 0 ,0 ,[],[],[],None
|
||||||
files_to_download_info_list =[]
|
files_to_download_info_list =[]
|
||||||
processed_original_filenames_in_this_post =set ()
|
processed_original_filenames_in_this_post =set ()
|
||||||
|
|
||||||
if self.keep_in_post_duplicates:
|
if self.keep_in_post_duplicates:
|
||||||
# If we keep duplicates, just add every file to the list to be processed.
|
|
||||||
# The downstream hash check and rename-on-collision logic will handle them.
|
|
||||||
files_to_download_info_list.extend(all_files_from_post_api)
|
files_to_download_info_list.extend(all_files_from_post_api)
|
||||||
self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.")
|
self.logger(f" ℹ️ 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.")
|
||||||
else:
|
else:
|
||||||
# This is the original logic that skips duplicates by name within a post.
|
|
||||||
for file_info in all_files_from_post_api:
|
for file_info in all_files_from_post_api:
|
||||||
current_api_original_filename = file_info.get('_original_name_for_log')
|
current_api_original_filename = file_info.get('_original_name_for_log')
|
||||||
if current_api_original_filename in processed_original_filenames_in_this_post:
|
if current_api_original_filename in processed_original_filenames_in_this_post:
|
||||||
@@ -1533,12 +1497,9 @@ class PostProcessorWorker:
|
|||||||
files_to_download_info_list.append(file_info)
|
files_to_download_info_list.append(file_info)
|
||||||
if current_api_original_filename:
|
if current_api_original_filename:
|
||||||
processed_original_filenames_in_this_post.add(current_api_original_filename)
|
processed_original_filenames_in_this_post.add(current_api_original_filename)
|
||||||
|
|
||||||
if not files_to_download_info_list:
|
if not files_to_download_info_list:
|
||||||
|
|
||||||
self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.")
|
self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.")
|
||||||
return 0 ,total_skipped_this_post ,[],[],[],None
|
return 0 ,total_skipped_this_post ,[],[],[],None
|
||||||
|
|
||||||
self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.")
|
self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.")
|
||||||
with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool :
|
with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool :
|
||||||
futures_list =[]
|
futures_list =[]
|
||||||
@@ -1600,10 +1561,7 @@ class PostProcessorWorker:
|
|||||||
self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.")
|
self .logger (f" -> Skip File (Char Filter Scope '{self .char_filter_scope }'): '{current_api_original_filename }' no match.")
|
||||||
total_skipped_this_post +=1
|
total_skipped_this_post +=1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
target_base_folders_for_this_file_iteration =[]
|
target_base_folders_for_this_file_iteration =[]
|
||||||
|
|
||||||
if current_character_filters :
|
if current_character_filters :
|
||||||
char_title_subfolder_name =None
|
char_title_subfolder_name =None
|
||||||
if self .target_post_id_from_initial_url and self .custom_folder_name :
|
if self .target_post_id_from_initial_url and self .custom_folder_name :
|
||||||
@@ -1624,24 +1582,17 @@ class PostProcessorWorker:
|
|||||||
target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content )
|
target_base_folders_for_this_file_iteration .extend (base_folder_names_for_post_content )
|
||||||
else :
|
else :
|
||||||
target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title ))
|
target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title ))
|
||||||
|
|
||||||
if not target_base_folders_for_this_file_iteration :
|
if not target_base_folders_for_this_file_iteration :
|
||||||
target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content"))
|
target_base_folders_for_this_file_iteration .append (clean_folder_name (post_title if post_title else "Uncategorized_Post_Content"))
|
||||||
|
|
||||||
for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration :
|
for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration :
|
||||||
current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root
|
current_path_for_file_instance =self .override_output_dir if self .override_output_dir else self .download_root
|
||||||
if self .use_subfolders and target_base_folder_name_for_instance :
|
if self .use_subfolders and target_base_folder_name_for_instance :
|
||||||
current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance )
|
current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance )
|
||||||
if self .use_post_subfolders :
|
if self .use_post_subfolders :
|
||||||
|
|
||||||
current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name )
|
current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name )
|
||||||
|
|
||||||
manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None
|
manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None
|
||||||
manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None
|
manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None
|
||||||
|
|
||||||
|
|
||||||
folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title )
|
folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title )
|
||||||
|
|
||||||
futures_list .append (file_pool .submit (
|
futures_list .append (file_pool .submit (
|
||||||
self ._download_single_file ,
|
self ._download_single_file ,
|
||||||
file_info =file_info_to_dl ,
|
file_info =file_info_to_dl ,
|
||||||
@@ -1651,7 +1602,6 @@ class PostProcessorWorker:
|
|||||||
manga_global_file_counter_ref =manga_global_counter_to_pass ,folder_context_name_for_history =folder_context_for_file ,
|
manga_global_file_counter_ref =manga_global_counter_to_pass ,folder_context_name_for_history =folder_context_for_file ,
|
||||||
file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list )
|
file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list )
|
||||||
))
|
))
|
||||||
|
|
||||||
for future in as_completed (futures_list ):
|
for future in as_completed (futures_list ):
|
||||||
if self .check_cancel ():
|
if self .check_cancel ():
|
||||||
for f_to_cancel in futures_list :
|
for f_to_cancel in futures_list :
|
||||||
@@ -1675,42 +1625,30 @@ class PostProcessorWorker:
|
|||||||
self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }")
|
self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }")
|
||||||
total_skipped_this_post +=1
|
total_skipped_this_post +=1
|
||||||
self ._emit_signal ('file_progress',"",None )
|
self ._emit_signal ('file_progress',"",None )
|
||||||
|
|
||||||
# After a post's files are all processed, update the session file to mark this post as done.
|
|
||||||
if self.session_file_path and self.session_lock:
|
if self.session_file_path and self.session_lock:
|
||||||
try:
|
try:
|
||||||
with self.session_lock:
|
with self.session_lock:
|
||||||
if os.path.exists(self.session_file_path): # Only update if the session file exists
|
if os.path.exists(self.session_file_path):
|
||||||
# Read current state
|
|
||||||
with open(self.session_file_path, 'r', encoding='utf-8') as f:
|
with open(self.session_file_path, 'r', encoding='utf-8') as f:
|
||||||
session_data = json.load(f)
|
session_data = json.load(f)
|
||||||
|
|
||||||
if 'download_state' not in session_data:
|
if 'download_state' not in session_data:
|
||||||
session_data['download_state'] = {}
|
session_data['download_state'] = {}
|
||||||
|
|
||||||
# Add processed ID
|
|
||||||
if not isinstance(session_data['download_state'].get('processed_post_ids'), list):
|
if not isinstance(session_data['download_state'].get('processed_post_ids'), list):
|
||||||
session_data['download_state']['processed_post_ids'] = []
|
session_data['download_state']['processed_post_ids'] = []
|
||||||
session_data['download_state']['processed_post_ids'].append(self.post.get('id'))
|
session_data['download_state']['processed_post_ids'].append(self.post.get('id'))
|
||||||
|
|
||||||
# Add any permanent failures from this worker to the session file
|
|
||||||
if permanent_failures_this_post:
|
if permanent_failures_this_post:
|
||||||
if not isinstance(session_data['download_state'].get('permanently_failed_files'), list):
|
if not isinstance(session_data['download_state'].get('permanently_failed_files'), list):
|
||||||
session_data['download_state']['permanently_failed_files'] = []
|
session_data['download_state']['permanently_failed_files'] = []
|
||||||
# To avoid duplicates if the same post is somehow re-processed
|
|
||||||
existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']}
|
existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']}
|
||||||
for failure in permanent_failures_this_post:
|
for failure in permanent_failures_this_post:
|
||||||
if failure.get('file_info', {}).get('url') not in existing_failed_urls:
|
if failure.get('file_info', {}).get('url') not in existing_failed_urls:
|
||||||
session_data['download_state']['permanently_failed_files'].append(failure)
|
session_data['download_state']['permanently_failed_files'].append(failure)
|
||||||
|
|
||||||
# Write to temp file and then atomically replace
|
|
||||||
temp_file_path = self.session_file_path + ".tmp"
|
temp_file_path = self.session_file_path + ".tmp"
|
||||||
with open(temp_file_path, 'w', encoding='utf-8') as f_tmp:
|
with open(temp_file_path, 'w', encoding='utf-8') as f_tmp:
|
||||||
json.dump(session_data, f_tmp, indent=2)
|
json.dump(session_data, f_tmp, indent=2)
|
||||||
os.replace(temp_file_path, self.session_file_path)
|
os.replace(temp_file_path, self.session_file_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger(f"⚠️ Could not update session file for post {post_id}: {e}")
|
self.logger(f"⚠️ Could not update session file for post {post_id}: {e}")
|
||||||
|
|
||||||
if not self .extract_links_only and (total_downloaded_this_post >0 or not (
|
if not self .extract_links_only and (total_downloaded_this_post >0 or not (
|
||||||
(current_character_filters and (
|
(current_character_filters and (
|
||||||
(self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or
|
(self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or
|
||||||
@@ -1723,7 +1661,6 @@ class PostProcessorWorker:
|
|||||||
top_file_name_for_history =post_main_file_info ['name']
|
top_file_name_for_history =post_main_file_info ['name']
|
||||||
elif post_attachments and post_attachments [0 ].get ('name'):
|
elif post_attachments and post_attachments [0 ].get ('name'):
|
||||||
top_file_name_for_history =post_attachments [0 ]['name']
|
top_file_name_for_history =post_attachments [0 ]['name']
|
||||||
|
|
||||||
history_data_for_this_post ={
|
history_data_for_this_post ={
|
||||||
'post_title':post_title ,'post_id':post_id ,
|
'post_title':post_title ,'post_id':post_id ,
|
||||||
'top_file_name':top_file_name_for_history ,
|
'top_file_name':top_file_name_for_history ,
|
||||||
@@ -1734,9 +1671,7 @@ class PostProcessorWorker:
|
|||||||
}
|
}
|
||||||
if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled.");
|
if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled.");
|
||||||
else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }")
|
else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }")
|
||||||
|
|
||||||
if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 :
|
if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 :
|
||||||
|
|
||||||
path_to_check_for_emptiness =determined_post_save_path_for_history
|
path_to_check_for_emptiness =determined_post_save_path_for_history
|
||||||
try :
|
try :
|
||||||
if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ):
|
if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ):
|
||||||
@@ -1745,12 +1680,25 @@ class PostProcessorWorker:
|
|||||||
except OSError as e_rmdir :
|
except OSError as e_rmdir :
|
||||||
self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }")
|
self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
post_id = self.post.get('id', 'N/A')
|
||||||
|
# Log the unexpected crash of the worker
|
||||||
|
self.logger(f"❌ CRITICAL WORKER FAILURE on Post ID {post_id}: {e}\n{traceback.format_exc(limit=4)}")
|
||||||
|
# Ensure the number of skipped files reflects the total potential files in the post,
|
||||||
|
# as none of them were processed successfully.
|
||||||
|
num_potential_files_in_post = len(self.post.get('attachments', [])) + (1 if self.post.get('file') else 0)
|
||||||
|
total_skipped_this_post = num_potential_files_in_post
|
||||||
|
total_downloaded_this_post = 0
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# This 'finally' block ensures that the worker ALWAYS reports back,
|
||||||
|
# preventing the main UI from getting stuck.
|
||||||
result_tuple = (total_downloaded_this_post, total_skipped_this_post,
|
result_tuple = (total_downloaded_this_post, total_skipped_this_post,
|
||||||
kept_original_filenames_for_log, retryable_failures_this_post,
|
kept_original_filenames_for_log, retryable_failures_this_post,
|
||||||
permanent_failures_this_post, history_data_for_this_post,
|
permanent_failures_this_post, history_data_for_this_post,
|
||||||
None) # The 7th item is None because we already saved the temp file
|
temp_filepath_for_return)
|
||||||
|
|
||||||
self._emit_signal('worker_finished', result_tuple)
|
self._emit_signal('worker_finished', result_tuple)
|
||||||
|
|
||||||
return result_tuple
|
return result_tuple
|
||||||
|
|
||||||
class DownloadThread (QThread ):
|
class DownloadThread (QThread ):
|
||||||
@@ -1801,6 +1749,7 @@ class DownloadThread (QThread ):
|
|||||||
cookie_text ="",
|
cookie_text ="",
|
||||||
session_file_path=None,
|
session_file_path=None,
|
||||||
session_lock=None,
|
session_lock=None,
|
||||||
|
processed_ids_to_skip=None,
|
||||||
text_only_scope=None,
|
text_only_scope=None,
|
||||||
text_export_format='txt',
|
text_export_format='txt',
|
||||||
single_pdf_mode=False,
|
single_pdf_mode=False,
|
||||||
@@ -1859,11 +1808,12 @@ class DownloadThread (QThread ):
|
|||||||
self .manga_global_file_counter_ref =manga_global_file_counter_ref
|
self .manga_global_file_counter_ref =manga_global_file_counter_ref
|
||||||
self.session_file_path = session_file_path
|
self.session_file_path = session_file_path
|
||||||
self.session_lock = session_lock
|
self.session_lock = session_lock
|
||||||
|
self.processed_ids_to_skip = processed_ids_to_skip
|
||||||
self.history_candidates_buffer =deque (maxlen =8 )
|
self.history_candidates_buffer =deque (maxlen =8 )
|
||||||
self.text_only_scope = text_only_scope
|
self.text_only_scope = text_only_scope
|
||||||
self.text_export_format = text_export_format
|
self.text_export_format = text_export_format
|
||||||
self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE
|
self.single_pdf_mode = single_pdf_mode
|
||||||
self.project_root_dir = project_root_dir # Add this assignment
|
self.project_root_dir = project_root_dir
|
||||||
|
|
||||||
if self .compress_images and Image is None :
|
if self .compress_images and Image is None :
|
||||||
self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
|
self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
|
||||||
@@ -1899,9 +1849,7 @@ class DownloadThread (QThread ):
|
|||||||
grand_list_of_kept_original_filenames = []
|
grand_list_of_kept_original_filenames = []
|
||||||
was_process_cancelled = False
|
was_process_cancelled = False
|
||||||
|
|
||||||
# This block for initializing manga mode counters remains unchanged
|
|
||||||
if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED and not self.extract_links_only and self.manga_date_file_counter_ref is None:
|
if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED and not self.extract_links_only and self.manga_date_file_counter_ref is None:
|
||||||
# Determine the directory to scan for existing numbered files
|
|
||||||
series_scan_dir = self.output_dir
|
series_scan_dir = self.output_dir
|
||||||
if self.use_subfolders :
|
if self.use_subfolders :
|
||||||
if self.filter_character_list_objects_initial and self.filter_character_list_objects_initial [0] and self.filter_character_list_objects_initial[0].get("name"):
|
if self.filter_character_list_objects_initial and self.filter_character_list_objects_initial [0] and self.filter_character_list_objects_initial[0].get("name"):
|
||||||
@@ -1916,41 +1864,39 @@ class DownloadThread (QThread ):
|
|||||||
self.logger(f"ℹ️ [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir}'...")
|
self.logger(f"ℹ️ [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir}'...")
|
||||||
for dirpath, _, filenames_in_dir in os.walk(series_scan_dir):
|
for dirpath, _, filenames_in_dir in os.walk(series_scan_dir):
|
||||||
for filename_to_check in filenames_in_dir:
|
for filename_to_check in filenames_in_dir:
|
||||||
# Check for an optional prefix defined by the user
|
|
||||||
prefix_to_check = clean_filename(self.manga_date_prefix.strip()) if self.manga_date_prefix and self.manga_date_prefix.strip() else ""
|
prefix_to_check = clean_filename(self.manga_date_prefix.strip()) if self.manga_date_prefix and self.manga_date_prefix.strip() else ""
|
||||||
name_part_to_match = filename_to_check
|
name_part_to_match = filename_to_check
|
||||||
if prefix_to_check and name_part_to_match.startswith(prefix_to_check):
|
if prefix_to_check and name_part_to_match.startswith(prefix_to_check):
|
||||||
name_part_to_match = name_part_to_match[len(prefix_to_check):].lstrip()
|
name_part_to_match = name_part_to_match[len(prefix_to_check):].lstrip()
|
||||||
|
|
||||||
# Use regex to find the number at the start of the filename
|
|
||||||
base_name_no_ext = os.path.splitext(name_part_to_match)[0]
|
base_name_no_ext = os.path.splitext(name_part_to_match)[0]
|
||||||
match = re.match(r"(\d+)", base_name_no_ext)
|
match = re.match(r"(\d+)", base_name_no_ext)
|
||||||
if match:
|
if match:
|
||||||
highest_num = max(highest_num, int(match.group(1)))
|
highest_num = max(highest_num, int(match.group(1)))
|
||||||
|
|
||||||
# Initialize the shared counter to the next number, protected by a thread lock
|
|
||||||
self.manga_date_file_counter_ref = [highest_num + 1, threading.Lock()]
|
self.manga_date_file_counter_ref = [highest_num + 1, threading.Lock()]
|
||||||
self.logger(f"ℹ️ [Thread] Manga Date Mode: Initialized date-based counter at {self.manga_date_file_counter_ref[0]}.")
|
self.logger(f"ℹ️ [Thread] Manga Date Mode: Initialized date-based counter at {self.manga_date_file_counter_ref[0]}.")
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if self.manga_mode_active and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING and not self.extract_links_only and self.manga_global_file_counter_ref is None:
|
if self.manga_mode_active and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING and not self.extract_links_only and self.manga_global_file_counter_ref is None:
|
||||||
# Initialize the shared counter at 1, protected by a thread lock
|
|
||||||
self.manga_global_file_counter_ref = [1, threading.Lock()]
|
self.manga_global_file_counter_ref = [1, threading.Lock()]
|
||||||
self.logger(f"ℹ️ [Thread] Manga Title+GlobalNum Mode: Initialized global counter at {self.manga_global_file_counter_ref[0]}.")
|
self.logger(f"ℹ️ [Thread] Manga Title+GlobalNum Mode: Initialized global counter at {self.manga_global_file_counter_ref[0]}.")
|
||||||
pass
|
pass
|
||||||
|
|
||||||
worker_signals_obj = PostProcessorSignals()
|
worker_signals_obj = PostProcessorSignals()
|
||||||
try:
|
try:
|
||||||
# Connect signals
|
|
||||||
worker_signals_obj.progress_signal.connect(self.progress_signal)
|
worker_signals_obj.progress_signal.connect(self.progress_signal)
|
||||||
worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
|
worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
|
||||||
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
|
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
|
||||||
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
|
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
|
||||||
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
|
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
|
||||||
worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal)
|
worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal)
|
||||||
worker_signals_obj.worker_finished_signal.connect(lambda result: None) # Connect to dummy lambda to avoid errors
|
worker_signals_obj.worker_finished_signal.connect(lambda result: None)
|
||||||
|
|
||||||
self.logger(" Starting post fetch (single-threaded download process)...")
|
self.logger(" Starting post fetch (single-threaded download process)...")
|
||||||
|
self.logger(" Fetching ALL available post information first. This may take a moment...")
|
||||||
|
|
||||||
|
all_posts_data = []
|
||||||
post_generator = download_from_api(
|
post_generator = download_from_api(
|
||||||
self.api_url_input,
|
self.api_url_input,
|
||||||
logger=self.logger,
|
logger=self.logger,
|
||||||
@@ -1970,12 +1916,16 @@ class DownloadThread (QThread ):
|
|||||||
if self.isInterruptionRequested():
|
if self.isInterruptionRequested():
|
||||||
was_process_cancelled = True
|
was_process_cancelled = True
|
||||||
break
|
break
|
||||||
for individual_post_data in posts_batch_data:
|
all_posts_data.extend(posts_batch_data)
|
||||||
|
|
||||||
|
if not was_process_cancelled:
|
||||||
|
self.logger(f"✅ Fetching complete. Found {len(all_posts_data)} total posts. Starting download process...")
|
||||||
|
|
||||||
|
for individual_post_data in all_posts_data:
|
||||||
if self.isInterruptionRequested():
|
if self.isInterruptionRequested():
|
||||||
was_process_cancelled = True
|
was_process_cancelled = True
|
||||||
break
|
break
|
||||||
|
|
||||||
# Create the worker, now correctly passing single_pdf_mode
|
|
||||||
post_processing_worker = PostProcessorWorker(
|
post_processing_worker = PostProcessorWorker(
|
||||||
post_data=individual_post_data,
|
post_data=individual_post_data,
|
||||||
download_root=self.output_dir,
|
download_root=self.output_dir,
|
||||||
@@ -2022,13 +1972,13 @@ class DownloadThread (QThread ):
|
|||||||
creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
|
creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
|
||||||
session_file_path=self.session_file_path,
|
session_file_path=self.session_file_path,
|
||||||
session_lock=self.session_lock,
|
session_lock=self.session_lock,
|
||||||
|
processed_ids_to_skip=self.processed_ids_to_skip, # <-- FIX: Pass the list to the worker
|
||||||
text_only_scope=self.text_only_scope,
|
text_only_scope=self.text_only_scope,
|
||||||
text_export_format=self.text_export_format,
|
text_export_format=self.text_export_format,
|
||||||
single_pdf_mode=self.single_pdf_mode, # <-- This is now correctly passed
|
single_pdf_mode=self.single_pdf_mode,
|
||||||
project_root_dir=self.project_root_dir
|
project_root_dir=self.project_root_dir
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
# Correctly unpack the 7 values returned from the worker
|
|
||||||
(dl_count, skip_count, kept_originals_this_post,
|
(dl_count, skip_count, kept_originals_this_post,
|
||||||
retryable_failures, permanent_failures,
|
retryable_failures, permanent_failures,
|
||||||
history_data, temp_filepath) = post_processing_worker.process()
|
history_data, temp_filepath) = post_processing_worker.process()
|
||||||
@@ -2046,7 +1996,6 @@ class DownloadThread (QThread ):
|
|||||||
if permanent_failures:
|
if permanent_failures:
|
||||||
self.permanent_file_failed_signal.emit(permanent_failures)
|
self.permanent_file_failed_signal.emit(permanent_failures)
|
||||||
|
|
||||||
# In single-threaded text mode, pass the temp file path back to the main window
|
|
||||||
if self.single_pdf_mode and temp_filepath:
|
if self.single_pdf_mode and temp_filepath:
|
||||||
self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")
|
self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")
|
||||||
|
|
||||||
@@ -2061,8 +2010,7 @@ class DownloadThread (QThread ):
|
|||||||
self.skip_current_file_flag.clear()
|
self.skip_current_file_flag.clear()
|
||||||
self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
|
self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
|
||||||
self.msleep(10)
|
self.msleep(10)
|
||||||
if was_process_cancelled:
|
|
||||||
break
|
|
||||||
if not was_process_cancelled and not self.isInterruptionRequested():
|
if not was_process_cancelled and not self.isInterruptionRequested():
|
||||||
self.logger("✅ All posts processed or end of content reached by DownloadThread.")
|
self.logger("✅ All posts processed or end of content reached by DownloadThread.")
|
||||||
|
|
||||||
@@ -2071,7 +2019,6 @@ class DownloadThread (QThread ):
|
|||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
finally:
|
finally:
|
||||||
try:
|
try:
|
||||||
# Disconnect signals
|
|
||||||
if worker_signals_obj:
|
if worker_signals_obj:
|
||||||
worker_signals_obj.progress_signal.disconnect(self.progress_signal)
|
worker_signals_obj.progress_signal.disconnect(self.progress_signal)
|
||||||
worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
|
worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
|
||||||
@@ -2082,15 +2029,8 @@ class DownloadThread (QThread ):
|
|||||||
except (TypeError, RuntimeError) as e:
|
except (TypeError, RuntimeError) as e:
|
||||||
self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}")
|
self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}")
|
||||||
|
|
||||||
# Emit the final signal with all collected results
|
|
||||||
self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)
|
self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)
|
||||||
|
|
||||||
|
|
||||||
def receive_add_character_result (self ,result ):
|
|
||||||
with QMutexLocker (self .prompt_mutex ):
|
|
||||||
self ._add_character_response =result
|
|
||||||
self .logger (f" (DownloadThread) Received character prompt response: {'Yes (added/confirmed)'if result else 'No (declined/failed)'}")
|
|
||||||
|
|
||||||
class InterruptedError(Exception):
|
class InterruptedError(Exception):
|
||||||
"""Custom exception for handling cancellations gracefully."""
|
"""Custom exception for handling cancellations gracefully."""
|
||||||
pass
|
pass
|
||||||
93
src/ui/flow_layout.py
Normal file
93
src/ui/flow_layout.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# src/ui/flow_layout.py
|
||||||
|
|
||||||
|
from PyQt5.QtWidgets import QLayout, QSizePolicy, QStyle
|
||||||
|
from PyQt5.QtCore import QPoint, QRect, QSize, Qt
|
||||||
|
|
||||||
|
class FlowLayout(QLayout):
    """A custom layout that arranges items in a flow, wrapping as necessary.

    Items are placed left to right in insertion order. When the next item
    would extend past the right edge of the available rectangle, it is moved
    to the start of a new row below the tallest item of the current row.
    """

    def __init__(self, parent=None, margin=0, spacing=-1):
        """Create the layout.

        Args:
            parent: Optional parent passed straight to ``QLayout``.
            margin: Contents margin applied to all four sides. It is only
                applied when ``parent`` is given; without a parent the
                margins are left untouched.
            spacing: Value forwarded to ``setSpacing``.
        """
        super().__init__(parent)

        # Create the item store before calling any other layout method, so
        # the overridden accessors (count/itemAt/takeAt) always find it.
        self.itemList = []

        if parent is not None:
            self.setContentsMargins(margin, margin, margin, margin)

        self.setSpacing(spacing)

    def __del__(self):
        """Remove every item from the layout when it is garbage-collected."""
        # Compare against None explicitly: takeAt() signals "no more items"
        # with None, and an item's truthiness is not part of that contract.
        while self.takeAt(0) is not None:
            pass

    def addItem(self, item):
        """Append ``item`` to the end of the flow."""
        self.itemList.append(item)

    def count(self):
        """Return the number of items held by the layout."""
        return len(self.itemList)

    def itemAt(self, index):
        """Return the item at ``index``, or ``None`` if it is out of range."""
        if 0 <= index < len(self.itemList):
            return self.itemList[index]
        return None

    def takeAt(self, index):
        """Remove and return the item at ``index``, or ``None`` if out of range."""
        if 0 <= index < len(self.itemList):
            return self.itemList.pop(index)
        return None

    def expandingDirections(self):
        """Return an empty orientation set (the layout does not ask to expand)."""
        return Qt.Orientations(Qt.Orientation(0))

    def hasHeightForWidth(self):
        """Return ``True``: the height depends on the width because rows wrap."""
        return True

    def heightForWidth(self, width):
        """Return the height needed to lay out all items within ``width``."""
        return self._do_layout(QRect(0, 0, width, 0), True)

    def setGeometry(self, rect):
        """Store ``rect`` on the base class and position every item inside it."""
        super().setGeometry(rect)
        self._do_layout(rect, False)

    def sizeHint(self):
        """Return the preferred size, which is the minimum size."""
        return self.minimumSize()

    def minimumSize(self):
        """Return the largest item minimum size plus the contents margins."""
        size = QSize()
        for item in self.itemList:
            size = size.expandedTo(item.minimumSize())

        # Account for each side separately so asymmetric margins are handled;
        # with equal margins this is the same as adding 2 * margin per axis.
        left, top, right, bottom = self.getContentsMargins()
        size += QSize(left + right, top + bottom)
        return size

    def _do_layout(self, rect, test_only):
        """Compute the flow arrangement inside ``rect``.

        Args:
            rect: Rectangle in which the items are arranged.
            test_only: When true, nothing is moved; only the height is
                computed (used by ``heightForWidth``).

        Returns:
            The total height occupied by the arranged rows.
        """
        x = rect.x()
        y = rect.y()
        line_height = 0

        # The same spacing value is used between items and between rows.
        spacing = self.spacing()

        for item in self.itemList:
            hint = item.sizeHint()
            next_x = x + hint.width() + spacing

            # Wrap when the item would cross the right edge, unless it is the
            # first item of the row (line_height == 0), which always stays.
            if next_x - spacing > rect.right() and line_height > 0:
                x = rect.x()
                y += line_height + spacing
                next_x = x + hint.width() + spacing
                line_height = 0

            if not test_only:
                item.setGeometry(QRect(QPoint(x, y), hint))

            x = next_x
            line_height = max(line_height, hint.height())

        return y + line_height - rect.y()
|
||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user