This commit is contained in:
Yuvi9587
2025-06-16 10:46:23 +01:00
parent efd5458493
commit 953dbaebf0
2 changed files with 1444 additions and 1448 deletions

View File

@@ -8,7 +8,7 @@ import hashlib
import http .client
import traceback
from concurrent .futures import ThreadPoolExecutor ,Future ,CancelledError ,as_completed
from collections import deque # Import deque
from collections import deque
import html
from PyQt5 .QtCore import QObject ,pyqtSignal ,QThread ,QMutex ,QMutexLocker
from urllib .parse import urlparse
@@ -42,7 +42,7 @@ from io import BytesIO
STYLE_POST_TITLE ="post_title"
STYLE_ORIGINAL_NAME ="original_name"
STYLE_DATE_BASED ="date_based"
STYLE_DATE_POST_TITLE = "date_post_title" # New style constant
STYLE_DATE_POST_TITLE ="date_post_title"
MANGA_DATE_PREFIX_DEFAULT =""
STYLE_POST_TITLE_GLOBAL_NUMBERING ="post_title_global_numbering"
SKIP_SCOPE_FILES ="files"
@@ -513,17 +513,17 @@ def fetch_post_comments (api_domain ,service ,user_id ,post_id ,headers ,logger
raise RuntimeError (f"Unexpected error fetching comments for post {post_id } ({comments_api_url }): {e }")
def download_from_api (
api_url_input ,
logger=print, # type: ignore
start_page=None, # type: ignore
end_page=None, # type: ignore
manga_mode=False, # type: ignore
cancellation_event=None, # type: ignore
pause_event=None, # type: ignore
use_cookie=False, # type: ignore
cookie_text="", # type: ignore
selected_cookie_file=None, # type: ignore
app_base_dir=None, # type: ignore
manga_filename_style_for_sort_check=None # type: ignore # Parameter is correctly defined
logger =print ,
start_page =None ,
end_page =None ,
manga_mode =False ,
cancellation_event =None ,
pause_event =None ,
use_cookie =False ,
cookie_text ="",
selected_cookie_file =None ,
app_base_dir =None ,
manga_filename_style_for_sort_check =None
):
headers ={
'User-Agent':'Mozilla/5.0',
@@ -572,10 +572,8 @@ def download_from_api(
return
if target_post_id and (start_page or end_page ):
logger ("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).")
# determine if we should use the "fetch all then sort oldest first" logic for manga mode
is_manga_mode_fetch_all_and_sort_oldest_first = manga_mode and \
(manga_filename_style_for_sort_check != STYLE_DATE_POST_TITLE) and \
not target_post_id
is_manga_mode_fetch_all_and_sort_oldest_first =manga_mode and (manga_filename_style_for_sort_check !=STYLE_DATE_POST_TITLE )and not target_post_id
api_base_url =f"https://{api_domain }/api/v1/{service }/user/{user_id }"
page_size =50
if is_manga_mode_fetch_all_and_sort_oldest_first :
@@ -659,8 +657,8 @@ def download_from_api(
yield all_posts_for_manga_mode [i :i +page_size ]
return
# If manga_mode is true but we didn't enter the block above,
# it means we want newest first for STYLE_DATE_POST_TITLE (or it's a single post URL)
if manga_mode and not target_post_id and (manga_filename_style_for_sort_check ==STYLE_DATE_POST_TITLE ):
logger (f" Manga Mode (Style: {STYLE_DATE_POST_TITLE }): Processing posts in default API order (newest first).")
@@ -756,10 +754,10 @@ class PostProcessorSignals (QObject ):
file_download_status_signal =pyqtSignal (bool )
external_link_signal =pyqtSignal (str ,str ,str ,str ,str )
file_progress_signal =pyqtSignal (str ,object )
file_successfully_downloaded_signal = pyqtSignal(dict) # New signal for successfully downloaded files
file_successfully_downloaded_signal =pyqtSignal (dict )
missed_character_post_signal =pyqtSignal (str ,str )
class PostProcessorWorker :
# ... (other __init__ arguments)
def __init__ (self ,post_data ,download_root ,known_names ,
filter_character_list ,emitter ,
unwanted_keywords ,filter_mode ,skip_zip ,skip_rar ,
@@ -867,7 +865,7 @@ class PostProcessorWorker :
post_title ="",file_index_in_post =0 ,num_files_in_this_post =1 ,
manga_date_file_counter_ref =None ):
was_original_name_kept_flag =False
# manga_global_file_counter_ref =None # This was a duplicate definition, removed
final_filename_saved_for_return =""
def _get_current_character_filters (self ):
if self .dynamic_filter_holder :
@@ -877,7 +875,7 @@ class PostProcessorWorker :
post_title ="",file_index_in_post =0 ,num_files_in_this_post =1 ,
manga_date_file_counter_ref =None ,
forced_filename_override =None ,
manga_global_file_counter_ref =None, folder_context_name_for_history=None ): # Added folder_context_name_for_history
manga_global_file_counter_ref =None ,folder_context_name_for_history =None ):
was_original_name_kept_flag =False
final_filename_saved_for_return =""
retry_later_details =None
@@ -982,18 +980,18 @@ class PostProcessorWorker :
elif self .manga_filename_style ==STYLE_DATE_POST_TITLE :
published_date_str =self .post .get ('published')
added_date_str =self .post .get ('added')
formatted_date_str = "nodate" # Default if no date found
formatted_date_str ="nodate"
if published_date_str :
try :
formatted_date_str =published_date_str .split ('T')[0 ]
except Exception: # pylint: disable=bare-except
except Exception :
self .logger (f" ⚠️ Could not parse 'published' date '{published_date_str }' for STYLE_DATE_POST_TITLE. Using 'nodate'.")
elif added_date_str :
try :
formatted_date_str =added_date_str .split ('T')[0 ]
self .logger (f" ⚠️ Post ID {original_post_id_for_log } missing 'published' date, using 'added' date '{added_date_str }' for STYLE_DATE_POST_TITLE naming.")
except Exception: # pylint: disable=bare-except
except Exception :
self .logger (f" ⚠️ Could not parse 'added' date '{added_date_str }' for STYLE_DATE_POST_TITLE. Using 'nodate'.")
else :
self .logger (f" ⚠️ Post ID {original_post_id_for_log } missing both 'published' and 'added' dates for STYLE_DATE_POST_TITLE. Using 'nodate'.")
@@ -1010,15 +1008,15 @@ class PostProcessorWorker :
if num_files_in_this_post >1 :
filename_to_save_in_main_path =f"{base_name_for_style }_{file_index_in_post }{original_ext }"if file_index_in_post >0 else f"{base_name_for_style }{original_ext }"
else: # Single file post
else :
filename_to_save_in_main_path =f"{base_name_for_style }{original_ext }"
else :
self .logger (f"⚠️ Manga mode (Date+PostTitle Style): Post title missing for post {original_post_id_for_log }. Using 'post' as title part with date prefix.")
cleaned_post_title_for_filename = "post" # Fallback title part
cleaned_post_title_for_filename ="post"
base_name_for_style =f"{formatted_date_str }_{cleaned_post_title_for_filename }"
if num_files_in_this_post >1 :
filename_to_save_in_main_path =f"{base_name_for_style }_{file_index_in_post }{original_ext }"if file_index_in_post >0 else f"{base_name_for_style }{original_ext }"
else: # Single file post
else :
filename_to_save_in_main_path =f"{base_name_for_style }{original_ext }"
self .logger (f"⚠️ Manga mode (Title+GlobalNum Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path }' for post {original_post_id_for_log }.")
else :
@@ -1394,18 +1392,18 @@ class PostProcessorWorker :
final_filename_saved_for_return =final_filename_on_disk
self .logger (f"✅ Saved: '{final_filename_saved_for_return }' (from '{api_original_filename }', {downloaded_size_bytes /(1024 *1024 ):.2f} MB) in '{os .path .basename (effective_save_folder )}'")
# Emit signal for successfully downloaded file
downloaded_file_details ={
'disk_filename':final_filename_saved_for_return ,
'post_title':post_title ,
'post_id':original_post_id_for_log ,
'upload_date_str':self .post .get ('published')or self .post .get ('added')or "N/A",
'download_timestamp': time.time(), # Will be recorded by main app
'download_path': effective_save_folder, # The folder it was saved into
'download_timestamp':time .time (),
'download_path':effective_save_folder ,
'service':self .service ,
'user_id':self .user_id ,
'api_original_filename':api_original_filename ,
'folder_context_name': folder_context_name_for_history or os.path.basename(effective_save_folder) # Best effort context name
'folder_context_name':folder_context_name_for_history or os .path .basename (effective_save_folder )
}
self ._emit_signal ('file_successfully_downloaded',downloaded_file_details )
time .sleep (0.05 )
@@ -1438,7 +1436,7 @@ class PostProcessorWorker :
parsed_api_url =urlparse (self .api_url_input )
referer_url =f"https://{parsed_api_url .netloc }/"
headers ={'User-Agent':'Mozilla/5.0','Referer':referer_url ,'Accept':'*/*'}
link_pattern =re .compile (r"""<a\s+.*?href=["'](https?://[^"']+)["'][^>]*>(.*?)</a>""", # type: ignore
link_pattern =re .compile (r"""<a\s+.*?href=["'](https?://[^"']+)["'][^>]*>(.*?)</a>""",
re .IGNORECASE |re .DOTALL )
post_data =self .post
post_title =post_data .get ('title','')or 'untitled_post'
@@ -1461,17 +1459,17 @@ class PostProcessorWorker :
post_is_candidate_by_file_char_match_in_comment_scope =False
char_filter_that_matched_file_in_comment_scope =None
char_filter_that_matched_comment =None
if current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH ): # type: ignore
if current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH ):
if self ._check_pause (f"Character title filter for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
for idx ,filter_item_obj in enumerate (current_character_filters ):
if self .check_cancel ():break
terms_to_check_for_title =list (filter_item_obj ["aliases"]) # type: ignore
terms_to_check_for_title =list (filter_item_obj ["aliases"])
if filter_item_obj ["is_group"]:
if filter_item_obj ["name"]not in terms_to_check_for_title : # type: ignore
terms_to_check_for_title .append (filter_item_obj ["name"]) # type: ignore
if filter_item_obj ["name"]not in terms_to_check_for_title :
terms_to_check_for_title .append (filter_item_obj ["name"])
unique_terms_for_title_check =list (set (terms_to_check_for_title ))
for term_to_match in unique_terms_for_title_check :
match_found_for_term =is_title_match_for_character (post_title ,term_to_match ) # type: ignore
match_found_for_term =is_title_match_for_character (post_title ,term_to_match )
if match_found_for_term :
post_is_candidate_by_title_char_match =True
char_filter_that_matched_title =filter_item_obj
@@ -1499,12 +1497,12 @@ class PostProcessorWorker :
current_api_original_filename_for_check =file_info_item .get ('_original_name_for_log')
if not current_api_original_filename_for_check :continue
for filter_item_obj in current_character_filters :
terms_to_check =list (filter_item_obj ["aliases"]) # type: ignore
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check : # type: ignore
terms_to_check .append (filter_item_obj ["name"]) # type: ignore
terms_to_check =list (filter_item_obj ["aliases"])
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check :
terms_to_check .append (filter_item_obj ["name"])
for term_to_match in terms_to_check :
if is_filename_match_for_character (current_api_original_filename_for_check ,term_to_match ):
post_is_candidate_by_file_char_match_in_comment_scope =True # type: ignore
post_is_candidate_by_file_char_match_in_comment_scope =True
char_filter_that_matched_file_in_comment_scope =filter_item_obj
self .logger (f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check }' matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}'). Post is candidate.")
break
@@ -1535,11 +1533,11 @@ class PostProcessorWorker :
raw_comment_content =comment_item .get ('content','')
if not raw_comment_content :continue
cleaned_comment_text =strip_html_tags (raw_comment_content )
if not cleaned_comment_text .strip ():continue # type: ignore
if not cleaned_comment_text .strip ():continue
for filter_item_obj in current_character_filters :
terms_to_check_comment =list (filter_item_obj ["aliases"]) # type: ignore
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_comment : # type: ignore
terms_to_check_comment .append (filter_item_obj ["name"]) # type: ignore
terms_to_check_comment =list (filter_item_obj ["aliases"])
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_comment :
terms_to_check_comment .append (filter_item_obj ["name"])
for term_to_match_comment in terms_to_check_comment :
if is_title_match_for_character (cleaned_comment_text ,term_to_match_comment ):
post_is_candidate_by_comment_char_match =True
@@ -1561,12 +1559,12 @@ class PostProcessorWorker :
if current_character_filters :
if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match :
self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.")
self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter") # type: ignore
self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter")
return 0 ,num_potential_files_in_post ,[],[],[],None
if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match :
self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'")
if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'):
self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)") # type: ignore
self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)")
return 0 ,num_potential_files_in_post ,[],[],[],None
if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ):
if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
@@ -1577,7 +1575,7 @@ class PostProcessorWorker :
return 0 ,num_potential_files_in_post ,[],[],[],None
if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match :
self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.")
self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)") # type: ignore
self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)")
return 0 ,num_potential_files_in_post ,[],[],[],None
if not isinstance (post_attachments ,list ):
self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). Skipping attachments.")
@@ -1587,7 +1585,7 @@ class PostProcessorWorker :
if not self .extract_links_only and self .use_subfolders :
if self ._check_pause (f"Subfolder determination for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
primary_char_filter_for_folder =None
log_reason_for_folder ="" # type: ignore
log_reason_for_folder =""
if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment :
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope :
primary_char_filter_for_folder =char_filter_that_matched_file_in_comment_scope
@@ -1598,10 +1596,10 @@ class PostProcessorWorker :
elif (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and char_filter_that_matched_title :
primary_char_filter_for_folder =char_filter_that_matched_title
log_reason_for_folder ="Matched char filter in title"
if primary_char_filter_for_folder : # type: ignore
base_folder_names_for_post_content =[clean_folder_name (primary_char_filter_for_folder ["name"])] # type: ignore
cleaned_primary_folder_name =clean_folder_name (primary_char_filter_for_folder ["name"]) # type: ignore
if cleaned_primary_folder_name .lower ()in effective_unwanted_keywords_for_folder_naming and cleaned_primary_folder_name .lower ()!="untitled_folder": # type: ignore
if primary_char_filter_for_folder :
base_folder_names_for_post_content =[clean_folder_name (primary_char_filter_for_folder ["name"])]
cleaned_primary_folder_name =clean_folder_name (primary_char_filter_for_folder ["name"])
if cleaned_primary_folder_name .lower ()in effective_unwanted_keywords_for_folder_naming and cleaned_primary_folder_name .lower ()!="untitled_folder":
self .logger (f" ⚠️ Primary char filter folder name '{cleaned_primary_folder_name }' is in ignore list. Using generic name.")
base_folder_names_for_post_content =["Generic Post Content"]
else :
@@ -1616,7 +1614,7 @@ class PostProcessorWorker :
)
valid_derived_folders_from_title_known_txt =[
name for name in derived_folders_from_title_via_known_txt # type: ignore
name for name in derived_folders_from_title_via_known_txt
if name and name .strip ()and name .lower ()!="untitled_folder"
]
@@ -1633,7 +1631,7 @@ class PostProcessorWorker :
FOLDER_NAME_STOP_WORDS
)
title_is_only_creator_ignored_words =False # type: ignore
title_is_only_creator_ignored_words =False
if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words :
candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()}
@@ -1684,7 +1682,7 @@ class PostProcessorWorker :
if not base_folder_names_for_post_content :
final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content")
base_folder_names_for_post_content =[final_fallback_name ]
self .logger (f" Ultimate fallback folder name: {final_fallback_name }") # type: ignore
self .logger (f" Ultimate fallback folder name: {final_fallback_name }")
if base_folder_names_for_post_content :
determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ])
@@ -1698,17 +1696,17 @@ class PostProcessorWorker :
for folder_name_to_check in base_folder_names_for_post_content :
if not folder_name_to_check :continue
if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ):
matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word") # type: ignore
self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.") # type: ignore
matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word")
self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.")
return 0 ,num_potential_files_in_post ,[],[],[],None
if (self .show_external_links or self .extract_links_only )and post_content_html : # type: ignore
if (self .show_external_links or self .extract_links_only )and post_content_html :
if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
try :
mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b')
unique_links_data ={}
for match in link_pattern .finditer (post_content_html ):
link_url =match .group (1 ).strip ()
link_url =html .unescape (link_url ) # type: ignore
link_url =html .unescape (link_url )
link_inner_text =match .group (2 )
if not any (ext in link_url .lower ()for ext in ['.css','.js','.ico','.xml','.svg'])and not link_url .startswith ('javascript:')and link_url not in unique_links_data :
clean_link_text =re .sub (r'<.*?>','',link_inner_text )
@@ -1737,7 +1735,7 @@ class PostProcessorWorker :
decryption_key_found =key_match_in_content .group (1 )
if platform not in scraped_platforms :
self ._emit_signal ('external_link',post_title ,link_text ,link_url ,platform ,decryption_key_found or "")
links_emitted_count +=1 # type: ignore
links_emitted_count +=1
if links_emitted_count >0 :self .logger (f" 🔗 Found {links_emitted_count } potential external link(s) in post content.")
except Exception as e :self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}")
if self .extract_links_only :
@@ -1841,9 +1839,9 @@ class PostProcessorWorker :
name =file_api_info .get ('_original_name_for_log','').lower ()
return [int (text )if text .isdigit ()else text for text in re .split ('([0-9]+)',name )]
all_files_from_post_api .sort (key =natural_sort_key_for_files )
self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.") # type: ignore
self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.")
if not all_files_from_post_api :
self .logger (f" No files found to download for post {post_id }.") # type: ignore
self .logger (f" No files found to download for post {post_id }.")
return 0 ,0 ,[],[],[],None
files_to_download_info_list =[]
processed_original_filenames_in_this_post =set ()
@@ -1873,10 +1871,10 @@ class PostProcessorWorker :
file_is_candidate_by_char_filter_scope =True
else :
if self .char_filter_scope ==CHAR_SCOPE_FILES :
for filter_item_obj in current_character_filters : # type: ignore
terms_to_check_for_file =list (filter_item_obj ["aliases"]) # type: ignore
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file : # type: ignore
terms_to_check_for_file .append (filter_item_obj ["name"]) # type: ignore
for filter_item_obj in current_character_filters :
terms_to_check_for_file =list (filter_item_obj ["aliases"])
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file :
terms_to_check_for_file .append (filter_item_obj ["name"])
unique_terms_for_file_check =list (set (terms_to_check_for_file ))
for term_to_match in unique_terms_for_file_check :
if is_filename_match_for_character (current_api_original_filename ,term_to_match ):
@@ -1896,10 +1894,10 @@ class PostProcessorWorker :
char_filter_info_that_matched_file =char_filter_that_matched_title
self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Both (Title part).")
else :
for filter_item_obj_both_file in current_character_filters : # type: ignore
terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"]) # type: ignore
if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both : # type: ignore
terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"]) # type: ignore
for filter_item_obj_both_file in current_character_filters :
terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"])
if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both :
terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"])
unique_terms_for_file_both_check =list (set (terms_to_check_for_file_both ))
for term_to_match in unique_terms_for_file_both_check :
if is_filename_match_for_character (current_api_original_filename ,term_to_match ):
@@ -1929,13 +1927,13 @@ class PostProcessorWorker :
char_title_subfolder_name =None
if self .target_post_id_from_initial_url and self .custom_folder_name :
char_title_subfolder_name =self .custom_folder_name
elif char_filter_info_that_matched_file : # type: ignore
char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"]) # type: ignore
elif char_filter_that_matched_title : # type: ignore
char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"]) # type: ignore
elif char_filter_that_matched_comment : # type: ignore
char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"]) # type: ignore
if char_title_subfolder_name : # type: ignore
elif char_filter_info_that_matched_file :
char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"])
elif char_filter_that_matched_title :
char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"])
elif char_filter_that_matched_comment :
char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"])
if char_title_subfolder_name :
target_base_folders_for_this_file_iteration .append (char_title_subfolder_name )
else :
self .logger (f"⚠️ File '{current_api_original_filename }' candidate by char filter, but no folder name derived. Using post title.")
@@ -1960,7 +1958,7 @@ class PostProcessorWorker :
manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None
manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None
# Pass the determined folder name for history context
folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title )
futures_list .append (file_pool .submit (
@@ -1997,10 +1995,10 @@ class PostProcessorWorker :
total_skipped_this_post +=1
self ._emit_signal ('file_progress',"",None )
# --- History Data Collection ---
# This part is added to collect data for the history feature.
# It's placed after the file processing loop for the post.
if not self.extract_links_only and (total_downloaded_this_post > 0 or not ( # Condition: if not extract_links_only AND (files were downloaded OR post wasn't skipped at very start by title/char filter)
if not self .extract_links_only and (total_downloaded_this_post >0 or not (
(current_character_filters and (
(self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or
(self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match )
@@ -2016,9 +2014,9 @@ class PostProcessorWorker :
history_data_for_this_post ={
'post_title':post_title ,'post_id':post_id ,
'top_file_name':top_file_name_for_history ,
'num_files': num_potential_files_in_post, # Already calculated
'num_files':num_potential_files_in_post ,
'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown",
'download_location': determined_post_save_path_for_history, # Calculated earlier
'download_location':determined_post_save_path_for_history ,
'service':self .service ,'user_id':self .user_id ,
}
if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled.");
@@ -2030,12 +2028,12 @@ class DownloadThread (QThread ):
file_download_status_signal =pyqtSignal (bool )
finished_signal =pyqtSignal (int ,int ,bool ,list )
external_link_signal =pyqtSignal (str ,str ,str ,str ,str )
file_successfully_downloaded_signal = pyqtSignal(dict) # Relay from worker
file_successfully_downloaded_signal =pyqtSignal (dict )
file_progress_signal =pyqtSignal (str ,object )
retryable_file_failed_signal =pyqtSignal (list )
missed_character_post_signal =pyqtSignal (str ,str )
post_processed_for_history_signal = pyqtSignal(dict) # New signal for history data
final_history_entries_signal = pyqtSignal(list) # New signal for the final 3 history entries
post_processed_for_history_signal =pyqtSignal (dict )
final_history_entries_signal =pyqtSignal (list )
permanent_file_failed_signal =pyqtSignal (list )
def __init__ (self ,api_url_input ,output_dir ,known_names_copy ,
cancellation_event ,
@@ -2118,7 +2116,7 @@ class DownloadThread (QThread ):
self .scan_content_for_images =scan_content_for_images
self .creator_download_folder_ignore_words =creator_download_folder_ignore_words
self .manga_global_file_counter_ref =manga_global_file_counter_ref
self.history_candidates_buffer = deque(maxlen=8) # Buffer for the first 8 posts
self .history_candidates_buffer =deque (maxlen =8 )
if self .compress_images and Image is None :
self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
self .compress_images =False
@@ -2184,10 +2182,10 @@ class DownloadThread (QThread ):
worker_signals_obj .file_progress_signal .connect (self .file_progress_signal )
worker_signals_obj .external_link_signal .connect (self .external_link_signal )
worker_signals_obj .missed_character_post_signal .connect (self .missed_character_post_signal )
worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal) # Connect new signal
worker_signals_obj .file_successfully_downloaded_signal .connect (self .file_successfully_downloaded_signal )
self .logger (" Starting post fetch (single-threaded download process)...")
post_generator =download_from_api (
self .api_url_input , # type: ignore
self .api_url_input ,
logger =self .logger ,
start_page =self .start_page ,
end_page =self .end_page ,
@@ -2257,9 +2255,9 @@ class DownloadThread (QThread ):
grand_list_of_kept_original_filenames .extend (kept_originals_this_post )
if retryable_failures :
self .retryable_file_failed_signal .emit (retryable_failures )
if history_data: # New: Handle history data from worker
if history_data :
if len (self .history_candidates_buffer )<8 :
self.post_processed_for_history_signal.emit(history_data) # Emit for App to handle
self .post_processed_for_history_signal .emit (history_data )
if permanent_failures :
self .permanent_file_failed_signal .emit (permanent_failures )
except Exception as proc_err :
@@ -2276,8 +2274,8 @@ class DownloadThread (QThread ):
if not was_process_cancelled and not self .isInterruptionRequested ():
self .logger ("✅ All posts processed or end of content reached by DownloadThread.")
# Process history candidates at the end of the thread's run
# This part is now handled by DownloaderApp for both single and multi-thread
except Exception as main_thread_err :
self .logger (f"\n❌ Critical error within DownloadThread run loop: {main_thread_err }")
@@ -2291,7 +2289,7 @@ class DownloadThread (QThread ):
worker_signals_obj .external_link_signal .disconnect (self .external_link_signal )
worker_signals_obj .file_progress_signal .disconnect (self .file_progress_signal )
worker_signals_obj .missed_character_post_signal .disconnect (self .missed_character_post_signal )
worker_signals_obj.file_successfully_downloaded_signal.disconnect(self.file_successfully_downloaded_signal) # Disconnect new signal
worker_signals_obj .file_successfully_downloaded_signal .disconnect (self .file_successfully_downloaded_signal )
except (TypeError ,RuntimeError )as e :
self .logger (f" Note during DownloadThread signal disconnection: {e }")

518
main.py

File diff suppressed because it is too large Load Diff