mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
Compare commits
4 Commits
3473f6540d
...
384edfee3f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
384edfee3f | ||
|
|
3c1b361fc1 | ||
|
|
953dbaebf0 | ||
|
|
efd5458493 |
24
LICENSE
24
LICENSE
@@ -1,11 +1,21 @@
|
|||||||
Custom License - No Commercial Use
|
MIT License
|
||||||
|
|
||||||
Copyright [Yuvi9587] [2025]
|
Copyright (c) [2025] [Yuvi9587]
|
||||||
|
|
||||||
Permission is hereby granted to any person obtaining a copy of this software and associated documentation files (the "Software"), to use, copy, modify, and distribute the Software for **non-commercial purposes only**, subject to the following conditions:
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
The above copyright notice and this permission notice shall be included in all
|
||||||
2. Proper credit must be given to the original author in any public use, distribution, or derivative works.
|
copies or substantial portions of the Software.
|
||||||
3. Commercial use, resale, or sublicensing of the Software or any derivative works is strictly prohibited without explicit written permission.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND...
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import hashlib
|
|||||||
import http .client
|
import http .client
|
||||||
import traceback
|
import traceback
|
||||||
from concurrent .futures import ThreadPoolExecutor ,Future ,CancelledError ,as_completed
|
from concurrent .futures import ThreadPoolExecutor ,Future ,CancelledError ,as_completed
|
||||||
from collections import deque # Import deque
|
from collections import deque
|
||||||
import html
|
import html
|
||||||
from PyQt5 .QtCore import QObject ,pyqtSignal ,QThread ,QMutex ,QMutexLocker
|
from PyQt5 .QtCore import QObject ,pyqtSignal ,QThread ,QMutex ,QMutexLocker
|
||||||
from urllib .parse import urlparse
|
from urllib .parse import urlparse
|
||||||
@@ -42,7 +42,7 @@ from io import BytesIO
|
|||||||
STYLE_POST_TITLE ="post_title"
|
STYLE_POST_TITLE ="post_title"
|
||||||
STYLE_ORIGINAL_NAME ="original_name"
|
STYLE_ORIGINAL_NAME ="original_name"
|
||||||
STYLE_DATE_BASED ="date_based"
|
STYLE_DATE_BASED ="date_based"
|
||||||
STYLE_DATE_POST_TITLE = "date_post_title" # New style constant
|
STYLE_DATE_POST_TITLE ="date_post_title"
|
||||||
MANGA_DATE_PREFIX_DEFAULT =""
|
MANGA_DATE_PREFIX_DEFAULT =""
|
||||||
STYLE_POST_TITLE_GLOBAL_NUMBERING ="post_title_global_numbering"
|
STYLE_POST_TITLE_GLOBAL_NUMBERING ="post_title_global_numbering"
|
||||||
SKIP_SCOPE_FILES ="files"
|
SKIP_SCOPE_FILES ="files"
|
||||||
@@ -513,17 +513,17 @@ def fetch_post_comments (api_domain ,service ,user_id ,post_id ,headers ,logger
|
|||||||
raise RuntimeError (f"Unexpected error fetching comments for post {post_id } ({comments_api_url }): {e }")
|
raise RuntimeError (f"Unexpected error fetching comments for post {post_id } ({comments_api_url }): {e }")
|
||||||
def download_from_api (
|
def download_from_api (
|
||||||
api_url_input ,
|
api_url_input ,
|
||||||
logger=print, # type: ignore
|
logger =print ,
|
||||||
start_page=None, # type: ignore
|
start_page =None ,
|
||||||
end_page=None, # type: ignore
|
end_page =None ,
|
||||||
manga_mode=False, # type: ignore
|
manga_mode =False ,
|
||||||
cancellation_event=None, # type: ignore
|
cancellation_event =None ,
|
||||||
pause_event=None, # type: ignore
|
pause_event =None ,
|
||||||
use_cookie=False, # type: ignore
|
use_cookie =False ,
|
||||||
cookie_text="", # type: ignore
|
cookie_text ="",
|
||||||
selected_cookie_file=None, # type: ignore
|
selected_cookie_file =None ,
|
||||||
app_base_dir=None, # type: ignore
|
app_base_dir =None ,
|
||||||
manga_filename_style_for_sort_check=None # type: ignore # Parameter is correctly defined
|
manga_filename_style_for_sort_check =None
|
||||||
):
|
):
|
||||||
headers ={
|
headers ={
|
||||||
'User-Agent':'Mozilla/5.0',
|
'User-Agent':'Mozilla/5.0',
|
||||||
@@ -572,10 +572,8 @@ def download_from_api(
|
|||||||
return
|
return
|
||||||
if target_post_id and (start_page or end_page ):
|
if target_post_id and (start_page or end_page ):
|
||||||
logger ("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).")
|
logger ("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).")
|
||||||
# determine if we should use the "fetch all then sort oldest first" logic for manga mode
|
|
||||||
is_manga_mode_fetch_all_and_sort_oldest_first = manga_mode and \
|
is_manga_mode_fetch_all_and_sort_oldest_first =manga_mode and (manga_filename_style_for_sort_check !=STYLE_DATE_POST_TITLE )and not target_post_id
|
||||||
(manga_filename_style_for_sort_check != STYLE_DATE_POST_TITLE) and \
|
|
||||||
not target_post_id
|
|
||||||
api_base_url =f"https://{api_domain }/api/v1/{service }/user/{user_id }"
|
api_base_url =f"https://{api_domain }/api/v1/{service }/user/{user_id }"
|
||||||
page_size =50
|
page_size =50
|
||||||
if is_manga_mode_fetch_all_and_sort_oldest_first :
|
if is_manga_mode_fetch_all_and_sort_oldest_first :
|
||||||
@@ -659,8 +657,8 @@ def download_from_api(
|
|||||||
yield all_posts_for_manga_mode [i :i +page_size ]
|
yield all_posts_for_manga_mode [i :i +page_size ]
|
||||||
return
|
return
|
||||||
|
|
||||||
# If manga_mode is true but we didn't enter the block above,
|
|
||||||
# it means we want newest first for STYLE_DATE_POST_TITLE (or it's a single post URL)
|
|
||||||
if manga_mode and not target_post_id and (manga_filename_style_for_sort_check ==STYLE_DATE_POST_TITLE ):
|
if manga_mode and not target_post_id and (manga_filename_style_for_sort_check ==STYLE_DATE_POST_TITLE ):
|
||||||
logger (f" Manga Mode (Style: {STYLE_DATE_POST_TITLE }): Processing posts in default API order (newest first).")
|
logger (f" Manga Mode (Style: {STYLE_DATE_POST_TITLE }): Processing posts in default API order (newest first).")
|
||||||
|
|
||||||
@@ -756,10 +754,10 @@ class PostProcessorSignals (QObject ):
|
|||||||
file_download_status_signal =pyqtSignal (bool )
|
file_download_status_signal =pyqtSignal (bool )
|
||||||
external_link_signal =pyqtSignal (str ,str ,str ,str ,str )
|
external_link_signal =pyqtSignal (str ,str ,str ,str ,str )
|
||||||
file_progress_signal =pyqtSignal (str ,object )
|
file_progress_signal =pyqtSignal (str ,object )
|
||||||
file_successfully_downloaded_signal = pyqtSignal(dict) # New signal for successfully downloaded files
|
file_successfully_downloaded_signal =pyqtSignal (dict )
|
||||||
missed_character_post_signal =pyqtSignal (str ,str )
|
missed_character_post_signal =pyqtSignal (str ,str )
|
||||||
class PostProcessorWorker :
|
class PostProcessorWorker :
|
||||||
# ... (other __init__ arguments)
|
|
||||||
def __init__ (self ,post_data ,download_root ,known_names ,
|
def __init__ (self ,post_data ,download_root ,known_names ,
|
||||||
filter_character_list ,emitter ,
|
filter_character_list ,emitter ,
|
||||||
unwanted_keywords ,filter_mode ,skip_zip ,skip_rar ,
|
unwanted_keywords ,filter_mode ,skip_zip ,skip_rar ,
|
||||||
@@ -867,7 +865,7 @@ class PostProcessorWorker :
|
|||||||
post_title ="",file_index_in_post =0 ,num_files_in_this_post =1 ,
|
post_title ="",file_index_in_post =0 ,num_files_in_this_post =1 ,
|
||||||
manga_date_file_counter_ref =None ):
|
manga_date_file_counter_ref =None ):
|
||||||
was_original_name_kept_flag =False
|
was_original_name_kept_flag =False
|
||||||
# manga_global_file_counter_ref =None # This was a duplicate definition, removed
|
|
||||||
final_filename_saved_for_return =""
|
final_filename_saved_for_return =""
|
||||||
def _get_current_character_filters (self ):
|
def _get_current_character_filters (self ):
|
||||||
if self .dynamic_filter_holder :
|
if self .dynamic_filter_holder :
|
||||||
@@ -877,7 +875,7 @@ class PostProcessorWorker :
|
|||||||
post_title ="",file_index_in_post =0 ,num_files_in_this_post =1 ,
|
post_title ="",file_index_in_post =0 ,num_files_in_this_post =1 ,
|
||||||
manga_date_file_counter_ref =None ,
|
manga_date_file_counter_ref =None ,
|
||||||
forced_filename_override =None ,
|
forced_filename_override =None ,
|
||||||
manga_global_file_counter_ref =None, folder_context_name_for_history=None ): # Added folder_context_name_for_history
|
manga_global_file_counter_ref =None ,folder_context_name_for_history =None ):
|
||||||
was_original_name_kept_flag =False
|
was_original_name_kept_flag =False
|
||||||
final_filename_saved_for_return =""
|
final_filename_saved_for_return =""
|
||||||
retry_later_details =None
|
retry_later_details =None
|
||||||
@@ -982,18 +980,18 @@ class PostProcessorWorker :
|
|||||||
elif self .manga_filename_style ==STYLE_DATE_POST_TITLE :
|
elif self .manga_filename_style ==STYLE_DATE_POST_TITLE :
|
||||||
published_date_str =self .post .get ('published')
|
published_date_str =self .post .get ('published')
|
||||||
added_date_str =self .post .get ('added')
|
added_date_str =self .post .get ('added')
|
||||||
formatted_date_str = "nodate" # Default if no date found
|
formatted_date_str ="nodate"
|
||||||
|
|
||||||
if published_date_str :
|
if published_date_str :
|
||||||
try :
|
try :
|
||||||
formatted_date_str =published_date_str .split ('T')[0 ]
|
formatted_date_str =published_date_str .split ('T')[0 ]
|
||||||
except Exception: # pylint: disable=bare-except
|
except Exception :
|
||||||
self .logger (f" ⚠️ Could not parse 'published' date '{published_date_str }' for STYLE_DATE_POST_TITLE. Using 'nodate'.")
|
self .logger (f" ⚠️ Could not parse 'published' date '{published_date_str }' for STYLE_DATE_POST_TITLE. Using 'nodate'.")
|
||||||
elif added_date_str :
|
elif added_date_str :
|
||||||
try :
|
try :
|
||||||
formatted_date_str =added_date_str .split ('T')[0 ]
|
formatted_date_str =added_date_str .split ('T')[0 ]
|
||||||
self .logger (f" ⚠️ Post ID {original_post_id_for_log } missing 'published' date, using 'added' date '{added_date_str }' for STYLE_DATE_POST_TITLE naming.")
|
self .logger (f" ⚠️ Post ID {original_post_id_for_log } missing 'published' date, using 'added' date '{added_date_str }' for STYLE_DATE_POST_TITLE naming.")
|
||||||
except Exception: # pylint: disable=bare-except
|
except Exception :
|
||||||
self .logger (f" ⚠️ Could not parse 'added' date '{added_date_str }' for STYLE_DATE_POST_TITLE. Using 'nodate'.")
|
self .logger (f" ⚠️ Could not parse 'added' date '{added_date_str }' for STYLE_DATE_POST_TITLE. Using 'nodate'.")
|
||||||
else :
|
else :
|
||||||
self .logger (f" ⚠️ Post ID {original_post_id_for_log } missing both 'published' and 'added' dates for STYLE_DATE_POST_TITLE. Using 'nodate'.")
|
self .logger (f" ⚠️ Post ID {original_post_id_for_log } missing both 'published' and 'added' dates for STYLE_DATE_POST_TITLE. Using 'nodate'.")
|
||||||
@@ -1010,15 +1008,15 @@ class PostProcessorWorker :
|
|||||||
|
|
||||||
if num_files_in_this_post >1 :
|
if num_files_in_this_post >1 :
|
||||||
filename_to_save_in_main_path =f"{base_name_for_style }_{file_index_in_post }{original_ext }"if file_index_in_post >0 else f"{base_name_for_style }{original_ext }"
|
filename_to_save_in_main_path =f"{base_name_for_style }_{file_index_in_post }{original_ext }"if file_index_in_post >0 else f"{base_name_for_style }{original_ext }"
|
||||||
else: # Single file post
|
else :
|
||||||
filename_to_save_in_main_path =f"{base_name_for_style }{original_ext }"
|
filename_to_save_in_main_path =f"{base_name_for_style }{original_ext }"
|
||||||
else :
|
else :
|
||||||
self .logger (f"⚠️ Manga mode (Date+PostTitle Style): Post title missing for post {original_post_id_for_log }. Using 'post' as title part with date prefix.")
|
self .logger (f"⚠️ Manga mode (Date+PostTitle Style): Post title missing for post {original_post_id_for_log }. Using 'post' as title part with date prefix.")
|
||||||
cleaned_post_title_for_filename = "post" # Fallback title part
|
cleaned_post_title_for_filename ="post"
|
||||||
base_name_for_style =f"{formatted_date_str }_{cleaned_post_title_for_filename }"
|
base_name_for_style =f"{formatted_date_str }_{cleaned_post_title_for_filename }"
|
||||||
if num_files_in_this_post >1 :
|
if num_files_in_this_post >1 :
|
||||||
filename_to_save_in_main_path =f"{base_name_for_style }_{file_index_in_post }{original_ext }"if file_index_in_post >0 else f"{base_name_for_style }{original_ext }"
|
filename_to_save_in_main_path =f"{base_name_for_style }_{file_index_in_post }{original_ext }"if file_index_in_post >0 else f"{base_name_for_style }{original_ext }"
|
||||||
else: # Single file post
|
else :
|
||||||
filename_to_save_in_main_path =f"{base_name_for_style }{original_ext }"
|
filename_to_save_in_main_path =f"{base_name_for_style }{original_ext }"
|
||||||
self .logger (f"⚠️ Manga mode (Title+GlobalNum Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path }' for post {original_post_id_for_log }.")
|
self .logger (f"⚠️ Manga mode (Title+GlobalNum Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path }' for post {original_post_id_for_log }.")
|
||||||
else :
|
else :
|
||||||
@@ -1394,18 +1392,18 @@ class PostProcessorWorker :
|
|||||||
final_filename_saved_for_return =final_filename_on_disk
|
final_filename_saved_for_return =final_filename_on_disk
|
||||||
self .logger (f"✅ Saved: '{final_filename_saved_for_return }' (from '{api_original_filename }', {downloaded_size_bytes /(1024 *1024 ):.2f} MB) in '{os .path .basename (effective_save_folder )}'")
|
self .logger (f"✅ Saved: '{final_filename_saved_for_return }' (from '{api_original_filename }', {downloaded_size_bytes /(1024 *1024 ):.2f} MB) in '{os .path .basename (effective_save_folder )}'")
|
||||||
|
|
||||||
# Emit signal for successfully downloaded file
|
|
||||||
downloaded_file_details ={
|
downloaded_file_details ={
|
||||||
'disk_filename':final_filename_saved_for_return ,
|
'disk_filename':final_filename_saved_for_return ,
|
||||||
'post_title':post_title ,
|
'post_title':post_title ,
|
||||||
'post_id':original_post_id_for_log ,
|
'post_id':original_post_id_for_log ,
|
||||||
'upload_date_str':self .post .get ('published')or self .post .get ('added')or "N/A",
|
'upload_date_str':self .post .get ('published')or self .post .get ('added')or "N/A",
|
||||||
'download_timestamp': time.time(), # Will be recorded by main app
|
'download_timestamp':time .time (),
|
||||||
'download_path': effective_save_folder, # The folder it was saved into
|
'download_path':effective_save_folder ,
|
||||||
'service':self .service ,
|
'service':self .service ,
|
||||||
'user_id':self .user_id ,
|
'user_id':self .user_id ,
|
||||||
'api_original_filename':api_original_filename ,
|
'api_original_filename':api_original_filename ,
|
||||||
'folder_context_name': folder_context_name_for_history or os.path.basename(effective_save_folder) # Best effort context name
|
'folder_context_name':folder_context_name_for_history or os .path .basename (effective_save_folder )
|
||||||
}
|
}
|
||||||
self ._emit_signal ('file_successfully_downloaded',downloaded_file_details )
|
self ._emit_signal ('file_successfully_downloaded',downloaded_file_details )
|
||||||
time .sleep (0.05 )
|
time .sleep (0.05 )
|
||||||
@@ -1438,7 +1436,7 @@ class PostProcessorWorker :
|
|||||||
parsed_api_url =urlparse (self .api_url_input )
|
parsed_api_url =urlparse (self .api_url_input )
|
||||||
referer_url =f"https://{parsed_api_url .netloc }/"
|
referer_url =f"https://{parsed_api_url .netloc }/"
|
||||||
headers ={'User-Agent':'Mozilla/5.0','Referer':referer_url ,'Accept':'*/*'}
|
headers ={'User-Agent':'Mozilla/5.0','Referer':referer_url ,'Accept':'*/*'}
|
||||||
link_pattern =re .compile (r"""<a\s+.*?href=["'](https?://[^"']+)["'][^>]*>(.*?)</a>""", # type: ignore
|
link_pattern =re .compile (r"""<a\s+.*?href=["'](https?://[^"']+)["'][^>]*>(.*?)</a>""",
|
||||||
re .IGNORECASE |re .DOTALL )
|
re .IGNORECASE |re .DOTALL )
|
||||||
post_data =self .post
|
post_data =self .post
|
||||||
post_title =post_data .get ('title','')or 'untitled_post'
|
post_title =post_data .get ('title','')or 'untitled_post'
|
||||||
@@ -1461,17 +1459,17 @@ class PostProcessorWorker :
|
|||||||
post_is_candidate_by_file_char_match_in_comment_scope =False
|
post_is_candidate_by_file_char_match_in_comment_scope =False
|
||||||
char_filter_that_matched_file_in_comment_scope =None
|
char_filter_that_matched_file_in_comment_scope =None
|
||||||
char_filter_that_matched_comment =None
|
char_filter_that_matched_comment =None
|
||||||
if current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH ): # type: ignore
|
if current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH ):
|
||||||
if self ._check_pause (f"Character title filter for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
if self ._check_pause (f"Character title filter for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
||||||
for idx ,filter_item_obj in enumerate (current_character_filters ):
|
for idx ,filter_item_obj in enumerate (current_character_filters ):
|
||||||
if self .check_cancel ():break
|
if self .check_cancel ():break
|
||||||
terms_to_check_for_title =list (filter_item_obj ["aliases"]) # type: ignore
|
terms_to_check_for_title =list (filter_item_obj ["aliases"])
|
||||||
if filter_item_obj ["is_group"]:
|
if filter_item_obj ["is_group"]:
|
||||||
if filter_item_obj ["name"]not in terms_to_check_for_title : # type: ignore
|
if filter_item_obj ["name"]not in terms_to_check_for_title :
|
||||||
terms_to_check_for_title .append (filter_item_obj ["name"]) # type: ignore
|
terms_to_check_for_title .append (filter_item_obj ["name"])
|
||||||
unique_terms_for_title_check =list (set (terms_to_check_for_title ))
|
unique_terms_for_title_check =list (set (terms_to_check_for_title ))
|
||||||
for term_to_match in unique_terms_for_title_check :
|
for term_to_match in unique_terms_for_title_check :
|
||||||
match_found_for_term =is_title_match_for_character (post_title ,term_to_match ) # type: ignore
|
match_found_for_term =is_title_match_for_character (post_title ,term_to_match )
|
||||||
if match_found_for_term :
|
if match_found_for_term :
|
||||||
post_is_candidate_by_title_char_match =True
|
post_is_candidate_by_title_char_match =True
|
||||||
char_filter_that_matched_title =filter_item_obj
|
char_filter_that_matched_title =filter_item_obj
|
||||||
@@ -1499,12 +1497,12 @@ class PostProcessorWorker :
|
|||||||
current_api_original_filename_for_check =file_info_item .get ('_original_name_for_log')
|
current_api_original_filename_for_check =file_info_item .get ('_original_name_for_log')
|
||||||
if not current_api_original_filename_for_check :continue
|
if not current_api_original_filename_for_check :continue
|
||||||
for filter_item_obj in current_character_filters :
|
for filter_item_obj in current_character_filters :
|
||||||
terms_to_check =list (filter_item_obj ["aliases"]) # type: ignore
|
terms_to_check =list (filter_item_obj ["aliases"])
|
||||||
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check : # type: ignore
|
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check :
|
||||||
terms_to_check .append (filter_item_obj ["name"]) # type: ignore
|
terms_to_check .append (filter_item_obj ["name"])
|
||||||
for term_to_match in terms_to_check :
|
for term_to_match in terms_to_check :
|
||||||
if is_filename_match_for_character (current_api_original_filename_for_check ,term_to_match ):
|
if is_filename_match_for_character (current_api_original_filename_for_check ,term_to_match ):
|
||||||
post_is_candidate_by_file_char_match_in_comment_scope =True # type: ignore
|
post_is_candidate_by_file_char_match_in_comment_scope =True
|
||||||
char_filter_that_matched_file_in_comment_scope =filter_item_obj
|
char_filter_that_matched_file_in_comment_scope =filter_item_obj
|
||||||
self .logger (f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check }' matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}'). Post is candidate.")
|
self .logger (f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check }' matches char filter term '{term_to_match }' (from group/name '{filter_item_obj ['name']}'). Post is candidate.")
|
||||||
break
|
break
|
||||||
@@ -1535,11 +1533,11 @@ class PostProcessorWorker :
|
|||||||
raw_comment_content =comment_item .get ('content','')
|
raw_comment_content =comment_item .get ('content','')
|
||||||
if not raw_comment_content :continue
|
if not raw_comment_content :continue
|
||||||
cleaned_comment_text =strip_html_tags (raw_comment_content )
|
cleaned_comment_text =strip_html_tags (raw_comment_content )
|
||||||
if not cleaned_comment_text .strip ():continue # type: ignore
|
if not cleaned_comment_text .strip ():continue
|
||||||
for filter_item_obj in current_character_filters :
|
for filter_item_obj in current_character_filters :
|
||||||
terms_to_check_comment =list (filter_item_obj ["aliases"]) # type: ignore
|
terms_to_check_comment =list (filter_item_obj ["aliases"])
|
||||||
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_comment : # type: ignore
|
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_comment :
|
||||||
terms_to_check_comment .append (filter_item_obj ["name"]) # type: ignore
|
terms_to_check_comment .append (filter_item_obj ["name"])
|
||||||
for term_to_match_comment in terms_to_check_comment :
|
for term_to_match_comment in terms_to_check_comment :
|
||||||
if is_title_match_for_character (cleaned_comment_text ,term_to_match_comment ):
|
if is_title_match_for_character (cleaned_comment_text ,term_to_match_comment ):
|
||||||
post_is_candidate_by_comment_char_match =True
|
post_is_candidate_by_comment_char_match =True
|
||||||
@@ -1561,12 +1559,12 @@ class PostProcessorWorker :
|
|||||||
if current_character_filters :
|
if current_character_filters :
|
||||||
if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match :
|
if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match :
|
||||||
self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.")
|
self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.")
|
||||||
self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter") # type: ignore
|
self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter")
|
||||||
return 0 ,num_potential_files_in_post ,[],[],[],None
|
return 0 ,num_potential_files_in_post ,[],[],[],None
|
||||||
if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match :
|
if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match :
|
||||||
self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'")
|
self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'")
|
||||||
if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'):
|
if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'):
|
||||||
self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)") # type: ignore
|
self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)")
|
||||||
return 0 ,num_potential_files_in_post ,[],[],[],None
|
return 0 ,num_potential_files_in_post ,[],[],[],None
|
||||||
if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ):
|
if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ):
|
||||||
if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
||||||
@@ -1577,7 +1575,7 @@ class PostProcessorWorker :
|
|||||||
return 0 ,num_potential_files_in_post ,[],[],[],None
|
return 0 ,num_potential_files_in_post ,[],[],[],None
|
||||||
if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match :
|
if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match :
|
||||||
self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.")
|
self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.")
|
||||||
self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)") # type: ignore
|
self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)")
|
||||||
return 0 ,num_potential_files_in_post ,[],[],[],None
|
return 0 ,num_potential_files_in_post ,[],[],[],None
|
||||||
if not isinstance (post_attachments ,list ):
|
if not isinstance (post_attachments ,list ):
|
||||||
self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). Skipping attachments.")
|
self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). Skipping attachments.")
|
||||||
@@ -1587,7 +1585,7 @@ class PostProcessorWorker :
|
|||||||
if not self .extract_links_only and self .use_subfolders :
|
if not self .extract_links_only and self .use_subfolders :
|
||||||
if self ._check_pause (f"Subfolder determination for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
if self ._check_pause (f"Subfolder determination for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
||||||
primary_char_filter_for_folder =None
|
primary_char_filter_for_folder =None
|
||||||
log_reason_for_folder ="" # type: ignore
|
log_reason_for_folder =""
|
||||||
if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment :
|
if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment :
|
||||||
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope :
|
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope :
|
||||||
primary_char_filter_for_folder =char_filter_that_matched_file_in_comment_scope
|
primary_char_filter_for_folder =char_filter_that_matched_file_in_comment_scope
|
||||||
@@ -1598,10 +1596,10 @@ class PostProcessorWorker :
|
|||||||
elif (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and char_filter_that_matched_title :
|
elif (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and char_filter_that_matched_title :
|
||||||
primary_char_filter_for_folder =char_filter_that_matched_title
|
primary_char_filter_for_folder =char_filter_that_matched_title
|
||||||
log_reason_for_folder ="Matched char filter in title"
|
log_reason_for_folder ="Matched char filter in title"
|
||||||
if primary_char_filter_for_folder : # type: ignore
|
if primary_char_filter_for_folder :
|
||||||
base_folder_names_for_post_content =[clean_folder_name (primary_char_filter_for_folder ["name"])] # type: ignore
|
base_folder_names_for_post_content =[clean_folder_name (primary_char_filter_for_folder ["name"])]
|
||||||
cleaned_primary_folder_name =clean_folder_name (primary_char_filter_for_folder ["name"]) # type: ignore
|
cleaned_primary_folder_name =clean_folder_name (primary_char_filter_for_folder ["name"])
|
||||||
if cleaned_primary_folder_name .lower ()in effective_unwanted_keywords_for_folder_naming and cleaned_primary_folder_name .lower ()!="untitled_folder": # type: ignore
|
if cleaned_primary_folder_name .lower ()in effective_unwanted_keywords_for_folder_naming and cleaned_primary_folder_name .lower ()!="untitled_folder":
|
||||||
self .logger (f" ⚠️ Primary char filter folder name '{cleaned_primary_folder_name }' is in ignore list. Using generic name.")
|
self .logger (f" ⚠️ Primary char filter folder name '{cleaned_primary_folder_name }' is in ignore list. Using generic name.")
|
||||||
base_folder_names_for_post_content =["Generic Post Content"]
|
base_folder_names_for_post_content =["Generic Post Content"]
|
||||||
else :
|
else :
|
||||||
@@ -1616,7 +1614,7 @@ class PostProcessorWorker :
|
|||||||
)
|
)
|
||||||
|
|
||||||
valid_derived_folders_from_title_known_txt =[
|
valid_derived_folders_from_title_known_txt =[
|
||||||
name for name in derived_folders_from_title_via_known_txt # type: ignore
|
name for name in derived_folders_from_title_via_known_txt
|
||||||
if name and name .strip ()and name .lower ()!="untitled_folder"
|
if name and name .strip ()and name .lower ()!="untitled_folder"
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -1633,7 +1631,7 @@ class PostProcessorWorker :
|
|||||||
FOLDER_NAME_STOP_WORDS
|
FOLDER_NAME_STOP_WORDS
|
||||||
)
|
)
|
||||||
|
|
||||||
title_is_only_creator_ignored_words =False # type: ignore
|
title_is_only_creator_ignored_words =False
|
||||||
if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words :
|
if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words :
|
||||||
|
|
||||||
candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()}
|
candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()}
|
||||||
@@ -1684,7 +1682,7 @@ class PostProcessorWorker :
|
|||||||
if not base_folder_names_for_post_content :
|
if not base_folder_names_for_post_content :
|
||||||
final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content")
|
final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content")
|
||||||
base_folder_names_for_post_content =[final_fallback_name ]
|
base_folder_names_for_post_content =[final_fallback_name ]
|
||||||
self .logger (f" Ultimate fallback folder name: {final_fallback_name }") # type: ignore
|
self .logger (f" Ultimate fallback folder name: {final_fallback_name }")
|
||||||
|
|
||||||
if base_folder_names_for_post_content :
|
if base_folder_names_for_post_content :
|
||||||
determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ])
|
determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ])
|
||||||
@@ -1698,17 +1696,17 @@ class PostProcessorWorker :
|
|||||||
for folder_name_to_check in base_folder_names_for_post_content :
|
for folder_name_to_check in base_folder_names_for_post_content :
|
||||||
if not folder_name_to_check :continue
|
if not folder_name_to_check :continue
|
||||||
if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ):
|
if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ):
|
||||||
matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word") # type: ignore
|
matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word")
|
||||||
self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.") # type: ignore
|
self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.")
|
||||||
return 0 ,num_potential_files_in_post ,[],[],[],None
|
return 0 ,num_potential_files_in_post ,[],[],[],None
|
||||||
if (self .show_external_links or self .extract_links_only )and post_content_html : # type: ignore
|
if (self .show_external_links or self .extract_links_only )and post_content_html :
|
||||||
if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
|
||||||
try :
|
try :
|
||||||
mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b')
|
mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b')
|
||||||
unique_links_data ={}
|
unique_links_data ={}
|
||||||
for match in link_pattern .finditer (post_content_html ):
|
for match in link_pattern .finditer (post_content_html ):
|
||||||
link_url =match .group (1 ).strip ()
|
link_url =match .group (1 ).strip ()
|
||||||
link_url =html .unescape (link_url ) # type: ignore
|
link_url =html .unescape (link_url )
|
||||||
link_inner_text =match .group (2 )
|
link_inner_text =match .group (2 )
|
||||||
if not any (ext in link_url .lower ()for ext in ['.css','.js','.ico','.xml','.svg'])and not link_url .startswith ('javascript:')and link_url not in unique_links_data :
|
if not any (ext in link_url .lower ()for ext in ['.css','.js','.ico','.xml','.svg'])and not link_url .startswith ('javascript:')and link_url not in unique_links_data :
|
||||||
clean_link_text =re .sub (r'<.*?>','',link_inner_text )
|
clean_link_text =re .sub (r'<.*?>','',link_inner_text )
|
||||||
@@ -1737,7 +1735,7 @@ class PostProcessorWorker :
|
|||||||
decryption_key_found =key_match_in_content .group (1 )
|
decryption_key_found =key_match_in_content .group (1 )
|
||||||
if platform not in scraped_platforms :
|
if platform not in scraped_platforms :
|
||||||
self ._emit_signal ('external_link',post_title ,link_text ,link_url ,platform ,decryption_key_found or "")
|
self ._emit_signal ('external_link',post_title ,link_text ,link_url ,platform ,decryption_key_found or "")
|
||||||
links_emitted_count +=1 # type: ignore
|
links_emitted_count +=1
|
||||||
if links_emitted_count >0 :self .logger (f" 🔗 Found {links_emitted_count } potential external link(s) in post content.")
|
if links_emitted_count >0 :self .logger (f" 🔗 Found {links_emitted_count } potential external link(s) in post content.")
|
||||||
except Exception as e :self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}")
|
except Exception as e :self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}")
|
||||||
if self .extract_links_only :
|
if self .extract_links_only :
|
||||||
@@ -1841,9 +1839,9 @@ class PostProcessorWorker :
|
|||||||
name =file_api_info .get ('_original_name_for_log','').lower ()
|
name =file_api_info .get ('_original_name_for_log','').lower ()
|
||||||
return [int (text )if text .isdigit ()else text for text in re .split ('([0-9]+)',name )]
|
return [int (text )if text .isdigit ()else text for text in re .split ('([0-9]+)',name )]
|
||||||
all_files_from_post_api .sort (key =natural_sort_key_for_files )
|
all_files_from_post_api .sort (key =natural_sort_key_for_files )
|
||||||
self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.") # type: ignore
|
self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.")
|
||||||
if not all_files_from_post_api :
|
if not all_files_from_post_api :
|
||||||
self .logger (f" No files found to download for post {post_id }.") # type: ignore
|
self .logger (f" No files found to download for post {post_id }.")
|
||||||
return 0 ,0 ,[],[],[],None
|
return 0 ,0 ,[],[],[],None
|
||||||
files_to_download_info_list =[]
|
files_to_download_info_list =[]
|
||||||
processed_original_filenames_in_this_post =set ()
|
processed_original_filenames_in_this_post =set ()
|
||||||
@@ -1873,10 +1871,10 @@ class PostProcessorWorker :
|
|||||||
file_is_candidate_by_char_filter_scope =True
|
file_is_candidate_by_char_filter_scope =True
|
||||||
else :
|
else :
|
||||||
if self .char_filter_scope ==CHAR_SCOPE_FILES :
|
if self .char_filter_scope ==CHAR_SCOPE_FILES :
|
||||||
for filter_item_obj in current_character_filters : # type: ignore
|
for filter_item_obj in current_character_filters :
|
||||||
terms_to_check_for_file =list (filter_item_obj ["aliases"]) # type: ignore
|
terms_to_check_for_file =list (filter_item_obj ["aliases"])
|
||||||
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file : # type: ignore
|
if filter_item_obj ["is_group"]and filter_item_obj ["name"]not in terms_to_check_for_file :
|
||||||
terms_to_check_for_file .append (filter_item_obj ["name"]) # type: ignore
|
terms_to_check_for_file .append (filter_item_obj ["name"])
|
||||||
unique_terms_for_file_check =list (set (terms_to_check_for_file ))
|
unique_terms_for_file_check =list (set (terms_to_check_for_file ))
|
||||||
for term_to_match in unique_terms_for_file_check :
|
for term_to_match in unique_terms_for_file_check :
|
||||||
if is_filename_match_for_character (current_api_original_filename ,term_to_match ):
|
if is_filename_match_for_character (current_api_original_filename ,term_to_match ):
|
||||||
@@ -1896,10 +1894,10 @@ class PostProcessorWorker :
|
|||||||
char_filter_info_that_matched_file =char_filter_that_matched_title
|
char_filter_info_that_matched_file =char_filter_that_matched_title
|
||||||
self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Both (Title part).")
|
self .logger (f" File '{current_api_original_filename }' is candidate because post title matched. Scope: Both (Title part).")
|
||||||
else :
|
else :
|
||||||
for filter_item_obj_both_file in current_character_filters : # type: ignore
|
for filter_item_obj_both_file in current_character_filters :
|
||||||
terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"]) # type: ignore
|
terms_to_check_for_file_both =list (filter_item_obj_both_file ["aliases"])
|
||||||
if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both : # type: ignore
|
if filter_item_obj_both_file ["is_group"]and filter_item_obj_both_file ["name"]not in terms_to_check_for_file_both :
|
||||||
terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"]) # type: ignore
|
terms_to_check_for_file_both .append (filter_item_obj_both_file ["name"])
|
||||||
unique_terms_for_file_both_check =list (set (terms_to_check_for_file_both ))
|
unique_terms_for_file_both_check =list (set (terms_to_check_for_file_both ))
|
||||||
for term_to_match in unique_terms_for_file_both_check :
|
for term_to_match in unique_terms_for_file_both_check :
|
||||||
if is_filename_match_for_character (current_api_original_filename ,term_to_match ):
|
if is_filename_match_for_character (current_api_original_filename ,term_to_match ):
|
||||||
@@ -1929,13 +1927,13 @@ class PostProcessorWorker :
|
|||||||
char_title_subfolder_name =None
|
char_title_subfolder_name =None
|
||||||
if self .target_post_id_from_initial_url and self .custom_folder_name :
|
if self .target_post_id_from_initial_url and self .custom_folder_name :
|
||||||
char_title_subfolder_name =self .custom_folder_name
|
char_title_subfolder_name =self .custom_folder_name
|
||||||
elif char_filter_info_that_matched_file : # type: ignore
|
elif char_filter_info_that_matched_file :
|
||||||
char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"]) # type: ignore
|
char_title_subfolder_name =clean_folder_name (char_filter_info_that_matched_file ["name"])
|
||||||
elif char_filter_that_matched_title : # type: ignore
|
elif char_filter_that_matched_title :
|
||||||
char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"]) # type: ignore
|
char_title_subfolder_name =clean_folder_name (char_filter_that_matched_title ["name"])
|
||||||
elif char_filter_that_matched_comment : # type: ignore
|
elif char_filter_that_matched_comment :
|
||||||
char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"]) # type: ignore
|
char_title_subfolder_name =clean_folder_name (char_filter_that_matched_comment ["name"])
|
||||||
if char_title_subfolder_name : # type: ignore
|
if char_title_subfolder_name :
|
||||||
target_base_folders_for_this_file_iteration .append (char_title_subfolder_name )
|
target_base_folders_for_this_file_iteration .append (char_title_subfolder_name )
|
||||||
else :
|
else :
|
||||||
self .logger (f"⚠️ File '{current_api_original_filename }' candidate by char filter, but no folder name derived. Using post title.")
|
self .logger (f"⚠️ File '{current_api_original_filename }' candidate by char filter, but no folder name derived. Using post title.")
|
||||||
@@ -1960,7 +1958,7 @@ class PostProcessorWorker :
|
|||||||
manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None
|
manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None
|
||||||
manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None
|
manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None
|
||||||
|
|
||||||
# Pass the determined folder name for history context
|
|
||||||
folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title )
|
folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title )
|
||||||
|
|
||||||
futures_list .append (file_pool .submit (
|
futures_list .append (file_pool .submit (
|
||||||
@@ -1997,10 +1995,10 @@ class PostProcessorWorker :
|
|||||||
total_skipped_this_post +=1
|
total_skipped_this_post +=1
|
||||||
self ._emit_signal ('file_progress',"",None )
|
self ._emit_signal ('file_progress',"",None )
|
||||||
|
|
||||||
# --- History Data Collection ---
|
|
||||||
# This part is added to collect data for the history feature.
|
|
||||||
# It's placed after the file processing loop for the post.
|
|
||||||
if not self.extract_links_only and (total_downloaded_this_post > 0 or not ( # Condition: if not extract_links_only AND (files were downloaded OR post wasn't skipped at very start by title/char filter)
|
if not self .extract_links_only and (total_downloaded_this_post >0 or not (
|
||||||
(current_character_filters and (
|
(current_character_filters and (
|
||||||
(self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or
|
(self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or
|
||||||
(self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match )
|
(self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match )
|
||||||
@@ -2016,9 +2014,9 @@ class PostProcessorWorker :
|
|||||||
history_data_for_this_post ={
|
history_data_for_this_post ={
|
||||||
'post_title':post_title ,'post_id':post_id ,
|
'post_title':post_title ,'post_id':post_id ,
|
||||||
'top_file_name':top_file_name_for_history ,
|
'top_file_name':top_file_name_for_history ,
|
||||||
'num_files': num_potential_files_in_post, # Already calculated
|
'num_files':num_potential_files_in_post ,
|
||||||
'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown",
|
'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown",
|
||||||
'download_location': determined_post_save_path_for_history, # Calculated earlier
|
'download_location':determined_post_save_path_for_history ,
|
||||||
'service':self .service ,'user_id':self .user_id ,
|
'service':self .service ,'user_id':self .user_id ,
|
||||||
}
|
}
|
||||||
if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled.");
|
if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled.");
|
||||||
@@ -2030,12 +2028,12 @@ class DownloadThread (QThread ):
|
|||||||
file_download_status_signal =pyqtSignal (bool )
|
file_download_status_signal =pyqtSignal (bool )
|
||||||
finished_signal =pyqtSignal (int ,int ,bool ,list )
|
finished_signal =pyqtSignal (int ,int ,bool ,list )
|
||||||
external_link_signal =pyqtSignal (str ,str ,str ,str ,str )
|
external_link_signal =pyqtSignal (str ,str ,str ,str ,str )
|
||||||
file_successfully_downloaded_signal = pyqtSignal(dict) # Relay from worker
|
file_successfully_downloaded_signal =pyqtSignal (dict )
|
||||||
file_progress_signal =pyqtSignal (str ,object )
|
file_progress_signal =pyqtSignal (str ,object )
|
||||||
retryable_file_failed_signal =pyqtSignal (list )
|
retryable_file_failed_signal =pyqtSignal (list )
|
||||||
missed_character_post_signal =pyqtSignal (str ,str )
|
missed_character_post_signal =pyqtSignal (str ,str )
|
||||||
post_processed_for_history_signal = pyqtSignal(dict) # New signal for history data
|
post_processed_for_history_signal =pyqtSignal (dict )
|
||||||
final_history_entries_signal = pyqtSignal(list) # New signal for the final 3 history entries
|
final_history_entries_signal =pyqtSignal (list )
|
||||||
permanent_file_failed_signal =pyqtSignal (list )
|
permanent_file_failed_signal =pyqtSignal (list )
|
||||||
def __init__ (self ,api_url_input ,output_dir ,known_names_copy ,
|
def __init__ (self ,api_url_input ,output_dir ,known_names_copy ,
|
||||||
cancellation_event ,
|
cancellation_event ,
|
||||||
@@ -2118,7 +2116,7 @@ class DownloadThread (QThread ):
|
|||||||
self .scan_content_for_images =scan_content_for_images
|
self .scan_content_for_images =scan_content_for_images
|
||||||
self .creator_download_folder_ignore_words =creator_download_folder_ignore_words
|
self .creator_download_folder_ignore_words =creator_download_folder_ignore_words
|
||||||
self .manga_global_file_counter_ref =manga_global_file_counter_ref
|
self .manga_global_file_counter_ref =manga_global_file_counter_ref
|
||||||
self.history_candidates_buffer = deque(maxlen=8) # Buffer for the first 8 posts
|
self .history_candidates_buffer =deque (maxlen =8 )
|
||||||
if self .compress_images and Image is None :
|
if self .compress_images and Image is None :
|
||||||
self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
|
self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
|
||||||
self .compress_images =False
|
self .compress_images =False
|
||||||
@@ -2184,10 +2182,10 @@ class DownloadThread (QThread ):
|
|||||||
worker_signals_obj .file_progress_signal .connect (self .file_progress_signal )
|
worker_signals_obj .file_progress_signal .connect (self .file_progress_signal )
|
||||||
worker_signals_obj .external_link_signal .connect (self .external_link_signal )
|
worker_signals_obj .external_link_signal .connect (self .external_link_signal )
|
||||||
worker_signals_obj .missed_character_post_signal .connect (self .missed_character_post_signal )
|
worker_signals_obj .missed_character_post_signal .connect (self .missed_character_post_signal )
|
||||||
worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal) # Connect new signal
|
worker_signals_obj .file_successfully_downloaded_signal .connect (self .file_successfully_downloaded_signal )
|
||||||
self .logger (" Starting post fetch (single-threaded download process)...")
|
self .logger (" Starting post fetch (single-threaded download process)...")
|
||||||
post_generator =download_from_api (
|
post_generator =download_from_api (
|
||||||
self .api_url_input , # type: ignore
|
self .api_url_input ,
|
||||||
logger =self .logger ,
|
logger =self .logger ,
|
||||||
start_page =self .start_page ,
|
start_page =self .start_page ,
|
||||||
end_page =self .end_page ,
|
end_page =self .end_page ,
|
||||||
@@ -2257,9 +2255,9 @@ class DownloadThread (QThread ):
|
|||||||
grand_list_of_kept_original_filenames .extend (kept_originals_this_post )
|
grand_list_of_kept_original_filenames .extend (kept_originals_this_post )
|
||||||
if retryable_failures :
|
if retryable_failures :
|
||||||
self .retryable_file_failed_signal .emit (retryable_failures )
|
self .retryable_file_failed_signal .emit (retryable_failures )
|
||||||
if history_data: # New: Handle history data from worker
|
if history_data :
|
||||||
if len (self .history_candidates_buffer )<8 :
|
if len (self .history_candidates_buffer )<8 :
|
||||||
self.post_processed_for_history_signal.emit(history_data) # Emit for App to handle
|
self .post_processed_for_history_signal .emit (history_data )
|
||||||
if permanent_failures :
|
if permanent_failures :
|
||||||
self .permanent_file_failed_signal .emit (permanent_failures )
|
self .permanent_file_failed_signal .emit (permanent_failures )
|
||||||
except Exception as proc_err :
|
except Exception as proc_err :
|
||||||
@@ -2276,8 +2274,8 @@ class DownloadThread (QThread ):
|
|||||||
if not was_process_cancelled and not self .isInterruptionRequested ():
|
if not was_process_cancelled and not self .isInterruptionRequested ():
|
||||||
self .logger ("✅ All posts processed or end of content reached by DownloadThread.")
|
self .logger ("✅ All posts processed or end of content reached by DownloadThread.")
|
||||||
|
|
||||||
# Process history candidates at the end of the thread's run
|
|
||||||
# This part is now handled by DownloaderApp for both single and multi-thread
|
|
||||||
|
|
||||||
except Exception as main_thread_err :
|
except Exception as main_thread_err :
|
||||||
self .logger (f"\n❌ Critical error within DownloadThread run loop: {main_thread_err }")
|
self .logger (f"\n❌ Critical error within DownloadThread run loop: {main_thread_err }")
|
||||||
@@ -2291,7 +2289,7 @@ class DownloadThread (QThread ):
|
|||||||
worker_signals_obj .external_link_signal .disconnect (self .external_link_signal )
|
worker_signals_obj .external_link_signal .disconnect (self .external_link_signal )
|
||||||
worker_signals_obj .file_progress_signal .disconnect (self .file_progress_signal )
|
worker_signals_obj .file_progress_signal .disconnect (self .file_progress_signal )
|
||||||
worker_signals_obj .missed_character_post_signal .disconnect (self .missed_character_post_signal )
|
worker_signals_obj .missed_character_post_signal .disconnect (self .missed_character_post_signal )
|
||||||
worker_signals_obj.file_successfully_downloaded_signal.disconnect(self.file_successfully_downloaded_signal) # Disconnect new signal
|
worker_signals_obj .file_successfully_downloaded_signal .disconnect (self .file_successfully_downloaded_signal )
|
||||||
|
|
||||||
except (TypeError ,RuntimeError )as e :
|
except (TypeError ,RuntimeError )as e :
|
||||||
self .logger (f"ℹ️ Note during DownloadThread signal disconnection: {e }")
|
self .logger (f"ℹ️ Note during DownloadThread signal disconnection: {e }")
|
||||||
|
|||||||
Reference in New Issue
Block a user