16 Commits

Author SHA1 Message Date
Yuvi9587
65c5d2798e Update features.md 2025-06-25 12:24:35 +05:30
Yuvi9587
c23f18be6d main.py 2025-06-24 18:15:58 +01:00
Yuvi9587
69ddc2ca08 Commit 2025-06-24 16:31:28 +01:00
Yuvi9587
191dbc8c62 Commit 2025-06-19 08:25:30 +01:00
Yuvi9587
3c1b361fc1 Update main.py 2025-06-16 11:28:28 +01:00
Yuvi9587
953dbaebf0 Commit 2025-06-16 10:46:23 +01:00
Yuvi9587
efd5458493 Update main.py 2025-06-16 08:13:01 +01:00
Yuvi9587
3473f6540d Commit 2025-06-15 09:49:09 +01:00
Yuvi9587
7fe5f4b83e Commit 2025-06-14 11:40:44 +01:00
Yuvi9587
072b582622 Update languages.py 2025-06-14 11:32:29 +01:00
Yuvi9587
de936e8d96 Update languages.py 2025-06-14 16:04:41 +05:30
Yuvi9587
9d0f0dda23 Commit 2025-06-14 03:42:26 +01:00
Yuvi9587
222ec769db Commit 2025-06-12 09:13:06 +01:00
Yuvi9587
6771ede722 Commit 2025-06-11 16:39:02 +01:00
Yuvi9587
8199b79dc7 Update main.py 2025-06-11 14:28:26 +01:00
Yuvi9587
dfca265380 Update main.py 2025-06-11 04:03:38 +01:00
9 changed files with 4969 additions and 3507 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 168 KiB

After

Width:  |  Height:  |  Size: 82 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 126 KiB

After

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 139 KiB

After

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 130 KiB

After

Width:  |  Height:  |  Size: 90 KiB

View File

@@ -3,11 +3,13 @@ import time
import requests
import re
import threading
import json
import queue
import hashlib
import http .client
import traceback
from concurrent .futures import ThreadPoolExecutor ,Future ,CancelledError ,as_completed
from collections import deque
import html
from PyQt5 .QtCore import QObject ,pyqtSignal ,QThread ,QMutex ,QMutexLocker
from urllib .parse import urlparse
@@ -41,6 +43,7 @@ from io import BytesIO
STYLE_POST_TITLE ="post_title"
STYLE_ORIGINAL_NAME ="original_name"
STYLE_DATE_BASED ="date_based"
STYLE_DATE_POST_TITLE ="date_post_title"
MANGA_DATE_PREFIX_DEFAULT =""
STYLE_POST_TITLE_GLOBAL_NUMBERING ="post_title_global_numbering"
SKIP_SCOPE_FILES ="files"
@@ -454,16 +457,43 @@ def fetch_posts_paginated (api_url_base ,headers ,offset ,logger ,cancellation_e
time .sleep (0.5 )
logger (" Post fetching resumed.")
paginated_url =f'{api_url_base }?o={offset }'
logger (f" Fetching: {paginated_url } (Page approx. {offset //50 +1 })")
max_retries =3
retry_delay =5
for attempt in range (max_retries +1 ):
if cancellation_event and cancellation_event .is_set ():
raise RuntimeError ("Fetch operation cancelled by user during retry loop.")
log_message =f" Fetching: {paginated_url } (Page approx. {offset //50 +1 })"
if attempt >0 :
log_message +=f" (Attempt {attempt +1 }/{max_retries +1 })"
logger (log_message )
try :
response =requests .get (paginated_url ,headers =headers ,timeout =(10 ,60 ),cookies =cookies_dict )
response =requests .get (paginated_url ,headers =headers ,timeout =(15 ,90 ),cookies =cookies_dict )
response .raise_for_status ()
if 'application/json'not in response .headers .get ('Content-Type','').lower ():
logger (f"⚠️ Unexpected content type from API: {response .headers .get ('Content-Type')}. Body: {response .text [:200 ]}")
return []
return response .json ()
except requests .exceptions .Timeout :
raise RuntimeError (f"Timeout fetching offset {offset } from {paginated_url }")
except (requests .exceptions .Timeout ,requests .exceptions .ConnectionError )as e :
logger (f" ⚠️ Retryable network error on page fetch (Attempt {attempt +1 }): {e }")
if attempt <max_retries :
delay =retry_delay *(2 **attempt )
logger (f" Retrying in {delay } seconds...")
sleep_start =time .time ()
while time .time ()-sleep_start <delay :
if cancellation_event and cancellation_event .is_set ():
raise RuntimeError ("Fetch operation cancelled by user during retry delay.")
time .sleep (0.1 )
continue
else :
logger (f" ❌ Failed to fetch page after {max_retries +1 } attempts.")
raise RuntimeError (f"Timeout or connection error fetching offset {offset } from {paginated_url }")
except requests .exceptions .RequestException as e :
err_msg =f"Error fetching offset {offset } from {paginated_url }: {e }"
if e .response is not None :
@@ -475,6 +505,8 @@ def fetch_posts_paginated (api_url_base ,headers ,offset ,logger ,cancellation_e
raise RuntimeError (f"Error decoding JSON from offset {offset } ({paginated_url }): {e }. Response text: {response .text [:200 ]}")
except Exception as e :
raise RuntimeError (f"Unexpected error fetching offset {offset } ({paginated_url }): {e }")
raise RuntimeError (f"Failed to fetch page {paginated_url } after all attempts.")
def fetch_post_comments (api_domain ,service ,user_id ,post_id ,headers ,logger ,cancellation_event =None ,pause_event =None ,cookies_dict =None ):
if cancellation_event and cancellation_event .is_set ():
logger (" Comment fetch cancelled before request.")
@@ -488,16 +520,43 @@ def fetch_post_comments (api_domain ,service ,user_id ,post_id ,headers ,logger
time .sleep (0.5 )
logger (" Comment fetching resumed.")
comments_api_url =f"https://{api_domain }/api/v1/{service }/user/{user_id }/post/{post_id }/comments"
logger (f" Fetching comments: {comments_api_url }")
max_retries =2
retry_delay =3
for attempt in range (max_retries +1 ):
if cancellation_event and cancellation_event .is_set ():
raise RuntimeError ("Comment fetch operation cancelled by user during retry loop.")
log_message =f" Fetching comments: {comments_api_url }"
if attempt >0 :
log_message +=f" (Attempt {attempt +1 }/{max_retries +1 })"
logger (log_message )
try :
response =requests .get (comments_api_url ,headers =headers ,timeout =(10 ,30 ),cookies =cookies_dict )
response .raise_for_status ()
if 'application/json'not in response .headers .get ('Content-Type','').lower ():
logger (f"⚠️ Unexpected content type from comments API: {response .headers .get ('Content-Type')}. Body: {response .text [:200 ]}")
return []
return response .json ()
except requests .exceptions .Timeout :
raise RuntimeError (f"Timeout fetching comments for post {post_id } from {comments_api_url }")
except (requests .exceptions .Timeout ,requests .exceptions .ConnectionError )as e :
logger (f" ⚠️ Retryable network error on comment fetch (Attempt {attempt +1 }): {e }")
if attempt <max_retries :
delay =retry_delay *(2 **attempt )
logger (f" Retrying in {delay } seconds...")
sleep_start =time .time ()
while time .time ()-sleep_start <delay :
if cancellation_event and cancellation_event .is_set ():
raise RuntimeError ("Comment fetch operation cancelled by user during retry delay.")
time .sleep (0.1 )
continue
else :
logger (f" ❌ Failed to fetch comments for post {post_id } after {max_retries +1 } attempts.")
raise RuntimeError (f"Timeout or connection error fetching comments for post {post_id } from {comments_api_url }")
except requests .exceptions .RequestException as e :
err_msg =f"Error fetching comments for post {post_id } from {comments_api_url }: {e }"
if e .response is not None :
@@ -509,13 +568,33 @@ def fetch_post_comments (api_domain ,service ,user_id ,post_id ,headers ,logger
raise RuntimeError (f"Error decoding JSON from comments API for post {post_id } ({comments_api_url }): {e }. Response text: {response .text [:200 ]}")
except Exception as e :
raise RuntimeError (f"Unexpected error fetching comments for post {post_id } ({comments_api_url }): {e }")
def download_from_api (api_url_input ,logger =print ,start_page =None ,end_page =None ,manga_mode =False ,
cancellation_event =None ,pause_event =None ,use_cookie =False ,cookie_text ="",selected_cookie_file =None ,app_base_dir =None ):
headers ={'User-Agent':'Mozilla/5.0','Accept':'application/json'}
raise RuntimeError (f"Failed to fetch comments for post {post_id } after all attempts.")
def download_from_api (
api_url_input ,
logger =print ,
start_page =None ,
end_page =None ,
manga_mode =False ,
cancellation_event =None ,
pause_event =None ,
use_cookie =False ,
cookie_text ="",
selected_cookie_file =None ,
app_base_dir =None ,
manga_filename_style_for_sort_check =None
):
headers ={
'User-Agent':'Mozilla/5.0',
'Accept':'application/json'
}
service ,user_id ,target_post_id =extract_post_info (api_url_input )
if cancellation_event and cancellation_event .is_set ():
logger (" Download_from_api cancelled at start.")
return
parsed_input_url_for_domain =urlparse (api_url_input )
api_domain =parsed_input_url_for_domain .netloc
if not any (d in api_domain .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']):
@@ -552,11 +631,12 @@ cancellation_event =None ,pause_event =None ,use_cookie =False ,cookie_text ="",
return
if target_post_id and (start_page or end_page ):
logger ("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).")
is_creator_feed_for_manga =manga_mode and not target_post_id
is_manga_mode_fetch_all_and_sort_oldest_first =manga_mode and (manga_filename_style_for_sort_check !=STYLE_DATE_POST_TITLE )and not target_post_id
api_base_url =f"https://{api_domain }/api/v1/{service }/user/{user_id }"
page_size =50
if is_creator_feed_for_manga :
logger (" Manga Mode: Fetching posts to sort by date (oldest processed first)...")
if is_manga_mode_fetch_all_and_sort_oldest_first :
logger (f" Manga Mode (Style: {manga_filename_style_for_sort_check if manga_filename_style_for_sort_check else 'Default'} - Oldest First Sort Active): Fetching all posts to sort by date...")
all_posts_for_manga_mode =[]
current_offset_manga =0
if start_page and start_page >1 :
@@ -635,6 +715,12 @@ cancellation_event =None ,pause_event =None ,use_cookie =False ,cookie_text ="",
break
yield all_posts_for_manga_mode [i :i +page_size ]
return
if manga_mode and not target_post_id and (manga_filename_style_for_sort_check ==STYLE_DATE_POST_TITLE ):
logger (f" Manga Mode (Style: {STYLE_DATE_POST_TITLE }): Processing posts in default API order (newest first).")
current_page_num =1
current_offset =0
processed_target_post_flag =False
@@ -727,8 +813,10 @@ class PostProcessorSignals (QObject ):
file_download_status_signal =pyqtSignal (bool )
external_link_signal =pyqtSignal (str ,str ,str ,str ,str )
file_progress_signal =pyqtSignal (str ,object )
file_successfully_downloaded_signal =pyqtSignal (dict )
missed_character_post_signal =pyqtSignal (str ,str )
class PostProcessorWorker :
def __init__ (self ,post_data ,download_root ,known_names ,
filter_character_list ,emitter ,
unwanted_keywords ,filter_mode ,skip_zip ,skip_rar ,
@@ -756,6 +844,8 @@ class PostProcessorWorker :
scan_content_for_images =False ,
creator_download_folder_ignore_words =None ,
manga_global_file_counter_ref =None ,
session_file_path=None,
session_lock=None,
):
self .post =post_data
self .download_root =download_root
@@ -805,6 +895,8 @@ class PostProcessorWorker :
self .override_output_dir =override_output_dir
self .scan_content_for_images =scan_content_for_images
self .creator_download_folder_ignore_words =creator_download_folder_ignore_words
self.session_file_path = session_file_path
self.session_lock = session_lock
if self .compress_images and Image is None :
self .logger ("⚠️ Image compression disabled: Pillow library not found.")
@@ -836,7 +928,7 @@ class PostProcessorWorker :
post_title ="",file_index_in_post =0 ,num_files_in_this_post =1 ,
manga_date_file_counter_ref =None ):
was_original_name_kept_flag =False
manga_global_file_counter_ref =None
final_filename_saved_for_return =""
def _get_current_character_filters (self ):
if self .dynamic_filter_holder :
@@ -846,7 +938,7 @@ class PostProcessorWorker :
post_title ="",file_index_in_post =0 ,num_files_in_this_post =1 ,
manga_date_file_counter_ref =None ,
forced_filename_override =None ,
manga_global_file_counter_ref =None ):
manga_global_file_counter_ref =None ,folder_context_name_for_history =None ):
was_original_name_kept_flag =False
final_filename_saved_for_return =""
retry_later_details =None
@@ -948,6 +1040,48 @@ class PostProcessorWorker :
self .logger (f"⚠️ Manga Title+GlobalNum Mode: Counter ref not provided or malformed for '{api_original_filename }'. Using original. Ref: {manga_global_file_counter_ref }")
filename_to_save_in_main_path =cleaned_original_api_filename
self .logger (f"⚠️ Manga mode (Title+GlobalNum Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path }' for post {original_post_id_for_log }.")
elif self .manga_filename_style ==STYLE_DATE_POST_TITLE :
published_date_str =self .post .get ('published')
added_date_str =self .post .get ('added')
formatted_date_str ="nodate"
if published_date_str :
try :
formatted_date_str =published_date_str .split ('T')[0 ]
except Exception :
self .logger (f" ⚠️ Could not parse 'published' date '{published_date_str }' for STYLE_DATE_POST_TITLE. Using 'nodate'.")
elif added_date_str :
try :
formatted_date_str =added_date_str .split ('T')[0 ]
self .logger (f" ⚠️ Post ID {original_post_id_for_log } missing 'published' date, using 'added' date '{added_date_str }' for STYLE_DATE_POST_TITLE naming.")
except Exception :
self .logger (f" ⚠️ Could not parse 'added' date '{added_date_str }' for STYLE_DATE_POST_TITLE. Using 'nodate'.")
else :
self .logger (f" ⚠️ Post ID {original_post_id_for_log } missing both 'published' and 'added' dates for STYLE_DATE_POST_TITLE. Using 'nodate'.")
if post_title and post_title .strip ():
temp_cleaned_title =clean_filename (post_title .strip ())
if not temp_cleaned_title or temp_cleaned_title .startswith ("untitled_file"):
self .logger (f"⚠️ Manga mode (Date+PostTitle Style): Post title for post {original_post_id_for_log } ('{post_title }') was empty or generic after cleaning. Using 'post' as title part.")
cleaned_post_title_for_filename ="post"
else :
cleaned_post_title_for_filename =temp_cleaned_title
base_name_for_style =f"{formatted_date_str }_{cleaned_post_title_for_filename }"
if num_files_in_this_post >1 :
filename_to_save_in_main_path =f"{base_name_for_style }_{file_index_in_post }{original_ext }"if file_index_in_post >0 else f"{base_name_for_style }{original_ext }"
else :
filename_to_save_in_main_path =f"{base_name_for_style }{original_ext }"
else :
self .logger (f"⚠️ Manga mode (Date+PostTitle Style): Post title missing for post {original_post_id_for_log }. Using 'post' as title part with date prefix.")
cleaned_post_title_for_filename ="post"
base_name_for_style =f"{formatted_date_str }_{cleaned_post_title_for_filename }"
if num_files_in_this_post >1 :
filename_to_save_in_main_path =f"{base_name_for_style }_{file_index_in_post }{original_ext }"if file_index_in_post >0 else f"{base_name_for_style }{original_ext }"
else :
filename_to_save_in_main_path =f"{base_name_for_style }{original_ext }"
self .logger (f"⚠️ Manga mode (Title+GlobalNum Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path }' for post {original_post_id_for_log }.")
else :
self .logger (f"⚠️ Manga mode: Unknown filename style '{self .manga_filename_style }'. Defaulting to original filename for '{api_original_filename }'.")
filename_to_save_in_main_path =cleaned_original_api_filename
@@ -1320,7 +1454,23 @@ class PostProcessorWorker :
with self .downloaded_files_lock :self .downloaded_files .add (filename_to_save_in_main_path )
final_filename_saved_for_return =final_filename_on_disk
self .logger (f"✅ Saved: '{final_filename_saved_for_return }' (from '{api_original_filename }', {downloaded_size_bytes /(1024 *1024 ):.2f} MB) in '{os .path .basename (effective_save_folder )}'")
downloaded_file_details ={
'disk_filename':final_filename_saved_for_return ,
'post_title':post_title ,
'post_id':original_post_id_for_log ,
'upload_date_str':self .post .get ('published')or self .post .get ('added')or "N/A",
'download_timestamp':time .time (),
'download_path':effective_save_folder ,
'service':self .service ,
'user_id':self .user_id ,
'api_original_filename':api_original_filename ,
'folder_context_name':folder_context_name_for_history or os .path .basename (effective_save_folder )
}
self ._emit_signal ('file_successfully_downloaded',downloaded_file_details )
time .sleep (0.05 )
return 1 ,0 ,final_filename_saved_for_return ,was_original_name_kept_flag ,FILE_DOWNLOAD_STATUS_SUCCESS ,None
except Exception as save_err :
self .logger (f"->>Save Fail for '{final_filename_on_disk }': {save_err }")
@@ -1336,14 +1486,16 @@ class PostProcessorWorker :
def process (self ):
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[]
if self .check_cancel ():return 0 ,0 ,[],[],[]
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None
if self .check_cancel ():return 0 ,0 ,[],[],[],None
current_character_filters =self ._get_current_character_filters ()
kept_original_filenames_for_log =[]
retryable_failures_this_post =[]
permanent_failures_this_post =[]
total_downloaded_this_post =0
total_skipped_this_post =0
history_data_for_this_post =None
parsed_api_url =urlparse (self .api_url_input )
referer_url =f"https://{parsed_api_url .netloc }/"
headers ={'User-Agent':'Mozilla/5.0','Referer':referer_url ,'Accept':'*/*'}
@@ -1371,7 +1523,7 @@ class PostProcessorWorker :
char_filter_that_matched_file_in_comment_scope =None
char_filter_that_matched_comment =None
if current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH ):
if self ._check_pause (f"Character title filter for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[]
if self ._check_pause (f"Character title filter for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
for idx ,filter_item_obj in enumerate (current_character_filters ):
if self .check_cancel ():break
terms_to_check_for_title =list (filter_item_obj ["aliases"])
@@ -1402,7 +1554,7 @@ class PostProcessorWorker :
all_files_from_post_api_for_char_check .append ({'_original_name_for_log':original_api_att_name })
if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS :
self .logger (f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id }'.")
if self ._check_pause (f"File check (comments scope) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[]
if self ._check_pause (f"File check (comments scope) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
for file_info_item in all_files_from_post_api_for_char_check :
if self .check_cancel ():break
current_api_original_filename_for_check =file_info_item .get ('_original_name_for_log')
@@ -1422,7 +1574,7 @@ class PostProcessorWorker :
self .logger (f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope }")
if current_character_filters and self .char_filter_scope ==CHAR_SCOPE_COMMENTS :
if not post_is_candidate_by_file_char_match_in_comment_scope :
if self ._check_pause (f"Comment check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[]
if self ._check_pause (f"Comment check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
self .logger (f" [Char Scope: Comments] Phase 2: No file match found. Checking post comments for post ID '{post_id }'.")
try :
parsed_input_url_for_comments =urlparse (self .api_url_input )
@@ -1471,29 +1623,30 @@ class PostProcessorWorker :
if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match :
self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.")
self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter")
return 0 ,num_potential_files_in_post ,[],[],[]
return 0 ,num_potential_files_in_post ,[],[],[],None
if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match :
self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'")
if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'):
self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)")
return 0 ,num_potential_files_in_post ,[],[],[]
return 0 ,num_potential_files_in_post ,[],[],[],None
if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ):
if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[]
if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
post_title_lower =post_title .lower ()
for skip_word in self .skip_words_list :
if skip_word .lower ()in post_title_lower :
self .logger (f" -> Skip Post (Keyword in Title '{skip_word }'): '{post_title [:50 ]}...'. Scope: {self .skip_words_scope }")
return 0 ,num_potential_files_in_post ,[],[],[]
return 0 ,num_potential_files_in_post ,[],[],[],None
if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match :
self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.")
self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)")
return 0 ,num_potential_files_in_post ,[],[],[]
return 0 ,num_potential_files_in_post ,[],[],[],None
if not isinstance (post_attachments ,list ):
self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). Skipping attachments.")
post_attachments =[]
base_folder_names_for_post_content =[]
determined_post_save_path_for_history =self .override_output_dir if self .override_output_dir else self .download_root
if not self .extract_links_only and self .use_subfolders :
if self ._check_pause (f"Subfolder determination for post {post_id }"):return 0 ,num_potential_files_in_post ,[]
if self ._check_pause (f"Subfolder determination for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
primary_char_filter_for_folder =None
log_reason_for_folder =""
if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment :
@@ -1593,16 +1746,65 @@ class PostProcessorWorker :
final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content")
base_folder_names_for_post_content =[final_fallback_name ]
self .logger (f" Ultimate fallback folder name: {final_fallback_name }")
if base_folder_names_for_post_content :
determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ])
if not self .extract_links_only and self .use_post_subfolders :
cleaned_post_title_for_sub =clean_folder_name (post_title )
post_id_for_fallback =self .post .get ('id','unknown_id')
if not cleaned_post_title_for_sub or cleaned_post_title_for_sub =="untitled_folder":
self .logger (f" ⚠️ Post title '{post_title }' resulted in a generic subfolder name. Using 'post_{post_id_for_fallback }' as base.")
original_cleaned_post_title_for_sub =f"post_{post_id_for_fallback }"
else :
original_cleaned_post_title_for_sub =cleaned_post_title_for_sub
base_path_for_post_subfolder =determined_post_save_path_for_history
suffix_counter =0
final_post_subfolder_name =""
while True :
if suffix_counter ==0 :
name_candidate =original_cleaned_post_title_for_sub
else :
name_candidate =f"{original_cleaned_post_title_for_sub }_{suffix_counter }"
potential_post_subfolder_path =os .path .join (base_path_for_post_subfolder ,name_candidate )
try :
os .makedirs (potential_post_subfolder_path ,exist_ok =False )
final_post_subfolder_name =name_candidate
if suffix_counter >0 :
self .logger (f" Post subfolder name conflict: Using '{final_post_subfolder_name }' instead of '{original_cleaned_post_title_for_sub }' to avoid mixing posts.")
break
except FileExistsError :
suffix_counter +=1
if suffix_counter >100 :
self .logger (f" ⚠️ Exceeded 100 attempts to find unique subfolder name for '{original_cleaned_post_title_for_sub }'. Using UUID.")
final_post_subfolder_name =f"{original_cleaned_post_title_for_sub }_{uuid .uuid4 ().hex [:8 ]}"
os .makedirs (os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name ),exist_ok =True )
break
except OSError as e_mkdir :
self .logger (f" ❌ Error creating directory '{potential_post_subfolder_path }': {e_mkdir }. Files for this post might be saved in parent or fail.")
final_post_subfolder_name =original_cleaned_post_title_for_sub
break
determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name )
if not self .extract_links_only and self .use_subfolders and self .skip_words_list :
if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[]
if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
for folder_name_to_check in base_folder_names_for_post_content :
if not folder_name_to_check :continue
if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ):
matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word")
self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.")
return 0 ,num_potential_files_in_post ,[],[],[]
return 0 ,num_potential_files_in_post ,[],[],[],None
if (self .show_external_links or self .extract_links_only )and post_content_html :
if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[]
if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
try :
mega_key_pattern =re .compile (r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b')
unique_links_data ={}
@@ -1642,7 +1844,7 @@ class PostProcessorWorker :
except Exception as e :self .logger (f"⚠️ Error parsing post content for links: {e }\n{traceback .format_exc (limit =2 )}")
if self .extract_links_only :
self .logger (f" Extract Links Only mode: Finished processing post {post_id } for links.")
return 0 ,0 ,[],[],[]
return 0 ,0 ,[],[],[],None
all_files_from_post_api =[]
api_file_domain =urlparse (self .api_url_input ).netloc
if not api_file_domain or not any (d in api_file_domain .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']):
@@ -1729,13 +1931,13 @@ class PostProcessorWorker :
all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_from_content_scan')]
if not all_files_from_post_api :
self .logger (f" -> No images found via content scan for post {post_id } in this combined mode.")
return 0 ,0 ,[],[],[]
return 0 ,0 ,[],[],[],None
else :
self .logger (f" Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id }.")
all_files_from_post_api =[finfo for finfo in all_files_from_post_api if finfo .get ('_is_thumbnail')]
if not all_files_from_post_api :
self .logger (f" -> No API image thumbnails found for post {post_id } in thumbnail-only mode.")
return 0 ,0 ,[],[],[]
return 0 ,0 ,[],[],[],None
if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED :
def natural_sort_key_for_files (file_api_info ):
name =file_api_info .get ('_original_name_for_log','').lower ()
@@ -1744,7 +1946,7 @@ class PostProcessorWorker :
self .logger (f" Manga Date Mode: Sorted {len (all_files_from_post_api )} files within post {post_id } by original name for sequential numbering.")
if not all_files_from_post_api :
self .logger (f" No files found to download for post {post_id }.")
return 0 ,0 ,[],[],[]
return 0 ,0 ,[],[],[],None
files_to_download_info_list =[]
processed_original_filenames_in_this_post =set ()
for file_info in all_files_from_post_api :
@@ -1758,7 +1960,7 @@ class PostProcessorWorker :
processed_original_filenames_in_this_post .add (current_api_original_filename )
if not files_to_download_info_list :
self .logger (f" All files for post {post_id } were duplicate original names or skipped earlier.")
return 0 ,total_skipped_this_post ,[],[],[]
return 0 ,total_skipped_this_post ,[],[],[],None
self .logger (f" Identified {len (files_to_download_info_list )} unique original file(s) for potential download from post {post_id }.")
with ThreadPoolExecutor (max_workers =self .num_file_threads ,thread_name_prefix =f'P{post_id }File_')as file_pool :
@@ -1854,19 +2056,22 @@ class PostProcessorWorker :
if self .use_subfolders and target_base_folder_name_for_instance :
current_path_for_file_instance =os .path .join (current_path_for_file_instance ,target_base_folder_name_for_instance )
if self .use_post_subfolders :
cleaned_title_for_subfolder_instance =clean_folder_name (post_title )
current_path_for_file_instance =os .path .join (current_path_for_file_instance ,cleaned_title_for_subfolder_instance )
current_path_for_file_instance =os .path .join (current_path_for_file_instance ,final_post_subfolder_name )
manga_date_counter_to_pass =self .manga_date_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED else None
manga_global_counter_to_pass =self .manga_global_file_counter_ref if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING else None
folder_context_for_file =target_base_folder_name_for_instance if self .use_subfolders and target_base_folder_name_for_instance else clean_folder_name (post_title )
futures_list .append (file_pool .submit (
self ._download_single_file ,
file_info =file_info_to_dl ,
target_folder_path =current_path_for_file_instance ,
headers =headers ,original_post_id_for_log =post_id ,skip_event =self .skip_current_file_flag ,
post_title =post_title ,manga_date_file_counter_ref =manga_date_counter_to_pass ,
manga_global_file_counter_ref =manga_global_counter_to_pass ,
manga_global_file_counter_ref =manga_global_counter_to_pass ,folder_context_name_for_history =folder_context_for_file ,
file_index_in_post =file_idx ,num_files_in_this_post =len (files_to_download_info_list )
))
@@ -1893,18 +2098,85 @@ class PostProcessorWorker :
self .logger (f"❌ File download task for post {post_id } resulted in error: {exc_f }")
total_skipped_this_post +=1
self ._emit_signal ('file_progress',"",None )
# After a post's files are all processed, update the session file to mark this post as done.
if self.session_file_path and self.session_lock:
try:
with self.session_lock:
if os.path.exists(self.session_file_path): # Only update if the session file exists
# Read current state
with open(self.session_file_path, 'r', encoding='utf-8') as f:
session_data = json.load(f)
# Modify in memory
if not isinstance(session_data.get('download_state', {}).get('processed_post_ids'), list):
if 'download_state' not in session_data:
session_data['download_state'] = {}
session_data['download_state']['processed_post_ids'] = []
session_data['download_state']['processed_post_ids'].append(self.post.get('id'))
# Write to temp file and then atomically replace
temp_file_path = self.session_file_path + ".tmp"
with open(temp_file_path, 'w', encoding='utf-8') as f_tmp:
json.dump(session_data, f_tmp, indent=2)
os.replace(temp_file_path, self.session_file_path)
except Exception as e:
self.logger(f"⚠️ Could not update session file for post {post_id}: {e}")
if not self .extract_links_only and (total_downloaded_this_post >0 or not (
(current_character_filters and (
(self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match )or
(self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match )
))or
(self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH )and any (sw .lower ()in post_title .lower ()for sw in self .skip_words_list ))
)):
top_file_name_for_history ="N/A"
if post_main_file_info and post_main_file_info .get ('name'):
top_file_name_for_history =post_main_file_info ['name']
elif post_attachments and post_attachments [0 ].get ('name'):
top_file_name_for_history =post_attachments [0 ]['name']
history_data_for_this_post ={
'post_title':post_title ,'post_id':post_id ,
'top_file_name':top_file_name_for_history ,
'num_files':num_potential_files_in_post ,
'upload_date_str':post_data .get ('published')or post_data .get ('added')or "Unknown",
'download_location':determined_post_save_path_for_history ,
'service':self .service ,'user_id':self .user_id ,
}
if self .check_cancel ():self .logger (f" Post {post_id } processing interrupted/cancelled.");
else :self .logger (f" Post {post_id } Summary: Downloaded={total_downloaded_this_post }, Skipped Files={total_skipped_this_post }")
return total_downloaded_this_post ,total_skipped_this_post ,kept_original_filenames_for_log ,retryable_failures_this_post ,permanent_failures_this_post
if not self .extract_links_only and self .use_post_subfolders and total_downloaded_this_post ==0 :
path_to_check_for_emptiness =determined_post_save_path_for_history
try :
if os .path .isdir (path_to_check_for_emptiness )and not os .listdir (path_to_check_for_emptiness ):
self .logger (f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness }'")
os .rmdir (path_to_check_for_emptiness )
except OSError as e_rmdir :
self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }")
return total_downloaded_this_post ,total_skipped_this_post ,kept_original_filenames_for_log ,retryable_failures_this_post ,permanent_failures_this_post ,history_data_for_this_post
class DownloadThread (QThread ):
progress_signal =pyqtSignal (str )
add_character_prompt_signal =pyqtSignal (str )
file_download_status_signal =pyqtSignal (bool )
finished_signal =pyqtSignal (int ,int ,bool ,list )
external_link_signal =pyqtSignal (str ,str ,str ,str ,str )
file_successfully_downloaded_signal =pyqtSignal (dict )
file_progress_signal =pyqtSignal (str ,object )
retryable_file_failed_signal =pyqtSignal (list )
missed_character_post_signal =pyqtSignal (str ,str )
post_processed_for_history_signal =pyqtSignal (dict )
final_history_entries_signal =pyqtSignal (list )
permanent_file_failed_signal =pyqtSignal (list )
def __init__ (self ,api_url_input ,output_dir ,known_names_copy ,
cancellation_event ,
@@ -1937,6 +2209,8 @@ class DownloadThread (QThread ):
scan_content_for_images =False ,
creator_download_folder_ignore_words =None ,
cookie_text ="",
session_file_path=None,
session_lock=None,
):
super ().__init__ ()
self .api_url_input =api_url_input
@@ -1987,6 +2261,9 @@ class DownloadThread (QThread ):
self .scan_content_for_images =scan_content_for_images
self .creator_download_folder_ignore_words =creator_download_folder_ignore_words
self .manga_global_file_counter_ref =manga_global_file_counter_ref
self.session_file_path = session_file_path
self.session_lock = session_lock
self.history_candidates_buffer =deque (maxlen =8 )
if self .compress_images and Image is None :
self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
self .compress_images =False
@@ -2052,6 +2329,7 @@ class DownloadThread (QThread ):
worker_signals_obj .file_progress_signal .connect (self .file_progress_signal )
worker_signals_obj .external_link_signal .connect (self .external_link_signal )
worker_signals_obj .missed_character_post_signal .connect (self .missed_character_post_signal )
worker_signals_obj .file_successfully_downloaded_signal .connect (self .file_successfully_downloaded_signal )
self .logger (" Starting post fetch (single-threaded download process)...")
post_generator =download_from_api (
self .api_url_input ,
@@ -2064,7 +2342,8 @@ class DownloadThread (QThread ):
use_cookie =self .use_cookie ,
cookie_text =self .cookie_text ,
selected_cookie_file =self .selected_cookie_file ,
app_base_dir =self .app_base_dir
app_base_dir =self .app_base_dir ,
manga_filename_style_for_sort_check =self .manga_filename_style if self .manga_mode_active else None
)
for posts_batch_data in post_generator :
if self ._check_pause_self ("Post batch processing"):was_process_cancelled =True ;break
@@ -2114,15 +2393,20 @@ class DownloadThread (QThread ):
use_cookie =self .use_cookie ,
manga_date_file_counter_ref =self .manga_date_file_counter_ref ,
creator_download_folder_ignore_words =self .creator_download_folder_ignore_words ,
session_file_path=self.session_file_path,
session_lock=self.session_lock,
)
try :
dl_count ,skip_count ,kept_originals_this_post ,retryable_failures ,permanent_failures =post_processing_worker .process ()
dl_count ,skip_count ,kept_originals_this_post ,retryable_failures ,permanent_failures ,history_data =post_processing_worker .process ()
grand_total_downloaded_files +=dl_count
grand_total_skipped_files +=skip_count
if kept_originals_this_post :
grand_list_of_kept_original_filenames .extend (kept_originals_this_post )
if retryable_failures :
self .retryable_file_failed_signal .emit (retryable_failures )
if history_data :
if len (self .history_candidates_buffer )<8 :
self .post_processed_for_history_signal .emit (history_data )
if permanent_failures :
self .permanent_file_failed_signal .emit (permanent_failures )
except Exception as proc_err :
@@ -2138,6 +2422,10 @@ class DownloadThread (QThread ):
if was_process_cancelled :break
if not was_process_cancelled and not self .isInterruptionRequested ():
self .logger ("✅ All posts processed or end of content reached by DownloadThread.")
except Exception as main_thread_err :
self .logger (f"\n❌ Critical error within DownloadThread run loop: {main_thread_err }")
traceback .print_exc ()
@@ -2150,6 +2438,7 @@ class DownloadThread (QThread ):
worker_signals_obj .external_link_signal .disconnect (self .external_link_signal )
worker_signals_obj .file_progress_signal .disconnect (self .file_progress_signal )
worker_signals_obj .missed_character_post_signal .disconnect (self .missed_character_post_signal )
worker_signals_obj .file_successfully_downloaded_signal .disconnect (self .file_successfully_downloaded_signal )
except (TypeError ,RuntimeError )as e :
self .logger (f" Note during DownloadThread signal disconnection: {e }")

View File

@@ -17,7 +17,9 @@ These are the primary controls you'll interact with to initiate and manage downl
- Kemono.su (and mirrors) individual posts (e.g., `https://kemono.su/patreon/user/12345/post/98765`).
- Coomer.party (and mirrors like coomer.su) creator pages.
- Coomer.party (and mirrors) individual posts.
- **Note:** When **⭐ Favorite Mode** is active, this field is disabled and shows a "Favorite Mode active" message.
- **Note:**
- When **⭐ Favorite Mode** is active, this field is disabled and shows a "Favorite Mode active" message.
- This field can also be populated with a placeholder message (e.g., "{count} items in queue from popup") if posts are added to the download queue directly from the 'Creator Selection' dialog's 'Fetched Posts' view.
- **🎨 Creator Selection Button:**
- **Icon:** 🎨 (Artist Palette)
@@ -29,9 +31,17 @@ These are the primary controls you'll interact with to initiate and manage downl
- **Creator List:** Displays creators with their service (e.g., Patreon, Fanbox) and ID.
- **Selection:** Checkboxes to select one or more creators.
- **"Add Selected to URL" Button:** Adds the names of selected creators to the URL input field, comma-separated.
- **"Fetch Posts" Button:** After selecting creators, click this to retrieve their latest posts. This will display a new pane within the dialog showing the fetched posts.
- **"Download Scope" Radio Buttons (`Characters` / `Creators`):** Determines the folder structure for items added via this popup.
- `Characters`: Assumes creator names are character names for folder organization.
- `Creators`: Uses the actual creator names for folder organization.
- **Fetched Posts View (Right Pane - Appears after clicking 'Fetch Posts'):**
- **Posts Area Title Label:** Indicates loading status or number of fetched posts.
- **Posts Search Input:** Allows filtering the list of fetched posts by title.
- **Posts List Widget:** Displays posts fetched from the selected creators, often grouped by creator. Each post is checkable.
- **Select All / Deselect All Buttons (for Posts):** Convenience buttons for selecting/deselecting all displayed fetched posts.
- **"Add Selected Posts to Queue" Button:** Adds all checked posts from this view directly to the application's main download queue. The main URL input field will then show a message like "{count} items in queue from popup".
- **"Close" Button (for Posts View):** Hides the fetched posts view and returns to the creator selection list, allowing you to use the 'Add Selected to URL' button if preferred.
- **Page Range (Start to End) Input Fields:**
- **Purpose:** For creator URLs, specify a range of pages to fetch and process.
@@ -204,7 +214,7 @@ Controls for how downloaded content is structured into folders.
- **⤵️ Add to Filter Button:** Opens a dialog displaying all entries from `Known.txt` (with a search bar). Select one or more entries to add them to the "**🎯 Filter by Character(s)**" input field. Grouped names from `Known.txt` are added with the `~` syntax if applicable.
- **🗑️ Delete Selected Button:** Removes the currently selected name(s) from the list display and from the `Known.txt` file.
- **Open Known.txt Button:** Opens your `Known.txt` file in the system's default text editor for manual editing.
- **❓ Help Button (Known.txt):** Opens a guide or tooltip explaining the `Known.txt` feature and syntax.
- **❓ Help Button:** Opens a guide or tooltip explaining the app's features.
---

File diff suppressed because one or more lines are too long

1885
main.py

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
<h1 align="center">Kemono Downloader v5.2.0</h1>
<h1 align="center">Kemono Downloader v5.3.0</h1>
<table align="center">
<tr>
@@ -80,6 +80,20 @@ Kemono Downloader offers a range of features to streamline your content download
---
## ✨ What's New in v5.3.0
- **Multi-Creator Post Fetching & Queuing:**
- The **Creator Selection popup** (🎨 icon) has been significantly enhanced.
- After selecting multiple creators, you can now click a new "**Fetch Posts**" button.
- This will retrieve and display posts from all selected creators in a new view within the popup.
- You can then browse these fetched posts (with search functionality) and select individual posts.
- A new "**Add Selected Posts to Queue**" button allows you to add your chosen posts directly to the main download queue, streamlining the process of gathering content from multiple artists.
- The traditional "**Add Selected to URL**" button is still available if you prefer to populate the main URL field with creator names.
- **Improved Favorite Download Queue Handling:**
- When items are added to the download queue from the Creator Selection popup, the main URL input field will now display a placeholder message (e.g., "{count} items in queue from popup").
- The queue is now more robustly managed, especially when interacting with the main URL input field after items have been queued from the popup.
---
## ✨ What's New in v5.1.0
- **Enhanced Error File Management**: The "Error" button now opens a dialog listing files that failed to download. This dialog includes:
- An option to **retry selected** failed downloads.