import os
import sys
import queue
import re
import threading
import time
import traceback
import uuid
import http.client
import html
import json
from collections import deque, defaultdict
from datetime import datetime
import hashlib
from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError, Future
from io import BytesIO
from urllib.parse import urlparse

import requests
import cloudscraper

try:
    from PIL import Image
except ImportError:
    Image = None

try:
    from fpdf import FPDF

    class PDF(FPDF):
        def header(self):
            pass  # No header

        def footer(self):
            self.set_y(-15)
            self.set_font('Arial', 'I', 8)
            self.cell(0, 10, 'Page %s' % self.page_no(), 0, 0, 'C')
except ImportError:
    FPDF = None

try:
    from docx import Document
except ImportError:
    Document = None

from PyQt5.QtCore import (
    Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer, QSettings,
    QStandardPaths, QCoreApplication, QUrl, QSize, QProcess
)

from .api_client import download_from_api, fetch_post_comments, fetch_single_post_data
from ..services.multipart_downloader import download_file_in_parts, MULTIPART_DOWNLOADER_AVAILABLE
from ..services.drive_downloader import (
    download_mega_file, download_gdrive_file, download_dropbox_file
)
from ..utils.file_utils import (
    is_image, is_video, is_zip, is_rar, is_archive, is_audio, KNOWN_NAMES,
    clean_filename, clean_folder_name
)
from ..utils.network_utils import prepare_cookies_for_request, get_link_platform
from ..utils.text_utils import (
    is_title_match_for_character, is_filename_match_for_character, strip_html_tags,
    extract_folder_name_from_title,  # This was the function causing the error
    match_folders_from_title, match_folders_from_filename_enhanced
)
from ..config.constants import *


def robust_clean_name(name):
    """A more robust function to remove illegal characters from filenames and folders."""
    if not name:
        return ""
    illegal_chars_pattern = r'[\x00-\x1f<>:"/\\|?*\'\[\]]'
    cleaned_name = re.sub(illegal_chars_pattern, '', name)
    cleaned_name = cleaned_name.strip('.')
    if not cleaned_name:
        return "untitled_folder"
    return cleaned_name
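# Illustrative behaviour: robust_clean_name('Chapter 1: "A/B"?') strips the
# characters that are illegal on Windows/POSIX (:, ", /, ?, etc.) plus any
# trailing dots, and falls back to "untitled_folder" if nothing survives.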


class PostProcessorSignals(QObject):
    progress_signal = pyqtSignal(str)
    file_download_status_signal = pyqtSignal(bool)
    external_link_signal = pyqtSignal(str, str, str, str, str)
    file_progress_signal = pyqtSignal(str, object)
    file_successfully_downloaded_signal = pyqtSignal(dict)
    missed_character_post_signal = pyqtSignal(str, str)
    worker_finished_signal = pyqtSignal(tuple)
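# These Qt signals carry progress text, per-file status, external links, and
# per-post completion tuples from worker code back to whatever owns the emitter
# (presumably the GUI/main thread); workers never touch widgets directly.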


class PostProcessorWorker:
    def __init__(self, post_data, download_root, known_names,
                 filter_character_list, emitter,
                 unwanted_keywords, filter_mode, skip_zip,
                 use_subfolders, use_post_subfolders, target_post_id_from_initial_url, custom_folder_name,
                 compress_images, download_thumbnails, service, user_id, pause_event,
                 api_url_input, cancellation_event,
                 downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock,
                 dynamic_character_filter_holder=None, skip_words_list=None,
                 skip_words_scope=SKIP_SCOPE_FILES,
                 show_external_links=False,
                 extract_links_only=False,
                 num_file_threads=4, skip_current_file_flag=None,
                 manga_mode_active=False,
                 manga_filename_style=STYLE_POST_TITLE,
                 char_filter_scope=CHAR_SCOPE_FILES,
                 remove_from_filename_words_list=None,
                 allow_multipart_download=True,
                 cookie_text="",
                 use_cookie=False,
                 override_output_dir=None,
                 selected_cookie_file=None,
                 app_base_dir=None,
                 manga_date_prefix=MANGA_DATE_PREFIX_DEFAULT,
                 manga_date_file_counter_ref=None,
                 scan_content_for_images=False,
                 creator_download_folder_ignore_words=None,
                 manga_global_file_counter_ref=None,
                 use_date_prefix_for_subfolder=False,
                 date_prefix_format="YYYY-MM-DD",
                 keep_in_post_duplicates=False,
                 keep_duplicates_mode=DUPLICATE_HANDLING_HASH,
                 keep_duplicates_limit=0,
                 downloaded_hash_counts=None,
                 downloaded_hash_counts_lock=None,
                 session_file_path=None,
                 session_lock=None,
                 text_only_scope=None,
                 text_export_format='txt',
                 single_pdf_mode=False,
                 project_root_dir=None,
                 processed_post_ids=None,
                 multipart_scope='both',
                 multipart_parts_count=4,
                 multipart_min_size_mb=100,
                 skip_file_size_mb=None,
                 domain_override=None,
                 archive_only_mode=False,
                 manga_custom_filename_format="{published} {title}",
                 manga_custom_date_format="YYYY-MM-DD",
                 sfp_threshold=None,
                 handle_unknown_mode=False,
                 creator_name_cache=None,
                 ):
        self.post = post_data
        self.download_root = download_root
        self.known_names = known_names
        self.filter_character_list_objects_initial = filter_character_list if filter_character_list else []
        self.dynamic_filter_holder = dynamic_character_filter_holder
        self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else set()
        self.filter_mode = filter_mode
        self.skip_zip = skip_zip
        self.use_subfolders = use_subfolders
        self.use_post_subfolders = use_post_subfolders
        self.target_post_id_from_initial_url = target_post_id_from_initial_url
        self.custom_folder_name = custom_folder_name
        self.compress_images = compress_images
        self.download_thumbnails = download_thumbnails
        self.service = service
        self.user_id = user_id
        self.api_url_input = api_url_input
        self.cancellation_event = cancellation_event
        self.pause_event = pause_event
        self.emitter = emitter
        if not self.emitter:
            raise ValueError("PostProcessorWorker requires an emitter (signals object or queue).")
        self.skip_current_file_flag = skip_current_file_flag
        self.downloaded_files = downloaded_files if downloaded_files is not None else set()
        self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set()
        self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock()
        self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock()
        self.skip_words_list = skip_words_list if skip_words_list is not None else []
        self.skip_words_scope = skip_words_scope
        self.show_external_links = show_external_links
        self.extract_links_only = extract_links_only
        self.num_file_threads = num_file_threads
        self.manga_mode_active = manga_mode_active
        self.manga_filename_style = manga_filename_style
        self.char_filter_scope = char_filter_scope
        self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else []
        self.allow_multipart_download = allow_multipart_download
        self.manga_date_file_counter_ref = manga_date_file_counter_ref
        self.selected_cookie_file = selected_cookie_file
        self.app_base_dir = app_base_dir
        self.cookie_text = cookie_text
        self.manga_date_prefix = manga_date_prefix
        self.manga_global_file_counter_ref = manga_global_file_counter_ref
        self.use_cookie = use_cookie
        self.override_output_dir = override_output_dir
        self.scan_content_for_images = scan_content_for_images
        self.creator_download_folder_ignore_words = creator_download_folder_ignore_words
        self.use_date_prefix_for_subfolder = use_date_prefix_for_subfolder
        self.date_prefix_format = date_prefix_format
        self.keep_in_post_duplicates = keep_in_post_duplicates
        self.keep_duplicates_mode = keep_duplicates_mode
        self.keep_duplicates_limit = keep_duplicates_limit
        self.downloaded_hash_counts = downloaded_hash_counts if downloaded_hash_counts is not None else defaultdict(int)
        self.downloaded_hash_counts_lock = downloaded_hash_counts_lock if downloaded_hash_counts_lock is not None else threading.Lock()
        self.session_file_path = session_file_path
        self.session_lock = session_lock
        self.text_only_scope = text_only_scope
        self.text_export_format = text_export_format
        self.single_pdf_mode = single_pdf_mode
        self.project_root_dir = project_root_dir
        self.processed_post_ids = processed_post_ids if processed_post_ids is not None else []
        self.multipart_scope = multipart_scope
        self.multipart_parts_count = multipart_parts_count
        self.multipart_min_size_mb = multipart_min_size_mb
        self.domain_override = domain_override
        self.archive_only_mode = archive_only_mode
        self.skip_file_size_mb = skip_file_size_mb
        self.manga_custom_filename_format = manga_custom_filename_format
        self.manga_custom_date_format = manga_custom_date_format
        self.sfp_threshold = sfp_threshold
        self.handle_unknown_mode = handle_unknown_mode
        self.creator_name_cache = creator_name_cache

        if self.compress_images and Image is None:
            self.logger("⚠️ Image compression disabled: Pillow library not found.")
            self.compress_images = False

    def _emit_signal(self, signal_type_str, *payload_args):
        """Helper to emit a signal either directly or via a queue."""
        if isinstance(self.emitter, queue.Queue):
            self.emitter.put({'type': signal_type_str, 'payload': payload_args})
        elif self.emitter and hasattr(self.emitter, f"{signal_type_str}_signal"):
            signal_attr = getattr(self.emitter, f"{signal_type_str}_signal")
            signal_attr.emit(*payload_args)
        else:
            print(f"(Worker Log - Unrecognized Emitter for {signal_type_str}): {payload_args[0] if payload_args else ''}")
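    # Emitter pattern: when the worker runs behind a plain queue.Queue, payload
    # dicts are put on the queue and drained elsewhere; when the emitter is a
    # PostProcessorSignals instance, the matching "<type>_signal" attribute is
    # looked up and emitted directly.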

    def logger(self, message):
        self._emit_signal('progress', message)

    def check_cancel(self):
        return self.cancellation_event.is_set()

    def _check_pause(self, context_message="Operation"):
        if self.pause_event and self.pause_event.is_set():
            while self.pause_event.is_set():
                if self.check_cancel():
                    return True
                time.sleep(0.5)
        return False

    def _get_current_character_filters(self):
        if self.dynamic_filter_holder:
            return self.dynamic_filter_holder.get_filters()
        return self.filter_character_list_objects_initial

    def _find_valid_subdomain(self, url: str, max_subdomains: int = 4) -> str:
        """
        Attempts to find a working subdomain for a Kemono/Coomer URL that returned a 403 error.
        Returns the original URL if no other valid subdomain is found.
        """
        parsed_url = urlparse(url)
        original_domain = parsed_url.netloc
        for i in range(1, max_subdomains + 1):
            domain_parts = original_domain.split('.')
            if len(domain_parts) > 1:
                base_domain = ".".join(domain_parts[-2:])
                new_domain = f"n{i}.{base_domain}"
            else:
                continue
            new_url = parsed_url._replace(netloc=new_domain).geturl()
            try:
                with requests.head(new_url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=5, allow_redirects=True) as resp:
                    if resp.status_code == 200:
                        return new_url
            except requests.RequestException:
                continue
        return url
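    # Subdomain rotation assumes the Kemono/Coomer CDN exposes file hosts named
    # n1., n2., ... on the same base domain; each candidate is probed with a
    # cheap HEAD request and the first one answering 200 is used instead.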

    def _download_single_file(self, file_info, target_folder_path, post_page_url, original_post_id_for_log, skip_event,
                              post_title="", file_index_in_post=0, num_files_in_this_post=1,
                              manga_date_file_counter_ref=None,
                              forced_filename_override=None,
                              manga_global_file_counter_ref=None, folder_context_name_for_history=None):
        was_original_name_kept_flag = False
        final_filename_saved_for_return = ""
        retry_later_details = None

        # Define and prepare file_url at the top
        file_url = file_info.get('url')
        if self.download_thumbnails and file_url and is_image(file_info.get('name')):
            try:
                parsed = urlparse(file_url)
                new_netloc = re.sub(r'^n\d+\.', 'img.', parsed.netloc)
                new_path = '/thumbnail' + parsed.path if parsed.path.startswith('/data/') else parsed.path
                if new_netloc != parsed.netloc or new_path != parsed.path:
                    file_url = parsed._replace(netloc=new_netloc, path=new_path).geturl()
            except Exception as e:
                self.logger(f"⚠️ Could not create thumbnail URL: {e}")
        if self.domain_override and file_url:
            try:
                parsed_url = urlparse(file_url)
                original_netloc_parts = parsed_url.netloc.split('.')
                base_domain_name = original_netloc_parts[-2] if len(original_netloc_parts) >= 2 else original_netloc_parts[0]
                base_override_netloc = f"{base_domain_name}.{self.domain_override}"
                base_override_url = parsed_url._replace(netloc=base_override_netloc).geturl()
                if 'kemono.' in base_override_url or 'coomer.' in base_override_url:
                    file_url = self._find_valid_subdomain(base_override_url)
                else:
                    file_url = base_override_url
            except Exception as e:
                self.logger(f"⚠️ Domain Override: Failed to rewrite URL '{file_url}': {e}")
        # Pre-download duplicate check using URL hash
        try:
            parsed_url_for_hash = urlparse(file_url)
            url_hash = os.path.basename(parsed_url_for_hash.path).split('.')[0]
            with self.downloaded_hash_counts_lock:
                current_count = self.downloaded_hash_counts.get(url_hash, 0)
                if self.keep_duplicates_mode == DUPLICATE_HANDLING_HASH and current_count >= 1:
                    self.logger(f"-> Skip (Content Duplicate by URL Hash): '{file_info.get('name')}' is a duplicate. Skipping download.")
                    return 0, 1, file_info.get('name'), False, FILE_DOWNLOAD_STATUS_SKIPPED, None
                if self.keep_duplicates_mode == DUPLICATE_HANDLING_KEEP_ALL and self.keep_duplicates_limit > 0 and current_count >= self.keep_duplicates_limit:
                    self.logger(f"-> Skip (Duplicate Limit by URL Hash): Limit of {self.keep_duplicates_limit} for this content has been met. Skipping download.")
                    return 0, 1, file_info.get('name'), False, FILE_DOWNLOAD_STATUS_SKIPPED, None
        except Exception as e:
            self.logger(f"⚠️ Could not perform pre-download hash check: {e}. Proceeding with normal download.")
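        # Duplicate detection here keys on the basename of the file URL (minus
        # its extension), which on Kemono/Coomer appears to be a content hash,
        # so identical files reposted under different names are still caught.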

        if self._check_pause(f"File download prep for '{file_info.get('name', 'unknown file')}'"):
            return 0, 1, "", False, FILE_DOWNLOAD_STATUS_SKIPPED, None

        file_download_headers = {
            'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
            'Referer': post_page_url,
            'Accept': 'text/css'
        }
        cookies_to_use_for_file = None
        if self.use_cookie:
            cookies_to_use_for_file = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger)

        if self.skip_file_size_mb is not None:
            api_original_filename_for_size_check = file_info.get('_original_name_for_log', file_info.get('name'))
            try:
                # Use a HEAD request to read the headers without downloading the body
                with requests.head(file_url, headers=file_download_headers, timeout=15, cookies=cookies_to_use_for_file, allow_redirects=True) as head_response:
                    head_response.raise_for_status()
                    content_length = head_response.headers.get('Content-Length')
                    if content_length:
                        file_size_bytes = int(content_length)
                        file_size_mb = file_size_bytes / (1024 * 1024)
                        if file_size_mb < self.skip_file_size_mb:
                            self.logger(f"-> Skip File (Size): '{api_original_filename_for_size_check}' is {file_size_mb:.2f} MB, which is smaller than the {self.skip_file_size_mb} MB limit.")
                            return 0, 1, api_original_filename_for_size_check, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
                    else:
                        self.logger(f"⚠️ Could not determine file size for '{api_original_filename_for_size_check}' to check against size limit. Proceeding with download.")
            except requests.RequestException as e:
                self.logger(f"⚠️ Could not fetch file headers to check size for '{api_original_filename_for_size_check}': {e}. Proceeding with download.")

        api_original_filename = file_info.get('_original_name_for_log', file_info.get('name'))
        filename_to_save_in_main_path = ""
        if forced_filename_override:
            filename_to_save_in_main_path = forced_filename_override
            self.logger(f"Retrying with forced filename: '{filename_to_save_in_main_path}'")
        else:
            if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_FILES or self.skip_words_scope == SKIP_SCOPE_BOTH):
                filename_to_check_for_skip_words = api_original_filename.lower()
                for skip_word in self.skip_words_list:
                    if skip_word.lower() in filename_to_check_for_skip_words:
                        self.logger(f"-> Skip File (Keyword in Original Name '{skip_word}'): '{api_original_filename}'. Scope: {self.skip_words_scope}")
                        return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
            cleaned_original_api_filename = robust_clean_name(api_original_filename)
            original_filename_cleaned_base, original_ext = os.path.splitext(cleaned_original_api_filename)
            if not original_ext.startswith('.'):
                original_ext = '.' + original_ext if original_ext else ''
            if self.manga_mode_active:
                if self.manga_filename_style == STYLE_ORIGINAL_NAME:
                    published_date_str = self.post.get('published')
                    added_date_str = self.post.get('added')
                    formatted_date_str = "nodate"
                    date_to_use_str = published_date_str or added_date_str
                    if date_to_use_str:
                        try:
                            formatted_date_str = date_to_use_str.split('T')[0]
                        except Exception:
                            self.logger(f"⚠️ Could not parse date '{date_to_use_str}'. Using 'nodate' prefix.")
                    else:
                        self.logger(f"⚠️ Post ID {original_post_id_for_log} has no date. Using 'nodate' prefix.")
                    filename_to_save_in_main_path = f"{formatted_date_str}_{cleaned_original_api_filename}"
                    was_original_name_kept_flag = True
                elif self.manga_filename_style == STYLE_POST_TITLE:
                    if post_title and post_title.strip():
                        cleaned_post_title_base = robust_clean_name(post_title.strip())
                        if num_files_in_this_post > 1:
                            if file_index_in_post == 0:
                                filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}"
                            else:
                                filename_to_save_in_main_path = f"{cleaned_post_title_base}_{file_index_in_post}{original_ext}"
                            was_original_name_kept_flag = False
                        else:
                            filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}"
                    else:
                        filename_to_save_in_main_path = cleaned_original_api_filename
                        self.logger(f"⚠️ Renaming Mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save_in_main_path}'.")
                elif self.manga_filename_style == STYLE_DATE_BASED:
                    if manga_date_file_counter_ref is not None and len(manga_date_file_counter_ref) == 2:
                        counter_val_for_filename = -1
                        counter_lock = manga_date_file_counter_ref[1]
                        with counter_lock:
                            counter_val_for_filename = manga_date_file_counter_ref[0]
                            manga_date_file_counter_ref[0] += 1
                        base_numbered_name = f"{counter_val_for_filename:03d}"
                        if self.manga_date_prefix and self.manga_date_prefix.strip():
                            cleaned_prefix = robust_clean_name(self.manga_date_prefix.strip())
                            if cleaned_prefix:
                                filename_to_save_in_main_path = f"{cleaned_prefix}{base_numbered_name}{original_ext}"
                            else:
                                filename_to_save_in_main_path = f"{base_numbered_name}{original_ext}"
                                self.logger(f"⚠️ Manga Date Mode: Provided prefix '{self.manga_date_prefix}' was empty after cleaning. Using number only.")
                        else:
                            filename_to_save_in_main_path = f"{base_numbered_name}{original_ext}"
                    else:
                        self.logger(f"⚠️ Manga Date Mode: Counter ref not provided or malformed for '{api_original_filename}'. Using original. Ref: {manga_date_file_counter_ref}")
                        filename_to_save_in_main_path = cleaned_original_api_filename
                elif self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING:
                    if manga_global_file_counter_ref is not None and len(manga_global_file_counter_ref) == 2:
                        counter_val_for_filename = -1
                        counter_lock = manga_global_file_counter_ref[1]
                        with counter_lock:
                            counter_val_for_filename = manga_global_file_counter_ref[0]
                            manga_global_file_counter_ref[0] += 1
                        cleaned_post_title_base_for_global = robust_clean_name(post_title.strip() if post_title and post_title.strip() else "post")
                        filename_to_save_in_main_path = f"{cleaned_post_title_base_for_global}_{counter_val_for_filename:03d}{original_ext}"
                    else:
                        self.logger(f"⚠️ Manga Title+GlobalNum Mode: Counter ref not provided or malformed for '{api_original_filename}'. Using original. Ref: {manga_global_file_counter_ref}")
                        filename_to_save_in_main_path = cleaned_original_api_filename
                        self.logger(f"⚠️ Renaming Mode (Title+GlobalNum Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path}' for post {original_post_id_for_log}.")
                elif self.manga_filename_style == STYLE_POST_ID:
                    if original_post_id_for_log and original_post_id_for_log != 'unknown_id':
                        base_name = str(original_post_id_for_log)
                        filename_to_save_in_main_path = f"{base_name}_{file_index_in_post}{original_ext}"
                    else:
                        self.logger(f"⚠️ Renaming Mode (Post ID Style): Post ID missing. Using cleaned original filename '{cleaned_original_api_filename}'.")
                        filename_to_save_in_main_path = cleaned_original_api_filename
                elif self.manga_filename_style == STYLE_CUSTOM:
                    try:
                        def format_date(date_str):
                            if not date_str or 'NoDate' in date_str:
                                return "NoDate"
                            try:
                                dt_obj = datetime.fromisoformat(date_str)
                                strftime_format = self.manga_custom_date_format.replace("YYYY", "%Y").replace("MM", "%m").replace("DD", "%d")
                                return dt_obj.strftime(strftime_format)
                            except (ValueError, TypeError):
                                return date_str.split('T')[0]

                        service = self.service.lower()
                        user_id = str(self.user_id)
                        # Look up the name in the cache, falling back to the user_id if not found
                        creator_name = user_id  # Default to the ID
                        if self.creator_name_cache:
                            creator_name = self.creator_name_cache.get((service, user_id), user_id)
                        added_date = self.post.get('added')
                        published_date = self.post.get('published')
                        edited_date = self.post.get('edited')
                        format_values = {
                            'id': str(self.post.get('id', '')),
                            'user': user_id,
                            'creator_name': creator_name,
                            'service': self.service,
                            'title': str(self.post.get('title', '')),
                            'name': original_filename_cleaned_base,
                            'added': format_date(added_date or published_date),
                            'published': format_date(published_date),
                            'edited': format_date(edited_date or published_date)
                        }
                        custom_base_name = self.manga_custom_filename_format.format(**format_values)
                        cleaned_custom_name = robust_clean_name(custom_base_name)
                        if num_files_in_this_post > 1:
                            filename_to_save_in_main_path = f"{cleaned_custom_name}_{file_index_in_post}{original_ext}"
                        else:
                            filename_to_save_in_main_path = f"{cleaned_custom_name}{original_ext}"
                    except (KeyError, IndexError, ValueError) as e:
                        self.logger(f"⚠️ Custom format error: {e}. Falling back to original filename.")
                        filename_to_save_in_main_path = cleaned_original_api_filename
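                    # The custom style exposes these placeholders to the user's
                    # format string: {id}, {user}, {creator_name}, {service},
                    # {title}, {name}, {added}, {published}, {edited}; date
                    # fields are rendered via format_date() with manga_custom_date_format.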
                elif self.manga_filename_style == STYLE_DATE_POST_TITLE:
                    published_date_str = self.post.get('published')
                    added_date_str = self.post.get('added')
                    formatted_date_str = "nodate"
                    if published_date_str:
                        try:
                            formatted_date_str = published_date_str.split('T')[0]
                        except Exception:
                            self.logger(f"⚠️ Could not parse 'published' date '{published_date_str}' for STYLE_DATE_POST_TITLE. Using 'nodate'.")
                    elif added_date_str:
                        try:
                            formatted_date_str = added_date_str.split('T')[0]
                            self.logger(f"⚠️ Post ID {original_post_id_for_log} missing 'published' date, using 'added' date '{added_date_str}' for STYLE_DATE_POST_TITLE naming.")
                        except Exception:
                            self.logger(f"⚠️ Could not parse 'added' date '{added_date_str}' for STYLE_DATE_POST_TITLE. Using 'nodate'.")
                    else:
                        self.logger(f"⚠️ Post ID {original_post_id_for_log} missing both 'published' and 'added' dates for STYLE_DATE_POST_TITLE. Using 'nodate'.")
                    if post_title and post_title.strip():
                        temp_cleaned_title = robust_clean_name(post_title.strip())
                        if not temp_cleaned_title or temp_cleaned_title.startswith("untitled_folder"):
                            self.logger(f"⚠️ Renaming Mode (Date+PostTitle Style): Post title for post {original_post_id_for_log} ('{post_title}') was empty or generic after cleaning. Using 'post' as title part.")
                            cleaned_post_title_for_filename = "post"
                        else:
                            cleaned_post_title_for_filename = temp_cleaned_title
                        base_name_for_style = f"{formatted_date_str}_{cleaned_post_title_for_filename}"
                        if num_files_in_this_post > 1:
                            filename_to_save_in_main_path = f"{base_name_for_style}_{file_index_in_post}{original_ext}" if file_index_in_post > 0 else f"{base_name_for_style}{original_ext}"
                        else:
                            filename_to_save_in_main_path = f"{base_name_for_style}{original_ext}"
                    else:
                        self.logger(f"⚠️ Renaming Mode (Date+PostTitle Style): Post title missing for post {original_post_id_for_log}. Using 'post' as title part with date prefix.")
                        cleaned_post_title_for_filename = "post"
                        base_name_for_style = f"{formatted_date_str}_{cleaned_post_title_for_filename}"
                        if num_files_in_this_post > 1:
                            filename_to_save_in_main_path = f"{base_name_for_style}_{file_index_in_post}{original_ext}" if file_index_in_post > 0 else f"{base_name_for_style}{original_ext}"
                        else:
                            filename_to_save_in_main_path = f"{base_name_for_style}{original_ext}"
                else:
                    self.logger(f"⚠️ Renaming Mode: Unknown filename style '{self.manga_filename_style}'. Defaulting to original filename for '{api_original_filename}'.")
                    filename_to_save_in_main_path = cleaned_original_api_filename
                if not filename_to_save_in_main_path:
                    filename_to_save_in_main_path = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}"
                    self.logger(f"⚠️ Renaming Mode: Generated filename was empty. Using generic fallback: '{filename_to_save_in_main_path}'.")
                    was_original_name_kept_flag = False
            else:
                is_url_like = 'http' in api_original_filename.lower()
                is_too_long = len(cleaned_original_api_filename) > 100
                if is_url_like or is_too_long:
                    self.logger("⚠️ Original filename is a URL or too long. Generating a shorter name.")
                    name_hash = hashlib.md5(api_original_filename.encode()).hexdigest()[:12]
                    _, ext = os.path.splitext(cleaned_original_api_filename)
                    if not ext:
                        try:
                            path = urlparse(api_original_filename).path
                            ext = os.path.splitext(path)[1] or ".file"
                        except Exception:
                            ext = ".file"
                    cleaned_post_title = robust_clean_name(post_title.strip() if post_title else "post")[:40]
                    filename_to_save_in_main_path = f"{cleaned_post_title}_{name_hash}{ext}"
                    was_original_name_kept_flag = False
                else:
                    filename_to_save_in_main_path = cleaned_original_api_filename
                    was_original_name_kept_flag = True
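                # Fallback naming for URL-like or >100-character names: an md5
                # digest of the original string keeps the generated name stable
                # across runs, while the 40-character title slice keeps it readable.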

        if self.remove_from_filename_words_list and filename_to_save_in_main_path:
            base_name_for_removal, ext_for_removal = os.path.splitext(filename_to_save_in_main_path)
            modified_base_name = base_name_for_removal
            for word_to_remove in self.remove_from_filename_words_list:
                if not word_to_remove:
                    continue
                pattern = re.compile(re.escape(word_to_remove), re.IGNORECASE)
                modified_base_name = pattern.sub("", modified_base_name)
            modified_base_name = re.sub(r'[_.\s-]+', ' ', modified_base_name)
            modified_base_name = re.sub(r'\s+', ' ', modified_base_name)
            modified_base_name = modified_base_name.strip()
            if modified_base_name and modified_base_name != ext_for_removal.lstrip('.'):
                filename_to_save_in_main_path = modified_base_name + ext_for_removal
            else:
                filename_to_save_in_main_path = base_name_for_removal + ext_for_removal

        MAX_PATH_LENGTH = 240
        base_name, extension = os.path.splitext(filename_to_save_in_main_path)
        potential_full_path = os.path.join(target_folder_path, filename_to_save_in_main_path)
        if len(potential_full_path) > MAX_PATH_LENGTH:
            excess_length = len(potential_full_path) - MAX_PATH_LENGTH
            if len(base_name) > excess_length:
                truncated_base_name = base_name[:-excess_length]
                filename_to_save_in_main_path = truncated_base_name.strip() + extension
                self.logger(f"⚠️ Path was too long. Truncating filename to: '{filename_to_save_in_main_path}'")
            else:
                name_hash = hashlib.md5(base_name.encode()).hexdigest()[:16]
                filename_to_save_in_main_path = f"{name_hash}{extension}"
                self.logger(f"⚠️ Path is extremely long. Hashing filename to: '{filename_to_save_in_main_path}'")
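        # MAX_PATH_LENGTH of 240 leaves headroom under the classic Windows
        # 260-character path limit; overly long names are truncated first and,
        # if that is still not enough, replaced with a 16-character md5 digest.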

        if not self.download_thumbnails:
            is_img_type = is_image(api_original_filename)
            is_vid_type = is_video(api_original_filename)
            is_archive_type = is_archive(api_original_filename)
            is_audio_type = is_audio(api_original_filename)
            if self.filter_mode == 'archive':
                if not is_archive_type:
                    self.logger(f"-> Filter Skip (Archive Mode): '{api_original_filename}' (Not an Archive).")
                    return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
            elif self.filter_mode == 'image':
                if not is_img_type:
                    self.logger(f"-> Filter Skip: '{api_original_filename}' (Not Image).")
                    return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
            elif self.filter_mode == 'video':
                if not is_vid_type:
                    self.logger(f"-> Filter Skip: '{api_original_filename}' (Not Video).")
                    return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
            elif self.filter_mode == 'audio':
                if not is_audio_type:
                    self.logger(f"-> Filter Skip: '{api_original_filename}' (Not Audio).")
                    return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None

        if self.skip_zip and is_archive(api_original_filename):
            self.logger(f"-> Pref Skip: '{api_original_filename}' (Archive).")
            return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
        try:
            os.makedirs(target_folder_path, exist_ok=True)
        except OSError as e:
            self.logger(f"❌ Critical error creating directory '{target_folder_path}': {e}. Skipping file '{api_original_filename}'.")
            return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
        temp_file_base_for_unique_part, temp_file_ext_for_unique_part = os.path.splitext(filename_to_save_in_main_path if filename_to_save_in_main_path else api_original_filename)
        unique_id_for_part_file = uuid.uuid4().hex[:8]
        unique_part_file_stem_on_disk = f"{temp_file_base_for_unique_part}_{unique_id_for_part_file}"
        max_retries = 3

        if not self.keep_in_post_duplicates:
            final_save_path_check = os.path.join(target_folder_path, filename_to_save_in_main_path)
            if os.path.exists(final_save_path_check):
                try:
                    self.logger(f"⚠️ File '{filename_to_save_in_main_path}' exists. Verifying content with URL hash...")
                    parsed_url = urlparse(file_url)
                    hash_from_url = os.path.basename(parsed_url.path).split('.')[0]
                    hash_from_disk = hashlib.sha256()
                    with open(final_save_path_check, 'rb') as f:
                        for chunk in iter(lambda: f.read(8192), b""):  # Use a larger buffer for hashing
                            hash_from_disk.update(chunk)
                    if hash_from_url == hash_from_disk.hexdigest():
                        self.logger("-> Skip (Hash Match): The existing file is a perfect match.")
                        return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
                    else:
                        # Hashes differ. This is a legitimate collision. Log it and proceed.
                        # The downloader will then save the new file with a numbered suffix (_1, _2, etc.).
                        self.logger("-> Hash Mismatch. Existing file is different content. Proceeding to download new file with a suffix.")
                except Exception as e:
                    self.logger(f"⚠️ Could not perform hash check for existing file: {e}. Re-downloading with a suffix to be safe.")
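        # The comparison above assumes the server names files after their
        # SHA-256 digest (the basename of the download URL), so hashing the
        # local copy is enough to prove the bytes on disk match the remote file.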

        retry_delay = 5
        downloaded_size_bytes = 0
        calculated_file_hash = None
        downloaded_part_file_path = None
        download_successful_flag = False
        last_exception_for_retry_later = None
        is_permanent_error = False
        data_to_write_io = None
        for attempt_num_single_stream in range(max_retries + 1):
            response = None
            if self._check_pause(f"File download attempt for '{api_original_filename}'"):
                break
            if self.check_cancel() or (skip_event and skip_event.is_set()):
                break
            try:
                if attempt_num_single_stream > 0:
                    self.logger(f"Retrying download for '{api_original_filename}' (Overall Attempt {attempt_num_single_stream + 1}/{max_retries + 1})...")
                    time.sleep(retry_delay * (2 ** (attempt_num_single_stream - 1)))
                self._emit_signal('file_download_status', True)
                current_url_to_try = file_url
                response = requests.get(current_url_to_try, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file)
                if response.status_code == 403 and ('kemono.' in current_url_to_try or 'coomer.' in current_url_to_try):
                    self.logger(f"⚠️ Got 403 Forbidden for '{api_original_filename}'. Attempting subdomain rotation...")
                    new_url = self._find_valid_subdomain(current_url_to_try)
                    if new_url != current_url_to_try:
                        self.logger(f"Retrying with new URL: {new_url}")
                        file_url = new_url
                        response.close()  # Close the old response
                        response = requests.get(new_url, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file)
                response.raise_for_status()

                # Size check is only possible once Content-Length is known from the GET response.
                total_size_bytes = int(response.headers.get('Content-Length', 0))
                if self.skip_file_size_mb is not None:
                    if total_size_bytes > 0:
                        file_size_mb = total_size_bytes / (1024 * 1024)
                        if file_size_mb < self.skip_file_size_mb:
                            self.logger(f"-> Skip File (Size): '{api_original_filename}' is {file_size_mb:.2f} MB, which is smaller than the {self.skip_file_size_mb} MB limit.")
                            return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
                    # If Content-Length is missing the size cannot be checked, so just proceed.

                num_parts_for_file = min(self.multipart_parts_count, MAX_PARTS_FOR_MULTIPART_DOWNLOAD)
                file_is_eligible_by_scope = False
                if self.multipart_scope == 'videos':
                    if is_video(api_original_filename):
                        file_is_eligible_by_scope = True
                elif self.multipart_scope == 'archives':
                    if is_archive(api_original_filename):
                        file_is_eligible_by_scope = True
                elif self.multipart_scope == 'both':
                    if is_video(api_original_filename) or is_archive(api_original_filename):
                        file_is_eligible_by_scope = True
                min_size_in_bytes = self.multipart_min_size_mb * 1024 * 1024
                attempt_multipart = (self.allow_multipart_download and MULTIPART_DOWNLOADER_AVAILABLE and
                                     file_is_eligible_by_scope and
                                     num_parts_for_file > 1 and total_size_bytes > min_size_in_bytes and
                                     'bytes' in response.headers.get('Accept-Ranges', '').lower())
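                # Multipart is only attempted when the feature is enabled and
                # available, the file type matches the configured scope, more
                # than one part is requested, the size exceeds the configured
                # minimum, and the server advertises byte-range support.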
                if self._check_pause(f"Multipart decision for '{api_original_filename}'"):
                    break
                if attempt_multipart:
                    response.close()  # Close the initial connection before starting multipart
                    mp_save_path_for_unique_part_stem_arg = os.path.join(target_folder_path, f"{unique_part_file_stem_on_disk}{temp_file_ext_for_unique_part}")
                    mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts(
                        file_url, mp_save_path_for_unique_part_stem_arg, total_size_bytes, num_parts_for_file, file_download_headers, api_original_filename,
                        emitter_for_multipart=self.emitter, cookies_for_chunk_session=cookies_to_use_for_file,
                        cancellation_event=self.cancellation_event, skip_event=skip_event, logger_func=self.logger,
                        pause_event=self.pause_event
                    )
                    if mp_success:
                        download_successful_flag = True
                        downloaded_size_bytes = mp_bytes
                        calculated_file_hash = mp_hash
                        downloaded_part_file_path = mp_save_path_for_unique_part_stem_arg
                        if mp_file_handle:
                            mp_file_handle.close()
                        break
                    else:
                        if attempt_num_single_stream < max_retries:
                            self.logger(f"Multi-part download attempt failed for '{api_original_filename}'. Retrying with single stream.")
                        else:
                            download_successful_flag = False
                            break
                else:
                    self.logger(f"⬇️ Downloading (Single Stream): '{api_original_filename}' (Size: {total_size_bytes / (1024 * 1024):.2f} MB if known) [Base Name: '{filename_to_save_in_main_path}'] ({response.url})")
                    current_single_stream_part_path = os.path.join(target_folder_path, f"{unique_part_file_stem_on_disk}{temp_file_ext_for_unique_part}.part")
                    current_attempt_downloaded_bytes = 0
                    md5_hasher = hashlib.md5()
                    last_progress_time = time.time()
                    try:
                        with open(current_single_stream_part_path, 'wb') as f_part:
                            for chunk in response.iter_content(chunk_size=1 * 1024 * 1024):
                                if self._check_pause(f"Chunk download for '{api_original_filename}'"):
                                    break
                                if self.check_cancel() or (skip_event and skip_event.is_set()):
                                    break
                                if chunk:
                                    f_part.write(chunk)
                                    md5_hasher.update(chunk)
                                    current_attempt_downloaded_bytes += len(chunk)
                                    if time.time() - last_progress_time > 1 and total_size_bytes > 0:
                                        self._emit_signal('file_progress', api_original_filename, (current_attempt_downloaded_bytes, total_size_bytes))
                                        last_progress_time = time.time()
                        if self.check_cancel() or (skip_event and skip_event.is_set()) or (self.pause_event and self.pause_event.is_set() and not (current_attempt_downloaded_bytes > 0 or (total_size_bytes == 0 and response.status_code == 200))):
                            if os.path.exists(current_single_stream_part_path):
                                os.remove(current_single_stream_part_path)
                            break
                        attempt_is_complete = False
                        if response.status_code == 200:
                            if total_size_bytes > 0:
                                if current_attempt_downloaded_bytes == total_size_bytes:
                                    attempt_is_complete = True
                                else:
                                    self.logger(f"⚠️ Single-stream attempt for '{api_original_filename}' incomplete: received {current_attempt_downloaded_bytes} of {total_size_bytes} bytes.")
                            elif total_size_bytes == 0:
                                if current_attempt_downloaded_bytes > 0:
                                    self.logger(f"⚠️ Mismatch for '{api_original_filename}': Server reported 0 bytes, but received {current_attempt_downloaded_bytes} bytes this attempt.")
                                attempt_is_complete = True
                        else:
                            attempt_is_complete = True
                        if attempt_is_complete:
                            calculated_file_hash = md5_hasher.hexdigest()
                            downloaded_size_bytes = current_attempt_downloaded_bytes
                            downloaded_part_file_path = current_single_stream_part_path
                            download_successful_flag = True
                            break
                        else:
                            if os.path.exists(current_single_stream_part_path):
                                try:
                                    os.remove(current_single_stream_part_path)
                                except OSError as e_rem_part:
                                    self.logger(f"-> Failed to remove .part file after failed single stream attempt: {e_rem_part}")
                    except Exception as e_write:
                        self.logger(f"❌ Error writing single-stream to disk for '{api_original_filename}': {e_write}")
                        if os.path.exists(current_single_stream_part_path):
                            os.remove(current_single_stream_part_path)
                        raise
            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
                self.logger(f"❌ Download Error (Retryable): {api_original_filename}. Error: {e}")
                last_exception_for_retry_later = e
                if isinstance(e, requests.exceptions.ConnectionError) and ("Failed to resolve" in str(e) or "NameResolutionError" in str(e)):
                    self.logger("💡 This looks like a DNS resolution problem. Please check your internet connection, DNS settings, or VPN.")
            except requests.exceptions.RequestException as e:
                if e.response is not None and e.response.status_code == 403:
                    self.logger(f"⚠️ Download Error (403 Forbidden): {api_original_filename}. This often requires valid cookies.")
                    self.logger("Will retry... Check your 'Use Cookie' settings if this persists.")
                    last_exception_for_retry_later = e
                else:
                    self.logger(f"❌ Download Error (Non-Retryable): {api_original_filename}. Error: {e}")
                    last_exception_for_retry_later = e
                    is_permanent_error = True
                    break
            except Exception as e:
                self.logger(f"❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}")
                last_exception_for_retry_later = e
                is_permanent_error = True
                break
            finally:
                if response:
                    response.close()
                self._emit_signal('file_download_status', False)
                final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes
                self._emit_signal('file_progress', api_original_filename, (downloaded_size_bytes, final_total_for_progress))

        if (not download_successful_flag and
                isinstance(last_exception_for_retry_later, http.client.IncompleteRead) and
                total_size_bytes > 0 and downloaded_part_file_path and os.path.exists(downloaded_part_file_path)):
            try:
                actual_size = os.path.getsize(downloaded_part_file_path)
                if actual_size == total_size_bytes:
                    self.logger(f"✅ Rescued '{api_original_filename}': IncompleteRead error occurred, but file size matches. Proceeding with save.")
                    download_successful_flag = True
                    md5_hasher = hashlib.md5()
                    with open(downloaded_part_file_path, 'rb') as f_verify:
                        for chunk in iter(lambda: f_verify.read(8192), b""):
                            md5_hasher.update(chunk)
                    calculated_file_hash = md5_hasher.hexdigest()
            except Exception as rescue_exc:
                self.logger(f"⚠️ Failed to rescue file despite matching size. Error: {rescue_exc}")
        if self.check_cancel() or (skip_event and skip_event.is_set()) or (self.pause_event and self.pause_event.is_set() and not download_successful_flag):
            if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
                try:
                    os.remove(downloaded_part_file_path)
                except OSError:
                    pass
            return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
        if download_successful_flag:
            if self._check_pause(f"Post-download processing for '{api_original_filename}'"):
                if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
                    try:
                        os.remove(downloaded_part_file_path)
                    except OSError:
                        pass
                return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
            if (self.compress_images and downloaded_part_file_path and
                    is_image(api_original_filename) and
                    os.path.getsize(downloaded_part_file_path) > 1.5 * 1024 * 1024):
                self.logger(f"🔄 Compressing '{api_original_filename}' to WebP...")
                try:
                    with Image.open(downloaded_part_file_path) as img:
                        if img.mode not in ('RGB', 'RGBA'):
                            img = img.convert('RGBA')
                        output_buffer = BytesIO()
                        img.save(output_buffer, format='WebP', quality=85)
                        data_to_write_io = output_buffer
                        base, _ = os.path.splitext(filename_to_save_in_main_path)
                        filename_to_save_in_main_path = f"{base}.webp"
                        self.logger(f"✅ Compression successful. New size: {len(data_to_write_io.getvalue()) / (1024 * 1024):.2f} MB")
                except Exception as e_compress:
                    self.logger(f"⚠️ Failed to compress '{api_original_filename}': {e_compress}. Saving original file instead.")
                    data_to_write_io = None
            effective_save_folder = target_folder_path
            base_name, extension = os.path.splitext(filename_to_save_in_main_path)
            counter = 1
            final_filename_on_disk = filename_to_save_in_main_path
            final_save_path = os.path.join(effective_save_folder, final_filename_on_disk)
            while os.path.exists(final_save_path):
                final_filename_on_disk = f"{base_name}_{counter}{extension}"
                final_save_path = os.path.join(effective_save_folder, final_filename_on_disk)
                counter += 1
            if counter > 1:
                self.logger(f"⚠️ Filename collision: Saving as '{final_filename_on_disk}' instead.")
            try:
                if data_to_write_io:
                    with open(final_save_path, 'wb') as f_out:
                        f_out.write(data_to_write_io.getvalue())
                    if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
                        try:
                            os.remove(downloaded_part_file_path)
                        except OSError as e_rem:
                            self.logger(f"-> Failed to remove .part after compression: {e_rem}")
                else:
                    if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
                        time.sleep(0.1)
                        os.rename(downloaded_part_file_path, final_save_path)
                    else:
                        raise FileNotFoundError(f"Original .part file not found for saving: {downloaded_part_file_path}")
                try:
                    parsed_url_for_hash = urlparse(file_url)
                    url_hash = os.path.basename(parsed_url_for_hash.path).split('.')[0]
                    with self.downloaded_hash_counts_lock:
                        self.downloaded_hash_counts[url_hash] += 1
                except Exception as e:
                    self.logger(f"⚠️ Could not update post-download hash count: {e}")
                final_filename_saved_for_return = final_filename_on_disk
                self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024 * 1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'")
                downloaded_file_details = {
                    'disk_filename': final_filename_saved_for_return,
                    'post_title': post_title,
                    'post_id': original_post_id_for_log,
                    'upload_date_str': self.post.get('published') or self.post.get('added') or "N/A",
                    'download_timestamp': time.time(),
                    'download_path': effective_save_folder,
                    'service': self.service,
                    'user_id': self.user_id,
                    'api_original_filename': api_original_filename,
                    'folder_context_name': folder_context_name_for_history or os.path.basename(effective_save_folder)
                }
                self._emit_signal('file_successfully_downloaded', downloaded_file_details)
                time.sleep(0.05)
                return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SUCCESS, None
            except Exception as save_err:
                self.logger(f"->>Save Fail for '{final_filename_on_disk}': {save_err}")
                if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
                    try:
                        os.remove(downloaded_part_file_path)
                        self.logger(f"Cleaned up temporary file after save error: {os.path.basename(downloaded_part_file_path)}")
                    except OSError as e_rem:
                        self.logger(f"⚠️ Could not clean up temporary file '{os.path.basename(downloaded_part_file_path)}' after save error: {e_rem}")
                if os.path.exists(final_save_path):
                    try:
                        os.remove(final_save_path)
                    except OSError:
                        self.logger(f"-> Failed to remove partially saved file: {final_save_path}")
                permanent_failure_details = {
                    'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': file_download_headers,
                    'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title,
                    'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post,
                    'forced_filename_override': filename_to_save_in_main_path,
                    'service': self.service,
                    'user_id': self.user_id
                }
                return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details
            finally:
                if data_to_write_io and hasattr(data_to_write_io, 'close'):
                    data_to_write_io.close()
        else:
            self.logger(f"->>Download Fail for '{api_original_filename}' (Post ID: {original_post_id_for_log}). No successful download after retries.")
            details_for_failure = {
                'file_info': file_info,
                'target_folder_path': target_folder_path,
                'headers': file_download_headers,
                'original_post_id_for_log': original_post_id_for_log,
                'post_title': post_title,
                'file_index_in_post': file_index_in_post,
                'num_files_in_this_post': num_files_in_this_post,
                'forced_filename_override': filename_to_save_in_main_path,
                'added': self.post.get('added'),
                'published': self.post.get('published'),
                'edited': self.post.get('edited')
            }
            if is_permanent_error:
                return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, details_for_failure
            else:
                return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, details_for_failure

    def process(self):
        result_tuple = (0, 0, [], [], [], None, None)
        try:
            if self.service == 'discord':
                post_title = self.post.get('content', '') or f"Message {self.post.get('id', 'N/A')}"
                post_id = self.post.get('id', 'unknown_id')
                post_main_file_info = {}
                post_attachments = self.post.get('attachments', [])
                post_content_html = self.post.get('content', '')
                post_data = self.post
                log_prefix = "Message"
            else:
                post_title = self.post.get('title', '') or 'untitled_post'
                post_id = self.post.get('id', 'unknown_id')
                post_main_file_info = self.post.get('file')
                post_attachments = self.post.get('attachments', [])
                post_content_html = self.post.get('content', '')
                post_data = self.post
                log_prefix = "Post"

            content_is_needed = (
                self.show_external_links or
                self.extract_links_only or
                self.scan_content_for_images or
                (self.filter_mode == 'text_only' and self.text_only_scope == 'content')
            )
            if content_is_needed and self.post.get('content') is None and self.service != 'discord':
                self.logger(f"Post {post_id} is missing 'content' field, fetching full data...")
                parsed_url = urlparse(self.api_url_input)
                api_domain = parsed_url.netloc
                creator_page_url = f"https://{api_domain}/{self.service}/user/{self.user_id}"
                headers = {
                    'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
                    'Referer': creator_page_url,
                    'Accept': 'text/css'
                }
                cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
                full_post_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
                if full_post_data:
                    self.logger("✅ Full post data fetched successfully.")
                    self.post = full_post_data
                    post_title = self.post.get('title', '') or 'untitled_post'
                    post_main_file_info = self.post.get('file')
                    post_attachments = self.post.get('attachments', [])
                    post_content_html = self.post.get('content', '')
                    post_data = self.post
                else:
                    self.logger(f"⚠️ Failed to fetch full content for post {post_id}. Content-dependent features may not work for this post.")
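            # Lazy fetch: list endpoints can return posts without the 'content'
            # body, so the full post is only re-requested when a content-dependent
            # feature (link extraction, image scan, text-only content export) needs it.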
total_downloaded_this_post = 0
total_skipped_this_post = 0
determined_post_save_path_for_history = self . override_output_dir if self . override_output_dir else self . download_root
if self . _check_pause ( f " { log_prefix } processing for ID { post_id } " ) :
result_tuple = ( 0 , 0 , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
if self . check_cancel ( ) :
result_tuple = ( 0 , 0 , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
current_character_filters = self . _get_current_character_filters ( )
kept_original_filenames_for_log = [ ]
retryable_failures_this_post = [ ]
permanent_failures_this_post = [ ]
history_data_for_this_post = None
parsed_api_url = urlparse ( self . api_url_input )
if self . service == ' discord ' :
server_id = self . user_id
channel_id = self . post . get ( ' channel ' , ' unknown_channel ' )
post_page_url = f " https:// { parsed_api_url . netloc } /discord/server/ { server_id } / { channel_id } "
else :
post_page_url = f " https:// { parsed_api_url . netloc } / { self . service } /user/ { self . user_id } /post/ { post_id } "
headers = {
' User-Agent ' : ' Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) ' ,
' Referer ' : post_page_url ,
' Accept ' : ' text/css '
}
link_pattern = re . compile ( r """ <a \ s+.*?href=[ " ' ](https?://[^ " ' ]+)[ " ' ][^>]*>(.*?)</a> """ , re . IGNORECASE | re . DOTALL )
effective_unwanted_keywords_for_folder_naming = self . unwanted_keywords . copy ( )
is_full_creator_download_no_char_filter = not self . target_post_id_from_initial_url and not current_character_filters
if ( self . show_external_links or self . extract_links_only ) :
unique_links_data = { }
links_emitted_count = 0
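# Stage 1: Check the post's 'embed' field for a direct link.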
embed_data = post_data . get ( ' embed ' )
if isinstance ( embed_data , dict ) and embed_data . get ( ' url ' ) :
embed_url = embed_data [ ' url ' ]
embed_subject = embed_data . get ( ' subject ' , embed_url )
unique_links_data [ embed_url ] = embed_subject
if post_content_html :
try :
# Stage 2: Find all clickable <a> tag links
for match in link_pattern . finditer ( post_content_html ) :
link_url = html . unescape ( match . group ( 1 ) . strip ( ) )
if not any ( ext in link_url . lower ( ) for ext in [ ' .css ' , ' .js ' , ' .ico ' , ' .xml ' , ' .svg ' ] ) and not link_url . startswith ( ' javascript: ' ) :
clean_link_text = html . unescape ( re . sub ( r ' <.*?> ' , ' ' , match . group ( 2 ) ) ) . strip ( )
unique_links_data [ link_url ] = clean_link_text if clean_link_text else " [Link] "
# Stage 3: Find all plain-text URLs
plain_text_content = strip_html_tags ( post_content_html )
plain_text_url_pattern = re . compile ( r """ \ b(https?://[^ \ s " ' <> \ [ \ ] \ { \ } \ | \ ^ \ \ ^~ \ [ \ ]`]+) """ , re . IGNORECASE )
for match in plain_text_url_pattern . finditer ( plain_text_content ) :
link_url = html . unescape ( match . group ( 1 ) . strip ( ) )
if link_url not in unique_links_data :
unique_links_data [ link_url ] = " [Plain Text Link] "
# Stage 4: Process all unique links found
scraped_platforms = { ' kemono ' , ' coomer ' , ' patreon ' }
for link_url , link_text in unique_links_data . items ( ) :
platform = get_link_platform ( link_url )
decryption_key_found = " "
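# Mega links: try to recover the decryption key (a 22- or 43-character base64url token) from the URL fragment, the link text, or the full post content.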
if platform == ' mega ' :
mega_key_pattern = re . compile ( r ' \ b([a-zA-Z0-9_-] {43} |[a-zA-Z0-9_-] {22} ) \ b ' )
parsed_mega_url = urlparse ( link_url )
if parsed_mega_url . fragment :
potential_key_from_fragment = parsed_mega_url . fragment . split ( ' ! ' ) [ - 1 ]
if mega_key_pattern . fullmatch ( potential_key_from_fragment ) :
decryption_key_found = potential_key_from_fragment
if not decryption_key_found and link_text :
key_match_in_text = mega_key_pattern . search ( link_text )
if key_match_in_text :
decryption_key_found = key_match_in_text . group ( 1 )
if not decryption_key_found and self . extract_links_only and post_content_html :
key_match_in_content = mega_key_pattern . search ( strip_html_tags ( post_content_html ) )
if key_match_in_content :
decryption_key_found = key_match_in_content . group ( 1 )
final_link_url_to_emit = link_url
if platform == ' mega ' and decryption_key_found :
parsed_url = urlparse ( link_url )
if decryption_key_found not in ( parsed_url . fragment or ' ' ) :
base_url = link_url . split ( ' # ' ) [ 0 ]
final_link_url_to_emit = f " { base_url } # { decryption_key_found } "
self . logger ( f " ℹ ️ Combined Mega link and key: { final_link_url_to_emit } " )
if platform not in scraped_platforms :
self . _emit_signal ( ' external_link ' , post_title , link_text , final_link_url_to_emit , platform , " " )
links_emitted_count + = 1
if links_emitted_count > 0 : self . logger ( f " 🔗 Found { links_emitted_count } potential external link(s) in post content. " )
except Exception as e :
self . logger ( f " ⚠️ Error parsing post content for links: { e } \n { traceback . format_exc ( limit = 2 ) } " )
elif self . extract_links_only and not unique_links_data :
self . logger ( f " ℹ ️ Post { post_id } contains no text content to scan for links. " )
if is_full_creator_download_no_char_filter and self . creator_download_folder_ignore_words :
self . logger ( f " Applying creator download specific folder ignore words ( { len ( self . creator_download_folder_ignore_words ) } words). " )
effective_unwanted_keywords_for_folder_naming . update ( self . creator_download_folder_ignore_words )
if not self . extract_links_only :
self . logger ( f " \n --- Processing { log_prefix } { post_id } ( ' { post_title [ : 50 ] } ... ' ) (Thread: { threading . current_thread ( ) . name } ) --- " )
num_potential_files_in_post = len ( post_attachments or [ ] ) + ( 1 if post_main_file_info and post_main_file_info . get ( ' path ' ) else 0 )
post_is_candidate_by_title_char_match = False
char_filter_that_matched_title = None
post_is_candidate_by_comment_char_match = False
post_is_candidate_by_file_char_match_in_comment_scope = False
char_filter_that_matched_file_in_comment_scope = None
char_filter_that_matched_comment = None
if current_character_filters and ( self . char_filter_scope == CHAR_SCOPE_TITLE or self . char_filter_scope == CHAR_SCOPE_BOTH ) :
if self . _check_pause ( f " Character title filter for post { post_id } " ) :
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
for idx , filter_item_obj in enumerate ( current_character_filters ) :
if self . check_cancel ( ) : break
terms_to_check_for_title = list ( filter_item_obj [ " aliases " ] )
if filter_item_obj [ " is_group " ] :
if filter_item_obj [ " name " ] not in terms_to_check_for_title :
terms_to_check_for_title . append ( filter_item_obj [ " name " ] )
unique_terms_for_title_check = list ( set ( terms_to_check_for_title ) )
for term_to_match in unique_terms_for_title_check :
match_found_for_term = is_title_match_for_character ( post_title , term_to_match )
if match_found_for_term :
post_is_candidate_by_title_char_match = True
char_filter_that_matched_title = filter_item_obj
self . logger ( f " Post title matches char filter term ' { term_to_match } ' (from group/name ' { filter_item_obj [ ' name ' ] } ' , Scope: { self . char_filter_scope } ). Post is candidate. " )
break
if post_is_candidate_by_title_char_match : break
all_files_from_post_api_for_char_check = [ ]
api_file_domain_for_char_check = urlparse ( self . api_url_input ) . netloc
if not api_file_domain_for_char_check or not any ( d in api_file_domain_for_char_check . lower ( ) for d in [ ' kemono.su ' , ' kemono.party ' , ' kemono.cr ' , ' coomer.su ' , ' coomer.party ' , ' coomer.st ' ] ) :
api_file_domain_for_char_check = " kemono.cr " if " kemono " in self . service . lower ( ) else " coomer.st "
if post_main_file_info and isinstance ( post_main_file_info , dict ) and post_main_file_info . get ( ' path ' ) :
original_api_name = post_main_file_info . get ( ' name ' ) or os . path . basename ( post_main_file_info [ ' path ' ] . lstrip ( ' / ' ) )
if original_api_name :
all_files_from_post_api_for_char_check . append ( { ' _original_name_for_log ' : original_api_name } )
for att_info in post_attachments :
if isinstance ( att_info , dict ) and att_info . get ( ' path ' ) :
original_api_att_name = att_info . get ( ' name ' ) or os . path . basename ( att_info [ ' path ' ] . lstrip ( ' / ' ) )
if original_api_att_name :
all_files_from_post_api_for_char_check . append ( { ' _original_name_for_log ' : original_api_att_name } )
if current_character_filters and self . char_filter_scope == CHAR_SCOPE_COMMENTS and self . service != ' discord ' :
self . logger ( f " [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID ' { post_id } ' . " )
if self . _check_pause ( f " File check (comments scope) for post { post_id } " ) :
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
for file_info_item in all_files_from_post_api_for_char_check :
if self . check_cancel ( ) : break
current_api_original_filename_for_check = file_info_item . get ( ' _original_name_for_log ' )
if not current_api_original_filename_for_check : continue
for filter_item_obj in current_character_filters :
terms_to_check = list ( filter_item_obj [ " aliases " ] )
if filter_item_obj [ " is_group " ] and filter_item_obj [ " name " ] not in terms_to_check :
terms_to_check . append ( filter_item_obj [ " name " ] )
for term_to_match in terms_to_check :
if is_filename_match_for_character ( current_api_original_filename_for_check , term_to_match ) :
post_is_candidate_by_file_char_match_in_comment_scope = True
char_filter_that_matched_file_in_comment_scope = filter_item_obj
self . logger ( f " Match Found (File in Comments Scope): File ' { current_api_original_filename_for_check } ' matches char filter term ' { term_to_match } ' (from group/name ' { filter_item_obj [ ' name ' ] } ' ). Post is candidate. " )
break
if post_is_candidate_by_file_char_match_in_comment_scope : break
if post_is_candidate_by_file_char_match_in_comment_scope : break
self . logger ( f " [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = { post_is_candidate_by_file_char_match_in_comment_scope } " )
if current_character_filters and self . char_filter_scope == CHAR_SCOPE_COMMENTS and self . service != ' discord ' :
if not post_is_candidate_by_file_char_match_in_comment_scope :
if self . _check_pause ( f " Comment check for post { post_id } " ) :
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
self . logger ( f " [Char Scope: Comments] Phase 2: No file match found. Checking post comments for post ID ' { post_id } ' . " )
try :
parsed_input_url_for_comments = urlparse ( self . api_url_input )
api_domain_for_comments = parsed_input_url_for_comments . netloc
if not any ( d in api_domain_for_comments . lower ( ) for d in [ ' kemono.su ' , ' kemono.party ' , ' kemono.cr ' , ' coomer.su ' , ' coomer.party ' , ' coomer.st ' ] ) :
self . logger ( f " ⚠️ Unrecognized domain ' { api_domain_for_comments } ' for comment API. Defaulting based on service. " )
api_domain_for_comments = " kemono.cr " if " kemono " in self . service . lower ( ) else " coomer.st "
comments_data = fetch_post_comments (
api_domain_for_comments , self . service , self . user_id , post_id ,
headers , self . logger , self . cancellation_event , self . pause_event ,
cookies_dict = prepare_cookies_for_request (
self . use_cookie , self . cookie_text , self . selected_cookie_file , self . app_base_dir , self . logger
)
)
if comments_data :
self . logger ( f " Fetched { len ( comments_data ) } comments for post { post_id } . " )
for comment_item_idx , comment_item in enumerate ( comments_data ) :
if self . check_cancel ( ) : break
raw_comment_content = comment_item . get ( ' content ' , ' ' )
if not raw_comment_content : continue
cleaned_comment_text = strip_html_tags ( raw_comment_content )
if not cleaned_comment_text . strip ( ) : continue
for filter_item_obj in current_character_filters :
terms_to_check_comment = list ( filter_item_obj [ " aliases " ] )
if filter_item_obj [ " is_group " ] and filter_item_obj [ " name " ] not in terms_to_check_comment :
terms_to_check_comment . append ( filter_item_obj [ " name " ] )
for term_to_match_comment in terms_to_check_comment :
if is_title_match_for_character ( cleaned_comment_text , term_to_match_comment ) :
post_is_candidate_by_comment_char_match = True
char_filter_that_matched_comment = filter_item_obj
self . logger ( f " Match Found (Comment in Comments Scope): Comment in post { post_id } matches char filter term ' { term_to_match_comment } ' (from group/name ' { filter_item_obj [ ' name ' ] } ' ). Post is candidate. " )
self . logger ( f " Matching comment (first 100 chars): ' { cleaned_comment_text [ : 100 ] } ... ' " )
break
if post_is_candidate_by_comment_char_match : break
if post_is_candidate_by_comment_char_match : break
else :
self . logger ( f " No comments found or fetched for post { post_id } to check against character filters. " )
except RuntimeError as e_fetch_comment :
self . logger ( f " ⚠️ Error fetching or processing comments for post { post_id } : { e_fetch_comment } " )
except Exception as e_generic_comment :
self . logger ( f " ❌ Unexpected error during comment processing for post { post_id } : { e_generic_comment } \n { traceback . format_exc ( limit = 2 ) } " )
self . logger ( f " [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = { post_is_candidate_by_comment_char_match } " )
else :
self . logger ( f " [Char Scope: Comments] Phase 2: Skipped comment check for post ID ' { post_id } ' because a file match already made it a candidate. " )
if current_character_filters :
if self . char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match :
self . logger ( f " -> Skip Post (Scope: Title - No Char Match): Title ' { post_title [ : 50 ] } ' does not match character filters. " )
self . _emit_signal ( ' missed_character_post ' , post_title , " No title match for character filter " )
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
if self . char_filter_scope == CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match :
self . logger ( f " -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID ' { post_id } ' , Title ' { post_title [ : 50 ] } ... ' " )
if self . emitter and hasattr ( self . emitter , ' missed_character_post_signal ' ) :
self . _emit_signal ( ' missed_character_post ' , post_title , " No character match in files or comments (Comments scope) " )
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
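# Text-only mode: export the post content or its comments as .txt/.pdf/.docx, or stash a temp JSON when compiling a single combined PDF.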
if self . filter_mode == ' text_only ' and not self . extract_links_only :
self . logger ( f " Mode: Text Only (Scope: { self . text_only_scope } ) " )
post_title_lower = post_title . lower ( )
if self . skip_words_list and ( self . skip_words_scope == SKIP_SCOPE_POSTS or self . skip_words_scope == SKIP_SCOPE_BOTH ) :
for skip_word in self . skip_words_list :
if skip_word . lower ( ) in post_title_lower :
self . logger ( f " -> Skip Post (Keyword in Title ' { skip_word } ' ): ' { post_title [ : 50 ] } ... ' . " )
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope :
self . logger ( f " -> Skip Post (No character match for text extraction): ' { post_title [ : 50 ] } ... ' . " )
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
raw_text_content = " "
comments_data = [ ]
final_post_data = post_data
if self . text_only_scope == ' content ' and ' content ' not in final_post_data :
self . logger ( f " Post { post_id } is missing ' content ' field, fetching full data... " )
parsed_url = urlparse ( self . api_url_input )
api_domain = parsed_url . netloc
cookies = prepare_cookies_for_request ( self . use_cookie , self . cookie_text , self . selected_cookie_file , self . app_base_dir , self . logger , target_domain = api_domain )
full_data = fetch_single_post_data ( api_domain , self . service , self . user_id , post_id , headers , self . logger , cookies_dict = cookies )
if full_data :
final_post_data = full_data
if self . text_only_scope == ' content ' :
raw_text_content = final_post_data . get ( ' content ' , ' ' )
elif self . text_only_scope == ' comments ' :
try :
parsed_url = urlparse ( self . api_url_input )
api_domain = parsed_url . netloc
comments_data = fetch_post_comments ( api_domain , self . service , self . user_id , post_id , headers , self . logger , self . cancellation_event , self . pause_event )
if comments_data :
comment_texts = [ ]
for comment in comments_data :
user = comment . get ( ' commenter_name ' , ' Unknown User ' )
timestamp = comment . get ( ' published ' , ' No Date ' )
body = strip_html_tags ( comment . get ( ' content ' , ' ' ) )
comment_texts . append ( f " --- Comment by { user } on { timestamp } --- \n { body } \n " )
raw_text_content = " \n " . join ( comment_texts )
else :
raw_text_content = " "
except Exception as e :
self . logger ( f " ❌ Error fetching comments for text-only mode: { e } " )
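# Convert HTML content to plain text: </p> and <br> become newlines, remaining tags are stripped, entities unescaped. Illustrative example: "<p>Hi<br>there</p>" becomes "Hi\nthere".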
cleaned_text = " "
if self . text_only_scope == ' content ' :
if not raw_text_content :
cleaned_text = " "
else :
text_with_newlines = re . sub ( r ' (?i)</p>|<br \ s*/?> ' , ' \n ' , raw_text_content )
just_text = re . sub ( r ' <.*?> ' , ' ' , text_with_newlines )
cleaned_text = html . unescape ( just_text ) . strip ( )
else :
cleaned_text = raw_text_content
cleaned_text = cleaned_text . replace ( ' … ' , ' ... ' )
if not cleaned_text . strip ( ) :
self . logger ( " -> Skip Saving Text: No content/comments found or fetched. " )
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
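# Single-PDF mode: write this post's text to a temporary JSON file; the combined PDF is assembled later from these temp files (see the "single PDF compilation" log below).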
if self . single_pdf_mode :
content_data = {
' title ' : post_title ,
' published ' : self . post . get ( ' published ' ) or self . post . get ( ' added ' )
}
if self . text_only_scope == ' comments ' :
if not comments_data :
result_tuple = ( 0 , 0 , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
content_data [ ' comments ' ] = comments_data
else :
if not cleaned_text . strip ( ) :
result_tuple = ( 0 , 0 , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
content_data [ ' content ' ] = cleaned_text
temp_dir = os . path . join ( self . app_base_dir , " appdata " )
os . makedirs ( temp_dir , exist_ok = True )
temp_filename = f " tmp_ { post_id } _ { uuid . uuid4 ( ) . hex [ : 8 ] } .json "
temp_filepath = os . path . join ( temp_dir , temp_filename )
try :
with open ( temp_filepath , ' w ' , encoding = ' utf-8 ' ) as f :
json . dump ( content_data , f , indent = 2 )
self . logger ( f " Saved temporary data for ' { post_title } ' for single PDF compilation. " )
result_tuple = ( 0 , 0 , [ ] , [ ] , [ ] , None , temp_filepath )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
except Exception as e :
self . logger ( f " ❌ Failed to write temporary file for single PDF: { e } " )
result_tuple = ( 0 , 0 , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
else :
file_extension = self . text_export_format
txt_filename = clean_filename ( post_title ) + f " . { file_extension } "
final_save_path = os . path . join ( determined_post_save_path_for_history , txt_filename )
try :
os . makedirs ( determined_post_save_path_for_history , exist_ok = True )
base , ext = os . path . splitext ( final_save_path )
counter = 1
while os . path . exists ( final_save_path ) :
final_save_path = f " { base } _ { counter } { ext } "
counter + = 1
if file_extension == ' pdf ' :
if FPDF :
self . logger ( f " Creating formatted PDF for { ' comments ' if self . text_only_scope == ' comments ' else ' content ' } ... " )
pdf = PDF ( )
base_path = self . project_root_dir
font_path = " "
bold_font_path = " "
if base_path :
font_path = os . path . join ( base_path , ' data ' , ' dejavu-sans ' , ' DejaVuSans.ttf ' )
bold_font_path = os . path . join ( base_path , ' data ' , ' dejavu-sans ' , ' DejaVuSans-Bold.ttf ' )
try :
if not os . path . exists ( font_path ) : raise RuntimeError ( f " Font file not found: { font_path } " )
if not os . path . exists ( bold_font_path ) : raise RuntimeError ( f " Bold font file not found: { bold_font_path } " )
pdf . add_font ( ' DejaVu ' , ' ' , font_path , uni = True )
pdf . add_font ( ' DejaVu ' , ' B ' , bold_font_path , uni = True )
default_font_family = ' DejaVu '
except Exception as font_error :
self . logger ( f " ⚠️ Could not load DejaVu font: { font_error } . Falling back to Arial. " )
default_font_family = ' Arial '
pdf . add_page ( )
pdf . set_font ( default_font_family , ' B ' , 16 )
pdf . multi_cell ( 0 , 10 , post_title )
pdf . ln ( 10 )
if self . text_only_scope == ' comments ' :
if not comments_data :
self . logger ( " -> Skip PDF Creation: No comments to process. " )
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
for i , comment in enumerate ( comments_data ) :
user = comment . get ( ' commenter_name ' , ' Unknown User ' )
timestamp = comment . get ( ' published ' , ' No Date ' )
body = strip_html_tags ( comment . get ( ' content ' , ' ' ) )
pdf . set_font ( default_font_family , ' ' , 10 )
pdf . write ( 8 , " Comment by: " )
pdf . set_font ( default_font_family , ' B ' , 10 )
pdf . write ( 8 , user )
pdf . set_font ( default_font_family , ' ' , 10 )
pdf . write ( 8 , f " on { timestamp } " )
pdf . ln ( 10 )
pdf . set_font ( default_font_family , ' ' , 11 )
pdf . multi_cell ( 0 , 7 , body )
if i < len ( comments_data ) - 1 :
pdf . ln ( 5 )
pdf . cell ( 0 , 0 , ' ' , border = ' T ' )
pdf . ln ( 5 )
else :
pdf . set_font ( default_font_family , ' ' , 12 )
pdf . multi_cell ( 0 , 7 , cleaned_text )
pdf . output ( final_save_path )
else :
self . logger ( f " ⚠️ Cannot create PDF: ' fpdf2 ' library not installed. Saving as .txt. " )
final_save_path = os . path . splitext ( final_save_path ) [ 0 ] + " .txt "
with open ( final_save_path , ' w ' , encoding = ' utf-8 ' ) as f : f . write ( cleaned_text )
elif file_extension == ' docx ' :
if Document :
self . logger ( f " Converting to DOCX... " )
document = Document ( )
document . add_paragraph ( cleaned_text )
document . save ( final_save_path )
else :
self . logger ( f " ⚠️ Cannot create DOCX: ' python-docx ' library not installed. Saving as .txt. " )
final_save_path = os . path . splitext ( final_save_path ) [ 0 ] + " .txt "
with open ( final_save_path , ' w ' , encoding = ' utf-8 ' ) as f : f . write ( cleaned_text )
else : # TXT file
with open ( final_save_path , ' w ' , encoding = ' utf-8 ' ) as f :
f . write ( cleaned_text )
self . logger ( f " ✅ Saved Text: ' { os . path . basename ( final_save_path ) } ' in ' { os . path . basename ( determined_post_save_path_for_history ) } ' " )
result_tuple = ( 1 , num_potential_files_in_post , [ ] , [ ] , [ ] , history_data_for_this_post , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
except Exception as e :
self . logger ( f " ❌ Critical error saving text file ' { txt_filename } ' : { e } " )
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
if not self . extract_links_only and self . manga_mode_active and current_character_filters and ( self . char_filter_scope == CHAR_SCOPE_TITLE or self . char_filter_scope == CHAR_SCOPE_BOTH ) and not post_is_candidate_by_title_char_match :
self . logger ( f " -> Skip Post (Renaming Mode with Title/Both Scope - No Title Char Match): Title ' { post_title [ : 50 ] } ' doesn ' t match filters. " )
self . _emit_signal ( ' missed_character_post ' , post_title , " Renaming Mode: No title match for character filter (Title/Both scope) " )
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
if not isinstance ( post_attachments , list ) :
self . logger ( f " ⚠️ Corrupt attachment data for post { post_id } (expected list, got { type ( post_attachments ) } ). Skipping attachments. " )
post_attachments = [ ]
should_create_post_subfolder = self . use_post_subfolders
if ( not self . use_post_subfolders and self . use_subfolders and
self . sfp_threshold is not None and num_potential_files_in_post > = self . sfp_threshold ) :
self . logger ( f " ℹ ️ Post has { num_potential_files_in_post } files (≥ { self . sfp_threshold } ). Activating Subfolder per Post via [sfp] command. " )
should_create_post_subfolder = True
base_folder_names_for_post_content = [ ]
determined_post_save_path_for_history = self . override_output_dir if self . override_output_dir else self . download_root
if not self . extract_links_only and self . use_subfolders :
if self . _check_pause ( f " Subfolder determination for post { post_id } " ) :
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
primary_char_filter_for_folder = None
log_reason_for_folder = " "
known_name_match_found = False
if self . char_filter_scope == CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment :
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope :
primary_char_filter_for_folder = char_filter_that_matched_file_in_comment_scope
log_reason_for_folder = " Matched char filter in filename (Comments scope) "
elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment :
primary_char_filter_for_folder = char_filter_that_matched_comment
log_reason_for_folder = " Matched char filter in comments (Comments scope, no file match) "
elif ( self . char_filter_scope == CHAR_SCOPE_TITLE or self . char_filter_scope == CHAR_SCOPE_BOTH ) and char_filter_that_matched_title :
primary_char_filter_for_folder = char_filter_that_matched_title
log_reason_for_folder = " Matched char filter in title "
if primary_char_filter_for_folder :
known_name_match_found = True
cleaned_primary_folder_name = clean_folder_name ( primary_char_filter_for_folder [ " name " ] )
if cleaned_primary_folder_name . lower ( ) in effective_unwanted_keywords_for_folder_naming and cleaned_primary_folder_name . lower ( ) != " untitled_folder " :
self . logger ( f " ⚠️ Primary char filter folder name ' { cleaned_primary_folder_name } ' is in ignore list. Using generic name. " )
base_folder_names_for_post_content = [ " Generic Post Content " ]
else :
base_folder_names_for_post_content = [ cleaned_primary_folder_name ]
self . logger ( f " Base folder name(s) for post content ( { log_reason_for_folder } ): { ' , ' . join ( base_folder_names_for_post_content ) } " )
elif not current_character_filters :
derived_folders_from_title_via_known_txt = match_folders_from_title (
post_title , self . known_names , effective_unwanted_keywords_for_folder_naming
)
valid_derived_folders_from_title_known_txt = [ name for name in derived_folders_from_title_via_known_txt if name and name . strip ( ) and name . lower ( ) != " untitled_folder " ]
if valid_derived_folders_from_title_known_txt :
known_name_match_found = True
first_match = valid_derived_folders_from_title_known_txt [ 0 ]
base_folder_names_for_post_content . append ( first_match )
self . logger ( f " Base folder name for post content (First match from Known.txt & Title): ' { first_match } ' " )
elif self . char_filter_scope == CHAR_SCOPE_BOTH :
self . logger ( f " -> No folder match from title for post ' { post_id } ' . Checking filenames (Scope: Both)... " )
for file_info_for_fallback in all_files_from_post_api_for_char_check :
filename_for_fallback = file_info_for_fallback . get ( ' _original_name_for_log ' )
if not filename_for_fallback :
continue
matched_folders_from_filename = match_folders_from_filename_enhanced (
filename_for_fallback , self . known_names , effective_unwanted_keywords_for_folder_naming
)
if matched_folders_from_filename :
known_name_match_found = True
first_match = matched_folders_from_filename [ 0 ]
base_folder_names_for_post_content . append ( first_match )
self . logger ( f " Base folder name for post content (First match from Known.txt & Filename ' { filename_for_fallback } ' ): ' { first_match } ' " )
break
if self . handle_unknown_mode and not known_name_match_found :
self . logger ( f " ℹ ️ [unknown] mode: No match in Known.txt. Creating parent folder from post title ' { post_title } ' . " )
post_title_as_folder = robust_clean_name ( post_title )
base_folder_names_for_post_content = [ post_title_as_folder ]
should_create_post_subfolder = False
else :
if not known_name_match_found :
extracted_name_from_title_full_ignore = extract_folder_name_from_title (
post_title , effective_unwanted_keywords_for_folder_naming
)
base_folder_names_for_post_content . append ( extracted_name_from_title_full_ignore )
self . logger ( f " Base folder name(s) for post content (Generic title parsing): { ' , ' . join ( base_folder_names_for_post_content ) } " )
if base_folder_names_for_post_content :
determined_post_save_path_for_history = os . path . join ( determined_post_save_path_for_history , base_folder_names_for_post_content [ 0 ] )
if not self . extract_links_only and should_create_post_subfolder :
cleaned_post_title_for_sub = robust_clean_name ( post_title )
max_folder_len = 100
if len ( cleaned_post_title_for_sub ) > max_folder_len :
cleaned_post_title_for_sub = cleaned_post_title_for_sub [ : max_folder_len ] . strip ( )
post_id_for_fallback = self . post . get ( ' id ' , ' unknown_id ' )
if not cleaned_post_title_for_sub or cleaned_post_title_for_sub == " untitled_folder " :
self . logger ( f " ⚠️ Post title ' { post_title } ' resulted in a generic subfolder name. Using ' post_ { post_id_for_fallback } ' as base. " )
original_cleaned_post_title_for_sub = f " post_ { post_id_for_fallback } "
else :
original_cleaned_post_title_for_sub = cleaned_post_title_for_sub
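# Illustrative example: with date_prefix_format "YYYY-MM-DD {post}" and a post published 2025-07-15 titled "Beach Day", the subfolder becomes "2025-07-15 Beach Day".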
if self . use_date_prefix_for_subfolder :
published_date_str = self . post . get ( ' published ' ) or self . post . get ( ' added ' )
post_id_for_format = str ( self . post . get ( ' id ' , ' ' ) ) # Get the post ID
# Start with the user's format string from settings
final_subfolder_name = self . date_prefix_format
# 1. Replace date placeholders if a date is available
if published_date_str :
try :
dt_obj = datetime . fromisoformat ( published_date_str )
final_subfolder_name = final_subfolder_name . replace ( " YYYY " , dt_obj . strftime ( " % Y " ) )
final_subfolder_name = final_subfolder_name . replace ( " MM " , dt_obj . strftime ( " % m " ) )
final_subfolder_name = final_subfolder_name . replace ( " DD " , dt_obj . strftime ( " %d " ) )
except ( ValueError , TypeError ) as e :
self . logger ( f " ⚠️ Could not parse date ' { published_date_str } ' . Date placeholders will be skipped. Error: { e } " )
# 2. Perform case-insensitive replacement for {post} and {postid}
final_subfolder_name = re . sub ( r ' {post} ' , original_cleaned_post_title_for_sub , final_subfolder_name , flags = re . IGNORECASE )
final_subfolder_name = re . sub ( r ' {postid} ' , post_id_for_format , final_subfolder_name , flags = re . IGNORECASE )
# 3. The result of all replacements becomes the new folder name
original_cleaned_post_title_for_sub = final_subfolder_name . strip ( )
self . logger ( f " ℹ ️ Applying custom subfolder format: ' { original_cleaned_post_title_for_sub } ' " )
base_path_for_post_subfolder = determined_post_save_path_for_history
suffix_counter = 0
folder_creation_successful = False
final_post_subfolder_name = " "
post_id_for_folder = str ( self . post . get ( ' id ' , ' unknown_id ' ) )
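# A hidden ".postid_<id>" marker file records which post owns a subfolder, so re-runs reuse the same folder while name clashes from other posts get a numeric suffix.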
while not folder_creation_successful :
if suffix_counter == 0 :
name_candidate = original_cleaned_post_title_for_sub
else :
name_candidate = f " { original_cleaned_post_title_for_sub } _ { suffix_counter } "
potential_post_subfolder_path = os . path . join ( base_path_for_post_subfolder , name_candidate )
id_file_path = os . path . join ( potential_post_subfolder_path , f " .postid_ { post_id_for_folder } " )
if not os . path . isdir ( potential_post_subfolder_path ) :
try :
os . makedirs ( potential_post_subfolder_path )
with open ( id_file_path , ' w ' ) as f :
f . write ( post_id_for_folder )
final_post_subfolder_name = name_candidate
folder_creation_successful = True
if suffix_counter > 0 :
self . logger ( f " Post subfolder name conflict: Using ' { final_post_subfolder_name } ' to avoid mixing posts. " )
except OSError as e_mkdir :
self . logger ( f " ❌ Error creating directory ' { potential_post_subfolder_path } ' : { e_mkdir } . " )
final_post_subfolder_name = original_cleaned_post_title_for_sub
break
else :
if os . path . exists ( id_file_path ) :
self . logger ( f " ℹ ️ Re-using existing post subfolder: ' { name_candidate } ' " )
final_post_subfolder_name = name_candidate
folder_creation_successful = True
else :
suffix_counter + = 1
if suffix_counter > 100 : # Safety break
self . logger ( f " ⚠️ Exceeded 100 attempts to find unique subfolder for ' { original_cleaned_post_title_for_sub } ' . " )
final_post_subfolder_name = f " { original_cleaned_post_title_for_sub } _ { uuid . uuid4 ( ) . hex [ : 8 ] } "
os . makedirs ( os . path . join ( base_path_for_post_subfolder , final_post_subfolder_name ) , exist_ok = True )
break
determined_post_save_path_for_history = os . path . join ( base_path_for_post_subfolder , final_post_subfolder_name )
if self . skip_words_list and ( self . skip_words_scope == SKIP_SCOPE_POSTS or self . skip_words_scope == SKIP_SCOPE_BOTH ) :
if self . _check_pause ( f " Skip words (post title) for post { post_id } " ) :
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
post_title_lower = post_title . lower ( )
for skip_word in self . skip_words_list :
if skip_word . lower ( ) in post_title_lower :
self . logger ( f " -> Skip Post (Keyword in Title ' { skip_word } ' ): ' { post_title [ : 50 ] } ... ' . Scope: { self . skip_words_scope } " )
history_data_for_skipped_post = {
' post_id ' : post_id , ' service ' : self . service , ' user_id ' : self . user_id , ' post_title ' : post_title ,
' top_file_name ' : " N/A (Post Skipped) " , ' num_files ' : num_potential_files_in_post ,
' upload_date_str ' : post_data . get ( ' published ' ) or post_data . get ( ' added ' ) or " Unknown " ,
' download_location ' : determined_post_save_path_for_history
}
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , history_data_for_skipped_post , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
if not self . extract_links_only and self . use_subfolders and self . skip_words_list :
if self . _check_pause ( f " Folder keyword skip check for post { post_id } " ) :
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
for folder_name_to_check in base_folder_names_for_post_content :
if not folder_name_to_check : continue
if any ( skip_word . lower ( ) in folder_name_to_check . lower ( ) for skip_word in self . skip_words_list ) :
matched_skip = next ( ( sw for sw in self . skip_words_list if sw . lower ( ) in folder_name_to_check . lower ( ) ) , " unknown_skip_word " )
self . logger ( f " -> Skip Post (Folder Keyword): Potential folder ' { folder_name_to_check } ' contains ' { matched_skip } ' . " )
result_tuple = ( 0 , num_potential_files_in_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
if self . extract_links_only :
self . logger ( f " Extract Links Only mode: Finished processing post { post_id } for links. " )
result_tuple = ( 0 , 0 , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
all_files_from_post_api = [ ]
api_file_domain = urlparse ( self . api_url_input ) . netloc
if not api_file_domain or not any ( d in api_file_domain . lower ( ) for d in [ ' kemono.su ' , ' kemono.party ' , ' kemono.cr ' , ' coomer.su ' , ' coomer.party ' , ' coomer.st ' ] ) :
api_file_domain = " kemono.cr " if " kemono " in self . service . lower ( ) else " coomer.st "
if post_main_file_info and isinstance ( post_main_file_info , dict ) and post_main_file_info . get ( ' path ' ) :
file_path = post_main_file_info [ ' path ' ] . lstrip ( ' / ' )
original_api_name = post_main_file_info . get ( ' name ' ) or os . path . basename ( file_path )
if original_api_name :
all_files_from_post_api . append ( {
' url ' : f " https:// { api_file_domain } { file_path } " if file_path . startswith ( ' / ' ) else f " https:// { api_file_domain } /data/ { file_path } " ,
' name ' : original_api_name ,
' _original_name_for_log ' : original_api_name ,
' _is_thumbnail ' : is_image ( original_api_name )
} )
else :
self . logger ( f " ⚠️ Skipping main file for post { post_id } : Missing name (Path: { file_path } ) " )
for idx , att_info in enumerate ( post_attachments ) :
if isinstance ( att_info , dict ) and att_info . get ( ' path ' ) :
att_path = att_info [ ' path ' ] . lstrip ( ' / ' )
original_api_att_name = att_info . get ( ' name ' ) or os . path . basename ( att_path )
if original_api_att_name :
all_files_from_post_api . append ( {
' url ' : f " https:// { api_file_domain } { att_path } " if att_path . startswith ( ' / ' ) else f " https:// { api_file_domain } /data/ { att_path } " ,
' name ' : original_api_att_name ,
' _original_name_for_log ' : original_api_att_name ,
' _is_thumbnail ' : is_image ( original_api_att_name )
} )
else :
self . logger ( f " ⚠️ Skipping attachment { idx + 1 } for post { post_id } : Missing name (Path: { att_path } ) " )
else :
self . logger ( f " ⚠️ Skipping invalid attachment { idx + 1 } for post { post_id } : { str ( att_info ) [ : 100 ] } " )
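# When duplicate handling is hash-based, first collapse API entries that point at the same URL before downloading.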
if self . keep_duplicates_mode == DUPLICATE_HANDLING_HASH :
unique_files_by_url = { }
for file_info in all_files_from_post_api :
file_url = file_info . get ( ' url ' )
if file_url and file_url not in unique_files_by_url :
unique_files_by_url [ file_url ] = file_info
original_count = len ( all_files_from_post_api )
all_files_from_post_api = list ( unique_files_by_url . values ( ) )
new_count = len ( all_files_from_post_api )
if new_count < original_count :
self . logger ( f " De-duplicated file list: Removed { original_count - new_count } redundant entries from the API response. " )
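# Optional content scan for extra images: collect direct image URLs plus <img src=...> values, resolving protocol- and root-relative paths against the input URL.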
if self . scan_content_for_images and post_content_html and not self . extract_links_only :
self . logger ( f " Scanning post content for additional image URLs (Post ID: { post_id } )... " )
parsed_input_url = urlparse ( self . api_url_input )
base_url_for_relative_paths = f " { parsed_input_url . scheme } :// { parsed_input_url . netloc } "
img_ext_pattern = " | " . join ( ext . lstrip ( ' . ' ) for ext in IMAGE_EXTENSIONS )
direct_url_pattern_str = r """ (?i) \ b(https?://[^ \ s " ' <> \ [ \ ] \ { \ } \ | \ ^ \ \ ^~ \ [ \ ]`]+ \ .(?: """ + img_ext_pattern + r """ )) \ b """
img_tag_src_pattern_str = r """ <img \ s+[^>]*?src \ s*= \ s*[ " ' ]([^ " ' ]+)[ " ' ] """
found_image_sources = set ( )
for direct_url_match in re . finditer ( direct_url_pattern_str , post_content_html ) :
found_image_sources . add ( direct_url_match . group ( 1 ) )
for img_tag_match in re . finditer ( img_tag_src_pattern_str , post_content_html , re . IGNORECASE ) :
src_attr = img_tag_match . group ( 1 ) . strip ( )
src_attr = html . unescape ( src_attr )
if not src_attr : continue
resolved_src_url = " "
if src_attr . startswith ( ( ' http:// ' , ' https:// ' ) ) :
resolved_src_url = src_attr
elif src_attr . startswith ( ' // ' ) :
resolved_src_url = f " { parsed_input_url . scheme } : { src_attr } "
elif src_attr . startswith ( ' / ' ) :
resolved_src_url = f " { base_url_for_relative_paths } { src_attr } "
if resolved_src_url :
parsed_resolved_url = urlparse ( resolved_src_url )
if any ( parsed_resolved_url . path . lower ( ) . endswith ( ext ) for ext in IMAGE_EXTENSIONS ) :
found_image_sources . add ( resolved_src_url )
if found_image_sources :
self . logger ( f " Found { len ( found_image_sources ) } potential image URLs/sources in content. " )
existing_urls_in_api_list = { f_info [ ' url ' ] for f_info in all_files_from_post_api }
for found_url in found_image_sources :
if self . check_cancel ( ) : break
if found_url in existing_urls_in_api_list :
self . logger ( f " Skipping URL from content (already in API list or previously added from content): { found_url [ : 70 ] } ... " )
continue
try :
parsed_found_url = urlparse ( found_url )
url_filename = os . path . basename ( parsed_found_url . path )
if not url_filename or not is_image ( url_filename ) :
self . logger ( f " Skipping URL from content (no filename part or not an image extension): { found_url [ : 70 ] } ... " )
continue
self . logger ( f " Adding image from content: { url_filename } (URL: { found_url [ : 70 ] } ...) " )
all_files_from_post_api . append ( {
' url ' : found_url ,
' name ' : url_filename ,
' _original_name_for_log ' : url_filename ,
' _is_thumbnail ' : False ,
' _from_content_scan ' : True
} )
existing_urls_in_api_list . add ( found_url )
except Exception as e_url_parse :
self . logger ( f " Error processing URL from content ' { found_url [ : 70 ] } ... ' : { e_url_parse } " )
else :
self . logger ( f " No additional image URLs found in post content scan for post { post_id } . " )
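# Archive-only mode ([AO]): if the post contains an archive, keep only the archive files and skip the rest.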
if self . archive_only_mode and not self . extract_links_only :
has_archive = any ( is_archive ( f . get ( ' _original_name_for_log ' , ' ' ) ) for f in all_files_from_post_api )
if has_archive and len ( all_files_from_post_api ) > 1 :
self . logger ( f " [AO] Archive found in post { post_id } . Prioritizing archive and skipping other files. " )
archives_only = [ f for f in all_files_from_post_api if is_archive ( f . get ( ' _original_name_for_log ' , ' ' ) ) ]
if archives_only :
all_files_from_post_api = archives_only
else :
self . logger ( f " [AO] Warning: has_archive was true, but no archives found after filtering for post { post_id } . This is unexpected. " )
if self . manga_mode_active and self . manga_filename_style == STYLE_DATE_BASED :
def natural_sort_key_for_files ( file_api_info ) :
name = file_api_info . get ( ' _original_name_for_log ' , ' ' ) . lower ( )
return [ int ( text ) if text . isdigit ( ) else text for text in re . split ( ' ([0-9]+) ' , name ) ]
all_files_from_post_api . sort ( key = natural_sort_key_for_files )
self . logger ( f " Manga Date Mode: Sorted { len ( all_files_from_post_api ) } files within post { post_id } by original name for sequential numbering. " )
if not all_files_from_post_api :
self . logger ( f " No files found to download for post { post_id } . " )
history_data_for_no_files_post = {
' post_title ' : post_title ,
' post_id ' : post_id ,
' service ' : self . service ,
' user_id ' : self . user_id ,
' top_file_name ' : " N/A (No Files) " ,
' num_files ' : 0 ,
' upload_date_str ' : post_data . get ( ' published ' ) or post_data . get ( ' added ' ) or " Unknown " ,
' download_location ' : determined_post_save_path_for_history
}
result_tuple = ( 0 , 0 , [ ] , [ ] , [ ] , history_data_for_no_files_post , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
files_to_download_info_list = [ ]
processed_original_filenames_in_this_post = set ( )
if self . keep_in_post_duplicates :
files_to_download_info_list . extend ( all_files_from_post_api )
self . logger ( f " ℹ ️ ' Keep Duplicates ' is on. All { len ( all_files_from_post_api ) } files from post will be processed. " )
else :
for file_info in all_files_from_post_api :
current_api_original_filename = file_info . get ( ' _original_name_for_log ' )
if current_api_original_filename in processed_original_filenames_in_this_post :
self . logger ( f " -> Skip Duplicate Original Name (within post { post_id } ): ' { current_api_original_filename } ' already processed/listed for this post. " )
total_skipped_this_post + = 1
else :
files_to_download_info_list . append ( file_info )
if current_api_original_filename :
processed_original_filenames_in_this_post . add ( current_api_original_filename )
if not files_to_download_info_list :
self . logger ( f " All files for post { post_id } were duplicate original names or skipped earlier. " )
result_tuple = ( 0 , total_skipped_this_post , [ ] , [ ] , [ ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
self . logger ( f " Identified { len ( files_to_download_info_list ) } unique original file(s) for potential download from post { post_id } . " )
with ThreadPoolExecutor ( max_workers = self . num_file_threads , thread_name_prefix = f ' P { post_id } File_ ' ) as file_pool :
futures_list = [ ]
for file_idx , file_info_to_dl in enumerate ( files_to_download_info_list ) :
if self . _check_pause ( f " File processing loop for post { post_id } , file { file_idx } " ) : break
if self . check_cancel ( ) : break
current_api_original_filename = file_info_to_dl . get ( ' _original_name_for_log ' )
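# Decide whether this file passes the character filter for the active scope (Files, Title, Both, or Comments).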
file_is_candidate_by_char_filter_scope = False
char_filter_info_that_matched_file = None
if not current_character_filters :
file_is_candidate_by_char_filter_scope = True
else :
if self . char_filter_scope == CHAR_SCOPE_FILES :
for filter_item_obj in current_character_filters :
terms_to_check_for_file = list ( filter_item_obj [ " aliases " ] )
if filter_item_obj [ " is_group " ] and filter_item_obj [ " name " ] not in terms_to_check_for_file :
terms_to_check_for_file . append ( filter_item_obj [ " name " ] )
unique_terms_for_file_check = list ( set ( terms_to_check_for_file ) )
for term_to_match in unique_terms_for_file_check :
if is_filename_match_for_character ( current_api_original_filename , term_to_match ) :
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = filter_item_obj
self . logger ( f " File ' { current_api_original_filename } ' matches char filter term ' { term_to_match } ' (from ' { filter_item_obj [ ' name ' ] } ' ). Scope: Files. " )
break
if file_is_candidate_by_char_filter_scope : break
elif self . char_filter_scope == CHAR_SCOPE_TITLE :
if post_is_candidate_by_title_char_match :
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_title
self . logger ( f " File ' { current_api_original_filename } ' is candidate because post title matched. Scope: Title. " )
elif self . char_filter_scope == CHAR_SCOPE_BOTH :
if post_is_candidate_by_title_char_match :
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_title
self . logger ( f " File ' { current_api_original_filename } ' is candidate because post title matched. Scope: Both (Title part). " )
else :
for filter_item_obj_both_file in current_character_filters :
terms_to_check_for_file_both = list ( filter_item_obj_both_file [ " aliases " ] )
if filter_item_obj_both_file [ " is_group " ] and filter_item_obj_both_file [ " name " ] not in terms_to_check_for_file_both :
terms_to_check_for_file_both . append ( filter_item_obj_both_file [ " name " ] )
unique_terms_for_file_both_check = list ( set ( terms_to_check_for_file_both ) )
for term_to_match in unique_terms_for_file_both_check :
if is_filename_match_for_character ( current_api_original_filename , term_to_match ) :
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = filter_item_obj_both_file
self . logger ( f " File ' { current_api_original_filename } ' matches char filter term ' { term_to_match } ' (from ' { filter_item_obj_both_file [ ' name ' ] } ' ). Scope: Both (File part). " )
break
if file_is_candidate_by_char_filter_scope : break
elif self . char_filter_scope == CHAR_SCOPE_COMMENTS :
if post_is_candidate_by_file_char_match_in_comment_scope :
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_file_in_comment_scope
self . logger ( f " File ' { current_api_original_filename } ' is candidate because a file in this post matched char filter (Overall Scope: Comments). " )
elif post_is_candidate_by_comment_char_match :
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_comment
self . logger ( f " File ' { current_api_original_filename } ' is candidate because post comments matched char filter (Overall Scope: Comments). " )
if not file_is_candidate_by_char_filter_scope :
self . logger ( f " -> Skip File (Char Filter Scope ' { self . char_filter_scope } ' ): ' { current_api_original_filename } ' no match. " )
total_skipped_this_post + = 1
continue
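# Resolve the destination folder for this file: prefer the character filter that matched, then Known.txt matches from the title or filename, and finally a name derived from the post title.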
base_folder_for_this_file = " "
known_name_match_found_for_this_file = False
if self . use_subfolders :
if char_filter_info_that_matched_file :
base_folder_for_this_file = clean_folder_name ( char_filter_info_that_matched_file [ " name " ] )
known_name_match_found_for_this_file = True
elif char_filter_that_matched_title :
base_folder_for_this_file = clean_folder_name ( char_filter_that_matched_title [ " name " ] )
known_name_match_found_for_this_file = True
elif char_filter_that_matched_comment :
base_folder_for_this_file = clean_folder_name ( char_filter_that_matched_comment [ " name " ] )
known_name_match_found_for_this_file = True
else :
title_folders = match_folders_from_title ( post_title , self . known_names , effective_unwanted_keywords_for_folder_naming )
if title_folders :
base_folder_for_this_file = title_folders [ 0 ]
known_name_match_found_for_this_file = True
else :
filename_folders = match_folders_from_filename_enhanced ( current_api_original_filename , self . known_names , effective_unwanted_keywords_for_folder_naming )
if filename_folders :
base_folder_for_this_file = filename_folders [ 0 ]
known_name_match_found_for_this_file = True
if not known_name_match_found_for_this_file :
if self . handle_unknown_mode :
self . logger ( f " ℹ ️ [unknown] mode: No match in Known.txt for ' { current_api_original_filename } ' . Using post title for folder. " )
base_folder_for_this_file = robust_clean_name ( post_title )
else :
base_folder_for_this_file = extract_folder_name_from_title ( post_title , effective_unwanted_keywords_for_folder_naming )
final_path_for_this_file = self . override_output_dir if self . override_output_dir else self . download_root
if self . use_subfolders and base_folder_for_this_file :
final_path_for_this_file = os . path . join ( final_path_for_this_file , base_folder_for_this_file )
effective_spsp = should_create_post_subfolder
if self . handle_unknown_mode and not known_name_match_found_for_this_file :
effective_spsp = False
if effective_spsp :
final_path_for_this_file = os . path . join ( final_path_for_this_file , final_post_subfolder_name )
futures_list . append ( file_pool . submit (
self . _download_single_file ,
file_info = file_info_to_dl ,
target_folder_path = final_path_for_this_file ,
post_page_url = post_page_url ,
original_post_id_for_log = post_id ,
skip_event = self . skip_current_file_flag ,
post_title = post_title ,
manga_date_file_counter_ref = self . manga_date_file_counter_ref ,
manga_global_file_counter_ref = self . manga_global_file_counter_ref ,
folder_context_name_for_history = base_folder_for_this_file ,
file_index_in_post = file_idx ,
num_files_in_this_post = len ( files_to_download_info_list )
) )
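# Gather per-file results as they complete; each task returns (downloaded, skipped, saved_name, kept_original_flag, status, retry_details).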
for future in as_completed ( futures_list ) :
if self . check_cancel ( ) :
for f_to_cancel in futures_list :
if not f_to_cancel . done ( ) :
f_to_cancel . cancel ( )
break
try :
dl_count , skip_count , actual_filename_saved , original_kept_flag , status , details_for_dialog_or_retry = future . result ( )
total_downloaded_this_post + = dl_count
total_skipped_this_post + = skip_count
if original_kept_flag and dl_count > 0 and actual_filename_saved :
kept_original_filenames_for_log . append ( actual_filename_saved )
if status == FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER and details_for_dialog_or_retry :
retryable_failures_this_post . append ( details_for_dialog_or_retry )
elif status == FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION and details_for_dialog_or_retry :
permanent_failures_this_post . append ( details_for_dialog_or_retry )
except CancelledError :
self . logger ( f " File download task for post { post_id } was cancelled. " )
total_skipped_this_post + = 1
except Exception as exc_f :
self . logger ( f " ❌ File download task for post { post_id } resulted in error: { exc_f } " )
total_skipped_this_post + = 1
self . _emit_signal ( ' file_progress ' , " " , None )
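# Persist resume state for this post: the processed post id, the current manga filename
# counters, and any permanently failed files are merged into the session file. The write goes
# to a .tmp file first and is swapped in with os.replace so a crash cannot leave a
# half-written session file behind.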
if self . session_file_path and self . session_lock :
try :
with self . session_lock :
if os . path . exists ( self . session_file_path ) :
with open ( self . session_file_path , ' r ' , encoding = ' utf-8 ' ) as f :
session_data = json . load ( f )
if ' download_state ' not in session_data :
session_data [ ' download_state ' ] = { }
if not isinstance ( session_data [ ' download_state ' ] . get ( ' processed_post_ids ' ) , list ) :
session_data [ ' download_state ' ] [ ' processed_post_ids ' ] = [ ]
session_data [ ' download_state ' ] [ ' processed_post_ids ' ] . append ( self . post . get ( ' id ' ) )
if ' manga_counters ' not in session_data [ ' download_state ' ] :
session_data [ ' download_state ' ] [ ' manga_counters ' ] = { }
if self . manga_date_file_counter_ref is not None :
session_data [ ' download_state ' ] [ ' manga_counters ' ] [ ' date_based ' ] = self . manga_date_file_counter_ref [ 0 ]
if self . manga_global_file_counter_ref is not None :
session_data [ ' download_state ' ] [ ' manga_counters ' ] [ ' global_numbering ' ] = self . manga_global_file_counter_ref [ 0 ]
if permanent_failures_this_post :
if not isinstance ( session_data [ ' download_state ' ] . get ( ' permanently_failed_files ' ) , list ) :
session_data [ ' download_state ' ] [ ' permanently_failed_files ' ] = [ ]
existing_failed_urls = { f . get ( ' file_info ' , { } ) . get ( ' url ' ) for f in session_data [ ' download_state ' ] [ ' permanently_failed_files ' ] }
for failure in permanent_failures_this_post :
if failure . get ( ' file_info ' , { } ) . get ( ' url ' ) not in existing_failed_urls :
session_data [ ' download_state ' ] [ ' permanently_failed_files ' ] . append ( failure )
temp_file_path = self . session_file_path + " .tmp "
with open ( temp_file_path , ' w ' , encoding = ' utf-8 ' ) as f_tmp :
json . dump ( session_data , f_tmp , indent = 2 )
os . replace ( temp_file_path , self . session_file_path )
except Exception as e :
self . logger ( f " ⚠️ Could not update session file for post { post_id } : { e } " )
if not self . extract_links_only and ( total_downloaded_this_post > 0 or not (
( current_character_filters and (
( self . char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match ) or
( self . char_filter_scope == CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match )
) ) or
( self . skip_words_list and ( self . skip_words_scope == SKIP_SCOPE_POSTS or self . skip_words_scope == SKIP_SCOPE_BOTH ) and any ( sw . lower ( ) in post_title . lower ( ) for sw in self . skip_words_list ) )
) ) :
top_file_name_for_history = " N/A "
if post_main_file_info and post_main_file_info . get ( ' name ' ) :
top_file_name_for_history = post_main_file_info [ ' name ' ]
elif post_attachments and post_attachments [ 0 ] . get ( ' name ' ) :
top_file_name_for_history = post_attachments [ 0 ] [ ' name ' ]
history_data_for_this_post = {
' post_title ' : post_title , ' post_id ' : post_id ,
' top_file_name ' : top_file_name_for_history ,
' num_files ' : num_potential_files_in_post ,
' upload_date_str ' : post_data . get ( ' published ' ) or post_data . get ( ' added ' ) or " Unknown " ,
' download_location ' : determined_post_save_path_for_history ,
' service ' : self . service , ' user_id ' : self . user_id ,
}
if not self . check_cancel ( ) :
self . logger ( f " Post { post_id } Summary: Downloaded= { total_downloaded_this_post } , Skipped Files= { total_skipped_this_post } " )
if not self . extract_links_only and self . use_post_subfolders and total_downloaded_this_post == 0 :
path_to_check_for_emptiness = determined_post_save_path_for_history
try :
if os . path . isdir ( path_to_check_for_emptiness ) and not os . listdir ( path_to_check_for_emptiness ) :
self . logger ( f " 🗑️ Removing empty post-specific subfolder: ' { path_to_check_for_emptiness } ' " )
os . rmdir ( path_to_check_for_emptiness )
except OSError as e_rmdir :
self . logger ( f " ⚠️ Could not remove empty post-specific subfolder ' { path_to_check_for_emptiness } ' : { e_rmdir } " )
result_tuple = ( total_downloaded_this_post , total_skipped_this_post ,
kept_original_filenames_for_log , retryable_failures_this_post ,
permanent_failures_this_post , history_data_for_this_post ,
None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
except Exception as main_thread_err :
self . logger ( f " \n ❌ Critical error within Worker process for { log_prefix } { post_id } : { main_thread_err } " )
self . logger ( traceback . format_exc ( ) )
result_tuple = ( 0 , 1 , [ ] , [ ] , [ { ' error ' : str ( main_thread_err ) } ] , None , None )
self . _emit_signal ( ' worker_finished ' , result_tuple )
return result_tuple
class DownloadThread ( QThread ) :
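# Qt signals exposed to the GUI: log lines, per-file progress and status, external links,
# retryable/permanent failure lists, and post-history entries.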
progress_signal = pyqtSignal ( str )
add_character_prompt_signal = pyqtSignal ( str )
file_download_status_signal = pyqtSignal ( bool )
finished_signal = pyqtSignal ( int , int , bool , list )
external_link_signal = pyqtSignal ( str , str , str , str , str )
file_successfully_downloaded_signal = pyqtSignal ( dict )
file_progress_signal = pyqtSignal ( str , object )
retryable_file_failed_signal = pyqtSignal ( list )
missed_character_post_signal = pyqtSignal ( str , str )
post_processed_for_history_signal = pyqtSignal ( dict )
final_history_entries_signal = pyqtSignal ( list )
permanent_file_failed_signal = pyqtSignal ( list )
def __init__ ( self , api_url_input , output_dir , known_names_copy ,
cancellation_event ,
pause_event , filter_character_list = None , dynamic_character_filter_holder = None ,
filter_mode = ' all ' , skip_zip = True ,
use_subfolders = True , use_post_subfolders = False , custom_folder_name = None , compress_images = False ,
download_thumbnails = False , service = None , user_id = None ,
downloaded_files = None , downloaded_file_hashes = None , downloaded_files_lock = None , downloaded_file_hashes_lock = None ,
skip_words_list = None ,
skip_words_scope = ' files ' ,
show_external_links = False ,
extract_links_only = False ,
num_file_threads_for_worker = 1 ,
skip_current_file_flag = None ,
start_page = None , end_page = None ,
target_post_id_from_initial_url = None ,
manga_mode_active = False ,
unwanted_keywords = None ,
manga_filename_style = ' post_title ' ,
char_filter_scope = ' files ' ,
remove_from_filename_words_list = None ,
manga_date_prefix = ' ' ,
allow_multipart_download = True ,
multipart_parts_count = 4 ,
multipart_min_size_mb = 100 ,
selected_cookie_file = None ,
override_output_dir = None ,
app_base_dir = None ,
manga_date_file_counter_ref = None ,
manga_global_file_counter_ref = None ,
use_cookie = False ,
scan_content_for_images = False ,
creator_download_folder_ignore_words = None ,
use_date_prefix_for_subfolder = False ,
date_prefix_format = " YYYY-MM-DD " ,
keep_in_post_duplicates = False ,
keep_duplicates_mode = ' hash ' ,
keep_duplicates_limit = 0 ,
downloaded_hash_counts = None ,
downloaded_hash_counts_lock = None ,
cookie_text = " " ,
session_file_path = None ,
session_lock = None ,
text_only_scope = None ,
text_export_format = ' txt ' ,
single_pdf_mode = False ,
project_root_dir = None ,
processed_post_ids = None ,
start_offset = 0 ,
fetch_first = False ,
skip_file_size_mb = None ,
domain_override = None ,
archive_only_mode = False ,
manga_custom_filename_format = " {published} {title} " ,
manga_custom_date_format = " YYYY-MM-DD " ,
sfp_threshold = None ,
creator_name_cache = None
) :
super ( ) . __init__ ( )
self . api_url_input = api_url_input
self . output_dir = output_dir
self . known_names = list ( known_names_copy )
self . cancellation_event = cancellation_event
self . pause_event = pause_event
self . skip_current_file_flag = skip_current_file_flag
self . initial_target_post_id = target_post_id_from_initial_url
self . filter_character_list_objects_initial = filter_character_list if filter_character_list else [ ]
self . dynamic_filter_holder = dynamic_character_filter_holder
self . filter_mode = filter_mode
self . skip_zip = skip_zip
self . use_subfolders = use_subfolders
self . use_post_subfolders = use_post_subfolders
self . custom_folder_name = custom_folder_name
self . compress_images = compress_images
self . download_thumbnails = download_thumbnails
self . service = service
self . user_id = user_id
self . skip_words_list = skip_words_list if skip_words_list is not None else [ ]
self . skip_words_scope = skip_words_scope
self . downloaded_files = downloaded_files
self . downloaded_files_lock = downloaded_files_lock
self . downloaded_file_hashes = downloaded_file_hashes
self . downloaded_file_hashes_lock = downloaded_file_hashes_lock
self . _add_character_response = None
self . prompt_mutex = QMutex ( )
self . show_external_links = show_external_links
self . extract_links_only = extract_links_only
self . num_file_threads_for_worker = num_file_threads_for_worker
self . start_page = start_page
self . end_page = end_page
self . manga_mode_active = manga_mode_active
self . unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else { ' spicy ' , ' hd ' , ' nsfw ' , ' 4k ' , ' preview ' , ' teaser ' , ' clip ' }
self . manga_filename_style = manga_filename_style
self . char_filter_scope = char_filter_scope
self . remove_from_filename_words_list = remove_from_filename_words_list
self . manga_date_prefix = manga_date_prefix
self . allow_multipart_download = allow_multipart_download
self . multipart_parts_count = multipart_parts_count
self . multipart_min_size_mb = multipart_min_size_mb
self . selected_cookie_file = selected_cookie_file
self . app_base_dir = app_base_dir
self . cookie_text = cookie_text
self . use_cookie = use_cookie
self . override_output_dir = override_output_dir
self . manga_date_file_counter_ref = manga_date_file_counter_ref
self . scan_content_for_images = scan_content_for_images
self . creator_download_folder_ignore_words = creator_download_folder_ignore_words
self . use_date_prefix_for_subfolder = use_date_prefix_for_subfolder
self . date_prefix_format = date_prefix_format
self . keep_in_post_duplicates = keep_in_post_duplicates
self . keep_duplicates_mode = keep_duplicates_mode
self . keep_duplicates_limit = keep_duplicates_limit
self . downloaded_hash_counts = downloaded_hash_counts
self . downloaded_hash_counts_lock = downloaded_hash_counts_lock
self . manga_global_file_counter_ref = manga_global_file_counter_ref
self . session_file_path = session_file_path
self . session_lock = session_lock
self . history_candidates_buffer = deque ( maxlen = 8 )
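# Bounded buffer: only the most recent history candidates are retained (maxlen=8).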
self . text_only_scope = text_only_scope
self . text_export_format = text_export_format
self . single_pdf_mode = single_pdf_mode
self . project_root_dir = project_root_dir
self . processed_post_ids_set = set ( processed_post_ids ) if processed_post_ids is not None else set ( )
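# Post IDs already handled in a previous session; these are passed to the API fetcher so a
# resumed run can skip them.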
self . start_offset = start_offset
self . fetch_first = fetch_first
self . skip_file_size_mb = skip_file_size_mb
self . archive_only_mode = archive_only_mode
self . manga_custom_filename_format = manga_custom_filename_format
self . manga_custom_date_format = manga_custom_date_format
self . domain_override = domain_override
self . sfp_threshold = sfp_threshold
self . creator_name_cache = creator_name_cache
if self . compress_images and Image is None :
self . logger ( " ⚠️ Image compression disabled: Pillow library not found (DownloadThread). " )
self . compress_images = False
def logger ( self , message ) :
""" Emits a progress signal to be displayed in the log. """
if hasattr ( self , ' progress_signal ' ) :
self . progress_signal . emit ( str ( message ) )
def run ( self ) :
"""
2025-07-18 07:54:11 -07:00
The main execution method for the download process .
This version correctly uses the central ` download_from_api ` function
and explicitly maps all arguments to the PostProcessorWorker to prevent TypeErrors .
2025-07-13 10:22:06 -07:00
"""
grand_total_downloaded_files = 0
grand_total_skipped_files = 0
grand_list_of_kept_original_filenames = [ ]
was_process_cancelled = False
worker_signals_obj = PostProcessorSignals ( )
try :
worker_signals_obj . progress_signal . connect ( self . progress_signal )
worker_signals_obj . file_download_status_signal . connect ( self . file_download_status_signal )
worker_signals_obj . file_progress_signal . connect ( self . file_progress_signal )
worker_signals_obj . external_link_signal . connect ( self . external_link_signal )
worker_signals_obj . missed_character_post_signal . connect ( self . missed_character_post_signal )
worker_signals_obj . file_successfully_downloaded_signal . connect ( self . file_successfully_downloaded_signal )
worker_signals_obj . worker_finished_signal . connect ( lambda result : None )
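# The per-worker finished signal is routed to a no-op here: in this single-threaded path the
# worker's result tuple is consumed synchronously from process() below, so nothing extra needs
# to happen when the signal fires.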
self . logger ( " Starting post fetch (single-threaded download process)... " )
post_generator = download_from_api (
self . api_url_input ,
logger = self . logger ,
start_page = self . start_page ,
end_page = self . end_page ,
manga_mode = self . manga_mode_active ,
cancellation_event = self . cancellation_event ,
pause_event = self . pause_event ,
use_cookie = self . use_cookie ,
cookie_text = self . cookie_text ,
selected_cookie_file = self . selected_cookie_file ,
app_base_dir = self . app_base_dir ,
manga_filename_style_for_sort_check = self . manga_filename_style if self . manga_mode_active else None ,
processed_post_ids = self . processed_post_ids_set ,
fetch_all_first = self . fetch_first
)
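# download_from_api yields posts in batches (fetch_all_first presumably front-loads the whole
# listing before yielding); cancellation is re-checked between batches and between individual
# posts below.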
for posts_batch_data in post_generator :
if self . isInterruptionRequested ( ) :
was_process_cancelled = True
break
for individual_post_data in posts_batch_data :
if self . isInterruptionRequested ( ) :
was_process_cancelled = True
break
worker_args = {
' post_data ' : individual_post_data ,
' emitter ' : worker_signals_obj ,
' creator_name_cache ' : self . creator_name_cache ,
' download_root ' : self . output_dir ,
' known_names ' : self . known_names ,
' filter_character_list ' : self . filter_character_list_objects_initial ,
' dynamic_character_filter_holder ' : self . dynamic_filter_holder ,
' target_post_id_from_initial_url ' : self . initial_target_post_id ,
' num_file_threads ' : self . num_file_threads_for_worker ,
' processed_post_ids ' : list ( self . processed_post_ids_set ) ,
' unwanted_keywords ' : self . unwanted_keywords ,
' filter_mode ' : self . filter_mode ,
' skip_zip ' : self . skip_zip ,
' use_subfolders ' : self . use_subfolders ,
' use_post_subfolders ' : self . use_post_subfolders ,
' custom_folder_name ' : self . custom_folder_name ,
' compress_images ' : self . compress_images ,
' download_thumbnails ' : self . download_thumbnails ,
' service ' : self . service ,
' user_id ' : self . user_id ,
' api_url_input ' : self . api_url_input ,
' pause_event ' : self . pause_event ,
' cancellation_event ' : self . cancellation_event ,
' downloaded_files ' : self . downloaded_files ,
' downloaded_file_hashes ' : self . downloaded_file_hashes ,
' downloaded_files_lock ' : self . downloaded_files_lock ,
' downloaded_file_hashes_lock ' : self . downloaded_file_hashes_lock ,
' skip_words_list ' : self . skip_words_list ,
' skip_words_scope ' : self . skip_words_scope ,
' show_external_links ' : self . show_external_links ,
' extract_links_only ' : self . extract_links_only ,
' skip_current_file_flag ' : self . skip_current_file_flag ,
' manga_mode_active ' : self . manga_mode_active ,
' manga_filename_style ' : self . manga_filename_style ,
' char_filter_scope ' : self . char_filter_scope ,
' remove_from_filename_words_list ' : self . remove_from_filename_words_list ,
' allow_multipart_download ' : self . allow_multipart_download ,
' cookie_text ' : self . cookie_text ,
' use_cookie ' : self . use_cookie ,
' override_output_dir ' : self . override_output_dir ,
' selected_cookie_file ' : self . selected_cookie_file ,
' app_base_dir ' : self . app_base_dir ,
' manga_date_prefix ' : self . manga_date_prefix ,
' manga_date_file_counter_ref ' : self . manga_date_file_counter_ref ,
' scan_content_for_images ' : self . scan_content_for_images ,
' creator_download_folder_ignore_words ' : self . creator_download_folder_ignore_words ,
' manga_global_file_counter_ref ' : self . manga_global_file_counter_ref ,
' use_date_prefix_for_subfolder ' : self . use_date_prefix_for_subfolder ,
' date_prefix_format ' : self . date_prefix_format ,
' keep_in_post_duplicates ' : self . keep_in_post_duplicates ,
' keep_duplicates_mode ' : self . keep_duplicates_mode ,
' keep_duplicates_limit ' : self . keep_duplicates_limit ,
' downloaded_hash_counts ' : self . downloaded_hash_counts ,
' downloaded_hash_counts_lock ' : self . downloaded_hash_counts_lock ,
' session_file_path ' : self . session_file_path ,
' session_lock ' : self . session_lock ,
' text_only_scope ' : self . text_only_scope ,
' text_export_format ' : self . text_export_format ,
' single_pdf_mode ' : self . single_pdf_mode ,
' multipart_parts_count ' : self . multipart_parts_count ,
' multipart_min_size_mb ' : self . multipart_min_size_mb ,
' skip_file_size_mb ' : self . skip_file_size_mb ,
' project_root_dir ' : self . project_root_dir ,
' domain_override ' : self . domain_override ,
' archive_only_mode ' : self . archive_only_mode ,
' manga_custom_filename_format ' : self . manga_custom_filename_format ,
' manga_custom_date_format ' : self . manga_custom_date_format ,
' sfp_threshold ' : self . sfp_threshold
}
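# Each post is handed to a fresh PostProcessorWorker and processed synchronously; its result
# tuple feeds the grand totals and the failure / history signals relayed to the GUI.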
post_processing_worker = PostProcessorWorker ( * * worker_args )
( dl_count , skip_count , kept_originals_this_post ,
retryable_failures , permanent_failures ,
history_data , temp_filepath ) = post_processing_worker . process ( )
grand_total_downloaded_files + = dl_count
grand_total_skipped_files + = skip_count
if kept_originals_this_post :
grand_list_of_kept_original_filenames . extend ( kept_originals_this_post )
if retryable_failures :
self . retryable_file_failed_signal . emit ( retryable_failures )
if history_data :
self . post_processed_for_history_signal . emit ( history_data )
if permanent_failures :
self . permanent_file_failed_signal . emit ( permanent_failures )
if self . single_pdf_mode and temp_filepath :
self . progress_signal . emit ( f " TEMP_FILE_PATH: { temp_filepath } " )
if was_process_cancelled :
break
if not was_process_cancelled and not self . isInterruptionRequested ( ) :
self . logger ( " ✅ All posts processed or end of content reached by DownloadThread. " )
except Exception as main_thread_err :
self . logger ( f " \n ❌ Critical error within DownloadThread run loop: { main_thread_err } " )
traceback . print_exc ( )
finally :
try :
if worker_signals_obj :
worker_signals_obj . progress_signal . disconnect ( self . progress_signal )
worker_signals_obj . file_download_status_signal . disconnect ( self . file_download_status_signal )
worker_signals_obj . external_link_signal . disconnect ( self . external_link_signal )
worker_signals_obj . file_progress_signal . disconnect ( self . file_progress_signal )
worker_signals_obj . missed_character_post_signal . disconnect ( self . missed_character_post_signal )
worker_signals_obj . file_successfully_downloaded_signal . disconnect ( self . file_successfully_downloaded_signal )
except ( TypeError , RuntimeError ) as e :
self . logger ( f " ℹ ️ Note during DownloadThread signal disconnection: { e } " )
self . finished_signal . emit ( grand_total_downloaded_files , grand_total_skipped_files , self . isInterruptionRequested ( ) , grand_list_of_kept_original_filenames )
class InterruptedError ( Exception ) :
""" Custom exception for handling cancellations gracefully.
Note: this shadows Python's built-in InterruptedError within this module. """
pass
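# Illustrative wiring sketch (assumptions, not part of this module's API): a GUI owner would
# typically construct the thread with shared threading.Event objects, connect its signals,
# then start it, roughly:
#
#     cancel_event, pause_event, skip_flag = threading.Event(), threading.Event(), threading.Event()
#     thread = DownloadThread(
#         api_url_input=url, output_dir=dest, known_names_copy=known_names,
#         cancellation_event=cancel_event, pause_event=pause_event,
#         skip_current_file_flag=skip_flag, service=service, user_id=user_id,
#     )
#     thread.progress_signal.connect(log_widget.append)
#     thread.finished_signal.connect(on_download_finished)
#     thread.start()
#
# Names such as log_widget and on_download_finished are placeholders, not part of this module.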