Yuvi9587 2025-05-23 17:22:54 +05:30
parent c4d5ba3040
commit d7960bbb85
5 changed files with 305 additions and 614 deletions

Read.png (binary image, not shown): 141 KiB before, 165 KiB after.

@@ -62,8 +62,6 @@ VIDEO_EXTENSIONS = {
 ARCHIVE_EXTENSIONS = {
 '.zip', '.rar', '.7z', '.tar', '.gz', '.bz2'
 }
-# --- Cookie Helper Functions ---
 def parse_cookie_string(cookie_string):
 """Parses a 'name=value; name2=value2' cookie string into a dict."""
 cookies = {}
@@ -88,13 +86,10 @@ def load_cookies_from_netscape_file(filepath, logger_func):
 continue
 parts = line.split('\t')
 if len(parts) == 7:
-# Netscape format: domain, flag, path, secure, expiration, name, value
 name = parts[5]
 value = parts[6]
 if name: # Ensure name is not empty
 cookies[name] = value
-# else:
-# logger_func(f" 🍪 Cookie file line {line_num} malformed (expected 7 tab-separated parts): '{line[:50]}...'")
 logger_func(f" 🍪 Loaded {len(cookies)} cookies from '{os.path.basename(filepath)}'.")
 return cookies if cookies else None
 except FileNotFoundError:
@@ -104,8 +99,6 @@ def load_cookies_from_netscape_file(filepath, logger_func):
 logger_func(f" 🍪 Error parsing cookie file '{os.path.basename(filepath)}': {e}")
 return None
-# --- End Cookie Helper Functions ---
 def is_title_match_for_character(post_title, character_name_filter):
 if not post_title or not character_name_filter:
 return False
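For reference, the `load_cookies_from_netscape_file` helper above splits each line on tabs and expects exactly seven fields; the field order below comes from the comment this hunk removes. A minimal sketch (the domain and cookie values are made up):

    # Netscape cookies.txt layout: domain, flag, path, secure, expiration, name, value
    sample_line = ".example.com\tTRUE\t/\tFALSE\t1735689600\tsession_id\tabc123"

    parts = sample_line.split('\t')
    if len(parts) == 7:
        name, value = parts[5], parts[6]   # -> ("session_id", "abc123")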
@@ -137,15 +130,9 @@ def clean_folder_name(name):
 if not cleaned: # If empty after initial cleaning
 return "untitled_folder"
-# Strip all trailing dots and spaces.
-# This handles cases like "folder...", "folder. .", "folder . ." -> "folder"
 temp_name = cleaned
 while len(temp_name) > 0 and (temp_name.endswith('.') or temp_name.endswith(' ')):
 temp_name = temp_name[:-1]
-# If stripping all trailing dots/spaces made it empty (e.g., original was "."), use default
-# Also handles if the original name was just spaces and became empty.
 return temp_name if temp_name else "untitled_folder"
@@ -158,10 +145,7 @@ def clean_filename(name):
 def strip_html_tags(html_text):
 if not html_text: return ""
-# First, unescape HTML entities
 text = html.unescape(html_text)
-# Then, remove HTML tags using a simple regex
-# This is a basic approach and might not handle all complex HTML perfectly
 clean_pattern = re.compile('<.*?>')
 cleaned_text = re.sub(clean_pattern, '', text)
 return cleaned_text.strip()
@@ -187,8 +171,6 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
 if not title or not names_to_match: return []
 title_lower = title.lower()
 matched_cleaned_names = set()
-# Sort by the length of the primary name for matching longer, more specific names first.
-# This is a heuristic; alias length might also be a factor but primary name length is simpler.
 sorted_name_objects = sorted(names_to_match, key=lambda x: len(x.get("name", "")), reverse=True)
 for name_obj in sorted_name_objects:
@@ -625,7 +607,6 @@ class PostProcessorWorker:
 self.pause_event = pause_event # Store pause_event
 self.emitter = emitter # Store the emitter
 if not self.emitter:
-# This case should ideally be prevented by the caller
 raise ValueError("PostProcessorWorker requires an emitter (signals object or queue).")
 self.skip_current_file_flag = skip_current_file_flag
@@ -660,12 +641,9 @@ class PostProcessorWorker:
 if isinstance(self.emitter, queue.Queue):
 self.emitter.put({'type': signal_type_str, 'payload': payload_args})
 elif self.emitter and hasattr(self.emitter, f"{signal_type_str}_signal"):
-# Assuming emitter is a QObject with pyqtSignal attributes
-# e.g., emitter.progress_signal.emit(*payload_args)
 signal_attr = getattr(self.emitter, f"{signal_type_str}_signal")
 signal_attr.emit(*payload_args)
 else:
-# Fallback or error logging if emitter is not recognized
 print(f"(Worker Log - Unrecognized Emitter for {signal_type_str}): {payload_args[0] if payload_args else ''}")
 def logger(self, message):
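The `_emit_signal` dispatch above only assumes the emitter is either a `queue.Queue` or an object exposing `<name>_signal` attributes with an `emit` method. A small sketch of the two consumer shapes this duck typing allows; the class names here are illustrative, not the actual objects from main.py:

    import queue

    # Shape 1: a plain queue drained by whatever owns the worker.
    q = queue.Queue()
    q.put({'type': 'progress', 'payload': ("Processing post 123...",)})
    print(q.get())  # {'type': 'progress', 'payload': ('Processing post 123...',)}

    # Shape 2: an object with *_signal attributes (PyQt-style).
    class FakeSignal:
        def emit(self, *args):          # stand-in for pyqtSignal.emit
            print("progress:", *args)

    class FakeSignals:
        progress_signal = FakeSignal()

    FakeSignals().progress_signal.emit("Processing post 123...")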
@@ -686,12 +664,10 @@ class PostProcessorWorker:
 return False # Not cancelled during pause
 def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event, # skip_event is threading.Event
-# emitter_for_file_ops, # This will be self.emitter
 post_title="", file_index_in_post=0, num_files_in_this_post=1,
 manga_date_file_counter_ref=None): # Added manga_date_file_counter_ref
 was_original_name_kept_flag = False
 final_filename_saved_for_return = ""
-# target_folder_path is the base character/post folder.
 def _get_current_character_filters(self):
 if self.dynamic_filter_holder:
@@ -699,14 +675,12 @@ class PostProcessorWorker:
 return self.filter_character_list_objects_initial
 def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event,
-# emitter_for_file_ops, # This will be self.emitter
 post_title="", file_index_in_post=0, num_files_in_this_post=1, # Added manga_date_file_counter_ref
 manga_date_file_counter_ref=None,
 forced_filename_override=None): # New for retries
 was_original_name_kept_flag = False
 final_filename_saved_for_return = ""
 retry_later_details = None # For storing info if retryable failure
-# target_folder_path is the base character/post folder.
 if self._check_pause(f"File download prep for '{file_info.get('name', 'unknown file')}'"): return 0, 1, "", False
 if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False
@@ -716,14 +690,11 @@ class PostProcessorWorker:
 if self.use_cookie: # This flag comes from the checkbox
 cookies_to_use_for_file = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger)
 api_original_filename = file_info.get('_original_name_for_log', file_info.get('name'))
-# This is the ideal name for the file if it were to be saved in the main target_folder_path.
 filename_to_save_in_main_path = ""
 if forced_filename_override:
 filename_to_save_in_main_path = forced_filename_override
 self.logger(f" Retrying with forced filename: '{filename_to_save_in_main_path}'")
-# was_original_name_kept_flag might need to be determined based on how forced_filename_override was created
 else:
 if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_FILES or self.skip_words_scope == SKIP_SCOPE_BOTH):
 filename_to_check_for_skip_words = api_original_filename.lower()
@@ -755,21 +726,15 @@ class PostProcessorWorker:
 self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save_in_main_path}'.")
 elif self.manga_filename_style == STYLE_DATE_BASED:
 current_thread_name = threading.current_thread().name
-# self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Manga Date Mode. Counter Ref ID: {id(manga_date_file_counter_ref)}, Value before access: {manga_date_file_counter_ref}")
 if manga_date_file_counter_ref is not None and len(manga_date_file_counter_ref) == 2:
 counter_val_for_filename = -1
 counter_lock = manga_date_file_counter_ref[1]
-# self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Attempting to acquire lock. Counter value before lock: {manga_date_file_counter_ref[0]}")
 with counter_lock:
-# self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Lock acquired. Counter value at lock acquisition: {manga_date_file_counter_ref[0]}")
 counter_val_for_filename = manga_date_file_counter_ref[0]
 manga_date_file_counter_ref[0] += 1
-# self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Incremented counter. New counter value: {manga_date_file_counter_ref[0]}. Filename will use: {counter_val_for_filename}")
 filename_to_save_in_main_path = f"{counter_val_for_filename:03d}{original_ext}"
-# self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Lock released. Generated filename: {filename_to_save_in_main_path}")
 else:
 self.logger(f"⚠️ Manga Date Mode: Counter ref not provided or malformed for '{api_original_filename}'. Using original. Ref: {manga_date_file_counter_ref}")
 filename_to_save_in_main_path = clean_filename(api_original_filename)
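In the date-based branch above, `manga_date_file_counter_ref` is a two-element list holding a running integer and a `threading.Lock`, shared by all worker threads so the generated names stay sequential. A stripped-down sketch of that pattern with illustrative values:

    import threading

    # [next_number, lock] shared by every worker in one download run
    manga_date_file_counter_ref = [1, threading.Lock()]

    def next_date_based_name(original_ext: str) -> str:
        with manga_date_file_counter_ref[1]:          # serialize across threads
            value = manga_date_file_counter_ref[0]
            manga_date_file_counter_ref[0] += 1
        return f"{value:03d}{original_ext}"

    print(next_date_based_name(".jpg"))  # 001.jpg
    print(next_date_based_name(".png"))  # 002.png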
@@ -824,19 +789,11 @@ class PostProcessorWorker:
 if self.skip_rar and is_rar(api_original_filename):
 self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).")
 return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
-# --- Pre-Download Duplicate Handling ---
-# Skipping based on filename before download is removed to allow suffixing for files from different posts.
-# Hash-based skipping occurs after download.
-# Physical path existence is handled by suffixing logic later.
-# Ensure base target folder exists (used for .part file with multipart)
 try:
 os.makedirs(target_folder_path, exist_ok=True) # For .part file
 except OSError as e:
 self.logger(f" ❌ Critical error creating directory '{target_folder_path}': {e}. Skipping file '{api_original_filename}'.")
 return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None # Treat as skip
-# --- Download Attempt ---
 max_retries = 3
 retry_delay = 5
 downloaded_size_bytes = 0
@@ -869,8 +826,6 @@ class PostProcessorWorker:
 if attempt_multipart:
 response.close()
 self._emit_signal('file_download_status', False)
-# .part file is always based on the main target_folder_path and filename_to_save_in_main_path
 mp_save_path_base_for_part = os.path.join(target_folder_path, filename_to_save_in_main_path)
 mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts(
 file_url, mp_save_path_base_for_part, total_size_bytes, num_parts_for_file, headers, api_original_filename,
@@ -931,8 +886,6 @@ class PostProcessorWorker:
 if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close(); break
 finally:
 self._emit_signal('file_download_status', False)
-# Final progress update for single stream
 final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes
 self._emit_signal('file_progress', api_original_filename, (downloaded_size_bytes, final_total_for_progress))
@@ -944,8 +897,6 @@ class PostProcessorWorker:
 if not download_successful_flag:
 self.logger(f"❌ Download failed for '{api_original_filename}' after {max_retries + 1} attempts.")
 if file_content_bytes: file_content_bytes.close()
-# Check if this failure is one we want to mark for later retry
 if isinstance(last_exception_for_retry_later, http.client.IncompleteRead):
 self.logger(f" Marking '{api_original_filename}' for potential retry later due to IncompleteRead.")
 retry_later_details = {
@@ -964,43 +915,29 @@ class PostProcessorWorker:
 return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None # Generic failure
 if self._check_pause(f"Post-download hash check for '{api_original_filename}'"): return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
-# --- Universal Post-Download Hash Check ---
 with self.downloaded_file_hashes_lock:
 if calculated_file_hash in self.downloaded_file_hashes:
 self.logger(f" -> Skip Saving Duplicate (Hash Match): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...).")
 with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark logical name
 if file_content_bytes: file_content_bytes.close()
-# If it was a multipart download, its .part file needs cleanup
 if not isinstance(file_content_bytes, BytesIO): # Indicates multipart download
 part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
 if os.path.exists(part_file_to_remove):
 try: os.remove(part_file_to_remove);
 except OSError: self.logger(f" -> Failed to remove .part file for hash duplicate: {part_file_to_remove}") # type: ignore
 return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
-# --- Determine Save Location and Final Filename ---
 effective_save_folder = target_folder_path # Default: main character/post folder
-# filename_to_save_in_main_path is the logical name after cleaning, manga styling, word removal
 filename_after_styling_and_word_removal = filename_to_save_in_main_path
-# "Move" logic and "Duplicate" subfolder logic removed.
-# effective_save_folder will always be target_folder_path.
 try: # Ensure the chosen save folder (main or Duplicate) exists
 os.makedirs(effective_save_folder, exist_ok=True)
 except OSError as e:
 self.logger(f" ❌ Critical error creating directory '{effective_save_folder}': {e}. Skipping file '{api_original_filename}'.")
 if file_content_bytes: file_content_bytes.close()
-# Cleanup .part file if multipart
 if not isinstance(file_content_bytes, BytesIO):
 part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
 if os.path.exists(part_file_to_remove): os.remove(part_file_to_remove)
 return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
-# --- Image Compression ---
-# This operates on file_content_bytes (which is BytesIO or a file handle from multipart)
-# It might change filename_after_styling_and_word_removal's extension (e.g., .jpg to .webp)
-# and returns new data_to_write_after_compression (BytesIO) or original file_content_bytes.
 data_to_write_after_compression = file_content_bytes
 filename_after_compression = filename_after_styling_and_word_removal
@@ -1029,33 +966,21 @@ class PostProcessorWorker:
 except Exception as comp_e:
 self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); file_content_bytes.seek(0)
 data_to_write_after_compression = file_content_bytes # Use original
-# --- Final Numeric Suffixing in the effective_save_folder ---
 final_filename_on_disk = filename_after_compression # This is the name after potential compression
-# If Manga Date Based style, we trust the counter from main.py.
-# Suffixing should not be needed if the counter initialization was correct.
-# If a file with the generated DDD.ext name exists, it will be overwritten.
 if not (self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED):
 temp_base, temp_ext = os.path.splitext(final_filename_on_disk)
 suffix_counter = 1
-# Check for existing file and apply suffix only if not in date-based manga mode
 while os.path.exists(os.path.join(effective_save_folder, final_filename_on_disk)):
 final_filename_on_disk = f"{temp_base}_{suffix_counter}{temp_ext}"
 suffix_counter += 1
 if final_filename_on_disk != filename_after_compression: # Log if a suffix was applied
 self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')")
-# else: for STYLE_DATE_BASED, final_filename_on_disk remains filename_after_compression.
 if self._check_pause(f"File saving for '{final_filename_on_disk}'"): return 0, 1, final_filename_on_disk, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
-# --- Save File ---
 final_save_path = os.path.join(effective_save_folder, final_filename_on_disk)
 try:
-# data_to_write_after_compression is BytesIO (single stream, or compressed multipart)
-# OR it's the original file_content_bytes (which is a file handle if uncompressed multipart)
 if data_to_write_after_compression is file_content_bytes and not isinstance(file_content_bytes, BytesIO):
-# This means uncompressed multipart download. Original .part file handle is file_content_bytes.
-# The .part file is at target_folder_path/filename_to_save_in_main_path.part
 original_part_file_actual_path = file_content_bytes.name
 file_content_bytes.close() # Close handle first
 os.rename(original_part_file_actual_path, final_save_path)
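The suffixing loop in this hunk only probes the target folder: if `image.jpg` already exists it tries `image_1.jpg`, then `image_2.jpg`, and so on. A self-contained sketch of that walk (folder and filename are placeholders):

    import os

    def next_free_name(folder: str, filename: str) -> str:
        base, ext = os.path.splitext(filename)
        candidate, suffix = filename, 1
        while os.path.exists(os.path.join(folder, candidate)):
            candidate = f"{base}_{suffix}{ext}"
            suffix += 1
        return candidate  # "image.jpg" -> "image_1.jpg" if taken, etc.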
@@ -1063,8 +988,6 @@ class PostProcessorWorker:
 else: # Single stream download, or compressed multipart. Write from BytesIO.
 with open(final_save_path, 'wb') as f_out:
 f_out.write(data_to_write_after_compression.getvalue())
-# If original was multipart and then compressed, clean up original .part file
 if data_to_write_after_compression is not file_content_bytes and not isinstance(file_content_bytes, BytesIO):
 original_part_file_actual_path = file_content_bytes.name
 file_content_bytes.close()
@@ -1074,10 +997,8 @@ class PostProcessorWorker:
 with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash)
 with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Track by logical name
-# The counter for STYLE_DATE_BASED is now incremented *before* filename generation, under lock.
 final_filename_saved_for_return = final_filename_on_disk
 self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'")
-# Session-wide base name tracking removed.
 time.sleep(0.05) # Brief pause after successful save
 return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SUCCESS, None
 except Exception as save_err:
@@ -1087,10 +1008,8 @@ class PostProcessorWorker:
 except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
 return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None # Treat save fail as skip
 finally:
-# Ensure all handles are closed
 if data_to_write_after_compression and hasattr(data_to_write_after_compression, 'close'):
 data_to_write_after_compression.close()
-# If original file_content_bytes was a different handle (e.g. multipart before compression) and not closed yet
 if file_content_bytes and file_content_bytes is not data_to_write_after_compression and hasattr(file_content_bytes, 'close'):
 try:
 if not file_content_bytes.closed: # Check if already closed
@@ -1101,10 +1020,7 @@ class PostProcessorWorker:
 def process(self):
 if self._check_pause(f"Post processing for ID {self.post.get('id', 'N/A')}"): return 0,0,[], []
 if self.check_cancel(): return 0, 0, [], []
-# Get the potentially updated character filters at the start of processing this post
 current_character_filters = self._get_current_character_filters()
-# self.logger(f"DEBUG: Post {post_id}, Worker using filters: {[(f['name'], f['aliases']) for f in current_character_filters]}")
 kept_original_filenames_for_log = []
 retryable_failures_this_post = [] # New list to store retryable failure details
@@ -1132,40 +1048,30 @@ class PostProcessorWorker:
 post_is_candidate_by_title_char_match = False
 char_filter_that_matched_title = None
 post_is_candidate_by_comment_char_match = False
-# New variables for CHAR_SCOPE_COMMENTS file-first logic
 post_is_candidate_by_file_char_match_in_comment_scope = False
 char_filter_that_matched_file_in_comment_scope = None
 char_filter_that_matched_comment = None
 if current_character_filters and \
 (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH):
-# self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. Scope: {self.char_filter_scope}")
 if self._check_pause(f"Character title filter for post {post_id}"): return 0, num_potential_files_in_post, [], []
 for idx, filter_item_obj in enumerate(current_character_filters):
 if self.check_cancel(): break
-# self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}")
 terms_to_check_for_title = list(filter_item_obj["aliases"])
 if filter_item_obj["is_group"]:
 if filter_item_obj["name"] not in terms_to_check_for_title:
 terms_to_check_for_title.append(filter_item_obj["name"])
 unique_terms_for_title_check = list(set(terms_to_check_for_title))
-# self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}")
 for term_to_match in unique_terms_for_title_check:
-# self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'")
 match_found_for_term = is_title_match_for_character(post_title, term_to_match)
-# self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}")
 if match_found_for_term:
 post_is_candidate_by_title_char_match = True
 char_filter_that_matched_title = filter_item_obj
 self.logger(f" Post title matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}', Scope: {self.char_filter_scope}). Post is candidate.")
 break
 if post_is_candidate_by_title_char_match: break
-# self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}")
-# --- Populate all_files_from_post_api before character filter logic that needs it ---
-# This is needed for the file-first check in CHAR_SCOPE_COMMENTS
 all_files_from_post_api_for_char_check = []
 api_file_domain_for_char_check = urlparse(self.api_url_input).netloc
 if not api_file_domain_for_char_check or not any(d in api_file_domain_for_char_check.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
@@ -1181,7 +1087,6 @@ class PostProcessorWorker:
 original_api_att_name = att_info.get('name') or os.path.basename(att_info['path'].lstrip('/'))
 if original_api_att_name:
 all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_att_name})
-# --- End population of all_files_from_post_api_for_char_check ---
 if current_character_filters and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
@@ -1258,8 +1163,6 @@ class PostProcessorWorker:
 self.logger(f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match}")
 else: # post_is_candidate_by_file_char_match_in_comment_scope was True
 self.logger(f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id}' because a file match already made it a candidate.")
-# --- Skip Post Logic based on Title or Comment Scope (if filters are active) ---
 if current_character_filters: # Check if any filters are defined
 if self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
 self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
@@ -1278,9 +1181,6 @@ class PostProcessorWorker:
 post_title_lower = post_title.lower()
 for skip_word in self.skip_words_list:
 if skip_word.lower() in post_title_lower:
-# This is a skip by "skip_words_list", not by character filter.
-# If you want these in the "Missed Character Log" too, you'd add a signal emit here.
-# For now, sticking to the request for character filter misses.
 self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'. Scope: {self.skip_words_scope}")
 return 0, num_potential_files_in_post, [], []
@@ -1302,7 +1202,6 @@ class PostProcessorWorker:
 log_reason_for_folder = ""
 if self.char_filter_scope == CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment:
-# For CHAR_SCOPE_COMMENTS, prioritize file match for folder name if it happened
 if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope:
 primary_char_filter_for_folder = char_filter_that_matched_file_in_comment_scope
 log_reason_for_folder = "Matched char filter in filename (Comments scope)"
@@ -1312,25 +1211,18 @@ class PostProcessorWorker:
 elif (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and char_filter_that_matched_title: # Existing logic for other scopes
 primary_char_filter_for_folder = char_filter_that_matched_title
 log_reason_for_folder = "Matched char filter in title"
-# If scope is FILES, primary_char_filter_for_folder will be None here. Folder determined per file.
-# When determining base_folder_names_for_post_content without a direct character filter match:
 if primary_char_filter_for_folder:
 base_folder_names_for_post_content = [clean_folder_name(primary_char_filter_for_folder["name"])]
 self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}")
 elif not current_character_filters: # No char filters defined, use generic logic
 derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
 if derived_folders:
-# Use the live KNOWN_NAMES from downloader_utils for generic title parsing
-# self.known_names is a snapshot from when the worker was created.
 base_folder_names_for_post_content.extend(match_folders_from_title(post_title, KNOWN_NAMES, self.unwanted_keywords))
 else:
 base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
 if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
 base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
 self.logger(f" Base folder name(s) for post content (Generic title parsing - no char filters): {', '.join(base_folder_names_for_post_content)}")
-# If char filters are defined, and scope is FILES, then base_folder_names_for_post_content remains empty.
-# The folder will be determined by char_filter_info_that_matched_file later.
 if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
 if self._check_pause(f"Folder keyword skip check for post {post_id}"): return 0, num_potential_files_in_post, []
@@ -1413,12 +1305,9 @@ class PostProcessorWorker:
 if not all_files_from_post_api:
 self.logger(f" -> No image thumbnails found for post {post_id} in thumbnail-only mode.")
 return 0, 0, [], []
-# Sort files within the post by original name if in Date Based manga mode
 if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED:
 def natural_sort_key_for_files(file_api_info):
 name = file_api_info.get('_original_name_for_log', '').lower()
-# Split into text and number parts for natural sorting (e.g., "file2.jpg" before "file10.jpg")
 return [int(text) if text.isdigit() else text for text in re.split('([0-9]+)', name)]
 all_files_from_post_api.sort(key=natural_sort_key_for_files)
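The sort key above splits a name into alternating text and digit runs so the numeric parts compare as integers. A quick standalone check of the ordering it produces (filenames are illustrative):

    import re

    def natural_key(name: str):
        return [int(part) if part.isdigit() else part
                for part in re.split('([0-9]+)', name.lower())]

    print(sorted(["file10.jpg", "file2.jpg", "file1.jpg"], key=natural_key))
    # ['file1.jpg', 'file2.jpg', 'file10.jpg'] rather than the plain lexical
    # ['file1.jpg', 'file10.jpg', 'file2.jpg']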
@@ -1489,12 +1378,10 @@ class PostProcessorWorker:
 char_filter_info_that_matched_file = char_filter_that_matched_title
 self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. Scope: Both (Title part).")
 else:
-# This part is for the "File" part of "Both" scope
 for filter_item_obj_both_file in current_character_filters:
 terms_to_check_for_file_both = list(filter_item_obj_both_file["aliases"])
 if filter_item_obj_both_file["is_group"] and filter_item_obj_both_file["name"] not in terms_to_check_for_file_both:
 terms_to_check_for_file_both.append(filter_item_obj_both_file["name"])
-# Ensure unique_terms_for_file_both_check is defined here
 unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both))
 for term_to_match in unique_terms_for_file_both_check:
@@ -1505,8 +1392,6 @@ class PostProcessorWorker:
 break
 if file_is_candidate_by_char_filter_scope: break
 elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
-# If the post is a candidate (either by file or comment under this scope), then this file is also a candidate.
-# The folder naming will use the filter that made the POST a candidate.
 if post_is_candidate_by_file_char_match_in_comment_scope: # Post was candidate due to a file match
 file_is_candidate_by_char_filter_scope = True
 char_filter_info_that_matched_file = char_filter_that_matched_file_in_comment_scope # Use the filter that matched a file in the post
@@ -1577,8 +1462,6 @@ class PostProcessorWorker:
 except Exception as exc_f:
 self.logger(f"❌ File download task for post {post_id} resulted in error: {exc_f}")
 total_skipped_this_post += 1
-# Clear file progress display after all files in a post are done
 self._emit_signal('file_progress', "", None)
 if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled.");
@@ -1670,7 +1553,6 @@ class DownloadThread(QThread):
 self.cookie_text = cookie_text # Store cookie text
 self.use_cookie = use_cookie # Store cookie setting
 self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store for passing to worker by DownloadThread
-# self.manga_date_scan_dir = manga_date_scan_dir # Store scan directory
 if self.compress_images and Image is None:
 self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
 self.compress_images = False
@@ -1704,14 +1586,9 @@ class DownloadThread(QThread):
 grand_total_skipped_files = 0
 grand_list_of_kept_original_filenames = []
 was_process_cancelled = False
-# Initialize manga_date_file_counter_ref if needed (moved from main.py)
-# This is now done within the DownloadThread's run method.
 current_manga_date_file_counter_ref = self.manga_date_file_counter_ref
 if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED and \
 not self.extract_links_only and current_manga_date_file_counter_ref is None: # Check if it needs calculation
-# series_scan_directory calculation logic (simplified for direct use here)
 series_scan_dir = self.output_dir
 if self.use_subfolders:
 if self.filter_character_list_objects and self.filter_character_list_objects[0] and self.filter_character_list_objects[0].get("name"):
@@ -1731,9 +1608,6 @@ class DownloadThread(QThread):
 if match: highest_num = max(highest_num, int(match.group(1)))
 current_manga_date_file_counter_ref = [highest_num + 1, threading.Lock()]
 self.logger(f" [Thread] Manga Date Mode: Initialized counter at {current_manga_date_file_counter_ref[0]}.")
-# This DownloadThread (being a QThread) will use its own signals object
-# to communicate with PostProcessorWorker if needed.
 worker_signals_obj = PostProcessorSignals()
 try:
 worker_signals_obj.progress_signal.connect(self.progress_signal)
@@ -1841,7 +1715,6 @@ class DownloadThread(QThread):
 worker_signals_obj.external_link_signal.disconnect(self.external_link_signal)
 worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
 worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal)
-# No need to disconnect retryable_file_failed_signal from worker_signals_obj as it's not on it
 except (TypeError, RuntimeError) as e:
 self.logger(f" Note during DownloadThread signal disconnection: {e}")

main.py: 699 changes (file diff suppressed because it is too large).

@@ -33,21 +33,13 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
 logger_func(f" [Chunk {part_num + 1}/{total_parts}] Download resumed.")
 chunk_headers = headers.copy()
-# end_byte can be -1 for 0-byte files, meaning download from start_byte to end of file (which is start_byte itself)
 if end_byte != -1 : # For 0-byte files, end_byte might be -1, Range header should not be set or be 0-0
 chunk_headers['Range'] = f"bytes={start_byte}-{end_byte}"
 elif start_byte == 0 and end_byte == -1: # Specifically for 0-byte files
-# Some servers might not like Range: bytes=0--1.
-# For a 0-byte file, we might not even need a range header, or Range: bytes=0-0
-# Let's try without for 0-byte, or rely on server to handle 0-0 if Content-Length was 0.
-# If Content-Length was 0, the main function might handle it directly.
-# This chunking logic is primarily for files > 0 bytes.
-# For now, if end_byte is -1, it implies a 0-byte file, so we expect 0 bytes.
 pass
 bytes_this_chunk = 0
-# last_progress_emit_time_for_chunk = time.time() # Replaced by global_emit_time_ref logic
 last_speed_calc_time = time.time()
 bytes_at_last_speed_calc = 0
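Each chunk worker receives an inclusive `start_byte`/`end_byte` pair and turns it into an HTTP `Range: bytes=start-end` header. The sketch below shows one way a caller could split a known Content-Length into such ranges, with the last part absorbing the remainder; this is an assumption for illustration, not the actual splitting code in `download_file_in_parts`:

    def split_into_ranges(total_size: int, num_parts: int):
        """Return inclusive (start_byte, end_byte) pairs covering total_size bytes."""
        if total_size <= 0:
            return [(0, -1)]                      # 0-byte sentinel, as handled above
        base = total_size // num_parts
        ranges, start = [], 0
        for part in range(num_parts):
            end = total_size - 1 if part == num_parts - 1 else start + base - 1
            ranges.append((start, end))
            start = end + 1
        return ranges

    print(split_into_ranges(10, 3))  # [(0, 2), (3, 5), (6, 9)]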
@ -71,18 +63,12 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
if attempt > 0: if attempt > 0:
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Retrying download (Attempt {attempt}/{MAX_CHUNK_DOWNLOAD_RETRIES})...") logger_func(f" [Chunk {part_num + 1}/{total_parts}] Retrying download (Attempt {attempt}/{MAX_CHUNK_DOWNLOAD_RETRIES})...")
time.sleep(CHUNK_DOWNLOAD_RETRY_DELAY * (2 ** (attempt - 1))) time.sleep(CHUNK_DOWNLOAD_RETRY_DELAY * (2 ** (attempt - 1)))
# Reset speed calculation on retry
last_speed_calc_time = time.time() last_speed_calc_time = time.time()
bytes_at_last_speed_calc = bytes_this_chunk # Current progress of this chunk bytes_at_last_speed_calc = bytes_this_chunk # Current progress of this chunk
# Enhanced log message for chunk start
log_msg = f" 🚀 [Chunk {part_num + 1}/{total_parts}] Starting download: bytes {start_byte}-{end_byte if end_byte != -1 else 'EOF'}" log_msg = f" 🚀 [Chunk {part_num + 1}/{total_parts}] Starting download: bytes {start_byte}-{end_byte if end_byte != -1 else 'EOF'}"
logger_func(log_msg) logger_func(log_msg)
# print(f"DEBUG_MULTIPART: {log_msg}") # Direct console print for debugging
response = requests.get(chunk_url, headers=chunk_headers, timeout=(10, 120), stream=True, cookies=cookies_for_chunk) response = requests.get(chunk_url, headers=chunk_headers, timeout=(10, 120), stream=True, cookies=cookies_for_chunk)
response.raise_for_status() response.raise_for_status()
# For 0-byte files, if end_byte was -1, we expect 0 content.
if start_byte == 0 and end_byte == -1 and int(response.headers.get('Content-Length', 0)) == 0: if start_byte == 0 and end_byte == -1 and int(response.headers.get('Content-Length', 0)) == 0:
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Confirmed 0-byte file.") logger_func(f" [Chunk {part_num + 1}/{total_parts}] Confirmed 0-byte file.")
with progress_data['lock']: with progress_data['lock']:
@ -112,7 +98,6 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
bytes_this_chunk += len(data_segment) bytes_this_chunk += len(data_segment)
with progress_data['lock']: with progress_data['lock']:
# Increment both the chunk's downloaded and the overall downloaded
progress_data['total_downloaded_so_far'] += len(data_segment) progress_data['total_downloaded_so_far'] += len(data_segment)
progress_data['chunks_status'][part_num]['downloaded'] = bytes_this_chunk progress_data['chunks_status'][part_num]['downloaded'] = bytes_this_chunk
progress_data['chunks_status'][part_num]['active'] = True progress_data['chunks_status'][part_num]['active'] = True
@ -125,17 +110,12 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
progress_data['chunks_status'][part_num]['speed_bps'] = current_speed_bps progress_data['chunks_status'][part_num]['speed_bps'] = current_speed_bps
last_speed_calc_time = current_time last_speed_calc_time = current_time
bytes_at_last_speed_calc = bytes_this_chunk bytes_at_last_speed_calc = bytes_this_chunk
# Throttle emissions globally for this file download
if emitter and (current_time - global_emit_time_ref[0] > 0.25): # Max ~4Hz for the whole file if emitter and (current_time - global_emit_time_ref[0] > 0.25): # Max ~4Hz for the whole file
global_emit_time_ref[0] = current_time # Update shared last emit time global_emit_time_ref[0] = current_time # Update shared last emit time
# Prepare and emit the status_list_copy
status_list_copy = [dict(s) for s in progress_data['chunks_status']] # Make a deep enough copy status_list_copy = [dict(s) for s in progress_data['chunks_status']] # Make a deep enough copy
if isinstance(emitter, queue.Queue): if isinstance(emitter, queue.Queue):
emitter.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)}) emitter.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)})
elif hasattr(emitter, 'file_progress_signal'): # PostProcessorSignals-like elif hasattr(emitter, 'file_progress_signal'): # PostProcessorSignals-like
# Ensure we read the latest total downloaded from progress_data
emitter.file_progress_signal.emit(api_original_filename, status_list_copy) emitter.file_progress_signal.emit(api_original_filename, status_list_copy)
return bytes_this_chunk, True return bytes_this_chunk, True
@@ -150,8 +130,6 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
except Exception as e:
logger_func(f" ❌ [Chunk {part_num + 1}/{total_parts}] Unexpected error: {e}\n{traceback.format_exc(limit=1)}")
return bytes_this_chunk, False
# Ensure final status is marked as inactive if loop finishes due to retries
with progress_data['lock']:
progress_data['chunks_status'][part_num]['active'] = False
progress_data['chunks_status'][part_num]['speed_bps'] = 0
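The emission throttle above is worth calling out: every chunk worker shares one last-emit timestamp (`global_emit_time_ref`), so the UI receives at most roughly four `file_progress` updates per second for the whole file regardless of how many chunks are active. A minimal sketch of that pattern, with the worker loop and the Qt signal path simplified away (names follow the excerpt; the queue payload shape is the one shown above):

```python
import queue
import threading
import time

def emit_progress_throttled(emitter, filename, progress_data, global_emit_time_ref,
                            min_interval=0.25):
    """Emit a snapshot of all chunk statuses, at most once per min_interval seconds."""
    now = time.monotonic()
    if now - global_emit_time_ref[0] <= min_interval:
        return  # another chunk thread emitted recently; skip this update
    global_emit_time_ref[0] = now
    with progress_data['lock']:
        snapshot = [dict(s) for s in progress_data['chunks_status']]
    if isinstance(emitter, queue.Queue):
        emitter.put({'type': 'file_progress', 'payload': (filename, snapshot)})

# Shared state created once per file; each chunk worker updates its own slot.
progress_data = {
    'lock': threading.Lock(),
    'chunks_status': [{'downloaded': 0, 'active': False, 'speed_bps': 0} for _ in range(4)],
}
global_emit_time_ref = [0.0]  # one-element list so every worker sees the same timestamp
```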
@@ -236,11 +214,8 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
if cancellation_event and cancellation_event.is_set():
logger_func(f" Multi-part download for '{api_original_filename}' cancelled by main event.")
all_chunks_successful = False
# Ensure a final progress update is sent with all chunks marked inactive (unless still active due to error)
if emitter_for_multipart:
with progress_data['lock']:
# Ensure all chunks are marked inactive for the final signal if download didn't fully succeed or was cancelled
status_list_copy = [dict(s) for s in progress_data['chunks_status']]
if isinstance(emitter_for_multipart, queue.Queue):
emitter_for_multipart.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)})
@@ -254,8 +229,6 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
for buf in iter(lambda: f_hash.read(4096*10), b''): # Read in larger buffers for hashing
md5_hasher.update(buf)
calculated_hash = md5_hasher.hexdigest()
# Return an open file handle for the caller to manage (e.g., for compression)
# The caller is responsible for closing this handle and renaming/deleting the .part file.
return True, total_bytes_from_chunks, calculated_hash, open(temp_file_path, 'rb')
else:
logger_func(f" ❌ Multi-part download failed for '{api_original_filename}'. Success: {all_chunks_successful}, Bytes: {total_bytes_from_chunks}/{total_size}. Cleaning up.")

View File

@@ -11,13 +11,13 @@ Built with **PyQt5**, this tool is ideal for users who want deep filtering, cust
---
## What's New in v3.5.0?
Version 3.5.0 focuses on enhancing access to content and providing even smarter organization:
### Cookie Management
- **Access Content:** Seamlessly download from Kemono/Coomer as if you were logged in by using your browser's cookies.
- **Flexible Input:**
  - Directly paste your cookie string (e.g., `name1=value1; name2=value2`).
  - Browse and load cookies from a `cookies.txt` file (Netscape format).
@@ -27,7 +27,7 @@ Version 3.5.0 focuses on enhancing access to content and providing even smarter
---
### Advanced `Known.txt` for Smart Folder Organization
- **Fine-Grained Control:** Take your automatic folder organization to the next level with a personalized list of names, series titles, and keywords in `Known.txt`.
- **Primary Names & Aliases:** Define a main folder name and link multiple aliases to it. For example, `([Power], powwr, pwr, Blood devil)` ensures any post matching "Power" or "powwr" (in title or filename, depending on settings) gets saved into a "Power" folder. Simple entries like `My Series` are also supported.
@@ -35,10 +35,10 @@ Version 3.5.0 focuses on enhancing access to content and providing even smarter
- **User-Friendly Management:** Add or remove primary names directly through the UI, or click "Open Known.txt" for advanced editing (e.g., setting up aliases).
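A grouped `Known.txt` entry such as `([Power], powwr, pwr, Blood devil)` maps several aliases onto one primary folder name. A hedged sketch of how one line could be parsed; the actual parser in the app may differ:

```python
import re

def parse_known_entry(line):
    """Return (primary_folder_name, match_terms) for one Known.txt line."""
    line = line.strip()
    grouped = re.match(r'^\(\[(?P<primary>[^\]]+)\]\s*,?\s*(?P<aliases>.*)\)$', line)
    if grouped:
        primary = grouped.group('primary').strip()
        aliases = [a.strip() for a in grouped.group('aliases').split(',') if a.strip()]
        return primary, [primary] + aliases
    return line, [line]  # simple entry: the name is both the folder and the only term

# parse_known_entry('([Power], powwr, pwr, Blood devil)')
#   -> ('Power', ['Power', 'powwr', 'pwr', 'Blood devil'])
# parse_known_entry('My Series') -> ('My Series', ['My Series'])
```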
---
## What's in v3.4.0? (Previous Update)
This version brings significant enhancements to manga/comic downloading, filtering capabilities, and user experience:
### Enhanced Manga/Comic Mode
- **New "Date Based" Filename Style:**
@@ -52,7 +52,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### "Remove Words from Filename" Feature
- Specify comma-separated words or phrases (case-insensitive) that will be automatically removed from filenames.
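This filter amounts to a case-insensitive search-and-delete over each filename before it is saved. A minimal sketch under that assumption (the whitespace clean-up at the end is illustrative):

```python
import re

def remove_words_from_filename(filename, remove_words_csv):
    """Strip each comma-separated word or phrase from the filename, ignoring case."""
    cleaned = filename
    for word in (w.strip() for w in remove_words_csv.split(',')):
        if word:
            cleaned = re.sub(re.escape(word), '', cleaned, flags=re.IGNORECASE)
    cleaned = re.sub(r'[_\s]{2,}', ' ', cleaned).strip(' _')  # collapse leftover separators
    return cleaned or filename

# remove_words_from_filename('patreon_Power_HD_final.png', 'patreon, HD')
#   -> 'Power final.png'
```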
@@ -60,7 +60,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### New "Only Archives" File Filter Mode
- Exclusively downloads `.zip` and `.rar` files.
@@ -68,7 +68,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### Improved Character Filter Scope - "Comments (Beta)"
- **File-First Check:** Prioritizes matching filenames before checking post comments for character names.
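In other words, the "Comments (Beta)" scope only spends time on comment text when no filename already matches. A simplified sketch of that order of checks (function and argument names are illustrative, and comments are assumed to be plain text here):

```python
def post_matches_character(filenames, comments_text, character_names):
    """File-first check: match filenames, then fall back to the post's comments."""
    lowered = [name.lower() for name in character_names]
    for filename in filenames:
        if any(name in filename.lower() for name in lowered):
            return True  # decided without touching the comments
    return any(name in (comments_text or '').lower() for name in lowered)

# post_matches_character(['power_sketch.png'], 'fanart of Makima today', ['Power', 'Makima'])
#   -> True (matched on the filename before the comments were scanned)
```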
@@ -76,7 +76,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### Refined "Missed Character Log"
- Displays a capitalized, alphabetized list of key terms from skipped post titles.
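One plausible way to produce such a list is to tokenize the skipped titles, drop filler words, and emit what remains capitalized and sorted. A rough sketch under that assumption (the stop-word list is illustrative, not the app's actual one):

```python
import re

STOP_WORDS = {'the', 'and', 'a', 'of', 'for', 'with', 'set', 'new'}  # illustrative only

def summarize_missed_titles(skipped_titles):
    """Return sorted, capitalized key terms pulled from skipped post titles."""
    terms = set()
    for title in skipped_titles:
        for word in re.findall(r"[A-Za-z][A-Za-z'\-]+", title):
            if len(word) > 2 and word.lower() not in STOP_WORDS:
                terms.add(word.capitalize())
    return sorted(terms)

# summarize_missed_titles(['makima and power beach set', 'New Reze sketch'])
#   -> ['Beach', 'Makima', 'Power', 'Reze', 'Sketch']
```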
@@ -84,25 +84,25 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### Enhanced Multi-part Download Progress
- Granular visibility into active chunk downloads and combined speed for large files.
---
### Updated Onboarding Tour
- Improved guide for new users, covering v3.4.0 features and existing core functions.
---
### Robust Configuration Path
- Settings and `Known.txt` are now stored in the same folder as the app.
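With the configuration kept next to the application, the directory can be resolved from the running executable when the app is frozen (for example a PyInstaller build) and from the script location otherwise. A hedged sketch of that lookup; the helper name is illustrative:

```python
import os
import sys

def get_app_config_dir():
    """Directory holding settings and Known.txt: next to the exe, or next to the script."""
    if getattr(sys, 'frozen', False):  # set by PyInstaller-style bundles
        return os.path.dirname(sys.executable)
    return os.path.dirname(os.path.abspath(__file__))

KNOWN_TXT_PATH = os.path.join(get_app_config_dir(), 'Known.txt')
```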
---
## Core Features
---
@@ -122,7 +122,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### Smart Filtering
- **Character Name Filtering:**
  - Use `Tifa, Aerith` or group `(Boa, Hancock)` → folder `Boa Hancock`
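Grouped filter terms are effectively one character whose folder name joins the parts. A minimal sketch of that mapping (parsing details are assumed):

```python
def folder_name_for_filter(filter_term):
    """'(Boa, Hancock)' -> 'Boa Hancock'; plain terms map to themselves."""
    term = filter_term.strip()
    if term.startswith('(') and term.endswith(')'):
        parts = [p.strip() for p in term[1:-1].split(',') if p.strip()]
        return ' '.join(parts)
    return term

# folder_name_for_filter('(Boa, Hancock)') -> 'Boa Hancock'
# folder_name_for_filter('Tifa') -> 'Tifa'
```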
@@ -149,7 +149,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### Manga/Comic Mode (Creator Feeds Only)
- **Chronological Processing** — Oldest posts first
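Oldest-first ordering keeps page numbering in reading order. A sketch of the sort, assuming each post dict carries an ISO-8601 `published` timestamp (an assumption about the feed data, not a documented guarantee):

```python
def posts_oldest_first(posts):
    """Sort creator-feed posts chronologically (oldest first) for manga/comic mode."""
    # ISO-8601 strings compare lexicographically in date order; missing dates sort first.
    return sorted(posts, key=lambda post: post.get('published') or '')

# for page_number, post in enumerate(posts_oldest_first(all_posts), start=1):
#     ...  # name files by page_number or post date, per the chosen filename style
```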
@@ -162,7 +162,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### Folder Structure & Naming
- **Subfolders:**
  - Auto-created based on character name, post title, or `Known.txt`
@@ -173,7 +173,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### Thumbnail & Compression Tools
- **Download Thumbnails Only**
@@ -182,7 +182,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### Performance Features
- **Multithreading:**
  - For both post processing and file downloading
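Separating the two stages means several posts can be parsed while earlier files are still transferring. A simplified sketch with two `ThreadPoolExecutor` pools; the pool sizes and the `process_post`/`download_file` callables are illustrative, not the app's actual API:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def download_creator(posts, process_post, download_file, post_workers=4, file_workers=6):
    """Parse posts in one pool and fan their file tasks out to a second pool."""
    with ThreadPoolExecutor(max_workers=post_workers) as post_pool, \
         ThreadPoolExecutor(max_workers=file_workers) as file_pool:
        file_futures = []
        post_futures = [post_pool.submit(process_post, post) for post in posts]
        for done in as_completed(post_futures):
            for url, save_path in done.result():  # each post yields (url, save_path) tasks
                file_futures.append(file_pool.submit(download_file, url, save_path))
        for future in as_completed(file_futures):
            future.result()  # surface any download error
```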
@@ -194,7 +194,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### Logging & Progress
- **Real-time Logs:** Activity, errors, skipped posts
@@ -206,7 +206,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
### Config System
- **`Known.txt` for Smart Folder Naming:**
  - A user-editable file (`Known.txt`) stores a list of preferred names, series titles, or keywords.
@@ -221,7 +221,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
---
## Installation
---
@@ -241,7 +241,7 @@ pip install PyQt5 requests Pillow
***
## **Build a Standalone Executable (Optional)**
1. Install PyInstaller:
```bash
@@ -257,14 +257,14 @@ pyinstaller --name "Kemono Downloader" --onefile --windowed --icon="Kemono.ico"
***
## **Config Files**
- `Known.txt` — character/show names used for folder organization
  - Supports simple names (e.g., `My Series`) and grouped names with a primary folder name and aliases (e.g., `([Primary Folder Name], alias1, alias2)`).
***
## **Feedback & Support**
Issues? Suggestions?
Open an issue on the [GitHub repository](https://github.com/Yuvi9587/kemono-downloader) or join our community.