This commit is contained in:
Yuvi9587
2025-05-18 16:12:19 +05:30
parent 32a12e8a09
commit decef6730f
4 changed files with 521 additions and 237 deletions

View File

@@ -30,6 +30,7 @@ from io import BytesIO
STYLE_POST_TITLE = "post_title"
STYLE_ORIGINAL_NAME = "original_name"
STYLE_DATE_BASED = "date_based" # For manga date-based sequential naming
SKIP_SCOPE_FILES = "files"
SKIP_SCOPE_POSTS = "posts"
@@ -313,16 +314,37 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
if cancellation_event and cancellation_event.is_set(): return
if all_posts_for_manga_mode:
logger(f" Manga Mode: Fetched {len(all_posts_for_manga_mode)} total posts. Reversing order...")
all_posts_for_manga_mode.reverse()
logger(f" Manga Mode: Fetched {len(all_posts_for_manga_mode)} total posts. Sorting by publication date (oldest first)...")
def sort_key_tuple(post):
    """Build the (date, id) sort key for a post.

    Primary key is the 'published' timestamp string; falls back to the
    'added' timestamp, and finally to an epoch-like placeholder so that
    posts with no dates sort first (oldest). Secondary key is the numeric
    post id (0 when the id is not a valid integer).
    """
    post_id_str = post.get('id', "0")
    date_for_sort = post.get('published')
    if not date_for_sort:
        added_date = post.get('added')
        if added_date:
            logger(f"   ⚠️ Post ID {post_id_str} missing 'published' date, using 'added' date '{added_date}' for primary sorting.")
            date_for_sort = added_date
        else:
            logger(f"   ⚠️ Post ID {post_id_str} missing both 'published' and 'added' dates. Placing at start of sort (using default earliest date).")
            date_for_sort = "0000-00-00T00:00:00"  # effectively the oldest possible key
    try:
        id_for_sort = int(post_id_str)
    except ValueError:
        logger(f"   ⚠️ Post ID '{post_id_str}' is not a valid integer for secondary sorting, using 0.")
        id_for_sort = 0
    return (date_for_sort, id_for_sort)
all_posts_for_manga_mode.sort(key=sort_key_tuple) # Sorts ascending by (date, id)
for i in range(0, len(all_posts_for_manga_mode), page_size):
if cancellation_event and cancellation_event.is_set():
logger(" Manga mode post yielding cancelled.")
break
yield all_posts_for_manga_mode[i:i + page_size]
else:
logger(" Manga Mode: No posts found to process.")
return
current_page_num = 1
@@ -428,13 +450,13 @@ class PostProcessorSignals(QObject):
class PostProcessorWorker:
def __init__(self, post_data, download_root, known_names,
filter_character_list,
filter_character_list, emitter, # Changed signals to emitter
unwanted_keywords, filter_mode, skip_zip, skip_rar,
use_subfolders, use_post_subfolders, target_post_id_from_initial_url, custom_folder_name,
compress_images, download_thumbnails, service, user_id,
api_url_input, cancellation_event, signals,
api_url_input, cancellation_event,
downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock,
skip_words_list=None,
skip_words_list=None,
skip_words_scope=SKIP_SCOPE_FILES,
show_external_links=False,
extract_links_only=False,
@@ -444,7 +466,8 @@ class PostProcessorWorker:
char_filter_scope=CHAR_SCOPE_FILES,
remove_from_filename_words_list=None,
allow_multipart_download=True,
): # Removed duplicate_file_mode and session-wide tracking
manga_date_file_counter_ref=None, # New parameter for date-based manga naming
):
self.post = post_data
self.download_root = download_root
self.known_names = known_names
@@ -463,7 +486,10 @@ class PostProcessorWorker:
self.user_id = user_id
self.api_url_input = api_url_input
self.cancellation_event = cancellation_event
self.signals = signals
self.emitter = emitter # Store the emitter
if not self.emitter:
# This case should ideally be prevented by the caller
raise ValueError("PostProcessorWorker requires an emitter (signals object or queue).")
self.skip_current_file_flag = skip_current_file_flag
self.downloaded_files = downloaded_files if downloaded_files is not None else set()
@@ -482,23 +508,35 @@ class PostProcessorWorker:
self.char_filter_scope = char_filter_scope
self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else []
self.allow_multipart_download = allow_multipart_download
# self.duplicate_file_mode and session-wide tracking removed
self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store the reference
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found.")
self.compress_images = False
def logger(self, message):
if self.signals and hasattr(self.signals, 'progress_signal'):
self.signals.progress_signal.emit(message)
def _emit_signal(self, signal_type_str, *payload_args):
"""Helper to emit signal either directly or via queue."""
if isinstance(self.emitter, queue.Queue):
self.emitter.put({'type': signal_type_str, 'payload': payload_args})
elif self.emitter and hasattr(self.emitter, f"{signal_type_str}_signal"):
# Assuming emitter is a QObject with pyqtSignal attributes
# e.g., emitter.progress_signal.emit(*payload_args)
signal_attr = getattr(self.emitter, f"{signal_type_str}_signal")
signal_attr.emit(*payload_args)
else:
print(f"(Worker Log - No Signal): {message}")
# Fallback or error logging if emitter is not recognized
print(f"(Worker Log - Unrecognized Emitter for {signal_type_str}): {payload_args[0] if payload_args else ''}")
def logger(self, message):
self._emit_signal('progress', message)
def check_cancel(self):
return self.cancellation_event.is_set()
def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event,
post_title="", file_index_in_post=0, num_files_in_this_post=1):
# emitter_for_file_ops, # This will be self.emitter
post_title="", file_index_in_post=0, num_files_in_this_post=1,
manga_date_file_counter_ref=None): # Added manga_date_file_counter_ref
was_original_name_kept_flag = False
final_filename_saved_for_return = ""
# target_folder_path is the base character/post folder.
@@ -537,8 +575,33 @@ class PostProcessorWorker:
else:
filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}"
else:
filename_to_save_in_main_path = clean_filename(api_original_filename)
filename_to_save_in_main_path = clean_filename(api_original_filename) # Fallback to original if no title
self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save_in_main_path}'.")
elif self.manga_filename_style == STYLE_DATE_BASED:
current_thread_name = threading.current_thread().name
self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Manga Date Mode. Counter Ref ID: {id(manga_date_file_counter_ref)}, Value before access: {manga_date_file_counter_ref}")
if manga_date_file_counter_ref is not None and len(manga_date_file_counter_ref) == 2:
counter_val_for_filename = -1
counter_lock = manga_date_file_counter_ref[1]
self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Attempting to acquire lock. Counter value before lock: {manga_date_file_counter_ref[0]}")
with counter_lock:
self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Lock acquired. Counter value at lock acquisition: {manga_date_file_counter_ref[0]}")
counter_val_for_filename = manga_date_file_counter_ref[0]
# Increment is done here, under lock, before this number is used by another thread.
# This number is now "reserved" for this file.
# If this file download fails, this number is "lost" (sequence will have a gap). This is acceptable.
manga_date_file_counter_ref[0] += 1
self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Incremented counter. New counter value: {manga_date_file_counter_ref[0]}. Filename will use: {counter_val_for_filename}")
filename_to_save_in_main_path = f"{counter_val_for_filename:03d}{original_ext}"
self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Lock released. Generated filename: {filename_to_save_in_main_path}")
else:
self.logger(f"⚠️ Manga Date Mode: Counter ref not provided or malformed for '{api_original_filename}'. Using original. Ref: {manga_date_file_counter_ref}")
# This log line below had a typo, fixed to reflect Date Mode context
filename_to_save_in_main_path = clean_filename(api_original_filename)
self.logger(f"⚠️ Manga mode (Date Based Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path}' for post {original_post_id_for_log}.")
else:
self.logger(f"⚠️ Manga mode: Unknown filename style '{self.manga_filename_style}'. Defaulting to original filename for '{api_original_filename}'.")
filename_to_save_in_main_path = clean_filename(api_original_filename)
@@ -629,9 +692,8 @@ class PostProcessorWorker:
if attempt_num_single_stream > 0:
self.logger(f" Retrying download for '{api_original_filename}' (Overall Attempt {attempt_num_single_stream + 1}/{max_retries + 1})...")
time.sleep(retry_delay * (2**(attempt_num_single_stream - 1)))
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
self.signals.file_download_status_signal.emit(True)
self._emit_signal('file_download_status', True)
response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True)
response.raise_for_status()
@@ -644,14 +706,14 @@ class PostProcessorWorker:
if attempt_multipart:
response.close()
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
self.signals.file_download_status_signal.emit(False)
self._emit_signal('file_download_status', False)
# .part file is always based on the main target_folder_path and filename_to_save_in_main_path
mp_save_path_base_for_part = os.path.join(target_folder_path, filename_to_save_in_main_path)
mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts(
file_url, mp_save_path_base_for_part, total_size_bytes, num_parts_for_file, headers,
api_original_filename, self.signals, self.cancellation_event, skip_event, self.logger
file_url, mp_save_path_base_for_part, total_size_bytes, num_parts_for_file, headers, api_original_filename,
emitter_for_multipart=self.emitter, # Pass the worker's emitter
cancellation_event=self.cancellation_event, skip_event=skip_event, logger_func=self.logger
)
if mp_success:
download_successful_flag = True
@@ -676,9 +738,8 @@ class PostProcessorWorker:
if chunk:
file_content_buffer.write(chunk); md5_hasher.update(chunk)
current_attempt_downloaded_bytes += len(chunk)
if time.time() - last_progress_time > 1 and total_size_bytes > 0 and \
self.signals and hasattr(self.signals, 'file_progress_signal'):
self.signals.file_progress_signal.emit(api_original_filename, (current_attempt_downloaded_bytes, total_size_bytes))
if time.time() - last_progress_time > 1 and total_size_bytes > 0:
self._emit_signal('file_progress', api_original_filename, (current_attempt_downloaded_bytes, total_size_bytes))
last_progress_time = time.time()
if self.check_cancel() or (skip_event and skip_event.is_set()):
@@ -703,12 +764,11 @@ class PostProcessorWorker:
self.logger(f" ❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}")
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close(); break
finally:
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
self.signals.file_download_status_signal.emit(False)
self._emit_signal('file_download_status', False)
if self.signals and hasattr(self.signals, 'file_progress_signal'):
final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes
self.signals.file_progress_signal.emit(api_original_filename, (downloaded_size_bytes, final_total_for_progress))
# Final progress update for single stream
final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes
self._emit_signal('file_progress', api_original_filename, (downloaded_size_bytes, final_total_for_progress))
if self.check_cancel() or (skip_event and skip_event.is_set()):
self.logger(f" ⚠️ Download process interrupted for {api_original_filename}.")
@@ -787,14 +847,19 @@ class PostProcessorWorker:
# --- Final Numeric Suffixing in the effective_save_folder ---
final_filename_on_disk = filename_after_compression # This is the name after potential compression
temp_base, temp_ext = os.path.splitext(final_filename_on_disk)
suffix_counter = 1
while os.path.exists(os.path.join(effective_save_folder, final_filename_on_disk)):
final_filename_on_disk = f"{temp_base}_{suffix_counter}{temp_ext}"
suffix_counter += 1
if final_filename_on_disk != filename_after_compression:
self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')")
# If Manga Date Based style, we trust the counter from main.py.
# Suffixing should not be needed if the counter initialization was correct.
# If a file with the generated DDD.ext name exists, it will be overwritten.
if not (self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED):
temp_base, temp_ext = os.path.splitext(final_filename_on_disk)
suffix_counter = 1
# Check for existing file and apply suffix only if not in date-based manga mode
while os.path.exists(os.path.join(effective_save_folder, final_filename_on_disk)):
final_filename_on_disk = f"{temp_base}_{suffix_counter}{temp_ext}"
suffix_counter += 1
if final_filename_on_disk != filename_after_compression: # Log if a suffix was applied
self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')")
# else: for STYLE_DATE_BASED, final_filename_on_disk remains filename_after_compression.
# --- Save File ---
final_save_path = os.path.join(effective_save_folder, final_filename_on_disk)
@@ -824,7 +889,7 @@ class PostProcessorWorker:
with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash)
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Track by logical name
# The counter for STYLE_DATE_BASED is now incremented *before* filename generation, under lock.
final_filename_saved_for_return = final_filename_on_disk
self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'")
# Session-wide base name tracking removed.
@@ -1002,15 +1067,14 @@ class PostProcessorWorker:
if self.filter_character_list_objects:
if self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
if self.signals and hasattr(self.signals, 'missed_character_post_signal'):
self.signals.missed_character_post_signal.emit(post_title, "No title match for character filter")
self._emit_signal('missed_character_post', post_title, "No title match for character filter")
return 0, num_potential_files_in_post, []
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and \
not post_is_candidate_by_file_char_match_in_comment_scope and \
not post_is_candidate_by_comment_char_match: # MODIFIED: Check both file and comment match flags
self.logger(f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id}', Title '{post_title[:50]}...'")
if self.signals and hasattr(self.signals, 'missed_character_post_signal'):
self.signals.missed_character_post_signal.emit(post_title, "No character match in files or comments (Comments scope)")
self._emit_signal('missed_character_post', post_title, "No character match in files or comments (Comments scope)")
return 0, num_potential_files_in_post, []
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
@@ -1027,8 +1091,7 @@ class PostProcessorWorker:
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and \
not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title[:50]}' doesn't match filters.")
if self.signals and hasattr(self.signals, 'missed_character_post_signal'):
self.signals.missed_character_post_signal.emit(post_title, "Manga Mode: No title match for character filter (Title/Both scope)")
self._emit_signal('missed_character_post', post_title, "Manga Mode: No title match for character filter (Title/Both scope)")
return 0, num_potential_files_in_post, []
if not isinstance(post_attachments, list):
@@ -1099,9 +1162,8 @@ class PostProcessorWorker:
for link_url, link_text in unique_links_data.items():
platform = get_link_platform(link_url)
if platform not in scraped_platforms:
if self.signals and hasattr(self.signals, 'external_link_signal'):
self.signals.external_link_signal.emit(post_title, link_text, link_url, platform)
links_emitted_count +=1
self._emit_signal('external_link', post_title, link_text, link_url, platform)
links_emitted_count +=1
if links_emitted_count > 0: self.logger(f" 🔗 Found {links_emitted_count} potential external link(s) in post content.")
except Exception as e: self.logger(f"⚠️ Error parsing post content for links: {e}\n{traceback.format_exc(limit=2)}")
@@ -1147,6 +1209,16 @@ class PostProcessorWorker:
if not all_files_from_post_api:
self.logger(f" -> No image thumbnails found for post {post_id} in thumbnail-only mode.")
return 0, 0, []
# Sort files within the post by original name if in Date Based manga mode
if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED:
def natural_sort_key_for_files(file_api_info):
    """Natural-order sort key on the file's logged original name.

    Splits the lower-cased name into alternating text/number tokens so
    that numeric runs compare as integers (e.g. "file2.jpg" sorts
    before "file10.jpg").
    """
    lowered_name = file_api_info.get('_original_name_for_log', '').lower()
    return [int(token) if token.isdigit() else token
            for token in re.split('([0-9]+)', lowered_name)]
all_files_from_post_api.sort(key=natural_sort_key_for_files)
self.logger(f" Manga Date Mode: Sorted {len(all_files_from_post_api)} files within post {post_id} by original name for sequential numbering.")
if not all_files_from_post_api:
@@ -1274,9 +1346,10 @@ class PostProcessorWorker:
headers,
post_id,
self.skip_current_file_flag,
post_title,
file_idx,
num_files_in_this_post_for_naming
post_title=post_title, # Keyword argument
manga_date_file_counter_ref=self.manga_date_file_counter_ref if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED else None,
file_index_in_post=file_idx, # Changed to keyword argument
num_files_in_this_post=num_files_in_this_post_for_naming # Changed to keyword argument
))
for future in as_completed(futures_list):
@@ -1298,8 +1371,8 @@ class PostProcessorWorker:
self.logger(f"❌ File download task for post {post_id} resulted in error: {exc_f}")
total_skipped_this_post += 1
if self.signals and hasattr(self.signals, 'file_progress_signal'):
self.signals.file_progress_signal.emit("", None)
# Clear file progress display after all files in a post are done
self._emit_signal('file_progress', "", None)
if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled.");
else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}")
@@ -1338,7 +1411,8 @@ class DownloadThread(QThread):
char_filter_scope=CHAR_SCOPE_FILES,
remove_from_filename_words_list=None,
allow_multipart_download=True,
): # Removed duplicate_file_mode and session-wide tracking
manga_date_file_counter_ref=None, # New parameter
):
super().__init__()
self.api_url_input = api_url_input
self.output_dir = output_dir
@@ -1379,7 +1453,7 @@ class DownloadThread(QThread):
self.char_filter_scope = char_filter_scope
self.remove_from_filename_words_list = remove_from_filename_words_list
self.allow_multipart_download = allow_multipart_download
# self.duplicate_file_mode and session-wide tracking removed
self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store for passing to worker
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
self.compress_images = False
@@ -1404,13 +1478,15 @@ class DownloadThread(QThread):
grand_list_of_kept_original_filenames = []
was_process_cancelled = False
# This DownloadThread (being a QThread) will use its own signals object
# to communicate with PostProcessorWorker if needed.
worker_signals_obj = PostProcessorSignals()
try:
worker_signals_obj.progress_signal.connect(self.progress_signal)
worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal) # New connection
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
self.logger(" Starting post fetch (single-threaded download process)...")
post_generator = download_from_api(
@@ -1441,8 +1517,8 @@ class DownloadThread(QThread):
compress_images=self.compress_images, download_thumbnails=self.download_thumbnails,
service=self.service, user_id=self.user_id,
api_url_input=self.api_url_input,
cancellation_event=self.cancellation_event,
signals=worker_signals_obj,
cancellation_event=self.cancellation_event, # emitter is PostProcessorSignals for single-thread
emitter=worker_signals_obj, # Pass the signals object as the emitter
downloaded_files=self.downloaded_files,
downloaded_file_hashes=self.downloaded_file_hashes,
downloaded_files_lock=self.downloaded_files_lock,
@@ -1458,7 +1534,8 @@ class DownloadThread(QThread):
char_filter_scope=self.char_filter_scope,
remove_from_filename_words_list=self.remove_from_filename_words_list,
allow_multipart_download=self.allow_multipart_download,
) # Removed duplicate_file_mode and session-wide tracking
manga_date_file_counter_ref=self.manga_date_file_counter_ref, # Pass it here
)
try:
dl_count, skip_count, kept_originals_this_post = post_processing_worker.process()
grand_total_downloaded_files += dl_count
@@ -1494,7 +1571,7 @@ class DownloadThread(QThread):
worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
worker_signals_obj.external_link_signal.disconnect(self.external_link_signal)
worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal) # New disconnection
worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal)
except (TypeError, RuntimeError) as e:
self.logger(f" Note during DownloadThread signal disconnection: {e}")

242
main.py
View File

@@ -19,8 +19,8 @@ from PyQt5.QtGui import (
)
from PyQt5.QtWidgets import (
QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton,
QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget,
QRadioButton, QButtonGroup, QCheckBox, QSplitter, QSizePolicy, QDialog, QStackedWidget,
QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, QRadioButton, QButtonGroup, QCheckBox, QSplitter,
QDialog, QStackedWidget,
QFrame,
QAbstractButton
)
@@ -91,6 +91,7 @@ CONFIG_APP_NAME_MAIN = "ApplicationSettings"
MANGA_FILENAME_STYLE_KEY = "mangaFilenameStyleV1"
STYLE_POST_TITLE = "post_title"
STYLE_ORIGINAL_NAME = "original_name"
STYLE_DATE_BASED = "date_based" # New style for date-based naming
SKIP_WORDS_SCOPE_KEY = "skipWordsScopeV1"
ALLOW_MULTIPART_DOWNLOAD_KEY = "allowMultipartDownloadV1"
@@ -487,6 +488,11 @@ class DownloaderApp(QWidget):
self.download_counter = 0
self.skip_counter = 0
# For handling signals from worker threads via a queue
self.worker_to_gui_queue = queue.Queue()
self.gui_update_timer = QTimer(self)
self.actual_gui_signals = PostProcessorSignals() # Renamed from self.worker_signals
self.worker_signals = PostProcessorSignals()
self.prompt_mutex = QMutex()
self._add_character_response = None
@@ -561,23 +567,27 @@ class DownloaderApp(QWidget):
def _connect_signals(self):
if hasattr(self.worker_signals, 'progress_signal'):
self.worker_signals.progress_signal.connect(self.handle_main_log)
if hasattr(self.worker_signals, 'file_progress_signal'):
self.worker_signals.file_progress_signal.connect(self.update_file_progress_display)
if hasattr(self.worker_signals, 'missed_character_post_signal'): # New
self.worker_signals.missed_character_post_signal.connect(self.handle_missed_character_post)
if hasattr(self.worker_signals, 'external_link_signal'):
self.worker_signals.external_link_signal.connect(self.handle_external_link_signal)
# Signals from the GUI's perspective (emitted by _process_worker_queue or directly)
self.actual_gui_signals.progress_signal.connect(self.handle_main_log)
self.actual_gui_signals.file_progress_signal.connect(self.update_file_progress_display)
self.actual_gui_signals.missed_character_post_signal.connect(self.handle_missed_character_post)
self.actual_gui_signals.external_link_signal.connect(self.handle_external_link_signal)
self.actual_gui_signals.file_download_status_signal.connect(lambda status: None) # Placeholder if needed, or connect to UI
# Timer for processing the worker queue
self.gui_update_timer.timeout.connect(self._process_worker_queue)
self.gui_update_timer.start(100) # Check queue every 100ms
# Direct GUI signals
self.log_signal.connect(self.handle_main_log)
self.add_character_prompt_signal.connect(self.prompt_add_character)
self.character_prompt_response_signal.connect(self.receive_add_character_result)
self.overall_progress_signal.connect(self.update_progress_display)
self.finished_signal.connect(self.download_finished)
self.external_link_signal.connect(self.handle_external_link_signal)
self.file_progress_signal.connect(self.update_file_progress_display)
# self.external_link_signal.connect(self.handle_external_link_signal) # Covered by actual_gui_signals
# self.file_progress_signal.connect(self.update_file_progress_display) # Covered by actual_gui_signals
# UI element connections
if hasattr(self, 'character_search_input'): self.character_search_input.textChanged.connect(self.filter_character_list)
if hasattr(self, 'external_links_checkbox'): self.external_links_checkbox.toggled.connect(self.update_external_links_setting)
if hasattr(self, 'thread_count_input'): self.thread_count_input.textChanged.connect(self.update_multithreading_label)
@@ -608,8 +618,33 @@ class DownloaderApp(QWidget):
if self.char_filter_scope_toggle_button:
self.char_filter_scope_toggle_button.clicked.connect(self._cycle_char_filter_scope)
if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode)
if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode) # Keep this if it's separate
def _process_worker_queue(self):
    """Drain the worker-to-GUI queue and re-emit each message as a Qt signal.

    Called periodically (via gui_update_timer) on the GUI thread so that
    worker threads never touch Qt objects directly. Unknown message types
    are reported through log_signal; per-item failures are logged without
    stopping the drain.
    """
    # Map queue message types to the matching signal attribute name on
    # self.actual_gui_signals; looked up lazily per item.
    signal_attr_by_type = {
        'progress': 'progress_signal',
        'file_download_status': 'file_download_status_signal',
        'external_link': 'external_link_signal',
        'file_progress': 'file_progress_signal',
        'missed_character_post': 'missed_character_post_signal',
    }
    while not self.worker_to_gui_queue.empty():
        try:
            item = self.worker_to_gui_queue.get_nowait()
            signal_type = item.get('type')
            payload = item.get('payload', tuple())
            attr_name = signal_attr_by_type.get(signal_type)
            if attr_name is not None:
                getattr(self.actual_gui_signals, attr_name).emit(*payload)
            else:
                self.log_signal.emit(f"⚠️ Unknown signal type from worker queue: {signal_type}")
            self.worker_to_gui_queue.task_done()
        except queue.Empty:
            # Defensive: another consumer could race us between empty() and get_nowait().
            break
        except Exception as e:
            self.log_signal.emit(f"❌ Error processing worker queue: {e}")
def load_known_names_from_util(self):
global KNOWN_NAMES
@@ -719,7 +754,8 @@ class DownloaderApp(QWidget):
self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765")
self.link_input.setToolTip("Enter the full URL of a Kemono/Coomer creator's page or a specific post.\nExample (Creator): https://kemono.su/patreon/user/12345\nExample (Post): https://kemono.su/patreon/user/12345/post/98765")
self.link_input.textChanged.connect(self.update_custom_folder_visibility)
url_page_layout.addWidget(self.link_input, 1)
url_page_layout.addWidget(self.link_input, 1) # URL input takes available space
self.page_range_label = QLabel("Page Range:")
self.page_range_label.setStyleSheet("font-weight: bold; padding-left: 10px;")
@@ -1166,7 +1202,7 @@ class DownloaderApp(QWidget):
self.update_page_range_enabled_state()
if self.manga_mode_checkbox:
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked())
if hasattr(self, 'link_input'): self.link_input.textChanged.connect(self.update_page_range_enabled_state)
if hasattr(self, 'link_input'): self.link_input.textChanged.connect(lambda: self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)) # Also trigger manga UI update
self.load_known_names_from_util()
self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked())
if hasattr(self, 'radio_group') and self.radio_group.checkedButton():
@@ -1174,6 +1210,7 @@ class DownloaderApp(QWidget):
self._update_manga_filename_style_button_text()
self._update_skip_scope_button_text()
self._update_char_filter_scope_button_text()
self._update_multithreading_for_date_mode() # Ensure correct initial state
def _center_on_screen(self):
"""Centers the widget on the screen."""
@@ -1958,6 +1995,16 @@ class DownloaderApp(QWidget):
" Downloads as: \"001.jpg\", \"002.jpg\".\n\n"
"Click to change to: Post Title"
)
elif self.manga_filename_style == STYLE_DATE_BASED:
self.manga_rename_toggle_button.setText("Name: Date Based")
self.manga_rename_toggle_button.setToolTip(
"Manga Filename Style: Date Based\n\n"
"When Manga/Comic Mode is active for a creator feed:\n"
"- Files will be named sequentially (001.ext, 002.ext, ...) based on post publication order (oldest to newest).\n"
"- To ensure correct numbering, multithreading for post processing is automatically disabled when this style is active.\n\n"
"Click to change to: Post Title"
)
else:
self.manga_rename_toggle_button.setText("Name: Unknown Style")
self.manga_rename_toggle_button.setToolTip(
@@ -1971,17 +2018,20 @@ class DownloaderApp(QWidget):
current_style = self.manga_filename_style
new_style = ""
if current_style == STYLE_POST_TITLE:
if current_style == STYLE_POST_TITLE: # Title -> Original
new_style = STYLE_ORIGINAL_NAME
reply = QMessageBox.information(self, "Manga Filename Preference",
"Using 'Name: Post Title' (first file by title, others original) is recommended for Manga Mode.\n\n"
"Using 'Name: Original File' for all files might lead to less organized downloads if original names are inconsistent or non-sequential.\n\n"
"Proceed with using 'Name: Original File' for all files?",
QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
if reply == QMessageBox.No:
self.log_signal.emit(" Manga filename style change to 'Original File' cancelled by user.")
return
elif current_style == STYLE_ORIGINAL_NAME:
# The warning for original name style
# reply = QMessageBox.information(self, "Manga Filename Preference",
# "Using 'Name: Post Title' (first file by title, others original) is recommended for Manga Mode.\n\n"
# "Using 'Name: Original File' for all files might lead to less organized downloads if original names are inconsistent or non-sequential.\n\n"
# "Proceed with using 'Name: Original File' for all files?",
# QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
# if reply == QMessageBox.No:
# self.log_signal.emit(" Manga filename style change to 'Original File' cancelled by user.")
# return
elif current_style == STYLE_ORIGINAL_NAME: # Original -> Date
new_style = STYLE_DATE_BASED
elif current_style == STYLE_DATE_BASED: # Date -> Title
new_style = STYLE_POST_TITLE
else:
self.log_signal.emit(f"⚠️ Unknown current manga filename style: {current_style}. Resetting to default ('{STYLE_POST_TITLE}').")
@@ -1991,6 +2041,7 @@ class DownloaderApp(QWidget):
self.settings.setValue(MANGA_FILENAME_STYLE_KEY, self.manga_filename_style)
self.settings.sync()
self._update_manga_filename_style_button_text()
self._update_multithreading_for_date_mode() # Update multithreading state based on new style
self.log_signal.emit(f" Manga filename style changed to: '{self.manga_filename_style}'")
@@ -2036,6 +2087,7 @@ class DownloaderApp(QWidget):
if not enable_char_filter_widgets: self.character_input.clear()
if self.char_filter_scope_toggle_button:
self.char_filter_scope_toggle_button.setEnabled(enable_char_filter_widgets)
self._update_multithreading_for_date_mode() # Update multithreading state based on manga mode
def filter_character_list(self, search_text):
@@ -2067,6 +2119,29 @@ class DownloaderApp(QWidget):
self.thread_count_label.setEnabled(True)
self.update_multithreading_label(self.thread_count_input.text())
def _update_multithreading_for_date_mode(self):
"""
Checks if Manga Mode is ON and 'Date Based' style is selected.
If so, disables multithreading. Otherwise, enables it.

Invoked after the manga filename style changes and after the manga-mode
checkbox toggles, so the multithreading checkbox always reflects the
constraint that date-based sequential file numbering requires posts to
be processed single-threaded.
"""
# Guard: this can fire during construction before the widgets exist.
if not hasattr(self, 'manga_mode_checkbox') or not hasattr(self, 'use_multithreading_checkbox'):
return # UI elements not ready
manga_on = self.manga_mode_checkbox.isChecked()
is_date_style = (self.manga_filename_style == STYLE_DATE_BASED)
# NOTE(review): leading indentation was lost in this diff rendering; the
# nesting of the setChecked/setEnabled calls below is inferred — confirm
# against the full file.
if manga_on and is_date_style:
# Date-based numbering depends on strict post order, so force the
# multithreading checkbox off and lock it.
if self.use_multithreading_checkbox.isChecked() or self.use_multithreading_checkbox.isEnabled():
# Only log if a change is made or it was previously enabled
if self.use_multithreading_checkbox.isChecked():
self.log_signal.emit(" Manga Date Mode: Multithreading for post processing has been disabled to ensure correct sequential file numbering.")
self.use_multithreading_checkbox.setChecked(False)
self.use_multithreading_checkbox.setEnabled(False)
self._handle_multithreading_toggle(False) # Update label to show "1 Thread"
else:
# Restore user control when the date-mode constraint no longer applies.
if not self.use_multithreading_checkbox.isEnabled(): # Only re-enable if it was disabled by this logic
self.use_multithreading_checkbox.setEnabled(True)
self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) # Update label based on current state
def update_progress_display(self, total_posts, processed_posts):
if total_posts > 0:
@@ -2325,16 +2400,63 @@ class DownloaderApp(QWidget):
self.progress_label.setText("Progress: Initializing...")
self.manga_date_file_counter_obj = [1, threading.Lock()] # Default: [value, lock]
if manga_mode and self.manga_filename_style == STYLE_DATE_BASED and not extract_links_only:
# Determine the directory to scan for existing numbered files for this series
# This path should be the "series" root, before any "per-post" subfolders.
series_scan_directory = output_dir # Base download location
if use_subfolders: # If 'Separate Folders by Name/Title' is ON
# Try to get folder name from character filter (manga series title)
if filter_character_list_to_pass and filter_character_list_to_pass[0] and filter_character_list_to_pass[0].get("name"):
# Assuming the first filter is the series name for folder creation
series_folder_name = clean_folder_name(filter_character_list_to_pass[0]["name"])
series_scan_directory = os.path.join(series_scan_directory, series_folder_name)
elif service and user_id: # Fallback if no char filter, but subfolders are on
# This might group multiple series from one creator if no distinct char filter is used.
# The counter is per download operation, so this is consistent.
creator_based_folder_name = clean_folder_name(user_id) # Or a more specific creator name convention
series_scan_directory = os.path.join(series_scan_directory, creator_based_folder_name)
# If neither, series_scan_directory remains output_dir (files go directly there if use_subfolders is on but no name found)
# If use_subfolders is OFF, files go into output_dir. So, series_scan_directory remains output_dir.
highest_num = 0
if os.path.isdir(series_scan_directory):
self.log_signal.emit(f" Manga Date Mode: Scanning for existing numbered files in '{series_scan_directory}' and its subdirectories...")
for dirpath, _, filenames_in_dir in os.walk(series_scan_directory):
for filename_to_check in filenames_in_dir:
# Check the base name (without extension) for leading digits
base_name_no_ext = os.path.splitext(filename_to_check)[0]
match = re.match(r"(\d{3,})", base_name_no_ext) # Matches "001" from "001.jpg" or "001_13.jpg"
if match:
try:
num = int(match.group(1))
if num > highest_num:
highest_num = num
except ValueError:
continue
else:
self.log_signal.emit(f" Manga Date Mode: Scan directory '{series_scan_directory}' not found or is not a directory. Starting counter at 1.")
self.manga_date_file_counter_obj = [highest_num + 1, threading.Lock()] # [value, lock]
self.log_signal.emit(f" Manga Date Mode: Initialized file counter at {self.manga_date_file_counter_obj[0]}.")
effective_num_post_workers = 1
effective_num_file_threads_per_worker = 1
# Determine if multithreading for posts should be used
if post_id_from_url:
# Single post URL: no post workers, but file threads can be > 1
if use_multithreading_enabled_by_checkbox:
effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER))
else:
if use_multithreading_enabled_by_checkbox:
effective_num_post_workers = max(1, min(num_threads_from_gui, MAX_THREADS))
effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER))
# Creator feed
if manga_mode and self.manga_filename_style == STYLE_DATE_BASED:
# Force single post worker for date-based manga mode
effective_num_post_workers = 1
# File threads per worker can still be > 1 if user set it
effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER)) if use_multithreading_enabled_by_checkbox else 1
elif use_multithreading_enabled_by_checkbox: # Standard creator feed with multithreading enabled
effective_num_post_workers = max(1, min(num_threads_from_gui, MAX_THREADS)) # For posts
effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER)) # For files within each post worker
log_messages = ["="*40, f"🚀 Starting {'Link Extraction' if extract_links_only else ('Archive Download' if backend_filter_mode == 'archive' else 'Download')} @ {time.strftime('%Y-%m-%d %H:%M:%S')}", f" URL: {api_url}"]
@@ -2389,7 +2511,12 @@ class DownloaderApp(QWidget):
log_messages.append(f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).")
should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url
log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading_for_posts else 'Single-threaded (posts)'}")
# Adjust log message if date-based manga mode forced single thread
if manga_mode and self.manga_filename_style == STYLE_DATE_BASED and not post_id_from_url:
log_messages.append(f" Threading: Single-threaded (posts) - Enforced by Manga Date Mode")
should_use_multithreading_for_posts = False # Ensure this reflects the forced state
else:
log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading_for_posts else 'Single-threaded (posts)'}")
if should_use_multithreading_for_posts:
log_messages.append(f" Number of Post Worker Threads: {effective_num_post_workers}")
log_messages.append("="*40)
@@ -2432,9 +2559,10 @@ class DownloaderApp(QWidget):
'manga_mode_active': manga_mode,
'unwanted_keywords': unwanted_keywords_for_folders,
'cancellation_event': self.cancellation_event,
'signals': self.worker_signals,
# 'emitter' will be set based on single/multi-thread mode below
'manga_filename_style': self.manga_filename_style,
'num_file_threads_for_worker': effective_num_file_threads_per_worker,
'manga_date_file_counter_ref': self.manga_date_file_counter_obj if manga_mode and self.manga_filename_style == STYLE_DATE_BASED else None,
'allow_multipart_download': allow_multipart,
# 'duplicate_file_mode' and session-wide tracking removed
}
@@ -2442,9 +2570,11 @@ class DownloaderApp(QWidget):
try:
if should_use_multithreading_for_posts:
self.log_signal.emit(f" Initializing multi-threaded {'link extraction' if extract_links_only else 'download'} with {effective_num_post_workers} post workers...")
args_template['emitter'] = self.worker_to_gui_queue # For multi-threaded, use the queue
self.start_multi_threaded_download(num_post_workers=effective_num_post_workers, **args_template)
else:
self.log_signal.emit(f" Initializing single-threaded {'link extraction' if extract_links_only else 'download'}...")
# For single-threaded, DownloadThread creates its own PostProcessorSignals and passes it as emitter.
dt_expected_keys = [
'api_url_input', 'output_dir', 'known_names_copy', 'cancellation_event',
'filter_character_list', 'filter_mode', 'skip_zip', 'skip_rar',
@@ -2454,7 +2584,8 @@ class DownloaderApp(QWidget):
'downloaded_files_lock', 'downloaded_file_hashes_lock',
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'show_external_links', 'extract_links_only', 'num_file_threads_for_worker',
'start_page', 'end_page', 'target_post_id_from_initial_url', 'duplicate_file_mode',
'start_page', 'end_page', 'target_post_id_from_initial_url',
'manga_date_file_counter_ref', # Ensure this is passed for single thread mode
'manga_mode_active', 'unwanted_keywords', 'manga_filename_style',
'allow_multipart_download'
]
@@ -2478,7 +2609,6 @@ class DownloaderApp(QWidget):
if hasattr(self.download_thread, 'external_link_signal'): self.download_thread.external_link_signal.connect(self.handle_external_link_signal)
if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.connect(self.update_file_progress_display)
if hasattr(self.download_thread, 'missed_character_post_signal'): # New
self.download_thread.missed_character_post_signal.connect(self.handle_missed_character_post)
self.download_thread.start()
self.log_signal.emit("✅ Single download thread (for posts) started.")
@@ -2513,9 +2643,10 @@ class DownloaderApp(QWidget):
fetch_error_occurred = False
manga_mode_active_for_fetch = worker_args_template.get('manga_mode_active', False)
signals_for_worker = worker_args_template.get('signals')
if not signals_for_worker:
self.log_signal.emit("❌ CRITICAL ERROR: Signals object missing for worker in _fetch_and_queue_posts.");
# In multi-threaded mode, the emitter is the queue.
emitter_for_worker = worker_args_template.get('emitter') # This should be self.worker_to_gui_queue
if not emitter_for_worker: # Should not happen if logic in start_download is correct
self.log_signal.emit("❌ CRITICAL ERROR: Emitter (queue) missing for worker in _fetch_and_queue_posts.");
self.finished_signal.emit(0,0,True, []);
return
@@ -2572,13 +2703,13 @@ class DownloaderApp(QWidget):
ppw_expected_keys = [
'post_data', 'download_root', 'known_names', 'filter_character_list', 'unwanted_keywords',
'filter_mode', 'skip_zip', 'skip_rar', 'use_subfolders', 'use_post_subfolders',
'target_post_id_from_initial_url', 'custom_folder_name', 'compress_images',
'target_post_id_from_initial_url', 'custom_folder_name', 'compress_images', 'emitter',
'download_thumbnails', 'service', 'user_id', 'api_url_input',
'cancellation_event', 'signals', 'downloaded_files', 'downloaded_file_hashes',
'cancellation_event', 'downloaded_files', 'downloaded_file_hashes',
'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list',
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'show_external_links', 'extract_links_only', 'allow_multipart_download',
'num_file_threads', 'skip_current_file_flag',
'num_file_threads', 'skip_current_file_flag', 'manga_date_file_counter_ref',
'manga_mode_active', 'manga_filename_style'
]
# Ensure 'allow_multipart_download' is also considered for optional keys if it has a default in PostProcessorWorker
@@ -2586,7 +2717,7 @@ class DownloaderApp(QWidget):
'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'remove_from_filename_words_list',
'show_external_links', 'extract_links_only', 'duplicate_file_mode', # Added duplicate_file_mode here
'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style',
'processed_base_filenames_session_wide', 'processed_base_filenames_session_wide_lock' # Add these
'manga_date_file_counter_ref' # Add this
}
for post_data_item in all_posts_data:
@@ -2600,7 +2731,7 @@ class DownloaderApp(QWidget):
for key in ppw_expected_keys:
if key == 'post_data': worker_init_args[key] = post_data_item
elif key == 'num_file_threads': worker_init_args[key] = num_file_dl_threads_for_each_worker
elif key == 'signals': worker_init_args[key] = signals_for_worker
elif key == 'emitter': worker_init_args[key] = emitter_for_worker # Pass the queue
elif key in worker_args_template: worker_init_args[key] = worker_args_template[key]
elif key in ppw_optional_keys_with_defaults: pass
else: missing_keys.append(key)
@@ -2777,7 +2908,6 @@ class DownloaderApp(QWidget):
if kept_original_names_list is None:
kept_original_names_list = []
status_message = "Cancelled by user" if cancelled_by_user else "Finished"
summary_log = "="*40
@@ -2801,10 +2931,6 @@ class DownloaderApp(QWidget):
self.log_signal.emit(HTML_PREFIX + html_list_items)
self.log_signal.emit("="*40)
self.progress_label.setText(f"{status_message}: {total_downloaded} downloaded, {total_skipped} skipped."); self.file_progress_label.setText("")
if not cancelled_by_user: self._try_process_next_external_link()
if self.download_thread:
try:
if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.disconnect(self.handle_main_log)
@@ -2815,15 +2941,23 @@ class DownloaderApp(QWidget):
if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.disconnect(self.update_file_progress_display)
if hasattr(self.download_thread, 'missed_character_post_signal'): # New
self.download_thread.missed_character_post_signal.disconnect(self.handle_missed_character_post)
except (TypeError, RuntimeError) as e: self.log_signal.emit(f" Note during single-thread signal disconnection: {e}")
# Ensure these are cleared if the download_finished is for the single download thread
if self.download_thread and not self.download_thread.isRunning(): # Check if it was this thread
self.download_thread = None
except (TypeError, RuntimeError) as e:
self.log_signal.emit(f" Note during single-thread signal disconnection: {e}")
if not self.download_thread.isRunning(): # Check if it was this thread
self.download_thread = None
if self.thread_pool: self.log_signal.emit(" Ensuring worker thread pool is shut down..."); self.thread_pool.shutdown(wait=True, cancel_futures=True); self.thread_pool = None
self.progress_label.setText(f"{status_message}: {total_downloaded} downloaded, {total_skipped} skipped.")
self.file_progress_label.setText("")
if not cancelled_by_user: self._try_process_next_external_link()
if self.thread_pool:
self.log_signal.emit(" Ensuring worker thread pool is shut down...")
self.thread_pool.shutdown(wait=True, cancel_futures=True)
self.thread_pool = None
self.active_futures = []
self.set_ui_enabled(True); self.cancel_btn.setEnabled(False)
self.set_ui_enabled(True)
self.cancel_btn.setEnabled(False)
def toggle_active_log_view(self):
if self.current_log_view == 'progress':
@@ -2889,8 +3023,6 @@ class DownloaderApp(QWidget):
self._update_manga_filename_style_button_text()
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
self.log_signal.emit("✅ Application reset complete.")
def _reset_ui_to_defaults(self):
self.link_input.clear(); self.dir_input.clear(); self.custom_folder_input.clear(); self.character_input.clear();
self.skip_words_input.clear(); self.start_page_input.clear(); self.end_page_input.clear(); self.new_char_input.clear();
@@ -3007,7 +3139,6 @@ class DownloaderApp(QWidget):
self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
self.log_signal.emit(f" Multi-part download set to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}")
if __name__ == '__main__':
import traceback
try:
@@ -3071,4 +3202,3 @@ if __name__ == '__main__':
print(f"An unhandled exception occurred: {e}")
traceback.print_exc()
print("--- END CRITICAL ERROR ---")
sys.exit(1)

View File

@@ -13,14 +13,14 @@ DOWNLOAD_CHUNK_SIZE_ITER = 1024 * 256 # 256KB for iter_content within a chunk d
def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, headers,
part_num, total_parts, progress_data, cancellation_event, skip_event, logger,
signals=None, api_original_filename=None): # Added signals and api_original_filename
part_num, total_parts, progress_data, cancellation_event, skip_event,
logger_func, emitter=None, api_original_filename=None): # Renamed logger, signals to emitter
"""Downloads a single chunk of a file and writes it to the temp file."""
if cancellation_event and cancellation_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Download cancelled before start.")
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Download cancelled before start.")
return 0, False # bytes_downloaded, success
if skip_event and skip_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event triggered before start.")
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Skip event triggered before start.")
return 0, False
chunk_headers = headers.copy()
@@ -44,15 +44,15 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
for attempt in range(MAX_CHUNK_DOWNLOAD_RETRIES + 1):
if cancellation_event and cancellation_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during retry loop.")
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during retry loop.")
return bytes_this_chunk, False
if skip_event and skip_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event during retry loop.")
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Skip event during retry loop.")
return bytes_this_chunk, False
try:
if attempt > 0:
logger(f" [Chunk {part_num + 1}/{total_parts}] Retrying download (Attempt {attempt}/{MAX_CHUNK_DOWNLOAD_RETRIES})...")
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Retrying download (Attempt {attempt}/{MAX_CHUNK_DOWNLOAD_RETRIES})...")
time.sleep(CHUNK_DOWNLOAD_RETRY_DELAY * (2 ** (attempt - 1)))
# Reset speed calculation on retry
last_speed_calc_time = time.time()
@@ -60,14 +60,14 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
# Enhanced log message for chunk start
log_msg = f" 🚀 [Chunk {part_num + 1}/{total_parts}] Starting download: bytes {start_byte}-{end_byte if end_byte != -1 else 'EOF'}"
logger(log_msg)
logger_func(log_msg)
print(f"DEBUG_MULTIPART: {log_msg}") # Direct console print for debugging
response = requests.get(chunk_url, headers=chunk_headers, timeout=(10, 120), stream=True)
response.raise_for_status()
# For 0-byte files, if end_byte was -1, we expect 0 content.
if start_byte == 0 and end_byte == -1 and int(response.headers.get('Content-Length', 0)) == 0:
logger(f" [Chunk {part_num + 1}/{total_parts}] Confirmed 0-byte file.")
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Confirmed 0-byte file.")
with progress_data['lock']:
progress_data['chunks_status'][part_num]['active'] = False
progress_data['chunks_status'][part_num]['speed_bps'] = 0
@@ -77,10 +77,10 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
f.seek(start_byte)
for data_segment in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE_ITER):
if cancellation_event and cancellation_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during data iteration.")
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during data iteration.")
return bytes_this_chunk, False
if skip_event and skip_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event during data iteration.")
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Skip event during data iteration.")
return bytes_this_chunk, False
if data_segment:
f.write(data_segment)
@@ -103,26 +103,29 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
# Emit progress more frequently from within the chunk download
if current_time - last_progress_emit_time_for_chunk > 0.1: # Emit up to 10 times/sec per chunk
if signals and hasattr(signals, 'file_progress_signal'):
if emitter:
# Ensure we read the latest total downloaded from progress_data
# Send a copy of the chunks_status list
status_list_copy = [dict(s) for s in progress_data['chunks_status']] # Make a deep enough copy
signals.file_progress_signal.emit(api_original_filename, status_list_copy)
if isinstance(emitter, queue.Queue):
emitter.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)})
elif hasattr(emitter, 'file_progress_signal'): # PostProcessorSignals-like
emitter.file_progress_signal.emit(api_original_filename, status_list_copy)
last_progress_emit_time_for_chunk = current_time
return bytes_this_chunk, True
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Retryable error: {e}")
logger_func(f" ❌ [Chunk {part_num + 1}/{total_parts}] Retryable error: {e}")
if attempt == MAX_CHUNK_DOWNLOAD_RETRIES:
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Failed after {MAX_CHUNK_DOWNLOAD_RETRIES} retries.")
logger_func(f" ❌ [Chunk {part_num + 1}/{total_parts}] Failed after {MAX_CHUNK_DOWNLOAD_RETRIES} retries.")
return bytes_this_chunk, False
except requests.exceptions.RequestException as e: # Includes 4xx/5xx errors after raise_for_status
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Non-retryable error: {e}")
logger_func(f" ❌ [Chunk {part_num + 1}/{total_parts}] Non-retryable error: {e}")
return bytes_this_chunk, False
except Exception as e:
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Unexpected error: {e}\n{traceback.format_exc(limit=1)}")
logger_func(f" ❌ [Chunk {part_num + 1}/{total_parts}] Unexpected error: {e}\n{traceback.format_exc(limit=1)}")
return bytes_this_chunk, False
# Ensure final status is marked as inactive if loop finishes due to retries
with progress_data['lock']:
progress_data['chunks_status'][part_num]['active'] = False
@@ -130,15 +133,15 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
return bytes_this_chunk, False # Should be unreachable
def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
api_original_filename, signals, cancellation_event, skip_event, logger):
def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, api_original_filename,
emitter_for_multipart, cancellation_event, skip_event, logger_func): # Renamed signals, logger
"""
Downloads a file in multiple parts concurrently.
Returns: (download_successful_flag, downloaded_bytes, calculated_file_hash, temp_file_handle_or_None)
The temp_file_handle will be an open read-binary file handle to the .part file if successful, otherwise None.
It is the responsibility of the caller to close this handle and rename/delete the .part file.
"""
logger(f"⬇️ Initializing Multi-part Download ({num_parts} parts) for: '{api_original_filename}' (Size: {total_size / (1024*1024):.2f} MB)")
logger_func(f"⬇️ Initializing Multi-part Download ({num_parts} parts) for: '{api_original_filename}' (Size: {total_size / (1024*1024):.2f} MB)")
temp_file_path = save_path + ".part"
try:
@@ -146,7 +149,7 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
if total_size > 0:
f_temp.truncate(total_size) # Pre-allocate space
except IOError as e:
logger(f" ❌ Error creating/truncating temp file '{temp_file_path}': {e}")
logger_func(f" ❌ Error creating/truncating temp file '{temp_file_path}': {e}")
return False, 0, None, None
chunk_size_calc = total_size // num_parts
@@ -167,7 +170,7 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
chunk_actual_sizes.append(end - start + 1)
if not chunks_ranges and total_size > 0:
logger(f" ⚠️ No valid chunk ranges for multipart download of '{api_original_filename}'. Aborting multipart.")
logger_func(f" ⚠️ No valid chunk ranges for multipart download of '{api_original_filename}'. Aborting multipart.")
if os.path.exists(temp_file_path): os.remove(temp_file_path)
return False, 0, None, None
@@ -191,8 +194,9 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
chunk_futures.append(chunk_pool.submit(
_download_individual_chunk, chunk_url=file_url, temp_file_path=temp_file_path,
start_byte=start, end_byte=end, headers=headers, part_num=i, total_parts=num_parts,
progress_data=progress_data, cancellation_event=cancellation_event, skip_event=skip_event, logger=logger,
signals=signals, api_original_filename=api_original_filename # Pass them here
progress_data=progress_data, cancellation_event=cancellation_event, skip_event=skip_event,
logger_func=logger_func, emitter=emitter_for_multipart, # Pass emitter
api_original_filename=api_original_filename
))
for future in as_completed(chunk_futures):
@@ -201,21 +205,23 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
total_bytes_from_chunks += bytes_downloaded_this_chunk
if not success_this_chunk:
all_chunks_successful = False
# Progress is emitted from within _download_individual_chunk
if cancellation_event and cancellation_event.is_set():
logger(f" Multi-part download for '{api_original_filename}' cancelled by main event.")
logger_func(f" Multi-part download for '{api_original_filename}' cancelled by main event.")
all_chunks_successful = False
# Ensure a final progress update is sent with all chunks marked inactive (unless still active due to error)
if signals and hasattr(signals, 'file_progress_signal'):
if emitter_for_multipart:
with progress_data['lock']:
# Ensure all chunks are marked inactive for the final signal if download didn't fully succeed or was cancelled
status_list_copy = [dict(s) for s in progress_data['chunks_status']]
signals.file_progress_signal.emit(api_original_filename, status_list_copy)
status_list_copy = [dict(s) for s in progress_data['chunks_status']]
if isinstance(emitter_for_multipart, queue.Queue):
emitter_for_multipart.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)})
elif hasattr(emitter_for_multipart, 'file_progress_signal'): # PostProcessorSignals-like
emitter_for_multipart.file_progress_signal.emit(api_original_filename, status_list_copy)
if all_chunks_successful and (total_bytes_from_chunks == total_size or total_size == 0):
logger(f" ✅ Multi-part download successful for '{api_original_filename}'. Total bytes: {total_bytes_from_chunks}")
logger_func(f" ✅ Multi-part download successful for '{api_original_filename}'. Total bytes: {total_bytes_from_chunks}")
md5_hasher = hashlib.md5()
with open(temp_file_path, 'rb') as f_hash:
for buf in iter(lambda: f_hash.read(4096*10), b''): # Read in larger buffers for hashing
@@ -225,8 +231,8 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
# The caller is responsible for closing this handle and renaming/deleting the .part file.
return True, total_bytes_from_chunks, calculated_hash, open(temp_file_path, 'rb')
else:
logger(f" ❌ Multi-part download failed for '{api_original_filename}'. Success: {all_chunks_successful}, Bytes: {total_bytes_from_chunks}/{total_size}. Cleaning up.")
logger_func(f" ❌ Multi-part download failed for '{api_original_filename}'. Success: {all_chunks_successful}, Bytes: {total_bytes_from_chunks}/{total_size}. Cleaning up.")
if os.path.exists(temp_file_path):
try: os.remove(temp_file_path)
except OSError as e: logger(f" Failed to remove temp part file '{temp_file_path}': {e}")
except OSError as e: logger_func(f" Failed to remove temp part file '{temp_file_path}': {e}")
return False, total_bytes_from_chunks, None, None

245
readme.md
View File

@@ -1,141 +1,212 @@
# Kemono Downloader v3.3.0
# Kemono Downloader v3.4.0
A powerful, feature-rich GUI application for downloading content from **[Kemono.su](https://kemono.su)** and **[Coomer.party](https://coomer.party)**.
Built with **PyQt5**, this tool is ideal for users who want deep filtering, customizable folder structure, efficient downloads, and intelligent automation — all within a modern GUI.
Built with **PyQt5**, this tool is ideal for users who want deep filtering, customizable folder structures, efficient downloads, and intelligent automation — all within a modern, user-friendly graphical interface.
---
## 🔄 Recent Updates (v3.3.0)
## ✨ What's New in v3.4.0?
### Skipped Characters Review (Eye Toggle)
- After a download, you can toggle a log view to review characters or keywords that were skipped based on your filters.
- Helps catch overlooked content you might want to adjust filters for.
### Grouped Folder Naming
- You can group aliases together using parentheses.
- Example: `(Boa, Hancock), Robin` → Downloads for "Boa" and "Hancock" go into one folder: `Boa Hancock`.
- Great for creators who use inconsistent naming.
This version brings significant enhancements to manga/comic downloading, filtering capabilities, and user experience:
---
## 🖥 User Interface & Workflow
### 📖 Enhanced Manga/Comic Mode
### Clean PyQt5 GUI
- Simple and responsive interface
- Dark theme for long usage comfort
- Persistent settings saved between sessions
- Introductory tour for first-time users
- **New "Date Based" Filename Style:**
### Download Modes
- Download from:
- **Single Post URL**
- **Entire Creator Feed**
- Optional:
- **Page Range** for creator feeds
- **Custom folder name** for single-post downloads
- Perfect for truly sequential content! Files are named numerically (e.g., `001.jpg`, `002.jpg`, `003.jpg`...) across an *entire creator's feed*, strictly following post publication order.
- **Smart Numbering:** Automatically resumes from the highest existing number found in the series folder (and subfolders, if "Subfolder per Post" is enabled).
- **Guaranteed Order:** Disables multi-threading for post processing to ensure sequential accuracy.
- Works alongside the existing "Post Title" and "Original File Name" styles.
---
## 🧠 Smart Filtering
### ✂️ "Remove Words from Filename" Feature
### Character Name Filtering
- Input comma-separated names to only include relevant content.
- Filtering modes:
- **Files**: Checks filenames
- **Titles**: Checks post titles
- **Both**: Hybrid mode
- **Comments**: Also scans post comments for matches
- Specify comma-separated words or phrases (case-insensitive) that will be automatically removed from filenames.
### Skip Words
- Enter words to **exclude** files or posts.
- Modes: File-level, Post-level, or Both
- Helps exclude WIPs, previews, sketches, etc.
### File Type Filters
- Filter download targets by type:
- All
- Images/GIFs
- Videos
- Archives
- External Links (no downloads)
### Filename Cleanup
- Auto-remove unwanted keywords from filenames (e.g., `[HD]`, `patreon`)
- Example: `patreon, [HD], _final` transforms `AwesomeArt_patreon_[HD]_final.jpg` into `AwesomeArt.jpg`.
---
## 📚 Manga/Comic Mode
### 📦 New "Only Archives" File Filter Mode
Special handling for serialized content:
- Automatically fetches posts **oldest to newest**
- File naming options:
- Use **Post Title** (e.g., `MyChapter1.jpg`)
- Use **Original Filename** (e.g., `page_001.png`)
- Ignores page ranges and applies a full-feed scan
- Works best when paired with grouped name filters (e.g., series titles)
- Exclusively downloads `.zip` and `.rar` files.
- Automatically disables conflicting options like "Skip .zip/.rar" and external link logging.
---
## 📁 Folder Structure & Naming
### 🗣️ Improved Character Filter Scope - "Comments (Beta)"
- Auto-foldering by:
- Character name
- Post title
- Custom name (for post URLs)
- Optional:
- Subfolder per post
- Auto-detection and fallback from `Known.txt` if needed
- Smart cleaning of folder/file names to remove illegal characters
- **File-First Check:** Prioritizes matching filenames before checking post comments for character names.
- **Comment Fallback:** Only checks comments if no filename match is found, reducing unnecessary API calls.
---
## 🖼 Thumbnail & Compression Tools
### 🧐 Refined "Missed Character Log"
- **Thumbnail Mode**: Downloads only the preview thumbnails
- **Image Compression** (via Pillow):
- Large images auto-converted to WebP
- Only saved if final size is significantly smaller
- Displays a capitalized, alphabetized list of key terms from skipped post titles.
- Makes it easier to spot patterns or characters that might be unintentionally excluded.
---
## ⚙️ Performance Features
### 🚀 Enhanced Multi-part Download Progress
- **Multithreading**: Set number of threads for concurrent file and post downloads
- **Multi-part Downloads**:
- Large files split into multiple threads for faster retrieval
- Detailed chunk-level progress tracking
- Smart retries and fallback on failure
- Granular visibility into active chunk downloads and combined speed for large files.
---
## 📋 Logging & Progress
### 🗺️ Updated Onboarding Tour
- Real-time log output with two views:
- **Progress Log**
- **Missed Character Summary**
- Log filters external links and organizes them separately
- Export logs as `.txt` for backup/reference
- Auto-log failed/skipped files and links
- Improved guide for new users, covering v3.4.0 features and existing core functions.
---
## 🗃 Config System
### 🛡️ Robust Configuration Path
- `Known.txt`: Add frequently used names for fallback filtering and folder naming
- Auto-loaded and saved in system AppData (or local fallback)
- GUI for editing known names inside the app
- Settings and `Known.txt` are now stored in the system-standard application data folder (e.g., `AppData`, `~/.local/share`).
---
## 🖥️ Core Features
---
### User Interface & Workflow
- **Clean PyQt5 GUI** — Simple, modern, and dark-themed.
- **Persistent Settings** — Saves preferences between sessions.
- **Download Modes:**
- Single Post URL
- Entire Creator Feed
- **Flexible Options:**
- Specify Page Range (disabled in Manga Mode)
- Custom Folder Name for single posts
---
### 🧠 Smart Filtering
- **Character Name Filtering:**
- Use `Tifa, Aerith` or group `(Boa, Hancock)` → folder `Boa Hancock`
- **Filter Scopes:**
- `Files`
- `Title`
- `Both (Title then Files)`
- `Comments (Beta - Files first)`
- **Skip with Words:**
- Exclude with `WIP, sketch, preview`
- **Skip Scopes:**
- `Files`
- `Posts`
- `Both (Posts then Files)`
- **File Type Filters:**
- `All`, `Images/GIFs`, `Videos`, `📦 Only Archives`, `🔗 Only Links`
- **Filename Cleanup:**
- Remove illegal and unwanted characters or phrases
---
### 📚 Manga/Comic Mode (Creator Feeds Only)
- **Chronological Processing** — Oldest posts first
- **Filename Style Options:**
- `Name: Post Title (Default)`
- `Name: Original File`
- `Name: Date Based (New)`
- **Best With:** Character filters set to manga/series title
---
### 📁 Folder Structure & Naming
- **Subfolders:**
- Auto-created based on character name, post title, or `Known.txt`
- "Subfolder per Post" option for further nesting
- **Smart Naming:** Cleans invalid characters and structures logically
---
### 🖼️ Thumbnail & Compression Tools
- **Download Thumbnails Only**
- **Compress to WebP** (via Pillow)
- Converts large images to smaller WebP versions
---
### ⚙️ Performance Features
- **Multithreading:**
- For both post processing and file downloading
- **Multi-part Downloads:**
- Toggleable in GUI
- Splits large files into chunks
- Granular chunk-level progress display
---
### 📋 Logging & Progress
- **Real-time Logs:** Activity, errors, skipped posts
- **Missed Character Log:** Shows skipped keywords in easy-to-read list
- **External Links Log:** Shows links (unless disabled in some modes)
- **Export Links:** Save `.txt` of links (Only Links mode)
---
### 🗃️ Config System
- **Known.txt:**
- Stores names for smart folder suggestions
- Supports aliases via `(alias1, alias2)`
- **Stored in Standard App Data Path**
- **Editable Within GUI**
---
## 💻 Installation
---
### Requirements
- Python 3.6 or higher
- pip
---
### Install Dependencies
```bash
pip install PyQt5 requests Pillow
```
***