This commit is contained in:
Yuvi9587
2025-05-14 16:26:18 +05:30
parent 25d33f1531
commit b5e9080285
3 changed files with 469 additions and 279 deletions

View File

@@ -1,8 +0,0 @@
Boa Hancock
Hairy D.va
Mercy
Misc
Nami
Robin
Sombra
Yamato

View File

@@ -38,10 +38,7 @@ SKIP_SCOPE_BOTH = "both"
CHAR_SCOPE_TITLE = "title" CHAR_SCOPE_TITLE = "title"
CHAR_SCOPE_FILES = "files" CHAR_SCOPE_FILES = "files"
CHAR_SCOPE_BOTH = "both" CHAR_SCOPE_BOTH = "both"
CHAR_SCOPE_COMMENTS = "comments"
# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed.
DUPLICATE_MODE_DELETE = "delete"
DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move"
fastapi_app = None fastapi_app = None
KNOWN_NAMES = [] KNOWN_NAMES = []
@@ -99,6 +96,15 @@ def clean_filename(name):
cleaned = re.sub(r'\s+', '_', cleaned) cleaned = re.sub(r'\s+', '_', cleaned)
return cleaned if cleaned else "untitled_file" return cleaned if cleaned else "untitled_file"
def strip_html_tags(html_text):
    """Return *html_text* as plain text: entities unescaped, tags removed.

    Args:
        html_text: Raw HTML string (may be None or empty).

    Returns:
        The text content with HTML entities decoded, all ``<...>`` tags
        stripped, and surrounding whitespace trimmed. Empty string for
        falsy input.
    """
    if not html_text: return ""
    # First, unescape HTML entities
    text = html.unescape(html_text)
    # Then, remove HTML tags using a simple regex.
    # re.DOTALL lets '.' cross newlines so tags whose attributes wrap onto
    # a following line (e.g. <a\nhref="...">) are stripped too — without it
    # such tags survived into the "clean" output.
    # This is a basic approach and might not handle all complex HTML perfectly
    clean_pattern = re.compile('<.*?>', re.DOTALL)
    cleaned_text = re.sub(clean_pattern, '', text)
    return cleaned_text.strip()
def extract_folder_name_from_title(title, unwanted_keywords): def extract_folder_name_from_title(title, unwanted_keywords):
if not title: return 'Uncategorized' if not title: return 'Uncategorized'
@@ -221,6 +227,31 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
except Exception as e: except Exception as e:
raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}") raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}")
def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None):
    """Fetch the comment list for one post from the site's JSON API.

    Args:
        api_domain: Hostname of the API (e.g. "kemono.su").
        service: Service segment of the API path.
        user_id: Creator/user identifier.
        post_id: Post identifier whose comments are requested.
        headers: HTTP headers forwarded to ``requests.get``.
        logger: Callable accepting a single log-message string.
        cancellation_event: Optional event; if already set, the fetch is
            aborted before any network I/O.

    Returns:
        The decoded JSON payload, or ``[]`` when the server replies with a
        non-JSON content type.

    Raises:
        RuntimeError: on user cancellation, timeout, transport/HTTP errors,
            or a malformed JSON body.
    """
    # Honour a cancellation requested before we even hit the network.
    if cancellation_event is not None and cancellation_event.is_set():
        logger(" Comment fetch cancelled before request.")
        raise RuntimeError("Comment fetch operation cancelled by user.")

    comments_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}/comments"
    logger(f" Fetching comments: {comments_api_url}")

    try:
        response = requests.get(comments_api_url, headers=headers, timeout=(10, 30))  # Shorter timeout for comments
        response.raise_for_status()
        content_type = response.headers.get('Content-Type', '')
        if 'application/json' not in content_type.lower():
            logger(f"⚠️ Unexpected content type from comments API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}")
            return []  # Return empty list if not JSON
        return response.json()
    except requests.exceptions.Timeout:
        raise RuntimeError(f"Timeout fetching comments for post {post_id} from {comments_api_url}")
    except requests.exceptions.RequestException as e:
        # Build the message in pieces so the status/body detail is only
        # appended when a response object actually exists.
        msg_parts = [f"Error fetching comments for post {post_id} from {comments_api_url}: {e}"]
        if e.response is not None:
            msg_parts.append(f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})")
        raise RuntimeError("".join(msg_parts))
    except ValueError as e:  # JSONDecodeError inherits from ValueError
        raise RuntimeError(f"Error decoding JSON from comments API for post {post_id} ({comments_api_url}): {e}. Response text: {response.text[:200]}")
    except Exception as e:
        raise RuntimeError(f"Unexpected error fetching comments for post {post_id} ({comments_api_url}): {e}")
def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False, cancellation_event=None): def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False, cancellation_event=None):
headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'} headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
@@ -412,7 +443,7 @@ class PostProcessorWorker:
char_filter_scope=CHAR_SCOPE_FILES, char_filter_scope=CHAR_SCOPE_FILES,
remove_from_filename_words_list=None, remove_from_filename_words_list=None,
allow_multipart_download=True, allow_multipart_download=True,
duplicate_file_mode=DUPLICATE_MODE_DELETE): ): # Removed duplicate_file_mode and session-wide tracking
self.post = post_data self.post = post_data
self.download_root = download_root self.download_root = download_root
self.known_names = known_names self.known_names = known_names
@@ -450,7 +481,7 @@ class PostProcessorWorker:
self.char_filter_scope = char_filter_scope self.char_filter_scope = char_filter_scope
self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else [] self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else []
self.allow_multipart_download = allow_multipart_download self.allow_multipart_download = allow_multipart_download
self.duplicate_file_mode = duplicate_file_mode # This will be the effective mode (possibly overridden by main.py for manga) # self.duplicate_file_mode and session-wide tracking removed
if self.compress_images and Image is None: if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found.") self.logger("⚠️ Image compression disabled: Pillow library not found.")
@@ -469,10 +500,7 @@ class PostProcessorWorker:
post_title="", file_index_in_post=0, num_files_in_this_post=1): post_title="", file_index_in_post=0, num_files_in_this_post=1):
was_original_name_kept_flag = False was_original_name_kept_flag = False
final_filename_saved_for_return = "" final_filename_saved_for_return = ""
# target_folder_path is the base character/post folder.
# current_target_folder_path is the actual folder where the file will be saved.
# It starts as the main character/post folder (target_folder_path) by default.
current_target_folder_path = target_folder_path
if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False
@@ -561,44 +589,29 @@ class PostProcessorWorker:
self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).") self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).")
return 0, 1, api_original_filename, False return 0, 1, api_original_filename, False
# --- Pre-Download Duplicate Handling (Standard Mode Only - Manga mode has its own suffixing) ---
if not self.manga_mode_active: if not self.manga_mode_active:
# --- Pre-Download Duplicate Handling (Standard Mode Only) --- path_in_main_folder_check = os.path.join(target_folder_path, filename_to_save_in_main_path)
is_duplicate_for_main_folder_by_path = os.path.exists(os.path.join(target_folder_path, filename_to_save_in_main_path)) and \ is_duplicate_by_path = os.path.exists(path_in_main_folder_check) and \
os.path.getsize(os.path.join(target_folder_path, filename_to_save_in_main_path)) > 0 os.path.getsize(path_in_main_folder_check) > 0
is_duplicate_for_main_folder_by_session_name = False is_duplicate_by_session_name = False
with self.downloaded_files_lock: with self.downloaded_files_lock:
if filename_to_save_in_main_path in self.downloaded_files: if filename_to_save_in_main_path in self.downloaded_files:
is_duplicate_for_main_folder_by_session_name = True is_duplicate_by_session_name = True
if is_duplicate_for_main_folder_by_path or is_duplicate_for_main_folder_by_session_name: if is_duplicate_by_path or is_duplicate_by_session_name:
if self.duplicate_file_mode == DUPLICATE_MODE_DELETE: reason = "Path Exists" if is_duplicate_by_path else "Session Name"
reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name" self.logger(f" -> Skip Duplicate ({reason}, Pre-DL): '{filename_to_save_in_main_path}'. Skipping download.")
self.logger(f" -> Delete Duplicate ({reason}): '{filename_to_save_in_main_path}'. Skipping download.") with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark as processed
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER:
reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name"
self.logger(f" -> Pre-DL Move ({reason}): '{filename_to_save_in_main_path}'. Will target 'Duplicate' subfolder.")
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
# Ensure base target folder exists (used for .part file with multipart)
try: try:
os.makedirs(current_target_folder_path, exist_ok=True) os.makedirs(target_folder_path, exist_ok=True) # For .part file
except OSError as e: except OSError as e:
self.logger(f" ❌ Critical error creating directory '{current_target_folder_path}': {e}. Skipping file '{api_original_filename}'.") self.logger(f" ❌ Critical error creating directory '{target_folder_path}': {e}. Skipping file '{api_original_filename}'.")
return 0, 1, api_original_filename, False return 0, 1, api_original_filename, False
# If mode is MOVE (and not manga mode), and current_target_folder_path is now "Duplicate",
# check if the file *already* exists by its base name in this "Duplicate" folder. (Standard Mode Only)
if not self.manga_mode_active and \
self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \
"Duplicate" in current_target_folder_path.split(os.sep) and \
os.path.exists(os.path.join(current_target_folder_path, filename_to_save_in_main_path)):
self.logger(f" -> File '{filename_to_save_in_main_path}' already exists in '{os.path.basename(current_target_folder_path)}' subfolder. Skipping download.")
# The name was already added to downloaded_files if it was a pre-DL move.
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
# --- Download Attempt --- # --- Download Attempt ---
max_retries = 3 max_retries = 3
@@ -633,9 +646,10 @@ class PostProcessorWorker:
if self.signals and hasattr(self.signals, 'file_download_status_signal'): if self.signals and hasattr(self.signals, 'file_download_status_signal'):
self.signals.file_download_status_signal.emit(False) self.signals.file_download_status_signal.emit(False)
mp_save_path_base = os.path.join(current_target_folder_path, filename_to_save_in_main_path) # .part file is always based on the main target_folder_path and filename_to_save_in_main_path
mp_save_path_base_for_part = os.path.join(target_folder_path, filename_to_save_in_main_path)
mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts( mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts(
file_url, mp_save_path_base, total_size_bytes, num_parts_for_file, headers, file_url, mp_save_path_base_for_part, total_size_bytes, num_parts_for_file, headers,
api_original_filename, self.signals, self.cancellation_event, skip_event, self.logger api_original_filename, self.signals, self.cancellation_event, skip_event, self.logger
) )
if mp_success: if mp_success:
@@ -705,130 +719,132 @@ class PostProcessorWorker:
if file_content_bytes: file_content_bytes.close() if file_content_bytes: file_content_bytes.close()
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
if not self.manga_mode_active: # --- Universal Post-Download Hash Check ---
# --- Post-Download Hash Check (Standard Mode Only) --- with self.downloaded_file_hashes_lock:
with self.downloaded_file_hashes_lock: if calculated_file_hash in self.downloaded_file_hashes:
if calculated_file_hash in self.downloaded_file_hashes: self.logger(f" -> Skip Saving Duplicate (Hash Match): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...).")
if self.duplicate_file_mode == DUPLICATE_MODE_DELETE: with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark logical name
self.logger(f" -> Delete Duplicate (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). Skipping save.") if file_content_bytes: file_content_bytes.close()
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # If it was a multipart download, its .part file needs cleanup
if file_content_bytes: file_content_bytes.close() if not isinstance(file_content_bytes, BytesIO): # Indicates multipart download
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
if os.path.exists(part_file_to_remove):
elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER: try: os.remove(part_file_to_remove);
self.logger(f" -> Post-DL Move (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). Content already downloaded.") except OSError: self.logger(f" -> Failed to remove .part file for hash duplicate: {part_file_to_remove}")
if "Duplicate" not in current_target_folder_path.split(os.sep): return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
self.logger(f" Redirecting to 'Duplicate' subfolder: '{current_target_folder_path}'")
# Ensure "Duplicate" folder exists if this is a new redirection due to hash
try: os.makedirs(current_target_folder_path, exist_ok=True)
except OSError as e_mkdir_hash: self.logger(f" Error creating Duplicate folder for hash collision: {e_mkdir_hash}")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
# --- Final Filename Determination for Saving ---
filename_for_actual_save = filename_to_save_in_main_path
# If mode is MOVE (and not manga mode) and the file is destined for the main folder, # --- Determine Save Location and Final Filename ---
# but a file with that name *now* exists (e.g. race condition, or different file with same name not caught by hash), effective_save_folder = target_folder_path # Default: main character/post folder
# reroute it to the "Duplicate" folder. # filename_to_save_in_main_path is the logical name after cleaning, manga styling, word removal
if not self.manga_mode_active and \ filename_after_styling_and_word_removal = filename_to_save_in_main_path
self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \
current_target_folder_path == target_folder_path and \
os.path.exists(os.path.join(current_target_folder_path, filename_for_actual_save)):
self.logger(f" -> Post-DL Move (Late Name Collision in Main): '{filename_for_actual_save}'. Moving to 'Duplicate'.")
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
try: # Ensure "Duplicate" folder exists if this is a new redirection
os.makedirs(current_target_folder_path, exist_ok=True)
except OSError as e_mkdir: self.logger(f" Error creating Duplicate folder during late move: {e_mkdir}")
# The name filename_to_save_in_main_path was already added to downloaded_files if it was a pre-DL name collision.
# If it was a hash collision that got rerouted, it was also added.
# If this is a new reroute due to late name collision, ensure it's marked.
# "Move" logic and "Duplicate" subfolder logic removed.
# effective_save_folder will always be target_folder_path.
try: # Ensure the chosen save folder (main or Duplicate) exists
os.makedirs(effective_save_folder, exist_ok=True)
except OSError as e:
self.logger(f" ❌ Critical error creating directory '{effective_save_folder}': {e}. Skipping file '{api_original_filename}'.")
if file_content_bytes: file_content_bytes.close()
# Cleanup .part file if multipart
if not isinstance(file_content_bytes, BytesIO):
part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
if os.path.exists(part_file_to_remove): os.remove(part_file_to_remove)
return 0, 1, api_original_filename, False
# --- Image Compression ---
# This operates on file_content_bytes (which is BytesIO or a file handle from multipart)
# It might change filename_after_styling_and_word_removal's extension (e.g., .jpg to .webp)
# and returns new data_to_write_after_compression (BytesIO) or original file_content_bytes.
data_to_write_after_compression = file_content_bytes
filename_after_compression = filename_after_styling_and_word_removal
# Apply numeric suffix renaming (_1, _2) *only if needed within the current_target_folder_path*
# This means:
# - If current_target_folder_path is the main folder (and not MOVE mode, or MOVE mode but file was unique):
# Renaming happens if a file with filename_for_actual_save exists there.
# - If current_target_folder_path is "Duplicate" (because of MOVE mode):
# Renaming happens if filename_for_actual_save exists *within "Duplicate"*.
counter = 1
base_name_final_coll, ext_final_coll = os.path.splitext(filename_for_actual_save)
temp_filename_final_check = filename_for_actual_save
while os.path.exists(os.path.join(current_target_folder_path, temp_filename_final_check)):
temp_filename_final_check = f"{base_name_final_coll}_{counter}{ext_final_coll}"
counter += 1
if temp_filename_final_check != filename_for_actual_save:
self.logger(f" Final rename for target folder '{os.path.basename(current_target_folder_path)}': '{temp_filename_final_check}' (was '{filename_for_actual_save}')")
filename_for_actual_save = temp_filename_final_check
bytes_to_write = file_content_bytes
final_filename_after_processing = filename_for_actual_save
current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing)
is_img_for_compress_check = is_image(api_original_filename) is_img_for_compress_check = is_image(api_original_filename)
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024): if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...") self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...")
try: try:
bytes_to_write.seek(0) file_content_bytes.seek(0)
with Image.open(bytes_to_write) as img_obj: with Image.open(file_content_bytes) as img_obj:
if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA') if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA')
elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB') elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB')
compressed_bytes_io = BytesIO() compressed_bytes_io = BytesIO()
img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4) img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4)
compressed_size = compressed_bytes_io.getbuffer().nbytes compressed_size = compressed_bytes_io.getbuffer().nbytes
if compressed_size < downloaded_size_bytes * 0.9: if compressed_size < downloaded_size_bytes * 0.9: # If significantly smaller
self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.") self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.")
if hasattr(bytes_to_write, 'close'): bytes_to_write.close() data_to_write_after_compression = compressed_bytes_io; data_to_write_after_compression.seek(0)
base_name_orig, _ = os.path.splitext(filename_after_compression)
original_part_file_path = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for .part filename_after_compression = base_name_orig + '.webp'
if os.path.exists(original_part_file_path): self.logger(f" Updated filename (compressed): {filename_after_compression}")
os.remove(original_part_file_path)
bytes_to_write = compressed_bytes_io; bytes_to_write.seek(0)
base_name_orig, _ = os.path.splitext(filename_for_actual_save)
final_filename_after_processing = base_name_orig + '.webp'
current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing)
self.logger(f" Updated filename (compressed): {final_filename_after_processing}")
else: else:
self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0) self.logger(f" Compression skipped: WebP not significantly smaller."); file_content_bytes.seek(0) # Reset original stream
data_to_write_after_compression = file_content_bytes # Use original
except Exception as comp_e: except Exception as comp_e:
self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); bytes_to_write.seek(0) self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); file_content_bytes.seek(0)
data_to_write_after_compression = file_content_bytes # Use original
if final_filename_after_processing != filename_for_actual_save and \ # --- Final Numeric Suffixing in the effective_save_folder ---
os.path.exists(current_save_path_final) and os.path.getsize(current_save_path_final) > 0: final_filename_on_disk = filename_after_compression # This is the name after potential compression
self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{os.path.basename(current_target_folder_path)}'.") temp_base, temp_ext = os.path.splitext(final_filename_on_disk)
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) suffix_counter = 1
if bytes_to_write and hasattr(bytes_to_write, 'close'): bytes_to_write.close() while os.path.exists(os.path.join(effective_save_folder, final_filename_on_disk)):
return 0, 1, final_filename_after_processing, was_original_name_kept_flag final_filename_on_disk = f"{temp_base}_{suffix_counter}{temp_ext}"
suffix_counter += 1
if final_filename_on_disk != filename_after_compression:
self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')")
# --- Save File ---
final_save_path = os.path.join(effective_save_folder, final_filename_on_disk)
try: try:
os.makedirs(current_target_folder_path, exist_ok=True) # data_to_write_after_compression is BytesIO (single stream, or compressed multipart)
# OR it's the original file_content_bytes (which is a file handle if uncompressed multipart)
if isinstance(bytes_to_write, BytesIO): if data_to_write_after_compression is file_content_bytes and not isinstance(file_content_bytes, BytesIO):
with open(current_save_path_final, 'wb') as f_out: # This means uncompressed multipart download. Original .part file handle is file_content_bytes.
f_out.write(bytes_to_write.getvalue()) # The .part file is at target_folder_path/filename_to_save_in_main_path.part
else: original_part_file_actual_path = file_content_bytes.name
if hasattr(bytes_to_write, 'close'): bytes_to_write.close() file_content_bytes.close() # Close handle first
source_part_file = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for .part os.rename(original_part_file_actual_path, final_save_path)
os.rename(source_part_file, current_save_path_final) self.logger(f" Renamed .part file to final: {final_save_path}")
else: # Single stream download, or compressed multipart. Write from BytesIO.
with open(final_save_path, 'wb') as f_out:
f_out.write(data_to_write_after_compression.getvalue())
# If original was multipart and then compressed, clean up original .part file
if data_to_write_after_compression is not file_content_bytes and not isinstance(file_content_bytes, BytesIO):
original_part_file_actual_path = file_content_bytes.name
file_content_bytes.close()
if os.path.exists(original_part_file_actual_path):
try: os.remove(original_part_file_actual_path)
except OSError as e_rem: self.logger(f" -> Failed to remove .part after compression: {e_rem}")
with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash) with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash)
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Track by logical name
final_filename_saved_for_return = final_filename_after_processing final_filename_saved_for_return = final_filename_on_disk
self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(current_target_folder_path)}'") self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'")
# Session-wide base name tracking removed.
time.sleep(0.05) time.sleep(0.05)
return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag
except Exception as save_err: except Exception as save_err:
self.logger(f"❌ Save Fail for '{final_filename_after_processing}': {save_err}") self.logger(f"❌ Save Fail for '{final_filename_on_disk}': {save_err}")
if os.path.exists(current_save_path_final): if os.path.exists(final_save_path):
try: os.remove(current_save_path_final); try: os.remove(final_save_path);
except OSError: self.logger(f" -> Failed to remove partially saved file: {current_save_path_final}") except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
finally: finally:
if bytes_to_write and hasattr(bytes_to_write, 'close'): # Ensure all handles are closed
bytes_to_write.close() if data_to_write_after_compression and hasattr(data_to_write_after_compression, 'close'):
data_to_write_after_compression.close()
# If original file_content_bytes was a different handle (e.g. multipart before compression) and not closed yet
if file_content_bytes and file_content_bytes is not data_to_write_after_compression and hasattr(file_content_bytes, 'close'):
try:
if not file_content_bytes.closed: # Check if already closed
file_content_bytes.close()
except Exception: pass # Ignore errors on close if already handled
def process(self): def process(self):
@@ -858,36 +874,140 @@ class PostProcessorWorker:
post_is_candidate_by_title_char_match = False post_is_candidate_by_title_char_match = False
char_filter_that_matched_title = None char_filter_that_matched_title = None
post_is_candidate_by_comment_char_match = False
# New variables for CHAR_SCOPE_COMMENTS file-first logic
post_is_candidate_by_file_char_match_in_comment_scope = False
char_filter_that_matched_file_in_comment_scope = None
char_filter_that_matched_comment = None
if self.filter_character_list_objects and \ if self.filter_character_list_objects and \
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH): (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH):
self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. Scope: {self.char_filter_scope}") # self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. Scope: {self.char_filter_scope}")
for idx, filter_item_obj in enumerate(self.filter_character_list_objects): for idx, filter_item_obj in enumerate(self.filter_character_list_objects):
self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}") if self.check_cancel(): break
# self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}")
terms_to_check_for_title = list(filter_item_obj["aliases"]) terms_to_check_for_title = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"]: if filter_item_obj["is_group"]:
if filter_item_obj["name"] not in terms_to_check_for_title: if filter_item_obj["name"] not in terms_to_check_for_title:
terms_to_check_for_title.append(filter_item_obj["name"]) terms_to_check_for_title.append(filter_item_obj["name"])
unique_terms_for_title_check = list(set(terms_to_check_for_title)) unique_terms_for_title_check = list(set(terms_to_check_for_title))
self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}") # self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}")
for term_to_match in unique_terms_for_title_check: for term_to_match in unique_terms_for_title_check:
self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'") # self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'")
match_found_for_term = is_title_match_for_character(post_title, term_to_match) match_found_for_term = is_title_match_for_character(post_title, term_to_match)
self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}") # self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}")
if match_found_for_term: if match_found_for_term:
post_is_candidate_by_title_char_match = True post_is_candidate_by_title_char_match = True
char_filter_that_matched_title = filter_item_obj char_filter_that_matched_title = filter_item_obj
self.logger(f" Post title matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}', Scope: {self.char_filter_scope}). Post is candidate.") self.logger(f" Post title matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}', Scope: {self.char_filter_scope}). Post is candidate.")
break break
if post_is_candidate_by_title_char_match: break if post_is_candidate_by_title_char_match: break
self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}") # self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}")
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match: # --- Populate all_files_from_post_api before character filter logic that needs it ---
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.") # This is needed for the file-first check in CHAR_SCOPE_COMMENTS
return 0, num_potential_files_in_post, [] all_files_from_post_api_for_char_check = []
api_file_domain_for_char_check = urlparse(self.api_url_input).netloc
if not api_file_domain_for_char_check or not any(d in api_file_domain_for_char_check.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
api_file_domain_for_char_check = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'):
original_api_name = post_main_file_info.get('name') or os.path.basename(post_main_file_info['path'].lstrip('/'))
if original_api_name:
all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_name})
for att_info in post_attachments:
if isinstance(att_info, dict) and att_info.get('path'):
original_api_att_name = att_info.get('name') or os.path.basename(att_info['path'].lstrip('/'))
if original_api_att_name:
all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_att_name})
# --- End population of all_files_from_post_api_for_char_check ---
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
self.logger(f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id}'.")
for file_info_item in all_files_from_post_api_for_char_check: # Use the pre-populated list of file names
if self.check_cancel(): break
current_api_original_filename_for_check = file_info_item.get('_original_name_for_log')
if not current_api_original_filename_for_check: continue
for filter_item_obj in self.filter_character_list_objects:
terms_to_check = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check:
terms_to_check.append(filter_item_obj["name"])
for term_to_match in terms_to_check:
if is_filename_match_for_character(current_api_original_filename_for_check, term_to_match):
post_is_candidate_by_file_char_match_in_comment_scope = True
char_filter_that_matched_file_in_comment_scope = filter_item_obj
self.logger(f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check}' matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}'). Post is candidate.")
break
if post_is_candidate_by_file_char_match_in_comment_scope: break
if post_is_candidate_by_file_char_match_in_comment_scope: break
self.logger(f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope}")
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
if not post_is_candidate_by_file_char_match_in_comment_scope:
self.logger(f" [Char Scope: Comments] Phase 2: No file match found. Checking post comments for post ID '{post_id}'.")
try:
parsed_input_url_for_comments = urlparse(self.api_url_input)
api_domain_for_comments = parsed_input_url_for_comments.netloc
if not any(d in api_domain_for_comments.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
self.logger(f"⚠️ Unrecognized domain '{api_domain_for_comments}' for comment API. Defaulting based on service.")
api_domain_for_comments = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
comments_data = fetch_post_comments(
api_domain_for_comments, self.service, self.user_id, post_id,
headers, self.logger, self.cancellation_event
)
if comments_data:
self.logger(f" Fetched {len(comments_data)} comments for post {post_id}.")
for comment_item_idx, comment_item in enumerate(comments_data):
if self.check_cancel(): break
raw_comment_content = comment_item.get('content', '')
if not raw_comment_content: continue
cleaned_comment_text = strip_html_tags(raw_comment_content)
if not cleaned_comment_text.strip(): continue
for filter_item_obj in self.filter_character_list_objects:
terms_to_check_comment = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_comment:
terms_to_check_comment.append(filter_item_obj["name"])
for term_to_match_comment in terms_to_check_comment:
if is_title_match_for_character(cleaned_comment_text, term_to_match_comment): # Re-use title matcher
post_is_candidate_by_comment_char_match = True
char_filter_that_matched_comment = filter_item_obj
self.logger(f" Match Found (Comment in Comments Scope): Comment in post {post_id} matches char filter term '{term_to_match_comment}' (from group/name '{filter_item_obj['name']}'). Post is candidate.")
self.logger(f" Matching comment (first 100 chars): '{cleaned_comment_text[:100]}...'")
break
if post_is_candidate_by_comment_char_match: break
if post_is_candidate_by_comment_char_match: break
else:
self.logger(f" No comments found or fetched for post {post_id} to check against character filters.")
except RuntimeError as e_fetch_comment:
self.logger(f" ⚠️ Error fetching or processing comments for post {post_id}: {e_fetch_comment}")
except Exception as e_generic_comment:
self.logger(f" ❌ Unexpected error during comment processing for post {post_id}: {e_generic_comment}\n{traceback.format_exc(limit=2)}")
self.logger(f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match}")
else: # post_is_candidate_by_file_char_match_in_comment_scope was True
self.logger(f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id}' because a file match already made it a candidate.")
# --- Skip Post Logic based on Title or Comment Scope (if filters are active) ---
if self.filter_character_list_objects:
if self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
return 0, num_potential_files_in_post, []
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and \
not post_is_candidate_by_file_char_match_in_comment_scope and \
not post_is_candidate_by_comment_char_match: # MODIFIED: Check both file and comment match flags
self.logger(f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id}', Title '{post_title[:50]}...'")
return 0, num_potential_files_in_post, []
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH): if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
post_title_lower = post_title.lower() post_title_lower = post_title.lower()
for skip_word in self.skip_words_list: for skip_word in self.skip_words_list:
@@ -907,9 +1027,26 @@ class PostProcessorWorker:
base_folder_names_for_post_content = [] base_folder_names_for_post_content = []
if not self.extract_links_only and self.use_subfolders: if not self.extract_links_only and self.use_subfolders:
if post_is_candidate_by_title_char_match and char_filter_that_matched_title: primary_char_filter_for_folder = None
base_folder_names_for_post_content = [clean_folder_name(char_filter_that_matched_title["name"])] log_reason_for_folder = ""
elif not self.filter_character_list_objects:
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment:
# For CHAR_SCOPE_COMMENTS, prioritize file match for folder name if it happened
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope:
primary_char_filter_for_folder = char_filter_that_matched_file_in_comment_scope
log_reason_for_folder = "Matched char filter in filename (Comments scope)"
elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment: # Fallback to comment match
primary_char_filter_for_folder = char_filter_that_matched_comment
log_reason_for_folder = "Matched char filter in comments (Comments scope, no file match)"
elif (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and char_filter_that_matched_title: # Existing logic for other scopes
primary_char_filter_for_folder = char_filter_that_matched_title
log_reason_for_folder = "Matched char filter in title"
# If scope is FILES, primary_char_filter_for_folder will be None here. Folder determined per file.
if primary_char_filter_for_folder:
base_folder_names_for_post_content = [clean_folder_name(primary_char_filter_for_folder["name"])]
self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}")
elif not self.filter_character_list_objects: # No char filters defined, use generic logic
derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords) derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
if derived_folders: if derived_folders:
base_folder_names_for_post_content.extend(derived_folders) base_folder_names_for_post_content.extend(derived_folders)
@@ -917,11 +1054,10 @@ class PostProcessorWorker:
base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords)) base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]: if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")] base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
self.logger(f" Base folder name(s) for post content (Generic title parsing - no char filters): {', '.join(base_folder_names_for_post_content)}")
# If char filters are defined, and scope is FILES, then base_folder_names_for_post_content remains empty.
# The folder will be determined by char_filter_info_that_matched_file later.
if base_folder_names_for_post_content:
log_reason = "Matched char filter" if (post_is_candidate_by_title_char_match and char_filter_that_matched_title) else "Generic title parsing (no char filters)"
self.logger(f" Base folder name(s) for post content ({log_reason}): {', '.join(base_folder_names_for_post_content)}")
if not self.extract_links_only and self.use_subfolders and self.skip_words_list: if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
for folder_name_to_check in base_folder_names_for_post_content: for folder_name_to_check in base_folder_names_for_post_content:
if not folder_name_to_check: continue if not folder_name_to_check: continue
@@ -1066,19 +1202,32 @@ class PostProcessorWorker:
char_filter_info_that_matched_file = char_filter_that_matched_title char_filter_info_that_matched_file = char_filter_that_matched_title
self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. Scope: Both (Title part).") self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. Scope: Both (Title part).")
else: else:
for filter_item_obj in self.filter_character_list_objects: # This part is for the "File" part of "Both" scope
terms_to_check_for_file_both = list(filter_item_obj["aliases"]) for filter_item_obj_both_file in self.filter_character_list_objects:
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_for_file_both: terms_to_check_for_file_both = list(filter_item_obj_both_file["aliases"])
terms_to_check_for_file_both.append(filter_item_obj["name"]) if filter_item_obj_both_file["is_group"] and filter_item_obj_both_file["name"] not in terms_to_check_for_file_both:
unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both)) terms_to_check_for_file_both.append(filter_item_obj_both_file["name"])
# Ensure unique_terms_for_file_both_check is defined here
unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both))
for term_to_match in unique_terms_for_file_both_check: for term_to_match in unique_terms_for_file_both_check:
if is_filename_match_for_character(current_api_original_filename, term_to_match): if is_filename_match_for_character(current_api_original_filename, term_to_match):
file_is_candidate_by_char_filter_scope = True file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = filter_item_obj char_filter_info_that_matched_file = filter_item_obj_both_file # Use the filter that matched the file
self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Both (File part).") self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Both (File part).")
break break
if file_is_candidate_by_char_filter_scope: break if file_is_candidate_by_char_filter_scope: break
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
# If the post is a candidate (either by file or comment under this scope), then this file is also a candidate.
# The folder naming will use the filter that made the POST a candidate.
if post_is_candidate_by_file_char_match_in_comment_scope: # Post was candidate due to a file match
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_file_in_comment_scope # Use the filter that matched a file in the post
self.logger(f" File '{current_api_original_filename}' is candidate because a file in this post matched char filter (Overall Scope: Comments).")
elif post_is_candidate_by_comment_char_match: # Post was candidate due to comment match (no file match for post)
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_comment # Use the filter that matched comments
self.logger(f" File '{current_api_original_filename}' is candidate because post comments matched char filter (Overall Scope: Comments).")
if not file_is_candidate_by_char_filter_scope: if not file_is_candidate_by_char_filter_scope:
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.") self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
@@ -1178,7 +1327,7 @@ class DownloadThread(QThread):
char_filter_scope=CHAR_SCOPE_FILES, char_filter_scope=CHAR_SCOPE_FILES,
remove_from_filename_words_list=None, remove_from_filename_words_list=None,
allow_multipart_download=True, allow_multipart_download=True,
duplicate_file_mode=DUPLICATE_MODE_DELETE): # Default to DELETE ): # Removed duplicate_file_mode and session-wide tracking
super().__init__() super().__init__()
self.api_url_input = api_url_input self.api_url_input = api_url_input
self.output_dir = output_dir self.output_dir = output_dir
@@ -1219,7 +1368,7 @@ class DownloadThread(QThread):
self.char_filter_scope = char_filter_scope self.char_filter_scope = char_filter_scope
self.remove_from_filename_words_list = remove_from_filename_words_list self.remove_from_filename_words_list = remove_from_filename_words_list
self.allow_multipart_download = allow_multipart_download self.allow_multipart_download = allow_multipart_download
self.duplicate_file_mode = duplicate_file_mode # self.duplicate_file_mode and session-wide tracking removed
if self.compress_images and Image is None: if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
self.compress_images = False self.compress_images = False
@@ -1297,7 +1446,7 @@ class DownloadThread(QThread):
char_filter_scope=self.char_filter_scope, char_filter_scope=self.char_filter_scope,
remove_from_filename_words_list=self.remove_from_filename_words_list, remove_from_filename_words_list=self.remove_from_filename_words_list,
allow_multipart_download=self.allow_multipart_download, allow_multipart_download=self.allow_multipart_download,
duplicate_file_mode=self.duplicate_file_mode) ) # Removed duplicate_file_mode and session-wide tracking
try: try:
dl_count, skip_count, kept_originals_this_post = post_processing_worker.process() dl_count, skip_count, kept_originals_this_post = post_processing_worker.process()
grand_total_downloaded_files += dl_count grand_total_downloaded_files += dl_count

267
main.py
View File

@@ -48,8 +48,9 @@ try:
SKIP_SCOPE_POSTS, SKIP_SCOPE_POSTS,
SKIP_SCOPE_BOTH, SKIP_SCOPE_BOTH,
CHAR_SCOPE_TITLE, # Added for completeness if used directly CHAR_SCOPE_TITLE, # Added for completeness if used directly
CHAR_SCOPE_FILES, # Added CHAR_SCOPE_FILES, # Ensure this is imported
CHAR_SCOPE_BOTH # Added CHAR_SCOPE_BOTH,
CHAR_SCOPE_COMMENTS
) )
print("Successfully imported names from downloader_utils.") print("Successfully imported names from downloader_utils.")
except ImportError as e: except ImportError as e:
@@ -68,6 +69,7 @@ except ImportError as e:
CHAR_SCOPE_TITLE = "title" CHAR_SCOPE_TITLE = "title"
CHAR_SCOPE_FILES = "files" CHAR_SCOPE_FILES = "files"
CHAR_SCOPE_BOTH = "both" CHAR_SCOPE_BOTH = "both"
CHAR_SCOPE_COMMENTS = "comments"
except Exception as e: except Exception as e:
print(f"--- UNEXPECTED IMPORT ERROR ---") print(f"--- UNEXPECTED IMPORT ERROR ---")
@@ -80,6 +82,7 @@ except Exception as e:
MAX_THREADS = 200 MAX_THREADS = 200
RECOMMENDED_MAX_THREADS = 50 RECOMMENDED_MAX_THREADS = 50
MAX_FILE_THREADS_PER_POST_OR_WORKER = 10 MAX_FILE_THREADS_PER_POST_OR_WORKER = 10
MAX_POST_WORKERS_WHEN_COMMENT_FILTERING = 3 # New constant
HTML_PREFIX = "<!HTML!>" HTML_PREFIX = "<!HTML!>"
@@ -92,13 +95,7 @@ SKIP_WORDS_SCOPE_KEY = "skipWordsScopeV1"
ALLOW_MULTIPART_DOWNLOAD_KEY = "allowMultipartDownloadV1" ALLOW_MULTIPART_DOWNLOAD_KEY = "allowMultipartDownloadV1"
CHAR_FILTER_SCOPE_KEY = "charFilterScopeV1" CHAR_FILTER_SCOPE_KEY = "charFilterScopeV1"
# CHAR_SCOPE_TITLE, CHAR_SCOPE_FILES, CHAR_SCOPE_BOTH are already defined or imported # CHAR_SCOPE_TITLE, CHAR_SCOPE_FILES, CHAR_SCOPE_BOTH, CHAR_SCOPE_COMMENTS are already defined or imported
DUPLICATE_FILE_MODE_KEY = "duplicateFileModeV1"
# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed.
DUPLICATE_MODE_DELETE = "delete"
DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move" # New mode
# --- Tour Classes (Moved from tour.py) --- # --- Tour Classes (Moved from tour.py) ---
class TourStepWidget(QWidget): class TourStepWidget(QWidget):
@@ -480,25 +477,21 @@ class DownloaderApp(QWidget):
self.radio_only_links = None self.radio_only_links = None
self.radio_only_archives = None self.radio_only_archives = None
self.skip_scope_toggle_button = None
self.char_filter_scope_toggle_button = None self.char_filter_scope_toggle_button = None
self.all_kept_original_filenames = []
self.manga_filename_style = self.settings.value(MANGA_FILENAME_STYLE_KEY, STYLE_POST_TITLE, type=str) self.manga_filename_style = self.settings.value(MANGA_FILENAME_STYLE_KEY, STYLE_POST_TITLE, type=str)
self.skip_words_scope = self.settings.value(SKIP_WORDS_SCOPE_KEY, SKIP_SCOPE_POSTS, type=str) self.skip_words_scope = self.settings.value(SKIP_WORDS_SCOPE_KEY, SKIP_SCOPE_POSTS, type=str)
self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_TITLE, type=str) self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_FILES, type=str) # Default to Files
# Always default multi-part download to OFF on launch, ignoring any saved setting. # Always default multi-part download to OFF on launch, ignoring any saved setting.
self.allow_multipart_download_setting = False self.allow_multipart_download_setting = False
self.duplicate_file_mode = self.settings.value(DUPLICATE_FILE_MODE_KEY, DUPLICATE_MODE_DELETE, type=str) # Default to DELETE
print(f" Known.txt will be loaded/saved at: {self.config_file}") print(f" Known.txt will be loaded/saved at: {self.config_file}")
self.load_known_names_from_util() self.load_known_names_from_util()
self.setWindowTitle("Kemono Downloader v3.2.0") self.setWindowTitle("Kemono Downloader v3.2.0")
# self.setGeometry(150, 150, 1050, 820) # Initial geometry will be set after showing # self.setGeometry(150, 150, 1050, 820) # Initial geometry will be set after showing
self.setStyleSheet(self.get_dark_theme()) self.setStyleSheet(self.get_dark_theme())
self.init_ui() self.init_ui()
self._connect_signals() self._connect_signals()
@@ -510,7 +503,6 @@ class DownloaderApp(QWidget):
self.log_signal.emit(f" Skip words scope loaded: '{self.skip_words_scope}'") self.log_signal.emit(f" Skip words scope loaded: '{self.skip_words_scope}'")
self.log_signal.emit(f" Character filter scope loaded: '{self.char_filter_scope}'") self.log_signal.emit(f" Character filter scope loaded: '{self.char_filter_scope}'")
self.log_signal.emit(f" Multi-part download defaults to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'} on launch") self.log_signal.emit(f" Multi-part download defaults to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'} on launch")
self.log_signal.emit(f" Duplicate file handling mode loaded: '{self.duplicate_file_mode.capitalize()}'")
def _connect_signals(self): def _connect_signals(self):
@@ -560,7 +552,6 @@ class DownloaderApp(QWidget):
self.char_filter_scope_toggle_button.clicked.connect(self._cycle_char_filter_scope) self.char_filter_scope_toggle_button.clicked.connect(self._cycle_char_filter_scope)
if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode) if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode)
if hasattr(self, 'duplicate_mode_toggle_button'): self.duplicate_mode_toggle_button.clicked.connect(self._cycle_duplicate_mode)
def load_known_names_from_util(self): def load_known_names_from_util(self):
@@ -606,7 +597,6 @@ class DownloaderApp(QWidget):
self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope) self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope)
self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope) self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting) self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode) # Save current mode
self.settings.sync() self.settings.sync()
should_exit = True should_exit = True
@@ -726,17 +716,17 @@ class DownloaderApp(QWidget):
self.character_input = QLineEdit() self.character_input = QLineEdit()
self.character_input.setPlaceholderText("e.g., Tifa, Aerith, (Cloud, Zack)") self.character_input.setPlaceholderText("e.g., Tifa, Aerith, (Cloud, Zack)")
self.character_input.setToolTip( self.character_input.setToolTip(
"Filter files or posts by character/series names (comma-separated).\n" "Filter by character/series names (comma-separated, e.g., Tifa, Aerith).\n"
" - Normal Mode: Filters individual files by matching their filenames.\n" "The behavior of this filter (Files, Title, Both, or Comments) is controlled by the 'Filter: [Scope]' button next to this input.\n"
" - Manga/Comic Mode: Filters entire posts by matching the post title.\n"
"Also used for folder naming if 'Separate Folders' is enabled.\n" "Also used for folder naming if 'Separate Folders' is enabled.\n"
"Group aliases for a combined folder name: (alias1, alias2) -> folder 'alias1 alias2'.\n" "Group aliases for a combined folder name: (alias1, alias2) -> folder 'alias1 alias2'.\n"
"Example: yor, Tifa, (Boa, Hancock)") "Example: yor, Tifa, (Boa, Hancock)")
char_input_and_button_layout.addWidget(self.character_input, 3) char_input_and_button_layout.addWidget(self.character_input, 3)
self.char_filter_scope_toggle_button = QPushButton() self.char_filter_scope_toggle_button = QPushButton()
# Initial text and tooltip will be set by calling _update_char_filter_scope_button_text()
# at the end of init_ui or when the scope is first set.
self._update_char_filter_scope_button_text() self._update_char_filter_scope_button_text()
self.char_filter_scope_toggle_button.setToolTip("Click to cycle character filter scope (Files -> Title -> Both)")
self.char_filter_scope_toggle_button.setStyleSheet("padding: 6px 10px;") self.char_filter_scope_toggle_button.setStyleSheet("padding: 6px 10px;")
self.char_filter_scope_toggle_button.setMinimumWidth(100) self.char_filter_scope_toggle_button.setMinimumWidth(100)
char_input_and_button_layout.addWidget(self.char_filter_scope_toggle_button, 1) char_input_and_button_layout.addWidget(self.char_filter_scope_toggle_button, 1)
@@ -794,7 +784,6 @@ class DownloaderApp(QWidget):
skip_input_and_button_layout.addWidget(self.skip_words_input, 1) # Input field takes available space skip_input_and_button_layout.addWidget(self.skip_words_input, 1) # Input field takes available space
self.skip_scope_toggle_button = QPushButton() self.skip_scope_toggle_button = QPushButton()
self._update_skip_scope_button_text() self._update_skip_scope_button_text()
self.skip_scope_toggle_button.setToolTip("Click to cycle skip scope (Files -> Posts -> Both)")
self.skip_scope_toggle_button.setStyleSheet("padding: 6px 10px;") self.skip_scope_toggle_button.setStyleSheet("padding: 6px 10px;")
self.skip_scope_toggle_button.setMinimumWidth(100) self.skip_scope_toggle_button.setMinimumWidth(100)
skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 0) # Button takes its minimum skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 0) # Button takes its minimum
@@ -1017,38 +1006,26 @@ class DownloaderApp(QWidget):
log_title_layout.addWidget(self.link_search_button) log_title_layout.addWidget(self.link_search_button)
self.manga_rename_toggle_button = QPushButton() self.manga_rename_toggle_button = QPushButton()
# Tooltip is dynamically set by _update_manga_filename_style_button_text
self.manga_rename_toggle_button.setVisible(False) self.manga_rename_toggle_button.setVisible(False)
self.manga_rename_toggle_button.setFixedWidth(140) self.manga_rename_toggle_button.setFixedWidth(140)
self.manga_rename_toggle_button.setStyleSheet("padding: 4px 8px;") self.manga_rename_toggle_button.setStyleSheet("padding: 4px 8px;")
self._update_manga_filename_style_button_text() self._update_manga_filename_style_button_text()
log_title_layout.addWidget(self.manga_rename_toggle_button) log_title_layout.addWidget(self.manga_rename_toggle_button)
self.multipart_toggle_button = QPushButton() # Create the button self.multipart_toggle_button = QPushButton()
# Tooltip is dynamically set by _update_multipart_toggle_button_text
self.multipart_toggle_button.setToolTip("Toggle between Multi-part and Single-stream downloads for large files.") self.multipart_toggle_button.setToolTip("Toggle between Multi-part and Single-stream downloads for large files.")
self.multipart_toggle_button.setFixedWidth(130) # Adjust width as needed self.multipart_toggle_button.setFixedWidth(130) # Adjust width as needed
self.multipart_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding self.multipart_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding
self._update_multipart_toggle_button_text() # Set initial text self._update_multipart_toggle_button_text() # Set initial text
log_title_layout.addWidget(self.multipart_toggle_button) # Add to layout log_title_layout.addWidget(self.multipart_toggle_button) # Add to layout
self.duplicate_mode_toggle_button = QPushButton()
# Tooltip is dynamically set by _update_duplicate_mode_button_text
self.duplicate_mode_toggle_button.setToolTip("Toggle how duplicate filenames are handled (Rename or Delete).")
self.duplicate_mode_toggle_button.setFixedWidth(150) # Adjust width
self.duplicate_mode_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding
self._update_duplicate_mode_button_text() # Set initial text
log_title_layout.addWidget(self.duplicate_mode_toggle_button)
self.log_verbosity_button = QPushButton("Show Basic Log") self.log_verbosity_button = QPushButton("Show Basic Log")
# Tooltip already exists for log_verbosity_button
self.log_verbosity_button.setToolTip("Toggle between full and basic log details.") self.log_verbosity_button.setToolTip("Toggle between full and basic log details.")
self.log_verbosity_button.setFixedWidth(110) self.log_verbosity_button.setFixedWidth(110)
self.log_verbosity_button.setStyleSheet("padding: 4px 8px;") self.log_verbosity_button.setStyleSheet("padding: 4px 8px;")
log_title_layout.addWidget(self.log_verbosity_button) log_title_layout.addWidget(self.log_verbosity_button)
self.reset_button = QPushButton("🔄 Reset") self.reset_button = QPushButton("🔄 Reset")
# Tooltip already exists for reset_button
self.reset_button.setToolTip("Reset all inputs and logs to default state (only when idle).") self.reset_button.setToolTip("Reset all inputs and logs to default state (only when idle).")
self.reset_button.setFixedWidth(80) self.reset_button.setFixedWidth(80)
self.reset_button.setStyleSheet("padding: 4px 8px;") self.reset_button.setStyleSheet("padding: 4px 8px;")
@@ -1125,7 +1102,6 @@ class DownloaderApp(QWidget):
self._update_manga_filename_style_button_text() self._update_manga_filename_style_button_text()
self._update_skip_scope_button_text() self._update_skip_scope_button_text()
self._update_char_filter_scope_button_text() self._update_char_filter_scope_button_text()
self._update_duplicate_mode_button_text()
def _center_on_screen(self): def _center_on_screen(self):
"""Centers the widget on the screen.""" """Centers the widget on the screen."""
@@ -1382,8 +1358,7 @@ class DownloaderApp(QWidget):
self.skip_scope_toggle_button.setVisible(not (is_only_links or is_only_archives)) self.skip_scope_toggle_button.setVisible(not (is_only_links or is_only_archives))
if hasattr(self, 'multipart_toggle_button') and self.multipart_toggle_button: if hasattr(self, 'multipart_toggle_button') and self.multipart_toggle_button:
self.multipart_toggle_button.setVisible(not (is_only_links or is_only_archives)) self.multipart_toggle_button.setVisible(not (is_only_links or is_only_archives))
# Other log header buttons (manga, duplicate, char filter scope) are handled by # Other log header buttons (manga, char filter scope) are handled by update_ui_for_manga_mode and update_ui_for_subfolders
# update_ui_for_manga_mode and update_ui_for_subfolders, which are called below.
if self.link_search_input: self.link_search_input.setVisible(is_only_links) if self.link_search_input: self.link_search_input.setVisible(is_only_links)
if self.link_search_button: self.link_search_button.setVisible(is_only_links) if self.link_search_button: self.link_search_button.setVisible(is_only_links)
@@ -1466,9 +1441,7 @@ class DownloaderApp(QWidget):
self.update_ui_for_subfolders(subfolders_on) self.update_ui_for_subfolders(subfolders_on)
self.update_custom_folder_visibility() self.update_custom_folder_visibility()
# Ensure manga mode UI updates (which includes the visibility of # Ensure manga mode UI updates (which includes the visibility of manga_rename_toggle_button)
# manga_rename_toggle_button and duplicate_mode_toggle_button)
# are triggered after filter mode changes.
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False) self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
@@ -1556,12 +1529,46 @@ class DownloaderApp(QWidget):
if self.skip_scope_toggle_button: if self.skip_scope_toggle_button:
if self.skip_words_scope == SKIP_SCOPE_FILES: if self.skip_words_scope == SKIP_SCOPE_FILES:
self.skip_scope_toggle_button.setText("Scope: Files") self.skip_scope_toggle_button.setText("Scope: Files")
self.skip_scope_toggle_button.setToolTip(
"Current Skip Scope: Files\n\n"
"Skips individual files if their names contain any of the 'Skip with Words'.\n"
"Example: Skip words \"WIP, sketch\".\n"
"- File \"art_WIP.jpg\" -> SKIPPED.\n"
"- File \"final_art.png\" -> DOWNLOADED (if other conditions met).\n"
"Post is still processed for other non-skipped files.\n\n"
"Click to cycle to: Posts"
)
elif self.skip_words_scope == SKIP_SCOPE_POSTS: elif self.skip_words_scope == SKIP_SCOPE_POSTS:
self.skip_scope_toggle_button.setText("Scope: Posts") self.skip_scope_toggle_button.setText("Scope: Posts")
self.skip_scope_toggle_button.setToolTip(
"Current Skip Scope: Posts\n\n"
"Skips entire posts if their titles contain any of the 'Skip with Words'.\n"
"All files from a skipped post are ignored.\n"
"Example: Skip words \"preview, announcement\".\n"
"- Post \"Exciting Announcement!\" -> SKIPPED.\n"
"- Post \"Finished Artwork\" -> PROCESSED (if other conditions met).\n\n"
"Click to cycle to: Both"
)
elif self.skip_words_scope == SKIP_SCOPE_BOTH: elif self.skip_words_scope == SKIP_SCOPE_BOTH:
self.skip_scope_toggle_button.setText("Scope: Both") self.skip_scope_toggle_button.setText("Scope: Both")
self.skip_scope_toggle_button.setToolTip(
"Current Skip Scope: Both (Posts then Files)\n\n"
"1. Checks post title: If title contains a skip word, the entire post is SKIPPED.\n"
"2. If post title is OK, then checks individual filenames: If a filename contains a skip word, only that file is SKIPPED.\n"
"Example: Skip words \"WIP, sketch\".\n"
"- Post \"Sketches and WIPs\" (title match) -> ENTIRE POST SKIPPED.\n"
"- Post \"Art Update\" (title OK) with files:\n"
" - \"character_WIP.jpg\" (file match) -> SKIPPED.\n"
" - \"final_scene.png\" (file OK) -> DOWNLOADED.\n\n"
"Click to cycle to: Files"
)
else: else:
self.skip_scope_toggle_button.setText("Scope: Unknown") self.skip_scope_toggle_button.setText("Scope: Unknown")
self.skip_scope_toggle_button.setToolTip(
"Current Skip Scope: Unknown\n\n"
"The skip words scope is in an unknown state. Please cycle or reset.\n\n"
"Click to cycle to: Files"
)
def _cycle_skip_scope(self): def _cycle_skip_scope(self):
@@ -1585,28 +1592,74 @@ class DownloaderApp(QWidget):
if self.char_filter_scope_toggle_button: if self.char_filter_scope_toggle_button:
if self.char_filter_scope == CHAR_SCOPE_FILES: if self.char_filter_scope == CHAR_SCOPE_FILES:
self.char_filter_scope_toggle_button.setText("Filter: Files") self.char_filter_scope_toggle_button.setText("Filter: Files")
self.char_filter_scope_toggle_button.setToolTip(
"Current Scope: Files\n\n"
"Filters individual files by name. A post is kept if any file matches.\n"
"Only matching files from that post are downloaded.\n"
"Example: Filter 'Tifa'. File 'Tifa_artwork.jpg' matches and is downloaded.\n"
"Folder Naming: Uses character from matching filename.\n\n"
"Click to cycle to: Title"
)
elif self.char_filter_scope == CHAR_SCOPE_TITLE: elif self.char_filter_scope == CHAR_SCOPE_TITLE:
self.char_filter_scope_toggle_button.setText("Filter: Title") self.char_filter_scope_toggle_button.setText("Filter: Title")
self.char_filter_scope_toggle_button.setToolTip(
"Current Scope: Title\n\n"
"Filters entire posts by their title. All files from a matching post are downloaded.\n"
"Example: Filter 'Aerith'. Post titled 'Aerith's Garden' matches; all its files are downloaded.\n"
"Folder Naming: Uses character from matching post title.\n\n"
"Click to cycle to: Both"
)
elif self.char_filter_scope == CHAR_SCOPE_BOTH: elif self.char_filter_scope == CHAR_SCOPE_BOTH:
self.char_filter_scope_toggle_button.setText("Filter: Both") self.char_filter_scope_toggle_button.setText("Filter: Both")
self.char_filter_scope_toggle_button.setToolTip(
"Current Scope: Both (Title then Files)\n\n"
"1. Checks post title: If matches, all files from post are downloaded.\n"
"2. If title doesn't match, checks filenames: If any file matches, only that file is downloaded.\n"
"Example: Filter 'Cloud'.\n"
" - Post 'Cloud Strife' (title match) -> all files downloaded.\n"
" - Post 'Bike Chase' with 'Cloud_fenrir.jpg' (file match) -> only 'Cloud_fenrir.jpg' downloaded.\n"
"Folder Naming: Prioritizes title match, then file match.\n\n"
"Click to cycle to: Comments"
)
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
self.char_filter_scope_toggle_button.setText("Filter: Comments (Beta)")
self.char_filter_scope_toggle_button.setToolTip(
"Current Scope: Comments (Beta - Files first, then Comments as fallback)\n\n"
"1. Checks filenames: If any file in the post matches the filter, the entire post is downloaded. Comments are NOT checked for this filter term.\n"
"2. If no file matches, THEN checks post comments: If a comment matches, the entire post is downloaded.\n"
"Example: Filter 'Barret'.\n"
" - Post A: Files 'Barret_gunarm.jpg', 'other.png'. File 'Barret_gunarm.jpg' matches. All files from Post A downloaded. Comments not checked for 'Barret'.\n"
" - Post B: Files 'dyne.jpg', 'weapon.gif'. Comments: '...a drawing of Barret Wallace...'. No file match for 'Barret'. Comment matches. All files from Post B downloaded.\n"
"Folder Naming: Prioritizes character from file match, then from comment match.\n\n"
"Click to cycle to: Files"
)
else: else:
self.char_filter_scope_toggle_button.setText("Filter: Unknown") self.char_filter_scope_toggle_button.setText("Filter: Unknown")
self.char_filter_scope_toggle_button.setToolTip(
"Current Scope: Unknown\n\n"
"The character filter scope is in an unknown state. Please cycle or reset.\n\n"
"Click to cycle to: Files"
)
def _cycle_char_filter_scope(self): def _cycle_char_filter_scope(self):
# Cycle: Files -> Title -> Both -> Comments -> Files
if self.char_filter_scope == CHAR_SCOPE_FILES: if self.char_filter_scope == CHAR_SCOPE_FILES:
self.char_filter_scope = CHAR_SCOPE_TITLE self.char_filter_scope = CHAR_SCOPE_TITLE
elif self.char_filter_scope == CHAR_SCOPE_TITLE: elif self.char_filter_scope == CHAR_SCOPE_TITLE:
self.char_filter_scope = CHAR_SCOPE_BOTH self.char_filter_scope = CHAR_SCOPE_BOTH
elif self.char_filter_scope == CHAR_SCOPE_BOTH: elif self.char_filter_scope == CHAR_SCOPE_BOTH:
self.char_filter_scope = CHAR_SCOPE_COMMENTS
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
self.char_filter_scope = CHAR_SCOPE_FILES self.char_filter_scope = CHAR_SCOPE_FILES
else: else:
self.char_filter_scope = CHAR_SCOPE_FILES self.char_filter_scope = CHAR_SCOPE_FILES # Default fallback
self._update_char_filter_scope_button_text() self._update_char_filter_scope_button_text()
self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope) self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
self.log_signal.emit(f" Character filter scope changed to: '{self.char_filter_scope}'") self.log_signal.emit(f" Character filter scope changed to: '{self.char_filter_scope}'")
def add_new_character(self): def add_new_character(self):
global KNOWN_NAMES, clean_folder_name global KNOWN_NAMES, clean_folder_name
name_to_add = self.new_char_input.text().strip() name_to_add = self.new_char_input.text().strip()
@@ -1751,18 +1804,34 @@ class DownloaderApp(QWidget):
if self.manga_filename_style == STYLE_POST_TITLE: if self.manga_filename_style == STYLE_POST_TITLE:
self.manga_rename_toggle_button.setText("Name: Post Title") self.manga_rename_toggle_button.setText("Name: Post Title")
self.manga_rename_toggle_button.setToolTip( self.manga_rename_toggle_button.setToolTip(
"Manga files: First file named by post title. Subsequent files in same post keep original names.\n" "Manga Filename Style: Post Title\n\n"
"Click to change to original file names for all files." "When Manga/Comic Mode is active for a creator feed:\n"
"- The *first* file in a post is named after the post's title (e.g., \"MyMangaChapter1.jpg\").\n"
"- Any *subsequent* files within the *same post* will retain their original filenames (e.g., \"page_02.png\", \"bonus_art.jpg\").\n"
"- This is generally recommended for better organization of sequential content.\n"
"- Example: Post \"Chapter 1: The Beginning\" with files \"001.jpg\", \"002.jpg\".\n"
" Downloads as: \"Chapter 1 The Beginning.jpg\", \"002.jpg\".\n\n"
"Click to change to: Original File Name"
) )
elif self.manga_filename_style == STYLE_ORIGINAL_NAME: elif self.manga_filename_style == STYLE_ORIGINAL_NAME:
self.manga_rename_toggle_button.setText("Name: Original File") self.manga_rename_toggle_button.setText("Name: Original File")
self.manga_rename_toggle_button.setToolTip( self.manga_rename_toggle_button.setToolTip(
"Manga files will keep their original names as provided by the site (e.g., 001.jpg, page_01.png).\n" "Manga Filename Style: Original File Name\n\n"
"Click to change to post title based naming for the first file." "When Manga/Comic Mode is active for a creator feed:\n"
"- *All* files in a post will attempt to keep their original filenames as provided by the site (e.g., \"001.jpg\", \"page_02.png\").\n"
"- This can be useful if original names are already well-structured and sequential.\n"
"- If original names are inconsistent, using \"Post Title\" style is often better.\n"
"- Example: Post \"Chapter 1: The Beginning\" with files \"001.jpg\", \"002.jpg\".\n"
" Downloads as: \"001.jpg\", \"002.jpg\".\n\n"
"Click to change to: Post Title"
) )
else: else:
self.manga_rename_toggle_button.setText("Name: Unknown Style") self.manga_rename_toggle_button.setText("Name: Unknown Style")
self.manga_rename_toggle_button.setToolTip("Manga filename style is in an unknown state.") self.manga_rename_toggle_button.setToolTip(
"Manga Filename Style: Unknown\n\n"
"The manga filename style is in an unknown state. Please cycle or reset.\n\n"
"Click to change to: Post Title"
)
def _toggle_manga_filename_style(self): def _toggle_manga_filename_style(self):
@@ -1816,11 +1885,6 @@ class DownloaderApp(QWidget):
# Visible if manga mode is on AND not in "Only Links" or "Only Archives" mode # Visible if manga mode is on AND not in "Only Links" or "Only Archives" mode
self.manga_rename_toggle_button.setVisible(manga_mode_effectively_on and not (is_only_links_mode or is_only_archives_mode)) self.manga_rename_toggle_button.setVisible(manga_mode_effectively_on and not (is_only_links_mode or is_only_archives_mode))
if hasattr(self, 'duplicate_mode_toggle_button'):
# Visible if manga mode is OFF AND not in "Only Links" or "Only Archives" mode
self.duplicate_mode_toggle_button.setVisible(
not manga_mode_effectively_on and not (is_only_links_mode or is_only_archives_mode)
)
if manga_mode_effectively_on: if manga_mode_effectively_on:
if self.page_range_label: self.page_range_label.setEnabled(False) if self.page_range_label: self.page_range_label.setEnabled(False)
@@ -1909,12 +1973,11 @@ class DownloaderApp(QWidget):
raw_skip_words = self.skip_words_input.text().strip() raw_skip_words = self.skip_words_input.text().strip()
skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()] skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()]
current_skip_words_scope = self.get_skip_words_scope()
raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else "" raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else ""
effective_duplicate_file_mode = self.duplicate_file_mode # Start with user's choice
allow_multipart = self.allow_multipart_download_setting # Use the internal setting allow_multipart = self.allow_multipart_download_setting # Use the internal setting
remove_from_filename_words_list = [word.strip() for word in raw_remove_filename_words.split(',') if word.strip()] remove_from_filename_words_list = [word.strip() for word in raw_remove_filename_words.split(',') if word.strip()]
current_skip_words_scope = self.get_skip_words_scope()
current_char_filter_scope = self.get_char_filter_scope() current_char_filter_scope = self.get_char_filter_scope()
manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False
@@ -1967,10 +2030,8 @@ class DownloaderApp(QWidget):
elif manga_mode: elif manga_mode:
start_page, end_page = None, None start_page, end_page = None, None
# effective_duplicate_file_mode will be self.duplicate_file_mode (UI button's state).
# Manga Mode specific duplicate handling is now managed entirely within downloader_utils.py # Manga Mode specific duplicate handling is now managed entirely within downloader_utils.py
self.external_link_queue.clear(); self.extracted_links_cache = []; self._is_processing_external_link_queue = False; self._current_link_post_title = None self.external_link_queue.clear(); self.extracted_links_cache = []; self._is_processing_external_link_queue = False; self._current_link_post_title = None
self.all_kept_original_filenames = []
raw_character_filters_text = self.character_input.text().strip() raw_character_filters_text = self.character_input.text().strip()
@@ -2130,6 +2191,7 @@ class DownloaderApp(QWidget):
self.total_posts_to_process = 0; self.processed_posts_count = 0; self.download_counter = 0; self.skip_counter = 0 self.total_posts_to_process = 0; self.processed_posts_count = 0; self.download_counter = 0; self.skip_counter = 0
self.progress_label.setText("Progress: Initializing...") self.progress_label.setText("Progress: Initializing...")
effective_num_post_workers = 1 effective_num_post_workers = 1
effective_num_file_threads_per_worker = 1 effective_num_file_threads_per_worker = 1
@@ -2179,8 +2241,7 @@ class DownloaderApp(QWidget):
f" Skip Words Scope: {current_skip_words_scope.capitalize()}", f" Skip Words Scope: {current_skip_words_scope.capitalize()}",
f" Remove Words from Filename: {', '.join(remove_from_filename_words_list) if remove_from_filename_words_list else 'None'}", f" Remove Words from Filename: {', '.join(remove_from_filename_words_list) if remove_from_filename_words_list else 'None'}",
f" Compress Images: {'Enabled' if compress_images else 'Disabled'}", f" Compress Images: {'Enabled' if compress_images else 'Disabled'}",
f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}", f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}" # Removed duplicate file handling log
f" Multi-part Download: {'Enabled' if allow_multipart else 'Disabled'}"
]) ])
else: else:
log_messages.append(f" Mode: Extracting Links Only") log_messages.append(f" Mode: Extracting Links Only")
@@ -2192,7 +2253,6 @@ class DownloaderApp(QWidget):
log_messages.append(f" ↳ Manga Filename Style: {'Post Title Based' if self.manga_filename_style == STYLE_POST_TITLE else 'Original File Name'}") log_messages.append(f" ↳ Manga Filename Style: {'Post Title Based' if self.manga_filename_style == STYLE_POST_TITLE else 'Original File Name'}")
if filter_character_list_to_pass: if filter_character_list_to_pass:
log_messages.append(f" ↳ Manga Character Filter (for naming/folder): {', '.join(item['name'] for item in filter_character_list_to_pass)}") log_messages.append(f" ↳ Manga Character Filter (for naming/folder): {', '.join(item['name'] for item in filter_character_list_to_pass)}")
log_messages.append(f" ↳ Char Filter Scope (Manga): {current_char_filter_scope.capitalize()}")
log_messages.append(f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).") log_messages.append(f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).")
should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url
@@ -2242,8 +2302,8 @@ class DownloaderApp(QWidget):
'signals': self.worker_signals, 'signals': self.worker_signals,
'manga_filename_style': self.manga_filename_style, 'manga_filename_style': self.manga_filename_style,
'num_file_threads_for_worker': effective_num_file_threads_per_worker, 'num_file_threads_for_worker': effective_num_file_threads_per_worker,
'allow_multipart_download': allow_multipart, # Corrected from previous thought 'allow_multipart_download': allow_multipart,
'duplicate_file_mode': effective_duplicate_file_mode # Pass the potentially overridden mode # 'duplicate_file_mode' and session-wide tracking removed
} }
try: try:
@@ -2258,13 +2318,11 @@ class DownloaderApp(QWidget):
'use_subfolders', 'use_post_subfolders', 'custom_folder_name', 'use_subfolders', 'use_post_subfolders', 'custom_folder_name',
'compress_images', 'download_thumbnails', 'service', 'user_id', 'compress_images', 'download_thumbnails', 'service', 'user_id',
'downloaded_files', 'downloaded_file_hashes', 'remove_from_filename_words_list', 'downloaded_files', 'downloaded_file_hashes', 'remove_from_filename_words_list',
'downloaded_files_lock', 'downloaded_file_hashes_lock', 'downloaded_files_lock', 'downloaded_file_hashes_lock',
'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'show_external_links', 'extract_links_only', 'show_external_links', 'extract_links_only', 'num_file_threads_for_worker',
'num_file_threads_for_worker', 'start_page', 'end_page', 'target_post_id_from_initial_url', 'duplicate_file_mode',
'skip_current_file_flag', 'manga_mode_active', 'unwanted_keywords', 'manga_filename_style',
'start_page', 'end_page', 'target_post_id_from_initial_url',
'manga_mode_active', 'unwanted_keywords', 'manga_filename_style', 'duplicate_file_mode',
'allow_multipart_download' 'allow_multipart_download'
] ]
args_template['skip_current_file_flag'] = None args_template['skip_current_file_flag'] = None
@@ -2385,18 +2443,17 @@ class DownloaderApp(QWidget):
'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list', 'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list',
'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'show_external_links', 'extract_links_only', 'allow_multipart_download', 'show_external_links', 'extract_links_only', 'allow_multipart_download',
'num_file_threads', 'num_file_threads', 'skip_current_file_flag',
'skip_current_file_flag',
'manga_mode_active', 'manga_filename_style' 'manga_mode_active', 'manga_filename_style'
] ]
# Ensure 'allow_multipart_download' is also considered for optional keys if it has a default in PostProcessorWorker # Ensure 'allow_multipart_download' is also considered for optional keys if it has a default in PostProcessorWorker
ppw_optional_keys_with_defaults = { ppw_optional_keys_with_defaults = {
'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'remove_from_filename_words_list', 'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'remove_from_filename_words_list',
'show_external_links', 'extract_links_only', 'show_external_links', 'extract_links_only', 'duplicate_file_mode', # Added duplicate_file_mode here
'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style' 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style',
'processed_base_filenames_session_wide', 'processed_base_filenames_session_wide_lock' # Add these
} }
for post_data_item in all_posts_data: for post_data_item in all_posts_data:
if self.cancellation_event.is_set(): break if self.cancellation_event.is_set(): break
if not isinstance(post_data_item, dict): if not isinstance(post_data_item, dict):
@@ -2464,12 +2521,10 @@ class DownloaderApp(QWidget):
widgets_to_toggle = [ self.download_btn, self.link_input, self.radio_all, self.radio_images, self.radio_videos, self.radio_only_links, widgets_to_toggle = [ self.download_btn, self.link_input, self.radio_all, self.radio_images, self.radio_videos, self.radio_only_links,
self.skip_zip_checkbox, self.skip_rar_checkbox, self.use_subfolders_checkbox, self.compress_images_checkbox, self.skip_zip_checkbox, self.skip_rar_checkbox, self.use_subfolders_checkbox, self.compress_images_checkbox,
self.download_thumbnails_checkbox, self.use_multithreading_checkbox, self.skip_words_input, self.character_search_input, self.download_thumbnails_checkbox, self.use_multithreading_checkbox, self.skip_words_input, self.character_search_input,
self.new_char_input, self.add_char_button, self.delete_char_button, self.new_char_input, self.add_char_button, self.delete_char_button, self.char_filter_scope_toggle_button, # duplicate_file_mode_toggle_button removed
self.char_filter_scope_toggle_button, self.start_page_input, self.end_page_input, self.page_range_label, self.to_label,
self.start_page_input, self.end_page_input, self.character_input, self.custom_folder_input, self.custom_folder_label, self.remove_from_filename_input,
self.page_range_label, self.to_label, self.character_input, self.custom_folder_input, self.custom_folder_label, self.remove_from_filename_input, self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.multipart_toggle_button, self.skip_scope_toggle_button
self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.multipart_toggle_button,
self.skip_scope_toggle_button
] ]
for widget in widgets_to_toggle: for widget in widgets_to_toggle:
@@ -2663,15 +2718,10 @@ class DownloaderApp(QWidget):
self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope) self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope)
self._update_skip_scope_button_text() self._update_skip_scope_button_text()
self.char_filter_scope = CHAR_SCOPE_TITLE self.char_filter_scope = CHAR_SCOPE_FILES # Default to Files on full reset
self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope) self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
self._update_char_filter_scope_button_text() self._update_char_filter_scope_button_text()
self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Reset to default (Delete)
self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode)
self._update_duplicate_mode_button_text()
self.settings.sync() self.settings.sync()
self._update_manga_filename_style_button_text() self._update_manga_filename_style_button_text()
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False) self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
@@ -2693,12 +2743,8 @@ class DownloaderApp(QWidget):
self.skip_words_scope = SKIP_SCOPE_POSTS self.skip_words_scope = SKIP_SCOPE_POSTS
self._update_skip_scope_button_text() self._update_skip_scope_button_text()
self.char_filter_scope = CHAR_SCOPE_TITLE self.char_filter_scope = CHAR_SCOPE_FILES # Default to Files
self._update_char_filter_scope_button_text() self._update_char_filter_scope_button_text()
self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Default to DELETE
self._update_duplicate_mode_button_text()
self._handle_filter_mode_change(self.radio_all, True) self._handle_filter_mode_change(self.radio_all, True)
self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked())
self.filter_character_list("") self.filter_character_list("")
@@ -2728,6 +2774,26 @@ class DownloaderApp(QWidget):
if hasattr(self, 'multipart_toggle_button'): if hasattr(self, 'multipart_toggle_button'):
text = "Multi-part: ON" if self.allow_multipart_download_setting else "Multi-part: OFF" text = "Multi-part: ON" if self.allow_multipart_download_setting else "Multi-part: OFF"
self.multipart_toggle_button.setText(text) self.multipart_toggle_button.setText(text)
if self.allow_multipart_download_setting:
self.multipart_toggle_button.setToolTip(
"Multi-part Download: ON\n\n"
"Enables downloading large files in multiple segments (parts) simultaneously.\n"
"- Can significantly speed up downloads for *single large files* (e.g., videos, large archives) if the server supports it.\n"
"- May increase CPU/network usage.\n"
"- For creator feeds with many *small files* (e.g., images), this might not offer speed benefits and could make the UI/log feel busy.\n"
"- If a multi-part download fails for a file, it will automatically retry with a single stream.\n"
"- Example: A 500MB video might be downloaded in 5 parts of 100MB each, concurrently.\n\n"
"Click to turn OFF (use single-stream for all files)."
)
else:
self.multipart_toggle_button.setToolTip(
"Multi-part Download: OFF\n\n"
"All files will be downloaded using a single connection (stream).\n"
"- This is generally stable and works well for most scenarios, especially for feeds with many smaller files.\n"
"- Large files will be downloaded sequentially in one go.\n"
"- Example: A 500MB video will be downloaded as one continuous stream.\n\n"
"Click to turn ON (enable multi-part for large files, see advisory on click)."
)
def _toggle_multipart_mode(self): def _toggle_multipart_mode(self):
# If currently OFF, and user is trying to turn it ON # If currently OFF, and user is trying to turn it ON
@@ -2762,23 +2828,6 @@ class DownloaderApp(QWidget):
self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting) self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
self.log_signal.emit(f" Multi-part download set to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}") self.log_signal.emit(f" Multi-part download set to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}")
def _update_duplicate_mode_button_text(self):
if hasattr(self, 'duplicate_mode_toggle_button'):
if self.duplicate_file_mode == DUPLICATE_MODE_DELETE:
self.duplicate_mode_toggle_button.setText("Duplicates: Delete")
elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER:
self.duplicate_mode_toggle_button.setText("Duplicates: Move")
else: # Should not happen
self.duplicate_mode_toggle_button.setText("Duplicates: Move") # Default to Move if unknown
def _cycle_duplicate_mode(self):
if self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER:
self.duplicate_file_mode = DUPLICATE_MODE_DELETE
else: # If it's DELETE or unknown, cycle back to MOVE
self.duplicate_file_mode = DUPLICATE_MODE_MOVE_TO_SUBFOLDER
self._update_duplicate_mode_button_text()
self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode)
self.log_signal.emit(f" Duplicate file handling mode changed to: '{self.duplicate_file_mode.capitalize()}'")
if __name__ == '__main__': if __name__ == '__main__':
import traceback import traceback