This commit is contained in:
Yuvi9587
2025-05-14 16:26:18 +05:30
parent 25d33f1531
commit b5e9080285
3 changed files with 469 additions and 279 deletions

View File

@@ -1,8 +0,0 @@
Boa Hancock
Hairy D.va
Mercy
Misc
Nami
Robin
Sombra
Yamato

View File

@@ -38,10 +38,7 @@ SKIP_SCOPE_BOTH = "both"
CHAR_SCOPE_TITLE = "title"
CHAR_SCOPE_FILES = "files"
CHAR_SCOPE_BOTH = "both"
# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed.
DUPLICATE_MODE_DELETE = "delete"
DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move"
CHAR_SCOPE_COMMENTS = "comments"
fastapi_app = None
KNOWN_NAMES = []
@@ -99,6 +96,15 @@ def clean_filename(name):
cleaned = re.sub(r'\s+', '_', cleaned)
return cleaned if cleaned else "untitled_file"
def strip_html_tags(html_text):
    """Return the visible text of an HTML fragment.

    Tags are removed first, then HTML entities are unescaped. Doing it in
    this order means escaped markup inside the text (e.g. "&lt;b&gt;")
    survives as the literal characters "<b>" instead of being unescaped
    into a tag and then deleted, which loses content.

    This is still a regex-based approach and will not handle every corner
    of real-world HTML (comments, CDATA, unbalanced brackets) perfectly.

    Args:
        html_text: Raw HTML string; may be None or empty.

    Returns:
        The cleaned, whitespace-stripped text, or "" for falsy input.
    """
    if not html_text:
        return ""
    # [^>]* keeps the match inside a single tag and also matches tags that
    # span newlines (a non-DOTALL '.*?' would leave those behind).
    without_tags = re.sub(r'<[^>]*>', '', html_text)
    # Unescape entities (&amp;, &lt;, ...) only after tag removal so
    # escaped angle brackets remain literal text.
    return html.unescape(without_tags).strip()
def extract_folder_name_from_title(title, unwanted_keywords):
if not title: return 'Uncategorized'
@@ -221,6 +227,31 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
except Exception as e:
raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}")
def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None):
    """Fetch the comments of a single post from the site's v1 JSON API.

    Args:
        api_domain: Host serving the API (e.g. "kemono.su").
        service: Service segment of the API path.
        user_id: Creator/user identifier.
        post_id: Identifier of the post whose comments are fetched.
        headers: HTTP headers forwarded to requests.get.
        logger: One-argument callable used for progress and warnings.
        cancellation_event: Optional Event-like object; when already set,
            the fetch is aborted before any network traffic happens.

    Returns:
        The decoded JSON payload (typically a list of comment dicts), or
        an empty list when the server replies with a non-JSON content type.

    Raises:
        RuntimeError: On cancellation, timeout, HTTP/network failure, JSON
            decode failure, or any unexpected error.
    """
    if cancellation_event and cancellation_event.is_set():
        logger(" Comment fetch cancelled before request.")
        raise RuntimeError("Comment fetch operation cancelled by user.")
    url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}/comments"
    logger(f" Fetching comments: {url}")
    try:
        # Comments are small; use a shorter timeout than regular post fetches.
        resp = requests.get(url, headers=headers, timeout=(10, 30))
        resp.raise_for_status()
        declared_type = resp.headers.get('Content-Type', '').lower()
        if 'application/json' in declared_type:
            return resp.json()
        # Non-JSON body: warn and degrade to "no comments" rather than raise.
        logger(f"⚠️ Unexpected content type from comments API: {resp.headers.get('Content-Type')}. Body: {resp.text[:200]}")
        return []
    except requests.exceptions.Timeout:
        raise RuntimeError(f"Timeout fetching comments for post {post_id} from {url}")
    except requests.exceptions.RequestException as req_err:
        message = f"Error fetching comments for post {post_id} from {url}: {req_err}"
        if req_err.response is not None:
            message += f" (Status: {req_err.response.status_code}, Body: {req_err.response.text[:200]})"
        raise RuntimeError(message)
    except ValueError as decode_err:  # json.JSONDecodeError subclasses ValueError
        raise RuntimeError(f"Error decoding JSON from comments API for post {post_id} ({url}): {decode_err}. Response text: {resp.text[:200]}")
    except Exception as unexpected_err:
        raise RuntimeError(f"Unexpected error fetching comments for post {post_id} ({url}): {unexpected_err}")
def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False, cancellation_event=None):
headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
@@ -412,7 +443,7 @@ class PostProcessorWorker:
char_filter_scope=CHAR_SCOPE_FILES,
remove_from_filename_words_list=None,
allow_multipart_download=True,
duplicate_file_mode=DUPLICATE_MODE_DELETE):
): # Removed duplicate_file_mode and session-wide tracking
self.post = post_data
self.download_root = download_root
self.known_names = known_names
@@ -450,7 +481,7 @@ class PostProcessorWorker:
self.char_filter_scope = char_filter_scope
self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else []
self.allow_multipart_download = allow_multipart_download
self.duplicate_file_mode = duplicate_file_mode # This will be the effective mode (possibly overridden by main.py for manga)
# self.duplicate_file_mode and session-wide tracking removed
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found.")
@@ -469,10 +500,7 @@ class PostProcessorWorker:
post_title="", file_index_in_post=0, num_files_in_this_post=1):
was_original_name_kept_flag = False
final_filename_saved_for_return = ""
# current_target_folder_path is the actual folder where the file will be saved.
# It starts as the main character/post folder (target_folder_path) by default.
current_target_folder_path = target_folder_path
# target_folder_path is the base character/post folder.
if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False
@@ -561,44 +589,29 @@ class PostProcessorWorker:
self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).")
return 0, 1, api_original_filename, False
# --- Pre-Download Duplicate Handling (Standard Mode Only - Manga mode has its own suffixing) ---
if not self.manga_mode_active:
# --- Pre-Download Duplicate Handling (Standard Mode Only) ---
is_duplicate_for_main_folder_by_path = os.path.exists(os.path.join(target_folder_path, filename_to_save_in_main_path)) and \
os.path.getsize(os.path.join(target_folder_path, filename_to_save_in_main_path)) > 0
path_in_main_folder_check = os.path.join(target_folder_path, filename_to_save_in_main_path)
is_duplicate_by_path = os.path.exists(path_in_main_folder_check) and \
os.path.getsize(path_in_main_folder_check) > 0
is_duplicate_for_main_folder_by_session_name = False
is_duplicate_by_session_name = False
with self.downloaded_files_lock:
if filename_to_save_in_main_path in self.downloaded_files:
is_duplicate_for_main_folder_by_session_name = True
is_duplicate_by_session_name = True
if is_duplicate_for_main_folder_by_path or is_duplicate_for_main_folder_by_session_name:
if self.duplicate_file_mode == DUPLICATE_MODE_DELETE:
reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name"
self.logger(f" -> Delete Duplicate ({reason}): '{filename_to_save_in_main_path}'. Skipping download.")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER:
reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name"
self.logger(f" -> Pre-DL Move ({reason}): '{filename_to_save_in_main_path}'. Will target 'Duplicate' subfolder.")
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
if is_duplicate_by_path or is_duplicate_by_session_name:
reason = "Path Exists" if is_duplicate_by_path else "Session Name"
self.logger(f" -> Skip Duplicate ({reason}, Pre-DL): '{filename_to_save_in_main_path}'. Skipping download.")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark as processed
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
# Ensure base target folder exists (used for .part file with multipart)
try:
os.makedirs(current_target_folder_path, exist_ok=True)
os.makedirs(target_folder_path, exist_ok=True) # For .part file
except OSError as e:
self.logger(f" ❌ Critical error creating directory '{current_target_folder_path}': {e}. Skipping file '{api_original_filename}'.")
self.logger(f" ❌ Critical error creating directory '{target_folder_path}': {e}. Skipping file '{api_original_filename}'.")
return 0, 1, api_original_filename, False
# If mode is MOVE (and not manga mode), and current_target_folder_path is now "Duplicate",
# check if the file *already* exists by its base name in this "Duplicate" folder. (Standard Mode Only)
if not self.manga_mode_active and \
self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \
"Duplicate" in current_target_folder_path.split(os.sep) and \
os.path.exists(os.path.join(current_target_folder_path, filename_to_save_in_main_path)):
self.logger(f" -> File '{filename_to_save_in_main_path}' already exists in '{os.path.basename(current_target_folder_path)}' subfolder. Skipping download.")
# The name was already added to downloaded_files if it was a pre-DL move.
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
# --- Download Attempt ---
max_retries = 3
@@ -633,9 +646,10 @@ class PostProcessorWorker:
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
self.signals.file_download_status_signal.emit(False)
mp_save_path_base = os.path.join(current_target_folder_path, filename_to_save_in_main_path)
# .part file is always based on the main target_folder_path and filename_to_save_in_main_path
mp_save_path_base_for_part = os.path.join(target_folder_path, filename_to_save_in_main_path)
mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts(
file_url, mp_save_path_base, total_size_bytes, num_parts_for_file, headers,
file_url, mp_save_path_base_for_part, total_size_bytes, num_parts_for_file, headers,
api_original_filename, self.signals, self.cancellation_event, skip_event, self.logger
)
if mp_success:
@@ -705,130 +719,132 @@ class PostProcessorWorker:
if file_content_bytes: file_content_bytes.close()
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
if not self.manga_mode_active:
# --- Post-Download Hash Check (Standard Mode Only) ---
with self.downloaded_file_hashes_lock:
if calculated_file_hash in self.downloaded_file_hashes:
if self.duplicate_file_mode == DUPLICATE_MODE_DELETE:
self.logger(f" -> Delete Duplicate (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). Skipping save.")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
if file_content_bytes: file_content_bytes.close()
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER:
self.logger(f" -> Post-DL Move (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). Content already downloaded.")
if "Duplicate" not in current_target_folder_path.split(os.sep):
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
self.logger(f" Redirecting to 'Duplicate' subfolder: '{current_target_folder_path}'")
# Ensure "Duplicate" folder exists if this is a new redirection due to hash
try: os.makedirs(current_target_folder_path, exist_ok=True)
except OSError as e_mkdir_hash: self.logger(f" Error creating Duplicate folder for hash collision: {e_mkdir_hash}")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
# --- Final Filename Determination for Saving ---
filename_for_actual_save = filename_to_save_in_main_path
# --- Universal Post-Download Hash Check ---
with self.downloaded_file_hashes_lock:
if calculated_file_hash in self.downloaded_file_hashes:
self.logger(f" -> Skip Saving Duplicate (Hash Match): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...).")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark logical name
if file_content_bytes: file_content_bytes.close()
# If it was a multipart download, its .part file needs cleanup
if not isinstance(file_content_bytes, BytesIO): # Indicates multipart download
part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
if os.path.exists(part_file_to_remove):
try: os.remove(part_file_to_remove);
except OSError: self.logger(f" -> Failed to remove .part file for hash duplicate: {part_file_to_remove}")
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
# If mode is MOVE (and not manga mode) and the file is destined for the main folder,
# but a file with that name *now* exists (e.g. race condition, or different file with same name not caught by hash),
# reroute it to the "Duplicate" folder.
if not self.manga_mode_active and \
self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \
current_target_folder_path == target_folder_path and \
os.path.exists(os.path.join(current_target_folder_path, filename_for_actual_save)):
self.logger(f" -> Post-DL Move (Late Name Collision in Main): '{filename_for_actual_save}'. Moving to 'Duplicate'.")
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
try: # Ensure "Duplicate" folder exists if this is a new redirection
os.makedirs(current_target_folder_path, exist_ok=True)
except OSError as e_mkdir: self.logger(f" Error creating Duplicate folder during late move: {e_mkdir}")
# The name filename_to_save_in_main_path was already added to downloaded_files if it was a pre-DL name collision.
# If it was a hash collision that got rerouted, it was also added.
# If this is a new reroute due to late name collision, ensure it's marked.
# --- Determine Save Location and Final Filename ---
effective_save_folder = target_folder_path # Default: main character/post folder
# filename_to_save_in_main_path is the logical name after cleaning, manga styling, word removal
filename_after_styling_and_word_removal = filename_to_save_in_main_path
# "Move" logic and "Duplicate" subfolder logic removed.
# effective_save_folder will always be target_folder_path.
try: # Ensure the chosen save folder (main or Duplicate) exists
os.makedirs(effective_save_folder, exist_ok=True)
except OSError as e:
self.logger(f" ❌ Critical error creating directory '{effective_save_folder}': {e}. Skipping file '{api_original_filename}'.")
if file_content_bytes: file_content_bytes.close()
# Cleanup .part file if multipart
if not isinstance(file_content_bytes, BytesIO):
part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
if os.path.exists(part_file_to_remove): os.remove(part_file_to_remove)
return 0, 1, api_original_filename, False
# --- Image Compression ---
# This operates on file_content_bytes (which is BytesIO or a file handle from multipart)
# It might change filename_after_styling_and_word_removal's extension (e.g., .jpg to .webp)
# and returns new data_to_write_after_compression (BytesIO) or original file_content_bytes.
data_to_write_after_compression = file_content_bytes
filename_after_compression = filename_after_styling_and_word_removal
# Apply numeric suffix renaming (_1, _2) *only if needed within the current_target_folder_path*
# This means:
# - If current_target_folder_path is the main folder (and not MOVE mode, or MOVE mode but file was unique):
# Renaming happens if a file with filename_for_actual_save exists there.
# - If current_target_folder_path is "Duplicate" (because of MOVE mode):
# Renaming happens if filename_for_actual_save exists *within "Duplicate"*.
counter = 1
base_name_final_coll, ext_final_coll = os.path.splitext(filename_for_actual_save)
temp_filename_final_check = filename_for_actual_save
while os.path.exists(os.path.join(current_target_folder_path, temp_filename_final_check)):
temp_filename_final_check = f"{base_name_final_coll}_{counter}{ext_final_coll}"
counter += 1
if temp_filename_final_check != filename_for_actual_save:
self.logger(f" Final rename for target folder '{os.path.basename(current_target_folder_path)}': '{temp_filename_final_check}' (was '{filename_for_actual_save}')")
filename_for_actual_save = temp_filename_final_check
bytes_to_write = file_content_bytes
final_filename_after_processing = filename_for_actual_save
current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing)
is_img_for_compress_check = is_image(api_original_filename)
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...")
try:
bytes_to_write.seek(0)
with Image.open(bytes_to_write) as img_obj:
file_content_bytes.seek(0)
with Image.open(file_content_bytes) as img_obj:
if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA')
elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB')
compressed_bytes_io = BytesIO()
img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4)
compressed_size = compressed_bytes_io.getbuffer().nbytes
if compressed_size < downloaded_size_bytes * 0.9:
if compressed_size < downloaded_size_bytes * 0.9: # If significantly smaller
self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.")
if hasattr(bytes_to_write, 'close'): bytes_to_write.close()
original_part_file_path = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for .part
if os.path.exists(original_part_file_path):
os.remove(original_part_file_path)
bytes_to_write = compressed_bytes_io; bytes_to_write.seek(0)
base_name_orig, _ = os.path.splitext(filename_for_actual_save)
final_filename_after_processing = base_name_orig + '.webp'
current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing)
self.logger(f" Updated filename (compressed): {final_filename_after_processing}")
data_to_write_after_compression = compressed_bytes_io; data_to_write_after_compression.seek(0)
base_name_orig, _ = os.path.splitext(filename_after_compression)
filename_after_compression = base_name_orig + '.webp'
self.logger(f" Updated filename (compressed): {filename_after_compression}")
else:
self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0)
self.logger(f" Compression skipped: WebP not significantly smaller."); file_content_bytes.seek(0) # Reset original stream
data_to_write_after_compression = file_content_bytes # Use original
except Exception as comp_e:
self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); bytes_to_write.seek(0)
self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); file_content_bytes.seek(0)
data_to_write_after_compression = file_content_bytes # Use original
if final_filename_after_processing != filename_for_actual_save and \
os.path.exists(current_save_path_final) and os.path.getsize(current_save_path_final) > 0:
self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{os.path.basename(current_target_folder_path)}'.")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
if bytes_to_write and hasattr(bytes_to_write, 'close'): bytes_to_write.close()
return 0, 1, final_filename_after_processing, was_original_name_kept_flag
# --- Final Numeric Suffixing in the effective_save_folder ---
final_filename_on_disk = filename_after_compression # This is the name after potential compression
temp_base, temp_ext = os.path.splitext(final_filename_on_disk)
suffix_counter = 1
while os.path.exists(os.path.join(effective_save_folder, final_filename_on_disk)):
final_filename_on_disk = f"{temp_base}_{suffix_counter}{temp_ext}"
suffix_counter += 1
if final_filename_on_disk != filename_after_compression:
self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')")
# --- Save File ---
final_save_path = os.path.join(effective_save_folder, final_filename_on_disk)
try:
os.makedirs(current_target_folder_path, exist_ok=True)
# data_to_write_after_compression is BytesIO (single stream, or compressed multipart)
# OR it's the original file_content_bytes (which is a file handle if uncompressed multipart)
if isinstance(bytes_to_write, BytesIO):
with open(current_save_path_final, 'wb') as f_out:
f_out.write(bytes_to_write.getvalue())
else:
if hasattr(bytes_to_write, 'close'): bytes_to_write.close()
source_part_file = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for .part
os.rename(source_part_file, current_save_path_final)
if data_to_write_after_compression is file_content_bytes and not isinstance(file_content_bytes, BytesIO):
# This means uncompressed multipart download. Original .part file handle is file_content_bytes.
# The .part file is at target_folder_path/filename_to_save_in_main_path.part
original_part_file_actual_path = file_content_bytes.name
file_content_bytes.close() # Close handle first
os.rename(original_part_file_actual_path, final_save_path)
self.logger(f" Renamed .part file to final: {final_save_path}")
else: # Single stream download, or compressed multipart. Write from BytesIO.
with open(final_save_path, 'wb') as f_out:
f_out.write(data_to_write_after_compression.getvalue())
# If original was multipart and then compressed, clean up original .part file
if data_to_write_after_compression is not file_content_bytes and not isinstance(file_content_bytes, BytesIO):
original_part_file_actual_path = file_content_bytes.name
file_content_bytes.close()
if os.path.exists(original_part_file_actual_path):
try: os.remove(original_part_file_actual_path)
except OSError as e_rem: self.logger(f" -> Failed to remove .part after compression: {e_rem}")
with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash)
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Track by logical name
final_filename_saved_for_return = final_filename_after_processing
self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(current_target_folder_path)}'")
final_filename_saved_for_return = final_filename_on_disk
self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'")
# Session-wide base name tracking removed.
time.sleep(0.05)
return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag
except Exception as save_err:
self.logger(f"❌ Save Fail for '{final_filename_after_processing}': {save_err}")
if os.path.exists(current_save_path_final):
try: os.remove(current_save_path_final);
except OSError: self.logger(f" -> Failed to remove partially saved file: {current_save_path_final}")
self.logger(f"❌ Save Fail for '{final_filename_on_disk}': {save_err}")
if os.path.exists(final_save_path):
try: os.remove(final_save_path);
except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
finally:
if bytes_to_write and hasattr(bytes_to_write, 'close'):
bytes_to_write.close()
# Ensure all handles are closed
if data_to_write_after_compression and hasattr(data_to_write_after_compression, 'close'):
data_to_write_after_compression.close()
# If original file_content_bytes was a different handle (e.g. multipart before compression) and not closed yet
if file_content_bytes and file_content_bytes is not data_to_write_after_compression and hasattr(file_content_bytes, 'close'):
try:
if not file_content_bytes.closed: # Check if already closed
file_content_bytes.close()
except Exception: pass # Ignore errors on close if already handled
def process(self):
@@ -858,36 +874,140 @@ class PostProcessorWorker:
post_is_candidate_by_title_char_match = False
char_filter_that_matched_title = None
post_is_candidate_by_comment_char_match = False
# New variables for CHAR_SCOPE_COMMENTS file-first logic
post_is_candidate_by_file_char_match_in_comment_scope = False
char_filter_that_matched_file_in_comment_scope = None
char_filter_that_matched_comment = None
if self.filter_character_list_objects and \
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH):
self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. Scope: {self.char_filter_scope}")
# self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. Scope: {self.char_filter_scope}")
for idx, filter_item_obj in enumerate(self.filter_character_list_objects):
self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}")
if self.check_cancel(): break
# self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}")
terms_to_check_for_title = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"]:
if filter_item_obj["name"] not in terms_to_check_for_title:
terms_to_check_for_title.append(filter_item_obj["name"])
unique_terms_for_title_check = list(set(terms_to_check_for_title))
self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}")
# self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}")
for term_to_match in unique_terms_for_title_check:
self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'")
# self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'")
match_found_for_term = is_title_match_for_character(post_title, term_to_match)
self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}")
# self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}")
if match_found_for_term:
post_is_candidate_by_title_char_match = True
char_filter_that_matched_title = filter_item_obj
self.logger(f" Post title matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}', Scope: {self.char_filter_scope}). Post is candidate.")
break
if post_is_candidate_by_title_char_match: break
self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}")
# self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}")
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
return 0, num_potential_files_in_post, []
# --- Populate all_files_from_post_api before character filter logic that needs it ---
# This is needed for the file-first check in CHAR_SCOPE_COMMENTS
all_files_from_post_api_for_char_check = []
api_file_domain_for_char_check = urlparse(self.api_url_input).netloc
if not api_file_domain_for_char_check or not any(d in api_file_domain_for_char_check.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
api_file_domain_for_char_check = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'):
original_api_name = post_main_file_info.get('name') or os.path.basename(post_main_file_info['path'].lstrip('/'))
if original_api_name:
all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_name})
for att_info in post_attachments:
if isinstance(att_info, dict) and att_info.get('path'):
original_api_att_name = att_info.get('name') or os.path.basename(att_info['path'].lstrip('/'))
if original_api_att_name:
all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_att_name})
# --- End population of all_files_from_post_api_for_char_check ---
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
self.logger(f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id}'.")
for file_info_item in all_files_from_post_api_for_char_check: # Use the pre-populated list of file names
if self.check_cancel(): break
current_api_original_filename_for_check = file_info_item.get('_original_name_for_log')
if not current_api_original_filename_for_check: continue
for filter_item_obj in self.filter_character_list_objects:
terms_to_check = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check:
terms_to_check.append(filter_item_obj["name"])
for term_to_match in terms_to_check:
if is_filename_match_for_character(current_api_original_filename_for_check, term_to_match):
post_is_candidate_by_file_char_match_in_comment_scope = True
char_filter_that_matched_file_in_comment_scope = filter_item_obj
self.logger(f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check}' matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}'). Post is candidate.")
break
if post_is_candidate_by_file_char_match_in_comment_scope: break
if post_is_candidate_by_file_char_match_in_comment_scope: break
self.logger(f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope}")
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
if not post_is_candidate_by_file_char_match_in_comment_scope:
self.logger(f" [Char Scope: Comments] Phase 2: No file match found. Checking post comments for post ID '{post_id}'.")
try:
parsed_input_url_for_comments = urlparse(self.api_url_input)
api_domain_for_comments = parsed_input_url_for_comments.netloc
if not any(d in api_domain_for_comments.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
self.logger(f"⚠️ Unrecognized domain '{api_domain_for_comments}' for comment API. Defaulting based on service.")
api_domain_for_comments = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
comments_data = fetch_post_comments(
api_domain_for_comments, self.service, self.user_id, post_id,
headers, self.logger, self.cancellation_event
)
if comments_data:
self.logger(f" Fetched {len(comments_data)} comments for post {post_id}.")
for comment_item_idx, comment_item in enumerate(comments_data):
if self.check_cancel(): break
raw_comment_content = comment_item.get('content', '')
if not raw_comment_content: continue
cleaned_comment_text = strip_html_tags(raw_comment_content)
if not cleaned_comment_text.strip(): continue
for filter_item_obj in self.filter_character_list_objects:
terms_to_check_comment = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_comment:
terms_to_check_comment.append(filter_item_obj["name"])
for term_to_match_comment in terms_to_check_comment:
if is_title_match_for_character(cleaned_comment_text, term_to_match_comment): # Re-use title matcher
post_is_candidate_by_comment_char_match = True
char_filter_that_matched_comment = filter_item_obj
self.logger(f" Match Found (Comment in Comments Scope): Comment in post {post_id} matches char filter term '{term_to_match_comment}' (from group/name '{filter_item_obj['name']}'). Post is candidate.")
self.logger(f" Matching comment (first 100 chars): '{cleaned_comment_text[:100]}...'")
break
if post_is_candidate_by_comment_char_match: break
if post_is_candidate_by_comment_char_match: break
else:
self.logger(f" No comments found or fetched for post {post_id} to check against character filters.")
except RuntimeError as e_fetch_comment:
self.logger(f" ⚠️ Error fetching or processing comments for post {post_id}: {e_fetch_comment}")
except Exception as e_generic_comment:
self.logger(f" ❌ Unexpected error during comment processing for post {post_id}: {e_generic_comment}\n{traceback.format_exc(limit=2)}")
self.logger(f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match}")
else: # post_is_candidate_by_file_char_match_in_comment_scope was True
self.logger(f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id}' because a file match already made it a candidate.")
# --- Skip Post Logic based on Title or Comment Scope (if filters are active) ---
if self.filter_character_list_objects:
if self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
return 0, num_potential_files_in_post, []
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and \
not post_is_candidate_by_file_char_match_in_comment_scope and \
not post_is_candidate_by_comment_char_match: # MODIFIED: Check both file and comment match flags
self.logger(f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id}', Title '{post_title[:50]}...'")
return 0, num_potential_files_in_post, []
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
post_title_lower = post_title.lower()
for skip_word in self.skip_words_list:
@@ -907,9 +1027,26 @@ class PostProcessorWorker:
base_folder_names_for_post_content = []
if not self.extract_links_only and self.use_subfolders:
if post_is_candidate_by_title_char_match and char_filter_that_matched_title:
base_folder_names_for_post_content = [clean_folder_name(char_filter_that_matched_title["name"])]
elif not self.filter_character_list_objects:
primary_char_filter_for_folder = None
log_reason_for_folder = ""
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment:
# For CHAR_SCOPE_COMMENTS, prioritize file match for folder name if it happened
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope:
primary_char_filter_for_folder = char_filter_that_matched_file_in_comment_scope
log_reason_for_folder = "Matched char filter in filename (Comments scope)"
elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment: # Fallback to comment match
primary_char_filter_for_folder = char_filter_that_matched_comment
log_reason_for_folder = "Matched char filter in comments (Comments scope, no file match)"
elif (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and char_filter_that_matched_title: # Existing logic for other scopes
primary_char_filter_for_folder = char_filter_that_matched_title
log_reason_for_folder = "Matched char filter in title"
# If scope is FILES, primary_char_filter_for_folder will be None here. Folder determined per file.
if primary_char_filter_for_folder:
base_folder_names_for_post_content = [clean_folder_name(primary_char_filter_for_folder["name"])]
self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}")
elif not self.filter_character_list_objects: # No char filters defined, use generic logic
derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
if derived_folders:
base_folder_names_for_post_content.extend(derived_folders)
@@ -917,11 +1054,10 @@ class PostProcessorWorker:
base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
self.logger(f" Base folder name(s) for post content (Generic title parsing - no char filters): {', '.join(base_folder_names_for_post_content)}")
# If char filters are defined, and scope is FILES, then base_folder_names_for_post_content remains empty.
# The folder will be determined by char_filter_info_that_matched_file later.
if base_folder_names_for_post_content:
log_reason = "Matched char filter" if (post_is_candidate_by_title_char_match and char_filter_that_matched_title) else "Generic title parsing (no char filters)"
self.logger(f" Base folder name(s) for post content ({log_reason}): {', '.join(base_folder_names_for_post_content)}")
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
for folder_name_to_check in base_folder_names_for_post_content:
if not folder_name_to_check: continue
@@ -1066,19 +1202,32 @@ class PostProcessorWorker:
char_filter_info_that_matched_file = char_filter_that_matched_title
self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. Scope: Both (Title part).")
else:
for filter_item_obj in self.filter_character_list_objects:
terms_to_check_for_file_both = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_for_file_both:
terms_to_check_for_file_both.append(filter_item_obj["name"])
unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both))
# This part is for the "File" part of "Both" scope
for filter_item_obj_both_file in self.filter_character_list_objects:
terms_to_check_for_file_both = list(filter_item_obj_both_file["aliases"])
if filter_item_obj_both_file["is_group"] and filter_item_obj_both_file["name"] not in terms_to_check_for_file_both:
terms_to_check_for_file_both.append(filter_item_obj_both_file["name"])
# Ensure unique_terms_for_file_both_check is defined here
unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both))
for term_to_match in unique_terms_for_file_both_check:
if is_filename_match_for_character(current_api_original_filename, term_to_match):
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = filter_item_obj
char_filter_info_that_matched_file = filter_item_obj_both_file # Use the filter that matched the file
self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Both (File part).")
break
if file_is_candidate_by_char_filter_scope: break
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
# If the post is a candidate (either by file or comment under this scope), then this file is also a candidate.
# The folder naming will use the filter that made the POST a candidate.
if post_is_candidate_by_file_char_match_in_comment_scope: # Post was candidate due to a file match
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_file_in_comment_scope # Use the filter that matched a file in the post
self.logger(f" File '{current_api_original_filename}' is candidate because a file in this post matched char filter (Overall Scope: Comments).")
elif post_is_candidate_by_comment_char_match: # Post was candidate due to comment match (no file match for post)
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_comment # Use the filter that matched comments
self.logger(f" File '{current_api_original_filename}' is candidate because post comments matched char filter (Overall Scope: Comments).")
if not file_is_candidate_by_char_filter_scope:
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
@@ -1178,7 +1327,7 @@ class DownloadThread(QThread):
char_filter_scope=CHAR_SCOPE_FILES,
remove_from_filename_words_list=None,
allow_multipart_download=True,
duplicate_file_mode=DUPLICATE_MODE_DELETE): # Default to DELETE
): # Removed duplicate_file_mode and session-wide tracking
super().__init__()
self.api_url_input = api_url_input
self.output_dir = output_dir
@@ -1219,7 +1368,7 @@ class DownloadThread(QThread):
self.char_filter_scope = char_filter_scope
self.remove_from_filename_words_list = remove_from_filename_words_list
self.allow_multipart_download = allow_multipart_download
self.duplicate_file_mode = duplicate_file_mode
# self.duplicate_file_mode and session-wide tracking removed
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
self.compress_images = False
@@ -1297,7 +1446,7 @@ class DownloadThread(QThread):
char_filter_scope=self.char_filter_scope,
remove_from_filename_words_list=self.remove_from_filename_words_list,
allow_multipart_download=self.allow_multipart_download,
duplicate_file_mode=self.duplicate_file_mode)
) # Removed duplicate_file_mode and session-wide tracking
try:
dl_count, skip_count, kept_originals_this_post = post_processing_worker.process()
grand_total_downloaded_files += dl_count

267
main.py
View File

@@ -48,8 +48,9 @@ try:
SKIP_SCOPE_POSTS,
SKIP_SCOPE_BOTH,
CHAR_SCOPE_TITLE, # Added for completeness if used directly
CHAR_SCOPE_FILES, # Added
CHAR_SCOPE_BOTH # Added
CHAR_SCOPE_FILES, # Ensure this is imported
CHAR_SCOPE_BOTH,
CHAR_SCOPE_COMMENTS
)
print("Successfully imported names from downloader_utils.")
except ImportError as e:
@@ -68,6 +69,7 @@ except ImportError as e:
CHAR_SCOPE_TITLE = "title"
CHAR_SCOPE_FILES = "files"
CHAR_SCOPE_BOTH = "both"
CHAR_SCOPE_COMMENTS = "comments"
except Exception as e:
print(f"--- UNEXPECTED IMPORT ERROR ---")
@@ -80,6 +82,7 @@ except Exception as e:
MAX_THREADS = 200
RECOMMENDED_MAX_THREADS = 50
MAX_FILE_THREADS_PER_POST_OR_WORKER = 10
MAX_POST_WORKERS_WHEN_COMMENT_FILTERING = 3 # New constant
HTML_PREFIX = "<!HTML!>"
@@ -92,13 +95,7 @@ SKIP_WORDS_SCOPE_KEY = "skipWordsScopeV1"
ALLOW_MULTIPART_DOWNLOAD_KEY = "allowMultipartDownloadV1"
CHAR_FILTER_SCOPE_KEY = "charFilterScopeV1"
# CHAR_SCOPE_TITLE, CHAR_SCOPE_FILES, CHAR_SCOPE_BOTH are already defined or imported
DUPLICATE_FILE_MODE_KEY = "duplicateFileModeV1"
# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed.
DUPLICATE_MODE_DELETE = "delete"
DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move" # New mode
# CHAR_SCOPE_TITLE, CHAR_SCOPE_FILES, CHAR_SCOPE_BOTH, CHAR_SCOPE_COMMENTS are already defined or imported
# --- Tour Classes (Moved from tour.py) ---
class TourStepWidget(QWidget):
@@ -480,25 +477,21 @@ class DownloaderApp(QWidget):
self.radio_only_links = None
self.radio_only_archives = None
self.skip_scope_toggle_button = None
self.char_filter_scope_toggle_button = None
self.all_kept_original_filenames = []
self.manga_filename_style = self.settings.value(MANGA_FILENAME_STYLE_KEY, STYLE_POST_TITLE, type=str)
self.skip_words_scope = self.settings.value(SKIP_WORDS_SCOPE_KEY, SKIP_SCOPE_POSTS, type=str)
self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_TITLE, type=str)
self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_FILES, type=str) # Default to Files
# Always default multi-part download to OFF on launch, ignoring any saved setting.
self.allow_multipart_download_setting = False
self.duplicate_file_mode = self.settings.value(DUPLICATE_FILE_MODE_KEY, DUPLICATE_MODE_DELETE, type=str) # Default to DELETE
print(f" Known.txt will be loaded/saved at: {self.config_file}")
self.load_known_names_from_util()
self.setWindowTitle("Kemono Downloader v3.2.0")
# self.setGeometry(150, 150, 1050, 820) # Initial geometry will be set after showing
self.setStyleSheet(self.get_dark_theme())
self.init_ui()
self._connect_signals()
@@ -510,7 +503,6 @@ class DownloaderApp(QWidget):
self.log_signal.emit(f" Skip words scope loaded: '{self.skip_words_scope}'")
self.log_signal.emit(f" Character filter scope loaded: '{self.char_filter_scope}'")
self.log_signal.emit(f" Multi-part download defaults to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'} on launch")
self.log_signal.emit(f" Duplicate file handling mode loaded: '{self.duplicate_file_mode.capitalize()}'")
def _connect_signals(self):
@@ -560,7 +552,6 @@ class DownloaderApp(QWidget):
self.char_filter_scope_toggle_button.clicked.connect(self._cycle_char_filter_scope)
if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode)
if hasattr(self, 'duplicate_mode_toggle_button'): self.duplicate_mode_toggle_button.clicked.connect(self._cycle_duplicate_mode)
def load_known_names_from_util(self):
@@ -606,7 +597,6 @@ class DownloaderApp(QWidget):
self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope)
self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode) # Save current mode
self.settings.sync()
should_exit = True
@@ -726,17 +716,17 @@ class DownloaderApp(QWidget):
self.character_input = QLineEdit()
self.character_input.setPlaceholderText("e.g., Tifa, Aerith, (Cloud, Zack)")
self.character_input.setToolTip(
"Filter files or posts by character/series names (comma-separated).\n"
" - Normal Mode: Filters individual files by matching their filenames.\n"
" - Manga/Comic Mode: Filters entire posts by matching the post title.\n"
"Filter by character/series names (comma-separated, e.g., Tifa, Aerith).\n"
"The behavior of this filter (Files, Title, Both, or Comments) is controlled by the 'Filter: [Scope]' button next to this input.\n"
"Also used for folder naming if 'Separate Folders' is enabled.\n"
"Group aliases for a combined folder name: (alias1, alias2) -> folder 'alias1 alias2'.\n"
"Example: yor, Tifa, (Boa, Hancock)")
char_input_and_button_layout.addWidget(self.character_input, 3)
self.char_filter_scope_toggle_button = QPushButton()
# Initial text and tooltip will be set by calling _update_char_filter_scope_button_text()
# at the end of init_ui or when the scope is first set.
self._update_char_filter_scope_button_text()
self.char_filter_scope_toggle_button.setToolTip("Click to cycle character filter scope (Files -> Title -> Both)")
self.char_filter_scope_toggle_button.setStyleSheet("padding: 6px 10px;")
self.char_filter_scope_toggle_button.setMinimumWidth(100)
char_input_and_button_layout.addWidget(self.char_filter_scope_toggle_button, 1)
@@ -794,7 +784,6 @@ class DownloaderApp(QWidget):
skip_input_and_button_layout.addWidget(self.skip_words_input, 1) # Input field takes available space
self.skip_scope_toggle_button = QPushButton()
self._update_skip_scope_button_text()
self.skip_scope_toggle_button.setToolTip("Click to cycle skip scope (Files -> Posts -> Both)")
self.skip_scope_toggle_button.setStyleSheet("padding: 6px 10px;")
self.skip_scope_toggle_button.setMinimumWidth(100)
skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 0) # Button takes its minimum
@@ -1017,38 +1006,26 @@ class DownloaderApp(QWidget):
log_title_layout.addWidget(self.link_search_button)
self.manga_rename_toggle_button = QPushButton()
# Tooltip is dynamically set by _update_manga_filename_style_button_text
self.manga_rename_toggle_button.setVisible(False)
self.manga_rename_toggle_button.setFixedWidth(140)
self.manga_rename_toggle_button.setStyleSheet("padding: 4px 8px;")
self._update_manga_filename_style_button_text()
log_title_layout.addWidget(self.manga_rename_toggle_button)
self.multipart_toggle_button = QPushButton() # Create the button
# Tooltip is dynamically set by _update_multipart_toggle_button_text
self.multipart_toggle_button = QPushButton()
self.multipart_toggle_button.setToolTip("Toggle between Multi-part and Single-stream downloads for large files.")
self.multipart_toggle_button.setFixedWidth(130) # Adjust width as needed
self.multipart_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding
self._update_multipart_toggle_button_text() # Set initial text
log_title_layout.addWidget(self.multipart_toggle_button) # Add to layout
self.duplicate_mode_toggle_button = QPushButton()
# Tooltip is dynamically set by _update_duplicate_mode_button_text
self.duplicate_mode_toggle_button.setToolTip("Toggle how duplicate filenames are handled (Rename or Delete).")
self.duplicate_mode_toggle_button.setFixedWidth(150) # Adjust width
self.duplicate_mode_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding
self._update_duplicate_mode_button_text() # Set initial text
log_title_layout.addWidget(self.duplicate_mode_toggle_button)
self.log_verbosity_button = QPushButton("Show Basic Log")
# Tooltip already exists for log_verbosity_button
self.log_verbosity_button.setToolTip("Toggle between full and basic log details.")
self.log_verbosity_button.setFixedWidth(110)
self.log_verbosity_button.setStyleSheet("padding: 4px 8px;")
log_title_layout.addWidget(self.log_verbosity_button)
self.reset_button = QPushButton("🔄 Reset")
# Tooltip already exists for reset_button
self.reset_button.setToolTip("Reset all inputs and logs to default state (only when idle).")
self.reset_button.setFixedWidth(80)
self.reset_button.setStyleSheet("padding: 4px 8px;")
@@ -1125,7 +1102,6 @@ class DownloaderApp(QWidget):
self._update_manga_filename_style_button_text()
self._update_skip_scope_button_text()
self._update_char_filter_scope_button_text()
self._update_duplicate_mode_button_text()
def _center_on_screen(self):
"""Centers the widget on the screen."""
@@ -1382,8 +1358,7 @@ class DownloaderApp(QWidget):
self.skip_scope_toggle_button.setVisible(not (is_only_links or is_only_archives))
if hasattr(self, 'multipart_toggle_button') and self.multipart_toggle_button:
self.multipart_toggle_button.setVisible(not (is_only_links or is_only_archives))
# Other log header buttons (manga, duplicate, char filter scope) are handled by
# update_ui_for_manga_mode and update_ui_for_subfolders, which are called below.
# Other log header buttons (manga, char filter scope) are handled by update_ui_for_manga_mode and update_ui_for_subfolders
if self.link_search_input: self.link_search_input.setVisible(is_only_links)
if self.link_search_button: self.link_search_button.setVisible(is_only_links)
@@ -1466,9 +1441,7 @@ class DownloaderApp(QWidget):
self.update_ui_for_subfolders(subfolders_on)
self.update_custom_folder_visibility()
# Ensure manga mode UI updates (which includes the visibility of
# manga_rename_toggle_button and duplicate_mode_toggle_button)
# are triggered after filter mode changes.
# Ensure manga mode UI updates (which includes the visibility of manga_rename_toggle_button)
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
@@ -1556,12 +1529,46 @@ class DownloaderApp(QWidget):
if self.skip_scope_toggle_button:
if self.skip_words_scope == SKIP_SCOPE_FILES:
self.skip_scope_toggle_button.setText("Scope: Files")
self.skip_scope_toggle_button.setToolTip(
"Current Skip Scope: Files\n\n"
"Skips individual files if their names contain any of the 'Skip with Words'.\n"
"Example: Skip words \"WIP, sketch\".\n"
"- File \"art_WIP.jpg\" -> SKIPPED.\n"
"- File \"final_art.png\" -> DOWNLOADED (if other conditions met).\n"
"Post is still processed for other non-skipped files.\n\n"
"Click to cycle to: Posts"
)
elif self.skip_words_scope == SKIP_SCOPE_POSTS:
self.skip_scope_toggle_button.setText("Scope: Posts")
self.skip_scope_toggle_button.setToolTip(
"Current Skip Scope: Posts\n\n"
"Skips entire posts if their titles contain any of the 'Skip with Words'.\n"
"All files from a skipped post are ignored.\n"
"Example: Skip words \"preview, announcement\".\n"
"- Post \"Exciting Announcement!\" -> SKIPPED.\n"
"- Post \"Finished Artwork\" -> PROCESSED (if other conditions met).\n\n"
"Click to cycle to: Both"
)
elif self.skip_words_scope == SKIP_SCOPE_BOTH:
self.skip_scope_toggle_button.setText("Scope: Both")
self.skip_scope_toggle_button.setToolTip(
"Current Skip Scope: Both (Posts then Files)\n\n"
"1. Checks post title: If title contains a skip word, the entire post is SKIPPED.\n"
"2. If post title is OK, then checks individual filenames: If a filename contains a skip word, only that file is SKIPPED.\n"
"Example: Skip words \"WIP, sketch\".\n"
"- Post \"Sketches and WIPs\" (title match) -> ENTIRE POST SKIPPED.\n"
"- Post \"Art Update\" (title OK) with files:\n"
" - \"character_WIP.jpg\" (file match) -> SKIPPED.\n"
" - \"final_scene.png\" (file OK) -> DOWNLOADED.\n\n"
"Click to cycle to: Files"
)
else:
self.skip_scope_toggle_button.setText("Scope: Unknown")
self.skip_scope_toggle_button.setToolTip(
"Current Skip Scope: Unknown\n\n"
"The skip words scope is in an unknown state. Please cycle or reset.\n\n"
"Click to cycle to: Files"
)
def _cycle_skip_scope(self):
@@ -1585,28 +1592,74 @@ class DownloaderApp(QWidget):
if self.char_filter_scope_toggle_button:
if self.char_filter_scope == CHAR_SCOPE_FILES:
self.char_filter_scope_toggle_button.setText("Filter: Files")
self.char_filter_scope_toggle_button.setToolTip(
"Current Scope: Files\n\n"
"Filters individual files by name. A post is kept if any file matches.\n"
"Only matching files from that post are downloaded.\n"
"Example: Filter 'Tifa'. File 'Tifa_artwork.jpg' matches and is downloaded.\n"
"Folder Naming: Uses character from matching filename.\n\n"
"Click to cycle to: Title"
)
elif self.char_filter_scope == CHAR_SCOPE_TITLE:
self.char_filter_scope_toggle_button.setText("Filter: Title")
self.char_filter_scope_toggle_button.setToolTip(
"Current Scope: Title\n\n"
"Filters entire posts by their title. All files from a matching post are downloaded.\n"
"Example: Filter 'Aerith'. Post titled 'Aerith's Garden' matches; all its files are downloaded.\n"
"Folder Naming: Uses character from matching post title.\n\n"
"Click to cycle to: Both"
)
elif self.char_filter_scope == CHAR_SCOPE_BOTH:
self.char_filter_scope_toggle_button.setText("Filter: Both")
self.char_filter_scope_toggle_button.setToolTip(
"Current Scope: Both (Title then Files)\n\n"
"1. Checks post title: If matches, all files from post are downloaded.\n"
"2. If title doesn't match, checks filenames: If any file matches, only that file is downloaded.\n"
"Example: Filter 'Cloud'.\n"
" - Post 'Cloud Strife' (title match) -> all files downloaded.\n"
" - Post 'Bike Chase' with 'Cloud_fenrir.jpg' (file match) -> only 'Cloud_fenrir.jpg' downloaded.\n"
"Folder Naming: Prioritizes title match, then file match.\n\n"
"Click to cycle to: Comments"
)
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
self.char_filter_scope_toggle_button.setText("Filter: Comments (Beta)")
self.char_filter_scope_toggle_button.setToolTip(
"Current Scope: Comments (Beta - Files first, then Comments as fallback)\n\n"
"1. Checks filenames: If any file in the post matches the filter, the entire post is downloaded. Comments are NOT checked for this filter term.\n"
"2. If no file matches, THEN checks post comments: If a comment matches, the entire post is downloaded.\n"
"Example: Filter 'Barret'.\n"
" - Post A: Files 'Barret_gunarm.jpg', 'other.png'. File 'Barret_gunarm.jpg' matches. All files from Post A downloaded. Comments not checked for 'Barret'.\n"
" - Post B: Files 'dyne.jpg', 'weapon.gif'. Comments: '...a drawing of Barret Wallace...'. No file match for 'Barret'. Comment matches. All files from Post B downloaded.\n"
"Folder Naming: Prioritizes character from file match, then from comment match.\n\n"
"Click to cycle to: Files"
)
else:
self.char_filter_scope_toggle_button.setText("Filter: Unknown")
self.char_filter_scope_toggle_button.setToolTip(
"Current Scope: Unknown\n\n"
"The character filter scope is in an unknown state. Please cycle or reset.\n\n"
"Click to cycle to: Files"
)
def _cycle_char_filter_scope(self):
# Cycle: Files -> Title -> Both -> Comments -> Files
if self.char_filter_scope == CHAR_SCOPE_FILES:
self.char_filter_scope = CHAR_SCOPE_TITLE
elif self.char_filter_scope == CHAR_SCOPE_TITLE:
self.char_filter_scope = CHAR_SCOPE_BOTH
elif self.char_filter_scope == CHAR_SCOPE_BOTH:
self.char_filter_scope = CHAR_SCOPE_COMMENTS
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
self.char_filter_scope = CHAR_SCOPE_FILES
else:
self.char_filter_scope = CHAR_SCOPE_FILES
self.char_filter_scope = CHAR_SCOPE_FILES # Default fallback
self._update_char_filter_scope_button_text()
self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
self.log_signal.emit(f" Character filter scope changed to: '{self.char_filter_scope}'")
def add_new_character(self):
global KNOWN_NAMES, clean_folder_name
name_to_add = self.new_char_input.text().strip()
@@ -1751,18 +1804,34 @@ class DownloaderApp(QWidget):
if self.manga_filename_style == STYLE_POST_TITLE:
self.manga_rename_toggle_button.setText("Name: Post Title")
self.manga_rename_toggle_button.setToolTip(
"Manga files: First file named by post title. Subsequent files in same post keep original names.\n"
"Click to change to original file names for all files."
"Manga Filename Style: Post Title\n\n"
"When Manga/Comic Mode is active for a creator feed:\n"
"- The *first* file in a post is named after the post's title (e.g., \"MyMangaChapter1.jpg\").\n"
"- Any *subsequent* files within the *same post* will retain their original filenames (e.g., \"page_02.png\", \"bonus_art.jpg\").\n"
"- This is generally recommended for better organization of sequential content.\n"
"- Example: Post \"Chapter 1: The Beginning\" with files \"001.jpg\", \"002.jpg\".\n"
" Downloads as: \"Chapter 1 The Beginning.jpg\", \"002.jpg\".\n\n"
"Click to change to: Original File Name"
)
elif self.manga_filename_style == STYLE_ORIGINAL_NAME:
self.manga_rename_toggle_button.setText("Name: Original File")
self.manga_rename_toggle_button.setToolTip(
"Manga files will keep their original names as provided by the site (e.g., 001.jpg, page_01.png).\n"
"Click to change to post title based naming for the first file."
"Manga Filename Style: Original File Name\n\n"
"When Manga/Comic Mode is active for a creator feed:\n"
"- *All* files in a post will attempt to keep their original filenames as provided by the site (e.g., \"001.jpg\", \"page_02.png\").\n"
"- This can be useful if original names are already well-structured and sequential.\n"
"- If original names are inconsistent, using \"Post Title\" style is often better.\n"
"- Example: Post \"Chapter 1: The Beginning\" with files \"001.jpg\", \"002.jpg\".\n"
" Downloads as: \"001.jpg\", \"002.jpg\".\n\n"
"Click to change to: Post Title"
)
else:
self.manga_rename_toggle_button.setText("Name: Unknown Style")
self.manga_rename_toggle_button.setToolTip("Manga filename style is in an unknown state.")
self.manga_rename_toggle_button.setToolTip(
"Manga Filename Style: Unknown\n\n"
"The manga filename style is in an unknown state. Please cycle or reset.\n\n"
"Click to change to: Post Title"
)
def _toggle_manga_filename_style(self):
@@ -1816,11 +1885,6 @@ class DownloaderApp(QWidget):
# Visible if manga mode is on AND not in "Only Links" or "Only Archives" mode
self.manga_rename_toggle_button.setVisible(manga_mode_effectively_on and not (is_only_links_mode or is_only_archives_mode))
if hasattr(self, 'duplicate_mode_toggle_button'):
# Visible if manga mode is OFF AND not in "Only Links" or "Only Archives" mode
self.duplicate_mode_toggle_button.setVisible(
not manga_mode_effectively_on and not (is_only_links_mode or is_only_archives_mode)
)
if manga_mode_effectively_on:
if self.page_range_label: self.page_range_label.setEnabled(False)
@@ -1909,12 +1973,11 @@ class DownloaderApp(QWidget):
raw_skip_words = self.skip_words_input.text().strip()
skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()]
current_skip_words_scope = self.get_skip_words_scope()
raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else ""
effective_duplicate_file_mode = self.duplicate_file_mode # Start with user's choice
allow_multipart = self.allow_multipart_download_setting # Use the internal setting
remove_from_filename_words_list = [word.strip() for word in raw_remove_filename_words.split(',') if word.strip()]
current_skip_words_scope = self.get_skip_words_scope()
current_char_filter_scope = self.get_char_filter_scope()
manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False
@@ -1967,10 +2030,8 @@ class DownloaderApp(QWidget):
elif manga_mode:
start_page, end_page = None, None
# effective_duplicate_file_mode will be self.duplicate_file_mode (UI button's state).
# Manga Mode specific duplicate handling is now managed entirely within downloader_utils.py
self.external_link_queue.clear(); self.extracted_links_cache = []; self._is_processing_external_link_queue = False; self._current_link_post_title = None
self.all_kept_original_filenames = []
raw_character_filters_text = self.character_input.text().strip()
@@ -2130,6 +2191,7 @@ class DownloaderApp(QWidget):
self.total_posts_to_process = 0; self.processed_posts_count = 0; self.download_counter = 0; self.skip_counter = 0
self.progress_label.setText("Progress: Initializing...")
effective_num_post_workers = 1
effective_num_file_threads_per_worker = 1
@@ -2179,8 +2241,7 @@ class DownloaderApp(QWidget):
f" Skip Words Scope: {current_skip_words_scope.capitalize()}",
f" Remove Words from Filename: {', '.join(remove_from_filename_words_list) if remove_from_filename_words_list else 'None'}",
f" Compress Images: {'Enabled' if compress_images else 'Disabled'}",
f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}",
f" Multi-part Download: {'Enabled' if allow_multipart else 'Disabled'}"
f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}" # Removed duplicate file handling log
])
else:
log_messages.append(f" Mode: Extracting Links Only")
@@ -2192,7 +2253,6 @@ class DownloaderApp(QWidget):
log_messages.append(f" ↳ Manga Filename Style: {'Post Title Based' if self.manga_filename_style == STYLE_POST_TITLE else 'Original File Name'}")
if filter_character_list_to_pass:
log_messages.append(f" ↳ Manga Character Filter (for naming/folder): {', '.join(item['name'] for item in filter_character_list_to_pass)}")
log_messages.append(f" ↳ Char Filter Scope (Manga): {current_char_filter_scope.capitalize()}")
log_messages.append(f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).")
should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url
@@ -2242,8 +2302,8 @@ class DownloaderApp(QWidget):
'signals': self.worker_signals,
'manga_filename_style': self.manga_filename_style,
'num_file_threads_for_worker': effective_num_file_threads_per_worker,
'allow_multipart_download': allow_multipart, # Corrected from previous thought
'duplicate_file_mode': effective_duplicate_file_mode # Pass the potentially overridden mode
'allow_multipart_download': allow_multipart,
# 'duplicate_file_mode' and session-wide tracking removed
}
try:
@@ -2258,13 +2318,11 @@ class DownloaderApp(QWidget):
'use_subfolders', 'use_post_subfolders', 'custom_folder_name',
'compress_images', 'download_thumbnails', 'service', 'user_id',
'downloaded_files', 'downloaded_file_hashes', 'remove_from_filename_words_list',
'downloaded_files_lock', 'downloaded_file_hashes_lock',
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'show_external_links', 'extract_links_only',
'num_file_threads_for_worker',
'skip_current_file_flag',
'start_page', 'end_page', 'target_post_id_from_initial_url',
'manga_mode_active', 'unwanted_keywords', 'manga_filename_style', 'duplicate_file_mode',
'downloaded_files_lock', 'downloaded_file_hashes_lock',
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'show_external_links', 'extract_links_only', 'num_file_threads_for_worker',
'start_page', 'end_page', 'target_post_id_from_initial_url', 'duplicate_file_mode',
'manga_mode_active', 'unwanted_keywords', 'manga_filename_style',
'allow_multipart_download'
]
args_template['skip_current_file_flag'] = None
@@ -2385,18 +2443,17 @@ class DownloaderApp(QWidget):
'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list',
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'show_external_links', 'extract_links_only', 'allow_multipart_download',
'num_file_threads',
'skip_current_file_flag',
'num_file_threads', 'skip_current_file_flag',
'manga_mode_active', 'manga_filename_style'
]
# Ensure 'allow_multipart_download' is also considered for optional keys if it has a default in PostProcessorWorker
ppw_optional_keys_with_defaults = {
'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'remove_from_filename_words_list',
'show_external_links', 'extract_links_only',
'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style'
'show_external_links', 'extract_links_only', 'duplicate_file_mode', # Added duplicate_file_mode here
'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style',
'processed_base_filenames_session_wide', 'processed_base_filenames_session_wide_lock' # Add these
}
for post_data_item in all_posts_data:
if self.cancellation_event.is_set(): break
if not isinstance(post_data_item, dict):
@@ -2464,12 +2521,10 @@ class DownloaderApp(QWidget):
widgets_to_toggle = [ self.download_btn, self.link_input, self.radio_all, self.radio_images, self.radio_videos, self.radio_only_links,
self.skip_zip_checkbox, self.skip_rar_checkbox, self.use_subfolders_checkbox, self.compress_images_checkbox,
self.download_thumbnails_checkbox, self.use_multithreading_checkbox, self.skip_words_input, self.character_search_input,
self.new_char_input, self.add_char_button, self.delete_char_button,
self.char_filter_scope_toggle_button,
self.start_page_input, self.end_page_input,
self.page_range_label, self.to_label, self.character_input, self.custom_folder_input, self.custom_folder_label, self.remove_from_filename_input,
self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.multipart_toggle_button,
self.skip_scope_toggle_button
self.new_char_input, self.add_char_button, self.delete_char_button, self.char_filter_scope_toggle_button, # duplicate_file_mode_toggle_button removed
self.start_page_input, self.end_page_input, self.page_range_label, self.to_label,
self.character_input, self.custom_folder_input, self.custom_folder_label, self.remove_from_filename_input,
self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.multipart_toggle_button, self.skip_scope_toggle_button
]
for widget in widgets_to_toggle:
@@ -2663,15 +2718,10 @@ class DownloaderApp(QWidget):
self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope)
self._update_skip_scope_button_text()
self.char_filter_scope = CHAR_SCOPE_TITLE
self.char_filter_scope = CHAR_SCOPE_FILES # Default to Files on full reset
self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
self._update_char_filter_scope_button_text()
self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Reset to default (Delete)
self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode)
self._update_duplicate_mode_button_text()
self.settings.sync()
self._update_manga_filename_style_button_text()
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
@@ -2693,12 +2743,8 @@ class DownloaderApp(QWidget):
self.skip_words_scope = SKIP_SCOPE_POSTS
self._update_skip_scope_button_text()
self.char_filter_scope = CHAR_SCOPE_TITLE
self.char_filter_scope = CHAR_SCOPE_FILES # Default to Files
self._update_char_filter_scope_button_text()
self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Default to DELETE
self._update_duplicate_mode_button_text()
self._handle_filter_mode_change(self.radio_all, True)
self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked())
self.filter_character_list("")
@@ -2728,6 +2774,26 @@ class DownloaderApp(QWidget):
if hasattr(self, 'multipart_toggle_button'):
text = "Multi-part: ON" if self.allow_multipart_download_setting else "Multi-part: OFF"
self.multipart_toggle_button.setText(text)
if self.allow_multipart_download_setting:
self.multipart_toggle_button.setToolTip(
"Multi-part Download: ON\n\n"
"Enables downloading large files in multiple segments (parts) simultaneously.\n"
"- Can significantly speed up downloads for *single large files* (e.g., videos, large archives) if the server supports it.\n"
"- May increase CPU/network usage.\n"
"- For creator feeds with many *small files* (e.g., images), this might not offer speed benefits and could make the UI/log feel busy.\n"
"- If a multi-part download fails for a file, it will automatically retry with a single stream.\n"
"- Example: A 500MB video might be downloaded in 5 parts of 100MB each, concurrently.\n\n"
"Click to turn OFF (use single-stream for all files)."
)
else:
self.multipart_toggle_button.setToolTip(
"Multi-part Download: OFF\n\n"
"All files will be downloaded using a single connection (stream).\n"
"- This is generally stable and works well for most scenarios, especially for feeds with many smaller files.\n"
"- Large files will be downloaded sequentially in one go.\n"
"- Example: A 500MB video will be downloaded as one continuous stream.\n\n"
"Click to turn ON (enable multi-part for large files, see advisory on click)."
)
def _toggle_multipart_mode(self):
# If currently OFF, and user is trying to turn it ON
@@ -2762,23 +2828,6 @@ class DownloaderApp(QWidget):
self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
self.log_signal.emit(f" Multi-part download set to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}")
def _update_duplicate_mode_button_text(self):
    """Refresh the duplicate-handling toggle button's label to match the current mode.

    Does nothing if the button widget has not been created yet.
    """
    button = getattr(self, 'duplicate_mode_toggle_button', None)
    if button is None:
        return
    # Map each known mode to its label; an unrecognized mode falls back to
    # the "Move" label, matching the original else-branch behavior.
    label_by_mode = {
        DUPLICATE_MODE_DELETE: "Duplicates: Delete",
        DUPLICATE_MODE_MOVE_TO_SUBFOLDER: "Duplicates: Move",
    }
    button.setText(label_by_mode.get(self.duplicate_file_mode, "Duplicates: Move"))
def _cycle_duplicate_mode(self):
    """Flip the duplicate-file handling mode between 'move' and 'delete'.

    Refreshes the toggle button text, persists the new mode to settings,
    and emits a log message. An unknown current mode is treated like
    'delete' and flips to 'move', as before.
    """
    currently_move = self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER
    self.duplicate_file_mode = (
        DUPLICATE_MODE_DELETE if currently_move else DUPLICATE_MODE_MOVE_TO_SUBFOLDER
    )
    self._update_duplicate_mode_button_text()
    # Persist the choice so it survives application restarts.
    self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode)
    self.log_signal.emit(f" Duplicate file handling mode changed to: '{self.duplicate_file_mode.capitalize()}'")
if __name__ == '__main__':
import traceback