mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
Commit
This commit is contained in:
@@ -38,10 +38,7 @@ SKIP_SCOPE_BOTH = "both"
|
||||
CHAR_SCOPE_TITLE = "title"
|
||||
CHAR_SCOPE_FILES = "files"
|
||||
CHAR_SCOPE_BOTH = "both"
|
||||
|
||||
# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed.
|
||||
DUPLICATE_MODE_DELETE = "delete"
|
||||
DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move"
|
||||
CHAR_SCOPE_COMMENTS = "comments"
|
||||
|
||||
fastapi_app = None
|
||||
KNOWN_NAMES = []
|
||||
@@ -99,6 +96,15 @@ def clean_filename(name):
|
||||
cleaned = re.sub(r'\s+', '_', cleaned)
|
||||
return cleaned if cleaned else "untitled_file"
|
||||
|
||||
# Matches one HTML tag. DOTALL lets a tag that spans several lines be removed too.
_HTML_TAG_PATTERN = re.compile(r'<.*?>', re.DOTALL)


def strip_html_tags(html_text):
    """Return the plain text of an HTML fragment.

    Tags are removed first and HTML entities are decoded afterwards, so that
    escaped literal text such as ``1 &lt; 2 and 3 &gt; 2`` survives instead of
    being mistaken for a tag once it has been unescaped.

    This is a basic regex-based approach and does not handle every corner of
    complex HTML (e.g. a ``>`` inside an attribute value).

    Args:
        html_text: The HTML string to clean. Falsy values (``None``, ``""``)
            yield an empty string.

    Returns:
        The text with tags removed, entities decoded, and leading/trailing
        whitespace stripped.
    """
    if not html_text:
        return ""
    # Remove markup while entities are still escaped ...
    without_tags = _HTML_TAG_PATTERN.sub('', html_text)
    # ... then decode entities such as &amp; / &lt; into their characters.
    return html.unescape(without_tags).strip()
|
||||
|
||||
def extract_folder_name_from_title(title, unwanted_keywords):
|
||||
if not title: return 'Uncategorized'
|
||||
@@ -221,6 +227,31 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}")
|
||||
|
||||
def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None):
    """Fetch the comments of one post from the ``/comments`` API endpoint.

    The request goes to
    ``https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}/comments``.

    Args:
        api_domain: Host name used to build the API URL.
        service: Service segment of the API path.
        user_id: User segment of the API path.
        post_id: Post segment of the API path.
        headers: HTTP headers passed to ``requests.get``.
        logger: Callable accepting a single message string.
        cancellation_event: Optional object exposing ``is_set()``; when it is
            set, no request is made and ``RuntimeError`` is raised.

    Returns:
        The decoded JSON list of comments. An empty list is returned when the
        response is not served as JSON or the decoded payload is not a list.

    Raises:
        RuntimeError: If the operation was cancelled, the request timed out or
            failed, the body could not be decoded as JSON, or any other
            unexpected error occurred. The original exception is chained as
            ``__cause__``.
    """
    if cancellation_event and cancellation_event.is_set():
        logger(" Comment fetch cancelled before request.")
        raise RuntimeError("Comment fetch operation cancelled by user.")

    comments_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}/comments"
    logger(f" Fetching comments: {comments_api_url}")

    # Pre-bind so the ValueError handler can never hit an unbound name.
    response = None
    try:
        response = requests.get(comments_api_url, headers=headers, timeout=(10, 30))  # Shorter timeout for comments
        response.raise_for_status()
        if 'application/json' not in response.headers.get('Content-Type', '').lower():
            logger(f"⚠️ Unexpected content type from comments API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}")
            return []  # Return empty list if not JSON
        comments = response.json()
    except requests.exceptions.Timeout as e:
        raise RuntimeError(f"Timeout fetching comments for post {post_id} from {comments_api_url}") from e
    except requests.exceptions.RequestException as e:
        err_msg = f"Error fetching comments for post {post_id} from {comments_api_url}: {e}"
        if e.response is not None:
            err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
        raise RuntimeError(err_msg) from e
    except ValueError as e:  # JSONDecodeError inherits from ValueError
        body_preview = response.text[:200] if response is not None else ""
        raise RuntimeError(f"Error decoding JSON from comments API for post {post_id} ({comments_api_url}): {e}. Response text: {body_preview}") from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error fetching comments for post {post_id} ({comments_api_url}): {e}") from e

    # Callers iterate the result as a list of comment dicts; anything else
    # (null, an error object, ...) is treated as "no comments".
    if not isinstance(comments, list):
        logger(f"⚠️ Unexpected JSON payload type from comments API: {type(comments).__name__}. Treating as no comments.")
        return []
    return comments
|
||||
|
||||
def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False, cancellation_event=None):
|
||||
headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
|
||||
@@ -412,7 +443,7 @@ class PostProcessorWorker:
|
||||
char_filter_scope=CHAR_SCOPE_FILES,
|
||||
remove_from_filename_words_list=None,
|
||||
allow_multipart_download=True,
|
||||
duplicate_file_mode=DUPLICATE_MODE_DELETE):
|
||||
): # Removed duplicate_file_mode and session-wide tracking
|
||||
self.post = post_data
|
||||
self.download_root = download_root
|
||||
self.known_names = known_names
|
||||
@@ -450,7 +481,7 @@ class PostProcessorWorker:
|
||||
self.char_filter_scope = char_filter_scope
|
||||
self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else []
|
||||
self.allow_multipart_download = allow_multipart_download
|
||||
self.duplicate_file_mode = duplicate_file_mode # This will be the effective mode (possibly overridden by main.py for manga)
|
||||
# self.duplicate_file_mode and session-wide tracking removed
|
||||
|
||||
if self.compress_images and Image is None:
|
||||
self.logger("⚠️ Image compression disabled: Pillow library not found.")
|
||||
@@ -469,10 +500,7 @@ class PostProcessorWorker:
|
||||
post_title="", file_index_in_post=0, num_files_in_this_post=1):
|
||||
was_original_name_kept_flag = False
|
||||
final_filename_saved_for_return = ""
|
||||
|
||||
# current_target_folder_path is the actual folder where the file will be saved.
|
||||
# It starts as the main character/post folder (target_folder_path) by default.
|
||||
current_target_folder_path = target_folder_path
|
||||
# target_folder_path is the base character/post folder.
|
||||
|
||||
if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False
|
||||
|
||||
@@ -561,44 +589,29 @@ class PostProcessorWorker:
|
||||
self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).")
|
||||
return 0, 1, api_original_filename, False
|
||||
|
||||
# --- Pre-Download Duplicate Handling (Standard Mode Only - Manga mode has its own suffixing) ---
|
||||
if not self.manga_mode_active:
|
||||
# --- Pre-Download Duplicate Handling (Standard Mode Only) ---
|
||||
is_duplicate_for_main_folder_by_path = os.path.exists(os.path.join(target_folder_path, filename_to_save_in_main_path)) and \
|
||||
os.path.getsize(os.path.join(target_folder_path, filename_to_save_in_main_path)) > 0
|
||||
path_in_main_folder_check = os.path.join(target_folder_path, filename_to_save_in_main_path)
|
||||
is_duplicate_by_path = os.path.exists(path_in_main_folder_check) and \
|
||||
os.path.getsize(path_in_main_folder_check) > 0
|
||||
|
||||
is_duplicate_for_main_folder_by_session_name = False
|
||||
is_duplicate_by_session_name = False
|
||||
with self.downloaded_files_lock:
|
||||
if filename_to_save_in_main_path in self.downloaded_files:
|
||||
is_duplicate_for_main_folder_by_session_name = True
|
||||
is_duplicate_by_session_name = True
|
||||
|
||||
if is_duplicate_for_main_folder_by_path or is_duplicate_for_main_folder_by_session_name:
|
||||
if self.duplicate_file_mode == DUPLICATE_MODE_DELETE:
|
||||
reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name"
|
||||
self.logger(f" -> Delete Duplicate ({reason}): '{filename_to_save_in_main_path}'. Skipping download.")
|
||||
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
|
||||
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
|
||||
|
||||
elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER:
|
||||
reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name"
|
||||
self.logger(f" -> Pre-DL Move ({reason}): '{filename_to_save_in_main_path}'. Will target 'Duplicate' subfolder.")
|
||||
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
|
||||
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
|
||||
if is_duplicate_by_path or is_duplicate_by_session_name:
|
||||
reason = "Path Exists" if is_duplicate_by_path else "Session Name"
|
||||
self.logger(f" -> Skip Duplicate ({reason}, Pre-DL): '{filename_to_save_in_main_path}'. Skipping download.")
|
||||
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark as processed
|
||||
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
|
||||
|
||||
# Ensure base target folder exists (used for .part file with multipart)
|
||||
try:
|
||||
os.makedirs(current_target_folder_path, exist_ok=True)
|
||||
os.makedirs(target_folder_path, exist_ok=True) # For .part file
|
||||
except OSError as e:
|
||||
self.logger(f" ❌ Critical error creating directory '{current_target_folder_path}': {e}. Skipping file '{api_original_filename}'.")
|
||||
self.logger(f" ❌ Critical error creating directory '{target_folder_path}': {e}. Skipping file '{api_original_filename}'.")
|
||||
return 0, 1, api_original_filename, False
|
||||
|
||||
# If mode is MOVE (and not manga mode), and current_target_folder_path is now "Duplicate",
|
||||
# check if the file *already* exists by its base name in this "Duplicate" folder. (Standard Mode Only)
|
||||
if not self.manga_mode_active and \
|
||||
self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \
|
||||
"Duplicate" in current_target_folder_path.split(os.sep) and \
|
||||
os.path.exists(os.path.join(current_target_folder_path, filename_to_save_in_main_path)):
|
||||
self.logger(f" -> File '{filename_to_save_in_main_path}' already exists in '{os.path.basename(current_target_folder_path)}' subfolder. Skipping download.")
|
||||
# The name was already added to downloaded_files if it was a pre-DL move.
|
||||
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
|
||||
|
||||
# --- Download Attempt ---
|
||||
max_retries = 3
|
||||
@@ -633,9 +646,10 @@ class PostProcessorWorker:
|
||||
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
|
||||
self.signals.file_download_status_signal.emit(False)
|
||||
|
||||
mp_save_path_base = os.path.join(current_target_folder_path, filename_to_save_in_main_path)
|
||||
# .part file is always based on the main target_folder_path and filename_to_save_in_main_path
|
||||
mp_save_path_base_for_part = os.path.join(target_folder_path, filename_to_save_in_main_path)
|
||||
mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts(
|
||||
file_url, mp_save_path_base, total_size_bytes, num_parts_for_file, headers,
|
||||
file_url, mp_save_path_base_for_part, total_size_bytes, num_parts_for_file, headers,
|
||||
api_original_filename, self.signals, self.cancellation_event, skip_event, self.logger
|
||||
)
|
||||
if mp_success:
|
||||
@@ -705,130 +719,132 @@ class PostProcessorWorker:
|
||||
if file_content_bytes: file_content_bytes.close()
|
||||
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
|
||||
|
||||
if not self.manga_mode_active:
|
||||
# --- Post-Download Hash Check (Standard Mode Only) ---
|
||||
with self.downloaded_file_hashes_lock:
|
||||
if calculated_file_hash in self.downloaded_file_hashes:
|
||||
if self.duplicate_file_mode == DUPLICATE_MODE_DELETE:
|
||||
self.logger(f" -> Delete Duplicate (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). Skipping save.")
|
||||
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
|
||||
if file_content_bytes: file_content_bytes.close()
|
||||
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
|
||||
|
||||
elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER:
|
||||
self.logger(f" -> Post-DL Move (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). Content already downloaded.")
|
||||
if "Duplicate" not in current_target_folder_path.split(os.sep):
|
||||
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
|
||||
self.logger(f" Redirecting to 'Duplicate' subfolder: '{current_target_folder_path}'")
|
||||
# Ensure "Duplicate" folder exists if this is a new redirection due to hash
|
||||
try: os.makedirs(current_target_folder_path, exist_ok=True)
|
||||
except OSError as e_mkdir_hash: self.logger(f" Error creating Duplicate folder for hash collision: {e_mkdir_hash}")
|
||||
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
|
||||
|
||||
# --- Final Filename Determination for Saving ---
|
||||
filename_for_actual_save = filename_to_save_in_main_path
|
||||
# --- Universal Post-Download Hash Check ---
|
||||
with self.downloaded_file_hashes_lock:
|
||||
if calculated_file_hash in self.downloaded_file_hashes:
|
||||
self.logger(f" -> Skip Saving Duplicate (Hash Match): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...).")
|
||||
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark logical name
|
||||
if file_content_bytes: file_content_bytes.close()
|
||||
# If it was a multipart download, its .part file needs cleanup
|
||||
if not isinstance(file_content_bytes, BytesIO): # Indicates multipart download
|
||||
part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
|
||||
if os.path.exists(part_file_to_remove):
|
||||
try: os.remove(part_file_to_remove);
|
||||
except OSError: self.logger(f" -> Failed to remove .part file for hash duplicate: {part_file_to_remove}")
|
||||
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
|
||||
|
||||
# If mode is MOVE (and not manga mode) and the file is destined for the main folder,
|
||||
# but a file with that name *now* exists (e.g. race condition, or different file with same name not caught by hash),
|
||||
# reroute it to the "Duplicate" folder.
|
||||
if not self.manga_mode_active and \
|
||||
self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \
|
||||
current_target_folder_path == target_folder_path and \
|
||||
os.path.exists(os.path.join(current_target_folder_path, filename_for_actual_save)):
|
||||
self.logger(f" -> Post-DL Move (Late Name Collision in Main): '{filename_for_actual_save}'. Moving to 'Duplicate'.")
|
||||
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
|
||||
try: # Ensure "Duplicate" folder exists if this is a new redirection
|
||||
os.makedirs(current_target_folder_path, exist_ok=True)
|
||||
except OSError as e_mkdir: self.logger(f" Error creating Duplicate folder during late move: {e_mkdir}")
|
||||
# The name filename_to_save_in_main_path was already added to downloaded_files if it was a pre-DL name collision.
|
||||
# If it was a hash collision that got rerouted, it was also added.
|
||||
# If this is a new reroute due to late name collision, ensure it's marked.
|
||||
# --- Determine Save Location and Final Filename ---
|
||||
effective_save_folder = target_folder_path # Default: main character/post folder
|
||||
# filename_to_save_in_main_path is the logical name after cleaning, manga styling, word removal
|
||||
filename_after_styling_and_word_removal = filename_to_save_in_main_path
|
||||
|
||||
# "Move" logic and "Duplicate" subfolder logic removed.
|
||||
# effective_save_folder will always be target_folder_path.
|
||||
|
||||
try: # Ensure the chosen save folder (main or Duplicate) exists
|
||||
os.makedirs(effective_save_folder, exist_ok=True)
|
||||
except OSError as e:
|
||||
self.logger(f" ❌ Critical error creating directory '{effective_save_folder}': {e}. Skipping file '{api_original_filename}'.")
|
||||
if file_content_bytes: file_content_bytes.close()
|
||||
# Cleanup .part file if multipart
|
||||
if not isinstance(file_content_bytes, BytesIO):
|
||||
part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
|
||||
if os.path.exists(part_file_to_remove): os.remove(part_file_to_remove)
|
||||
return 0, 1, api_original_filename, False
|
||||
|
||||
# --- Image Compression ---
|
||||
# This operates on file_content_bytes (which is BytesIO or a file handle from multipart)
|
||||
# It might change filename_after_styling_and_word_removal's extension (e.g., .jpg to .webp)
|
||||
# and returns new data_to_write_after_compression (BytesIO) or original file_content_bytes.
|
||||
data_to_write_after_compression = file_content_bytes
|
||||
filename_after_compression = filename_after_styling_and_word_removal
|
||||
|
||||
# Apply numeric suffix renaming (_1, _2) *only if needed within the current_target_folder_path*
|
||||
# This means:
|
||||
# - If current_target_folder_path is the main folder (and not MOVE mode, or MOVE mode but file was unique):
|
||||
# Renaming happens if a file with filename_for_actual_save exists there.
|
||||
# - If current_target_folder_path is "Duplicate" (because of MOVE mode):
|
||||
# Renaming happens if filename_for_actual_save exists *within "Duplicate"*.
|
||||
counter = 1
|
||||
base_name_final_coll, ext_final_coll = os.path.splitext(filename_for_actual_save)
|
||||
temp_filename_final_check = filename_for_actual_save
|
||||
while os.path.exists(os.path.join(current_target_folder_path, temp_filename_final_check)):
|
||||
temp_filename_final_check = f"{base_name_final_coll}_{counter}{ext_final_coll}"
|
||||
counter += 1
|
||||
if temp_filename_final_check != filename_for_actual_save:
|
||||
self.logger(f" Final rename for target folder '{os.path.basename(current_target_folder_path)}': '{temp_filename_final_check}' (was '{filename_for_actual_save}')")
|
||||
filename_for_actual_save = temp_filename_final_check
|
||||
|
||||
bytes_to_write = file_content_bytes
|
||||
final_filename_after_processing = filename_for_actual_save
|
||||
current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing)
|
||||
|
||||
is_img_for_compress_check = is_image(api_original_filename)
|
||||
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
|
||||
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...")
|
||||
try:
|
||||
bytes_to_write.seek(0)
|
||||
with Image.open(bytes_to_write) as img_obj:
|
||||
file_content_bytes.seek(0)
|
||||
with Image.open(file_content_bytes) as img_obj:
|
||||
if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA')
|
||||
elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB')
|
||||
compressed_bytes_io = BytesIO()
|
||||
img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4)
|
||||
compressed_size = compressed_bytes_io.getbuffer().nbytes
|
||||
|
||||
if compressed_size < downloaded_size_bytes * 0.9:
|
||||
if compressed_size < downloaded_size_bytes * 0.9: # If significantly smaller
|
||||
self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.")
|
||||
if hasattr(bytes_to_write, 'close'): bytes_to_write.close()
|
||||
|
||||
original_part_file_path = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for .part
|
||||
if os.path.exists(original_part_file_path):
|
||||
os.remove(original_part_file_path)
|
||||
|
||||
bytes_to_write = compressed_bytes_io; bytes_to_write.seek(0)
|
||||
base_name_orig, _ = os.path.splitext(filename_for_actual_save)
|
||||
final_filename_after_processing = base_name_orig + '.webp'
|
||||
current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing)
|
||||
self.logger(f" Updated filename (compressed): {final_filename_after_processing}")
|
||||
data_to_write_after_compression = compressed_bytes_io; data_to_write_after_compression.seek(0)
|
||||
base_name_orig, _ = os.path.splitext(filename_after_compression)
|
||||
filename_after_compression = base_name_orig + '.webp'
|
||||
self.logger(f" Updated filename (compressed): {filename_after_compression}")
|
||||
else:
|
||||
self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0)
|
||||
self.logger(f" Compression skipped: WebP not significantly smaller."); file_content_bytes.seek(0) # Reset original stream
|
||||
data_to_write_after_compression = file_content_bytes # Use original
|
||||
except Exception as comp_e:
|
||||
self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); bytes_to_write.seek(0)
|
||||
self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); file_content_bytes.seek(0)
|
||||
data_to_write_after_compression = file_content_bytes # Use original
|
||||
|
||||
if final_filename_after_processing != filename_for_actual_save and \
|
||||
os.path.exists(current_save_path_final) and os.path.getsize(current_save_path_final) > 0:
|
||||
self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{os.path.basename(current_target_folder_path)}'.")
|
||||
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
|
||||
if bytes_to_write and hasattr(bytes_to_write, 'close'): bytes_to_write.close()
|
||||
return 0, 1, final_filename_after_processing, was_original_name_kept_flag
|
||||
# --- Final Numeric Suffixing in the effective_save_folder ---
|
||||
final_filename_on_disk = filename_after_compression # This is the name after potential compression
|
||||
temp_base, temp_ext = os.path.splitext(final_filename_on_disk)
|
||||
suffix_counter = 1
|
||||
while os.path.exists(os.path.join(effective_save_folder, final_filename_on_disk)):
|
||||
final_filename_on_disk = f"{temp_base}_{suffix_counter}{temp_ext}"
|
||||
suffix_counter += 1
|
||||
|
||||
if final_filename_on_disk != filename_after_compression:
|
||||
self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')")
|
||||
|
||||
# --- Save File ---
|
||||
final_save_path = os.path.join(effective_save_folder, final_filename_on_disk)
|
||||
|
||||
try:
|
||||
os.makedirs(current_target_folder_path, exist_ok=True)
|
||||
# data_to_write_after_compression is BytesIO (single stream, or compressed multipart)
|
||||
# OR it's the original file_content_bytes (which is a file handle if uncompressed multipart)
|
||||
|
||||
if isinstance(bytes_to_write, BytesIO):
|
||||
with open(current_save_path_final, 'wb') as f_out:
|
||||
f_out.write(bytes_to_write.getvalue())
|
||||
else:
|
||||
if hasattr(bytes_to_write, 'close'): bytes_to_write.close()
|
||||
source_part_file = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for .part
|
||||
os.rename(source_part_file, current_save_path_final)
|
||||
if data_to_write_after_compression is file_content_bytes and not isinstance(file_content_bytes, BytesIO):
|
||||
# This means uncompressed multipart download. Original .part file handle is file_content_bytes.
|
||||
# The .part file is at target_folder_path/filename_to_save_in_main_path.part
|
||||
original_part_file_actual_path = file_content_bytes.name
|
||||
file_content_bytes.close() # Close handle first
|
||||
os.rename(original_part_file_actual_path, final_save_path)
|
||||
self.logger(f" Renamed .part file to final: {final_save_path}")
|
||||
else: # Single stream download, or compressed multipart. Write from BytesIO.
|
||||
with open(final_save_path, 'wb') as f_out:
|
||||
f_out.write(data_to_write_after_compression.getvalue())
|
||||
|
||||
# If original was multipart and then compressed, clean up original .part file
|
||||
if data_to_write_after_compression is not file_content_bytes and not isinstance(file_content_bytes, BytesIO):
|
||||
original_part_file_actual_path = file_content_bytes.name
|
||||
file_content_bytes.close()
|
||||
if os.path.exists(original_part_file_actual_path):
|
||||
try: os.remove(original_part_file_actual_path)
|
||||
except OSError as e_rem: self.logger(f" -> Failed to remove .part after compression: {e_rem}")
|
||||
|
||||
with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash)
|
||||
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
|
||||
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Track by logical name
|
||||
|
||||
final_filename_saved_for_return = final_filename_after_processing
|
||||
self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(current_target_folder_path)}'")
|
||||
final_filename_saved_for_return = final_filename_on_disk
|
||||
self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'")
|
||||
# Session-wide base name tracking removed.
|
||||
time.sleep(0.05)
|
||||
return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag
|
||||
except Exception as save_err:
|
||||
self.logger(f"❌ Save Fail for '{final_filename_after_processing}': {save_err}")
|
||||
if os.path.exists(current_save_path_final):
|
||||
try: os.remove(current_save_path_final);
|
||||
except OSError: self.logger(f" -> Failed to remove partially saved file: {current_save_path_final}")
|
||||
self.logger(f"❌ Save Fail for '{final_filename_on_disk}': {save_err}")
|
||||
if os.path.exists(final_save_path):
|
||||
try: os.remove(final_save_path);
|
||||
except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
|
||||
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
|
||||
finally:
|
||||
if bytes_to_write and hasattr(bytes_to_write, 'close'):
|
||||
bytes_to_write.close()
|
||||
# Ensure all handles are closed
|
||||
if data_to_write_after_compression and hasattr(data_to_write_after_compression, 'close'):
|
||||
data_to_write_after_compression.close()
|
||||
# If original file_content_bytes was a different handle (e.g. multipart before compression) and not closed yet
|
||||
if file_content_bytes and file_content_bytes is not data_to_write_after_compression and hasattr(file_content_bytes, 'close'):
|
||||
try:
|
||||
if not file_content_bytes.closed: # Check if already closed
|
||||
file_content_bytes.close()
|
||||
except Exception: pass # Ignore errors on close if already handled
|
||||
|
||||
|
||||
def process(self):
|
||||
@@ -858,36 +874,140 @@ class PostProcessorWorker:
|
||||
|
||||
post_is_candidate_by_title_char_match = False
|
||||
char_filter_that_matched_title = None
|
||||
post_is_candidate_by_comment_char_match = False
|
||||
# New variables for CHAR_SCOPE_COMMENTS file-first logic
|
||||
post_is_candidate_by_file_char_match_in_comment_scope = False
|
||||
char_filter_that_matched_file_in_comment_scope = None
|
||||
char_filter_that_matched_comment = None
|
||||
|
||||
if self.filter_character_list_objects and \
|
||||
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH):
|
||||
self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. Scope: {self.char_filter_scope}")
|
||||
# self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. Scope: {self.char_filter_scope}")
|
||||
for idx, filter_item_obj in enumerate(self.filter_character_list_objects):
|
||||
self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}")
|
||||
if self.check_cancel(): break
|
||||
# self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}")
|
||||
terms_to_check_for_title = list(filter_item_obj["aliases"])
|
||||
if filter_item_obj["is_group"]:
|
||||
if filter_item_obj["name"] not in terms_to_check_for_title:
|
||||
terms_to_check_for_title.append(filter_item_obj["name"])
|
||||
|
||||
unique_terms_for_title_check = list(set(terms_to_check_for_title))
|
||||
self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}")
|
||||
# self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}")
|
||||
|
||||
for term_to_match in unique_terms_for_title_check:
|
||||
self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'")
|
||||
# self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'")
|
||||
match_found_for_term = is_title_match_for_character(post_title, term_to_match)
|
||||
self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}")
|
||||
# self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}")
|
||||
if match_found_for_term:
|
||||
post_is_candidate_by_title_char_match = True
|
||||
char_filter_that_matched_title = filter_item_obj
|
||||
self.logger(f" Post title matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}', Scope: {self.char_filter_scope}). Post is candidate.")
|
||||
break
|
||||
if post_is_candidate_by_title_char_match: break
|
||||
self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}")
|
||||
# self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}")
|
||||
|
||||
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
|
||||
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
|
||||
return 0, num_potential_files_in_post, []
|
||||
# --- Populate all_files_from_post_api before character filter logic that needs it ---
|
||||
# This is needed for the file-first check in CHAR_SCOPE_COMMENTS
|
||||
all_files_from_post_api_for_char_check = []
|
||||
api_file_domain_for_char_check = urlparse(self.api_url_input).netloc
|
||||
if not api_file_domain_for_char_check or not any(d in api_file_domain_for_char_check.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
|
||||
api_file_domain_for_char_check = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
|
||||
|
||||
if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'):
|
||||
original_api_name = post_main_file_info.get('name') or os.path.basename(post_main_file_info['path'].lstrip('/'))
|
||||
if original_api_name:
|
||||
all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_name})
|
||||
|
||||
for att_info in post_attachments:
|
||||
if isinstance(att_info, dict) and att_info.get('path'):
|
||||
original_api_att_name = att_info.get('name') or os.path.basename(att_info['path'].lstrip('/'))
|
||||
if original_api_att_name:
|
||||
all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_att_name})
|
||||
# --- End population of all_files_from_post_api_for_char_check ---
|
||||
|
||||
|
||||
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
|
||||
self.logger(f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id}'.")
|
||||
for file_info_item in all_files_from_post_api_for_char_check: # Use the pre-populated list of file names
|
||||
if self.check_cancel(): break
|
||||
current_api_original_filename_for_check = file_info_item.get('_original_name_for_log')
|
||||
if not current_api_original_filename_for_check: continue
|
||||
|
||||
for filter_item_obj in self.filter_character_list_objects:
|
||||
terms_to_check = list(filter_item_obj["aliases"])
|
||||
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check:
|
||||
terms_to_check.append(filter_item_obj["name"])
|
||||
|
||||
for term_to_match in terms_to_check:
|
||||
if is_filename_match_for_character(current_api_original_filename_for_check, term_to_match):
|
||||
post_is_candidate_by_file_char_match_in_comment_scope = True
|
||||
char_filter_that_matched_file_in_comment_scope = filter_item_obj
|
||||
self.logger(f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check}' matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}'). Post is candidate.")
|
||||
break
|
||||
if post_is_candidate_by_file_char_match_in_comment_scope: break
|
||||
if post_is_candidate_by_file_char_match_in_comment_scope: break
|
||||
self.logger(f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope}")
|
||||
|
||||
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
|
||||
if not post_is_candidate_by_file_char_match_in_comment_scope:
|
||||
self.logger(f" [Char Scope: Comments] Phase 2: No file match found. Checking post comments for post ID '{post_id}'.")
|
||||
try:
|
||||
parsed_input_url_for_comments = urlparse(self.api_url_input)
|
||||
api_domain_for_comments = parsed_input_url_for_comments.netloc
|
||||
if not any(d in api_domain_for_comments.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
|
||||
self.logger(f"⚠️ Unrecognized domain '{api_domain_for_comments}' for comment API. Defaulting based on service.")
|
||||
api_domain_for_comments = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
|
||||
|
||||
comments_data = fetch_post_comments(
|
||||
api_domain_for_comments, self.service, self.user_id, post_id,
|
||||
headers, self.logger, self.cancellation_event
|
||||
)
|
||||
if comments_data:
|
||||
self.logger(f" Fetched {len(comments_data)} comments for post {post_id}.")
|
||||
for comment_item_idx, comment_item in enumerate(comments_data):
|
||||
if self.check_cancel(): break
|
||||
raw_comment_content = comment_item.get('content', '')
|
||||
if not raw_comment_content: continue
|
||||
|
||||
cleaned_comment_text = strip_html_tags(raw_comment_content)
|
||||
if not cleaned_comment_text.strip(): continue
|
||||
|
||||
for filter_item_obj in self.filter_character_list_objects:
|
||||
terms_to_check_comment = list(filter_item_obj["aliases"])
|
||||
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_comment:
|
||||
terms_to_check_comment.append(filter_item_obj["name"])
|
||||
|
||||
for term_to_match_comment in terms_to_check_comment:
|
||||
if is_title_match_for_character(cleaned_comment_text, term_to_match_comment): # Re-use title matcher
|
||||
post_is_candidate_by_comment_char_match = True
|
||||
char_filter_that_matched_comment = filter_item_obj
|
||||
self.logger(f" Match Found (Comment in Comments Scope): Comment in post {post_id} matches char filter term '{term_to_match_comment}' (from group/name '{filter_item_obj['name']}'). Post is candidate.")
|
||||
self.logger(f" Matching comment (first 100 chars): '{cleaned_comment_text[:100]}...'")
|
||||
break
|
||||
if post_is_candidate_by_comment_char_match: break
|
||||
if post_is_candidate_by_comment_char_match: break
|
||||
else:
|
||||
self.logger(f" No comments found or fetched for post {post_id} to check against character filters.")
|
||||
|
||||
except RuntimeError as e_fetch_comment:
|
||||
self.logger(f" ⚠️ Error fetching or processing comments for post {post_id}: {e_fetch_comment}")
|
||||
except Exception as e_generic_comment:
|
||||
self.logger(f" ❌ Unexpected error during comment processing for post {post_id}: {e_generic_comment}\n{traceback.format_exc(limit=2)}")
|
||||
self.logger(f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match}")
|
||||
else: # post_is_candidate_by_file_char_match_in_comment_scope was True
|
||||
self.logger(f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id}' because a file match already made it a candidate.")
|
||||
|
||||
# --- Skip Post Logic based on Title or Comment Scope (if filters are active) ---
|
||||
if self.filter_character_list_objects:
|
||||
if self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
|
||||
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
|
||||
return 0, num_potential_files_in_post, []
|
||||
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and \
|
||||
not post_is_candidate_by_file_char_match_in_comment_scope and \
|
||||
not post_is_candidate_by_comment_char_match: # MODIFIED: Check both file and comment match flags
|
||||
self.logger(f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id}', Title '{post_title[:50]}...'")
|
||||
return 0, num_potential_files_in_post, []
|
||||
|
||||
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
|
||||
post_title_lower = post_title.lower()
|
||||
for skip_word in self.skip_words_list:
|
||||
@@ -907,9 +1027,26 @@ class PostProcessorWorker:
|
||||
|
||||
base_folder_names_for_post_content = []
|
||||
if not self.extract_links_only and self.use_subfolders:
|
||||
if post_is_candidate_by_title_char_match and char_filter_that_matched_title:
|
||||
base_folder_names_for_post_content = [clean_folder_name(char_filter_that_matched_title["name"])]
|
||||
elif not self.filter_character_list_objects:
|
||||
primary_char_filter_for_folder = None
|
||||
log_reason_for_folder = ""
|
||||
|
||||
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment:
|
||||
# For CHAR_SCOPE_COMMENTS, prioritize file match for folder name if it happened
|
||||
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope:
|
||||
primary_char_filter_for_folder = char_filter_that_matched_file_in_comment_scope
|
||||
log_reason_for_folder = "Matched char filter in filename (Comments scope)"
|
||||
elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment: # Fallback to comment match
|
||||
primary_char_filter_for_folder = char_filter_that_matched_comment
|
||||
log_reason_for_folder = "Matched char filter in comments (Comments scope, no file match)"
|
||||
elif (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and char_filter_that_matched_title: # Existing logic for other scopes
|
||||
primary_char_filter_for_folder = char_filter_that_matched_title
|
||||
log_reason_for_folder = "Matched char filter in title"
|
||||
# If scope is FILES, primary_char_filter_for_folder will be None here. Folder determined per file.
|
||||
|
||||
if primary_char_filter_for_folder:
|
||||
base_folder_names_for_post_content = [clean_folder_name(primary_char_filter_for_folder["name"])]
|
||||
self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}")
|
||||
elif not self.filter_character_list_objects: # No char filters defined, use generic logic
|
||||
derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
|
||||
if derived_folders:
|
||||
base_folder_names_for_post_content.extend(derived_folders)
|
||||
@@ -917,11 +1054,10 @@ class PostProcessorWorker:
|
||||
base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
|
||||
if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
|
||||
base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
|
||||
self.logger(f" Base folder name(s) for post content (Generic title parsing - no char filters): {', '.join(base_folder_names_for_post_content)}")
|
||||
# If char filters are defined, and scope is FILES, then base_folder_names_for_post_content remains empty.
|
||||
# The folder will be determined by char_filter_info_that_matched_file later.
|
||||
|
||||
if base_folder_names_for_post_content:
|
||||
log_reason = "Matched char filter" if (post_is_candidate_by_title_char_match and char_filter_that_matched_title) else "Generic title parsing (no char filters)"
|
||||
self.logger(f" Base folder name(s) for post content ({log_reason}): {', '.join(base_folder_names_for_post_content)}")
|
||||
|
||||
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
|
||||
for folder_name_to_check in base_folder_names_for_post_content:
|
||||
if not folder_name_to_check: continue
|
||||
@@ -1066,19 +1202,32 @@ class PostProcessorWorker:
|
||||
char_filter_info_that_matched_file = char_filter_that_matched_title
|
||||
self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. Scope: Both (Title part).")
|
||||
else:
|
||||
for filter_item_obj in self.filter_character_list_objects:
|
||||
terms_to_check_for_file_both = list(filter_item_obj["aliases"])
|
||||
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_for_file_both:
|
||||
terms_to_check_for_file_both.append(filter_item_obj["name"])
|
||||
unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both))
|
||||
# This part is for the "File" part of "Both" scope
|
||||
for filter_item_obj_both_file in self.filter_character_list_objects:
|
||||
terms_to_check_for_file_both = list(filter_item_obj_both_file["aliases"])
|
||||
if filter_item_obj_both_file["is_group"] and filter_item_obj_both_file["name"] not in terms_to_check_for_file_both:
|
||||
terms_to_check_for_file_both.append(filter_item_obj_both_file["name"])
|
||||
# Ensure unique_terms_for_file_both_check is defined here
|
||||
unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both))
|
||||
|
||||
for term_to_match in unique_terms_for_file_both_check:
|
||||
if is_filename_match_for_character(current_api_original_filename, term_to_match):
|
||||
file_is_candidate_by_char_filter_scope = True
|
||||
char_filter_info_that_matched_file = filter_item_obj
|
||||
char_filter_info_that_matched_file = filter_item_obj_both_file # Use the filter that matched the file
|
||||
self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Both (File part).")
|
||||
break
|
||||
if file_is_candidate_by_char_filter_scope: break
|
||||
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
|
||||
# If the post is a candidate (either by file or comment under this scope), then this file is also a candidate.
|
||||
# The folder naming will use the filter that made the POST a candidate.
|
||||
if post_is_candidate_by_file_char_match_in_comment_scope: # Post was candidate due to a file match
|
||||
file_is_candidate_by_char_filter_scope = True
|
||||
char_filter_info_that_matched_file = char_filter_that_matched_file_in_comment_scope # Use the filter that matched a file in the post
|
||||
self.logger(f" File '{current_api_original_filename}' is candidate because a file in this post matched char filter (Overall Scope: Comments).")
|
||||
elif post_is_candidate_by_comment_char_match: # Post was candidate due to comment match (no file match for post)
|
||||
file_is_candidate_by_char_filter_scope = True
|
||||
char_filter_info_that_matched_file = char_filter_that_matched_comment # Use the filter that matched comments
|
||||
self.logger(f" File '{current_api_original_filename}' is candidate because post comments matched char filter (Overall Scope: Comments).")
|
||||
|
||||
if not file_is_candidate_by_char_filter_scope:
|
||||
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
|
||||
@@ -1178,7 +1327,7 @@ class DownloadThread(QThread):
|
||||
char_filter_scope=CHAR_SCOPE_FILES,
|
||||
remove_from_filename_words_list=None,
|
||||
allow_multipart_download=True,
|
||||
duplicate_file_mode=DUPLICATE_MODE_DELETE): # Default to DELETE
|
||||
): # Removed duplicate_file_mode and session-wide tracking
|
||||
super().__init__()
|
||||
self.api_url_input = api_url_input
|
||||
self.output_dir = output_dir
|
||||
@@ -1219,7 +1368,7 @@ class DownloadThread(QThread):
|
||||
self.char_filter_scope = char_filter_scope
|
||||
self.remove_from_filename_words_list = remove_from_filename_words_list
|
||||
self.allow_multipart_download = allow_multipart_download
|
||||
self.duplicate_file_mode = duplicate_file_mode
|
||||
# self.duplicate_file_mode and session-wide tracking removed
|
||||
if self.compress_images and Image is None:
|
||||
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
|
||||
self.compress_images = False
|
||||
@@ -1297,7 +1446,7 @@ class DownloadThread(QThread):
|
||||
char_filter_scope=self.char_filter_scope,
|
||||
remove_from_filename_words_list=self.remove_from_filename_words_list,
|
||||
allow_multipart_download=self.allow_multipart_download,
|
||||
duplicate_file_mode=self.duplicate_file_mode)
|
||||
) # Removed duplicate_file_mode and session-wide tracking
|
||||
try:
|
||||
dl_count, skip_count, kept_originals_this_post = post_processing_worker.process()
|
||||
grand_total_downloaded_files += dl_count
|
||||
|
||||
267
main.py
267
main.py
@@ -48,8 +48,9 @@ try:
|
||||
SKIP_SCOPE_POSTS,
|
||||
SKIP_SCOPE_BOTH,
|
||||
CHAR_SCOPE_TITLE, # Added for completeness if used directly
|
||||
CHAR_SCOPE_FILES, # Added
|
||||
CHAR_SCOPE_BOTH # Added
|
||||
CHAR_SCOPE_FILES, # Ensure this is imported
|
||||
CHAR_SCOPE_BOTH,
|
||||
CHAR_SCOPE_COMMENTS
|
||||
)
|
||||
print("Successfully imported names from downloader_utils.")
|
||||
except ImportError as e:
|
||||
@@ -68,6 +69,7 @@ except ImportError as e:
|
||||
CHAR_SCOPE_TITLE = "title"
|
||||
CHAR_SCOPE_FILES = "files"
|
||||
CHAR_SCOPE_BOTH = "both"
|
||||
CHAR_SCOPE_COMMENTS = "comments"
|
||||
|
||||
except Exception as e:
|
||||
print(f"--- UNEXPECTED IMPORT ERROR ---")
|
||||
@@ -80,6 +82,7 @@ except Exception as e:
|
||||
MAX_THREADS = 200
|
||||
RECOMMENDED_MAX_THREADS = 50
|
||||
MAX_FILE_THREADS_PER_POST_OR_WORKER = 10
|
||||
MAX_POST_WORKERS_WHEN_COMMENT_FILTERING = 3 # New constant
|
||||
|
||||
HTML_PREFIX = "<!HTML!>"
|
||||
|
||||
@@ -92,13 +95,7 @@ SKIP_WORDS_SCOPE_KEY = "skipWordsScopeV1"
|
||||
ALLOW_MULTIPART_DOWNLOAD_KEY = "allowMultipartDownloadV1"
|
||||
|
||||
CHAR_FILTER_SCOPE_KEY = "charFilterScopeV1"
|
||||
# CHAR_SCOPE_TITLE, CHAR_SCOPE_FILES, CHAR_SCOPE_BOTH are already defined or imported
|
||||
|
||||
DUPLICATE_FILE_MODE_KEY = "duplicateFileModeV1"
|
||||
# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed.
|
||||
DUPLICATE_MODE_DELETE = "delete"
|
||||
DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move" # New mode
|
||||
|
||||
# CHAR_SCOPE_TITLE, CHAR_SCOPE_FILES, CHAR_SCOPE_BOTH, CHAR_SCOPE_COMMENTS are already defined or imported
|
||||
|
||||
# --- Tour Classes (Moved from tour.py) ---
|
||||
class TourStepWidget(QWidget):
|
||||
@@ -480,25 +477,21 @@ class DownloaderApp(QWidget):
|
||||
self.radio_only_links = None
|
||||
self.radio_only_archives = None
|
||||
|
||||
self.skip_scope_toggle_button = None
|
||||
self.char_filter_scope_toggle_button = None
|
||||
|
||||
self.all_kept_original_filenames = []
|
||||
|
||||
self.manga_filename_style = self.settings.value(MANGA_FILENAME_STYLE_KEY, STYLE_POST_TITLE, type=str)
|
||||
self.skip_words_scope = self.settings.value(SKIP_WORDS_SCOPE_KEY, SKIP_SCOPE_POSTS, type=str)
|
||||
self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_TITLE, type=str)
|
||||
self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_FILES, type=str) # Default to Files
|
||||
# Always default multi-part download to OFF on launch, ignoring any saved setting.
|
||||
self.allow_multipart_download_setting = False
|
||||
self.duplicate_file_mode = self.settings.value(DUPLICATE_FILE_MODE_KEY, DUPLICATE_MODE_DELETE, type=str) # Default to DELETE
|
||||
print(f"ℹ️ Known.txt will be loaded/saved at: {self.config_file}")
|
||||
|
||||
|
||||
|
||||
self.load_known_names_from_util()
|
||||
self.setWindowTitle("Kemono Downloader v3.2.0")
|
||||
# self.setGeometry(150, 150, 1050, 820) # Initial geometry will be set after showing
|
||||
self.setStyleSheet(self.get_dark_theme())
|
||||
|
||||
self.init_ui()
|
||||
self._connect_signals()
|
||||
|
||||
@@ -510,7 +503,6 @@ class DownloaderApp(QWidget):
|
||||
self.log_signal.emit(f"ℹ️ Skip words scope loaded: '{self.skip_words_scope}'")
|
||||
self.log_signal.emit(f"ℹ️ Character filter scope loaded: '{self.char_filter_scope}'")
|
||||
self.log_signal.emit(f"ℹ️ Multi-part download defaults to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'} on launch")
|
||||
self.log_signal.emit(f"ℹ️ Duplicate file handling mode loaded: '{self.duplicate_file_mode.capitalize()}'")
|
||||
|
||||
|
||||
def _connect_signals(self):
|
||||
@@ -560,7 +552,6 @@ class DownloaderApp(QWidget):
|
||||
self.char_filter_scope_toggle_button.clicked.connect(self._cycle_char_filter_scope)
|
||||
|
||||
if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode)
|
||||
if hasattr(self, 'duplicate_mode_toggle_button'): self.duplicate_mode_toggle_button.clicked.connect(self._cycle_duplicate_mode)
|
||||
|
||||
|
||||
def load_known_names_from_util(self):
|
||||
@@ -606,7 +597,6 @@ class DownloaderApp(QWidget):
|
||||
self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope)
|
||||
self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
|
||||
self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
|
||||
self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode) # Save current mode
|
||||
self.settings.sync()
|
||||
|
||||
should_exit = True
|
||||
@@ -726,17 +716,17 @@ class DownloaderApp(QWidget):
|
||||
self.character_input = QLineEdit()
|
||||
self.character_input.setPlaceholderText("e.g., Tifa, Aerith, (Cloud, Zack)")
|
||||
self.character_input.setToolTip(
|
||||
"Filter files or posts by character/series names (comma-separated).\n"
|
||||
" - Normal Mode: Filters individual files by matching their filenames.\n"
|
||||
" - Manga/Comic Mode: Filters entire posts by matching the post title.\n"
|
||||
"Filter by character/series names (comma-separated, e.g., Tifa, Aerith).\n"
|
||||
"The behavior of this filter (Files, Title, Both, or Comments) is controlled by the 'Filter: [Scope]' button next to this input.\n"
|
||||
"Also used for folder naming if 'Separate Folders' is enabled.\n"
|
||||
"Group aliases for a combined folder name: (alias1, alias2) -> folder 'alias1 alias2'.\n"
|
||||
"Example: yor, Tifa, (Boa, Hancock)")
|
||||
char_input_and_button_layout.addWidget(self.character_input, 3)
|
||||
|
||||
self.char_filter_scope_toggle_button = QPushButton()
|
||||
# Initial text and tooltip will be set by calling _update_char_filter_scope_button_text()
|
||||
# at the end of init_ui or when the scope is first set.
|
||||
self._update_char_filter_scope_button_text()
|
||||
self.char_filter_scope_toggle_button.setToolTip("Click to cycle character filter scope (Files -> Title -> Both)")
|
||||
self.char_filter_scope_toggle_button.setStyleSheet("padding: 6px 10px;")
|
||||
self.char_filter_scope_toggle_button.setMinimumWidth(100)
|
||||
char_input_and_button_layout.addWidget(self.char_filter_scope_toggle_button, 1)
|
||||
@@ -794,7 +784,6 @@ class DownloaderApp(QWidget):
|
||||
skip_input_and_button_layout.addWidget(self.skip_words_input, 1) # Input field takes available space
|
||||
self.skip_scope_toggle_button = QPushButton()
|
||||
self._update_skip_scope_button_text()
|
||||
self.skip_scope_toggle_button.setToolTip("Click to cycle skip scope (Files -> Posts -> Both)")
|
||||
self.skip_scope_toggle_button.setStyleSheet("padding: 6px 10px;")
|
||||
self.skip_scope_toggle_button.setMinimumWidth(100)
|
||||
skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 0) # Button takes its minimum
|
||||
@@ -1017,38 +1006,26 @@ class DownloaderApp(QWidget):
|
||||
log_title_layout.addWidget(self.link_search_button)
|
||||
|
||||
self.manga_rename_toggle_button = QPushButton()
|
||||
# Tooltip is dynamically set by _update_manga_filename_style_button_text
|
||||
self.manga_rename_toggle_button.setVisible(False)
|
||||
self.manga_rename_toggle_button.setFixedWidth(140)
|
||||
self.manga_rename_toggle_button.setStyleSheet("padding: 4px 8px;")
|
||||
self._update_manga_filename_style_button_text()
|
||||
log_title_layout.addWidget(self.manga_rename_toggle_button)
|
||||
|
||||
self.multipart_toggle_button = QPushButton() # Create the button
|
||||
# Tooltip is dynamically set by _update_multipart_toggle_button_text
|
||||
self.multipart_toggle_button = QPushButton()
|
||||
self.multipart_toggle_button.setToolTip("Toggle between Multi-part and Single-stream downloads for large files.")
|
||||
self.multipart_toggle_button.setFixedWidth(130) # Adjust width as needed
|
||||
self.multipart_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding
|
||||
self._update_multipart_toggle_button_text() # Set initial text
|
||||
log_title_layout.addWidget(self.multipart_toggle_button) # Add to layout
|
||||
|
||||
self.duplicate_mode_toggle_button = QPushButton()
|
||||
# Tooltip is dynamically set by _update_duplicate_mode_button_text
|
||||
self.duplicate_mode_toggle_button.setToolTip("Toggle how duplicate filenames are handled (Rename or Delete).")
|
||||
self.duplicate_mode_toggle_button.setFixedWidth(150) # Adjust width
|
||||
self.duplicate_mode_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding
|
||||
self._update_duplicate_mode_button_text() # Set initial text
|
||||
log_title_layout.addWidget(self.duplicate_mode_toggle_button)
|
||||
|
||||
self.log_verbosity_button = QPushButton("Show Basic Log")
|
||||
# Tooltip already exists for log_verbosity_button
|
||||
self.log_verbosity_button.setToolTip("Toggle between full and basic log details.")
|
||||
self.log_verbosity_button.setFixedWidth(110)
|
||||
self.log_verbosity_button.setStyleSheet("padding: 4px 8px;")
|
||||
log_title_layout.addWidget(self.log_verbosity_button)
|
||||
|
||||
self.reset_button = QPushButton("🔄 Reset")
|
||||
# Tooltip already exists for reset_button
|
||||
self.reset_button.setToolTip("Reset all inputs and logs to default state (only when idle).")
|
||||
self.reset_button.setFixedWidth(80)
|
||||
self.reset_button.setStyleSheet("padding: 4px 8px;")
|
||||
@@ -1125,7 +1102,6 @@ class DownloaderApp(QWidget):
|
||||
self._update_manga_filename_style_button_text()
|
||||
self._update_skip_scope_button_text()
|
||||
self._update_char_filter_scope_button_text()
|
||||
self._update_duplicate_mode_button_text()
|
||||
|
||||
def _center_on_screen(self):
|
||||
"""Centers the widget on the screen."""
|
||||
@@ -1382,8 +1358,7 @@ class DownloaderApp(QWidget):
|
||||
self.skip_scope_toggle_button.setVisible(not (is_only_links or is_only_archives))
|
||||
if hasattr(self, 'multipart_toggle_button') and self.multipart_toggle_button:
|
||||
self.multipart_toggle_button.setVisible(not (is_only_links or is_only_archives))
|
||||
# Other log header buttons (manga, duplicate, char filter scope) are handled by
|
||||
# update_ui_for_manga_mode and update_ui_for_subfolders, which are called below.
|
||||
# Other log header buttons (manga, char filter scope) are handled by update_ui_for_manga_mode and update_ui_for_subfolders
|
||||
|
||||
if self.link_search_input: self.link_search_input.setVisible(is_only_links)
|
||||
if self.link_search_button: self.link_search_button.setVisible(is_only_links)
|
||||
@@ -1466,9 +1441,7 @@ class DownloaderApp(QWidget):
|
||||
|
||||
self.update_ui_for_subfolders(subfolders_on)
|
||||
self.update_custom_folder_visibility()
|
||||
# Ensure manga mode UI updates (which includes the visibility of
|
||||
# manga_rename_toggle_button and duplicate_mode_toggle_button)
|
||||
# are triggered after filter mode changes.
|
||||
# Ensure manga mode UI updates (which includes the visibility of manga_rename_toggle_button)
|
||||
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
|
||||
|
||||
|
||||
@@ -1556,12 +1529,46 @@ class DownloaderApp(QWidget):
|
||||
if self.skip_scope_toggle_button:
|
||||
if self.skip_words_scope == SKIP_SCOPE_FILES:
|
||||
self.skip_scope_toggle_button.setText("Scope: Files")
|
||||
self.skip_scope_toggle_button.setToolTip(
|
||||
"Current Skip Scope: Files\n\n"
|
||||
"Skips individual files if their names contain any of the 'Skip with Words'.\n"
|
||||
"Example: Skip words \"WIP, sketch\".\n"
|
||||
"- File \"art_WIP.jpg\" -> SKIPPED.\n"
|
||||
"- File \"final_art.png\" -> DOWNLOADED (if other conditions met).\n"
|
||||
"Post is still processed for other non-skipped files.\n\n"
|
||||
"Click to cycle to: Posts"
|
||||
)
|
||||
elif self.skip_words_scope == SKIP_SCOPE_POSTS:
|
||||
self.skip_scope_toggle_button.setText("Scope: Posts")
|
||||
self.skip_scope_toggle_button.setToolTip(
|
||||
"Current Skip Scope: Posts\n\n"
|
||||
"Skips entire posts if their titles contain any of the 'Skip with Words'.\n"
|
||||
"All files from a skipped post are ignored.\n"
|
||||
"Example: Skip words \"preview, announcement\".\n"
|
||||
"- Post \"Exciting Announcement!\" -> SKIPPED.\n"
|
||||
"- Post \"Finished Artwork\" -> PROCESSED (if other conditions met).\n\n"
|
||||
"Click to cycle to: Both"
|
||||
)
|
||||
elif self.skip_words_scope == SKIP_SCOPE_BOTH:
|
||||
self.skip_scope_toggle_button.setText("Scope: Both")
|
||||
self.skip_scope_toggle_button.setToolTip(
|
||||
"Current Skip Scope: Both (Posts then Files)\n\n"
|
||||
"1. Checks post title: If title contains a skip word, the entire post is SKIPPED.\n"
|
||||
"2. If post title is OK, then checks individual filenames: If a filename contains a skip word, only that file is SKIPPED.\n"
|
||||
"Example: Skip words \"WIP, sketch\".\n"
|
||||
"- Post \"Sketches and WIPs\" (title match) -> ENTIRE POST SKIPPED.\n"
|
||||
"- Post \"Art Update\" (title OK) with files:\n"
|
||||
" - \"character_WIP.jpg\" (file match) -> SKIPPED.\n"
|
||||
" - \"final_scene.png\" (file OK) -> DOWNLOADED.\n\n"
|
||||
"Click to cycle to: Files"
|
||||
)
|
||||
else:
|
||||
self.skip_scope_toggle_button.setText("Scope: Unknown")
|
||||
self.skip_scope_toggle_button.setToolTip(
|
||||
"Current Skip Scope: Unknown\n\n"
|
||||
"The skip words scope is in an unknown state. Please cycle or reset.\n\n"
|
||||
"Click to cycle to: Files"
|
||||
)
|
||||
|
||||
|
||||
def _cycle_skip_scope(self):
|
||||
@@ -1585,28 +1592,74 @@ class DownloaderApp(QWidget):
|
||||
if self.char_filter_scope_toggle_button:
|
||||
if self.char_filter_scope == CHAR_SCOPE_FILES:
|
||||
self.char_filter_scope_toggle_button.setText("Filter: Files")
|
||||
self.char_filter_scope_toggle_button.setToolTip(
|
||||
"Current Scope: Files\n\n"
|
||||
"Filters individual files by name. A post is kept if any file matches.\n"
|
||||
"Only matching files from that post are downloaded.\n"
|
||||
"Example: Filter 'Tifa'. File 'Tifa_artwork.jpg' matches and is downloaded.\n"
|
||||
"Folder Naming: Uses character from matching filename.\n\n"
|
||||
"Click to cycle to: Title"
|
||||
)
|
||||
elif self.char_filter_scope == CHAR_SCOPE_TITLE:
|
||||
self.char_filter_scope_toggle_button.setText("Filter: Title")
|
||||
self.char_filter_scope_toggle_button.setToolTip(
|
||||
"Current Scope: Title\n\n"
|
||||
"Filters entire posts by their title. All files from a matching post are downloaded.\n"
|
||||
"Example: Filter 'Aerith'. Post titled 'Aerith's Garden' matches; all its files are downloaded.\n"
|
||||
"Folder Naming: Uses character from matching post title.\n\n"
|
||||
"Click to cycle to: Both"
|
||||
)
|
||||
elif self.char_filter_scope == CHAR_SCOPE_BOTH:
|
||||
self.char_filter_scope_toggle_button.setText("Filter: Both")
|
||||
self.char_filter_scope_toggle_button.setToolTip(
|
||||
"Current Scope: Both (Title then Files)\n\n"
|
||||
"1. Checks post title: If matches, all files from post are downloaded.\n"
|
||||
"2. If title doesn't match, checks filenames: If any file matches, only that file is downloaded.\n"
|
||||
"Example: Filter 'Cloud'.\n"
|
||||
" - Post 'Cloud Strife' (title match) -> all files downloaded.\n"
|
||||
" - Post 'Bike Chase' with 'Cloud_fenrir.jpg' (file match) -> only 'Cloud_fenrir.jpg' downloaded.\n"
|
||||
"Folder Naming: Prioritizes title match, then file match.\n\n"
|
||||
"Click to cycle to: Comments"
|
||||
)
|
||||
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
|
||||
self.char_filter_scope_toggle_button.setText("Filter: Comments (Beta)")
|
||||
self.char_filter_scope_toggle_button.setToolTip(
|
||||
"Current Scope: Comments (Beta - Files first, then Comments as fallback)\n\n"
|
||||
"1. Checks filenames: If any file in the post matches the filter, the entire post is downloaded. Comments are NOT checked for this filter term.\n"
|
||||
"2. If no file matches, THEN checks post comments: If a comment matches, the entire post is downloaded.\n"
|
||||
"Example: Filter 'Barret'.\n"
|
||||
" - Post A: Files 'Barret_gunarm.jpg', 'other.png'. File 'Barret_gunarm.jpg' matches. All files from Post A downloaded. Comments not checked for 'Barret'.\n"
|
||||
" - Post B: Files 'dyne.jpg', 'weapon.gif'. Comments: '...a drawing of Barret Wallace...'. No file match for 'Barret'. Comment matches. All files from Post B downloaded.\n"
|
||||
"Folder Naming: Prioritizes character from file match, then from comment match.\n\n"
|
||||
"Click to cycle to: Files"
|
||||
)
|
||||
else:
|
||||
self.char_filter_scope_toggle_button.setText("Filter: Unknown")
|
||||
self.char_filter_scope_toggle_button.setToolTip(
|
||||
"Current Scope: Unknown\n\n"
|
||||
"The character filter scope is in an unknown state. Please cycle or reset.\n\n"
|
||||
"Click to cycle to: Files"
|
||||
)
|
||||
|
||||
def _cycle_char_filter_scope(self):
    """Advance the character filter scope to the next mode in the rotation.

    Rotation order: Files -> Title -> Both -> Comments -> Files. A current
    value that is not part of the rotation falls back to Files. Once the new
    scope is stored on the instance, this calls
    ``_update_char_filter_scope_button_text()``, writes the scope to
    ``self.settings`` under ``CHAR_FILTER_SCOPE_KEY``, and emits a message on
    ``self.log_signal``.
    """
    # (current scope, scope that follows it), checked in rotation order.
    rotation = (
        (CHAR_SCOPE_FILES, CHAR_SCOPE_TITLE),
        (CHAR_SCOPE_TITLE, CHAR_SCOPE_BOTH),
        (CHAR_SCOPE_BOTH, CHAR_SCOPE_COMMENTS),
        (CHAR_SCOPE_COMMENTS, CHAR_SCOPE_FILES),
    )

    upcoming_scope = CHAR_SCOPE_FILES  # Default fallback for unrecognized values
    for present_scope, following_scope in rotation:
        if self.char_filter_scope == present_scope:
            upcoming_scope = following_scope
            break
    self.char_filter_scope = upcoming_scope

    self._update_char_filter_scope_button_text()
    self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
    self.log_signal.emit(f"ℹ️ Character filter scope changed to: '{self.char_filter_scope}'")
|
||||
|
||||
|
||||
|
||||
def add_new_character(self):
|
||||
global KNOWN_NAMES, clean_folder_name
|
||||
name_to_add = self.new_char_input.text().strip()
|
||||
@@ -1751,18 +1804,34 @@ class DownloaderApp(QWidget):
|
||||
if self.manga_filename_style == STYLE_POST_TITLE:
|
||||
self.manga_rename_toggle_button.setText("Name: Post Title")
|
||||
self.manga_rename_toggle_button.setToolTip(
|
||||
"Manga files: First file named by post title. Subsequent files in same post keep original names.\n"
|
||||
"Click to change to original file names for all files."
|
||||
"Manga Filename Style: Post Title\n\n"
|
||||
"When Manga/Comic Mode is active for a creator feed:\n"
|
||||
"- The *first* file in a post is named after the post's title (e.g., \"MyMangaChapter1.jpg\").\n"
|
||||
"- Any *subsequent* files within the *same post* will retain their original filenames (e.g., \"page_02.png\", \"bonus_art.jpg\").\n"
|
||||
"- This is generally recommended for better organization of sequential content.\n"
|
||||
"- Example: Post \"Chapter 1: The Beginning\" with files \"001.jpg\", \"002.jpg\".\n"
|
||||
" Downloads as: \"Chapter 1 The Beginning.jpg\", \"002.jpg\".\n\n"
|
||||
"Click to change to: Original File Name"
|
||||
)
|
||||
elif self.manga_filename_style == STYLE_ORIGINAL_NAME:
|
||||
self.manga_rename_toggle_button.setText("Name: Original File")
|
||||
self.manga_rename_toggle_button.setToolTip(
|
||||
"Manga files will keep their original names as provided by the site (e.g., 001.jpg, page_01.png).\n"
|
||||
"Click to change to post title based naming for the first file."
|
||||
"Manga Filename Style: Original File Name\n\n"
|
||||
"When Manga/Comic Mode is active for a creator feed:\n"
|
||||
"- *All* files in a post will attempt to keep their original filenames as provided by the site (e.g., \"001.jpg\", \"page_02.png\").\n"
|
||||
"- This can be useful if original names are already well-structured and sequential.\n"
|
||||
"- If original names are inconsistent, using \"Post Title\" style is often better.\n"
|
||||
"- Example: Post \"Chapter 1: The Beginning\" with files \"001.jpg\", \"002.jpg\".\n"
|
||||
" Downloads as: \"001.jpg\", \"002.jpg\".\n\n"
|
||||
"Click to change to: Post Title"
|
||||
)
|
||||
else:
|
||||
self.manga_rename_toggle_button.setText("Name: Unknown Style")
|
||||
self.manga_rename_toggle_button.setToolTip("Manga filename style is in an unknown state.")
|
||||
self.manga_rename_toggle_button.setToolTip(
|
||||
"Manga Filename Style: Unknown\n\n"
|
||||
"The manga filename style is in an unknown state. Please cycle or reset.\n\n"
|
||||
"Click to change to: Post Title"
|
||||
)
|
||||
|
||||
|
||||
def _toggle_manga_filename_style(self):
|
||||
@@ -1816,11 +1885,6 @@ class DownloaderApp(QWidget):
|
||||
# Visible if manga mode is on AND not in "Only Links" or "Only Archives" mode
|
||||
self.manga_rename_toggle_button.setVisible(manga_mode_effectively_on and not (is_only_links_mode or is_only_archives_mode))
|
||||
|
||||
if hasattr(self, 'duplicate_mode_toggle_button'):
|
||||
# Visible if manga mode is OFF AND not in "Only Links" or "Only Archives" mode
|
||||
self.duplicate_mode_toggle_button.setVisible(
|
||||
not manga_mode_effectively_on and not (is_only_links_mode or is_only_archives_mode)
|
||||
)
|
||||
|
||||
if manga_mode_effectively_on:
|
||||
if self.page_range_label: self.page_range_label.setEnabled(False)
|
||||
@@ -1909,12 +1973,11 @@ class DownloaderApp(QWidget):
|
||||
|
||||
raw_skip_words = self.skip_words_input.text().strip()
|
||||
skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()]
|
||||
current_skip_words_scope = self.get_skip_words_scope()
|
||||
|
||||
raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else ""
|
||||
effective_duplicate_file_mode = self.duplicate_file_mode # Start with user's choice
|
||||
allow_multipart = self.allow_multipart_download_setting # Use the internal setting
|
||||
remove_from_filename_words_list = [word.strip() for word in raw_remove_filename_words.split(',') if word.strip()]
|
||||
current_skip_words_scope = self.get_skip_words_scope()
|
||||
current_char_filter_scope = self.get_char_filter_scope()
|
||||
manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False
|
||||
|
||||
@@ -1967,10 +2030,8 @@ class DownloaderApp(QWidget):
|
||||
elif manga_mode:
|
||||
start_page, end_page = None, None
|
||||
|
||||
# effective_duplicate_file_mode will be self.duplicate_file_mode (UI button's state).
|
||||
# Manga Mode specific duplicate handling is now managed entirely within downloader_utils.py
|
||||
self.external_link_queue.clear(); self.extracted_links_cache = []; self._is_processing_external_link_queue = False; self._current_link_post_title = None
|
||||
self.all_kept_original_filenames = []
|
||||
|
||||
raw_character_filters_text = self.character_input.text().strip()
|
||||
|
||||
@@ -2130,6 +2191,7 @@ class DownloaderApp(QWidget):
|
||||
self.total_posts_to_process = 0; self.processed_posts_count = 0; self.download_counter = 0; self.skip_counter = 0
|
||||
self.progress_label.setText("Progress: Initializing...")
|
||||
|
||||
|
||||
effective_num_post_workers = 1
|
||||
effective_num_file_threads_per_worker = 1
|
||||
|
||||
@@ -2179,8 +2241,7 @@ class DownloaderApp(QWidget):
|
||||
f" Skip Words Scope: {current_skip_words_scope.capitalize()}",
|
||||
f" Remove Words from Filename: {', '.join(remove_from_filename_words_list) if remove_from_filename_words_list else 'None'}",
|
||||
f" Compress Images: {'Enabled' if compress_images else 'Disabled'}",
|
||||
f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}",
|
||||
f" Multi-part Download: {'Enabled' if allow_multipart else 'Disabled'}"
|
||||
f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}" # Removed duplicate file handling log
|
||||
])
|
||||
else:
|
||||
log_messages.append(f" Mode: Extracting Links Only")
|
||||
@@ -2192,7 +2253,6 @@ class DownloaderApp(QWidget):
|
||||
log_messages.append(f" ↳ Manga Filename Style: {'Post Title Based' if self.manga_filename_style == STYLE_POST_TITLE else 'Original File Name'}")
|
||||
if filter_character_list_to_pass:
|
||||
log_messages.append(f" ↳ Manga Character Filter (for naming/folder): {', '.join(item['name'] for item in filter_character_list_to_pass)}")
|
||||
log_messages.append(f" ↳ Char Filter Scope (Manga): {current_char_filter_scope.capitalize()}")
|
||||
log_messages.append(f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).")
|
||||
|
||||
should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url
|
||||
@@ -2242,8 +2302,8 @@ class DownloaderApp(QWidget):
|
||||
'signals': self.worker_signals,
|
||||
'manga_filename_style': self.manga_filename_style,
|
||||
'num_file_threads_for_worker': effective_num_file_threads_per_worker,
|
||||
'allow_multipart_download': allow_multipart, # Corrected from previous thought
|
||||
'duplicate_file_mode': effective_duplicate_file_mode # Pass the potentially overridden mode
|
||||
'allow_multipart_download': allow_multipart,
|
||||
# 'duplicate_file_mode' and session-wide tracking removed
|
||||
}
|
||||
|
||||
try:
|
||||
@@ -2258,13 +2318,11 @@ class DownloaderApp(QWidget):
|
||||
'use_subfolders', 'use_post_subfolders', 'custom_folder_name',
|
||||
'compress_images', 'download_thumbnails', 'service', 'user_id',
|
||||
'downloaded_files', 'downloaded_file_hashes', 'remove_from_filename_words_list',
|
||||
'downloaded_files_lock', 'downloaded_file_hashes_lock',
|
||||
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
|
||||
'show_external_links', 'extract_links_only',
|
||||
'num_file_threads_for_worker',
|
||||
'skip_current_file_flag',
|
||||
'start_page', 'end_page', 'target_post_id_from_initial_url',
|
||||
'manga_mode_active', 'unwanted_keywords', 'manga_filename_style', 'duplicate_file_mode',
|
||||
'downloaded_files_lock', 'downloaded_file_hashes_lock',
|
||||
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
|
||||
'show_external_links', 'extract_links_only', 'num_file_threads_for_worker',
|
||||
'start_page', 'end_page', 'target_post_id_from_initial_url', 'duplicate_file_mode',
|
||||
'manga_mode_active', 'unwanted_keywords', 'manga_filename_style',
|
||||
'allow_multipart_download'
|
||||
]
|
||||
args_template['skip_current_file_flag'] = None
|
||||
@@ -2385,18 +2443,17 @@ class DownloaderApp(QWidget):
|
||||
'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list',
|
||||
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
|
||||
'show_external_links', 'extract_links_only', 'allow_multipart_download',
|
||||
'num_file_threads',
|
||||
'skip_current_file_flag',
|
||||
'num_file_threads', 'skip_current_file_flag',
|
||||
'manga_mode_active', 'manga_filename_style'
|
||||
]
|
||||
# Ensure 'allow_multipart_download' is also considered for optional keys if it has a default in PostProcessorWorker
|
||||
ppw_optional_keys_with_defaults = {
|
||||
'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'remove_from_filename_words_list',
|
||||
'show_external_links', 'extract_links_only',
|
||||
'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style'
|
||||
'show_external_links', 'extract_links_only', 'duplicate_file_mode', # Added duplicate_file_mode here
|
||||
'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style',
|
||||
'processed_base_filenames_session_wide', 'processed_base_filenames_session_wide_lock' # Add these
|
||||
}
|
||||
|
||||
|
||||
|
||||
for post_data_item in all_posts_data:
|
||||
if self.cancellation_event.is_set(): break
|
||||
if not isinstance(post_data_item, dict):
|
||||
@@ -2464,12 +2521,10 @@ class DownloaderApp(QWidget):
|
||||
widgets_to_toggle = [ self.download_btn, self.link_input, self.radio_all, self.radio_images, self.radio_videos, self.radio_only_links,
|
||||
self.skip_zip_checkbox, self.skip_rar_checkbox, self.use_subfolders_checkbox, self.compress_images_checkbox,
|
||||
self.download_thumbnails_checkbox, self.use_multithreading_checkbox, self.skip_words_input, self.character_search_input,
|
||||
self.new_char_input, self.add_char_button, self.delete_char_button,
|
||||
self.char_filter_scope_toggle_button,
|
||||
self.start_page_input, self.end_page_input,
|
||||
self.page_range_label, self.to_label, self.character_input, self.custom_folder_input, self.custom_folder_label, self.remove_from_filename_input,
|
||||
self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.multipart_toggle_button,
|
||||
self.skip_scope_toggle_button
|
||||
self.new_char_input, self.add_char_button, self.delete_char_button, self.char_filter_scope_toggle_button, # duplicate_file_mode_toggle_button removed
|
||||
self.start_page_input, self.end_page_input, self.page_range_label, self.to_label,
|
||||
self.character_input, self.custom_folder_input, self.custom_folder_label, self.remove_from_filename_input,
|
||||
self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.multipart_toggle_button, self.skip_scope_toggle_button
|
||||
]
|
||||
|
||||
for widget in widgets_to_toggle:
|
||||
@@ -2663,15 +2718,10 @@ class DownloaderApp(QWidget):
|
||||
self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope)
|
||||
self._update_skip_scope_button_text()
|
||||
|
||||
self.char_filter_scope = CHAR_SCOPE_TITLE
|
||||
self.char_filter_scope = CHAR_SCOPE_FILES # Default to Files on full reset
|
||||
self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
|
||||
self._update_char_filter_scope_button_text()
|
||||
|
||||
self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Reset to default (Delete)
|
||||
self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode)
|
||||
|
||||
self._update_duplicate_mode_button_text()
|
||||
|
||||
self.settings.sync()
|
||||
self._update_manga_filename_style_button_text()
|
||||
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
|
||||
@@ -2693,12 +2743,8 @@ class DownloaderApp(QWidget):
|
||||
|
||||
self.skip_words_scope = SKIP_SCOPE_POSTS
|
||||
self._update_skip_scope_button_text()
|
||||
self.char_filter_scope = CHAR_SCOPE_TITLE
|
||||
self.char_filter_scope = CHAR_SCOPE_FILES # Default to Files
|
||||
self._update_char_filter_scope_button_text()
|
||||
self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Default to DELETE
|
||||
self._update_duplicate_mode_button_text()
|
||||
|
||||
|
||||
self._handle_filter_mode_change(self.radio_all, True)
|
||||
self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked())
|
||||
self.filter_character_list("")
|
||||
@@ -2728,6 +2774,26 @@ class DownloaderApp(QWidget):
|
||||
if hasattr(self, 'multipart_toggle_button'):
|
||||
text = "Multi-part: ON" if self.allow_multipart_download_setting else "Multi-part: OFF"
|
||||
self.multipart_toggle_button.setText(text)
|
||||
if self.allow_multipart_download_setting:
|
||||
self.multipart_toggle_button.setToolTip(
|
||||
"Multi-part Download: ON\n\n"
|
||||
"Enables downloading large files in multiple segments (parts) simultaneously.\n"
|
||||
"- Can significantly speed up downloads for *single large files* (e.g., videos, large archives) if the server supports it.\n"
|
||||
"- May increase CPU/network usage.\n"
|
||||
"- For creator feeds with many *small files* (e.g., images), this might not offer speed benefits and could make the UI/log feel busy.\n"
|
||||
"- If a multi-part download fails for a file, it will automatically retry with a single stream.\n"
|
||||
"- Example: A 500MB video might be downloaded in 5 parts of 100MB each, concurrently.\n\n"
|
||||
"Click to turn OFF (use single-stream for all files)."
|
||||
)
|
||||
else:
|
||||
self.multipart_toggle_button.setToolTip(
|
||||
"Multi-part Download: OFF\n\n"
|
||||
"All files will be downloaded using a single connection (stream).\n"
|
||||
"- This is generally stable and works well for most scenarios, especially for feeds with many smaller files.\n"
|
||||
"- Large files will be downloaded sequentially in one go.\n"
|
||||
"- Example: A 500MB video will be downloaded as one continuous stream.\n\n"
|
||||
"Click to turn ON (enable multi-part for large files, see advisory on click)."
|
||||
)
|
||||
|
||||
def _toggle_multipart_mode(self):
|
||||
# If currently OFF, and user is trying to turn it ON
|
||||
@@ -2762,23 +2828,6 @@ class DownloaderApp(QWidget):
|
||||
self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
|
||||
self.log_signal.emit(f"ℹ️ Multi-part download set to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}")
|
||||
|
||||
def _update_duplicate_mode_button_text(self):
|
||||
if hasattr(self, 'duplicate_mode_toggle_button'):
|
||||
if self.duplicate_file_mode == DUPLICATE_MODE_DELETE:
|
||||
self.duplicate_mode_toggle_button.setText("Duplicates: Delete")
|
||||
elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER:
|
||||
self.duplicate_mode_toggle_button.setText("Duplicates: Move")
|
||||
else: # Should not happen
|
||||
self.duplicate_mode_toggle_button.setText("Duplicates: Move") # Default to Move if unknown
|
||||
|
||||
def _cycle_duplicate_mode(self):
|
||||
if self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER:
|
||||
self.duplicate_file_mode = DUPLICATE_MODE_DELETE
|
||||
else: # If it's DELETE or unknown, cycle back to MOVE
|
||||
self.duplicate_file_mode = DUPLICATE_MODE_MOVE_TO_SUBFOLDER
|
||||
self._update_duplicate_mode_button_text()
|
||||
self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode)
|
||||
self.log_signal.emit(f"ℹ️ Duplicate file handling mode changed to: '{self.duplicate_file_mode.capitalize()}'")
|
||||
|
||||
if __name__ == '__main__':
|
||||
import traceback
|
||||
|
||||
Reference in New Issue
Block a user