diff --git a/Read.png b/Read.png
new file mode 100644
index 0000000..85e3c9f
Binary files /dev/null and b/Read.png differ
diff --git a/downloader_utils.py b/downloader_utils.py
index c834d01..a4de183 100644
--- a/downloader_utils.py
+++ b/downloader_utils.py
@@ -41,8 +41,12 @@ CHAR_SCOPE_FILES = "files"
CHAR_SCOPE_BOTH = "both"
CHAR_SCOPE_COMMENTS = "comments"
+FILE_DOWNLOAD_STATUS_SUCCESS = "success"
+FILE_DOWNLOAD_STATUS_SKIPPED = "skipped"
+FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER = "failed_retry_later"
+
fastapi_app = None
-KNOWN_NAMES = []
+KNOWN_NAMES = [] # This will now store dicts: {'name': str, 'is_group': bool, 'aliases': list[str]}
MIN_SIZE_FOR_MULTIPART_DOWNLOAD = 10 * 1024 * 1024 # 10 MB - Stays the same
MAX_PARTS_FOR_MULTIPART_DOWNLOAD = 15 # Max concurrent connections for a single file
@@ -87,7 +91,19 @@ def clean_folder_name(name):
cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name)
cleaned = cleaned.strip()
cleaned = re.sub(r'\s+', ' ', cleaned)
- return cleaned if cleaned else "untitled_folder"
+
+ if not cleaned: # If empty after initial cleaning
+ return "untitled_folder"
+
+ # Strip all trailing dots and spaces.
+ # This handles cases like "folder...", "folder. .", "folder . ." -> "folder"
+ temp_name = cleaned
+ while len(temp_name) > 0 and (temp_name.endswith('.') or temp_name.endswith(' ')):
+ temp_name = temp_name[:-1]
+
+ # If stripping all trailing dots/spaces made it empty (e.g., original was "."), use default
+ # Also handles if the original name was just spaces and became empty.
+ return temp_name if temp_name else "untitled_folder"
def clean_filename(name):
@@ -120,20 +136,33 @@ def extract_folder_name_from_title(title, unwanted_keywords):
def match_folders_from_title(title, names_to_match, unwanted_keywords):
+ """
+ Matches folder names from a title based on a list of known name objects.
+ Each name object in names_to_match is expected to be a dict:
+ {'name': 'PrimaryFolderName', 'aliases': ['alias1', 'alias2', ...]}
+ """
if not title or not names_to_match: return []
title_lower = title.lower()
matched_cleaned_names = set()
- sorted_names_to_match = sorted(names_to_match, key=len, reverse=True)
+ # Sort by the length of the primary name for matching longer, more specific names first.
+ # This is a heuristic; alias length might also be a factor but primary name length is simpler.
+ sorted_name_objects = sorted(names_to_match, key=lambda x: len(x.get("name", "")), reverse=True)
- for name in sorted_names_to_match:
- name_lower = name.lower()
- if not name_lower: continue
+ for name_obj in sorted_name_objects:
+        primary_folder_name = name_obj.get("name")
+        if not primary_folder_name:
+            continue
+        aliases = name_obj.get("aliases") or [primary_folder_name]
- pattern = r'\b' + re.escape(name_lower) + r'\b'
- if re.search(pattern, title_lower):
- cleaned_name_for_folder = clean_folder_name(name)
- if cleaned_name_for_folder.lower() not in unwanted_keywords:
- matched_cleaned_names.add(cleaned_name_for_folder)
+ for alias in aliases:
+ alias_lower = alias.lower()
+ if not alias_lower: continue
+ pattern = r'\b' + re.escape(alias_lower) + r'\b'
+ if re.search(pattern, title_lower):
+ cleaned_primary_name = clean_folder_name(primary_folder_name)
+ if cleaned_primary_name.lower() not in unwanted_keywords:
+ matched_cleaned_names.add(cleaned_primary_name)
+ break # Found a match for this primary name via one of its aliases
return sorted(list(matched_cleaned_names))
@@ -202,11 +231,20 @@ def extract_post_info(url_string):
return None, None, None
-def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None):
+def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None):
if cancellation_event and cancellation_event.is_set():
logger(" Fetch cancelled before request.")
raise RuntimeError("Fetch operation cancelled by user.")
+ if pause_event and pause_event.is_set():
+ logger(" Post fetching paused...")
+ while pause_event.is_set():
+ if cancellation_event and cancellation_event.is_set():
+ logger(" Post fetching cancelled while paused.")
+ raise RuntimeError("Fetch operation cancelled by user.")
+ time.sleep(0.5)
+ logger(" Post fetching resumed.")
+
paginated_url = f'{api_url_base}?o={offset}'
logger(f" Fetching: {paginated_url} (Page approx. {offset // 50 + 1})")
try:
@@ -228,11 +266,20 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
except Exception as e:
raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}")
-def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None):
+def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None, pause_event=None):
if cancellation_event and cancellation_event.is_set():
logger(" Comment fetch cancelled before request.")
raise RuntimeError("Comment fetch operation cancelled by user.")
+ if pause_event and pause_event.is_set():
+ logger(" Comment fetching paused...")
+ while pause_event.is_set():
+ if cancellation_event and cancellation_event.is_set():
+ logger(" Comment fetching cancelled while paused.")
+ raise RuntimeError("Comment fetch operation cancelled by user.")
+ time.sleep(0.5)
+ logger(" Comment fetching resumed.")
+
comments_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}/comments"
logger(f" Fetching comments: {comments_api_url}")
try:
@@ -254,7 +301,7 @@ def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger,
except Exception as e:
raise RuntimeError(f"Unexpected error fetching comments for post {post_id} ({comments_api_url}): {e}")
-def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False, cancellation_event=None):
+def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False, cancellation_event=None, pause_event=None):
headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
service, user_id, target_post_id = extract_post_info(api_url_input)
@@ -286,11 +333,19 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
all_posts_for_manga_mode = []
current_offset_manga = 0
while True:
+ if pause_event and pause_event.is_set():
+ logger(" Manga mode post fetching paused...")
+ while pause_event.is_set():
+ if cancellation_event and cancellation_event.is_set():
+ logger(" Manga mode post fetching cancelled while paused.")
+ break
+ time.sleep(0.5)
+ if not (cancellation_event and cancellation_event.is_set()): logger(" Manga mode post fetching resumed.")
if cancellation_event and cancellation_event.is_set():
logger(" Manga mode post fetching cancelled.")
break
try:
- posts_batch_manga = fetch_posts_paginated(api_base_url, headers, current_offset_manga, logger, cancellation_event)
+ posts_batch_manga = fetch_posts_paginated(api_base_url, headers, current_offset_manga, logger, cancellation_event, pause_event)
if not isinstance(posts_batch_manga, list):
logger(f"❌ API Error (Manga Mode): Expected list of posts, got {type(posts_batch_manga)}.")
break
@@ -357,6 +412,14 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
logger(f" Starting from page {current_page_num} (calculated offset {current_offset}).")
while True:
+ if pause_event and pause_event.is_set():
+ logger(" Post fetching loop paused...")
+ while pause_event.is_set():
+ if cancellation_event and cancellation_event.is_set():
+ logger(" Post fetching loop cancelled while paused.")
+ break
+ time.sleep(0.5)
+ if not (cancellation_event and cancellation_event.is_set()): logger(" Post fetching loop resumed.")
if cancellation_event and cancellation_event.is_set():
logger(" Post fetching loop cancelled.")
break
@@ -369,7 +432,7 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
break
try:
- posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event)
+ posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event, pause_event)
if not isinstance(posts_batch, list):
logger(f"❌ API Error: Expected list of posts, got {type(posts_batch)} at page {current_page_num} (offset {current_offset}).")
break
@@ -453,10 +516,10 @@ class PostProcessorWorker:
filter_character_list, emitter, # Changed signals to emitter
unwanted_keywords, filter_mode, skip_zip, skip_rar,
use_subfolders, use_post_subfolders, target_post_id_from_initial_url, custom_folder_name,
- compress_images, download_thumbnails, service, user_id,
+ compress_images, download_thumbnails, service, user_id, pause_event, # Added pause_event
api_url_input, cancellation_event,
downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock,
- skip_words_list=None,
+ dynamic_character_filter_holder=None, skip_words_list=None, # Added dynamic_character_filter_holder
skip_words_scope=SKIP_SCOPE_FILES,
show_external_links=False,
extract_links_only=False,
@@ -471,7 +534,8 @@ class PostProcessorWorker:
self.post = post_data
self.download_root = download_root
self.known_names = known_names
- self.filter_character_list_objects = filter_character_list if filter_character_list else []
+ self.filter_character_list_objects_initial = filter_character_list if filter_character_list else [] # Store initial
+ self.dynamic_filter_holder = dynamic_character_filter_holder # Store the holder
self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else set()
self.filter_mode = filter_mode
self.skip_zip = skip_zip
@@ -486,6 +550,7 @@ class PostProcessorWorker:
self.user_id = user_id
self.api_url_input = api_url_input
self.cancellation_event = cancellation_event
+ self.pause_event = pause_event # Store pause_event
self.emitter = emitter # Store the emitter
if not self.emitter:
# This case should ideally be prevented by the caller
@@ -533,7 +598,18 @@ class PostProcessorWorker:
def check_cancel(self):
return self.cancellation_event.is_set()
- def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event,
+ def _check_pause(self, context_message="Operation"):
+ if self.pause_event and self.pause_event.is_set():
+ self.logger(f" {context_message} paused...")
+ while self.pause_event.is_set(): # Loop while pause_event is set
+ if self.check_cancel():
+ self.logger(f" {context_message} cancelled while paused.")
+ return True # Indicates cancellation occurred
+ time.sleep(0.5)
+ if not self.check_cancel(): self.logger(f" {context_message} resumed.")
+ return False # Not cancelled during pause
+
+ def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event, # skip_event is threading.Event
# emitter_for_file_ops, # This will be self.emitter
post_title="", file_index_in_post=0, num_files_in_this_post=1,
manga_date_file_counter_ref=None): # Added manga_date_file_counter_ref
@@ -541,6 +617,22 @@ class PostProcessorWorker:
final_filename_saved_for_return = ""
# target_folder_path is the base character/post folder.
+ def _get_current_character_filters(self):
+ if self.dynamic_filter_holder:
+ return self.dynamic_filter_holder.get_filters()
+ return self.filter_character_list_objects_initial
+
+ def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event,
+ # emitter_for_file_ops, # This will be self.emitter
+ post_title="", file_index_in_post=0, num_files_in_this_post=1, # Added manga_date_file_counter_ref
+ manga_date_file_counter_ref=None,
+ forced_filename_override=None): # New for retries
+ was_original_name_kept_flag = False
+ final_filename_saved_for_return = ""
+ retry_later_details = None # For storing info if retryable failure
+ # target_folder_path is the base character/post folder.
+
+        if self._check_pause(f"File download prep for '{file_info.get('name', 'unknown file')}'"): return 0, 1, "", False, FILE_DOWNLOAD_STATUS_SKIPPED, None
-        if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False
+        if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False, FILE_DOWNLOAD_STATUS_SKIPPED, None
file_url = file_info.get('url')
@@ -549,84 +641,85 @@ class PostProcessorWorker:
# This is the ideal name for the file if it were to be saved in the main target_folder_path.
filename_to_save_in_main_path = ""
- if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_FILES or self.skip_words_scope == SKIP_SCOPE_BOTH):
- filename_to_check_for_skip_words = api_original_filename.lower()
- for skip_word in self.skip_words_list:
- if skip_word.lower() in filename_to_check_for_skip_words:
- self.logger(f" -> Skip File (Keyword in Original Name '{skip_word}'): '{api_original_filename}'. Scope: {self.skip_words_scope}")
- return 0, 1, api_original_filename, False
-
- original_filename_cleaned_base, original_ext = os.path.splitext(clean_filename(api_original_filename))
- if not original_ext.startswith('.'): original_ext = '.' + original_ext if original_ext else ''
-
- if self.manga_mode_active: # Note: duplicate_file_mode is overridden to "Delete" in main.py if manga_mode is on
- if self.manga_filename_style == STYLE_ORIGINAL_NAME:
- filename_to_save_in_main_path = clean_filename(api_original_filename)
- was_original_name_kept_flag = True
- elif self.manga_filename_style == STYLE_POST_TITLE:
- if post_title and post_title.strip():
- cleaned_post_title_base = clean_filename(post_title.strip())
- if num_files_in_this_post > 1:
- if file_index_in_post == 0:
- filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}"
- else:
- filename_to_save_in_main_path = clean_filename(api_original_filename)
- was_original_name_kept_flag = True
- else:
- filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}"
- else:
- filename_to_save_in_main_path = clean_filename(api_original_filename) # Fallback to original if no title
- self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save_in_main_path}'.")
- elif self.manga_filename_style == STYLE_DATE_BASED:
- current_thread_name = threading.current_thread().name
- self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Manga Date Mode. Counter Ref ID: {id(manga_date_file_counter_ref)}, Value before access: {manga_date_file_counter_ref}")
-
- if manga_date_file_counter_ref is not None and len(manga_date_file_counter_ref) == 2:
- counter_val_for_filename = -1
- counter_lock = manga_date_file_counter_ref[1]
-
- self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Attempting to acquire lock. Counter value before lock: {manga_date_file_counter_ref[0]}")
- with counter_lock:
- self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Lock acquired. Counter value at lock acquisition: {manga_date_file_counter_ref[0]}")
- counter_val_for_filename = manga_date_file_counter_ref[0]
- # Increment is done here, under lock, before this number is used by another thread.
- # This number is now "reserved" for this file.
- # If this file download fails, this number is "lost" (sequence will have a gap). This is acceptable.
- manga_date_file_counter_ref[0] += 1
- self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Incremented counter. New counter value: {manga_date_file_counter_ref[0]}. Filename will use: {counter_val_for_filename}")
-
- filename_to_save_in_main_path = f"{counter_val_for_filename:03d}{original_ext}"
- self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Lock released. Generated filename: {filename_to_save_in_main_path}")
- else:
- self.logger(f"⚠️ Manga Date Mode: Counter ref not provided or malformed for '{api_original_filename}'. Using original. Ref: {manga_date_file_counter_ref}")
- # This log line below had a typo, fixed to reflect Date Mode context
- filename_to_save_in_main_path = clean_filename(api_original_filename)
- self.logger(f"⚠️ Manga mode (Date Based Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path}' for post {original_post_id_for_log}.")
- else:
- self.logger(f"⚠️ Manga mode: Unknown filename style '{self.manga_filename_style}'. Defaulting to original filename for '{api_original_filename}'.")
- filename_to_save_in_main_path = clean_filename(api_original_filename)
-
- if not filename_to_save_in_main_path:
- filename_to_save_in_main_path = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}"
- self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save_in_main_path}'.")
- was_original_name_kept_flag = False
- else:
- filename_to_save_in_main_path = clean_filename(api_original_filename)
- was_original_name_kept_flag = False
+ if forced_filename_override:
+ filename_to_save_in_main_path = forced_filename_override
+ self.logger(f" Retrying with forced filename: '{filename_to_save_in_main_path}'")
+ # was_original_name_kept_flag might need to be determined based on how forced_filename_override was created
+ else:
+ if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_FILES or self.skip_words_scope == SKIP_SCOPE_BOTH):
+ filename_to_check_for_skip_words = api_original_filename.lower()
+ for skip_word in self.skip_words_list:
+ if skip_word.lower() in filename_to_check_for_skip_words:
+ self.logger(f" -> Skip File (Keyword in Original Name '{skip_word}'): '{api_original_filename}'. Scope: {self.skip_words_scope}")
+ return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
- if self.remove_from_filename_words_list and filename_to_save_in_main_path:
- base_name_for_removal, ext_for_removal = os.path.splitext(filename_to_save_in_main_path)
- modified_base_name = base_name_for_removal
- for word_to_remove in self.remove_from_filename_words_list:
- if not word_to_remove: continue
- pattern = re.compile(re.escape(word_to_remove), re.IGNORECASE)
- modified_base_name = pattern.sub("", modified_base_name)
- modified_base_name = re.sub(r'[_.\s-]+', '_', modified_base_name)
- modified_base_name = modified_base_name.strip('_')
- if modified_base_name and modified_base_name != ext_for_removal.lstrip('.'):
- filename_to_save_in_main_path = modified_base_name + ext_for_removal
+ original_filename_cleaned_base, original_ext = os.path.splitext(clean_filename(api_original_filename))
+ if not original_ext.startswith('.'): original_ext = '.' + original_ext if original_ext else ''
+
+ if self.manga_mode_active: # Note: duplicate_file_mode is overridden to "Delete" in main.py if manga_mode is on
+ if self.manga_filename_style == STYLE_ORIGINAL_NAME:
+ filename_to_save_in_main_path = clean_filename(api_original_filename)
+ was_original_name_kept_flag = True
+ elif self.manga_filename_style == STYLE_POST_TITLE:
+ if post_title and post_title.strip():
+ cleaned_post_title_base = clean_filename(post_title.strip())
+ if num_files_in_this_post > 1:
+ if file_index_in_post == 0:
+ filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}"
+ else:
+ filename_to_save_in_main_path = clean_filename(api_original_filename)
+ was_original_name_kept_flag = True
+ else:
+ filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}"
+ else:
+ filename_to_save_in_main_path = clean_filename(api_original_filename) # Fallback to original if no title
+ self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save_in_main_path}'.")
+ elif self.manga_filename_style == STYLE_DATE_BASED:
+ current_thread_name = threading.current_thread().name
+ # self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Manga Date Mode. Counter Ref ID: {id(manga_date_file_counter_ref)}, Value before access: {manga_date_file_counter_ref}")
+
+ if manga_date_file_counter_ref is not None and len(manga_date_file_counter_ref) == 2:
+ counter_val_for_filename = -1
+ counter_lock = manga_date_file_counter_ref[1]
+
+ # self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Attempting to acquire lock. Counter value before lock: {manga_date_file_counter_ref[0]}")
+ with counter_lock:
+ # self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Lock acquired. Counter value at lock acquisition: {manga_date_file_counter_ref[0]}")
+ counter_val_for_filename = manga_date_file_counter_ref[0]
+ manga_date_file_counter_ref[0] += 1
+ # self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Incremented counter. New counter value: {manga_date_file_counter_ref[0]}. Filename will use: {counter_val_for_filename}")
+
+ filename_to_save_in_main_path = f"{counter_val_for_filename:03d}{original_ext}"
+ # self.logger(f"DEBUG_COUNTER [{current_thread_name}, PostID: {original_post_id_for_log}]: File '{api_original_filename}'. Lock released. Generated filename: {filename_to_save_in_main_path}")
+ else:
+ self.logger(f"⚠️ Manga Date Mode: Counter ref not provided or malformed for '{api_original_filename}'. Using original. Ref: {manga_date_file_counter_ref}")
+ filename_to_save_in_main_path = clean_filename(api_original_filename)
+ self.logger(f"⚠️ Manga mode (Date Based Style Fallback): Using cleaned original filename '{filename_to_save_in_main_path}' for post {original_post_id_for_log}.")
+ else:
+ self.logger(f"⚠️ Manga mode: Unknown filename style '{self.manga_filename_style}'. Defaulting to original filename for '{api_original_filename}'.")
+ filename_to_save_in_main_path = clean_filename(api_original_filename)
+
+ if not filename_to_save_in_main_path:
+ filename_to_save_in_main_path = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}"
+ self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save_in_main_path}'.")
+ was_original_name_kept_flag = False
else:
- filename_to_save_in_main_path = base_name_for_removal + ext_for_removal
+ filename_to_save_in_main_path = clean_filename(api_original_filename)
+ was_original_name_kept_flag = False
+
+ if self.remove_from_filename_words_list and filename_to_save_in_main_path:
+ base_name_for_removal, ext_for_removal = os.path.splitext(filename_to_save_in_main_path)
+ modified_base_name = base_name_for_removal
+ for word_to_remove in self.remove_from_filename_words_list:
+ if not word_to_remove: continue
+ pattern = re.compile(re.escape(word_to_remove), re.IGNORECASE)
+ modified_base_name = pattern.sub("", modified_base_name)
+ modified_base_name = re.sub(r'[_.\s-]+', '_', modified_base_name)
+ modified_base_name = modified_base_name.strip('_')
+ if modified_base_name and modified_base_name != ext_for_removal.lstrip('.'):
+ filename_to_save_in_main_path = modified_base_name + ext_for_removal
+ else:
+ filename_to_save_in_main_path = base_name_for_removal + ext_for_removal
if not self.download_thumbnails:
is_img_type = is_image(api_original_filename)
@@ -636,46 +729,33 @@ class PostProcessorWorker:
if self.filter_mode == 'archive':
if not is_archive_type:
self.logger(f" -> Filter Skip (Archive Mode): '{api_original_filename}' (Not an Archive).")
- return 0, 1, api_original_filename, False
+ return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
elif self.filter_mode == 'image':
if not is_img_type:
self.logger(f" -> Filter Skip: '{api_original_filename}' (Not Image).")
- return 0, 1, api_original_filename, False
+ return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
elif self.filter_mode == 'video':
if not is_vid_type:
self.logger(f" -> Filter Skip: '{api_original_filename}' (Not Video).")
- return 0, 1, api_original_filename, False
+ return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
if self.skip_zip and is_zip(api_original_filename):
self.logger(f" -> Pref Skip: '{api_original_filename}' (ZIP).")
- return 0, 1, api_original_filename, False
+ return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
if self.skip_rar and is_rar(api_original_filename):
self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).")
- return 0, 1, api_original_filename, False
+ return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
- # --- Pre-Download Duplicate Handling (Standard Mode Only - Manga mode has its own suffixing) ---
- if not self.manga_mode_active:
- path_in_main_folder_check = os.path.join(target_folder_path, filename_to_save_in_main_path)
- is_duplicate_by_path = os.path.exists(path_in_main_folder_check) and \
- os.path.getsize(path_in_main_folder_check) > 0
-
- is_duplicate_by_session_name = False
- with self.downloaded_files_lock:
- if filename_to_save_in_main_path in self.downloaded_files:
- is_duplicate_by_session_name = True
-
- if is_duplicate_by_path or is_duplicate_by_session_name:
- reason = "Path Exists" if is_duplicate_by_path else "Session Name"
- self.logger(f" -> Skip Duplicate ({reason}, Pre-DL): '{filename_to_save_in_main_path}'. Skipping download.")
- with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark as processed
- return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
-
+ # --- Pre-Download Duplicate Handling ---
+ # Skipping based on filename before download is removed to allow suffixing for files from different posts.
+ # Hash-based skipping occurs after download.
+ # Physical path existence is handled by suffixing logic later.
# Ensure base target folder exists (used for .part file with multipart)
try:
os.makedirs(target_folder_path, exist_ok=True) # For .part file
except OSError as e:
self.logger(f" ❌ Critical error creating directory '{target_folder_path}': {e}. Skipping file '{api_original_filename}'.")
- return 0, 1, api_original_filename, False
+ return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None # Treat as skip
# --- Download Attempt ---
max_retries = 3
@@ -685,8 +765,10 @@ class PostProcessorWorker:
file_content_bytes = None
total_size_bytes = 0
download_successful_flag = False
+ last_exception_for_retry_later = None
for attempt_num_single_stream in range(max_retries + 1):
+ if self._check_pause(f"File download attempt for '{api_original_filename}'"): break
if self.check_cancel() or (skip_event and skip_event.is_set()): break
try:
if attempt_num_single_stream > 0:
@@ -703,7 +785,8 @@ class PostProcessorWorker:
attempt_multipart = (self.allow_multipart_download and MULTIPART_DOWNLOADER_AVAILABLE and
num_parts_for_file > 1 and total_size_bytes > MIN_SIZE_FOR_MULTIPART_DOWNLOAD and
'bytes' in response.headers.get('Accept-Ranges', '').lower())
-
+
+ if self._check_pause(f"Multipart decision for '{api_original_filename}'"): break # Check pause before potentially long operation
if attempt_multipart:
response.close()
self._emit_signal('file_download_status', False)
@@ -713,7 +796,8 @@ class PostProcessorWorker:
mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts(
file_url, mp_save_path_base_for_part, total_size_bytes, num_parts_for_file, headers, api_original_filename,
emitter_for_multipart=self.emitter, # Pass the worker's emitter
- cancellation_event=self.cancellation_event, skip_event=skip_event, logger_func=self.logger
+ cancellation_event=self.cancellation_event, skip_event=skip_event, logger_func=self.logger,
+ pause_event=self.pause_event # Pass pause_event
)
if mp_success:
download_successful_flag = True
@@ -734,6 +818,7 @@ class PostProcessorWorker:
last_progress_time = time.time()
for chunk in response.iter_content(chunk_size=1 * 1024 * 1024):
+ if self._check_pause(f"Chunk download for '{api_original_filename}'"): break
if self.check_cancel() or (skip_event and skip_event.is_set()): break
if chunk:
file_content_buffer.write(chunk); md5_hasher.update(chunk)
@@ -742,7 +827,7 @@ class PostProcessorWorker:
self._emit_signal('file_progress', api_original_filename, (current_attempt_downloaded_bytes, total_size_bytes))
last_progress_time = time.time()
- if self.check_cancel() or (skip_event and skip_event.is_set()):
+ if self.check_cancel() or (skip_event and skip_event.is_set()) or (self.pause_event and self.pause_event.is_set()):
if file_content_buffer: file_content_buffer.close(); break
if current_attempt_downloaded_bytes > 0 or (total_size_bytes == 0 and response.status_code == 200):
@@ -756,9 +841,11 @@ class PostProcessorWorker:
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
self.logger(f" ❌ Download Error (Retryable): {api_original_filename}. Error: {e}")
+ last_exception_for_retry_later = e # Store this specific exception
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
except requests.exceptions.RequestException as e:
self.logger(f" ❌ Download Error (Non-Retryable): {api_original_filename}. Error: {e}")
+ last_exception_for_retry_later = e # Store this too
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close(); break
except Exception as e:
self.logger(f" ❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}")
@@ -770,16 +857,34 @@ class PostProcessorWorker:
final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes
self._emit_signal('file_progress', api_original_filename, (downloaded_size_bytes, final_total_for_progress))
- if self.check_cancel() or (skip_event and skip_event.is_set()):
+ if self.check_cancel() or (skip_event and skip_event.is_set()) or (self.pause_event and self.pause_event.is_set() and not download_successful_flag):
self.logger(f" ⚠️ Download process interrupted for {api_original_filename}.")
if file_content_bytes: file_content_bytes.close()
- return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
+ return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
if not download_successful_flag:
self.logger(f"❌ Download failed for '{api_original_filename}' after {max_retries + 1} attempts.")
if file_content_bytes: file_content_bytes.close()
- return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
+
+ # Check if this failure is one we want to mark for later retry
+ if isinstance(last_exception_for_retry_later, http.client.IncompleteRead):
+ self.logger(f" Marking '{api_original_filename}' for potential retry later due to IncompleteRead.")
+ retry_later_details = {
+ 'file_info': file_info,
+ 'target_folder_path': target_folder_path, # This is the base character/post folder
+ 'headers': headers, # Original headers
+ 'original_post_id_for_log': original_post_id_for_log,
+ 'post_title': post_title,
+ 'file_index_in_post': file_index_in_post,
+ 'num_files_in_this_post': num_files_in_this_post,
+ 'forced_filename_override': filename_to_save_in_main_path, # The name it was trying to save as
+ 'manga_mode_active_for_file': self.manga_mode_active, # Store context
+ 'manga_filename_style_for_file': self.manga_filename_style, # Store context
+ }
+ return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, retry_later_details
+ return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None # Generic failure
+ if self._check_pause(f"Post-download hash check for '{api_original_filename}'"): return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
# --- Universal Post-Download Hash Check ---
with self.downloaded_file_hashes_lock:
if calculated_file_hash in self.downloaded_file_hashes:
@@ -791,8 +896,8 @@ class PostProcessorWorker:
part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
if os.path.exists(part_file_to_remove):
try: os.remove(part_file_to_remove);
- except OSError: self.logger(f" -> Failed to remove .part file for hash duplicate: {part_file_to_remove}")
- return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
+ except OSError: self.logger(f" -> Failed to remove .part file for hash duplicate: {part_file_to_remove}") # type: ignore
+ return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
# --- Determine Save Location and Final Filename ---
effective_save_folder = target_folder_path # Default: main character/post folder
@@ -811,7 +916,7 @@ class PostProcessorWorker:
if not isinstance(file_content_bytes, BytesIO):
part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
if os.path.exists(part_file_to_remove): os.remove(part_file_to_remove)
- return 0, 1, api_original_filename, False
+ return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
# --- Image Compression ---
# This operates on file_content_bytes (which is BytesIO or a file handle from multipart)
@@ -823,6 +928,7 @@ class PostProcessorWorker:
is_img_for_compress_check = is_image(api_original_filename)
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...")
+ if self._check_pause(f"Image compression for '{api_original_filename}'"): return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None # Allow pause before compression
try:
file_content_bytes.seek(0)
with Image.open(file_content_bytes) as img_obj:
@@ -860,7 +966,7 @@ class PostProcessorWorker:
if final_filename_on_disk != filename_after_compression: # Log if a suffix was applied
self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')")
# else: for STYLE_DATE_BASED, final_filename_on_disk remains filename_after_compression.
-
+ if self._check_pause(f"File saving for '{final_filename_on_disk}'"): return 0, 1, final_filename_on_disk, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
# --- Save File ---
final_save_path = os.path.join(effective_save_folder, final_filename_on_disk)
@@ -893,14 +999,14 @@ class PostProcessorWorker:
final_filename_saved_for_return = final_filename_on_disk
self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'")
# Session-wide base name tracking removed.
- time.sleep(0.05)
- return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag
+ time.sleep(0.05) # Brief pause after successful save
+ return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SUCCESS, None
except Exception as save_err:
self.logger(f"❌ Save Fail for '{final_filename_on_disk}': {save_err}")
if os.path.exists(final_save_path):
try: os.remove(final_save_path);
except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
- return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
+ return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None # Treat save fail as skip
finally:
# Ensure all handles are closed
if data_to_write_after_compression and hasattr(data_to_write_after_compression, 'close'):
@@ -914,9 +1020,15 @@ class PostProcessorWorker:
def process(self):
- if self.check_cancel(): return 0, 0, []
+ if self._check_pause(f"Post processing for ID {self.post.get('id', 'N/A')}"): return 0,0,[], []
+ if self.check_cancel(): return 0, 0, [], []
+ # Get the potentially updated character filters at the start of processing this post
+ current_character_filters = self._get_current_character_filters()
+ # self.logger(f"DEBUG: Post {post_id}, Worker using filters: {[(f['name'], f['aliases']) for f in current_character_filters]}")
+
kept_original_filenames_for_log = []
+ retryable_failures_this_post = [] # New list to store retryable failure details
total_downloaded_this_post = 0
total_skipped_this_post = 0
@@ -945,11 +1057,12 @@ class PostProcessorWorker:
post_is_candidate_by_file_char_match_in_comment_scope = False
char_filter_that_matched_file_in_comment_scope = None
char_filter_that_matched_comment = None
-
- if self.filter_character_list_objects and \
+
+ if current_character_filters and \
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH):
# self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. Scope: {self.char_filter_scope}")
- for idx, filter_item_obj in enumerate(self.filter_character_list_objects):
+ if self._check_pause(f"Character title filter for post {post_id}"): return 0, num_potential_files_in_post, [], []
+ for idx, filter_item_obj in enumerate(current_character_filters):
if self.check_cancel(): break
# self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}")
terms_to_check_for_title = list(filter_item_obj["aliases"])
@@ -992,14 +1105,14 @@ class PostProcessorWorker:
# --- End population of all_files_from_post_api_for_char_check ---
- if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
+ if current_character_filters and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
self.logger(f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id}'.")
+ if self._check_pause(f"File check (comments scope) for post {post_id}"): return 0, num_potential_files_in_post, [], []
for file_info_item in all_files_from_post_api_for_char_check: # Use the pre-populated list of file names
if self.check_cancel(): break
current_api_original_filename_for_check = file_info_item.get('_original_name_for_log')
if not current_api_original_filename_for_check: continue
-
- for filter_item_obj in self.filter_character_list_objects:
+ for filter_item_obj in current_character_filters:
terms_to_check = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check:
terms_to_check.append(filter_item_obj["name"])
@@ -1014,8 +1127,9 @@ class PostProcessorWorker:
if post_is_candidate_by_file_char_match_in_comment_scope: break
self.logger(f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope}")
- if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
+ if current_character_filters and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
if not post_is_candidate_by_file_char_match_in_comment_scope:
+ if self._check_pause(f"Comment check for post {post_id}"): return 0, num_potential_files_in_post, [], []
self.logger(f" [Char Scope: Comments] Phase 2: No file match found. Checking post comments for post ID '{post_id}'.")
try:
parsed_input_url_for_comments = urlparse(self.api_url_input)
@@ -1026,7 +1140,7 @@ class PostProcessorWorker:
comments_data = fetch_post_comments(
api_domain_for_comments, self.service, self.user_id, post_id,
- headers, self.logger, self.cancellation_event
+ headers, self.logger, self.cancellation_event, self.pause_event # Pass pause_event
)
if comments_data:
self.logger(f" Fetched {len(comments_data)} comments for post {post_id}.")
@@ -1038,7 +1152,7 @@ class PostProcessorWorker:
cleaned_comment_text = strip_html_tags(raw_comment_content)
if not cleaned_comment_text.strip(): continue
- for filter_item_obj in self.filter_character_list_objects:
+ for filter_item_obj in current_character_filters:
terms_to_check_comment = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_comment:
terms_to_check_comment.append(filter_item_obj["name"])
@@ -1064,20 +1178,21 @@ class PostProcessorWorker:
self.logger(f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id}' because a file match already made it a candidate.")
# --- Skip Post Logic based on Title or Comment Scope (if filters are active) ---
- if self.filter_character_list_objects:
+ if current_character_filters: # Check if any filters are defined
if self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
self._emit_signal('missed_character_post', post_title, "No title match for character filter")
- return 0, num_potential_files_in_post, []
+ return 0, num_potential_files_in_post, [], []
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and \
not post_is_candidate_by_file_char_match_in_comment_scope and \
not post_is_candidate_by_comment_char_match: # MODIFIED: Check both file and comment match flags
self.logger(f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id}', Title '{post_title[:50]}...'")
- if self.signals and hasattr(self.signals, 'missed_character_post_signal'):
+ if self.emitter and hasattr(self.emitter, 'missed_character_post_signal'): # Check emitter
self._emit_signal('missed_character_post', post_title, "No character match in files or comments (Comments scope)")
- return 0, num_potential_files_in_post, []
+ return 0, num_potential_files_in_post, [], []
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
+ if self._check_pause(f"Skip words (post title) for post {post_id}"): return 0, num_potential_files_in_post, [], []
post_title_lower = post_title.lower()
for skip_word in self.skip_words_list:
if skip_word.lower() in post_title_lower:
@@ -1085,14 +1200,14 @@ class PostProcessorWorker:
# If you want these in the "Missed Character Log" too, you'd add a signal emit here.
# For now, sticking to the request for character filter misses.
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'. Scope: {self.skip_words_scope}")
- return 0, num_potential_files_in_post, []
-
- if not self.extract_links_only and self.manga_mode_active and self.filter_character_list_objects and \
+ return 0, num_potential_files_in_post, [], []
+
+ if not self.extract_links_only and self.manga_mode_active and current_character_filters and \
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and \
not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title[:50]}' doesn't match filters.")
self._emit_signal('missed_character_post', post_title, "Manga Mode: No title match for character filter (Title/Both scope)")
- return 0, num_potential_files_in_post, []
+ return 0, num_potential_files_in_post, [], []
if not isinstance(post_attachments, list):
self.logger(f"⚠️ Corrupt attachment data for post {post_id} (expected list, got {type(post_attachments)}). Skipping attachments.")
@@ -1100,7 +1215,8 @@ class PostProcessorWorker:
base_folder_names_for_post_content = []
if not self.extract_links_only and self.use_subfolders:
- primary_char_filter_for_folder = None
+ if self._check_pause(f"Subfolder determination for post {post_id}"): return 0, num_potential_files_in_post, [], []
+ primary_char_filter_for_folder = None # type: ignore
log_reason_for_folder = ""
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment:
@@ -1115,14 +1231,17 @@ class PostProcessorWorker:
primary_char_filter_for_folder = char_filter_that_matched_title
log_reason_for_folder = "Matched char filter in title"
# If scope is FILES, primary_char_filter_for_folder will be None here. Folder determined per file.
-
+
+ # When determining base_folder_names_for_post_content without a direct character filter match:
if primary_char_filter_for_folder:
base_folder_names_for_post_content = [clean_folder_name(primary_char_filter_for_folder["name"])]
self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}")
- elif not self.filter_character_list_objects: # No char filters defined, use generic logic
+ elif not current_character_filters: # No char filters defined, use generic logic
derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
if derived_folders:
- base_folder_names_for_post_content.extend(derived_folders)
+ # Use the live KNOWN_NAMES from downloader_utils for generic title parsing
+ # self.known_names is a snapshot from when the worker was created.
+ base_folder_names_for_post_content.extend(match_folders_from_title(post_title, KNOWN_NAMES, self.unwanted_keywords))
else:
base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
@@ -1132,14 +1251,16 @@ class PostProcessorWorker:
# The folder will be determined by char_filter_info_that_matched_file later.
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
- for folder_name_to_check in base_folder_names_for_post_content:
+ if self._check_pause(f"Folder keyword skip check for post {post_id}"): return 0, num_potential_files_in_post, [], []
+ for folder_name_to_check in base_folder_names_for_post_content: # type: ignore
if not folder_name_to_check: continue
if any(skip_word.lower() in folder_name_to_check.lower() for skip_word in self.skip_words_list):
matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in folder_name_to_check.lower()), "unknown_skip_word")
self.logger(f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check}' contains '{matched_skip}'.")
- return 0, num_potential_files_in_post, []
+ return 0, num_potential_files_in_post, [], []
if (self.show_external_links or self.extract_links_only) and post_content_html:
+ if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], []
try:
unique_links_data = {}
for match in link_pattern.finditer(post_content_html):
@@ -1170,7 +1291,7 @@ class PostProcessorWorker:
if self.extract_links_only:
self.logger(f" Extract Links Only mode: Finished processing post {post_id} for links.")
- return 0, 0, []
+ return 0, 0, [], []
all_files_from_post_api = []
api_file_domain = urlparse(self.api_url_input).netloc
@@ -1208,7 +1329,7 @@ class PostProcessorWorker:
all_files_from_post_api = [finfo for finfo in all_files_from_post_api if finfo['_is_thumbnail']]
if not all_files_from_post_api:
self.logger(f" -> No image thumbnails found for post {post_id} in thumbnail-only mode.")
- return 0, 0, []
+ return 0, 0, [], []
# Sort files within the post by original name if in Date Based manga mode
if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED:
@@ -1223,7 +1344,7 @@ class PostProcessorWorker:
if not all_files_from_post_api:
self.logger(f" No files found to download for post {post_id}.")
- return 0, 0, []
+ return 0, 0, [], []
files_to_download_info_list = []
processed_original_filenames_in_this_post = set()
@@ -1239,7 +1360,7 @@ class PostProcessorWorker:
if not files_to_download_info_list:
self.logger(f" All files for post {post_id} were duplicate original names or skipped earlier.")
- return 0, total_skipped_this_post, []
+ return 0, total_skipped_this_post, [], []
num_files_in_this_post_for_naming = len(files_to_download_info_list)
@@ -1249,6 +1370,7 @@ class PostProcessorWorker:
with ThreadPoolExecutor(max_workers=self.num_file_threads, thread_name_prefix=f'P{post_id}File_') as file_pool:
futures_list = []
for file_idx, file_info_to_dl in enumerate(files_to_download_info_list):
+ if self._check_pause(f"File processing loop for post {post_id}, file {file_idx}"): break
if self.check_cancel(): break
current_api_original_filename = file_info_to_dl.get('_original_name_for_log')
@@ -1256,11 +1378,11 @@ class PostProcessorWorker:
file_is_candidate_by_char_filter_scope = False
char_filter_info_that_matched_file = None
- if not self.filter_character_list_objects:
+ if not current_character_filters:
file_is_candidate_by_char_filter_scope = True
else:
if self.char_filter_scope == CHAR_SCOPE_FILES:
- for filter_item_obj in self.filter_character_list_objects:
+ for filter_item_obj in current_character_filters:
terms_to_check_for_file = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_for_file:
terms_to_check_for_file.append(filter_item_obj["name"])
@@ -1285,7 +1407,7 @@ class PostProcessorWorker:
self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. Scope: Both (Title part).")
else:
# This part is for the "File" part of "Both" scope
- for filter_item_obj_both_file in self.filter_character_list_objects:
+ for filter_item_obj_both_file in current_character_filters:
terms_to_check_for_file_both = list(filter_item_obj_both_file["aliases"])
if filter_item_obj_both_file["is_group"] and filter_item_obj_both_file["name"] not in terms_to_check_for_file_both:
terms_to_check_for_file_both.append(filter_item_obj_both_file["name"])
@@ -1295,7 +1417,7 @@ class PostProcessorWorker:
for term_to_match in unique_terms_for_file_both_check:
if is_filename_match_for_character(current_api_original_filename, term_to_match):
file_is_candidate_by_char_filter_scope = True
- char_filter_info_that_matched_file = filter_item_obj_both_file # Use the filter that matched the file
+ char_filter_info_that_matched_file = filter_item_obj_both_file
self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Both (File part).")
break
if file_is_candidate_by_char_filter_scope: break
@@ -1359,11 +1481,13 @@ class PostProcessorWorker:
f_to_cancel.cancel()
break
try:
- dl_count, skip_count, actual_filename_saved, original_kept_flag = future.result()
+ dl_count, skip_count, actual_filename_saved, original_kept_flag, status, retry_details = future.result()
total_downloaded_this_post += dl_count
total_skipped_this_post += skip_count
if original_kept_flag and dl_count > 0 and actual_filename_saved:
kept_original_filenames_for_log.append(actual_filename_saved)
+ if status == FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER and retry_details:
+ retryable_failures_this_post.append(retry_details)
except CancelledError:
self.logger(f" File download task for post {post_id} was cancelled.")
total_skipped_this_post += 1
@@ -1377,22 +1501,23 @@ class PostProcessorWorker:
if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled.");
else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}")
- return total_downloaded_this_post, total_skipped_this_post, kept_original_filenames_for_log
+ return total_downloaded_this_post, total_skipped_this_post, kept_original_filenames_for_log, retryable_failures_this_post
class DownloadThread(QThread):
- progress_signal = pyqtSignal(str)
+ progress_signal = pyqtSignal(str) # Already QObject, no need to change
add_character_prompt_signal = pyqtSignal(str)
file_download_status_signal = pyqtSignal(bool)
finished_signal = pyqtSignal(int, int, bool, list)
external_link_signal = pyqtSignal(str, str, str, str)
file_progress_signal = pyqtSignal(str, object)
+ retryable_file_failed_signal = pyqtSignal(list) # New: list of retry_details dicts
missed_character_post_signal = pyqtSignal(str, str) # New: post_title, reason
def __init__(self, api_url_input, output_dir, known_names_copy,
cancellation_event,
- filter_character_list=None,
+ pause_event, filter_character_list=None, dynamic_character_filter_holder=None, # Added pause_event and holder
filter_mode='all', skip_zip=True, skip_rar=True,
use_subfolders=True, use_post_subfolders=False, custom_folder_name=None, compress_images=False,
download_thumbnails=False, service=None, user_id=None,
@@ -1408,7 +1533,7 @@ class DownloadThread(QThread):
manga_mode_active=False,
unwanted_keywords=None,
manga_filename_style=STYLE_POST_TITLE,
- char_filter_scope=CHAR_SCOPE_FILES,
+ char_filter_scope=CHAR_SCOPE_FILES, # manga_date_file_counter_ref removed from here
remove_from_filename_words_list=None,
allow_multipart_download=True,
manga_date_file_counter_ref=None, # New parameter
@@ -1418,9 +1543,11 @@ class DownloadThread(QThread):
self.output_dir = output_dir
self.known_names = list(known_names_copy)
self.cancellation_event = cancellation_event
+ self.pause_event = pause_event # Store pause_event
self.skip_current_file_flag = skip_current_file_flag
self.initial_target_post_id = target_post_id_from_initial_url
- self.filter_character_list_objects = filter_character_list if filter_character_list else []
+ self.filter_character_list_objects_initial = filter_character_list if filter_character_list else [] # Store initial
+ self.dynamic_filter_holder = dynamic_character_filter_holder # Store the holder
self.filter_mode = filter_mode
self.skip_zip = skip_zip
self.skip_rar = skip_rar
@@ -1453,7 +1580,8 @@ class DownloadThread(QThread):
self.char_filter_scope = char_filter_scope
self.remove_from_filename_words_list = remove_from_filename_words_list
self.allow_multipart_download = allow_multipart_download
- self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store for passing to worker
+ self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store for passing to worker by DownloadThread
+ # self.manga_date_scan_dir = manga_date_scan_dir # Store scan directory
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
self.compress_images = False
@@ -1464,6 +1592,16 @@ class DownloadThread(QThread):
def isInterruptionRequested(self):
return self.cancellation_event.is_set() or super().isInterruptionRequested()
+ def _check_pause_self(self, context_message="DownloadThread operation"):
+ if self.pause_event and self.pause_event.is_set():
+ self.logger(f" {context_message} paused...")
+ while self.pause_event.is_set():
+ if self.isInterruptionRequested():
+ self.logger(f" {context_message} cancelled while paused.")
+ return True # Indicates cancellation occurred
+ time.sleep(0.5)
+ if not self.isInterruptionRequested(): self.logger(f" {context_message} resumed.")
+ return False
def skip_file(self):
if self.isRunning() and self.skip_current_file_flag:
@@ -1477,6 +1615,33 @@ class DownloadThread(QThread):
grand_total_skipped_files = 0
grand_list_of_kept_original_filenames = []
was_process_cancelled = False
+
+ # Initialize manga_date_file_counter_ref if needed (moved from main.py)
+ # This is now done within the DownloadThread's run method.
+ current_manga_date_file_counter_ref = self.manga_date_file_counter_ref
+ if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED and \
+ not self.extract_links_only and current_manga_date_file_counter_ref is None: # Check if it needs calculation
+
+ # series_scan_directory calculation logic (simplified for direct use here)
+ series_scan_dir = self.output_dir
+ if self.use_subfolders:
+ if self.filter_character_list_objects_initial and self.filter_character_list_objects_initial[0] and self.filter_character_list_objects_initial[0].get("name"):
+ series_folder_name = clean_folder_name(self.filter_character_list_objects_initial[0]["name"])
+ series_scan_dir = os.path.join(series_scan_dir, series_folder_name)
+ elif self.service and self.user_id:
+ creator_based_folder_name = clean_folder_name(self.user_id)
+ series_scan_dir = os.path.join(series_scan_dir, creator_based_folder_name)
+
+ highest_num = 0
+ if os.path.isdir(series_scan_dir):
+ self.logger(f"ℹ️ [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir}'...")
+ for dirpath, _, filenames_in_dir in os.walk(series_scan_dir):
+ for filename_to_check in filenames_in_dir:
+ base_name_no_ext = os.path.splitext(filename_to_check)[0]
+ match = re.match(r"(\d{3,})", base_name_no_ext)
+ if match: highest_num = max(highest_num, int(match.group(1)))
+ current_manga_date_file_counter_ref = [highest_num + 1, threading.Lock()]
+ self.logger(f"ℹ️ [Thread] Manga Date Mode: Initialized counter at {current_manga_date_file_counter_ref[0]}.")
# This DownloadThread (being a QThread) will use its own signals object
# to communicate with PostProcessorWorker if needed.
@@ -1495,19 +1660,23 @@ class DownloadThread(QThread):
start_page=self.start_page,
end_page=self.end_page,
manga_mode=self.manga_mode_active,
- cancellation_event=self.cancellation_event
+ cancellation_event=self.cancellation_event,
+ pause_event=self.pause_event # Pass pause_event
)
for posts_batch_data in post_generator:
+ if self._check_pause_self("Post batch processing"): was_process_cancelled = True; break
if self.isInterruptionRequested(): was_process_cancelled = True; break
for individual_post_data in posts_batch_data:
+ if self._check_pause_self(f"Individual post processing for {individual_post_data.get('id', 'N/A')}"): was_process_cancelled = True; break
if self.isInterruptionRequested(): was_process_cancelled = True; break
post_processing_worker = PostProcessorWorker(
post_data=individual_post_data,
download_root=self.output_dir,
known_names=self.known_names,
- filter_character_list=self.filter_character_list_objects,
+ filter_character_list=self.filter_character_list_objects_initial, # Pass initial
+ dynamic_character_filter_holder=self.dynamic_filter_holder, # Pass the holder
unwanted_keywords=self.unwanted_keywords,
filter_mode=self.filter_mode,
skip_zip=self.skip_zip, skip_rar=self.skip_rar,
@@ -1517,6 +1686,7 @@ class DownloadThread(QThread):
compress_images=self.compress_images, download_thumbnails=self.download_thumbnails,
service=self.service, user_id=self.user_id,
api_url_input=self.api_url_input,
+ pause_event=self.pause_event, # Pass pause_event to worker
cancellation_event=self.cancellation_event, # emitter is PostProcessorSignals for single-thread
emitter=worker_signals_obj, # Pass the signals object as the emitter
downloaded_files=self.downloaded_files,
@@ -1534,14 +1704,16 @@ class DownloadThread(QThread):
char_filter_scope=self.char_filter_scope,
remove_from_filename_words_list=self.remove_from_filename_words_list,
allow_multipart_download=self.allow_multipart_download,
- manga_date_file_counter_ref=self.manga_date_file_counter_ref, # Pass it here
+ manga_date_file_counter_ref=current_manga_date_file_counter_ref, # Pass the calculated or passed-in ref
)
try:
- dl_count, skip_count, kept_originals_this_post = post_processing_worker.process()
+ dl_count, skip_count, kept_originals_this_post, retryable_failures = post_processing_worker.process()
grand_total_downloaded_files += dl_count
grand_total_skipped_files += skip_count
if kept_originals_this_post:
grand_list_of_kept_original_filenames.extend(kept_originals_this_post)
+ if retryable_failures:
+ self.retryable_file_failed_signal.emit(retryable_failures)
except Exception as proc_err:
post_id_for_err = individual_post_data.get('id', 'N/A')
self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}")
@@ -1572,6 +1744,7 @@ class DownloadThread(QThread):
worker_signals_obj.external_link_signal.disconnect(self.external_link_signal)
worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal)
+ # No need to disconnect retryable_file_failed_signal from worker_signals_obj as it's not on it
except (TypeError, RuntimeError) as e:
self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}")
diff --git a/main.py b/main.py
index 9f52e4e..fde7224 100644
--- a/main.py
+++ b/main.py
@@ -8,6 +8,7 @@ import queue
import hashlib
import http.client
import traceback
+import subprocess # Added for opening files cross-platform
import random
from collections import deque
@@ -20,11 +21,11 @@ from PyQt5.QtGui import (
from PyQt5.QtWidgets import (
QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton,
QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, QRadioButton, QButtonGroup, QCheckBox, QSplitter,
- QDialog, QStackedWidget,
+ QDialog, QStackedWidget, QScrollArea,
QFrame,
QAbstractButton
)
-from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer, QSettings, QStandardPaths
+from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer, QSettings, QStandardPaths, QCoreApplication
from urllib.parse import urlparse
try:
@@ -32,7 +33,7 @@ try:
except ImportError:
Image = None
-from io import BytesIO
+from io import BytesIO # Keep this if used elsewhere, though not directly in this diff
try:
print("Attempting to import from downloader_utils...")
@@ -50,7 +51,8 @@ try:
CHAR_SCOPE_TITLE, # Added for completeness if used directly
CHAR_SCOPE_FILES, # Ensure this is imported
CHAR_SCOPE_BOTH,
- CHAR_SCOPE_COMMENTS
+ CHAR_SCOPE_COMMENTS,
+ FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER # Import the new status
)
print("Successfully imported names from downloader_utils.")
except ImportError as e:
@@ -70,6 +72,7 @@ except ImportError as e:
CHAR_SCOPE_FILES = "files"
CHAR_SCOPE_BOTH = "both"
CHAR_SCOPE_COMMENTS = "comments"
+ FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER = "failed_retry_later"
except Exception as e:
print(f"--- UNEXPECTED IMPORT ERROR ---")
@@ -82,6 +85,11 @@ except Exception as e:
MAX_THREADS = 200
RECOMMENDED_MAX_THREADS = 50
MAX_FILE_THREADS_PER_POST_OR_WORKER = 10
+# New constants for batching high thread counts for post workers
+POST_WORKER_BATCH_THRESHOLD = 30
+POST_WORKER_NUM_BATCHES = 4
+SOFT_WARNING_THREAD_THRESHOLD = 40 # New constant for soft warning
+POST_WORKER_BATCH_DELAY_SECONDS = 2.5 # Seconds
MAX_POST_WORKERS_WHEN_COMMENT_FILTERING = 3 # New constant
HTML_PREFIX = ""
@@ -111,17 +119,32 @@ class TourStepWidget(QWidget):
title_label.setAlignment(Qt.AlignCenter)
# Increased padding-bottom for more space below title
title_label.setStyleSheet("font-size: 18px; font-weight: bold; color: #E0E0E0; padding-bottom: 15px;")
+ layout.addWidget(title_label)
+
+ # Create QScrollArea for content
+ scroll_area = QScrollArea()
+ scroll_area.setWidgetResizable(True) # Important for the content_label to resize correctly
+ scroll_area.setFrameShape(QFrame.NoFrame) # Make it look seamless with the dialog
+ scroll_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) # Content is word-wrapped
+ scroll_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded) # Show scrollbar only when needed
+ scroll_area.setStyleSheet("background-color: transparent;") # Match dialog background
content_label = QLabel(content_text)
content_label.setWordWrap(True)
- content_label.setAlignment(Qt.AlignLeft)
+ # AlignTop ensures text starts from the top if it's shorter than the scroll area view
+ content_label.setAlignment(Qt.AlignLeft | Qt.AlignTop)
content_label.setTextFormat(Qt.RichText)
# Adjusted line-height for bullet point readability
content_label.setStyleSheet("font-size: 11pt; color: #C8C8C8; line-height: 1.8;")
- layout.addWidget(title_label)
- layout.addWidget(content_label)
- layout.addStretch(1)
+ # Set the content_label as the widget for the scroll_area
+ scroll_area.setWidget(content_label)
+
+ # Add the scroll_area to the layout, allowing it to take available space
+ layout.addWidget(scroll_area, 1) # The '1' is a stretch factor
+
+ # Removed layout.addStretch(1) as the scroll_area now handles stretching.
+
class TourDialog(QDialog):
"""
@@ -134,7 +157,7 @@ class TourDialog(QDialog):
CONFIG_ORGANIZATION_NAME = "KemonoDownloader" # Shared with main app for consistency if needed, but can be distinct
CONFIG_APP_NAME_TOUR = "ApplicationTour" # Specific QSettings group for tour
- TOUR_SHOWN_KEY = "neverShowTourAgainV4" # Updated key for new tour content
+ TOUR_SHOWN_KEY = "neverShowTourAgainV5" # Updated key to re-show tour
def __init__(self, parent=None):
super().__init__(parent)
@@ -213,7 +236,7 @@ class TourDialog(QDialog):
step1_content = (
"Hello! This quick tour will walk you through the main features of the Kemono Downloader, including recent updates."
"
"
- "- Our goal is to help you easily download content from Kemono and Coomer.
"
+ "- My goal is to help you easily download content from Kemono and Coomer.
"
"- Use the Next and Back buttons to navigate.
"
"- Click Skip Tour to close this guide at any time.
"
"- Check 'Never show this tour again' if you don't want to see this on future startups.
"
@@ -441,6 +464,23 @@ class TourDialog(QDialog):
return QDialog.Rejected
# --- End Tour Classes ---
+# Helper class to hold dynamic character filters
+class DynamicFilterHolder:
+ def __init__(self, initial_filters=None):
+ self.lock = threading.Lock()
+ # Store filters as a list of dicts, same as parsed_character_filter_objects
+ self._filters = initial_filters if initial_filters is not None else []
+
+ def get_filters(self):
+ with self.lock:
+ # Return a deep copy to prevent modification of the internal list by workers
+ # and to ensure thread safety if workers iterate over it while it's being set.
+ return [dict(f) for f in self._filters]
+
+ def set_filters(self, new_filters):
+ with self.lock:
+ # Store a deep copy, ensuring new_filters is a list of dicts
+ self._filters = [dict(f) for f in (new_filters if new_filters else [])]
class DownloaderApp(QWidget):
character_prompt_response_signal = pyqtSignal(bool)
@@ -457,37 +497,30 @@ class DownloaderApp(QWidget):
super().__init__()
self.settings = QSettings(CONFIG_ORGANIZATION_NAME, CONFIG_APP_NAME_MAIN)
- # Determine path for Known.txt in user's app data directory
- app_config_dir = ""
- try:
- # Use AppLocalDataLocation for user-specific, non-roaming data
- app_data_root = QStandardPaths.writableLocation(QStandardPaths.AppLocalDataLocation)
- if not app_data_root: # Fallback if somehow empty
- app_data_root = QStandardPaths.writableLocation(QStandardPaths.GenericDataLocation)
-
- if app_data_root and CONFIG_ORGANIZATION_NAME:
- app_config_dir = os.path.join(app_data_root, CONFIG_ORGANIZATION_NAME)
- elif app_data_root: # If no org name, use a generic app name folder
- app_config_dir = os.path.join(app_data_root, "KemonoDownloaderAppData") # Fallback app name
- else: # Absolute fallback: current working directory (less ideal for bundled app)
- app_config_dir = os.getcwd()
-
- if not os.path.exists(app_config_dir):
- os.makedirs(app_config_dir, exist_ok=True)
- except Exception as e_path:
- print(f"Error setting up app_config_dir: {e_path}. Defaulting to CWD for Known.txt.")
- app_config_dir = os.getcwd() # Fallback
- self.config_file = os.path.join(app_config_dir, "Known.txt")
+ # Determine path for Known.txt in the application's directory
+ if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
+ # Application is frozen (bundled with PyInstaller or similar)
+ # sys.executable is the path to the .exe file
+ app_base_dir = os.path.dirname(sys.executable)
+ else:
+ # Application is running as a script
+ # __file__ is the path to the script file
+ app_base_dir = os.path.dirname(os.path.abspath(__file__))
+ self.config_file = os.path.join(app_base_dir, "Known.txt")
self.download_thread = None
self.thread_pool = None
self.cancellation_event = threading.Event()
+ self.pause_event = threading.Event() # New event for pausing
self.active_futures = []
self.total_posts_to_process = 0
+ self.dynamic_character_filter_holder = DynamicFilterHolder() # For live character filter updates
self.processed_posts_count = 0
self.download_counter = 0
self.skip_counter = 0
+ self.retryable_failed_files_info = [] # For storing info about files that failed but can be retried
+ self.is_paused = False # New state for pause functionality
# For handling signals from worker threads via a queue
self.worker_to_gui_queue = queue.Queue()
self.gui_update_timer = QTimer(self)
@@ -549,7 +582,7 @@ class DownloaderApp(QWidget):
self.load_known_names_from_util()
- self.setWindowTitle("Kemono Downloader v3.4.0")
+ self.setWindowTitle("Kemono Downloader v3.5.0")
# self.setGeometry(150, 150, 1050, 820) # Initial geometry will be set after showing
self.setStyleSheet(self.get_dark_theme())
@@ -573,7 +606,9 @@ class DownloaderApp(QWidget):
self.actual_gui_signals.missed_character_post_signal.connect(self.handle_missed_character_post)
self.actual_gui_signals.external_link_signal.connect(self.handle_external_link_signal)
self.actual_gui_signals.file_download_status_signal.connect(lambda status: None) # Placeholder if needed, or connect to UI
-
+
+ if hasattr(self, 'character_input'): # Connect live update for character input
+ self.character_input.textChanged.connect(self._on_character_input_changed_live)
# Timer for processing the worker queue
self.gui_update_timer.timeout.connect(self._process_worker_queue)
self.gui_update_timer.start(100) # Check queue every 100ms
@@ -585,6 +620,7 @@ class DownloaderApp(QWidget):
self.overall_progress_signal.connect(self.update_progress_display)
self.finished_signal.connect(self.download_finished)
# self.external_link_signal.connect(self.handle_external_link_signal) # Covered by actual_gui_signals
+ # self.retryable_file_failed_signal will be connected in start_single_threaded_download
# self.file_progress_signal.connect(self.update_file_progress_display) # Covered by actual_gui_signals
# UI element connections
@@ -620,6 +656,59 @@ class DownloaderApp(QWidget):
if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode) # Keep this if it's separate
+ if hasattr(self, 'open_known_txt_button'): # Connect the new button
+ self.open_known_txt_button.clicked.connect(self._open_known_txt_file)
+
+ def _on_character_input_changed_live(self, text):
+ """
+ Called when the character input field text changes.
+ If a download is active (running or paused), this updates the dynamic filter holder.
+ """
+ if self._is_download_active(): # Only update if download is active/paused
+ # self.log_signal.emit("ℹ️ Character filter input changed during active session. Updating dynamic filters...")
+ # Use QCoreApplication.processEvents() to keep UI responsive during parsing if it's complex
+ QCoreApplication.processEvents()
+ raw_character_filters_text = self.character_input.text().strip()
+ parsed_filters = self._parse_character_filters(raw_character_filters_text)
+
+ self.dynamic_character_filter_holder.set_filters(parsed_filters)
+ # Limit logging to avoid spamming if typing fast
+ # self.log_signal.emit(f" Dynamic filters updated to: {', '.join(item['name'] for item in parsed_filters) if parsed_filters else 'None'}")
+
+ def _parse_character_filters(self, raw_text):
+ """Helper to parse character filter string into list of objects."""
+ parsed_character_filter_objects = []
+ if raw_text:
+ raw_parts = []
+ current_part_buffer = ""
+ in_group_parsing = False
+ for char_token in raw_text:
+ if char_token == '(':
+ in_group_parsing = True
+ current_part_buffer += char_token
+ elif char_token == ')':
+ in_group_parsing = False
+ current_part_buffer += char_token
+ elif char_token == ',' and not in_group_parsing:
+ if current_part_buffer.strip(): raw_parts.append(current_part_buffer.strip())
+ current_part_buffer = ""
+ else:
+ current_part_buffer += char_token
+ if current_part_buffer.strip(): raw_parts.append(current_part_buffer.strip())
+
+ for part_str in raw_parts:
+ part_str = part_str.strip()
+ if not part_str: continue
+ if part_str.startswith("(") and part_str.endswith(")"):
+ group_content_str = part_str[1:-1].strip()
+ aliases_in_group = [alias.strip() for alias in group_content_str.split(',') if alias.strip()]
+ if aliases_in_group:
+ group_folder_name = " ".join(aliases_in_group)
+ parsed_character_filter_objects.append({"name": group_folder_name, "is_group": True, "aliases": aliases_in_group})
+ else:
+ parsed_character_filter_objects.append({"name": part_str, "is_group": False, "aliases": [part_str]})
+ return parsed_character_filter_objects
+
def _process_worker_queue(self):
"""Processes messages from the worker queue and emits Qt signals from the GUI thread."""
while not self.worker_to_gui_queue.empty():
@@ -649,35 +738,79 @@ class DownloaderApp(QWidget):
def load_known_names_from_util(self):
global KNOWN_NAMES
if os.path.exists(self.config_file):
+ parsed_known_objects = []
try:
with open(self.config_file, 'r', encoding='utf-8') as f:
- raw_names = [line.strip() for line in f]
- KNOWN_NAMES[:] = sorted(list(set(filter(None, raw_names))))
- log_msg = f"ℹ️ Loaded {len(KNOWN_NAMES)} known names from {self.config_file}"
+ for line_num, line in enumerate(f, 1):
+ line = line.strip()
+ if not line: continue
+
+ if line.startswith("(") and line.endswith(")"):
+ content = line[1:-1].strip()
+ parts = [p.strip() for p in content.split(',') if p.strip()]
+ if parts:
+ primary_name = parts[0]
+ # Aliases include the primary name for matching convenience
+ unique_aliases = sorted(list(set([primary_name] + parts)))
+ parsed_known_objects.append({
+ "name": primary_name,
+ "is_group": True,
+ "aliases": unique_aliases
+ })
+ else:
+ if hasattr(self, 'log_signal'): self.log_signal.emit(f"⚠️ Empty group found in Known.txt on line {line_num}: '{line}'")
+ else:
+ parsed_known_objects.append({
+ "name": line,
+ "is_group": False,
+ "aliases": [line] # Simple entry, alias is itself
+ })
+
+ # Sort by primary name, case-insensitive
+ parsed_known_objects.sort(key=lambda x: x["name"].lower())
+ KNOWN_NAMES[:] = parsed_known_objects # Update global list
+ log_msg = f"ℹ️ Loaded {len(KNOWN_NAMES)} known entries from {self.config_file}"
except Exception as e:
log_msg = f"❌ Error loading config '{self.config_file}': {e}"
QMessageBox.warning(self, "Config Load Error", f"Could not load list from {self.config_file}:\n{e}")
KNOWN_NAMES[:] = []
else:
- log_msg = f"ℹ️ Config file '{self.config_file}' not found. Starting empty."
+ log_msg = f"ℹ️ Config file '{self.config_file}' not found. Starting with default entries."
KNOWN_NAMES[:] = []
-
+
if hasattr(self, 'log_signal'): self.log_signal.emit(log_msg)
if hasattr(self, 'character_list'):
self.character_list.clear()
- self.character_list.addItems(KNOWN_NAMES)
+ # Display only the primary 'name' in the QListWidget
+
+ # Add default entries if the list is empty after loading (meaning file didn't exist)
+ if not KNOWN_NAMES:
+ default_entry = {
+ "name": "Boa Hancock",
+ "is_group": True,
+ "aliases": sorted(list(set(["Boa Hancock", "Boa", "Hancock", "Snakequeen"]))) # Ensure unique and sorted aliases
+ }
+ KNOWN_NAMES.append(default_entry)
+ # Add more defaults here if needed
+ self.save_known_names() # Save to disk immediately if file was created with defaults
+ self.log_signal.emit("ℹ️ Added default entry for 'Boa Hancock'.")
+
+ self.character_list.addItems([entry["name"] for entry in KNOWN_NAMES])
def save_known_names(self):
global KNOWN_NAMES
try:
- unique_sorted_names = sorted(list(set(filter(None, KNOWN_NAMES))))
- KNOWN_NAMES[:] = unique_sorted_names
-
+ # KNOWN_NAMES is already sorted by primary name during load/add
with open(self.config_file, 'w', encoding='utf-8') as f:
- for name in unique_sorted_names:
- f.write(name + '\n')
- if hasattr(self, 'log_signal'): self.log_signal.emit(f"💾 Saved {len(unique_sorted_names)} known names to {self.config_file}")
+ for entry in KNOWN_NAMES:
+ if entry["is_group"] and len(entry["aliases"]) > 1:
+ # Join all aliases with ", " for readability
+ joined_aliases = ", ".join(entry["aliases"])
+ f.write(f"({joined_aliases})\n")
+ else: # Simple entry or group with only one alias (the name itself)
+ f.write(entry["name"] + '\n')
+ if hasattr(self, 'log_signal'): self.log_signal.emit(f"💾 Saved {len(KNOWN_NAMES)} known entries to {self.config_file}")
except Exception as e:
log_msg = f"❌ Error saving config '{self.config_file}': {e}"
if hasattr(self, 'log_signal'): self.log_signal.emit(log_msg)
@@ -1004,8 +1137,9 @@ class DownloaderApp(QWidget):
self.thread_count_input.setToolTip(
f"Number of concurrent operations.\n"
f"- Single Post: Concurrent file downloads (1-{MAX_FILE_THREADS_PER_POST_OR_WORKER} recommended).\n"
- f"- Creator Feed: Concurrent post processing (1-{MAX_THREADS}).\n"
- f" File downloads per post worker also use this value (1-{MAX_FILE_THREADS_PER_POST_OR_WORKER} recommended)."
+ f"- Creator Feed URL: Number of posts to process simultaneously (1-{MAX_THREADS} recommended).\n"
+ f" Files within each post are downloaded one by one by its worker.\n"
+ f"If 'Use Multithreading' is unchecked, 1 thread is used."
)
self.thread_count_input.setValidator(QIntValidator(1, MAX_THREADS))
multithreading_layout.addWidget(self.thread_count_input)
@@ -1036,22 +1170,38 @@ class DownloaderApp(QWidget):
self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;")
self.download_btn.clicked.connect(self.start_download)
self.cancel_btn = QPushButton("❌ Cancel & Reset UI") # Updated button text for clarity
+ self.pause_btn = QPushButton("⏸️ Pause Download")
+ self.pause_btn.setToolTip("Click to pause the ongoing download process.")
+ self.pause_btn.setEnabled(False)
+ self.pause_btn.clicked.connect(self._handle_pause_resume_action)
+
self.cancel_btn.setEnabled(False)
self.cancel_btn.setToolTip("Click to cancel the ongoing download/extraction process and reset the UI fields (preserving URL and Directory).")
self.cancel_btn.clicked.connect(self.cancel_download_button_action) # Changed connection
btn_layout.addWidget(self.download_btn)
+ btn_layout.addWidget(self.pause_btn) # Add pause button in the middle
btn_layout.addWidget(self.cancel_btn)
left_layout.addLayout(btn_layout)
left_layout.addSpacing(10)
+
known_chars_label_layout = QHBoxLayout()
known_chars_label_layout.setSpacing(10)
self.known_chars_label = QLabel("🎭 Known Shows/Characters (for Folder Names):")
+ known_chars_label_layout.addWidget(self.known_chars_label) # Add label first
+
+ # Create and add the "Open Known.txt" button BEFORE the search input
+ self.open_known_txt_button = QPushButton("Open Known.txt")
+ self.open_known_txt_button.setToolTip("Open the 'Known.txt' file in your default text editor.\nThe file is located in the application's directory.")
+ self.open_known_txt_button.setStyleSheet("padding: 4px 8px;") # Consistent small button style
+ self.open_known_txt_button.setFixedWidth(120) # Adjust width as needed
+ known_chars_label_layout.addWidget(self.open_known_txt_button) # Add button second
+
+ # Then create and add the character search input, allowing it to stretch
self.character_search_input = QLineEdit()
self.character_search_input.setToolTip("Type here to filter the list of known shows/characters below.")
self.character_search_input.setPlaceholderText("Search characters...")
- known_chars_label_layout.addWidget(self.known_chars_label, 1)
- known_chars_label_layout.addWidget(self.character_search_input)
+ known_chars_label_layout.addWidget(self.character_search_input, 1) # Added stretch factor of 1
left_layout.addLayout(known_chars_label_layout)
self.character_list = QListWidget()
@@ -1836,16 +1986,27 @@ class DownloaderApp(QWidget):
if not name_to_add:
QMessageBox.warning(self, "Input Error", "Name cannot be empty."); return False
- name_lower = name_to_add.lower()
- if any(existing.lower() == name_lower for existing in KNOWN_NAMES):
- QMessageBox.warning(self, "Duplicate Name", f"The name '{name_to_add}' (case-insensitive) already exists."); return False
+ name_to_add_lower = name_to_add.lower()
+
+ # Check for duplicates (primary name or any alias)
+ for kn_entry in KNOWN_NAMES:
+ if kn_entry["name"].lower() == name_to_add_lower:
+ QMessageBox.warning(self, "Duplicate Name", f"The name '{name_to_add}' already exists as a primary folder name."); return False
+ for alias in kn_entry["aliases"]:
+ if alias.lower() == name_to_add_lower:
+ QMessageBox.warning(self, "Duplicate Alias", f"The name '{name_to_add}' already exists as an alias for '{kn_entry['name']}'."); return False
similar_names_details = []
- for existing_name in KNOWN_NAMES:
- existing_name_lower = existing_name.lower()
- if name_lower != existing_name_lower and (name_lower in existing_name_lower or existing_name_lower in name_lower):
- similar_names_details.append((name_to_add, existing_name))
-
+ # Check for similarity with existing primary names or aliases
+ for kn_entry in KNOWN_NAMES:
+ for term_to_check_similarity_against in kn_entry["aliases"]: # Check against all aliases
+ term_lower = term_to_check_similarity_against.lower()
+ if name_to_add_lower != term_lower and \
+ (name_to_add_lower in term_lower or term_lower in name_to_add_lower):
+ # Warn about similarity with the primary name of the group
+ similar_names_details.append((name_to_add, kn_entry["name"]))
+ break # Found a similarity for this entry, no need to check its other aliases
+
if similar_names_details:
first_similar_new, first_similar_existing = similar_names_details[0]
shorter, longer = sorted([first_similar_new, first_similar_existing], key=len)
@@ -1855,7 +2016,7 @@ class DownloaderApp(QWidget):
msg_box.setWindowTitle("Potential Name Conflict")
msg_box.setText(
f"The name '{first_similar_new}' is very similar to an existing name: '{first_similar_existing}'.\n\n"
- f"This could lead to files being grouped into less specific folders (e.g., under '{clean_folder_name(shorter)}' instead of a more specific '{clean_folder_name(longer)}').\n\n"
+ f"This could lead to unexpected folder grouping (e.g., under '{clean_folder_name(shorter)}' instead of a more specific '{clean_folder_name(longer)}' or vice-versa).\n\n"
"Do you want to change the name you are adding, or proceed anyway?"
)
change_button = msg_box.addButton("Change Name", QMessageBox.RejectRole)
@@ -1865,16 +2026,22 @@ class DownloaderApp(QWidget):
msg_box.exec_()
if msg_box.clickedButton() == change_button:
- self.log_signal.emit(f"ℹ️ User chose to change '{first_similar_new}' due to similarity with '{first_similar_existing}'.")
+ self.log_signal.emit(f"ℹ️ User chose to change '{first_similar_new}' due to similarity with an alias of '{first_similar_existing}'.")
return False
- self.log_signal.emit(f"⚠️ User proceeded with adding '{first_similar_new}' despite similarity with '{first_similar_existing}'.")
+ self.log_signal.emit(f"⚠️ User proceeded with adding '{first_similar_new}' despite similarity with an alias of '{first_similar_existing}'.")
- KNOWN_NAMES.append(name_to_add)
- KNOWN_NAMES.sort(key=str.lower)
+ # Add as a simple (non-group) entry
+ new_entry = {
+ "name": name_to_add,
+ "is_group": False,
+ "aliases": [name_to_add]
+ }
+ KNOWN_NAMES.append(new_entry)
+ KNOWN_NAMES.sort(key=lambda x: x["name"].lower()) # Sort by primary name
self.character_list.clear()
- self.character_list.addItems(KNOWN_NAMES)
+ self.character_list.addItems([entry["name"] for entry in KNOWN_NAMES])
self.filter_character_list(self.character_search_input.text())
self.log_signal.emit(f"✅ Added '{name_to_add}' to known names list.")
@@ -1889,19 +2056,19 @@ class DownloaderApp(QWidget):
if not selected_items:
QMessageBox.warning(self, "Selection Error", "Please select one or more names to delete."); return
- names_to_remove = {item.text() for item in selected_items}
+ primary_names_to_remove = {item.text() for item in selected_items}
confirm = QMessageBox.question(self, "Confirm Deletion",
- f"Are you sure you want to delete {len(names_to_remove)} name(s)?",
+ f"Are you sure you want to delete {len(primary_names_to_remove)} selected entry/entries (and their aliases)?",
QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
if confirm == QMessageBox.Yes:
original_count = len(KNOWN_NAMES)
- KNOWN_NAMES[:] = [n for n in KNOWN_NAMES if n not in names_to_remove]
+ KNOWN_NAMES[:] = [entry for entry in KNOWN_NAMES if entry["name"] not in primary_names_to_remove]
removed_count = original_count - len(KNOWN_NAMES)
if removed_count > 0:
self.log_signal.emit(f"🗑️ Removed {removed_count} name(s).")
self.character_list.clear()
- self.character_list.addItems(KNOWN_NAMES)
+ self.character_list.addItems([entry["name"] for entry in KNOWN_NAMES])
self.filter_character_list(self.character_search_input.text())
self.save_known_names()
else:
@@ -2178,7 +2345,50 @@ class DownloaderApp(QWidget):
QMessageBox.critical(self, "Thread Count Error", "Invalid number of threads. Please enter a positive number.")
self.set_ui_enabled(True)
return
-
+
+ if use_multithreading_enabled_by_checkbox:
+ # Hard Warning: Threads > MAX_THREADS (200)
+ if num_threads_from_gui > MAX_THREADS:
+ hard_warning_msg = (
+ f"You've entered a thread count ({num_threads_from_gui}) exceeding the maximum of {MAX_THREADS}.\n\n"
+ "Using an extremely high number of threads can lead to:\n"
+ " - Diminishing returns (no significant speed increase).\n"
+ " - Increased system instability or application crashes.\n"
+ " - Higher chance of being rate-limited or temporarily IP-banned by the server.\n\n"
+ f"The thread count has been automatically capped to {MAX_THREADS} for stability."
+ )
+ QMessageBox.warning(self, "High Thread Count Warning", hard_warning_msg)
+ self.log_signal.emit(f"⚠️ User attempted {num_threads_from_gui} threads, capped to {MAX_THREADS}.")
+ num_threads_from_gui = MAX_THREADS
+ self.thread_count_input.setText(str(MAX_THREADS)) # Update the input field
+
+ # Soft Warning: SOFT_WARNING_THREAD_THRESHOLD < Threads <= MAX_THREADS
+ # This uses the potentially capped num_threads_from_gui from the hard warning
+ if SOFT_WARNING_THREAD_THRESHOLD < num_threads_from_gui <= MAX_THREADS:
+ soft_warning_msg_box = QMessageBox(self)
+ soft_warning_msg_box.setIcon(QMessageBox.Question)
+ soft_warning_msg_box.setWindowTitle("Thread Count Advisory")
+ soft_warning_msg_box.setText(
+ f"You've set the thread count to {num_threads_from_gui}.\n\n"
+ "While this is within the allowed limit, using a high number of threads (typically above 40-50) can sometimes lead to:\n"
+ " - Increased errors or failed file downloads.\n"
+ " - Connection issues with the server.\n"
+ " - Higher system resource usage.\n\n"
+ "For most users and connections, 10-30 threads provide a good balance.\n\n"
+ f"Do you want to proceed with {num_threads_from_gui} threads, or would you like to change the value?"
+ )
+ proceed_button = soft_warning_msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole)
+ change_button = soft_warning_msg_box.addButton("Change Thread Value", QMessageBox.RejectRole)
+ soft_warning_msg_box.setDefaultButton(proceed_button)
+ soft_warning_msg_box.setEscapeButton(change_button)
+ soft_warning_msg_box.exec_()
+
+ if soft_warning_msg_box.clickedButton() == change_button:
+ self.log_signal.emit(f"ℹ️ User opted to change thread count from {num_threads_from_gui} after advisory.")
+ self.thread_count_input.setFocus()
+ self.thread_count_input.selectAll()
+ return # Exit start_download to allow user to change value
+
raw_skip_words = self.skip_words_input.text().strip()
skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()]
@@ -2241,48 +2451,8 @@ class DownloaderApp(QWidget):
# Manga Mode specific duplicate handling is now managed entirely within downloader_utils.py
self.external_link_queue.clear(); self.extracted_links_cache = []; self._is_processing_external_link_queue = False; self._current_link_post_title = None
- raw_character_filters_text = self.character_input.text().strip()
-
- # --- New parsing logic for character filters ---
- parsed_character_filter_objects = []
- if raw_character_filters_text:
- raw_parts = []
- current_part_buffer = ""
- in_group_parsing = False
- for char_token in raw_character_filters_text:
- if char_token == '(':
- in_group_parsing = True
- current_part_buffer += char_token
- elif char_token == ')':
- in_group_parsing = False
- current_part_buffer += char_token
- elif char_token == ',' and not in_group_parsing:
- if current_part_buffer.strip(): raw_parts.append(current_part_buffer.strip())
- current_part_buffer = ""
- else:
- current_part_buffer += char_token
- if current_part_buffer.strip(): raw_parts.append(current_part_buffer.strip())
-
- for part_str in raw_parts:
- part_str = part_str.strip()
- if not part_str: continue
- if part_str.startswith("(") and part_str.endswith(")"):
- group_content_str = part_str[1:-1].strip()
- aliases_in_group = [alias.strip() for alias in group_content_str.split(',') if alias.strip()]
- if aliases_in_group:
- group_folder_name = " ".join(aliases_in_group)
- parsed_character_filter_objects.append({
- "name": group_folder_name, # This is the primary/folder name
- "is_group": True,
- "aliases": aliases_in_group # These are for matching
- })
- else:
- parsed_character_filter_objects.append({
- "name": part_str, # Folder name and matching name are the same
- "is_group": False,
- "aliases": [part_str]
- })
- # --- End new parsing logic ---
+ raw_character_filters_text = self.character_input.text().strip() # Get current text
+ parsed_character_filter_objects = self._parse_character_filters(raw_character_filters_text) # Parse it
filter_character_list_to_pass = None
needs_folder_naming_validation = (use_subfolders or manga_mode) and not extract_links_only
@@ -2324,7 +2494,7 @@ class DownloaderApp(QWidget):
should_prompt_to_add_to_known_list = (
needs_folder_naming_validation and
not manga_mode and # Do NOT prompt if Manga Mode is ON
- item_primary_name.lower() not in {kn.lower() for kn in KNOWN_NAMES}
+ item_primary_name.lower() not in {kn_entry["name"].lower() for kn_entry in KNOWN_NAMES}
)
if should_prompt_to_add_to_known_list:
@@ -2344,7 +2514,7 @@ class DownloaderApp(QWidget):
# - OR Manga Mode is ON (filter is used without adding to Known.txt)
# - OR extract_links_only is true (folder naming validation is false)
valid_filters_for_backend.append(filter_item_obj)
- if manga_mode and needs_folder_naming_validation and item_primary_name.lower() not in {kn.lower() for kn in KNOWN_NAMES}:
+ if manga_mode and needs_folder_naming_validation and item_primary_name.lower() not in {kn_entry["name"].lower() for kn_entry in KNOWN_NAMES}:
self.log_signal.emit(f"ℹ️ Manga Mode: Using filter '{item_primary_name}' for this session without adding to Known Names.")
if user_cancelled_validation: return
@@ -2377,6 +2547,9 @@ class DownloaderApp(QWidget):
else:
self.log_signal.emit("⚠️ Proceeding with Manga Mode without a specific title filter.")
+ # Set the dynamic filter holder with the filters determined for this run
+ # This ensures workers get the correct initial set if they start before any live changes.
+ self.dynamic_character_filter_holder.set_filters(filter_character_list_to_pass if filter_character_list_to_pass else [])
custom_folder_name_cleaned = None
if use_subfolders and post_id_from_url and self.custom_folder_widget and self.custom_folder_widget.isVisible() and not extract_links_only:
@@ -2399,65 +2572,39 @@ class DownloaderApp(QWidget):
self.total_posts_to_process = 0; self.processed_posts_count = 0; self.download_counter = 0; self.skip_counter = 0
self.progress_label.setText("Progress: Initializing...")
-
- self.manga_date_file_counter_obj = [1, threading.Lock()] # Default: [value, lock]
+ self.retryable_failed_files_info.clear() # Clear previous retryable failures before new session
+ # Manga date file counter initialization is now moved into DownloadThread.run()
+ # We will pass None or a placeholder if needed, and DownloadThread will calculate it.
+ manga_date_file_counter_ref_for_thread = None
if manga_mode and self.manga_filename_style == STYLE_DATE_BASED and not extract_links_only:
- # Determine the directory to scan for existing numbered files for this series
- # This path should be the "series" root, before any "per-post" subfolders.
- series_scan_directory = output_dir # Base download location
-
- if use_subfolders: # If 'Separate Folders by Name/Title' is ON
- # Try to get folder name from character filter (manga series title)
- if filter_character_list_to_pass and filter_character_list_to_pass[0] and filter_character_list_to_pass[0].get("name"):
- # Assuming the first filter is the series name for folder creation
- series_folder_name = clean_folder_name(filter_character_list_to_pass[0]["name"])
- series_scan_directory = os.path.join(series_scan_directory, series_folder_name)
- elif service and user_id: # Fallback if no char filter, but subfolders are on
- # This might group multiple series from one creator if no distinct char filter is used.
- # The counter is per download operation, so this is consistent.
- creator_based_folder_name = clean_folder_name(user_id) # Or a more specific creator name convention
- series_scan_directory = os.path.join(series_scan_directory, creator_based_folder_name)
- # If neither, series_scan_directory remains output_dir (files go directly there if use_subfolders is on but no name found)
- # If use_subfolders is OFF, files go into output_dir. So, series_scan_directory remains output_dir.
-
- highest_num = 0
- if os.path.isdir(series_scan_directory):
- self.log_signal.emit(f"ℹ️ Manga Date Mode: Scanning for existing numbered files in '{series_scan_directory}' and its subdirectories...")
- for dirpath, _, filenames_in_dir in os.walk(series_scan_directory):
- for filename_to_check in filenames_in_dir:
- # Check the base name (without extension) for leading digits
- base_name_no_ext = os.path.splitext(filename_to_check)[0]
- match = re.match(r"(\d{3,})", base_name_no_ext) # Matches "001" from "001.jpg" or "001_13.jpg"
- if match:
- try:
- num = int(match.group(1))
- if num > highest_num:
- highest_num = num
- except ValueError:
- continue
- else:
- self.log_signal.emit(f"ℹ️ Manga Date Mode: Scan directory '{series_scan_directory}' not found or is not a directory. Starting counter at 1.")
- self.manga_date_file_counter_obj = [highest_num + 1, threading.Lock()] # [value, lock]
- self.log_signal.emit(f"ℹ️ Manga Date Mode: Initialized file counter at {self.manga_date_file_counter_obj[0]}.")
+ # Pass None; DownloadThread will calculate if it's a single-threaded download.
+ # For multi-threaded, this ref needs to be created here and shared.
+ # However, with date_based manga mode forcing single post worker, this specific ref might only be used by that one worker.
+ # Let's keep it as None for now, assuming DownloadThread handles its init if it's the one doing sequential processing.
+ # If multi-threaded post processing were allowed with date-based, this would need careful shared state.
+ manga_date_file_counter_ref_for_thread = None
+ self.log_signal.emit(f"ℹ️ Manga Date Mode: File counter will be initialized by the download thread.")
effective_num_post_workers = 1
- effective_num_file_threads_per_worker = 1
-
- # Determine if multithreading for posts should be used
+ effective_num_file_threads_per_worker = 1 # Default to 1 for all cases initially
+
if post_id_from_url:
- # Single post URL: no post workers, but file threads can be > 1
+ # Single post URL: UI threads control concurrent file downloads for that post
if use_multithreading_enabled_by_checkbox:
effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER))
+ # else: effective_num_file_threads_per_worker remains 1
+ # effective_num_post_workers remains 1 (not used for post thread pool)
else:
- # Creator feed
+ # Creator feed URL
if manga_mode and self.manga_filename_style == STYLE_DATE_BASED:
# Force single post worker for date-based manga mode
effective_num_post_workers = 1
# File threads per worker can still be > 1 if user set it
- effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER)) if use_multithreading_enabled_by_checkbox else 1
+ effective_num_file_threads_per_worker = 1 # Files are sequential for this worker too
elif use_multithreading_enabled_by_checkbox: # Standard creator feed with multithreading enabled
effective_num_post_workers = max(1, min(num_threads_from_gui, MAX_THREADS)) # For posts
- effective_num_file_threads_per_worker = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER)) # For files within each post worker
-
+ effective_num_file_threads_per_worker = 1 # Files within each post worker are sequential
+ # else (not multithreading for creator feed):
+ # effective_num_post_workers remains 1, effective_num_file_threads_per_worker remains 1
log_messages = ["="*40, f"🚀 Starting {'Link Extraction' if extract_links_only else ('Archive Download' if backend_filter_mode == 'archive' else 'Download')} @ {time.strftime('%Y-%m-%d %H:%M:%S')}", f" URL: {api_url}"]
if not extract_links_only: log_messages.append(f" Save Location: {output_dir}")
@@ -2559,10 +2706,12 @@ class DownloaderApp(QWidget):
'manga_mode_active': manga_mode,
'unwanted_keywords': unwanted_keywords_for_folders,
'cancellation_event': self.cancellation_event,
+ 'dynamic_character_filter_holder': self.dynamic_character_filter_holder, # Pass the holder
+ 'pause_event': self.pause_event, # Explicitly add pause_event here
# 'emitter' will be set based on single/multi-thread mode below
'manga_filename_style': self.manga_filename_style,
'num_file_threads_for_worker': effective_num_file_threads_per_worker,
- 'manga_date_file_counter_ref': self.manga_date_file_counter_obj if manga_mode and self.manga_filename_style == STYLE_DATE_BASED else None,
+ 'manga_date_file_counter_ref': manga_date_file_counter_ref_for_thread,
'allow_multipart_download': allow_multipart,
# 'duplicate_file_mode' and session-wide tracking removed
}
@@ -2580,8 +2729,8 @@ class DownloaderApp(QWidget):
'filter_character_list', 'filter_mode', 'skip_zip', 'skip_rar',
'use_subfolders', 'use_post_subfolders', 'custom_folder_name',
'compress_images', 'download_thumbnails', 'service', 'user_id',
- 'downloaded_files', 'downloaded_file_hashes', 'remove_from_filename_words_list',
- 'downloaded_files_lock', 'downloaded_file_hashes_lock',
+ 'downloaded_files', 'downloaded_file_hashes', 'pause_event', 'remove_from_filename_words_list', # Added pause_event
+ 'downloaded_files_lock', 'downloaded_file_hashes_lock', 'dynamic_character_filter_holder', # Added holder
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'show_external_links', 'extract_links_only', 'num_file_threads_for_worker',
'start_page', 'end_page', 'target_post_id_from_initial_url',
@@ -2596,12 +2745,16 @@ class DownloaderApp(QWidget):
self.log_signal.emit(f"❌ CRITICAL ERROR preparing download: {e}\n{traceback.format_exc()}")
QMessageBox.critical(self, "Start Error", f"Failed to start process:\n{e}")
self.download_finished(0,0,False, [])
+ if self.pause_event: self.pause_event.clear()
+ self.is_paused = False # Ensure pause state is reset on error
def start_single_threaded_download(self, **kwargs):
global BackendDownloadThread
try:
self.download_thread = BackendDownloadThread(**kwargs)
+ if self.pause_event: self.pause_event.clear() # Clear pause before starting
+ self.is_paused = False # Reset pause state
if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.connect(self.handle_main_log)
if hasattr(self.download_thread, 'add_character_prompt_signal'): self.download_thread.add_character_prompt_signal.connect(self.add_character_prompt_signal)
if hasattr(self.download_thread, 'finished_signal'): self.download_thread.finished_signal.connect(self.download_finished)
@@ -2610,17 +2763,75 @@ class DownloaderApp(QWidget):
if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.connect(self.update_file_progress_display)
if hasattr(self.download_thread, 'missed_character_post_signal'): # New
self.download_thread.missed_character_post_signal.connect(self.handle_missed_character_post)
+ if hasattr(self.download_thread, 'retryable_file_failed_signal'): # New for retry
+ self.download_thread.retryable_file_failed_signal.connect(self._handle_retryable_file_failure)
self.download_thread.start()
self.log_signal.emit("✅ Single download thread (for posts) started.")
except Exception as e:
self.log_signal.emit(f"❌ CRITICAL ERROR starting single-thread: {e}\n{traceback.format_exc()}")
QMessageBox.critical(self, "Thread Start Error", f"Failed to start download process: {e}")
- self.download_finished(0,0,False, [])
+ if self.pause_event: self.pause_event.clear()
+            self.is_paused = False # Ensure pause state is reset on error. NOTE(review): this hunk removes the original self.download_finished(0,0,False,[]) call from this except path, so the UI may never be re-enabled after a thread-start failure — confirm that is intentional (the matching handler above keeps both).
+ def _handle_retryable_file_failure(self, list_of_retry_details):
+ """Appends details of files that failed but might be retryable later."""
+ if list_of_retry_details:
+ self.retryable_failed_files_info.extend(list_of_retry_details)
+
+ def _submit_post_to_worker_pool(self, post_data_item, worker_args_template, num_file_dl_threads_for_each_worker, emitter_for_worker, ppw_expected_keys, ppw_optional_keys_with_defaults):
+ """Helper to prepare and submit a single post processing task to the thread pool."""
+ global PostProcessorWorker # Ensure PostProcessorWorker is accessible
+ if not isinstance(post_data_item, dict):
+ self.log_signal.emit(f"⚠️ Skipping invalid post data item (not a dict): {type(post_data_item)}");
+ # Note: This skip does not directly increment processed_posts_count here,
+ # as that counter is tied to future completion.
+ # The overall effect is that total_posts_to_process might be higher than actual futures.
+ return False # Indicate failure or skip
+
+ worker_init_args = {}
+ missing_keys = []
+ for key in ppw_expected_keys:
+ if key == 'post_data': worker_init_args[key] = post_data_item
+ elif key == 'num_file_threads': worker_init_args[key] = num_file_dl_threads_for_each_worker
+ elif key == 'emitter': worker_init_args[key] = emitter_for_worker
+ elif key in worker_args_template: worker_init_args[key] = worker_args_template[key]
+ elif key in ppw_optional_keys_with_defaults: pass # It has a default in PostProcessorWorker
+ else: missing_keys.append(key)
+
+ if missing_keys:
+ self.log_signal.emit(f"❌ CRITICAL ERROR: Missing keys for PostProcessorWorker: {', '.join(missing_keys)}");
+ self.cancellation_event.set()
+ return False
+
+ try:
+ worker_instance = PostProcessorWorker(**worker_init_args)
+ if self.thread_pool:
+ future = self.thread_pool.submit(worker_instance.process)
+ future.add_done_callback(self._handle_future_result)
+ self.active_futures.append(future)
+ return True # Indicate success
+ else:
+ self.log_signal.emit("⚠️ Thread pool not available. Cannot submit task.");
+ self.cancellation_event.set() # Signal cancellation as we can't proceed
+ return False
+ except TypeError as te:
+ self.log_signal.emit(f"❌ TypeError creating PostProcessorWorker: {te}\n Passed Args: [{', '.join(sorted(worker_init_args.keys()))}]\n{traceback.format_exc(limit=5)}")
+ self.cancellation_event.set()
+ return False
+ except RuntimeError: # Pool likely shutting down
+ self.log_signal.emit(f"⚠️ RuntimeError submitting task (pool likely shutting down).")
+ self.cancellation_event.set()
+ return False
+ except Exception as e:
+ self.log_signal.emit(f"❌ Error submitting post {post_data_item.get('id','N/A')} to worker: {e}")
+ self.cancellation_event.set()
+ return False
def start_multi_threaded_download(self, num_post_workers, **kwargs):
global PostProcessorWorker
if self.thread_pool is None:
+ if self.pause_event: self.pause_event.clear() # Clear pause before starting
+ self.is_paused = False # Reset pause state
self.thread_pool = ThreadPoolExecutor(max_workers=num_post_workers, thread_name_prefix='PostWorker_')
self.active_futures = []
@@ -2638,7 +2849,7 @@ class DownloaderApp(QWidget):
def _fetch_and_queue_posts(self, api_url_input_for_fetcher, worker_args_template, num_post_workers):
- global PostProcessorWorker, download_from_api
+ global PostProcessorWorker, download_from_api # Ensure PostProcessorWorker is in scope
all_posts_data = []
fetch_error_occurred = False
manga_mode_active_for_fetch = worker_args_template.get('manga_mode_active', False)
@@ -2675,6 +2886,25 @@ class DownloaderApp(QWidget):
if not fetch_error_occurred and not self.cancellation_event.is_set():
self.log_signal.emit(f"✅ Post fetching complete. Total posts to process: {self.total_posts_to_process}")
+ # --- De-duplicate posts by ID ---
+ unique_posts_dict = {}
+ for post in all_posts_data:
+ post_id = post.get('id')
+ if post_id is not None:
+ # Keep the first occurrence of each post ID
+ if post_id not in unique_posts_dict:
+ unique_posts_dict[post_id] = post
+ else:
+ self.log_signal.emit(f"⚠️ Skipping post with no ID: {post.get('title', 'Untitled')}")
+
+ all_posts_data = list(unique_posts_dict.values())
+            # --- End De-duplication --- NOTE(review): all_posts_data was just rebuilt from unique_posts_dict, so the length comparison logged below is always equal and the "duplicate post IDs were removed" message can never fire; capture the pre-dedup count before reassigning all_posts_data.
+
+ self.total_posts_to_process = len(all_posts_data)
+ self.log_signal.emit(f" Processed {len(unique_posts_dict)} unique posts after de-duplication.")
+ if len(unique_posts_dict) < len(all_posts_data):
+ self.log_signal.emit(f" Note: {len(all_posts_data) - len(unique_posts_dict)} duplicate post IDs were removed.")
+
except TypeError as te:
self.log_signal.emit(f"❌ TypeError calling download_from_api: {te}\n Check 'downloader_utils.py' signature.\n{traceback.format_exc(limit=2)}"); fetch_error_occurred = True
except RuntimeError as re_err:
@@ -2689,13 +2919,13 @@ class DownloaderApp(QWidget):
return
if self.total_posts_to_process == 0:
- self.log_signal.emit("😕 No posts found or fetched to process.");
- self.finished_signal.emit(0,0,False, []);
+ self.log_signal.emit("😕 No posts found or fetched to process.")
+ self.finished_signal.emit(0,0,False, [])
return
- self.log_signal.emit(f" Submitting {self.total_posts_to_process} post processing tasks to thread pool...")
+ self.log_signal.emit(f" Preparing to submit {self.total_posts_to_process} post processing tasks to thread pool...")
self.processed_posts_count = 0
- self.overall_progress_signal.emit(self.total_posts_to_process, 0)
+ self.overall_progress_signal.emit(self.total_posts_to_process, 0) # Emit initial progress
num_file_dl_threads_for_each_worker = worker_args_template.get('num_file_threads_for_worker', 1)
@@ -2703,10 +2933,10 @@ class DownloaderApp(QWidget):
ppw_expected_keys = [
'post_data', 'download_root', 'known_names', 'filter_character_list', 'unwanted_keywords',
'filter_mode', 'skip_zip', 'skip_rar', 'use_subfolders', 'use_post_subfolders',
- 'target_post_id_from_initial_url', 'custom_folder_name', 'compress_images', 'emitter',
+ 'target_post_id_from_initial_url', 'custom_folder_name', 'compress_images', 'emitter', 'pause_event', # Added pause_event
'download_thumbnails', 'service', 'user_id', 'api_url_input',
'cancellation_event', 'downloaded_files', 'downloaded_file_hashes',
- 'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list',
+ 'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list', 'dynamic_character_filter_holder', # Added holder
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'show_external_links', 'extract_links_only', 'allow_multipart_download',
'num_file_threads', 'skip_current_file_flag', 'manga_date_file_counter_ref',
@@ -2720,40 +2950,72 @@ class DownloaderApp(QWidget):
'manga_date_file_counter_ref' # Add this
}
- for post_data_item in all_posts_data:
- if self.cancellation_event.is_set(): break
- if not isinstance(post_data_item, dict):
- self.log_signal.emit(f"⚠️ Skipping invalid post data item (not a dict): {type(post_data_item)}");
- self.processed_posts_count += 1;
- continue
+ # --- Batching Logic ---
+ if num_post_workers > POST_WORKER_BATCH_THRESHOLD and self.total_posts_to_process > POST_WORKER_NUM_BATCHES :
+ self.log_signal.emit(f" High thread count ({num_post_workers}) detected. Batching post submissions into {POST_WORKER_NUM_BATCHES} parts.")
+
+            import math # local import: math.ceil is needed only on this batching path
+ batch_size = math.ceil(self.total_posts_to_process / POST_WORKER_NUM_BATCHES)
+ submitted_count_in_batching = 0
- worker_init_args = {}; missing_keys = []
- for key in ppw_expected_keys:
- if key == 'post_data': worker_init_args[key] = post_data_item
- elif key == 'num_file_threads': worker_init_args[key] = num_file_dl_threads_for_each_worker
- elif key == 'emitter': worker_init_args[key] = emitter_for_worker # Pass the queue
- elif key in worker_args_template: worker_init_args[key] = worker_args_template[key]
- elif key in ppw_optional_keys_with_defaults: pass
- else: missing_keys.append(key)
+ for batch_num in range(POST_WORKER_NUM_BATCHES):
+ if self.cancellation_event.is_set(): break
+
+ if self.pause_event and self.pause_event.is_set():
+ self.log_signal.emit(f" [Fetcher] Batch submission paused before batch {batch_num + 1}/{POST_WORKER_NUM_BATCHES}...")
+ while self.pause_event.is_set():
+ if self.cancellation_event.is_set():
+ self.log_signal.emit(" [Fetcher] Batch submission cancelled while paused.")
+ break
+ time.sleep(0.5)
+ if self.cancellation_event.is_set(): break
+ if not self.cancellation_event.is_set():
+ self.log_signal.emit(f" [Fetcher] Batch submission resumed. Processing batch {batch_num + 1}/{POST_WORKER_NUM_BATCHES}.")
+
+ start_index = batch_num * batch_size
+ end_index = min((batch_num + 1) * batch_size, self.total_posts_to_process)
+ current_batch_posts = all_posts_data[start_index:end_index]
- if missing_keys:
- self.log_signal.emit(f"❌ CRITICAL ERROR: Missing keys for PostProcessorWorker: {', '.join(missing_keys)}");
- self.cancellation_event.set(); break
+ if not current_batch_posts: continue
- try:
- worker_instance = PostProcessorWorker(**worker_init_args)
- if self.thread_pool:
- future = self.thread_pool.submit(worker_instance.process)
- future.add_done_callback(self._handle_future_result)
- self.active_futures.append(future)
- else:
- self.log_signal.emit("⚠️ Thread pool not available. Cannot submit more tasks."); break
- except TypeError as te: self.log_signal.emit(f"❌ TypeError creating PostProcessorWorker: {te}\n Passed Args: [{', '.join(sorted(worker_init_args.keys()))}]\n{traceback.format_exc(limit=5)}"); self.cancellation_event.set(); break
- except RuntimeError: self.log_signal.emit("⚠️ Runtime error submitting task (pool likely shutting down)."); break
- except Exception as e: self.log_signal.emit(f"❌ Error submitting post {post_data_item.get('id','N/A')} to worker: {e}"); break
+ self.log_signal.emit(f" Submitting batch {batch_num + 1}/{POST_WORKER_NUM_BATCHES} ({len(current_batch_posts)} posts) to pool...")
+ for post_data_item in current_batch_posts:
+ if self.cancellation_event.is_set(): break
+ success = self._submit_post_to_worker_pool(post_data_item, worker_args_template, num_file_dl_threads_for_each_worker, emitter_for_worker, ppw_expected_keys, ppw_optional_keys_with_defaults)
+ if success:
+ submitted_count_in_batching += 1
+ elif self.cancellation_event.is_set():
+ break
+
+ if self.cancellation_event.is_set(): break
+
+ if batch_num < POST_WORKER_NUM_BATCHES - 1:
+ self.log_signal.emit(f" Batch {batch_num + 1} submitted. Waiting {POST_WORKER_BATCH_DELAY_SECONDS}s before next batch...")
+ delay_start_time = time.time()
+ while time.time() - delay_start_time < POST_WORKER_BATCH_DELAY_SECONDS:
+ if self.cancellation_event.is_set(): break
+ time.sleep(0.1)
+ if self.cancellation_event.is_set(): break
+
+ self.log_signal.emit(f" All {POST_WORKER_NUM_BATCHES} batches ({submitted_count_in_batching} total tasks) submitted to pool via batching.")
+
+ else: # Standard submission (no batching)
+ self.log_signal.emit(f" Submitting all {self.total_posts_to_process} tasks to pool directly...")
+ submitted_count_direct = 0
+ for post_data_item in all_posts_data:
+ if self.cancellation_event.is_set(): break
+ success = self._submit_post_to_worker_pool(post_data_item, worker_args_template, num_file_dl_threads_for_each_worker, emitter_for_worker, ppw_expected_keys, ppw_optional_keys_with_defaults)
+ if success:
+ submitted_count_direct += 1
+ elif self.cancellation_event.is_set():
+ break
+
+ if not self.cancellation_event.is_set():
+ self.log_signal.emit(f" All {submitted_count_direct} post processing tasks submitted directly to pool.")
+
+ if self.cancellation_event.is_set():
+ self.log_signal.emit(" Cancellation detected after/during task submission loop.")
- if not self.cancellation_event.is_set(): self.log_signal.emit(f" {len(self.active_futures)} post processing tasks submitted to pool.")
- else:
self.finished_signal.emit(self.download_counter, self.skip_counter, True, self.all_kept_original_filenames)
if self.thread_pool: self.thread_pool.shutdown(wait=False, cancel_futures=True); self.thread_pool = None
@@ -2762,10 +3024,14 @@ class DownloaderApp(QWidget):
downloaded_files_from_future, skipped_files_from_future = 0, 0
kept_originals_from_future = []
try:
- if future.cancelled(): self.log_signal.emit(" A post processing task was cancelled.")
- elif future.exception(): self.log_signal.emit(f"❌ Post processing worker error: {future.exception()}")
- else:
- downloaded_files_from_future, skipped_files_from_future, kept_originals_from_future = future.result()
+ if future.cancelled():
+ self.log_signal.emit(" A post processing task was cancelled.")
+ elif future.exception():
+ self.log_signal.emit(f"❌ Post processing worker error: {future.exception()}")
+ else: # Future completed successfully
+ downloaded_files_from_future, skipped_files_from_future, kept_originals_from_future, retryable_failures_from_post = future.result()
+ if retryable_failures_from_post:
+ self.retryable_failed_files_info.extend(retryable_failures_from_post)
with self.downloaded_files_lock:
self.download_counter += downloaded_files_from_future
@@ -2775,26 +3041,63 @@ class DownloaderApp(QWidget):
self.all_kept_original_filenames.extend(kept_originals_from_future)
self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count)
- except Exception as e: self.log_signal.emit(f"❌ Error in _handle_future_result callback: {e}\n{traceback.format_exc(limit=2)}")
+ except Exception as e:
+ self.log_signal.emit(f"❌ Error in _handle_future_result callback: {e}\n{traceback.format_exc(limit=2)}")
+ # If an error occurs, ensure we don't get stuck waiting for this future
+ if self.processed_posts_count < self.total_posts_to_process:
+                self.processed_posts_count = self.total_posts_to_process # Mark as if all processed to allow finish. NOTE(review): this discards counts from futures still in flight and can emit the finished signal early — consider a callback-error flag instead of forcing the counter.
if self.total_posts_to_process > 0 and self.processed_posts_count >= self.total_posts_to_process:
if all(f.done() for f in self.active_futures):
QApplication.processEvents()
self.log_signal.emit("🏁 All submitted post tasks have completed or failed.")
self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set(), self.all_kept_original_filenames)
+ def _get_configurable_widgets_on_pause(self):
+ """Returns a list of widgets that should be re-enabled when paused."""
+ return [
+ self.dir_input, self.dir_button,
+ self.character_input, self.char_filter_scope_toggle_button,
+ self.skip_words_input, self.skip_scope_toggle_button,
+ self.remove_from_filename_input,
+ self.radio_all, self.radio_images, self.radio_videos,
+ self.radio_only_archives, self.radio_only_links, # Radio buttons themselves
+ self.skip_zip_checkbox, self.skip_rar_checkbox,
+ self.download_thumbnails_checkbox, self.compress_images_checkbox,
+ self.use_subfolders_checkbox, self.use_subfolder_per_post_checkbox,
+ self.manga_mode_checkbox,
+ self.manga_rename_toggle_button, # Visibility handled by update_ui_for_manga_mode
+ self.multipart_toggle_button,
+ self.external_links_checkbox
+ ]
def set_ui_enabled(self, enabled):
- widgets_to_toggle = [ self.download_btn, self.link_input, self.radio_all, self.radio_images, self.radio_videos, self.radio_only_links,
- self.skip_zip_checkbox, self.skip_rar_checkbox, self.use_subfolders_checkbox, self.compress_images_checkbox,
- self.download_thumbnails_checkbox, self.use_multithreading_checkbox, self.skip_words_input, self.character_search_input,
- self.new_char_input, self.add_char_button, self.delete_char_button, self.char_filter_scope_toggle_button, # duplicate_file_mode_toggle_button removed
- self.start_page_input, self.end_page_input, self.page_range_label, self.to_label,
- self.character_input, self.custom_folder_input, self.custom_folder_label, self.remove_from_filename_input,
- self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.multipart_toggle_button, self.skip_scope_toggle_button
+ # This list contains all widgets whose enabled state might change.
+ all_potentially_toggleable_widgets = [
+ self.link_input, self.dir_input, self.dir_button,
+ self.page_range_label, self.start_page_input, self.to_label, self.end_page_input,
+ self.character_input, self.char_filter_scope_toggle_button,
+ self.custom_folder_label, self.custom_folder_input,
+ self.skip_words_input, self.skip_scope_toggle_button, self.remove_from_filename_input,
+ self.radio_all, self.radio_images, self.radio_videos, self.radio_only_archives, self.radio_only_links,
+ self.skip_zip_checkbox, self.skip_rar_checkbox, self.download_thumbnails_checkbox, self.compress_images_checkbox,
+ self.use_subfolders_checkbox, self.use_subfolder_per_post_checkbox,
+ self.use_multithreading_checkbox, self.thread_count_input, self.thread_count_label,
+ self.external_links_checkbox, self.manga_mode_checkbox, self.manga_rename_toggle_button,
+ self.multipart_toggle_button,
+ self.character_search_input, self.new_char_input, self.add_char_button, self.delete_char_button,
+ self.reset_button
]
- for widget in widgets_to_toggle:
- if widget: widget.setEnabled(enabled)
+ widgets_to_enable_on_pause = self._get_configurable_widgets_on_pause()
+ download_is_active_or_paused = not enabled # True if a download is running or paused
+
+ for widget in all_potentially_toggleable_widgets:
+ if not widget: continue
+
+ if self.is_paused and widget in widgets_to_enable_on_pause:
+ widget.setEnabled(True) # Re-enable specific widgets if paused
+ else:
+ widget.setEnabled(enabled) # Standard behavior: enable if idle, disable if running
if enabled:
self._handle_filter_mode_change(self.radio_group.checkedButton(), True)
@@ -2802,24 +3105,60 @@ class DownloaderApp(QWidget):
if self.external_links_checkbox:
is_only_links = self.radio_only_links and self.radio_only_links.isChecked()
self.external_links_checkbox.setEnabled(enabled and not is_only_links)
+ if self.is_paused and not is_only_links: # Also re-enable if paused and not in link mode
+ self.external_links_checkbox.setEnabled(True)
if self.log_verbosity_toggle_button: self.log_verbosity_toggle_button.setEnabled(True) # New button, always enabled
multithreading_currently_on = self.use_multithreading_checkbox.isChecked()
- self.thread_count_input.setEnabled(enabled and multithreading_currently_on)
- self.thread_count_label.setEnabled(enabled and multithreading_currently_on)
+ # Thread count related widgets follow 'enabled' strictly (disabled if paused)
+ if self.thread_count_input: self.thread_count_input.setEnabled(enabled and multithreading_currently_on)
+ if self.thread_count_label: self.thread_count_label.setEnabled(enabled and multithreading_currently_on)
subfolders_currently_on = self.use_subfolders_checkbox.isChecked()
- self.use_subfolder_per_post_checkbox.setEnabled(enabled)
+ if self.use_subfolder_per_post_checkbox:
+ self.use_subfolder_per_post_checkbox.setEnabled(enabled or (self.is_paused and self.use_subfolder_per_post_checkbox in widgets_to_enable_on_pause))
- self.cancel_btn.setEnabled(not enabled)
+ # --- Main Action Buttons ---
+ self.download_btn.setEnabled(enabled) # Start Download only enabled when fully idle
+ self.cancel_btn.setEnabled(download_is_active_or_paused) # Cancel enabled if running or paused
+
+ # Pause button logic
+ if self.pause_btn:
+ self.pause_btn.setEnabled(download_is_active_or_paused)
+ if download_is_active_or_paused:
+ self.pause_btn.setText("▶️ Resume Download" if self.is_paused else "⏸️ Pause Download")
+ self.pause_btn.setToolTip("Click to resume the download." if self.is_paused else "Click to pause the download.")
+ else: # Download not active
+ self.pause_btn.setText("⏸️ Pause Download")
+ self.pause_btn.setToolTip("Click to pause the ongoing download process.")
+ self.is_paused = False # Ensure pause state is reset if download finishes/cancels
if enabled: # Ensure these are updated based on current (possibly reset) checkbox states
+ if self.pause_event: self.pause_event.clear()
+
+ # --- UI Updates based on current states ---
+ # These should run if UI is idle OR if paused (to reflect changes made during pause)
+ if enabled or self.is_paused:
self._handle_multithreading_toggle(multithreading_currently_on)
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
self.update_custom_folder_visibility(self.link_input.text())
self.update_page_range_enabled_state()
+ # Re-evaluate filter mode as radio buttons might have been changed during pause
+ if self.radio_group and self.radio_group.checkedButton():
+ self._handle_filter_mode_change(self.radio_group.checkedButton(), True)
+ self.update_ui_for_subfolders(subfolders_currently_on) # Re-evaluate subfolder UI
+ def _handle_pause_resume_action(self):
+ if self._is_download_active(): # Check if a download is actually running
+ self.is_paused = not self.is_paused
+ if self.is_paused:
+ if self.pause_event: self.pause_event.set()
+ self.log_signal.emit("ℹ️ Download paused by user. Some settings can now be changed for subsequent operations.")
+ else:
+ if self.pause_event: self.pause_event.clear()
+ self.log_signal.emit("ℹ️ Download resumed by user.")
+ self.set_ui_enabled(False) # Re-evaluate UI state (buttons will update)
def _perform_soft_ui_reset(self, preserve_url=None, preserve_dir=None):
"""Resets UI elements and some state to app defaults, then applies preserved inputs."""
@@ -2860,9 +3199,11 @@ class DownloaderApp(QWidget):
# 4. Reset operational state variables (but not session-based downloaded_files/hashes)
self.external_link_queue.clear(); self.extracted_links_cache = []
self._is_processing_external_link_queue = False; self._current_link_post_title = None
+ if self.pause_event: self.pause_event.clear()
self.total_posts_to_process = 0; self.processed_posts_count = 0
self.download_counter = 0; self.skip_counter = 0
self.all_kept_original_filenames = []
+ self.is_paused = False # Reset pause state on soft reset
# 5. Update UI based on new (default or preserved) states
self._handle_filter_mode_change(self.radio_group.checkedButton(), True)
@@ -2900,7 +3241,14 @@ class DownloaderApp(QWidget):
self.progress_label.setText("Progress: Cancelled. Ready for new task.")
self.file_progress_label.setText("")
+ if self.pause_event: self.pause_event.clear()
self.log_signal.emit("ℹ️ UI reset. Ready for new operation. Background tasks are being terminated.")
+ self.is_paused = False # Ensure pause state is reset
+
+ # Also clear retryable files on a manual cancel, as the context is lost.
+ if self.retryable_failed_files_info:
+ self.log_signal.emit(f" Discarding {len(self.retryable_failed_files_info)} pending retryable file(s) due to cancellation.")
+ self.retryable_failed_files_info.clear()
def download_finished(self, total_downloaded, total_skipped, cancelled_by_user, kept_original_names_list=None):
if kept_original_names_list is None:
@@ -2910,6 +3258,11 @@ class DownloaderApp(QWidget):
status_message = "Cancelled by user" if cancelled_by_user else "Finished"
+ # If cancelled, don't offer retry for this session's failures
+ if cancelled_by_user and self.retryable_failed_files_info:
+ self.log_signal.emit(f" Download cancelled, discarding {len(self.retryable_failed_files_info)} file(s) that were pending retry.")
+ self.retryable_failed_files_info.clear()
+
summary_log = "="*40
summary_log += f"\n🏁 Download {status_message}!\n Summary: Downloaded Files={total_downloaded}, Skipped Files={total_skipped}\n"
summary_log += "="*40
@@ -2941,6 +3294,8 @@ class DownloaderApp(QWidget):
if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.disconnect(self.update_file_progress_display)
if hasattr(self.download_thread, 'missed_character_post_signal'): # New
self.download_thread.missed_character_post_signal.disconnect(self.handle_missed_character_post)
+ if hasattr(self.download_thread, 'retryable_file_failed_signal'): # New
+ self.download_thread.retryable_file_failed_signal.disconnect(self._handle_retryable_file_failure)
except (TypeError, RuntimeError) as e:
self.log_signal.emit(f"ℹ️ Note during single-thread signal disconnection: {e}")
@@ -2955,9 +3310,166 @@ class DownloaderApp(QWidget):
self.log_signal.emit(" Ensuring worker thread pool is shut down...")
self.thread_pool.shutdown(wait=True, cancel_futures=True)
self.thread_pool = None
+
self.active_futures = []
- self.set_ui_enabled(True)
+ if self.pause_event: self.pause_event.clear()
self.cancel_btn.setEnabled(False)
+ self.is_paused = False # Reset pause state when download finishes
+
+ # Offer to retry failed files if any were collected and not cancelled
+ if not cancelled_by_user and self.retryable_failed_files_info:
+ num_failed = len(self.retryable_failed_files_info)
+ reply = QMessageBox.question(self, "Retry Failed Downloads?",
+ f"{num_failed} file(s) failed with potentially recoverable errors (e.g., IncompleteRead).\n\n"
+ "Would you like to attempt to download these failed files again?",
+ QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes)
+ if reply == QMessageBox.Yes:
+ self._start_failed_files_retry_session()
+ return # Don't fully reset UI if retrying
+ else:
+ self.log_signal.emit("ℹ️ User chose not to retry failed files.")
+ self.retryable_failed_files_info.clear() # Clear if not retrying
+
+ self.set_ui_enabled(True) # Full UI reset if not retrying
+
    def _start_failed_files_retry_session(self):
        """Begin a dedicated session that re-attempts every file collected in
        self.retryable_failed_files_info.

        Disables the main UI (keeping the cancel button usable as a session
        cancel), snapshots the pending retry jobs, resets per-session counters,
        and submits one retry task per failed file to a fresh thread pool.
        Completion bookkeeping happens in _handle_retry_future_result, which is
        attached as a done-callback to every submitted future.
        """
        self.log_signal.emit(f"🔄 Starting retry session for {len(self.retryable_failed_files_info)} file(s)...")
        self.set_ui_enabled(False) # Disable UI, but cancel button will be enabled
        if self.cancel_btn: self.cancel_btn.setText("❌ Cancel Retry")

        # Snapshot the jobs for this session and clear the shared list so a
        # subsequent main download can accumulate its own failures.
        self.files_for_current_retry_session = list(self.retryable_failed_files_info)
        self.retryable_failed_files_info.clear() # Clear original list

        # Per-session counters consumed by _handle_retry_future_result.
        self.active_retry_futures = []
        self.processed_retry_count = 0
        self.succeeded_retry_count = 0
        self.failed_retry_count_in_session = 0 # Renamed to avoid clash
        self.total_files_for_retry = len(self.files_for_current_retry_session)

        self.progress_label.setText(f"Retrying 0 / {self.total_files_for_retry} files...")
        self.cancellation_event.clear() # Clear main cancellation for retry session

        # Worker count: GUI-entered thread count, clamped to at least 1, at most
        # MAX_FILE_THREADS_PER_POST_OR_WORKER, and never more than the number
        # of files queued for retry.
        num_retry_threads = 1
        try:
            num_threads_from_gui = int(self.thread_count_input.text().strip())
            num_retry_threads = max(1, min(num_threads_from_gui, MAX_FILE_THREADS_PER_POST_OR_WORKER, self.total_files_for_retry if self.total_files_for_retry > 0 else 1))
        except ValueError:
            num_retry_threads = 1 # Default to 1 if input is bad

        self.retry_thread_pool = ThreadPoolExecutor(max_workers=num_retry_threads, thread_name_prefix='RetryFile_')

        # Arguments shared by every PostProcessorWorker built for this retry
        # session; per-file specifics are merged in by _execute_single_file_retry.
        common_ppw_args_for_retry = {
            'download_root': self.dir_input.text().strip(),
            'known_names': list(KNOWN_NAMES),
            'emitter': self.worker_to_gui_queue, # Use main queue for progress
            'unwanted_keywords': {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'},
            'filter_mode': self.get_filter_mode(), # Use current filter mode
            'skip_zip': self.skip_zip_checkbox.isChecked(),
            'skip_rar': self.skip_rar_checkbox.isChecked(),
            'use_subfolders': self.use_subfolders_checkbox.isChecked(),
            'use_post_subfolders': self.use_subfolder_per_post_checkbox.isChecked(),
            'compress_images': self.compress_images_checkbox.isChecked(),
            'download_thumbnails': self.download_thumbnails_checkbox.isChecked(),
            'pause_event': self.pause_event,
            'cancellation_event': self.cancellation_event,
            # Shared session-wide dedupe state (names/hashes) with their locks,
            # so retried files respect what the main session already saved.
            'downloaded_files': self.downloaded_files, # Share session's downloaded sets
            'downloaded_file_hashes': self.downloaded_file_hashes,
            'downloaded_files_lock': self.downloaded_files_lock,
            'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock,
            'skip_words_list': [word.strip().lower() for word in self.skip_words_input.text().strip().split(',') if word.strip()],
            'skip_words_scope': self.get_skip_words_scope(),
            'char_filter_scope': self.get_char_filter_scope(),
            'remove_from_filename_words_list': [word.strip() for word in self.remove_from_filename_input.text().strip().split(',') if word.strip()] if hasattr(self, 'remove_from_filename_input') else [],
            'allow_multipart_download': self.allow_multipart_download_setting,
            # These are not strictly needed for retry of a single file if path is fixed, but good to pass
            'filter_character_list': None,
            'dynamic_character_filter_holder': None,
            'target_post_id_from_initial_url': None, # Not relevant for file retry
            'custom_folder_name': None, # Path is already determined
            'num_file_threads': 1, # Each retry task is one file, multipart handled by _download_single_file
            'manga_date_file_counter_ref': None, # Filename is forced
        }

        # One future per failed file; the done-callback drives progress and
        # eventually _retry_session_finished.
        for job_details in self.files_for_current_retry_session:
            future = self.retry_thread_pool.submit(self._execute_single_file_retry, job_details, common_ppw_args_for_retry)
            future.add_done_callback(self._handle_retry_future_result)
            self.active_retry_futures.append(future)
+
+ def _execute_single_file_retry(self, job_details, common_args):
+ """Executes a single file download retry attempt."""
+ # Construct a dummy post_data, service, user_id, api_url_input for PPW init
+ dummy_post_data = {'id': job_details['original_post_id_for_log'], 'title': job_details['post_title']}
+ # Extract service/user from a known URL or pass them if available in job_details
+ # For simplicity, assuming we might not have original service/user easily.
+ # This might affect some logging or minor details in PPW if it relies on them beyond post_id.
+ # Let's assume job_details can store 'service' and 'user_id' from the original post.
+
+ ppw_init_args = {
+ **common_args,
+ 'post_data': dummy_post_data,
+ 'service': job_details.get('service', 'unknown_service'), # Get from job_details or default
+ 'user_id': job_details.get('user_id', 'unknown_user'), # Get from job_details or default
+ 'api_url_input': job_details.get('api_url_input', ''), # Original post's API URL
+ 'manga_mode_active': job_details.get('manga_mode_active_for_file', False),
+ 'manga_filename_style': job_details.get('manga_filename_style_for_file', STYLE_POST_TITLE),
+ }
+ worker = PostProcessorWorker(**ppw_init_args)
+
+ dl_count, skip_count, filename_saved, original_kept, status, _ = worker._download_single_file(
+ file_info=job_details['file_info'],
+ target_folder_path=job_details['target_folder_path'],
+ headers=job_details['headers'],
+ original_post_id_for_log=job_details['original_post_id_for_log'],
+ skip_event=None, # No individual skip for retry items
+ post_title=job_details['post_title'],
+ file_index_in_post=job_details['file_index_in_post'],
+ num_files_in_this_post=job_details['num_files_in_this_post'],
+ forced_filename_override=job_details.get('forced_filename_override')
+ )
+ return dl_count > 0 # True if successful, False otherwise
+
+ def _handle_retry_future_result(self, future):
+ self.processed_retry_count += 1
+ was_successful = False
+ try:
+ if future.cancelled():
+ self.log_signal.emit(" A retry task was cancelled.")
+ elif future.exception():
+ self.log_signal.emit(f"❌ Retry task worker error: {future.exception()}")
+ else:
+ was_successful = future.result()
+ if was_successful:
+ self.succeeded_retry_count += 1
+ else:
+ self.failed_retry_count_in_session += 1
+ except Exception as e:
+ self.log_signal.emit(f"❌ Error in _handle_retry_future_result: {e}")
+ self.failed_retry_count_in_session +=1
+
+ self.progress_label.setText(f"Retrying {self.processed_retry_count} / {self.total_files_for_retry} files... (Succeeded: {self.succeeded_retry_count}, Failed: {self.failed_retry_count_in_session})")
+
+ if self.processed_retry_count >= self.total_files_for_retry:
+ if all(f.done() for f in self.active_retry_futures):
+ self._retry_session_finished()
+
    def _retry_session_finished(self):
        """Tear down the retry session: log a summary, shut down the retry
        thread pool, clear session state, and restore the UI for a new task."""
        self.log_signal.emit("🏁 Retry session finished.")
        self.log_signal.emit(f"    Summary: {self.succeeded_retry_count} Succeeded, {self.failed_retry_count_in_session} Failed.")

        # Wait for the pool so no retry task outlives the session, then drop it.
        if self.retry_thread_pool:
            self.retry_thread_pool.shutdown(wait=True)
            self.retry_thread_pool = None

        self.active_retry_futures.clear()
        self.files_for_current_retry_session.clear()

        self.set_ui_enabled(True) # Re-enable UI
        if self.cancel_btn: self.cancel_btn.setText("❌ Cancel & Reset UI") # Reset cancel button text
        self.progress_label.setText(f"Retry Finished. Succeeded: {self.succeeded_retry_count}, Failed: {self.failed_retry_count_in_session}. Ready for new task.")
        self.file_progress_label.setText("")
        # Ensure a leftover pause cannot stall the next download session.
        if self.pause_event: self.pause_event.clear()
        self.is_paused = False
def toggle_active_log_view(self):
if self.current_log_view == 'progress':
@@ -3008,6 +3520,8 @@ class DownloaderApp(QWidget):
self.total_posts_to_process = 0; self.processed_posts_count = 0; self.download_counter = 0; self.skip_counter = 0
self.all_kept_original_filenames = []
self.cancellation_event.clear()
+ if self.pause_event: self.pause_event.clear()
+ self.is_paused = False # Reset pause state on full reset
self.manga_filename_style = STYLE_POST_TITLE
self.settings.setValue(MANGA_FILENAME_STYLE_KEY, self.manga_filename_style)
@@ -3039,6 +3553,8 @@ class DownloaderApp(QWidget):
self.missed_title_key_terms_examples.clear()
self.logged_summary_for_key_term.clear()
self.already_logged_bold_key_terms.clear()
+ if self.pause_event: self.pause_event.clear()
+ self.is_paused = False # Reset pause state
self.missed_key_terms_buffer.clear()
if self.missed_character_log_output: self.missed_character_log_output.clear()
@@ -3139,6 +3655,29 @@ class DownloaderApp(QWidget):
self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
self.log_signal.emit(f"ℹ️ Multi-part download set to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}")
+ def _open_known_txt_file(self):
+ if not os.path.exists(self.config_file):
+ QMessageBox.warning(self, "File Not Found",
+ f"The file 'Known.txt' was not found at:\n{self.config_file}\n\n"
+ "It will be created automatically when you add a known name or close the application.")
+ self.log_signal.emit(f"ℹ️ 'Known.txt' not found at {self.config_file}. It will be created later.")
+ return
+
+ try:
+ if sys.platform == "win32":
+ os.startfile(self.config_file)
+ elif sys.platform == "darwin": # macOS
+ subprocess.call(['open', self.config_file])
+ else: # Linux and other Unix-like
+ subprocess.call(['xdg-open', self.config_file])
+ self.log_signal.emit(f"ℹ️ Attempted to open '{os.path.basename(self.config_file)}' with the default editor.")
+ except FileNotFoundError: # Should be caught by os.path.exists, but as a fallback
+ QMessageBox.critical(self, "Error", f"Could not find '{os.path.basename(self.config_file)}' at {self.config_file} to open it.")
+ self.log_signal.emit(f"❌ Error: '{os.path.basename(self.config_file)}' not found at {self.config_file} when trying to open.")
+ except Exception as e:
+ QMessageBox.critical(self, "Error Opening File", f"Could not open '{os.path.basename(self.config_file)}':\n{e}")
+ self.log_signal.emit(f"❌ Error opening '{os.path.basename(self.config_file)}': {e}")
+
if __name__ == '__main__':
import traceback
try:
diff --git a/multipart_downloader.py b/multipart_downloader.py
index d93976b..befb139 100644
--- a/multipart_downloader.py
+++ b/multipart_downloader.py
@@ -13,7 +13,7 @@ DOWNLOAD_CHUNK_SIZE_ITER = 1024 * 256 # 256KB for iter_content within a chunk d
def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, headers,
- part_num, total_parts, progress_data, cancellation_event, skip_event,
+ part_num, total_parts, progress_data, cancellation_event, skip_event, pause_event, global_emit_time_ref, # Added global_emit_time_ref
logger_func, emitter=None, api_original_filename=None): # Renamed logger, signals to emitter
"""Downloads a single chunk of a file and writes it to the temp file."""
if cancellation_event and cancellation_event.is_set():
@@ -23,6 +23,15 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Skip event triggered before start.")
return 0, False
+ if pause_event and pause_event.is_set():
+ logger_func(f" [Chunk {part_num + 1}/{total_parts}] Download paused before start...")
+ while pause_event.is_set():
+ if cancellation_event and cancellation_event.is_set():
+ logger_func(f" [Chunk {part_num + 1}/{total_parts}] Download cancelled while paused.")
+ return 0, False
+ time.sleep(0.2) # Shorter sleep for responsive resume
+ logger_func(f" [Chunk {part_num + 1}/{total_parts}] Download resumed.")
+
chunk_headers = headers.copy()
# end_byte can be -1 for 0-byte files, meaning download from start_byte to end of file (which is start_byte itself)
if end_byte != -1 : # For 0-byte files, end_byte might be -1, Range header should not be set or be 0-0
@@ -38,7 +47,7 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
bytes_this_chunk = 0
- last_progress_emit_time_for_chunk = time.time()
+ # last_progress_emit_time_for_chunk = time.time() # Replaced by global_emit_time_ref logic
last_speed_calc_time = time.time()
bytes_at_last_speed_calc = 0
@@ -49,6 +58,14 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
if skip_event and skip_event.is_set():
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Skip event during retry loop.")
return bytes_this_chunk, False
+ if pause_event and pause_event.is_set():
+ logger_func(f" [Chunk {part_num + 1}/{total_parts}] Paused during retry loop...")
+ while pause_event.is_set():
+ if cancellation_event and cancellation_event.is_set():
+ logger_func(f" [Chunk {part_num + 1}/{total_parts}] Cancelled while paused in retry loop.")
+ return bytes_this_chunk, False
+ time.sleep(0.2)
+ logger_func(f" [Chunk {part_num + 1}/{total_parts}] Resumed from retry loop pause.")
try:
if attempt > 0:
@@ -82,6 +99,14 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
if skip_event and skip_event.is_set():
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Skip event during data iteration.")
return bytes_this_chunk, False
+ if pause_event and pause_event.is_set():
+ logger_func(f" [Chunk {part_num + 1}/{total_parts}] Paused during data iteration...")
+ while pause_event.is_set():
+ if cancellation_event and cancellation_event.is_set():
+ logger_func(f" [Chunk {part_num + 1}/{total_parts}] Cancelled while paused in data iteration.")
+ return bytes_this_chunk, False
+ time.sleep(0.2)
+ logger_func(f" [Chunk {part_num + 1}/{total_parts}] Resumed from data iteration pause.")
if data_segment:
f.write(data_segment)
bytes_this_chunk += len(data_segment)
@@ -99,19 +124,19 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
current_speed_bps = (bytes_delta * 8) / time_delta_speed if time_delta_speed > 0 else 0
progress_data['chunks_status'][part_num]['speed_bps'] = current_speed_bps
last_speed_calc_time = current_time
- bytes_at_last_speed_calc = bytes_this_chunk
+ bytes_at_last_speed_calc = bytes_this_chunk
- # Emit progress more frequently from within the chunk download
- if current_time - last_progress_emit_time_for_chunk > 0.1: # Emit up to 10 times/sec per chunk
- if emitter:
+ # Throttle emissions globally for this file download
+ if emitter and (current_time - global_emit_time_ref[0] > 0.25): # Max ~4Hz for the whole file
+ global_emit_time_ref[0] = current_time # Update shared last emit time
+
+ # Prepare and emit the status_list_copy
+ status_list_copy = [dict(s) for s in progress_data['chunks_status']] # Make a deep enough copy
+ if isinstance(emitter, queue.Queue):
+ emitter.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)})
+ elif hasattr(emitter, 'file_progress_signal'): # PostProcessorSignals-like
# Ensure we read the latest total downloaded from progress_data
- # Send a copy of the chunks_status list
- status_list_copy = [dict(s) for s in progress_data['chunks_status']] # Make a deep enough copy
- if isinstance(emitter, queue.Queue):
- emitter.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)})
- elif hasattr(emitter, 'file_progress_signal'): # PostProcessorSignals-like
- emitter.file_progress_signal.emit(api_original_filename, status_list_copy)
- last_progress_emit_time_for_chunk = current_time
+ emitter.file_progress_signal.emit(api_original_filename, status_list_copy)
return bytes_this_chunk, True
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
@@ -134,7 +159,7 @@ def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte,
def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, api_original_filename,
- emitter_for_multipart, cancellation_event, skip_event, logger_func): # Renamed signals, logger
+ emitter_for_multipart, cancellation_event, skip_event, logger_func, pause_event): # Added pause_event
"""
Downloads a file in multiple parts concurrently.
Returns: (download_successful_flag, downloaded_bytes, calculated_file_hash, temp_file_handle_or_None)
@@ -181,7 +206,8 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
{'id': i, 'downloaded': 0, 'total': chunk_actual_sizes[i] if i < len(chunk_actual_sizes) else 0, 'active': False, 'speed_bps': 0.0}
for i in range(num_parts)
],
- 'lock': threading.Lock()
+ 'lock': threading.Lock(),
+ 'last_global_emit_time': [time.time()] # Shared mutable for global throttling timestamp
}
chunk_futures = []
@@ -194,8 +220,8 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
chunk_futures.append(chunk_pool.submit(
_download_individual_chunk, chunk_url=file_url, temp_file_path=temp_file_path,
start_byte=start, end_byte=end, headers=headers, part_num=i, total_parts=num_parts,
- progress_data=progress_data, cancellation_event=cancellation_event, skip_event=skip_event,
- logger_func=logger_func, emitter=emitter_for_multipart, # Pass emitter
+ progress_data=progress_data, cancellation_event=cancellation_event, skip_event=skip_event, global_emit_time_ref=progress_data['last_global_emit_time'],
+ pause_event=pause_event, logger_func=logger_func, emitter=emitter_for_multipart, # Pass pause_event and emitter
api_original_filename=api_original_filename
))
diff --git a/readme.md b/readme.md
index fca28c5..a68c2f4 100644
--- a/readme.md
+++ b/readme.md
@@ -1,4 +1,10 @@
-# Kemono Downloader v3.4.0
+# Kemono Downloader v3.4.0
+
+
+

+
+
+---
A powerful, feature-rich GUI application for downloading content from **[Kemono.su](https://kemono.su)** and **[Coomer.party](https://coomer.party)**.
Built with **PyQt5**, this tool is ideal for users who want deep filtering, customizable folder structures, efficient downloads, and intelligent automation — all within a modern, user-friendly graphical interface.
@@ -9,8 +15,6 @@ Built with **PyQt5**, this tool is ideal for users who want deep filtering, cust
This version brings significant enhancements to manga/comic downloading, filtering capabilities, and user experience:
----
-
### 📖 Enhanced Manga/Comic Mode
- **New "Date Based" Filename Style:**
@@ -29,7 +33,7 @@ This version brings significant enhancements to manga/comic downloading, filteri
- Specify comma-separated words or phrases (case-insensitive) that will be automatically removed from filenames.
-- Example: `patreon, [HD], _final` transforms `AwesomeArt_patreon_[HD]_final.jpg` into `AwesomeArt.jpg`.
+- Example: `patreon, [HD], _final` transforms `AwesomeArt_patreon.jpg` into `AwesomeArt.jpg`, and `Hinata_HD.jpg` into `Hinata.jpg`.
---