diff --git a/Known.txt b/Known.txt index f3a5b38..58451fa 100644 --- a/Known.txt +++ b/Known.txt @@ -1,14 +1,8 @@ -Hanabi intrusive -Hanzo -Hinata -Jett -Makima -Rangiku - Page -Reyna -Sage -Yor -Yoruichi -killjoy -neon -power -viper +Boa Hancock +Hairy D.va +Mercy +Misc +Nami +Robin +Sombra +Yamato diff --git a/downloader_utils.py b/downloader_utils.py index 0886eaa..29b3012 100644 --- a/downloader_utils.py +++ b/downloader_utils.py @@ -18,6 +18,13 @@ except ImportError: print("ERROR: Pillow library not found. Please install it: pip install Pillow") Image = None +try: + from multipart_downloader import download_file_in_parts + MULTIPART_DOWNLOADER_AVAILABLE = True +except ImportError as e: + print(f"Warning: multipart_downloader.py not found or import error: {e}. Multi-part downloads will be disabled.") + MULTIPART_DOWNLOADER_AVAILABLE = False + def download_file_in_parts(*args, **kwargs): return False, 0, None, None # Dummy function from io import BytesIO @@ -32,9 +39,16 @@ CHAR_SCOPE_TITLE = "title" CHAR_SCOPE_FILES = "files" CHAR_SCOPE_BOTH = "both" +# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed. 
+DUPLICATE_MODE_DELETE = "delete" +DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move" + fastapi_app = None KNOWN_NAMES = [] +MIN_SIZE_FOR_MULTIPART_DOWNLOAD = 10 * 1024 * 1024 # 10 MB +MAX_PARTS_FOR_MULTIPART_DOWNLOAD = 8 # Max concurrent connections for a single file + IMAGE_EXTENSIONS = { '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp', '.heic', '.heif', '.svg', '.ico', '.jfif', '.pjpeg', '.pjp', '.avif' @@ -50,20 +64,31 @@ ARCHIVE_EXTENSIONS = { def is_title_match_for_character(post_title, character_name_filter): if not post_title or not character_name_filter: return False - pattern = r"(?i)\b" + re.escape(character_name_filter) + r"\b" - return bool(re.search(pattern, post_title)) + safe_filter = str(character_name_filter).strip() + if not safe_filter: + return False + + pattern = r"(?i)\b" + re.escape(safe_filter) + r"\b" + match_result = bool(re.search(pattern, post_title)) + return match_result def is_filename_match_for_character(filename, character_name_filter): if not filename or not character_name_filter: return False - return character_name_filter.lower() in filename.lower() + + safe_filter = str(character_name_filter).strip().lower() + if not safe_filter: + return False + + match_result = safe_filter in filename.lower() + return match_result def clean_folder_name(name): if not isinstance(name, str): name = str(name) cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name) cleaned = cleaned.strip() - cleaned = re.sub(r'\s+', '_', cleaned) + cleaned = re.sub(r'\s+', ' ', cleaned) return cleaned if cleaned else "untitled_folder" @@ -366,7 +391,7 @@ class PostProcessorSignals(QObject): progress_signal = pyqtSignal(str) file_download_status_signal = pyqtSignal(bool) external_link_signal = pyqtSignal(str, str, str, str) - file_progress_signal = pyqtSignal(str, int, int) + file_progress_signal = pyqtSignal(str, object) class PostProcessorWorker: @@ -384,12 +409,14 @@ class PostProcessorWorker: num_file_threads=4, skip_current_file_flag=None, 
manga_mode_active=False, manga_filename_style=STYLE_POST_TITLE, - char_filter_scope=CHAR_SCOPE_FILES - ): + char_filter_scope=CHAR_SCOPE_FILES, + remove_from_filename_words_list=None, + allow_multipart_download=True, + duplicate_file_mode=DUPLICATE_MODE_DELETE): self.post = post_data self.download_root = download_root self.known_names = known_names - self.filter_character_list = filter_character_list if filter_character_list else [] + self.filter_character_list_objects = filter_character_list if filter_character_list else [] self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else set() self.filter_mode = filter_mode self.skip_zip = skip_zip @@ -421,7 +448,10 @@ class PostProcessorWorker: self.manga_mode_active = manga_mode_active self.manga_filename_style = manga_filename_style self.char_filter_scope = char_filter_scope - + self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else [] + self.allow_multipart_download = allow_multipart_download + self.duplicate_file_mode = duplicate_file_mode # This will be the effective mode (possibly overridden by main.py for manga) + if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found.") self.compress_images = False @@ -438,15 +468,19 @@ class PostProcessorWorker: def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event, post_title="", file_index_in_post=0, num_files_in_this_post=1): was_original_name_kept_flag = False - final_filename_saved_for_return = "" + final_filename_saved_for_return = "" + # current_target_folder_path is the actual folder where the file will be saved. + # It starts as the main character/post folder (target_folder_path) by default. 
+ current_target_folder_path = target_folder_path if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False file_url = file_info.get('url') api_original_filename = file_info.get('_original_name_for_log', file_info.get('name')) - final_filename_saved_for_return = api_original_filename + # This is the ideal name for the file if it were to be saved in the main target_folder_path. + filename_to_save_in_main_path = "" if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_FILES or self.skip_words_scope == SKIP_SCOPE_BOTH): filename_to_check_for_skip_words = api_original_filename.lower() @@ -458,71 +492,55 @@ class PostProcessorWorker: original_filename_cleaned_base, original_ext = os.path.splitext(clean_filename(api_original_filename)) if not original_ext.startswith('.'): original_ext = '.' + original_ext if original_ext else '' - filename_to_save = "" - if self.manga_mode_active: + if self.manga_mode_active: # Note: duplicate_file_mode is overridden to "Delete" in main.py if manga_mode is on if self.manga_filename_style == STYLE_ORIGINAL_NAME: - filename_to_save = clean_filename(api_original_filename) + filename_to_save_in_main_path = clean_filename(api_original_filename) was_original_name_kept_flag = True elif self.manga_filename_style == STYLE_POST_TITLE: if post_title and post_title.strip(): cleaned_post_title_base = clean_filename(post_title.strip()) if num_files_in_this_post > 1: if file_index_in_post == 0: - filename_to_save = f"{cleaned_post_title_base}{original_ext}" - was_original_name_kept_flag = False + filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}" else: - filename_to_save = clean_filename(api_original_filename) + filename_to_save_in_main_path = clean_filename(api_original_filename) was_original_name_kept_flag = True else: - filename_to_save = f"{cleaned_post_title_base}{original_ext}" - was_original_name_kept_flag = False + filename_to_save_in_main_path = 
f"{cleaned_post_title_base}{original_ext}" else: - filename_to_save = clean_filename(api_original_filename) - was_original_name_kept_flag = False - self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save}'.") - else: + filename_to_save_in_main_path = clean_filename(api_original_filename) + self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save_in_main_path}'.") + else: self.logger(f"⚠️ Manga mode: Unknown filename style '{self.manga_filename_style}'. Defaulting to original filename for '{api_original_filename}'.") - filename_to_save = clean_filename(api_original_filename) - was_original_name_kept_flag = False + filename_to_save_in_main_path = clean_filename(api_original_filename) - if filename_to_save: - counter = 1 - base_name_coll, ext_coll = os.path.splitext(filename_to_save) - temp_filename_for_collision_check = filename_to_save - while os.path.exists(os.path.join(target_folder_path, temp_filename_for_collision_check)): - if self.manga_filename_style == STYLE_POST_TITLE and file_index_in_post == 0 and num_files_in_this_post > 1: - temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}" - else: - temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}" - counter += 1 - if temp_filename_for_collision_check != filename_to_save: - filename_to_save = temp_filename_for_collision_check - else: - filename_to_save = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}" - self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save}'.") + if not filename_to_save_in_main_path: + filename_to_save_in_main_path = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}" + self.logger(f"⚠️ Manga mode: Generated filename was empty. 
Using generic fallback: '{filename_to_save_in_main_path}'.") was_original_name_kept_flag = False - - else: - filename_to_save = clean_filename(api_original_filename) + else: + filename_to_save_in_main_path = clean_filename(api_original_filename) was_original_name_kept_flag = False - counter = 1 - base_name_coll, ext_coll = os.path.splitext(filename_to_save) - temp_filename_for_collision_check = filename_to_save - while os.path.exists(os.path.join(target_folder_path, temp_filename_for_collision_check)): - temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}" - counter += 1 - if temp_filename_for_collision_check != filename_to_save: - filename_to_save = temp_filename_for_collision_check - - final_filename_for_sets_and_saving = filename_to_save - final_filename_saved_for_return = final_filename_for_sets_and_saving - - if not self.download_thumbnails: + + if self.remove_from_filename_words_list and filename_to_save_in_main_path: + base_name_for_removal, ext_for_removal = os.path.splitext(filename_to_save_in_main_path) + modified_base_name = base_name_for_removal + for word_to_remove in self.remove_from_filename_words_list: + if not word_to_remove: continue + pattern = re.compile(re.escape(word_to_remove), re.IGNORECASE) + modified_base_name = pattern.sub("", modified_base_name) + modified_base_name = re.sub(r'[_.\s-]+', '_', modified_base_name) + modified_base_name = modified_base_name.strip('_') + if modified_base_name and modified_base_name != ext_for_removal.lstrip('.'): + filename_to_save_in_main_path = modified_base_name + ext_for_removal + else: + filename_to_save_in_main_path = base_name_for_removal + ext_for_removal + + if not self.download_thumbnails: is_img_type = is_image(api_original_filename) is_vid_type = is_video(api_original_filename) is_archive_type = is_archive(api_original_filename) - if self.filter_mode == 'archive': if not is_archive_type: self.logger(f" -> Filter Skip (Archive Mode): '{api_original_filename}' (Not an 
Archive).") @@ -543,174 +561,265 @@ class PostProcessorWorker: self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).") return 0, 1, api_original_filename, False - target_folder_basename = os.path.basename(target_folder_path) - current_save_path = os.path.join(target_folder_path, final_filename_for_sets_and_saving) + if not self.manga_mode_active: + # --- Pre-Download Duplicate Handling (Standard Mode Only) --- + is_duplicate_for_main_folder_by_path = os.path.exists(os.path.join(target_folder_path, filename_to_save_in_main_path)) and \ + os.path.getsize(os.path.join(target_folder_path, filename_to_save_in_main_path)) > 0 + + is_duplicate_for_main_folder_by_session_name = False + with self.downloaded_files_lock: + if filename_to_save_in_main_path in self.downloaded_files: + is_duplicate_for_main_folder_by_session_name = True - if os.path.exists(current_save_path) and os.path.getsize(current_save_path) > 0: - self.logger(f" -> Exists (Path): '{final_filename_for_sets_and_saving}' in '{target_folder_basename}'.") - with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving) - return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag + if is_duplicate_for_main_folder_by_path or is_duplicate_for_main_folder_by_session_name: + if self.duplicate_file_mode == DUPLICATE_MODE_DELETE: + reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name" + self.logger(f" -> Delete Duplicate ({reason}): '{filename_to_save_in_main_path}'. Skipping download.") + with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag + + elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER: + reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name" + self.logger(f" -> Pre-DL Move ({reason}): '{filename_to_save_in_main_path}'. 
Will target 'Duplicate' subfolder.") + current_target_folder_path = os.path.join(target_folder_path, "Duplicate") + with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) - with self.downloaded_files_lock: - if final_filename_for_sets_and_saving in self.downloaded_files: - self.logger(f" -> Global Skip (Filename): '{final_filename_for_sets_and_saving}' already recorded this session.") - return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag + try: + os.makedirs(current_target_folder_path, exist_ok=True) + except OSError as e: + self.logger(f" ❌ Critical error creating directory '{current_target_folder_path}': {e}. Skipping file '{api_original_filename}'.") + return 0, 1, api_original_filename, False + + # If mode is MOVE (and not manga mode), and current_target_folder_path is now "Duplicate", + # check if the file *already* exists by its base name in this "Duplicate" folder. (Standard Mode Only) + if not self.manga_mode_active and \ + self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \ + "Duplicate" in current_target_folder_path.split(os.sep) and \ + os.path.exists(os.path.join(current_target_folder_path, filename_to_save_in_main_path)): + self.logger(f" -> File '{filename_to_save_in_main_path}' already exists in '{os.path.basename(current_target_folder_path)}' subfolder. Skipping download.") + # The name was already added to downloaded_files if it was a pre-DL move. 
+ return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag + # --- Download Attempt --- max_retries = 3 retry_delay = 5 downloaded_size_bytes = 0 calculated_file_hash = None - file_content_bytes = None - total_size_bytes = 0 - download_successful_flag = False - - for attempt_num in range(max_retries + 1): - if self.check_cancel() or (skip_event and skip_event.is_set()): - break + file_content_bytes = None + total_size_bytes = 0 + download_successful_flag = False + + for attempt_num_single_stream in range(max_retries + 1): + if self.check_cancel() or (skip_event and skip_event.is_set()): break try: - if attempt_num > 0: - self.logger(f" Retrying '{api_original_filename}' (Attempt {attempt_num}/{max_retries})...") - time.sleep(retry_delay * (2**(attempt_num - 1))) - + if attempt_num_single_stream > 0: + self.logger(f" Retrying download for '{api_original_filename}' (Overall Attempt {attempt_num_single_stream + 1}/{max_retries + 1})...") + time.sleep(retry_delay * (2**(attempt_num_single_stream - 1))) + if self.signals and hasattr(self.signals, 'file_download_status_signal'): self.signals.file_download_status_signal.emit(True) - + response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True) response.raise_for_status() + total_size_bytes = int(response.headers.get('Content-Length', 0)) - current_total_size_bytes_from_headers = int(response.headers.get('Content-Length', 0)) + num_parts_for_file = min(self.num_file_threads, MAX_PARTS_FOR_MULTIPART_DOWNLOAD) + attempt_multipart = (self.allow_multipart_download and MULTIPART_DOWNLOADER_AVAILABLE and + num_parts_for_file > 1 and total_size_bytes > MIN_SIZE_FOR_MULTIPART_DOWNLOAD and + 'bytes' in response.headers.get('Accept-Ranges', '').lower()) - if attempt_num == 0: - total_size_bytes = current_total_size_bytes_from_headers - size_str = f"{total_size_bytes / (1024 * 1024):.2f} MB" if total_size_bytes > 0 else "unknown size" - self.logger(f"⬇️ Downloading: '{api_original_filename}' 
(Size: {size_str}) [Saving as: '{final_filename_for_sets_and_saving}']") - - current_attempt_total_size = total_size_bytes + if attempt_multipart: + response.close() + if self.signals and hasattr(self.signals, 'file_download_status_signal'): + self.signals.file_download_status_signal.emit(False) + + mp_save_path_base = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts( + file_url, mp_save_path_base, total_size_bytes, num_parts_for_file, headers, + api_original_filename, self.signals, self.cancellation_event, skip_event, self.logger + ) + if mp_success: + download_successful_flag = True + downloaded_size_bytes = mp_bytes + calculated_file_hash = mp_hash + file_content_bytes = mp_file_handle + break + else: + if attempt_num_single_stream < max_retries: + self.logger(f" Multi-part download attempt failed for '{api_original_filename}'. Retrying with single stream.") + else: + download_successful_flag = False; break + self.logger(f"⬇️ Downloading (Single Stream): '{api_original_filename}' (Size: {total_size_bytes / (1024*1024):.2f} MB if known) [Base Name: '{filename_to_save_in_main_path}']") file_content_buffer = BytesIO() current_attempt_downloaded_bytes = 0 md5_hasher = hashlib.md5() last_progress_time = time.time() for chunk in response.iter_content(chunk_size=1 * 1024 * 1024): - if self.check_cancel() or (skip_event and skip_event.is_set()): - break + if self.check_cancel() or (skip_event and skip_event.is_set()): break if chunk: - file_content_buffer.write(chunk) - md5_hasher.update(chunk) + file_content_buffer.write(chunk); md5_hasher.update(chunk) current_attempt_downloaded_bytes += len(chunk) - if time.time() - last_progress_time > 1 and current_attempt_total_size > 0 and \ + if time.time() - last_progress_time > 1 and total_size_bytes > 0 and \ self.signals and hasattr(self.signals, 'file_progress_signal'): - self.signals.file_progress_signal.emit( - 
api_original_filename, - current_attempt_downloaded_bytes, - current_attempt_total_size - ) + self.signals.file_progress_signal.emit(api_original_filename, (current_attempt_downloaded_bytes, total_size_bytes)) last_progress_time = time.time() if self.check_cancel() or (skip_event and skip_event.is_set()): - if file_content_buffer: file_content_buffer.close() - break + if file_content_buffer: file_content_buffer.close(); break - if current_attempt_downloaded_bytes > 0 or (current_attempt_total_size == 0 and response.status_code == 200): + if current_attempt_downloaded_bytes > 0 or (total_size_bytes == 0 and response.status_code == 200): calculated_file_hash = md5_hasher.hexdigest() downloaded_size_bytes = current_attempt_downloaded_bytes - if file_content_bytes: file_content_bytes.close() - file_content_bytes = file_content_buffer - file_content_bytes.seek(0) - download_successful_flag = True - break - else: + if file_content_bytes: file_content_bytes.close() + file_content_bytes = file_content_buffer; file_content_bytes.seek(0) + download_successful_flag = True; break + else: if file_content_buffer: file_content_buffer.close() except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e: self.logger(f" ❌ Download Error (Retryable): {api_original_filename}. Error: {e}") if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close() - except requests.exceptions.RequestException as e: + except requests.exceptions.RequestException as e: self.logger(f" ❌ Download Error (Non-Retryable): {api_original_filename}. 
Error: {e}") - if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close() - break + if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close(); break except Exception as e: self.logger(f" ❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}") - if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close() - break + if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close(); break finally: if self.signals and hasattr(self.signals, 'file_download_status_signal'): self.signals.file_download_status_signal.emit(False) - + if self.signals and hasattr(self.signals, 'file_progress_signal'): final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes - self.signals.file_progress_signal.emit(api_original_filename, downloaded_size_bytes, final_total_for_progress) + self.signals.file_progress_signal.emit(api_original_filename, (downloaded_size_bytes, final_total_for_progress)) if self.check_cancel() or (skip_event and skip_event.is_set()): - self.logger(f" ⚠️ Download interrupted for {api_original_filename}.") + self.logger(f" ⚠️ Download process interrupted for {api_original_filename}.") if file_content_bytes: file_content_bytes.close() - return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag if not download_successful_flag: self.logger(f"❌ Download failed for '{api_original_filename}' after {max_retries + 1} attempts.") if file_content_bytes: file_content_bytes.close() - return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag - with self.downloaded_file_hashes_lock: - if calculated_file_hash in self.downloaded_file_hashes: - self.logger(f" -> Content Skip (Hash): 
'{api_original_filename}' (Hash: {calculated_file_hash[:8]}...) already downloaded this session.") - with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving) - if file_content_bytes: file_content_bytes.close() - return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag + if not self.manga_mode_active: + # --- Post-Download Hash Check (Standard Mode Only) --- + with self.downloaded_file_hashes_lock: + if calculated_file_hash in self.downloaded_file_hashes: + if self.duplicate_file_mode == DUPLICATE_MODE_DELETE: + self.logger(f" -> Delete Duplicate (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). Skipping save.") + with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) + if file_content_bytes: file_content_bytes.close() + return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag + + elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER: + self.logger(f" -> Post-DL Move (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). 
Content already downloaded.") + if "Duplicate" not in current_target_folder_path.split(os.sep): + current_target_folder_path = os.path.join(target_folder_path, "Duplicate") + self.logger(f" Redirecting to 'Duplicate' subfolder: '{current_target_folder_path}'") + # Ensure "Duplicate" folder exists if this is a new redirection due to hash + try: os.makedirs(current_target_folder_path, exist_ok=True) + except OSError as e_mkdir_hash: self.logger(f" Error creating Duplicate folder for hash collision: {e_mkdir_hash}") + with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) + + # --- Final Filename Determination for Saving --- + filename_for_actual_save = filename_to_save_in_main_path - bytes_to_write = file_content_bytes - final_filename_after_processing = final_filename_for_sets_and_saving - current_save_path_final = current_save_path + # If mode is MOVE (and not manga mode) and the file is destined for the main folder, + # but a file with that name *now* exists (e.g. race condition, or different file with same name not caught by hash), + # reroute it to the "Duplicate" folder. + if not self.manga_mode_active and \ + self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \ + current_target_folder_path == target_folder_path and \ + os.path.exists(os.path.join(current_target_folder_path, filename_for_actual_save)): + self.logger(f" -> Post-DL Move (Late Name Collision in Main): '{filename_for_actual_save}'. Moving to 'Duplicate'.") + current_target_folder_path = os.path.join(target_folder_path, "Duplicate") + try: # Ensure "Duplicate" folder exists if this is a new redirection + os.makedirs(current_target_folder_path, exist_ok=True) + except OSError as e_mkdir: self.logger(f" Error creating Duplicate folder during late move: {e_mkdir}") + # The name filename_to_save_in_main_path was already added to downloaded_files if it was a pre-DL name collision. + # If it was a hash collision that got rerouted, it was also added. 
+ # If this is a new reroute due to late name collision, ensure it's marked. + + # Apply numeric suffix renaming (_1, _2) *only if needed within the current_target_folder_path* + # This means: + # - If current_target_folder_path is the main folder (and not MOVE mode, or MOVE mode but file was unique): + # Renaming happens if a file with filename_for_actual_save exists there. + # - If current_target_folder_path is "Duplicate" (because of MOVE mode): + # Renaming happens if filename_for_actual_save exists *within "Duplicate"*. + counter = 1 + base_name_final_coll, ext_final_coll = os.path.splitext(filename_for_actual_save) + temp_filename_final_check = filename_for_actual_save + while os.path.exists(os.path.join(current_target_folder_path, temp_filename_final_check)): + temp_filename_final_check = f"{base_name_final_coll}_{counter}{ext_final_coll}" + counter += 1 + if temp_filename_final_check != filename_for_actual_save: + self.logger(f" Final rename for target folder '{os.path.basename(current_target_folder_path)}': '{temp_filename_final_check}' (was '{filename_for_actual_save}')") + filename_for_actual_save = temp_filename_final_check + + bytes_to_write = file_content_bytes + final_filename_after_processing = filename_for_actual_save + current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing) + is_img_for_compress_check = is_image(api_original_filename) if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024): self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...") try: - bytes_to_write.seek(0) - with Image.open(bytes_to_write) as img_obj: + bytes_to_write.seek(0) + with Image.open(bytes_to_write) as img_obj: if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA') elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB') - compressed_bytes_io = BytesIO() img_obj.save(compressed_bytes_io, 
format='WebP', quality=80, method=4) compressed_size = compressed_bytes_io.getbuffer().nbytes - if compressed_size < downloaded_size_bytes * 0.9: + if compressed_size < downloaded_size_bytes * 0.9: self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.") - bytes_to_write.close() - bytes_to_write = compressed_bytes_io - bytes_to_write.seek(0) - - base_name_orig, _ = os.path.splitext(final_filename_for_sets_and_saving) + if hasattr(bytes_to_write, 'close'): bytes_to_write.close() + + original_part_file_path = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for .part + if os.path.exists(original_part_file_path): + os.remove(original_part_file_path) + + bytes_to_write = compressed_bytes_io; bytes_to_write.seek(0) + base_name_orig, _ = os.path.splitext(filename_for_actual_save) final_filename_after_processing = base_name_orig + '.webp' - current_save_path_final = os.path.join(target_folder_path, final_filename_after_processing) + current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing) self.logger(f" Updated filename (compressed): {final_filename_after_processing}") else: self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0) except Exception as comp_e: self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. 
Saving original."); bytes_to_write.seek(0) - final_filename_saved_for_return = final_filename_after_processing - - if final_filename_after_processing != final_filename_for_sets_and_saving and \ + if final_filename_after_processing != filename_for_actual_save and \ os.path.exists(current_save_path_final) and os.path.getsize(current_save_path_final) > 0: - self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{target_folder_basename}'.") - with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing) - bytes_to_write.close() + self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{os.path.basename(current_target_folder_path)}'.") + with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) + if bytes_to_write and hasattr(bytes_to_write, 'close'): bytes_to_write.close() return 0, 1, final_filename_after_processing, was_original_name_kept_flag try: - os.makedirs(os.path.dirname(current_save_path_final), exist_ok=True) - with open(current_save_path_final, 'wb') as f_out: - f_out.write(bytes_to_write.getvalue()) + os.makedirs(current_target_folder_path, exist_ok=True) + + if isinstance(bytes_to_write, BytesIO): + with open(current_save_path_final, 'wb') as f_out: + f_out.write(bytes_to_write.getvalue()) + else: + if hasattr(bytes_to_write, 'close'): bytes_to_write.close() + source_part_file = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for .part + os.rename(source_part_file, current_save_path_final) with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash) - with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing) - - self.logger(f"✅ Saved: '{final_filename_after_processing}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{target_folder_basename}'") + with 
self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) + + final_filename_saved_for_return = final_filename_after_processing + self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(current_target_folder_path)}'") time.sleep(0.05) - return 1, 0, final_filename_after_processing, was_original_name_kept_flag + return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag except Exception as save_err: self.logger(f"❌ Save Fail for '{final_filename_after_processing}': {save_err}") if os.path.exists(current_save_path_final): @@ -718,7 +827,8 @@ class PostProcessorWorker: except OSError: self.logger(f" -> Failed to remove partially saved file: {current_save_path_final}") return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag finally: - if bytes_to_write: bytes_to_write.close() + if bytes_to_write and hasattr(bytes_to_write, 'close'): + bytes_to_write.close() def process(self): @@ -749,16 +859,32 @@ class PostProcessorWorker: post_is_candidate_by_title_char_match = False char_filter_that_matched_title = None - if self.filter_character_list and \ + if self.filter_character_list_objects and \ (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH): - for char_name in self.filter_character_list: - if is_title_match_for_character(post_title, char_name): - post_is_candidate_by_title_char_match = True - char_filter_that_matched_title = char_name - self.logger(f" Post title matches char filter '{char_name}' (Scope: {self.char_filter_scope}). Post is candidate.") - break + self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. 
Scope: {self.char_filter_scope}") + for idx, filter_item_obj in enumerate(self.filter_character_list_objects): + self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}") + terms_to_check_for_title = list(filter_item_obj["aliases"]) + if filter_item_obj["is_group"]: + if filter_item_obj["name"] not in terms_to_check_for_title: + terms_to_check_for_title.append(filter_item_obj["name"]) + + unique_terms_for_title_check = list(set(terms_to_check_for_title)) + self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}") + + for term_to_match in unique_terms_for_title_check: + self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'") + match_found_for_term = is_title_match_for_character(post_title, term_to_match) + self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}") + if match_found_for_term: + post_is_candidate_by_title_char_match = True + char_filter_that_matched_title = filter_item_obj + self.logger(f" Post title matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}', Scope: {self.char_filter_scope}). Post is candidate.") + break + if post_is_candidate_by_title_char_match: break + self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}") - if self.filter_character_list and self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match: + if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match: self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.") return 0, num_potential_files_in_post, [] @@ -769,7 +895,7 @@ class PostProcessorWorker: self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'. 
Scope: {self.skip_words_scope}") return 0, num_potential_files_in_post, [] - if not self.extract_links_only and self.manga_mode_active and self.filter_character_list and \ + if not self.extract_links_only and self.manga_mode_active and self.filter_character_list_objects and \ (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and \ not post_is_candidate_by_title_char_match: self.logger(f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title[:50]}' doesn't match filters.") @@ -782,8 +908,8 @@ class PostProcessorWorker: base_folder_names_for_post_content = [] if not self.extract_links_only and self.use_subfolders: if post_is_candidate_by_title_char_match and char_filter_that_matched_title: - base_folder_names_for_post_content = [clean_folder_name(char_filter_that_matched_title)] - else: + base_folder_names_for_post_content = [clean_folder_name(char_filter_that_matched_title["name"])] + elif not self.filter_character_list_objects: derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords) if derived_folders: base_folder_names_for_post_content.extend(derived_folders) @@ -791,7 +917,10 @@ class PostProcessorWorker: base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords)) if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]: base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")] - self.logger(f" Base folder name(s) for post content (if title matched char or generic): {', '.join(base_folder_names_for_post_content)}") + + if base_folder_names_for_post_content: + log_reason = "Matched char filter" if (post_is_candidate_by_title_char_match and char_filter_that_matched_title) else "Generic title parsing (no char filters)" + self.logger(f" Base folder name(s) for post content ({log_reason}): {', 
'.join(base_folder_names_for_post_content)}") if not self.extract_links_only and self.use_subfolders and self.skip_words_list: for folder_name_to_check in base_folder_names_for_post_content: @@ -907,28 +1036,49 @@ class PostProcessorWorker: current_api_original_filename = file_info_to_dl.get('_original_name_for_log') file_is_candidate_by_char_filter_scope = False - char_filter_that_matched_file = None + char_filter_info_that_matched_file = None - if not self.filter_character_list: + if not self.filter_character_list_objects: file_is_candidate_by_char_filter_scope = True - elif self.char_filter_scope == CHAR_SCOPE_FILES: - for char_name in self.filter_character_list: - if is_filename_match_for_character(current_api_original_filename, char_name): + else: + if self.char_filter_scope == CHAR_SCOPE_FILES: + for filter_item_obj in self.filter_character_list_objects: + terms_to_check_for_file = list(filter_item_obj["aliases"]) + if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_for_file: + terms_to_check_for_file.append(filter_item_obj["name"]) + unique_terms_for_file_check = list(set(terms_to_check_for_file)) + + for term_to_match in unique_terms_for_file_check: + if is_filename_match_for_character(current_api_original_filename, term_to_match): + file_is_candidate_by_char_filter_scope = True + char_filter_info_that_matched_file = filter_item_obj + self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). 
Scope: Files.") + break + if file_is_candidate_by_char_filter_scope: break + elif self.char_filter_scope == CHAR_SCOPE_TITLE: + if post_is_candidate_by_title_char_match: file_is_candidate_by_char_filter_scope = True - char_filter_that_matched_file = char_name - break - elif self.char_filter_scope == CHAR_SCOPE_TITLE: - if post_is_candidate_by_title_char_match: - file_is_candidate_by_char_filter_scope = True - elif self.char_filter_scope == CHAR_SCOPE_BOTH: - if post_is_candidate_by_title_char_match: - file_is_candidate_by_char_filter_scope = True - else: - for char_name in self.filter_character_list: - if is_filename_match_for_character(current_api_original_filename, char_name): - file_is_candidate_by_char_filter_scope = True - char_filter_that_matched_file = char_name - break + char_filter_info_that_matched_file = char_filter_that_matched_title + self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. Scope: Title.") + elif self.char_filter_scope == CHAR_SCOPE_BOTH: + if post_is_candidate_by_title_char_match: + file_is_candidate_by_char_filter_scope = True + char_filter_info_that_matched_file = char_filter_that_matched_title + self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. 
Scope: Both (Title part).") + else: + for filter_item_obj in self.filter_character_list_objects: + terms_to_check_for_file_both = list(filter_item_obj["aliases"]) + if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_for_file_both: + terms_to_check_for_file_both.append(filter_item_obj["name"]) + unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both)) + + for term_to_match in unique_terms_for_file_both_check: + if is_filename_match_for_character(current_api_original_filename, term_to_match): + file_is_candidate_by_char_filter_scope = True + char_filter_info_that_matched_file = filter_item_obj + self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Both (File part).") + break + if file_is_candidate_by_char_filter_scope: break if not file_is_candidate_by_char_filter_scope: self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.") @@ -941,10 +1091,10 @@ class PostProcessorWorker: char_title_subfolder_name = None if self.target_post_id_from_initial_url and self.custom_folder_name: char_title_subfolder_name = self.custom_folder_name - elif char_filter_that_matched_title: - char_title_subfolder_name = clean_folder_name(char_filter_that_matched_title) - elif char_filter_that_matched_file: - char_title_subfolder_name = clean_folder_name(char_filter_that_matched_file) + elif char_filter_info_that_matched_file: + char_title_subfolder_name = clean_folder_name(char_filter_info_that_matched_file["name"]) + elif char_filter_that_matched_title: + char_title_subfolder_name = clean_folder_name(char_filter_that_matched_title["name"]) elif base_folder_names_for_post_content: char_title_subfolder_name = base_folder_names_for_post_content[0] @@ -953,7 +1103,7 @@ class PostProcessorWorker: if self.use_post_subfolders: cleaned_title_for_subfolder = clean_folder_name(post_title) - 
post_specific_subfolder_name = f"{post_id}_{cleaned_title_for_subfolder}" if cleaned_title_for_subfolder else f"{post_id}_untitled" + post_specific_subfolder_name = cleaned_title_for_subfolder # Use only the cleaned title current_path_for_file = os.path.join(current_path_for_file, post_specific_subfolder_name) target_folder_path_for_this_file = current_path_for_file @@ -990,7 +1140,7 @@ class PostProcessorWorker: total_skipped_this_post += 1 if self.signals and hasattr(self.signals, 'file_progress_signal'): - self.signals.file_progress_signal.emit("", 0, 0) + self.signals.file_progress_signal.emit("", None) if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled."); else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}") @@ -1004,7 +1154,7 @@ class DownloadThread(QThread): file_download_status_signal = pyqtSignal(bool) finished_signal = pyqtSignal(int, int, bool, list) external_link_signal = pyqtSignal(str, str, str, str) - file_progress_signal = pyqtSignal(str, int, int) + file_progress_signal = pyqtSignal(str, object) def __init__(self, api_url_input, output_dir, known_names_copy, @@ -1025,8 +1175,10 @@ class DownloadThread(QThread): manga_mode_active=False, unwanted_keywords=None, manga_filename_style=STYLE_POST_TITLE, - char_filter_scope=CHAR_SCOPE_FILES - ): + char_filter_scope=CHAR_SCOPE_FILES, + remove_from_filename_words_list=None, + allow_multipart_download=True, + duplicate_file_mode=DUPLICATE_MODE_DELETE): # Default to DELETE super().__init__() self.api_url_input = api_url_input self.output_dir = output_dir @@ -1034,7 +1186,7 @@ class DownloadThread(QThread): self.cancellation_event = cancellation_event self.skip_current_file_flag = skip_current_file_flag self.initial_target_post_id = target_post_id_from_initial_url - self.filter_character_list = filter_character_list if filter_character_list else [] + self.filter_character_list_objects = 
filter_character_list if filter_character_list else [] self.filter_mode = filter_mode self.skip_zip = skip_zip self.skip_rar = skip_rar @@ -1065,7 +1217,9 @@ class DownloadThread(QThread): {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'} self.manga_filename_style = manga_filename_style self.char_filter_scope = char_filter_scope - + self.remove_from_filename_words_list = remove_from_filename_words_list + self.allow_multipart_download = allow_multipart_download + self.duplicate_file_mode = duplicate_file_mode if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") self.compress_images = False @@ -1116,7 +1270,7 @@ class DownloadThread(QThread): post_data=individual_post_data, download_root=self.output_dir, known_names=self.known_names, - filter_character_list=self.filter_character_list, + filter_character_list=self.filter_character_list_objects, unwanted_keywords=self.unwanted_keywords, filter_mode=self.filter_mode, skip_zip=self.skip_zip, skip_rar=self.skip_rar, @@ -1140,8 +1294,10 @@ class DownloadThread(QThread): skip_current_file_flag=self.skip_current_file_flag, manga_mode_active=self.manga_mode_active, manga_filename_style=self.manga_filename_style, - char_filter_scope=self.char_filter_scope - ) + char_filter_scope=self.char_filter_scope, + remove_from_filename_words_list=self.remove_from_filename_words_list, + allow_multipart_download=self.allow_multipart_download, + duplicate_file_mode=self.duplicate_file_mode) try: dl_count, skip_count, kept_originals_this_post = post_processing_worker.process() grand_total_downloaded_files += dl_count diff --git a/main.py b/main.py index 7cc0dd0..18b36ad 100644 --- a/main.py +++ b/main.py @@ -19,12 +19,12 @@ from PyQt5.QtGui import ( ) from PyQt5.QtWidgets import ( QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton, - QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, + QVBoxLayout, QHBoxLayout, QFileDialog, 
QMessageBox, QListWidget, QDesktopWidget, QRadioButton, QButtonGroup, QCheckBox, QSplitter, QSizePolicy, QDialog, QFrame, QAbstractButton ) -from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer, QSettings +from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer, QSettings, QStandardPaths from urllib.parse import urlparse try: @@ -47,6 +47,9 @@ try: SKIP_SCOPE_FILES, SKIP_SCOPE_POSTS, SKIP_SCOPE_BOTH, + CHAR_SCOPE_TITLE, # Added for completeness if used directly + CHAR_SCOPE_FILES, # Added + CHAR_SCOPE_BOTH # Added ) print("Successfully imported names from downloader_utils.") except ImportError as e: @@ -62,6 +65,9 @@ except ImportError as e: SKIP_SCOPE_FILES = "files" SKIP_SCOPE_POSTS = "posts" SKIP_SCOPE_BOTH = "both" + CHAR_SCOPE_TITLE = "title" + CHAR_SCOPE_FILES = "files" + CHAR_SCOPE_BOTH = "both" except Exception as e: print(f"--- UNEXPECTED IMPORT ERROR ---") @@ -97,11 +103,16 @@ MANGA_FILENAME_STYLE_KEY = "mangaFilenameStyleV1" STYLE_POST_TITLE = "post_title" STYLE_ORIGINAL_NAME = "original_name" SKIP_WORDS_SCOPE_KEY = "skipWordsScopeV1" +ALLOW_MULTIPART_DOWNLOAD_KEY = "allowMultipartDownloadV1" CHAR_FILTER_SCOPE_KEY = "charFilterScopeV1" -CHAR_SCOPE_TITLE = "title" -CHAR_SCOPE_FILES = "files" -CHAR_SCOPE_BOTH = "both" +# CHAR_SCOPE_TITLE, CHAR_SCOPE_FILES, CHAR_SCOPE_BOTH are already defined or imported + +DUPLICATE_FILE_MODE_KEY = "duplicateFileModeV1" +# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed. 
+DUPLICATE_MODE_DELETE = "delete" +DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move" # New mode + class DownloaderApp(QWidget): @@ -111,13 +122,35 @@ class DownloaderApp(QWidget): overall_progress_signal = pyqtSignal(int, int) finished_signal = pyqtSignal(int, int, bool, list) external_link_signal = pyqtSignal(str, str, str, str) - file_progress_signal = pyqtSignal(str, int, int) + # Changed to object to handle both (int, int) for single stream and list for multipart + file_progress_signal = pyqtSignal(str, object) def __init__(self): super().__init__() self.settings = QSettings(CONFIG_ORGANIZATION_NAME, CONFIG_APP_NAME_MAIN) - self.config_file = "Known.txt" + + # Determine path for Known.txt in user's app data directory + app_config_dir = "" + try: + # Use AppLocalDataLocation for user-specific, non-roaming data + app_data_root = QStandardPaths.writableLocation(QStandardPaths.AppLocalDataLocation) + if not app_data_root: # Fallback if somehow empty + app_data_root = QStandardPaths.writableLocation(QStandardPaths.GenericDataLocation) + + if app_data_root and CONFIG_ORGANIZATION_NAME: + app_config_dir = os.path.join(app_data_root, CONFIG_ORGANIZATION_NAME) + elif app_data_root: # If no org name, use a generic app name folder + app_config_dir = os.path.join(app_data_root, "KemonoDownloaderAppData") # Fallback app name + else: # Absolute fallback: current working directory (less ideal for bundled app) + app_config_dir = os.getcwd() + + if not os.path.exists(app_config_dir): + os.makedirs(app_config_dir, exist_ok=True) + except Exception as e_path: + print(f"Error setting up app_config_dir: {e_path}. 
Defaulting to CWD for Known.txt.") + app_config_dir = os.getcwd() # Fallback + self.config_file = os.path.join(app_config_dir, "Known.txt") self.download_thread = None self.thread_pool = None @@ -170,12 +203,15 @@ class DownloaderApp(QWidget): self.manga_filename_style = self.settings.value(MANGA_FILENAME_STYLE_KEY, STYLE_POST_TITLE, type=str) self.skip_words_scope = self.settings.value(SKIP_WORDS_SCOPE_KEY, SKIP_SCOPE_POSTS, type=str) self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_TITLE, type=str) + self.allow_multipart_download_setting = self.settings.value(ALLOW_MULTIPART_DOWNLOAD_KEY, False, type=bool) # Default to OFF + self.duplicate_file_mode = self.settings.value(DUPLICATE_FILE_MODE_KEY, DUPLICATE_MODE_DELETE, type=str) # Default to DELETE + print(f"ℹ️ Known.txt will be loaded/saved at: {self.config_file}") self.load_known_names_from_util() - self.setWindowTitle("Kemono Downloader v3.1.1") - self.setGeometry(150, 150, 1050, 820) + self.setWindowTitle("Kemono Downloader v3.2.0") + # self.setGeometry(150, 150, 1050, 820) # Initial geometry will be set after showing self.setStyleSheet(self.get_dark_theme()) self.init_ui() self._connect_signals() @@ -183,10 +219,12 @@ class DownloaderApp(QWidget): self.log_signal.emit("ℹ️ Local API server functionality has been removed.") self.log_signal.emit("ℹ️ 'Skip Current File' button has been removed.") if hasattr(self, 'character_input'): - self.character_input.setToolTip("Enter one or more character names, separated by commas (e.g., yor, makima)") + self.character_input.setToolTip("Names, comma-separated. Group aliases: (alias1, alias2) for combined folder name 'alias1 alias2'. 
E.g., yor, (Boa, Hancock)") self.log_signal.emit(f"ℹ️ Manga filename style loaded: '{self.manga_filename_style}'") self.log_signal.emit(f"ℹ️ Skip words scope loaded: '{self.skip_words_scope}'") self.log_signal.emit(f"ℹ️ Character filter scope loaded: '{self.char_filter_scope}'") + self.log_signal.emit(f"ℹ️ Multi-part download preference loaded: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}") + self.log_signal.emit(f"ℹ️ Duplicate file handling mode loaded: '{self.duplicate_file_mode.capitalize()}'") def _connect_signals(self): @@ -234,6 +272,9 @@ class DownloaderApp(QWidget): if self.char_filter_scope_toggle_button: self.char_filter_scope_toggle_button.clicked.connect(self._cycle_char_filter_scope) + + if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode) + if hasattr(self, 'duplicate_mode_toggle_button'): self.duplicate_mode_toggle_button.clicked.connect(self._cycle_duplicate_mode) def load_known_names_from_util(self): @@ -278,6 +319,8 @@ class DownloaderApp(QWidget): self.settings.setValue(MANGA_FILENAME_STYLE_KEY, self.manga_filename_style) self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope) self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope) + self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting) + self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode) # Save current mode self.settings.sync() should_exit = True @@ -289,17 +332,26 @@ class DownloaderApp(QWidget): QMessageBox.Yes | QMessageBox.No, QMessageBox.No) if reply == QMessageBox.Yes: self.log_signal.emit("⚠️ Cancelling active download due to application exit...") - self.cancel_download() - self.log_signal.emit(" Waiting briefly for threads to acknowledge cancellation...") + # Direct cancellation for exit - different from button cancel + self.cancellation_event.set() if self.download_thread and self.download_thread.isRunning(): + 
self.download_thread.requestInterruption() + self.log_signal.emit(" Signaled single download thread to interrupt.") + + # For thread pool, we want to wait on exit. + if self.download_thread and self.download_thread.isRunning(): + self.log_signal.emit(" Waiting for single download thread to finish...") self.download_thread.wait(3000) if self.download_thread.isRunning(): self.log_signal.emit(" ⚠️ Single download thread did not terminate gracefully.") + if self.thread_pool: + self.log_signal.emit(" Shutting down thread pool (waiting for completion)...") self.thread_pool.shutdown(wait=True, cancel_futures=True) self.log_signal.emit(" Thread pool shutdown complete.") self.thread_pool = None + self.log_signal.emit(" Cancellation for exit complete.") else: should_exit = False self.log_signal.emit("ℹ️ Application exit cancelled.") @@ -381,7 +433,7 @@ class DownloaderApp(QWidget): char_input_and_button_layout.setSpacing(10) self.character_input = QLineEdit() - self.character_input.setPlaceholderText("e.g., yor, Tifa, Reyna") + self.character_input.setPlaceholderText("e.g., yor, Tifa, (Reyna, Sage)") char_input_and_button_layout.addWidget(self.character_input, 3) self.char_filter_scope_toggle_button = QPushButton() @@ -411,20 +463,51 @@ class DownloaderApp(QWidget): left_layout.addWidget(self.filters_and_custom_folder_container_widget) - left_layout.addWidget(QLabel("🚫 Skip with Words (comma-separated):")) + # --- Word Manipulation Section (Skip Words & Remove from Filename) --- + word_manipulation_container_widget = QWidget() + word_manipulation_outer_layout = QHBoxLayout(word_manipulation_container_widget) + word_manipulation_outer_layout.setContentsMargins(0,0,0,0) # No margins for the outer container + word_manipulation_outer_layout.setSpacing(15) # Spacing between the two vertical groups + + # Group 1: Skip Words (Left, ~70% space) + skip_words_widget = QWidget() + skip_words_vertical_layout = QVBoxLayout(skip_words_widget) + 
skip_words_vertical_layout.setContentsMargins(0,0,0,0) # No margins for the inner group + skip_words_vertical_layout.setSpacing(2) # Small spacing between label and input row + + skip_words_label = QLabel("🚫 Skip with Words (comma-separated):") + skip_words_vertical_layout.addWidget(skip_words_label) + + skip_input_and_button_layout = QHBoxLayout() skip_input_and_button_layout = QHBoxLayout() skip_input_and_button_layout.setContentsMargins(0, 0, 0, 0) skip_input_and_button_layout.setSpacing(10) self.skip_words_input = QLineEdit() self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview") - skip_input_and_button_layout.addWidget(self.skip_words_input, 3) + skip_input_and_button_layout.addWidget(self.skip_words_input, 1) # Input field takes available space self.skip_scope_toggle_button = QPushButton() self._update_skip_scope_button_text() self.skip_scope_toggle_button.setToolTip("Click to cycle skip scope (Files -> Posts -> Both)") self.skip_scope_toggle_button.setStyleSheet("padding: 6px 10px;") self.skip_scope_toggle_button.setMinimumWidth(100) - skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 1) - left_layout.addLayout(skip_input_and_button_layout) + skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 0) # Button takes its minimum + skip_words_vertical_layout.addLayout(skip_input_and_button_layout) + word_manipulation_outer_layout.addWidget(skip_words_widget, 7) # 70% stretch for left group + + # Group 2: Remove Words from name (Right, ~30% space) + remove_words_widget = QWidget() + remove_words_vertical_layout = QVBoxLayout(remove_words_widget) + remove_words_vertical_layout.setContentsMargins(0,0,0,0) # No margins for the inner group + remove_words_vertical_layout.setSpacing(2) + self.remove_from_filename_label = QLabel("✂️ Remove Words from name:") + remove_words_vertical_layout.addWidget(self.remove_from_filename_label) + self.remove_from_filename_input = QLineEdit() + 
self.remove_from_filename_input.setPlaceholderText("e.g., patreon, HD") # Placeholder for the new field + remove_words_vertical_layout.addWidget(self.remove_from_filename_input) + word_manipulation_outer_layout.addWidget(remove_words_widget, 3) # 30% stretch for right group + + left_layout.addWidget(word_manipulation_container_widget) + # --- End Word Manipulation Section --- file_filter_layout = QVBoxLayout() @@ -527,7 +610,8 @@ class DownloaderApp(QWidget): self.manga_mode_checkbox = QCheckBox("Manga/Comic Mode") self.manga_mode_checkbox.setToolTip("Downloads posts from oldest to newest and renames files based on post title (for creator feeds only).") self.manga_mode_checkbox.setChecked(False) - advanced_row2_layout.addWidget(self.manga_mode_checkbox) + advanced_row2_layout.addWidget(self.manga_mode_checkbox) # Keep manga mode checkbox here + advanced_row2_layout.addStretch(1) checkboxes_group_layout.addLayout(advanced_row2_layout) left_layout.addLayout(checkboxes_group_layout) @@ -538,9 +622,9 @@ class DownloaderApp(QWidget): self.download_btn = QPushButton("⬇️ Start Download") self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;") self.download_btn.clicked.connect(self.start_download) - self.cancel_btn = QPushButton("❌ Cancel") + self.cancel_btn = QPushButton("❌ Cancel & Reset UI") # Updated button text for clarity self.cancel_btn.setEnabled(False) - self.cancel_btn.clicked.connect(self.cancel_download) + self.cancel_btn.clicked.connect(self.cancel_download_button_action) # Changed connection btn_layout.addWidget(self.download_btn) btn_layout.addWidget(self.cancel_btn) left_layout.addLayout(btn_layout) @@ -598,6 +682,20 @@ class DownloaderApp(QWidget): self._update_manga_filename_style_button_text() log_title_layout.addWidget(self.manga_rename_toggle_button) + self.multipart_toggle_button = QPushButton() # Create the button + self.multipart_toggle_button.setToolTip("Toggle between Multi-part and Single-stream downloads for large files.") + 
self.multipart_toggle_button.setFixedWidth(130) # Adjust width as needed + self.multipart_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding + self._update_multipart_toggle_button_text() # Set initial text + log_title_layout.addWidget(self.multipart_toggle_button) # Add to layout + + self.duplicate_mode_toggle_button = QPushButton() + self.duplicate_mode_toggle_button.setToolTip("Toggle how duplicate filenames are handled (Rename or Delete).") + self.duplicate_mode_toggle_button.setFixedWidth(150) # Adjust width + self.duplicate_mode_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding + self._update_duplicate_mode_button_text() # Set initial text + log_title_layout.addWidget(self.duplicate_mode_toggle_button) + self.log_verbosity_button = QPushButton("Show Basic Log") self.log_verbosity_button.setToolTip("Toggle between full and basic log details.") self.log_verbosity_button.setFixedWidth(110) @@ -676,6 +774,17 @@ class DownloaderApp(QWidget): self._update_manga_filename_style_button_text() self._update_skip_scope_button_text() self._update_char_filter_scope_button_text() + self._update_duplicate_mode_button_text() + + def _center_on_screen(self): + """Centers the widget on the screen.""" + try: + screen_geometry = QDesktopWidget().screenGeometry() + widget_geometry = self.frameGeometry() + widget_geometry.moveCenter(screen_geometry.center()) + self.move(widget_geometry.topLeft()) + except Exception as e: + self.log_signal.emit(f"⚠️ Error centering window: {e}") def get_dark_theme(self): @@ -826,30 +935,57 @@ class DownloaderApp(QWidget): print(f"GUI External Log Error (Append): {e}\nOriginal Message: {formatted_link_text}") - def update_file_progress_display(self, filename, downloaded_bytes, total_bytes): - if not filename and total_bytes == 0 and downloaded_bytes == 0: + def update_file_progress_display(self, filename, progress_info): + if not filename and progress_info is None: # Explicit clear self.file_progress_label.setText("") 
return - max_filename_len = 25 - display_filename = filename - if len(filename) > max_filename_len: - display_filename = filename[:max_filename_len-3].strip() + "..." - - if total_bytes > 0: - downloaded_mb = downloaded_bytes / (1024 * 1024) - total_mb = total_bytes / (1024 * 1024) - progress_text = f"Downloading '{display_filename}' ({downloaded_mb:.1f}MB / {total_mb:.1f}MB)" - else: - downloaded_mb = downloaded_bytes / (1024 * 1024) - progress_text = f"Downloading '{display_filename}' ({downloaded_mb:.1f}MB)" + if isinstance(progress_info, list): # Multi-part progress (list of chunk dicts) + if not progress_info: # Empty list + self.file_progress_label.setText(f"File: {filename} - Initializing parts...") + return - if len(progress_text) > 75: - display_filename = filename[:15].strip() + "..." if len(filename) > 18 else display_filename - if total_bytes > 0: progress_text = f"DL '{display_filename}' ({downloaded_mb:.1f}/{total_mb:.1f}MB)" - else: progress_text = f"DL '{display_filename}' ({downloaded_mb:.1f}MB)" + total_downloaded_overall = sum(cs.get('downloaded', 0) for cs in progress_info) + # total_file_size_overall should ideally be from progress_data['total_file_size'] + # For now, we sum chunk totals. This assumes all chunks are for the same file. 
+ total_file_size_overall = sum(cs.get('total', 0) for cs in progress_info) + + active_chunks_count = 0 + combined_speed_bps = 0 + for cs in progress_info: + if cs.get('active', False): + active_chunks_count += 1 + combined_speed_bps += cs.get('speed_bps', 0) - self.file_progress_label.setText(progress_text) + dl_mb = total_downloaded_overall / (1024 * 1024) + total_mb = total_file_size_overall / (1024 * 1024) + speed_MBps = (combined_speed_bps / 8) / (1024 * 1024) + + progress_text = f"DL '{filename[:20]}...': {dl_mb:.1f}/{total_mb:.1f} MB ({active_chunks_count} parts @ {speed_MBps:.2f} MB/s)" + self.file_progress_label.setText(progress_text) + + elif isinstance(progress_info, tuple) and len(progress_info) == 2: # Single stream (downloaded_bytes, total_bytes) + downloaded_bytes, total_bytes = progress_info + if not filename and total_bytes == 0 and downloaded_bytes == 0: # Clear if no info + self.file_progress_label.setText("") + return + + max_fn_len = 25 + disp_fn = filename if len(filename) <= max_fn_len else filename[:max_fn_len-3].strip()+"..." + + dl_mb = downloaded_bytes / (1024*1024) + prog_text_base = f"Downloading '{disp_fn}' ({dl_mb:.1f}MB" + if total_bytes > 0: + tot_mb = total_bytes / (1024*1024) + prog_text_base += f" / {tot_mb:.1f}MB)" + else: + prog_text_base += ")" + + self.file_progress_label.setText(prog_text_base) + elif filename and progress_info is None: # Explicit request to clear for a specific file (e.g. 
download finished/failed) + self.file_progress_label.setText("") + elif not filename and not progress_info: # General clear + self.file_progress_label.setText("") def update_external_links_setting(self, checked): @@ -903,6 +1039,7 @@ class DownloaderApp(QWidget): if self.use_subfolders_checkbox: self.use_subfolders_checkbox.setEnabled(file_download_mode_active) if self.skip_words_input: self.skip_words_input.setEnabled(file_download_mode_active) if self.skip_scope_toggle_button: self.skip_scope_toggle_button.setEnabled(file_download_mode_active) + if hasattr(self, 'remove_from_filename_input'): self.remove_from_filename_input.setEnabled(file_download_mode_active) if self.skip_zip_checkbox: can_skip_zip = not is_only_links and not is_only_archives @@ -1302,6 +1439,9 @@ class DownloaderApp(QWidget): if self.manga_rename_toggle_button: self.manga_rename_toggle_button.setVisible(manga_mode_effectively_on) + if hasattr(self, 'duplicate_mode_toggle_button'): + self.duplicate_mode_toggle_button.setVisible(not manga_mode_effectively_on) # Hidden in Manga Mode + if manga_mode_effectively_on: if self.page_range_label: self.page_range_label.setEnabled(False) if self.start_page_input: self.start_page_input.setEnabled(False); self.start_page_input.clear() @@ -1390,6 +1530,11 @@ class DownloaderApp(QWidget): raw_skip_words = self.skip_words_input.text().strip() skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()] current_skip_words_scope = self.get_skip_words_scope() + + raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else "" + effective_duplicate_file_mode = self.duplicate_file_mode # Start with user's choice + allow_multipart = self.allow_multipart_download_setting # Use the internal setting + remove_from_filename_words_list = [word.strip() for word in raw_remove_filename_words.split(',') if word.strip()] current_char_filter_scope = self.get_char_filter_scope() 
manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False @@ -1442,54 +1587,127 @@ class DownloaderApp(QWidget): elif manga_mode: start_page, end_page = None, None + # effective_duplicate_file_mode will be self.duplicate_file_mode (UI button's state). + # Manga Mode specific duplicate handling is now managed entirely within downloader_utils.py self.external_link_queue.clear(); self.extracted_links_cache = []; self._is_processing_external_link_queue = False; self._current_link_post_title = None self.all_kept_original_filenames = [] raw_character_filters_text = self.character_input.text().strip() - parsed_character_list = [name.strip() for name in raw_character_filters_text.split(',') if name.strip()] if raw_character_filters_text else None - filter_character_list_to_pass = None + # --- New parsing logic for character filters --- + parsed_character_filter_objects = [] + if raw_character_filters_text: + raw_parts = [] + current_part_buffer = "" + in_group_parsing = False + for char_token in raw_character_filters_text: + if char_token == '(': + in_group_parsing = True + current_part_buffer += char_token + elif char_token == ')': + in_group_parsing = False + current_part_buffer += char_token + elif char_token == ',' and not in_group_parsing: + if current_part_buffer.strip(): raw_parts.append(current_part_buffer.strip()) + current_part_buffer = "" + else: + current_part_buffer += char_token + if current_part_buffer.strip(): raw_parts.append(current_part_buffer.strip()) + + for part_str in raw_parts: + part_str = part_str.strip() + if not part_str: continue + if part_str.startswith("(") and part_str.endswith(")"): + group_content_str = part_str[1:-1].strip() + aliases_in_group = [alias.strip() for alias in group_content_str.split(',') if alias.strip()] + if aliases_in_group: + group_folder_name = " ".join(aliases_in_group) + parsed_character_filter_objects.append({ + "name": group_folder_name, # This is the primary/folder name + 
"is_group": True, + "aliases": aliases_in_group # These are for matching + }) + else: + parsed_character_filter_objects.append({ + "name": part_str, # Folder name and matching name are the same + "is_group": False, + "aliases": [part_str] + }) + # --- End new parsing logic --- + + filter_character_list_to_pass = None needs_folder_naming_validation = (use_subfolders or manga_mode) and not extract_links_only - if parsed_character_list and not extract_links_only : - self.log_signal.emit(f"ℹ️ Validating character filters: {', '.join(parsed_character_list)}") + if parsed_character_filter_objects and not extract_links_only : + self.log_signal.emit(f"ℹ️ Validating character filters: {', '.join(item['name'] + (' (Group: ' + '/'.join(item['aliases']) + ')' if item['is_group'] else '') for item in parsed_character_filter_objects)}") valid_filters_for_backend = [] user_cancelled_validation = False - for char_name in parsed_character_list: - cleaned_name_test = clean_folder_name(char_name) + for filter_item_obj in parsed_character_filter_objects: + item_primary_name = filter_item_obj["name"] + cleaned_name_test = clean_folder_name(item_primary_name) + + if needs_folder_naming_validation and not cleaned_name_test: - QMessageBox.warning(self, "Invalid Filter Name for Folder", f"Filter name '{char_name}' is invalid for a folder and will be skipped for folder naming.") - self.log_signal.emit(f"⚠️ Skipping invalid filter for folder naming: '{char_name}'") - if not needs_folder_naming_validation: valid_filters_for_backend.append(char_name) + QMessageBox.warning(self, "Invalid Filter Name for Folder", f"Filter name '{item_primary_name}' is invalid for a folder and will be skipped for folder naming.") + self.log_signal.emit(f"⚠️ Skipping invalid filter for folder naming: '{item_primary_name}'") continue - if needs_folder_naming_validation and char_name.lower() not in {kn.lower() for kn in KNOWN_NAMES}: + # --- New: Check if any alias of a group is already known --- + 
an_alias_is_already_known = False + if filter_item_obj["is_group"] and needs_folder_naming_validation: + for alias in filter_item_obj["aliases"]: + if any(existing_known.lower() == alias.lower() for existing_known in KNOWN_NAMES): + an_alias_is_already_known = True + self.log_signal.emit(f"ℹ️ Alias '{alias}' (from group '{item_primary_name}') is already in Known Names. Group name '{item_primary_name}' will not be added to Known.txt.") + break + # --- End new check --- + + if an_alias_is_already_known: + valid_filters_for_backend.append(filter_item_obj) + continue + + # Determine if we should prompt to add the name to the Known.txt list. + # Prompt if: + # - Folder naming validation is relevant (subfolders or manga mode, and not just extracting links) + # - AND Manga Mode is OFF (this is the key change for your request) + # - AND the primary name of the filter isn't already in Known.txt + should_prompt_to_add_to_known_list = ( + needs_folder_naming_validation and + not manga_mode and # Do NOT prompt if Manga Mode is ON + item_primary_name.lower() not in {kn.lower() for kn in KNOWN_NAMES} + ) + + if should_prompt_to_add_to_known_list: reply = QMessageBox.question(self, "Add to Known List?", - f"Filter '{char_name}' (used for folder/manga naming) is not in known names list.\nAdd it now?", + f"Filter name '{item_primary_name}' (used for folder/manga naming) is not in known names list.\nAdd it now?", QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes) if reply == QMessageBox.Yes: - self.new_char_input.setText(char_name) - if self.add_new_character(): valid_filters_for_backend.append(char_name) - else: - if cleaned_name_test or not needs_folder_naming_validation: valid_filters_for_backend.append(char_name) + self.new_char_input.setText(item_primary_name) # Use the primary name for adding + if self.add_new_character(): + valid_filters_for_backend.append(filter_item_obj) elif reply == QMessageBox.Cancel: user_cancelled_validation = True; break - else: - 
if cleaned_name_test or not needs_folder_naming_validation: valid_filters_for_backend.append(char_name) + # If 'No', the filter is not used and not added to Known.txt for this session. else: - valid_filters_for_backend.append(char_name) + # Add to filters to be used for this session if: + # - Prompting is not needed (e.g., name already known, or not manga_mode but name is known) + # - OR Manga Mode is ON (filter is used without adding to Known.txt) + # - OR extract_links_only is true (folder naming validation is false) + valid_filters_for_backend.append(filter_item_obj) + if manga_mode and needs_folder_naming_validation and item_primary_name.lower() not in {kn.lower() for kn in KNOWN_NAMES}: + self.log_signal.emit(f"ℹ️ Manga Mode: Using filter '{item_primary_name}' for this session without adding to Known Names.") if user_cancelled_validation: return if valid_filters_for_backend: filter_character_list_to_pass = valid_filters_for_backend - self.log_signal.emit(f" Using validated character filters for subfolders: {', '.join(filter_character_list_to_pass)}") + self.log_signal.emit(f" Using validated character filters: {', '.join(item['name'] for item in filter_character_list_to_pass)}") else: - self.log_signal.emit("⚠️ No valid character filters remaining (after validation).") - elif parsed_character_list : - filter_character_list_to_pass = parsed_character_list - self.log_signal.emit(f"ℹ️ Character filters provided: {', '.join(filter_character_list_to_pass)} (Folder naming validation may not apply).") + self.log_signal.emit("⚠️ No valid character filters to use for this session.") + elif parsed_character_filter_objects : # If not extract_links_only is false, but filters exist + filter_character_list_to_pass = parsed_character_filter_objects + self.log_signal.emit(f"ℹ️ Character filters provided (folder naming validation may not apply): {', '.join(item['name'] for item in filter_character_list_to_pass)}") if manga_mode and not filter_character_list_to_pass and not 
extract_links_only: @@ -1568,7 +1786,7 @@ class DownloaderApp(QWidget): if use_subfolders: if custom_folder_name_cleaned: log_messages.append(f" Custom Folder (Post): '{custom_folder_name_cleaned}'") if filter_character_list_to_pass: - log_messages.append(f" Character Filters: {', '.join(filter_character_list_to_pass)}") + log_messages.append(f" Character Filters: {', '.join(item['name'] for item in filter_character_list_to_pass)}") log_messages.append(f" ↳ Char Filter Scope: {current_char_filter_scope.capitalize()}") elif use_subfolders: log_messages.append(f" Folder Naming: Automatic (based on title/known names)") @@ -1579,8 +1797,10 @@ class DownloaderApp(QWidget): f" Skip Archives: {'.zip' if effective_skip_zip else ''}{', ' if effective_skip_zip and effective_skip_rar else ''}{'.rar' if effective_skip_rar else ''}{'None (Archive Mode)' if backend_filter_mode == 'archive' else ('None' if not (effective_skip_zip or effective_skip_rar) else '')}", f" Skip Words (posts/files): {', '.join(skip_words_list) if skip_words_list else 'None'}", f" Skip Words Scope: {current_skip_words_scope.capitalize()}", + f" Remove Words from Filename: {', '.join(remove_from_filename_words_list) if remove_from_filename_words_list else 'None'}", f" Compress Images: {'Enabled' if compress_images else 'Disabled'}", - f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}" + f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}", + f" Multi-part Download: {'Enabled' if allow_multipart else 'Disabled'}" ]) else: log_messages.append(f" Mode: Extracting Links Only") @@ -1591,11 +1811,9 @@ class DownloaderApp(QWidget): log_messages.append(f" Manga Mode (File Renaming by Post Title): Enabled") log_messages.append(f" ↳ Manga Filename Style: {'Post Title Based' if self.manga_filename_style == STYLE_POST_TITLE else 'Original File Name'}") if filter_character_list_to_pass: - log_messages.append(f" ↳ Manga Character Filter (for naming/folder): {', 
'.join(filter_character_list_to_pass)}") + log_messages.append(f" ↳ Manga Character Filter (for naming/folder): {', '.join(item['name'] for item in filter_character_list_to_pass)}") log_messages.append(f" ↳ Char Filter Scope (Manga): {current_char_filter_scope.capitalize()}") - - if not extract_links_only: - log_messages.append(f" Subfolder per Post: {'Enabled' if use_post_subfolders else 'Disabled'}") + log_messages.append(f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).") should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading_for_posts else 'Single-threaded (posts)'}") @@ -1630,6 +1848,7 @@ class DownloaderApp(QWidget): 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, 'skip_words_list': skip_words_list, 'skip_words_scope': current_skip_words_scope, + 'remove_from_filename_words_list': remove_from_filename_words_list, 'char_filter_scope': current_char_filter_scope, 'show_external_links': self.show_external_links, 'extract_links_only': extract_links_only, @@ -1642,7 +1861,9 @@ class DownloaderApp(QWidget): 'cancellation_event': self.cancellation_event, 'signals': self.worker_signals, 'manga_filename_style': self.manga_filename_style, - 'num_file_threads_for_worker': effective_num_file_threads_per_worker + 'num_file_threads_for_worker': effective_num_file_threads_per_worker, + 'allow_multipart_download': allow_multipart, # Corrected from previous thought + 'duplicate_file_mode': effective_duplicate_file_mode # Pass the potentially overridden mode } try: @@ -1656,14 +1877,15 @@ class DownloaderApp(QWidget): 'filter_character_list', 'filter_mode', 'skip_zip', 'skip_rar', 'use_subfolders', 'use_post_subfolders', 'custom_folder_name', 'compress_images', 'download_thumbnails', 'service', 'user_id', - 'downloaded_files', 'downloaded_file_hashes', + 'downloaded_files', 
'downloaded_file_hashes', 'remove_from_filename_words_list', 'downloaded_files_lock', 'downloaded_file_hashes_lock', 'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'show_external_links', 'extract_links_only', 'num_file_threads_for_worker', 'skip_current_file_flag', 'start_page', 'end_page', 'target_post_id_from_initial_url', - 'manga_mode_active', 'unwanted_keywords', 'manga_filename_style' + 'manga_mode_active', 'unwanted_keywords', 'manga_filename_style', 'duplicate_file_mode', + 'allow_multipart_download' ] args_template['skip_current_file_flag'] = None single_thread_args = {key: args_template[key] for key in dt_expected_keys if key in args_template} @@ -1780,15 +2002,16 @@ class DownloaderApp(QWidget): 'target_post_id_from_initial_url', 'custom_folder_name', 'compress_images', 'download_thumbnails', 'service', 'user_id', 'api_url_input', 'cancellation_event', 'signals', 'downloaded_files', 'downloaded_file_hashes', - 'downloaded_files_lock', 'downloaded_file_hashes_lock', + 'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list', 'skip_words_list', 'skip_words_scope', 'char_filter_scope', - 'show_external_links', 'extract_links_only', + 'show_external_links', 'extract_links_only', 'allow_multipart_download', 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style' ] + # Ensure 'allow_multipart_download' is also considered for optional keys if it has a default in PostProcessorWorker ppw_optional_keys_with_defaults = { - 'skip_words_list', 'skip_words_scope', 'char_filter_scope', + 'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'remove_from_filename_words_list', 'show_external_links', 'extract_links_only', 'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style' } @@ -1864,8 +2087,8 @@ class DownloaderApp(QWidget): self.new_char_input, self.add_char_button, self.delete_char_button, self.char_filter_scope_toggle_button, 
self.start_page_input, self.end_page_input, - self.page_range_label, self.to_label, self.character_input, self.custom_folder_input, self.custom_folder_label, - self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, + self.page_range_label, self.to_label, self.character_input, self.custom_folder_input, self.custom_folder_label, self.remove_from_filename_input, + self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.multipart_toggle_button, self.skip_scope_toggle_button ] @@ -1890,17 +2113,93 @@ class DownloaderApp(QWidget): self.cancel_btn.setEnabled(not enabled) - if enabled: + if enabled: # Ensure these are updated based on current (possibly reset) checkbox states self._handle_multithreading_toggle(multithreading_currently_on) self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False) + self.update_custom_folder_visibility(self.link_input.text()) + self.update_page_range_enabled_state() - def cancel_download(self): + + def _perform_soft_ui_reset(self, preserve_url=None, preserve_dir=None): + """Resets UI elements and some state to app defaults, then applies preserved inputs.""" + self.log_signal.emit("🔄 Performing soft UI reset...") + + # 1. 
Reset UI fields to their visual defaults + self.link_input.clear() # Will be set later if preserve_url is given + self.dir_input.clear() # Will be set later if preserve_dir is given + self.custom_folder_input.clear(); self.character_input.clear(); + self.skip_words_input.clear(); self.start_page_input.clear(); self.end_page_input.clear(); self.new_char_input.clear(); + if hasattr(self, 'remove_from_filename_input'): self.remove_from_filename_input.clear() + self.character_search_input.clear(); self.thread_count_input.setText("4"); self.radio_all.setChecked(True); + self.skip_zip_checkbox.setChecked(True); self.skip_rar_checkbox.setChecked(True); self.download_thumbnails_checkbox.setChecked(False); + self.compress_images_checkbox.setChecked(False); self.use_subfolders_checkbox.setChecked(True); + self.use_subfolder_per_post_checkbox.setChecked(False); self.use_multithreading_checkbox.setChecked(True); + self.external_links_checkbox.setChecked(False) + if self.manga_mode_checkbox: self.manga_mode_checkbox.setChecked(False) + + # 2. Reset internal state for UI-managed settings to app defaults (not from QSettings) + self.allow_multipart_download_setting = False # Default to OFF + self._update_multipart_toggle_button_text() + + self.skip_words_scope = SKIP_SCOPE_POSTS # Default + self._update_skip_scope_button_text() + + self.char_filter_scope = CHAR_SCOPE_TITLE # Default + self._update_char_filter_scope_button_text() + + self.manga_filename_style = STYLE_POST_TITLE # Reset to app default + self._update_manga_filename_style_button_text() + + # 3. Restore preserved URL and Directory + if preserve_url is not None: + self.link_input.setText(preserve_url) + if preserve_dir is not None: + self.dir_input.setText(preserve_dir) + + # 4. 
Reset operational state variables (but not session-based downloaded_files/hashes) + self.external_link_queue.clear(); self.extracted_links_cache = [] + self._is_processing_external_link_queue = False; self._current_link_post_title = None + self.total_posts_to_process = 0; self.processed_posts_count = 0 + self.download_counter = 0; self.skip_counter = 0 + self.all_kept_original_filenames = [] + + # 5. Update UI based on new (default or preserved) states + self._handle_filter_mode_change(self.radio_group.checkedButton(), True) + self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked()) + self.filter_character_list(self.character_search_input.text()) + + self.set_ui_enabled(True) # This enables buttons and calls other UI update methods + + # Explicitly call these to ensure they reflect changes from preserved inputs + self.update_custom_folder_visibility(self.link_input.text()) + self.update_page_range_enabled_state() + # update_ui_for_manga_mode is called within set_ui_enabled + + self.log_signal.emit("✅ Soft UI reset complete. 
Preserved URL and Directory (if provided).") + + + def cancel_download_button_action(self): if not self.cancel_btn.isEnabled() and not self.cancellation_event.is_set(): self.log_signal.emit("ℹ️ No active download to cancel or already cancelling."); return - self.log_signal.emit("⚠️ Requesting cancellation of download process..."); self.cancellation_event.set() + self.log_signal.emit("⚠️ Requesting cancellation of download process (soft reset)...") + + current_url = self.link_input.text() + current_dir = self.dir_input.text() + + self.cancellation_event.set() if self.download_thread and self.download_thread.isRunning(): self.download_thread.requestInterruption(); self.log_signal.emit(" Signaled single download thread to interrupt.") - if self.thread_pool: self.log_signal.emit(" Initiating immediate shutdown and cancellation of worker pool tasks..."); self.thread_pool.shutdown(wait=False, cancel_futures=True) + if self.thread_pool: + self.log_signal.emit(" Initiating non-blocking shutdown and cancellation of worker pool tasks...") + self.thread_pool.shutdown(wait=False, cancel_futures=True) + self.thread_pool = None # Allow recreation for next download + self.active_futures = [] + self.external_link_queue.clear(); self._is_processing_external_link_queue = False; self._current_link_post_title = None - self.cancel_btn.setEnabled(False); self.progress_label.setText("Progress: Cancelling..."); self.file_progress_label.setText("") + + self._perform_soft_ui_reset(preserve_url=current_url, preserve_dir=current_dir) + + self.progress_label.setText("Progress: Cancelled. Ready for new task.") + self.file_progress_label.setText("") + self.log_signal.emit("ℹ️ UI reset. Ready for new operation. 
Background tasks are being terminated.") def download_finished(self, total_downloaded, total_skipped, cancelled_by_user, kept_original_names_list=None): if kept_original_names_list is None: @@ -1945,7 +2244,10 @@ class DownloaderApp(QWidget): if hasattr(self.download_thread, 'external_link_signal'): self.download_thread.external_link_signal.disconnect(self.handle_external_link_signal) if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.disconnect(self.update_file_progress_display) except (TypeError, RuntimeError) as e: self.log_signal.emit(f"ℹ️ Note during single-thread signal disconnection: {e}") - self.download_thread = None + # Ensure these are cleared if the download_finished is for the single download thread + if self.download_thread and not self.download_thread.isRunning(): # Check if it was this thread + self.download_thread = None + if self.thread_pool: self.log_signal.emit(" Ensuring worker thread pool is shut down..."); self.thread_pool.shutdown(wait=True, cancel_futures=True); self.thread_pool = None self.active_futures = [] @@ -1985,6 +2287,10 @@ class DownloaderApp(QWidget): self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope) self._update_char_filter_scope_button_text() + self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Reset to default (Delete) + self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode) + self._update_duplicate_mode_button_text() + self.settings.sync() self._update_manga_filename_style_button_text() self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False) @@ -1994,17 +2300,22 @@ class DownloaderApp(QWidget): def _reset_ui_to_defaults(self): self.link_input.clear(); self.dir_input.clear(); self.custom_folder_input.clear(); self.character_input.clear(); self.skip_words_input.clear(); self.start_page_input.clear(); self.end_page_input.clear(); self.new_char_input.clear(); + if hasattr(self, 
'remove_from_filename_input'): self.remove_from_filename_input.clear() self.character_search_input.clear(); self.thread_count_input.setText("4"); self.radio_all.setChecked(True); self.skip_zip_checkbox.setChecked(True); self.skip_rar_checkbox.setChecked(True); self.download_thumbnails_checkbox.setChecked(False); self.compress_images_checkbox.setChecked(False); self.use_subfolders_checkbox.setChecked(True); self.use_subfolder_per_post_checkbox.setChecked(False); self.use_multithreading_checkbox.setChecked(True); self.external_links_checkbox.setChecked(False) - if self.manga_mode_checkbox: self.manga_mode_checkbox.setChecked(False) - + if self.manga_mode_checkbox: self.manga_mode_checkbox.setChecked(False) + self.allow_multipart_download_setting = False # Default to OFF + self._update_multipart_toggle_button_text() # Update button text + self.skip_words_scope = SKIP_SCOPE_POSTS self._update_skip_scope_button_text() self.char_filter_scope = CHAR_SCOPE_TITLE self._update_char_filter_scope_button_text() + self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Default to DELETE + self._update_duplicate_mode_button_text() self._handle_filter_mode_change(self.radio_all, True) @@ -2032,6 +2343,61 @@ class DownloaderApp(QWidget): with QMutexLocker(self.prompt_mutex): self._add_character_response = result self.log_signal.emit(f" Main thread received character prompt response: {'Action resulted in addition/confirmation' if result else 'Action resulted in no addition/declined'}") + def _update_multipart_toggle_button_text(self): + if hasattr(self, 'multipart_toggle_button'): + text = "Multi-part: ON" if self.allow_multipart_download_setting else "Multi-part: OFF" + self.multipart_toggle_button.setText(text) + + def _toggle_multipart_mode(self): + # If currently OFF, and user is trying to turn it ON + if not self.allow_multipart_download_setting: + msg_box = QMessageBox(self) + msg_box.setIcon(QMessageBox.Warning) + msg_box.setWindowTitle("Multi-part Download Advisory") + 
msg_box.setText( + "Multi-part download advisory:

" + "
" + "Do you want to enable multi-part download?" + ) + proceed_button = msg_box.addButton("Proceed Anyway", QMessageBox.AcceptRole) + cancel_button = msg_box.addButton("Cancel", QMessageBox.RejectRole) + msg_box.setDefaultButton(proceed_button) # Default to Proceed + msg_box.exec_() + + if msg_box.clickedButton() == cancel_button: + # User cancelled, so don't change the setting (it's already False) + self.log_signal.emit("ℹ️ Multi-part download enabling cancelled by user.") + return # Exit without changing the state or button text + + self.allow_multipart_download_setting = not self.allow_multipart_download_setting # Toggle the actual setting + self._update_multipart_toggle_button_text() + self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting) + self.log_signal.emit(f"ℹ️ Multi-part download set to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}") + + def _update_duplicate_mode_button_text(self): + if hasattr(self, 'duplicate_mode_toggle_button'): + if self.duplicate_file_mode == DUPLICATE_MODE_DELETE: + self.duplicate_mode_toggle_button.setText("Duplicates: Delete") + elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER: + self.duplicate_mode_toggle_button.setText("Duplicates: Move") + else: # Should not happen + self.duplicate_mode_toggle_button.setText("Duplicates: Move") # Default to Move if unknown + + def _cycle_duplicate_mode(self): + if self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER: + self.duplicate_file_mode = DUPLICATE_MODE_DELETE + else: # If it's DELETE or unknown, cycle back to MOVE + self.duplicate_file_mode = DUPLICATE_MODE_MOVE_TO_SUBFOLDER + self._update_duplicate_mode_button_text() + self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode) + self.log_signal.emit(f"ℹ️ Duplicate file handling mode changed to: '{self.duplicate_file_mode.capitalize()}'") if __name__ == '__main__': import traceback @@ -2044,9 +2410,19 @@ if __name__ == '__main__': 
else: print(f"Warning: Application icon 'Kemono.ico' not found at {icon_path}") downloader_app_instance = DownloaderApp() + # Set a reasonable default size before showing + downloader_app_instance.resize(1150, 780) # Adjusted default size downloader_app_instance.show() + # Center the window on the screen after it's shown and sized + downloader_app_instance._center_on_screen() if TourDialog: + # Temporarily force the tour to be considered as "not shown" + # This ensures it appears for this run, especially for a fresh .exe + tour_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR) + tour_settings.setValue(TourDialog.TOUR_SHOWN_KEY, False) + tour_settings.sync() + print("[Main] Forcing tour to be active for this session.") tour_result = TourDialog.run_tour_if_needed(downloader_app_instance) if tour_result == QDialog.Accepted: print("Tour completed by user.") elif tour_result == QDialog.Rejected: print("Tour skipped or was already shown.") diff --git a/multipart_downloader.py b/multipart_downloader.py new file mode 100644 index 0000000..e1798ab --- /dev/null +++ b/multipart_downloader.py @@ -0,0 +1,232 @@ +import os +import time +import requests +import hashlib +import http.client +import traceback +import threading +from concurrent.futures import ThreadPoolExecutor, as_completed + +CHUNK_DOWNLOAD_RETRY_DELAY = 2 # Slightly reduced for faster retries if needed +MAX_CHUNK_DOWNLOAD_RETRIES = 1 # Further reduced for quicker fallback if a chunk is problematic +DOWNLOAD_CHUNK_SIZE_ITER = 1024 * 256 # 256KB for iter_content within a chunk download + + +def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, headers, + part_num, total_parts, progress_data, cancellation_event, skip_event, logger, + signals=None, api_original_filename=None): # Added signals and api_original_filename + """Downloads a single chunk of a file and writes it to the temp file.""" + if cancellation_event and cancellation_event.is_set(): + 
logger(f" [Chunk {part_num + 1}/{total_parts}] Download cancelled before start.") + return 0, False # bytes_downloaded, success + if skip_event and skip_event.is_set(): + logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event triggered before start.") + return 0, False + + chunk_headers = headers.copy() + # end_byte can be -1 for 0-byte files, meaning download from start_byte to end of file (which is start_byte itself) + if end_byte != -1 : # For 0-byte files, end_byte might be -1, Range header should not be set or be 0-0 + chunk_headers['Range'] = f"bytes={start_byte}-{end_byte}" + elif start_byte == 0 and end_byte == -1: # Specifically for 0-byte files + # Some servers might not like Range: bytes=0--1. + # For a 0-byte file, we might not even need a range header, or Range: bytes=0-0 + # Let's try without for 0-byte, or rely on server to handle 0-0 if Content-Length was 0. + # If Content-Length was 0, the main function might handle it directly. + # This chunking logic is primarily for files > 0 bytes. + # For now, if end_byte is -1, it implies a 0-byte file, so we expect 0 bytes. 
+ pass + + + bytes_this_chunk = 0 + last_progress_emit_time_for_chunk = time.time() + last_speed_calc_time = time.time() + bytes_at_last_speed_calc = 0 + + for attempt in range(MAX_CHUNK_DOWNLOAD_RETRIES + 1): + if cancellation_event and cancellation_event.is_set(): + logger(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during retry loop.") + return bytes_this_chunk, False + if skip_event and skip_event.is_set(): + logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event during retry loop.") + return bytes_this_chunk, False + + try: + if attempt > 0: + logger(f" [Chunk {part_num + 1}/{total_parts}] Retrying download (Attempt {attempt}/{MAX_CHUNK_DOWNLOAD_RETRIES})...") + time.sleep(CHUNK_DOWNLOAD_RETRY_DELAY * (2 ** (attempt - 1))) + # Reset speed calculation on retry + last_speed_calc_time = time.time() + bytes_at_last_speed_calc = bytes_this_chunk # Current progress of this chunk + + # Enhanced log message for chunk start + log_msg = f" 🚀 [Chunk {part_num + 1}/{total_parts}] Starting download: bytes {start_byte}-{end_byte if end_byte != -1 else 'EOF'}" + logger(log_msg) + print(f"DEBUG_MULTIPART: {log_msg}") # Direct console print for debugging + response = requests.get(chunk_url, headers=chunk_headers, timeout=(10, 120), stream=True) + response.raise_for_status() + + # For 0-byte files, if end_byte was -1, we expect 0 content. 
+ if start_byte == 0 and end_byte == -1 and int(response.headers.get('Content-Length', 0)) == 0: + logger(f" [Chunk {part_num + 1}/{total_parts}] Confirmed 0-byte file.") + with progress_data['lock']: + progress_data['chunks_status'][part_num]['active'] = False + progress_data['chunks_status'][part_num]['speed_bps'] = 0 + return 0, True + + with open(temp_file_path, 'r+b') as f: # Open in read-write binary + f.seek(start_byte) + for data_segment in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE_ITER): + if cancellation_event and cancellation_event.is_set(): + logger(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during data iteration.") + return bytes_this_chunk, False + if skip_event and skip_event.is_set(): + logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event during data iteration.") + return bytes_this_chunk, False + if data_segment: + f.write(data_segment) + bytes_this_chunk += len(data_segment) + + with progress_data['lock']: + # Increment both the chunk's downloaded and the overall downloaded + progress_data['total_downloaded_so_far'] += len(data_segment) + progress_data['chunks_status'][part_num]['downloaded'] = bytes_this_chunk + progress_data['chunks_status'][part_num]['active'] = True + + current_time = time.time() + time_delta_speed = current_time - last_speed_calc_time + if time_delta_speed > 0.5: # Calculate speed every 0.5 seconds + bytes_delta = bytes_this_chunk - bytes_at_last_speed_calc + current_speed_bps = (bytes_delta * 8) / time_delta_speed if time_delta_speed > 0 else 0 + progress_data['chunks_status'][part_num]['speed_bps'] = current_speed_bps + last_speed_calc_time = current_time + bytes_at_last_speed_calc = bytes_this_chunk + + # Emit progress more frequently from within the chunk download + if current_time - last_progress_emit_time_for_chunk > 0.1: # Emit up to 10 times/sec per chunk + if signals and hasattr(signals, 'file_progress_signal'): + # Ensure we read the latest total downloaded from progress_data + # Send a copy 
def _plan_chunk_ranges(total_size, num_parts):
    """Split ``total_size`` bytes into inclusive ``(start, end)`` byte ranges.

    At most ``num_parts`` ranges are produced; the last range absorbs the
    remainder of the integer division. Empty ranges (which occur when
    ``total_size < num_parts``) are dropped. A 0-byte file is represented by
    the single sentinel range ``(0, -1)``.
    """
    base_len = total_size // num_parts
    ranges = []
    for i in range(num_parts):
        start = i * base_len
        end = start + base_len - 1 if i < num_parts - 1 else total_size - 1
        if start <= end:  # Valid, non-empty range
            ranges.append((start, end))
        elif total_size == 0 and i == 0:  # Special case for 0-byte file
            ranges.append((0, -1))
    return ranges


def _discard_part_file(temp_file_path, logger):
    """Best-effort removal of the temporary ``.part`` file; failures are only logged."""
    if os.path.exists(temp_file_path):
        try:
            os.remove(temp_file_path)
        except OSError as e:
            logger(f" Failed to remove temp part file '{temp_file_path}': {e}")


def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
                           api_original_filename, signals, cancellation_event, skip_event, logger):
    """
    Downloads a file in multiple parts concurrently.

    The file is written to ``save_path + ".part"``, which is pre-allocated to
    ``total_size`` bytes; each part is fetched by ``_download_individual_chunk``
    on its own worker thread.

    Args:
        file_url: URL passed to every chunk worker.
        save_path: Final destination path; ".part" is appended for the temp file.
        total_size: Expected size of the file in bytes (0 for an empty file).
        num_parts: Number of parts / worker threads to use. Must be >= 1.
        headers: Request headers forwarded to every chunk worker.
        api_original_filename: Name used in log messages and progress signals.
        signals: Optional object exposing ``file_progress_signal.emit(name, status_list)``.
        cancellation_event: Optional event; when set, the download is abandoned.
        skip_event: Optional event forwarded to the chunk workers.
        logger: Callable taking a single message string.

    Returns: (download_successful_flag, downloaded_bytes, calculated_file_hash, temp_file_handle_or_None)
    The temp_file_handle will be an open read-binary file handle to the .part file if successful, otherwise None.
    It is the responsibility of the caller to close this handle and rename/delete the .part file.
    On any failure the .part file is removed and ``(False, bytes, None, None)`` is returned.
    """
    # Guard against a division by zero below (and a stray .part file being left behind).
    if num_parts < 1:
        logger(f" ❌ Invalid number of parts ({num_parts}) for multipart download of '{api_original_filename}'. Aborting multipart.")
        return False, 0, None, None

    logger(f"⬇️ Initializing Multi-part Download ({num_parts} parts) for: '{api_original_filename}' (Size: {total_size / (1024*1024):.2f} MB)")
    temp_file_path = save_path + ".part"

    try:
        with open(temp_file_path, 'wb') as f_temp:
            if total_size > 0:
                f_temp.truncate(total_size)  # Pre-allocate space
    except OSError as e:
        logger(f" ❌ Error creating/truncating temp file '{temp_file_path}': {e}")
        return False, 0, None, None

    chunks_ranges = _plan_chunk_ranges(total_size, num_parts)
    # (0, -1) is the 0-byte sentinel; every other range is inclusive on both ends.
    chunk_actual_sizes = [
        0 if (end == -1 and start == 0) else end - start + 1
        for start, end in chunks_ranges
    ]

    if not chunks_ranges and total_size > 0:
        logger(f" ⚠️ No valid chunk ranges for multipart download of '{api_original_filename}'. Aborting multipart.")
        _discard_part_file(temp_file_path, logger)
        return False, 0, None, None

    progress_data = {
        'total_file_size': total_size,  # Overall file size for reference
        'total_downloaded_so_far': 0,  # Overall progress across all chunks
        'chunks_status': [  # Status for each chunk
            {'id': i, 'downloaded': 0, 'total': chunk_actual_sizes[i] if i < len(chunk_actual_sizes) else 0, 'active': False, 'speed_bps': 0.0}
            for i in range(num_parts)
        ],
        'lock': threading.Lock()
    }

    chunk_futures = []
    all_chunks_successful = True
    total_bytes_from_chunks = 0  # Verified against total_size once all workers finish

    with ThreadPoolExecutor(max_workers=num_parts, thread_name_prefix=f"MPChunk_{api_original_filename[:10]}_") as chunk_pool:
        for i, (start, end) in enumerate(chunks_ranges):
            if cancellation_event and cancellation_event.is_set():
                all_chunks_successful = False
                break
            chunk_futures.append(chunk_pool.submit(
                _download_individual_chunk, chunk_url=file_url, temp_file_path=temp_file_path,
                start_byte=start, end_byte=end, headers=headers, part_num=i, total_parts=num_parts,
                progress_data=progress_data, cancellation_event=cancellation_event, skip_event=skip_event, logger=logger,
                signals=signals, api_original_filename=api_original_filename
            ))

        for future in as_completed(chunk_futures):
            if cancellation_event and cancellation_event.is_set():
                all_chunks_successful = False
                break
            try:
                bytes_downloaded_this_chunk, success_this_chunk = future.result()
            except Exception as e:
                # A worker that raised must fail the download instead of escaping
                # and leaving the .part file behind.
                logger(f" ❌ Multi-part chunk worker for '{api_original_filename}' raised: {e}")
                all_chunks_successful = False
                continue
            total_bytes_from_chunks += bytes_downloaded_this_chunk
            if not success_this_chunk:
                all_chunks_successful = False
            # Progress is emitted from within _download_individual_chunk

    if cancellation_event and cancellation_event.is_set():
        logger(f" Multi-part download for '{api_original_filename}' cancelled by main event.")
        all_chunks_successful = False

    # Send a final progress update. The executor has shut down at this point, so no
    # worker is running any more: mark every chunk inactive before reporting.
    if signals and hasattr(signals, 'file_progress_signal'):
        with progress_data['lock']:
            for chunk_status in progress_data['chunks_status']:
                chunk_status['active'] = False
                chunk_status['speed_bps'] = 0
            status_list_copy = [dict(s) for s in progress_data['chunks_status']]
        # Emit outside the lock so connected slots never run while it is held.
        signals.file_progress_signal.emit(api_original_filename, status_list_copy)

    if all_chunks_successful and (total_bytes_from_chunks == total_size or total_size == 0):
        logger(f" ✅ Multi-part download successful for '{api_original_filename}'. Total bytes: {total_bytes_from_chunks}")
        try:
            md5_hasher = hashlib.md5()
            with open(temp_file_path, 'rb') as f_hash:
                for buf in iter(lambda: f_hash.read(4096*10), b''):  # Read in larger buffers for hashing
                    md5_hasher.update(buf)
            calculated_hash = md5_hasher.hexdigest()
            # Return an open file handle for the caller to manage (e.g., for compression).
            # The caller is responsible for closing this handle and renaming/deleting the .part file.
            return True, total_bytes_from_chunks, calculated_hash, open(temp_file_path, 'rb')
        except OSError as e:
            logger(f" ❌ Error reading temp file '{temp_file_path}' after download: {e}")
            _discard_part_file(temp_file_path, logger)
            return False, total_bytes_from_chunks, None, None

    logger(f" ❌ Multi-part download failed for '{api_original_filename}'. Success: {all_chunks_successful}, Bytes: {total_bytes_from_chunks}/{total_size}. Cleaning up.")
    _discard_part_file(temp_file_path, logger)
    return False, total_bytes_from_chunks, None, None
+ print("[Tour Direct Run] Resetting 'Never show again' flag in QSettings.") + test_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR) + test_settings.setValue(TourDialog.TOUR_SHOWN_KEY, False) # Set to False to force tour + test_settings.sync() # --- End testing block --- print("[Tour Test] Running tour standalone...") @@ -322,4 +325,4 @@ if __name__ == '__main__': final_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR) print(f"[Tour Test] Final state of '{TourDialog.TOUR_SHOWN_KEY}' in settings: {final_settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)}") - sys.exit() \ No newline at end of file + sys.exit()