This commit is contained in:
Yuvi9587 2025-05-12 10:54:31 +05:30
parent ccfb8496a2
commit f85de58fcb
5 changed files with 1059 additions and 298 deletions

View File

@ -1,14 +1,8 @@
Hanabi intrusive
Hanzo
Hinata
Jett
Makima
Rangiku - Page
Reyna
Sage
Yor
Yoruichi
killjoy
neon
power
viper
Boa Hancock
Hairy D.va
Mercy
Misc
Nami
Robin
Sombra
Yamato

View File

@ -18,6 +18,13 @@ except ImportError:
print("ERROR: Pillow library not found. Please install it: pip install Pillow")
Image = None
try:
from multipart_downloader import download_file_in_parts
MULTIPART_DOWNLOADER_AVAILABLE = True
except ImportError as e:
print(f"Warning: multipart_downloader.py not found or import error: {e}. Multi-part downloads will be disabled.")
MULTIPART_DOWNLOADER_AVAILABLE = False
def download_file_in_parts(*args, **kwargs): return False, 0, None, None # Dummy function
from io import BytesIO
@ -32,9 +39,16 @@ CHAR_SCOPE_TITLE = "title"
# Character-filter scopes, compared against the worker's `char_filter_scope`.
# Title matching is performed for the "title" and "both" scopes.
CHAR_SCOPE_FILES = "files"
CHAR_SCOPE_BOTH = "both"
# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed.
# Duplicate-handling modes, compared against the worker's `duplicate_file_mode`
# (consulted only when manga mode is not active):
#   "delete" - a file detected as a duplicate (by path, session name or hash) is skipped.
#   "move"   - a file detected as a duplicate is redirected to a "Duplicate" subfolder
#              of the target folder.
DUPLICATE_MODE_DELETE = "delete"
DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move"
# NOTE(review): neither of the next two names is used in the visible part of the
# file; presumably they are populated elsewhere - confirm.
fastapi_app = None
KNOWN_NAMES = []
# A multi-part download is attempted only when the size reported by the server
# is strictly greater than this threshold.
MIN_SIZE_FOR_MULTIPART_DOWNLOAD = 10 * 1024 * 1024 # 10 MB
# Upper bound on the part count; the worker uses min(num_file_threads, this value).
MAX_PARTS_FOR_MULTIPART_DOWNLOAD = 8 # Max concurrent connections for a single file
IMAGE_EXTENSIONS = {
'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp',
'.heic', '.heif', '.svg', '.ico', '.jfif', '.pjpeg', '.pjp', '.avif'
@ -50,20 +64,31 @@ ARCHIVE_EXTENSIONS = {
def is_title_match_for_character(post_title, character_name_filter):
    """Return True when the character filter occurs in the post title as a whole term.

    The comparison is case-insensitive. The filter is converted to ``str`` and
    stripped of surrounding whitespace before matching.

    Args:
        post_title: Title text to search. A falsy title never matches.
        character_name_filter: Name or alias to look for. A falsy value, or one
            that is empty after stripping, never matches.

    Returns:
        bool: True if the filter is found and is not directly adjacent to a
        word character on either side, otherwise False.
    """
    if not post_title or not character_name_filter:
        return False
    safe_filter = str(character_name_filter).strip()
    if not safe_filter:
        return False
    # Lookarounds are used instead of r"\b": they behave the same as r"\b" for
    # filters that start and end with a word character, but also work when the
    # filter starts or ends with punctuation (e.g. "2B (NieR)"), where r"\b"
    # can never match next to whitespace or the end of the title.
    pattern = r"(?<!\w)" + re.escape(safe_filter) + r"(?!\w)"
    return re.search(pattern, post_title, re.IGNORECASE) is not None
def is_filename_match_for_character(filename, character_name_filter):
    """Return True when the character filter occurs anywhere in the filename.

    This is a case-insensitive substring test. The filter is converted to
    ``str`` and stripped of surrounding whitespace before the comparison, so a
    padded or non-string filter no longer fails to match or raises.

    Args:
        filename: File name to search. A falsy name never matches.
        character_name_filter: Name or alias to look for. A falsy value, or one
            that is empty after stripping, never matches.

    Returns:
        bool: True if the normalised filter is a substring of the lower-cased
        filename, otherwise False.
    """
    if not filename or not character_name_filter:
        return False
    safe_filter = str(character_name_filter).strip().lower()
    if not safe_filter:
        return False
    return safe_filter in str(filename).lower()
# Build a folder name from `name`: non-string input is converted with str(),
# disallowed characters are dropped, whitespace is normalised, and
# "untitled_folder" is returned when nothing is left.
def clean_folder_name(name):
if not isinstance(name, str): name = str(name)
# Keep only word characters, whitespace, '-', '_', '.', '(' and ')'.
cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name)
cleaned = cleaned.strip()
# Replace each run of whitespace with a single underscore.
cleaned = re.sub(r'\s+', '_', cleaned)
# NOTE(review): no whitespace remains after the substitution above, so this one
# cannot match as written. In this diff view it is presumably the post-commit
# replacement for the previous line (single space instead of underscore) -
# confirm which separator is intended.
cleaned = re.sub(r'\s+', ' ', cleaned)
return cleaned if cleaned else "untitled_folder"
@ -366,7 +391,7 @@ class PostProcessorSignals(QObject):
progress_signal = pyqtSignal(str)
file_download_status_signal = pyqtSignal(bool)
external_link_signal = pyqtSignal(str, str, str, str)
file_progress_signal = pyqtSignal(str, int, int)
file_progress_signal = pyqtSignal(str, object)
class PostProcessorWorker:
@ -384,12 +409,14 @@ class PostProcessorWorker:
num_file_threads=4, skip_current_file_flag=None,
manga_mode_active=False,
manga_filename_style=STYLE_POST_TITLE,
char_filter_scope=CHAR_SCOPE_FILES
):
char_filter_scope=CHAR_SCOPE_FILES,
remove_from_filename_words_list=None,
allow_multipart_download=True,
duplicate_file_mode=DUPLICATE_MODE_DELETE):
self.post = post_data
self.download_root = download_root
self.known_names = known_names
self.filter_character_list = filter_character_list if filter_character_list else []
self.filter_character_list_objects = filter_character_list if filter_character_list else []
self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else set()
self.filter_mode = filter_mode
self.skip_zip = skip_zip
@ -421,7 +448,10 @@ class PostProcessorWorker:
self.manga_mode_active = manga_mode_active
self.manga_filename_style = manga_filename_style
self.char_filter_scope = char_filter_scope
self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else []
self.allow_multipart_download = allow_multipart_download
self.duplicate_file_mode = duplicate_file_mode # This will be the effective mode (possibly overridden by main.py for manga)
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found.")
self.compress_images = False
@ -438,15 +468,19 @@ class PostProcessorWorker:
def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event,
post_title="", file_index_in_post=0, num_files_in_this_post=1):
was_original_name_kept_flag = False
final_filename_saved_for_return = ""
final_filename_saved_for_return = ""
# current_target_folder_path is the actual folder where the file will be saved.
# It starts as the main character/post folder (target_folder_path) by default.
current_target_folder_path = target_folder_path
if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False
file_url = file_info.get('url')
api_original_filename = file_info.get('_original_name_for_log', file_info.get('name'))
final_filename_saved_for_return = api_original_filename
# This is the ideal name for the file if it were to be saved in the main target_folder_path.
filename_to_save_in_main_path = ""
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_FILES or self.skip_words_scope == SKIP_SCOPE_BOTH):
filename_to_check_for_skip_words = api_original_filename.lower()
@ -458,71 +492,55 @@ class PostProcessorWorker:
original_filename_cleaned_base, original_ext = os.path.splitext(clean_filename(api_original_filename))
if not original_ext.startswith('.'): original_ext = '.' + original_ext if original_ext else ''
filename_to_save = ""
if self.manga_mode_active:
if self.manga_mode_active: # Note: duplicate_file_mode is overridden to "Delete" in main.py if manga_mode is on
if self.manga_filename_style == STYLE_ORIGINAL_NAME:
filename_to_save = clean_filename(api_original_filename)
filename_to_save_in_main_path = clean_filename(api_original_filename)
was_original_name_kept_flag = True
elif self.manga_filename_style == STYLE_POST_TITLE:
if post_title and post_title.strip():
cleaned_post_title_base = clean_filename(post_title.strip())
if num_files_in_this_post > 1:
if file_index_in_post == 0:
filename_to_save = f"{cleaned_post_title_base}{original_ext}"
was_original_name_kept_flag = False
filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}"
else:
filename_to_save = clean_filename(api_original_filename)
filename_to_save_in_main_path = clean_filename(api_original_filename)
was_original_name_kept_flag = True
else:
filename_to_save = f"{cleaned_post_title_base}{original_ext}"
was_original_name_kept_flag = False
filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}"
else:
filename_to_save = clean_filename(api_original_filename)
was_original_name_kept_flag = False
self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save}'.")
else:
filename_to_save_in_main_path = clean_filename(api_original_filename)
self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save_in_main_path}'.")
else:
self.logger(f"⚠️ Manga mode: Unknown filename style '{self.manga_filename_style}'. Defaulting to original filename for '{api_original_filename}'.")
filename_to_save = clean_filename(api_original_filename)
was_original_name_kept_flag = False
filename_to_save_in_main_path = clean_filename(api_original_filename)
if filename_to_save:
counter = 1
base_name_coll, ext_coll = os.path.splitext(filename_to_save)
temp_filename_for_collision_check = filename_to_save
while os.path.exists(os.path.join(target_folder_path, temp_filename_for_collision_check)):
if self.manga_filename_style == STYLE_POST_TITLE and file_index_in_post == 0 and num_files_in_this_post > 1:
temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}"
else:
temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}"
counter += 1
if temp_filename_for_collision_check != filename_to_save:
filename_to_save = temp_filename_for_collision_check
else:
filename_to_save = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}"
self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save}'.")
if not filename_to_save_in_main_path:
filename_to_save_in_main_path = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}"
self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save_in_main_path}'.")
was_original_name_kept_flag = False
else:
filename_to_save = clean_filename(api_original_filename)
else:
filename_to_save_in_main_path = clean_filename(api_original_filename)
was_original_name_kept_flag = False
counter = 1
base_name_coll, ext_coll = os.path.splitext(filename_to_save)
temp_filename_for_collision_check = filename_to_save
while os.path.exists(os.path.join(target_folder_path, temp_filename_for_collision_check)):
temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}"
counter += 1
if temp_filename_for_collision_check != filename_to_save:
filename_to_save = temp_filename_for_collision_check
final_filename_for_sets_and_saving = filename_to_save
final_filename_saved_for_return = final_filename_for_sets_and_saving
if not self.download_thumbnails:
if self.remove_from_filename_words_list and filename_to_save_in_main_path:
base_name_for_removal, ext_for_removal = os.path.splitext(filename_to_save_in_main_path)
modified_base_name = base_name_for_removal
for word_to_remove in self.remove_from_filename_words_list:
if not word_to_remove: continue
pattern = re.compile(re.escape(word_to_remove), re.IGNORECASE)
modified_base_name = pattern.sub("", modified_base_name)
modified_base_name = re.sub(r'[_.\s-]+', '_', modified_base_name)
modified_base_name = modified_base_name.strip('_')
if modified_base_name and modified_base_name != ext_for_removal.lstrip('.'):
filename_to_save_in_main_path = modified_base_name + ext_for_removal
else:
filename_to_save_in_main_path = base_name_for_removal + ext_for_removal
if not self.download_thumbnails:
is_img_type = is_image(api_original_filename)
is_vid_type = is_video(api_original_filename)
is_archive_type = is_archive(api_original_filename)
if self.filter_mode == 'archive':
if not is_archive_type:
self.logger(f" -> Filter Skip (Archive Mode): '{api_original_filename}' (Not an Archive).")
@ -543,174 +561,265 @@ class PostProcessorWorker:
self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).")
return 0, 1, api_original_filename, False
target_folder_basename = os.path.basename(target_folder_path)
current_save_path = os.path.join(target_folder_path, final_filename_for_sets_and_saving)
if not self.manga_mode_active:
# --- Pre-Download Duplicate Handling (Standard Mode Only) ---
is_duplicate_for_main_folder_by_path = os.path.exists(os.path.join(target_folder_path, filename_to_save_in_main_path)) and \
os.path.getsize(os.path.join(target_folder_path, filename_to_save_in_main_path)) > 0
is_duplicate_for_main_folder_by_session_name = False
with self.downloaded_files_lock:
if filename_to_save_in_main_path in self.downloaded_files:
is_duplicate_for_main_folder_by_session_name = True
if os.path.exists(current_save_path) and os.path.getsize(current_save_path) > 0:
self.logger(f" -> Exists (Path): '{final_filename_for_sets_and_saving}' in '{target_folder_basename}'.")
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving)
return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag
if is_duplicate_for_main_folder_by_path or is_duplicate_for_main_folder_by_session_name:
if self.duplicate_file_mode == DUPLICATE_MODE_DELETE:
reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name"
self.logger(f" -> Delete Duplicate ({reason}): '{filename_to_save_in_main_path}'. Skipping download.")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER:
reason = "Path Exists" if is_duplicate_for_main_folder_by_path else "Session Name"
self.logger(f" -> Pre-DL Move ({reason}): '{filename_to_save_in_main_path}'. Will target 'Duplicate' subfolder.")
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
with self.downloaded_files_lock:
if final_filename_for_sets_and_saving in self.downloaded_files:
self.logger(f" -> Global Skip (Filename): '{final_filename_for_sets_and_saving}' already recorded this session.")
return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag
try:
os.makedirs(current_target_folder_path, exist_ok=True)
except OSError as e:
self.logger(f" ❌ Critical error creating directory '{current_target_folder_path}': {e}. Skipping file '{api_original_filename}'.")
return 0, 1, api_original_filename, False
# If mode is MOVE (and not manga mode), and current_target_folder_path is now "Duplicate",
# check if the file *already* exists by its base name in this "Duplicate" folder. (Standard Mode Only)
if not self.manga_mode_active and \
self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \
"Duplicate" in current_target_folder_path.split(os.sep) and \
os.path.exists(os.path.join(current_target_folder_path, filename_to_save_in_main_path)):
self.logger(f" -> File '{filename_to_save_in_main_path}' already exists in '{os.path.basename(current_target_folder_path)}' subfolder. Skipping download.")
# The name was already added to downloaded_files if it was a pre-DL move.
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
# --- Download Attempt ---
max_retries = 3
retry_delay = 5
downloaded_size_bytes = 0
calculated_file_hash = None
file_content_bytes = None
total_size_bytes = 0
download_successful_flag = False
for attempt_num in range(max_retries + 1):
if self.check_cancel() or (skip_event and skip_event.is_set()):
break
file_content_bytes = None
total_size_bytes = 0
download_successful_flag = False
for attempt_num_single_stream in range(max_retries + 1):
if self.check_cancel() or (skip_event and skip_event.is_set()): break
try:
if attempt_num > 0:
self.logger(f" Retrying '{api_original_filename}' (Attempt {attempt_num}/{max_retries})...")
time.sleep(retry_delay * (2**(attempt_num - 1)))
if attempt_num_single_stream > 0:
self.logger(f" Retrying download for '{api_original_filename}' (Overall Attempt {attempt_num_single_stream + 1}/{max_retries + 1})...")
time.sleep(retry_delay * (2**(attempt_num_single_stream - 1)))
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
self.signals.file_download_status_signal.emit(True)
response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True)
response.raise_for_status()
total_size_bytes = int(response.headers.get('Content-Length', 0))
current_total_size_bytes_from_headers = int(response.headers.get('Content-Length', 0))
num_parts_for_file = min(self.num_file_threads, MAX_PARTS_FOR_MULTIPART_DOWNLOAD)
attempt_multipart = (self.allow_multipart_download and MULTIPART_DOWNLOADER_AVAILABLE and
num_parts_for_file > 1 and total_size_bytes > MIN_SIZE_FOR_MULTIPART_DOWNLOAD and
'bytes' in response.headers.get('Accept-Ranges', '').lower())
if attempt_num == 0:
total_size_bytes = current_total_size_bytes_from_headers
size_str = f"{total_size_bytes / (1024 * 1024):.2f} MB" if total_size_bytes > 0 else "unknown size"
self.logger(f"⬇️ Downloading: '{api_original_filename}' (Size: {size_str}) [Saving as: '{final_filename_for_sets_and_saving}']")
current_attempt_total_size = total_size_bytes
if attempt_multipart:
response.close()
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
self.signals.file_download_status_signal.emit(False)
mp_save_path_base = os.path.join(current_target_folder_path, filename_to_save_in_main_path)
mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts(
file_url, mp_save_path_base, total_size_bytes, num_parts_for_file, headers,
api_original_filename, self.signals, self.cancellation_event, skip_event, self.logger
)
if mp_success:
download_successful_flag = True
downloaded_size_bytes = mp_bytes
calculated_file_hash = mp_hash
file_content_bytes = mp_file_handle
break
else:
if attempt_num_single_stream < max_retries:
self.logger(f" Multi-part download attempt failed for '{api_original_filename}'. Retrying with single stream.")
else:
download_successful_flag = False; break
self.logger(f"⬇️ Downloading (Single Stream): '{api_original_filename}' (Size: {total_size_bytes / (1024*1024):.2f} MB if known) [Base Name: '{filename_to_save_in_main_path}']")
file_content_buffer = BytesIO()
current_attempt_downloaded_bytes = 0
md5_hasher = hashlib.md5()
last_progress_time = time.time()
for chunk in response.iter_content(chunk_size=1 * 1024 * 1024):
if self.check_cancel() or (skip_event and skip_event.is_set()):
break
if self.check_cancel() or (skip_event and skip_event.is_set()): break
if chunk:
file_content_buffer.write(chunk)
md5_hasher.update(chunk)
file_content_buffer.write(chunk); md5_hasher.update(chunk)
current_attempt_downloaded_bytes += len(chunk)
if time.time() - last_progress_time > 1 and current_attempt_total_size > 0 and \
if time.time() - last_progress_time > 1 and total_size_bytes > 0 and \
self.signals and hasattr(self.signals, 'file_progress_signal'):
self.signals.file_progress_signal.emit(
api_original_filename,
current_attempt_downloaded_bytes,
current_attempt_total_size
)
self.signals.file_progress_signal.emit(api_original_filename, (current_attempt_downloaded_bytes, total_size_bytes))
last_progress_time = time.time()
if self.check_cancel() or (skip_event and skip_event.is_set()):
if file_content_buffer: file_content_buffer.close()
break
if file_content_buffer: file_content_buffer.close(); break
if current_attempt_downloaded_bytes > 0 or (current_attempt_total_size == 0 and response.status_code == 200):
if current_attempt_downloaded_bytes > 0 or (total_size_bytes == 0 and response.status_code == 200):
calculated_file_hash = md5_hasher.hexdigest()
downloaded_size_bytes = current_attempt_downloaded_bytes
if file_content_bytes: file_content_bytes.close()
file_content_bytes = file_content_buffer
file_content_bytes.seek(0)
download_successful_flag = True
break
else:
if file_content_bytes: file_content_bytes.close()
file_content_bytes = file_content_buffer; file_content_bytes.seek(0)
download_successful_flag = True; break
else:
if file_content_buffer: file_content_buffer.close()
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
self.logger(f" ❌ Download Error (Retryable): {api_original_filename}. Error: {e}")
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
except requests.exceptions.RequestException as e:
except requests.exceptions.RequestException as e:
self.logger(f" ❌ Download Error (Non-Retryable): {api_original_filename}. Error: {e}")
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
break
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close(); break
except Exception as e:
self.logger(f" ❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}")
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
break
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close(); break
finally:
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
self.signals.file_download_status_signal.emit(False)
if self.signals and hasattr(self.signals, 'file_progress_signal'):
final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes
self.signals.file_progress_signal.emit(api_original_filename, downloaded_size_bytes, final_total_for_progress)
self.signals.file_progress_signal.emit(api_original_filename, (downloaded_size_bytes, final_total_for_progress))
if self.check_cancel() or (skip_event and skip_event.is_set()):
self.logger(f" ⚠️ Download interrupted for {api_original_filename}.")
self.logger(f" ⚠️ Download process interrupted for {api_original_filename}.")
if file_content_bytes: file_content_bytes.close()
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
if not download_successful_flag:
self.logger(f"❌ Download failed for '{api_original_filename}' after {max_retries + 1} attempts.")
if file_content_bytes: file_content_bytes.close()
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
with self.downloaded_file_hashes_lock:
if calculated_file_hash in self.downloaded_file_hashes:
self.logger(f" -> Content Skip (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...) already downloaded this session.")
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving)
if file_content_bytes: file_content_bytes.close()
return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag
if not self.manga_mode_active:
# --- Post-Download Hash Check (Standard Mode Only) ---
with self.downloaded_file_hashes_lock:
if calculated_file_hash in self.downloaded_file_hashes:
if self.duplicate_file_mode == DUPLICATE_MODE_DELETE:
self.logger(f" -> Delete Duplicate (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). Skipping save.")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
if file_content_bytes: file_content_bytes.close()
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
elif self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER:
self.logger(f" -> Post-DL Move (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...). Content already downloaded.")
if "Duplicate" not in current_target_folder_path.split(os.sep):
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
self.logger(f" Redirecting to 'Duplicate' subfolder: '{current_target_folder_path}'")
# Ensure "Duplicate" folder exists if this is a new redirection due to hash
try: os.makedirs(current_target_folder_path, exist_ok=True)
except OSError as e_mkdir_hash: self.logger(f" Error creating Duplicate folder for hash collision: {e_mkdir_hash}")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
# --- Final Filename Determination for Saving ---
filename_for_actual_save = filename_to_save_in_main_path
bytes_to_write = file_content_bytes
final_filename_after_processing = final_filename_for_sets_and_saving
current_save_path_final = current_save_path
# If mode is MOVE (and not manga mode) and the file is destined for the main folder,
# but a file with that name *now* exists (e.g. race condition, or different file with same name not caught by hash),
# reroute it to the "Duplicate" folder.
if not self.manga_mode_active and \
self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER and \
current_target_folder_path == target_folder_path and \
os.path.exists(os.path.join(current_target_folder_path, filename_for_actual_save)):
self.logger(f" -> Post-DL Move (Late Name Collision in Main): '{filename_for_actual_save}'. Moving to 'Duplicate'.")
current_target_folder_path = os.path.join(target_folder_path, "Duplicate")
try: # Ensure "Duplicate" folder exists if this is a new redirection
os.makedirs(current_target_folder_path, exist_ok=True)
except OSError as e_mkdir: self.logger(f" Error creating Duplicate folder during late move: {e_mkdir}")
# The name filename_to_save_in_main_path was already added to downloaded_files if it was a pre-DL name collision.
# If it was a hash collision that got rerouted, it was also added.
# If this is a new reroute due to late name collision, ensure it's marked.
# Apply numeric suffix renaming (_1, _2) *only if needed within the current_target_folder_path*
# This means:
# - If current_target_folder_path is the main folder (and not MOVE mode, or MOVE mode but file was unique):
# Renaming happens if a file with filename_for_actual_save exists there.
# - If current_target_folder_path is "Duplicate" (because of MOVE mode):
# Renaming happens if filename_for_actual_save exists *within "Duplicate"*.
counter = 1
base_name_final_coll, ext_final_coll = os.path.splitext(filename_for_actual_save)
temp_filename_final_check = filename_for_actual_save
while os.path.exists(os.path.join(current_target_folder_path, temp_filename_final_check)):
temp_filename_final_check = f"{base_name_final_coll}_{counter}{ext_final_coll}"
counter += 1
if temp_filename_final_check != filename_for_actual_save:
self.logger(f" Final rename for target folder '{os.path.basename(current_target_folder_path)}': '{temp_filename_final_check}' (was '{filename_for_actual_save}')")
filename_for_actual_save = temp_filename_final_check
bytes_to_write = file_content_bytes
final_filename_after_processing = filename_for_actual_save
current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing)
is_img_for_compress_check = is_image(api_original_filename)
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...")
try:
bytes_to_write.seek(0)
with Image.open(bytes_to_write) as img_obj:
bytes_to_write.seek(0)
with Image.open(bytes_to_write) as img_obj:
if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA')
elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB')
compressed_bytes_io = BytesIO()
img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4)
compressed_size = compressed_bytes_io.getbuffer().nbytes
if compressed_size < downloaded_size_bytes * 0.9:
if compressed_size < downloaded_size_bytes * 0.9:
self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.")
bytes_to_write.close()
bytes_to_write = compressed_bytes_io
bytes_to_write.seek(0)
base_name_orig, _ = os.path.splitext(final_filename_for_sets_and_saving)
if hasattr(bytes_to_write, 'close'): bytes_to_write.close()
original_part_file_path = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for .part
if os.path.exists(original_part_file_path):
os.remove(original_part_file_path)
bytes_to_write = compressed_bytes_io; bytes_to_write.seek(0)
base_name_orig, _ = os.path.splitext(filename_for_actual_save)
final_filename_after_processing = base_name_orig + '.webp'
current_save_path_final = os.path.join(target_folder_path, final_filename_after_processing)
current_save_path_final = os.path.join(current_target_folder_path, final_filename_after_processing)
self.logger(f" Updated filename (compressed): {final_filename_after_processing}")
else:
self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0)
except Exception as comp_e:
self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); bytes_to_write.seek(0)
final_filename_saved_for_return = final_filename_after_processing
if final_filename_after_processing != final_filename_for_sets_and_saving and \
if final_filename_after_processing != filename_for_actual_save and \
os.path.exists(current_save_path_final) and os.path.getsize(current_save_path_final) > 0:
self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{target_folder_basename}'.")
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing)
bytes_to_write.close()
self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{os.path.basename(current_target_folder_path)}'.")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
if bytes_to_write and hasattr(bytes_to_write, 'close'): bytes_to_write.close()
return 0, 1, final_filename_after_processing, was_original_name_kept_flag
try:
os.makedirs(os.path.dirname(current_save_path_final), exist_ok=True)
with open(current_save_path_final, 'wb') as f_out:
f_out.write(bytes_to_write.getvalue())
os.makedirs(current_target_folder_path, exist_ok=True)
if isinstance(bytes_to_write, BytesIO):
with open(current_save_path_final, 'wb') as f_out:
f_out.write(bytes_to_write.getvalue())
else:
if hasattr(bytes_to_write, 'close'): bytes_to_write.close()
source_part_file = os.path.join(current_target_folder_path, filename_to_save_in_main_path) + ".part" # Use original base for .part
os.rename(source_part_file, current_save_path_final)
with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash)
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing)
self.logger(f"✅ Saved: '{final_filename_after_processing}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{target_folder_basename}'")
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path)
final_filename_saved_for_return = final_filename_after_processing
self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(current_target_folder_path)}'")
time.sleep(0.05)
return 1, 0, final_filename_after_processing, was_original_name_kept_flag
return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag
except Exception as save_err:
self.logger(f"❌ Save Fail for '{final_filename_after_processing}': {save_err}")
if os.path.exists(current_save_path_final):
@ -718,7 +827,8 @@ class PostProcessorWorker:
except OSError: self.logger(f" -> Failed to remove partially saved file: {current_save_path_final}")
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
finally:
if bytes_to_write: bytes_to_write.close()
if bytes_to_write and hasattr(bytes_to_write, 'close'):
bytes_to_write.close()
def process(self):
@ -749,16 +859,32 @@ class PostProcessorWorker:
post_is_candidate_by_title_char_match = False
char_filter_that_matched_title = None
if self.filter_character_list and \
if self.filter_character_list_objects and \
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH):
for char_name in self.filter_character_list:
if is_title_match_for_character(post_title, char_name):
post_is_candidate_by_title_char_match = True
char_filter_that_matched_title = char_name
self.logger(f" Post title matches char filter '{char_name}' (Scope: {self.char_filter_scope}). Post is candidate.")
break
self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. Scope: {self.char_filter_scope}")
for idx, filter_item_obj in enumerate(self.filter_character_list_objects):
self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}")
terms_to_check_for_title = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"]:
if filter_item_obj["name"] not in terms_to_check_for_title:
terms_to_check_for_title.append(filter_item_obj["name"])
unique_terms_for_title_check = list(set(terms_to_check_for_title))
self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}")
for term_to_match in unique_terms_for_title_check:
self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'")
match_found_for_term = is_title_match_for_character(post_title, term_to_match)
self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}")
if match_found_for_term:
post_is_candidate_by_title_char_match = True
char_filter_that_matched_title = filter_item_obj
self.logger(f" Post title matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}', Scope: {self.char_filter_scope}). Post is candidate.")
break
if post_is_candidate_by_title_char_match: break
self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}")
if self.filter_character_list and self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
return 0, num_potential_files_in_post, []
@ -769,7 +895,7 @@ class PostProcessorWorker:
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'. Scope: {self.skip_words_scope}")
return 0, num_potential_files_in_post, []
if not self.extract_links_only and self.manga_mode_active and self.filter_character_list and \
if not self.extract_links_only and self.manga_mode_active and self.filter_character_list_objects and \
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and \
not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title[:50]}' doesn't match filters.")
@ -782,8 +908,8 @@ class PostProcessorWorker:
base_folder_names_for_post_content = []
if not self.extract_links_only and self.use_subfolders:
if post_is_candidate_by_title_char_match and char_filter_that_matched_title:
base_folder_names_for_post_content = [clean_folder_name(char_filter_that_matched_title)]
else:
base_folder_names_for_post_content = [clean_folder_name(char_filter_that_matched_title["name"])]
elif not self.filter_character_list_objects:
derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
if derived_folders:
base_folder_names_for_post_content.extend(derived_folders)
@ -791,7 +917,10 @@ class PostProcessorWorker:
base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
self.logger(f" Base folder name(s) for post content (if title matched char or generic): {', '.join(base_folder_names_for_post_content)}")
if base_folder_names_for_post_content:
log_reason = "Matched char filter" if (post_is_candidate_by_title_char_match and char_filter_that_matched_title) else "Generic title parsing (no char filters)"
self.logger(f" Base folder name(s) for post content ({log_reason}): {', '.join(base_folder_names_for_post_content)}")
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
for folder_name_to_check in base_folder_names_for_post_content:
@ -907,28 +1036,49 @@ class PostProcessorWorker:
current_api_original_filename = file_info_to_dl.get('_original_name_for_log')
file_is_candidate_by_char_filter_scope = False
char_filter_that_matched_file = None
char_filter_info_that_matched_file = None
if not self.filter_character_list:
if not self.filter_character_list_objects:
file_is_candidate_by_char_filter_scope = True
elif self.char_filter_scope == CHAR_SCOPE_FILES:
for char_name in self.filter_character_list:
if is_filename_match_for_character(current_api_original_filename, char_name):
else:
if self.char_filter_scope == CHAR_SCOPE_FILES:
for filter_item_obj in self.filter_character_list_objects:
terms_to_check_for_file = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_for_file:
terms_to_check_for_file.append(filter_item_obj["name"])
unique_terms_for_file_check = list(set(terms_to_check_for_file))
for term_to_match in unique_terms_for_file_check:
if is_filename_match_for_character(current_api_original_filename, term_to_match):
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = filter_item_obj
self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Files.")
break
if file_is_candidate_by_char_filter_scope: break
elif self.char_filter_scope == CHAR_SCOPE_TITLE:
if post_is_candidate_by_title_char_match:
file_is_candidate_by_char_filter_scope = True
char_filter_that_matched_file = char_name
break
elif self.char_filter_scope == CHAR_SCOPE_TITLE:
if post_is_candidate_by_title_char_match:
file_is_candidate_by_char_filter_scope = True
elif self.char_filter_scope == CHAR_SCOPE_BOTH:
if post_is_candidate_by_title_char_match:
file_is_candidate_by_char_filter_scope = True
else:
for char_name in self.filter_character_list:
if is_filename_match_for_character(current_api_original_filename, char_name):
file_is_candidate_by_char_filter_scope = True
char_filter_that_matched_file = char_name
break
char_filter_info_that_matched_file = char_filter_that_matched_title
self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. Scope: Title.")
elif self.char_filter_scope == CHAR_SCOPE_BOTH:
if post_is_candidate_by_title_char_match:
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_title
self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. Scope: Both (Title part).")
else:
for filter_item_obj in self.filter_character_list_objects:
terms_to_check_for_file_both = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_for_file_both:
terms_to_check_for_file_both.append(filter_item_obj["name"])
unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both))
for term_to_match in unique_terms_for_file_both_check:
if is_filename_match_for_character(current_api_original_filename, term_to_match):
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = filter_item_obj
self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Both (File part).")
break
if file_is_candidate_by_char_filter_scope: break
if not file_is_candidate_by_char_filter_scope:
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
@ -941,10 +1091,10 @@ class PostProcessorWorker:
char_title_subfolder_name = None
if self.target_post_id_from_initial_url and self.custom_folder_name:
char_title_subfolder_name = self.custom_folder_name
elif char_filter_that_matched_title:
char_title_subfolder_name = clean_folder_name(char_filter_that_matched_title)
elif char_filter_that_matched_file:
char_title_subfolder_name = clean_folder_name(char_filter_that_matched_file)
elif char_filter_info_that_matched_file:
char_title_subfolder_name = clean_folder_name(char_filter_info_that_matched_file["name"])
elif char_filter_that_matched_title:
char_title_subfolder_name = clean_folder_name(char_filter_that_matched_title["name"])
elif base_folder_names_for_post_content:
char_title_subfolder_name = base_folder_names_for_post_content[0]
@ -953,7 +1103,7 @@ class PostProcessorWorker:
if self.use_post_subfolders:
cleaned_title_for_subfolder = clean_folder_name(post_title)
post_specific_subfolder_name = f"{post_id}_{cleaned_title_for_subfolder}" if cleaned_title_for_subfolder else f"{post_id}_untitled"
post_specific_subfolder_name = cleaned_title_for_subfolder # Use only the cleaned title
current_path_for_file = os.path.join(current_path_for_file, post_specific_subfolder_name)
target_folder_path_for_this_file = current_path_for_file
@ -990,7 +1140,7 @@ class PostProcessorWorker:
total_skipped_this_post += 1
if self.signals and hasattr(self.signals, 'file_progress_signal'):
self.signals.file_progress_signal.emit("", 0, 0)
self.signals.file_progress_signal.emit("", None)
if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled.");
else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}")
@ -1004,7 +1154,7 @@ class DownloadThread(QThread):
file_download_status_signal = pyqtSignal(bool)
finished_signal = pyqtSignal(int, int, bool, list)
external_link_signal = pyqtSignal(str, str, str, str)
file_progress_signal = pyqtSignal(str, int, int)
file_progress_signal = pyqtSignal(str, object)
def __init__(self, api_url_input, output_dir, known_names_copy,
@ -1025,8 +1175,10 @@ class DownloadThread(QThread):
manga_mode_active=False,
unwanted_keywords=None,
manga_filename_style=STYLE_POST_TITLE,
char_filter_scope=CHAR_SCOPE_FILES
):
char_filter_scope=CHAR_SCOPE_FILES,
remove_from_filename_words_list=None,
allow_multipart_download=True,
duplicate_file_mode=DUPLICATE_MODE_DELETE): # Default to DELETE
super().__init__()
self.api_url_input = api_url_input
self.output_dir = output_dir
@ -1034,7 +1186,7 @@ class DownloadThread(QThread):
self.cancellation_event = cancellation_event
self.skip_current_file_flag = skip_current_file_flag
self.initial_target_post_id = target_post_id_from_initial_url
self.filter_character_list = filter_character_list if filter_character_list else []
self.filter_character_list_objects = filter_character_list if filter_character_list else []
self.filter_mode = filter_mode
self.skip_zip = skip_zip
self.skip_rar = skip_rar
@ -1065,7 +1217,9 @@ class DownloadThread(QThread):
{'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'}
self.manga_filename_style = manga_filename_style
self.char_filter_scope = char_filter_scope
self.remove_from_filename_words_list = remove_from_filename_words_list
self.allow_multipart_download = allow_multipart_download
self.duplicate_file_mode = duplicate_file_mode
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
self.compress_images = False
@ -1116,7 +1270,7 @@ class DownloadThread(QThread):
post_data=individual_post_data,
download_root=self.output_dir,
known_names=self.known_names,
filter_character_list=self.filter_character_list,
filter_character_list=self.filter_character_list_objects,
unwanted_keywords=self.unwanted_keywords,
filter_mode=self.filter_mode,
skip_zip=self.skip_zip, skip_rar=self.skip_rar,
@ -1140,8 +1294,10 @@ class DownloadThread(QThread):
skip_current_file_flag=self.skip_current_file_flag,
manga_mode_active=self.manga_mode_active,
manga_filename_style=self.manga_filename_style,
char_filter_scope=self.char_filter_scope
)
char_filter_scope=self.char_filter_scope,
remove_from_filename_words_list=self.remove_from_filename_words_list,
allow_multipart_download=self.allow_multipart_download,
duplicate_file_mode=self.duplicate_file_mode)
try:
dl_count, skip_count, kept_originals_this_post = post_processing_worker.process()
grand_total_downloaded_files += dl_count

544
main.py
View File

@ -19,12 +19,12 @@ from PyQt5.QtGui import (
)
from PyQt5.QtWidgets import (
QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton,
QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget,
QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, QDesktopWidget,
QRadioButton, QButtonGroup, QCheckBox, QSplitter, QSizePolicy, QDialog,
QFrame,
QAbstractButton
)
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer, QSettings
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject, QTimer, QSettings, QStandardPaths
from urllib.parse import urlparse
try:
@ -47,6 +47,9 @@ try:
SKIP_SCOPE_FILES,
SKIP_SCOPE_POSTS,
SKIP_SCOPE_BOTH,
CHAR_SCOPE_TITLE, # Added for completeness if used directly
CHAR_SCOPE_FILES, # Added
CHAR_SCOPE_BOTH # Added
)
print("Successfully imported names from downloader_utils.")
except ImportError as e:
@ -62,6 +65,9 @@ except ImportError as e:
SKIP_SCOPE_FILES = "files"
SKIP_SCOPE_POSTS = "posts"
SKIP_SCOPE_BOTH = "both"
CHAR_SCOPE_TITLE = "title"
CHAR_SCOPE_FILES = "files"
CHAR_SCOPE_BOTH = "both"
except Exception as e:
print(f"--- UNEXPECTED IMPORT ERROR ---")
@ -97,11 +103,16 @@ MANGA_FILENAME_STYLE_KEY = "mangaFilenameStyleV1"
# Manga/Comic Mode filename styles; STYLE_POST_TITLE is the default passed
# when the stored style is read back from QSettings.
STYLE_POST_TITLE = "post_title"
STYLE_ORIGINAL_NAME = "original_name"
# QSettings keys used to persist the corresponding UI preferences between runs.
SKIP_WORDS_SCOPE_KEY = "skipWordsScopeV1"
ALLOW_MULTIPART_DOWNLOAD_KEY = "allowMultipartDownloadV1"
CHAR_FILTER_SCOPE_KEY = "charFilterScopeV1"
# NOTE(review): the three CHAR_SCOPE_* assignments below repeat the values
# that are imported from downloader_utils (or set in its ImportError fallback).
CHAR_SCOPE_TITLE = "title"
CHAR_SCOPE_FILES = "files"
CHAR_SCOPE_BOTH = "both"
# CHAR_SCOPE_TITLE, CHAR_SCOPE_FILES, CHAR_SCOPE_BOTH are already defined or imported
DUPLICATE_FILE_MODE_KEY = "duplicateFileModeV1"
# DUPLICATE_MODE_RENAME is removed. Renaming only happens within a target folder if needed.
# Values stored under DUPLICATE_FILE_MODE_KEY; DELETE is the default when no
# value has been saved yet.
DUPLICATE_MODE_DELETE = "delete"
DUPLICATE_MODE_MOVE_TO_SUBFOLDER = "move" # New mode
class DownloaderApp(QWidget):
@ -111,13 +122,35 @@ class DownloaderApp(QWidget):
overall_progress_signal = pyqtSignal(int, int)
finished_signal = pyqtSignal(int, int, bool, list)
external_link_signal = pyqtSignal(str, str, str, str)
file_progress_signal = pyqtSignal(str, int, int)
# Changed to object to handle both (int, int) for single stream and list for multipart
file_progress_signal = pyqtSignal(str, object)
def __init__(self):
super().__init__()
self.settings = QSettings(CONFIG_ORGANIZATION_NAME, CONFIG_APP_NAME_MAIN)
self.config_file = "Known.txt"
# Determine path for Known.txt in user's app data directory
app_config_dir = ""
try:
# Use AppLocalDataLocation for user-specific, non-roaming data
app_data_root = QStandardPaths.writableLocation(QStandardPaths.AppLocalDataLocation)
if not app_data_root: # Fallback if somehow empty
app_data_root = QStandardPaths.writableLocation(QStandardPaths.GenericDataLocation)
if app_data_root and CONFIG_ORGANIZATION_NAME:
app_config_dir = os.path.join(app_data_root, CONFIG_ORGANIZATION_NAME)
elif app_data_root: # If no org name, use a generic app name folder
app_config_dir = os.path.join(app_data_root, "KemonoDownloaderAppData") # Fallback app name
else: # Absolute fallback: current working directory (less ideal for bundled app)
app_config_dir = os.getcwd()
if not os.path.exists(app_config_dir):
os.makedirs(app_config_dir, exist_ok=True)
except Exception as e_path:
print(f"Error setting up app_config_dir: {e_path}. Defaulting to CWD for Known.txt.")
app_config_dir = os.getcwd() # Fallback
self.config_file = os.path.join(app_config_dir, "Known.txt")
self.download_thread = None
self.thread_pool = None
@ -170,12 +203,15 @@ class DownloaderApp(QWidget):
self.manga_filename_style = self.settings.value(MANGA_FILENAME_STYLE_KEY, STYLE_POST_TITLE, type=str)
self.skip_words_scope = self.settings.value(SKIP_WORDS_SCOPE_KEY, SKIP_SCOPE_POSTS, type=str)
self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_TITLE, type=str)
self.allow_multipart_download_setting = self.settings.value(ALLOW_MULTIPART_DOWNLOAD_KEY, False, type=bool) # Default to OFF
self.duplicate_file_mode = self.settings.value(DUPLICATE_FILE_MODE_KEY, DUPLICATE_MODE_DELETE, type=str) # Default to DELETE
print(f" Known.txt will be loaded/saved at: {self.config_file}")
self.load_known_names_from_util()
self.setWindowTitle("Kemono Downloader v3.1.1")
self.setGeometry(150, 150, 1050, 820)
self.setWindowTitle("Kemono Downloader v3.2.0")
# self.setGeometry(150, 150, 1050, 820) # Initial geometry will be set after showing
self.setStyleSheet(self.get_dark_theme())
self.init_ui()
self._connect_signals()
@ -183,10 +219,12 @@ class DownloaderApp(QWidget):
self.log_signal.emit(" Local API server functionality has been removed.")
self.log_signal.emit(" 'Skip Current File' button has been removed.")
if hasattr(self, 'character_input'):
self.character_input.setToolTip("Enter one or more character names, separated by commas (e.g., yor, makima)")
self.character_input.setToolTip("Names, comma-separated. Group aliases: (alias1, alias2) for combined folder name 'alias1 alias2'. E.g., yor, (Boa, Hancock)")
self.log_signal.emit(f" Manga filename style loaded: '{self.manga_filename_style}'")
self.log_signal.emit(f" Skip words scope loaded: '{self.skip_words_scope}'")
self.log_signal.emit(f" Character filter scope loaded: '{self.char_filter_scope}'")
self.log_signal.emit(f" Multi-part download preference loaded: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'}")
self.log_signal.emit(f" Duplicate file handling mode loaded: '{self.duplicate_file_mode.capitalize()}'")
def _connect_signals(self):
@ -234,6 +272,9 @@ class DownloaderApp(QWidget):
if self.char_filter_scope_toggle_button:
self.char_filter_scope_toggle_button.clicked.connect(self._cycle_char_filter_scope)
if hasattr(self, 'multipart_toggle_button'): self.multipart_toggle_button.clicked.connect(self._toggle_multipart_mode)
if hasattr(self, 'duplicate_mode_toggle_button'): self.duplicate_mode_toggle_button.clicked.connect(self._cycle_duplicate_mode)
def load_known_names_from_util(self):
@ -278,6 +319,8 @@ class DownloaderApp(QWidget):
self.settings.setValue(MANGA_FILENAME_STYLE_KEY, self.manga_filename_style)
self.settings.setValue(SKIP_WORDS_SCOPE_KEY, self.skip_words_scope)
self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode) # Save current mode
self.settings.sync()
should_exit = True
@ -289,17 +332,26 @@ class DownloaderApp(QWidget):
QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
if reply == QMessageBox.Yes:
self.log_signal.emit("⚠️ Cancelling active download due to application exit...")
self.cancel_download()
self.log_signal.emit(" Waiting briefly for threads to acknowledge cancellation...")
# Direct cancellation for exit - different from button cancel
self.cancellation_event.set()
if self.download_thread and self.download_thread.isRunning():
self.download_thread.requestInterruption()
self.log_signal.emit(" Signaled single download thread to interrupt.")
# For thread pool, we want to wait on exit.
if self.download_thread and self.download_thread.isRunning():
self.log_signal.emit(" Waiting for single download thread to finish...")
self.download_thread.wait(3000)
if self.download_thread.isRunning():
self.log_signal.emit(" ⚠️ Single download thread did not terminate gracefully.")
if self.thread_pool:
self.log_signal.emit(" Shutting down thread pool (waiting for completion)...")
self.thread_pool.shutdown(wait=True, cancel_futures=True)
self.log_signal.emit(" Thread pool shutdown complete.")
self.thread_pool = None
self.log_signal.emit(" Cancellation for exit complete.")
else:
should_exit = False
self.log_signal.emit(" Application exit cancelled.")
@ -381,7 +433,7 @@ class DownloaderApp(QWidget):
char_input_and_button_layout.setSpacing(10)
self.character_input = QLineEdit()
self.character_input.setPlaceholderText("e.g., yor, Tifa, Reyna")
self.character_input.setPlaceholderText("e.g., yor, Tifa, (Reyna, Sage)")
char_input_and_button_layout.addWidget(self.character_input, 3)
self.char_filter_scope_toggle_button = QPushButton()
@ -411,20 +463,51 @@ class DownloaderApp(QWidget):
left_layout.addWidget(self.filters_and_custom_folder_container_widget)
left_layout.addWidget(QLabel("🚫 Skip with Words (comma-separated):"))
# --- Word Manipulation Section (Skip Words & Remove from Filename) ---
word_manipulation_container_widget = QWidget()
word_manipulation_outer_layout = QHBoxLayout(word_manipulation_container_widget)
word_manipulation_outer_layout.setContentsMargins(0,0,0,0) # No margins for the outer container
word_manipulation_outer_layout.setSpacing(15) # Spacing between the two vertical groups
# Group 1: Skip Words (Left, ~70% space)
skip_words_widget = QWidget()
skip_words_vertical_layout = QVBoxLayout(skip_words_widget)
skip_words_vertical_layout.setContentsMargins(0,0,0,0) # No margins for the inner group
skip_words_vertical_layout.setSpacing(2) # Small spacing between label and input row
skip_words_label = QLabel("🚫 Skip with Words (comma-separated):")
skip_words_vertical_layout.addWidget(skip_words_label)
skip_input_and_button_layout = QHBoxLayout()
skip_input_and_button_layout = QHBoxLayout()
skip_input_and_button_layout.setContentsMargins(0, 0, 0, 0)
skip_input_and_button_layout.setSpacing(10)
self.skip_words_input = QLineEdit()
self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview")
skip_input_and_button_layout.addWidget(self.skip_words_input, 3)
skip_input_and_button_layout.addWidget(self.skip_words_input, 1) # Input field takes available space
self.skip_scope_toggle_button = QPushButton()
self._update_skip_scope_button_text()
self.skip_scope_toggle_button.setToolTip("Click to cycle skip scope (Files -> Posts -> Both)")
self.skip_scope_toggle_button.setStyleSheet("padding: 6px 10px;")
self.skip_scope_toggle_button.setMinimumWidth(100)
skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 1)
left_layout.addLayout(skip_input_and_button_layout)
skip_input_and_button_layout.addWidget(self.skip_scope_toggle_button, 0) # Button takes its minimum
skip_words_vertical_layout.addLayout(skip_input_and_button_layout)
word_manipulation_outer_layout.addWidget(skip_words_widget, 7) # 70% stretch for left group
# Group 2: Remove Words from name (Right, ~30% space)
remove_words_widget = QWidget()
remove_words_vertical_layout = QVBoxLayout(remove_words_widget)
remove_words_vertical_layout.setContentsMargins(0,0,0,0) # No margins for the inner group
remove_words_vertical_layout.setSpacing(2)
self.remove_from_filename_label = QLabel("✂️ Remove Words from name:")
remove_words_vertical_layout.addWidget(self.remove_from_filename_label)
self.remove_from_filename_input = QLineEdit()
self.remove_from_filename_input.setPlaceholderText("e.g., patreon, HD") # Placeholder for the new field
remove_words_vertical_layout.addWidget(self.remove_from_filename_input)
word_manipulation_outer_layout.addWidget(remove_words_widget, 3) # 30% stretch for right group
left_layout.addWidget(word_manipulation_container_widget)
# --- End Word Manipulation Section ---
file_filter_layout = QVBoxLayout()
@ -527,7 +610,8 @@ class DownloaderApp(QWidget):
self.manga_mode_checkbox = QCheckBox("Manga/Comic Mode")
self.manga_mode_checkbox.setToolTip("Downloads posts from oldest to newest and renames files based on post title (for creator feeds only).")
self.manga_mode_checkbox.setChecked(False)
advanced_row2_layout.addWidget(self.manga_mode_checkbox)
advanced_row2_layout.addWidget(self.manga_mode_checkbox) # Keep manga mode checkbox here
advanced_row2_layout.addStretch(1)
checkboxes_group_layout.addLayout(advanced_row2_layout)
left_layout.addLayout(checkboxes_group_layout)
@ -538,9 +622,9 @@ class DownloaderApp(QWidget):
self.download_btn = QPushButton("⬇️ Start Download")
self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;")
self.download_btn.clicked.connect(self.start_download)
self.cancel_btn = QPushButton("❌ Cancel")
self.cancel_btn = QPushButton("❌ Cancel & Reset UI") # Updated button text for clarity
self.cancel_btn.setEnabled(False)
self.cancel_btn.clicked.connect(self.cancel_download)
self.cancel_btn.clicked.connect(self.cancel_download_button_action) # Changed connection
btn_layout.addWidget(self.download_btn)
btn_layout.addWidget(self.cancel_btn)
left_layout.addLayout(btn_layout)
@ -598,6 +682,20 @@ class DownloaderApp(QWidget):
self._update_manga_filename_style_button_text()
log_title_layout.addWidget(self.manga_rename_toggle_button)
self.multipart_toggle_button = QPushButton() # Create the button
self.multipart_toggle_button.setToolTip("Toggle between Multi-part and Single-stream downloads for large files.")
self.multipart_toggle_button.setFixedWidth(130) # Adjust width as needed
self.multipart_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding
self._update_multipart_toggle_button_text() # Set initial text
log_title_layout.addWidget(self.multipart_toggle_button) # Add to layout
self.duplicate_mode_toggle_button = QPushButton()
self.duplicate_mode_toggle_button.setToolTip("Toggle how duplicate filenames are handled (Rename or Delete).")
self.duplicate_mode_toggle_button.setFixedWidth(150) # Adjust width
self.duplicate_mode_toggle_button.setStyleSheet("padding: 4px 8px;") # Added padding
self._update_duplicate_mode_button_text() # Set initial text
log_title_layout.addWidget(self.duplicate_mode_toggle_button)
self.log_verbosity_button = QPushButton("Show Basic Log")
self.log_verbosity_button.setToolTip("Toggle between full and basic log details.")
self.log_verbosity_button.setFixedWidth(110)
@ -676,6 +774,17 @@ class DownloaderApp(QWidget):
self._update_manga_filename_style_button_text()
self._update_skip_scope_button_text()
self._update_char_filter_scope_button_text()
self._update_duplicate_mode_button_text()
def _center_on_screen(self):
    """Move the window so its frame is centred on the screen.

    Any exception raised while querying or applying the geometry is
    reported through ``log_signal`` instead of being propagated.
    """
    try:
        screen_rect = QDesktopWidget().screenGeometry()
        frame_rect = self.frameGeometry()
        # Shift a copy of the frame rectangle onto the screen centre, then
        # move the real window to that rectangle's top-left corner.
        frame_rect.moveCenter(screen_rect.center())
        target_top_left = frame_rect.topLeft()
        self.move(target_top_left)
    except Exception as e:
        self.log_signal.emit(f"⚠️ Error centering window: {e}")
def get_dark_theme(self):
@ -826,30 +935,57 @@ class DownloaderApp(QWidget):
print(f"GUI External Log Error (Append): {e}\nOriginal Message: {formatted_link_text}")
def update_file_progress_display(self, filename, progress_info):
    """Refresh the per-file progress label from a ``file_progress_signal`` payload.

    The signal carries ``(str, object)``, so this slot takes exactly two
    arguments; the older ``(filename, downloaded_bytes, total_bytes)`` form
    is superseded by the tuple payload handled below.

    Args:
        filename: Name of the file being downloaded. May be empty when the
            emitter only wants the label cleared.
        progress_info: One of

            * ``None`` - clear the label (download finished, failed, or an
              explicit reset);
            * a ``list`` of per-chunk dicts for a multi-part download. The
              keys read here are ``downloaded``, ``total``, ``active`` and
              ``speed_bps``; missing keys count as ``0`` / ``False``;
            * a ``(downloaded_bytes, total_bytes)`` tuple for a single
              stream, where ``total_bytes <= 0`` means the size is unknown.

            Any other payload leaves the label untouched, unless both
            arguments are falsy, in which case the label is cleared.
    """
    label = self.file_progress_label
    bytes_per_mb = 1024 * 1024

    if progress_info is None:
        label.setText("")
        return

    if isinstance(progress_info, list):
        if not progress_info:
            label.setText(f"File: {filename} - Initializing parts...")
            return

        # Totals are summed over the chunks, which assumes every chunk in
        # the list belongs to the same file.
        downloaded_all = sum(chunk.get('downloaded', 0) for chunk in progress_info)
        total_all = sum(chunk.get('total', 0) for chunk in progress_info)
        # Only chunks flagged active contribute to the part count and speed.
        active_chunks = [chunk for chunk in progress_info if chunk.get('active', False)]
        combined_speed_bps = sum(chunk.get('speed_bps', 0) for chunk in active_chunks)
        # speed_bps is divided by 8 before scaling, i.e. it is treated as
        # bits per second - TODO confirm against the multipart downloader.
        speed_MBps = (combined_speed_bps / 8) / bytes_per_mb

        # Append the ellipsis only when the name was actually cut, so short
        # names are not shown as if they had been truncated.
        max_multi_len = 20
        if len(filename) > max_multi_len:
            shown_name = filename[:max_multi_len] + "..."
        else:
            shown_name = filename

        label.setText(
            f"DL '{shown_name}': {downloaded_all / bytes_per_mb:.1f}/"
            f"{total_all / bytes_per_mb:.1f} MB "
            f"({len(active_chunks)} parts @ {speed_MBps:.2f} MB/s)"
        )
        return

    if isinstance(progress_info, tuple) and len(progress_info) == 2:
        downloaded_bytes, total_bytes = progress_info
        if not filename and total_bytes == 0 and downloaded_bytes == 0:
            # Nothing to report: treat as a clear request.
            label.setText("")
            return

        max_fn_len = 25
        if len(filename) <= max_fn_len:
            shown_name = filename
        else:
            shown_name = filename[:max_fn_len - 3].strip() + "..."

        text = f"Downloading '{shown_name}' ({downloaded_bytes / bytes_per_mb:.1f}MB"
        if total_bytes > 0:
            text += f" / {total_bytes / bytes_per_mb:.1f}MB)"
        else:
            text += ")"
        label.setText(text)
        return

    if not filename and not progress_info:
        # General clear for other falsy payloads such as 0 or an empty tuple.
        label.setText("")
def update_external_links_setting(self, checked):
@ -903,6 +1039,7 @@ class DownloaderApp(QWidget):
if self.use_subfolders_checkbox: self.use_subfolders_checkbox.setEnabled(file_download_mode_active)
if self.skip_words_input: self.skip_words_input.setEnabled(file_download_mode_active)
if self.skip_scope_toggle_button: self.skip_scope_toggle_button.setEnabled(file_download_mode_active)
if hasattr(self, 'remove_from_filename_input'): self.remove_from_filename_input.setEnabled(file_download_mode_active)
if self.skip_zip_checkbox:
can_skip_zip = not is_only_links and not is_only_archives
@ -1302,6 +1439,9 @@ class DownloaderApp(QWidget):
if self.manga_rename_toggle_button:
self.manga_rename_toggle_button.setVisible(manga_mode_effectively_on)
if hasattr(self, 'duplicate_mode_toggle_button'):
self.duplicate_mode_toggle_button.setVisible(not manga_mode_effectively_on) # Hidden in Manga Mode
if manga_mode_effectively_on:
if self.page_range_label: self.page_range_label.setEnabled(False)
if self.start_page_input: self.start_page_input.setEnabled(False); self.start_page_input.clear()
@ -1390,6 +1530,11 @@ class DownloaderApp(QWidget):
raw_skip_words = self.skip_words_input.text().strip()
skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()]
current_skip_words_scope = self.get_skip_words_scope()
raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else ""
effective_duplicate_file_mode = self.duplicate_file_mode # Start with user's choice
allow_multipart = self.allow_multipart_download_setting # Use the internal setting
remove_from_filename_words_list = [word.strip() for word in raw_remove_filename_words.split(',') if word.strip()]
current_char_filter_scope = self.get_char_filter_scope()
manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False
@ -1442,54 +1587,127 @@ class DownloaderApp(QWidget):
elif manga_mode:
start_page, end_page = None, None
# effective_duplicate_file_mode will be self.duplicate_file_mode (UI button's state).
# Manga Mode specific duplicate handling is now managed entirely within downloader_utils.py
self.external_link_queue.clear(); self.extracted_links_cache = []; self._is_processing_external_link_queue = False; self._current_link_post_title = None
self.all_kept_original_filenames = []
raw_character_filters_text = self.character_input.text().strip()
parsed_character_list = [name.strip() for name in raw_character_filters_text.split(',') if name.strip()] if raw_character_filters_text else None
filter_character_list_to_pass = None
# --- New parsing logic for character filters ---
parsed_character_filter_objects = []
if raw_character_filters_text:
raw_parts = []
current_part_buffer = ""
in_group_parsing = False
for char_token in raw_character_filters_text:
if char_token == '(':
in_group_parsing = True
current_part_buffer += char_token
elif char_token == ')':
in_group_parsing = False
current_part_buffer += char_token
elif char_token == ',' and not in_group_parsing:
if current_part_buffer.strip(): raw_parts.append(current_part_buffer.strip())
current_part_buffer = ""
else:
current_part_buffer += char_token
if current_part_buffer.strip(): raw_parts.append(current_part_buffer.strip())
for part_str in raw_parts:
part_str = part_str.strip()
if not part_str: continue
if part_str.startswith("(") and part_str.endswith(")"):
group_content_str = part_str[1:-1].strip()
aliases_in_group = [alias.strip() for alias in group_content_str.split(',') if alias.strip()]
if aliases_in_group:
group_folder_name = " ".join(aliases_in_group)
parsed_character_filter_objects.append({
"name": group_folder_name, # This is the primary/folder name
"is_group": True,
"aliases": aliases_in_group # These are for matching
})
else:
parsed_character_filter_objects.append({
"name": part_str, # Folder name and matching name are the same
"is_group": False,
"aliases": [part_str]
})
# --- End new parsing logic ---
filter_character_list_to_pass = None
needs_folder_naming_validation = (use_subfolders or manga_mode) and not extract_links_only
if parsed_character_list and not extract_links_only :
self.log_signal.emit(f" Validating character filters: {', '.join(parsed_character_list)}")
if parsed_character_filter_objects and not extract_links_only :
self.log_signal.emit(f" Validating character filters: {', '.join(item['name'] + (' (Group: ' + '/'.join(item['aliases']) + ')' if item['is_group'] else '') for item in parsed_character_filter_objects)}")
valid_filters_for_backend = []
user_cancelled_validation = False
for char_name in parsed_character_list:
cleaned_name_test = clean_folder_name(char_name)
for filter_item_obj in parsed_character_filter_objects:
item_primary_name = filter_item_obj["name"]
cleaned_name_test = clean_folder_name(item_primary_name)
if needs_folder_naming_validation and not cleaned_name_test:
QMessageBox.warning(self, "Invalid Filter Name for Folder", f"Filter name '{char_name}' is invalid for a folder and will be skipped for folder naming.")
self.log_signal.emit(f"⚠️ Skipping invalid filter for folder naming: '{char_name}'")
if not needs_folder_naming_validation: valid_filters_for_backend.append(char_name)
QMessageBox.warning(self, "Invalid Filter Name for Folder", f"Filter name '{item_primary_name}' is invalid for a folder and will be skipped for folder naming.")
self.log_signal.emit(f"⚠️ Skipping invalid filter for folder naming: '{item_primary_name}'")
continue
if needs_folder_naming_validation and char_name.lower() not in {kn.lower() for kn in KNOWN_NAMES}:
# --- New: Check if any alias of a group is already known ---
an_alias_is_already_known = False
if filter_item_obj["is_group"] and needs_folder_naming_validation:
for alias in filter_item_obj["aliases"]:
if any(existing_known.lower() == alias.lower() for existing_known in KNOWN_NAMES):
an_alias_is_already_known = True
self.log_signal.emit(f" Alias '{alias}' (from group '{item_primary_name}') is already in Known Names. Group name '{item_primary_name}' will not be added to Known.txt.")
break
# --- End new check ---
if an_alias_is_already_known:
valid_filters_for_backend.append(filter_item_obj)
continue
# Determine if we should prompt to add the name to the Known.txt list.
# Prompt if:
# - Folder naming validation is relevant (subfolders or manga mode, and not just extracting links)
# - AND Manga Mode is OFF (this is the key change for your request)
# - AND the primary name of the filter isn't already in Known.txt
should_prompt_to_add_to_known_list = (
needs_folder_naming_validation and
not manga_mode and # Do NOT prompt if Manga Mode is ON
item_primary_name.lower() not in {kn.lower() for kn in KNOWN_NAMES}
)
if should_prompt_to_add_to_known_list:
reply = QMessageBox.question(self, "Add to Known List?",
f"Filter '{char_name}' (used for folder/manga naming) is not in known names list.\nAdd it now?",
f"Filter name '{item_primary_name}' (used for folder/manga naming) is not in known names list.\nAdd it now?",
QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes)
if reply == QMessageBox.Yes:
self.new_char_input.setText(char_name)
if self.add_new_character(): valid_filters_for_backend.append(char_name)
else:
if cleaned_name_test or not needs_folder_naming_validation: valid_filters_for_backend.append(char_name)
self.new_char_input.setText(item_primary_name) # Use the primary name for adding
if self.add_new_character():
valid_filters_for_backend.append(filter_item_obj)
elif reply == QMessageBox.Cancel:
user_cancelled_validation = True; break
else:
if cleaned_name_test or not needs_folder_naming_validation: valid_filters_for_backend.append(char_name)
# If 'No', the filter is not used and not added to Known.txt for this session.
else:
valid_filters_for_backend.append(char_name)
# Add to filters to be used for this session if:
# - Prompting is not needed (e.g., name already known, or not manga_mode but name is known)
# - OR Manga Mode is ON (filter is used without adding to Known.txt)
# - OR extract_links_only is true (folder naming validation is false)
valid_filters_for_backend.append(filter_item_obj)
if manga_mode and needs_folder_naming_validation and item_primary_name.lower() not in {kn.lower() for kn in KNOWN_NAMES}:
self.log_signal.emit(f" Manga Mode: Using filter '{item_primary_name}' for this session without adding to Known Names.")
if user_cancelled_validation: return
if valid_filters_for_backend:
filter_character_list_to_pass = valid_filters_for_backend
self.log_signal.emit(f" Using validated character filters for subfolders: {', '.join(filter_character_list_to_pass)}")
self.log_signal.emit(f" Using validated character filters: {', '.join(item['name'] for item in filter_character_list_to_pass)}")
else:
self.log_signal.emit("⚠️ No valid character filters remaining (after validation).")
elif parsed_character_list :
filter_character_list_to_pass = parsed_character_list
self.log_signal.emit(f" Character filters provided: {', '.join(filter_character_list_to_pass)} (Folder naming validation may not apply).")
self.log_signal.emit("⚠️ No valid character filters to use for this session.")
elif parsed_character_filter_objects : # If not extract_links_only is false, but filters exist
filter_character_list_to_pass = parsed_character_filter_objects
self.log_signal.emit(f" Character filters provided (folder naming validation may not apply): {', '.join(item['name'] for item in filter_character_list_to_pass)}")
if manga_mode and not filter_character_list_to_pass and not extract_links_only:
@ -1568,7 +1786,7 @@ class DownloaderApp(QWidget):
if use_subfolders:
if custom_folder_name_cleaned: log_messages.append(f" Custom Folder (Post): '{custom_folder_name_cleaned}'")
if filter_character_list_to_pass:
log_messages.append(f" Character Filters: {', '.join(filter_character_list_to_pass)}")
log_messages.append(f" Character Filters: {', '.join(item['name'] for item in filter_character_list_to_pass)}")
log_messages.append(f" ↳ Char Filter Scope: {current_char_filter_scope.capitalize()}")
elif use_subfolders:
log_messages.append(f" Folder Naming: Automatic (based on title/known names)")
@ -1579,8 +1797,10 @@ class DownloaderApp(QWidget):
f" Skip Archives: {'.zip' if effective_skip_zip else ''}{', ' if effective_skip_zip and effective_skip_rar else ''}{'.rar' if effective_skip_rar else ''}{'None (Archive Mode)' if backend_filter_mode == 'archive' else ('None' if not (effective_skip_zip or effective_skip_rar) else '')}",
f" Skip Words (posts/files): {', '.join(skip_words_list) if skip_words_list else 'None'}",
f" Skip Words Scope: {current_skip_words_scope.capitalize()}",
f" Remove Words from Filename: {', '.join(remove_from_filename_words_list) if remove_from_filename_words_list else 'None'}",
f" Compress Images: {'Enabled' if compress_images else 'Disabled'}",
f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}"
f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}",
f" Multi-part Download: {'Enabled' if allow_multipart else 'Disabled'}"
])
else:
log_messages.append(f" Mode: Extracting Links Only")
@ -1591,11 +1811,9 @@ class DownloaderApp(QWidget):
log_messages.append(f" Manga Mode (File Renaming by Post Title): Enabled")
log_messages.append(f" ↳ Manga Filename Style: {'Post Title Based' if self.manga_filename_style == STYLE_POST_TITLE else 'Original File Name'}")
if filter_character_list_to_pass:
log_messages.append(f" ↳ Manga Character Filter (for naming/folder): {', '.join(filter_character_list_to_pass)}")
log_messages.append(f" ↳ Manga Character Filter (for naming/folder): {', '.join(item['name'] for item in filter_character_list_to_pass)}")
log_messages.append(f" ↳ Char Filter Scope (Manga): {current_char_filter_scope.capitalize()}")
if not extract_links_only:
log_messages.append(f" Subfolder per Post: {'Enabled' if use_post_subfolders else 'Disabled'}")
log_messages.append(f" ↳ Manga Duplicates: Will be renamed with numeric suffix if names clash (e.g., _1, _2).")
should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url
log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading_for_posts else 'Single-threaded (posts)'}")
@ -1630,6 +1848,7 @@ class DownloaderApp(QWidget):
'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock,
'skip_words_list': skip_words_list,
'skip_words_scope': current_skip_words_scope,
'remove_from_filename_words_list': remove_from_filename_words_list,
'char_filter_scope': current_char_filter_scope,
'show_external_links': self.show_external_links,
'extract_links_only': extract_links_only,
@ -1642,7 +1861,9 @@ class DownloaderApp(QWidget):
'cancellation_event': self.cancellation_event,
'signals': self.worker_signals,
'manga_filename_style': self.manga_filename_style,
'num_file_threads_for_worker': effective_num_file_threads_per_worker
'num_file_threads_for_worker': effective_num_file_threads_per_worker,
'allow_multipart_download': allow_multipart, # Corrected from previous thought
'duplicate_file_mode': effective_duplicate_file_mode # Pass the potentially overridden mode
}
try:
@ -1656,14 +1877,15 @@ class DownloaderApp(QWidget):
'filter_character_list', 'filter_mode', 'skip_zip', 'skip_rar',
'use_subfolders', 'use_post_subfolders', 'custom_folder_name',
'compress_images', 'download_thumbnails', 'service', 'user_id',
'downloaded_files', 'downloaded_file_hashes',
'downloaded_files', 'downloaded_file_hashes', 'remove_from_filename_words_list',
'downloaded_files_lock', 'downloaded_file_hashes_lock',
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'show_external_links', 'extract_links_only',
'num_file_threads_for_worker',
'skip_current_file_flag',
'start_page', 'end_page', 'target_post_id_from_initial_url',
'manga_mode_active', 'unwanted_keywords', 'manga_filename_style'
'manga_mode_active', 'unwanted_keywords', 'manga_filename_style', 'duplicate_file_mode',
'allow_multipart_download'
]
args_template['skip_current_file_flag'] = None
single_thread_args = {key: args_template[key] for key in dt_expected_keys if key in args_template}
@ -1780,15 +2002,16 @@ class DownloaderApp(QWidget):
'target_post_id_from_initial_url', 'custom_folder_name', 'compress_images',
'download_thumbnails', 'service', 'user_id', 'api_url_input',
'cancellation_event', 'signals', 'downloaded_files', 'downloaded_file_hashes',
'downloaded_files_lock', 'downloaded_file_hashes_lock',
'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list',
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'show_external_links', 'extract_links_only',
'show_external_links', 'extract_links_only', 'allow_multipart_download',
'num_file_threads',
'skip_current_file_flag',
'manga_mode_active', 'manga_filename_style'
]
# Ensure 'allow_multipart_download' is also considered for optional keys if it has a default in PostProcessorWorker
ppw_optional_keys_with_defaults = {
'skip_words_list', 'skip_words_scope', 'char_filter_scope',
'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'remove_from_filename_words_list',
'show_external_links', 'extract_links_only',
'num_file_threads', 'skip_current_file_flag', 'manga_mode_active', 'manga_filename_style'
}
@ -1864,8 +2087,8 @@ class DownloaderApp(QWidget):
self.new_char_input, self.add_char_button, self.delete_char_button,
self.char_filter_scope_toggle_button,
self.start_page_input, self.end_page_input,
self.page_range_label, self.to_label, self.character_input, self.custom_folder_input, self.custom_folder_label,
self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button,
self.page_range_label, self.to_label, self.character_input, self.custom_folder_input, self.custom_folder_label, self.remove_from_filename_input,
self.reset_button, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.multipart_toggle_button,
self.skip_scope_toggle_button
]
@ -1890,17 +2113,93 @@ class DownloaderApp(QWidget):
self.cancel_btn.setEnabled(not enabled)
if enabled:
if enabled: # Ensure these are updated based on current (possibly reset) checkbox states
self._handle_multithreading_toggle(multithreading_currently_on)
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
self.update_custom_folder_visibility(self.link_input.text())
self.update_page_range_enabled_state()
def cancel_download(self):
def _perform_soft_ui_reset(self, preserve_url=None, preserve_dir=None):
    """Put the UI and per-run state back to hard-coded defaults, keeping selected inputs.

    The defaults applied here are literal values, not values read from the
    persisted settings. The sets of already-downloaded files/hashes are
    deliberately left untouched.

    Args:
        preserve_url: Text to put back into the link input after clearing it,
            or None to leave the input empty.
        preserve_dir: Text to put back into the directory input after clearing
            it, or None to leave the input empty.
    """
    self.log_signal.emit("🔄 Performing soft UI reset...")

    # 1. Visual defaults. Widgets are looked up one at a time, in this order,
    #    so each clear()/setChecked() runs before the next widget is touched.
    for input_name in (
        'link_input', 'dir_input', 'custom_folder_input', 'character_input',
        'skip_words_input', 'start_page_input', 'end_page_input', 'new_char_input',
    ):
        getattr(self, input_name).clear()
    if hasattr(self, 'remove_from_filename_input'):
        self.remove_from_filename_input.clear()
    self.character_search_input.clear()
    self.thread_count_input.setText("4")
    self.radio_all.setChecked(True)
    for checkbox_name, default_checked in (
        ('skip_zip_checkbox', True),
        ('skip_rar_checkbox', True),
        ('download_thumbnails_checkbox', False),
        ('compress_images_checkbox', False),
        ('use_subfolders_checkbox', True),
        ('use_subfolder_per_post_checkbox', False),
        ('use_multithreading_checkbox', True),
        ('external_links_checkbox', False),
    ):
        getattr(self, checkbox_name).setChecked(default_checked)
    if self.manga_mode_checkbox:
        self.manga_mode_checkbox.setChecked(False)

    # 2. Button-backed settings: set the value, then refresh the button caption.
    self.allow_multipart_download_setting = False
    self._update_multipart_toggle_button_text()
    self.skip_words_scope = SKIP_SCOPE_POSTS
    self._update_skip_scope_button_text()
    self.char_filter_scope = CHAR_SCOPE_TITLE
    self._update_char_filter_scope_button_text()
    self.manga_filename_style = STYLE_POST_TITLE
    self._update_manga_filename_style_button_text()

    # 3. Put back whatever the caller asked to keep.
    if preserve_url is not None:
        self.link_input.setText(preserve_url)
    if preserve_dir is not None:
        self.dir_input.setText(preserve_dir)

    # 4. Per-run bookkeeping (counters, link queue, kept filenames).
    self.external_link_queue.clear()
    self.extracted_links_cache = []
    self._is_processing_external_link_queue = False
    self._current_link_post_title = None
    self.total_posts_to_process = self.processed_posts_count = 0
    self.download_counter = self.skip_counter = 0
    self.all_kept_original_filenames = []

    # 5. Re-derive dependent widget state from the values set above.
    self._handle_filter_mode_change(self.radio_group.checkedButton(), True)
    self._handle_multithreading_toggle(self.use_multithreading_checkbox.isChecked())
    self.filter_character_list(self.character_search_input.text())
    self.set_ui_enabled(True)
    # Called again explicitly so they reflect the restored URL/directory.
    self.update_custom_folder_visibility(self.link_input.text())
    self.update_page_range_enabled_state()

    self.log_signal.emit("✅ Soft UI reset complete. Preserved URL and Directory (if provided).")
def cancel_download_button_action(self):
    """Cancel the running download and soft-reset the UI, keeping URL and directory.

    Signals cancellation to the single download thread and/or the worker
    pool without waiting for them, then resets the UI via
    ``_perform_soft_ui_reset`` while preserving the current link and
    output-directory inputs.

    Each step is performed exactly once: the cancellation request is logged
    and the event set a single time, the pool is shut down a single time, and
    the progress label goes straight to its final "Cancelled" text instead of
    being set to "Cancelling..." and immediately overwritten.
    """
    if not self.cancel_btn.isEnabled() and not self.cancellation_event.is_set():
        self.log_signal.emit(" No active download to cancel or already cancelling.")
        return

    self.log_signal.emit("⚠️ Requesting cancellation of download process (soft reset)...")

    # Capture these before the reset clears the inputs.
    current_url = self.link_input.text()
    current_dir = self.dir_input.text()

    self.cancellation_event.set()

    if self.download_thread and self.download_thread.isRunning():
        self.download_thread.requestInterruption()
        self.log_signal.emit(" Signaled single download thread to interrupt.")

    if self.thread_pool:
        self.log_signal.emit(" Initiating non-blocking shutdown and cancellation of worker pool tasks...")
        self.thread_pool.shutdown(wait=False, cancel_futures=True)
        # Drop the reference so a fresh pool can be created for the next download.
        self.thread_pool = None
        self.active_futures = []

    self.external_link_queue.clear()
    self._is_processing_external_link_queue = False
    self._current_link_post_title = None

    # The soft reset re-enables the UI, which also disables the cancel button.
    self._perform_soft_ui_reset(preserve_url=current_url, preserve_dir=current_dir)

    self.progress_label.setText("Progress: Cancelled. Ready for new task.")
    self.file_progress_label.setText("")
    self.log_signal.emit(" UI reset. Ready for new operation. Background tasks are being terminated.")
def download_finished(self, total_downloaded, total_skipped, cancelled_by_user, kept_original_names_list=None):
if kept_original_names_list is None:
@ -1945,7 +2244,10 @@ class DownloaderApp(QWidget):
if hasattr(self.download_thread, 'external_link_signal'): self.download_thread.external_link_signal.disconnect(self.handle_external_link_signal)
if hasattr(self.download_thread, 'file_progress_signal'): self.download_thread.file_progress_signal.disconnect(self.update_file_progress_display)
except (TypeError, RuntimeError) as e: self.log_signal.emit(f" Note during single-thread signal disconnection: {e}")
self.download_thread = None
# Ensure these are cleared if the download_finished is for the single download thread
if self.download_thread and not self.download_thread.isRunning(): # Check if it was this thread
self.download_thread = None
if self.thread_pool: self.log_signal.emit(" Ensuring worker thread pool is shut down..."); self.thread_pool.shutdown(wait=True, cancel_futures=True); self.thread_pool = None
self.active_futures = []
@ -1985,6 +2287,10 @@ class DownloaderApp(QWidget):
self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
self._update_char_filter_scope_button_text()
self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Reset to default (Delete)
self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode)
self._update_duplicate_mode_button_text()
self.settings.sync()
self._update_manga_filename_style_button_text()
self.update_ui_for_manga_mode(self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False)
@ -1994,17 +2300,22 @@ class DownloaderApp(QWidget):
def _reset_ui_to_defaults(self):
self.link_input.clear(); self.dir_input.clear(); self.custom_folder_input.clear(); self.character_input.clear();
self.skip_words_input.clear(); self.start_page_input.clear(); self.end_page_input.clear(); self.new_char_input.clear();
if hasattr(self, 'remove_from_filename_input'): self.remove_from_filename_input.clear()
self.character_search_input.clear(); self.thread_count_input.setText("4"); self.radio_all.setChecked(True);
self.skip_zip_checkbox.setChecked(True); self.skip_rar_checkbox.setChecked(True); self.download_thumbnails_checkbox.setChecked(False);
self.compress_images_checkbox.setChecked(False); self.use_subfolders_checkbox.setChecked(True);
self.use_subfolder_per_post_checkbox.setChecked(False); self.use_multithreading_checkbox.setChecked(True);
self.external_links_checkbox.setChecked(False)
if self.manga_mode_checkbox: self.manga_mode_checkbox.setChecked(False)
if self.manga_mode_checkbox: self.manga_mode_checkbox.setChecked(False)
self.allow_multipart_download_setting = False # Default to OFF
self._update_multipart_toggle_button_text() # Update button text
self.skip_words_scope = SKIP_SCOPE_POSTS
self._update_skip_scope_button_text()
self.char_filter_scope = CHAR_SCOPE_TITLE
self._update_char_filter_scope_button_text()
self.duplicate_file_mode = DUPLICATE_MODE_DELETE # Default to DELETE
self._update_duplicate_mode_button_text()
self._handle_filter_mode_change(self.radio_all, True)
@ -2032,6 +2343,61 @@ class DownloaderApp(QWidget):
with QMutexLocker(self.prompt_mutex): self._add_character_response = result
self.log_signal.emit(f" Main thread received character prompt response: {'Action resulted in addition/confirmation' if result else 'Action resulted in no addition/declined'}")
def _update_multipart_toggle_button_text(self):
if hasattr(self, 'multipart_toggle_button'):
text = "Multi-part: ON" if self.allow_multipart_download_setting else "Multi-part: OFF"
self.multipart_toggle_button.setText(text)
def _toggle_multipart_mode(self):
    """Flip the multi-part download setting, confirming with the user before enabling.

    Turning the setting ON first shows an advisory dialog; choosing "Cancel"
    leaves the setting OFF and returns without persisting anything. Turning it
    OFF needs no confirmation. After a change the button caption is refreshed,
    the value is written to the settings store and the new state is logged.
    """
    if not self.allow_multipart_download_setting:
        advisory_html = (
            "<b>Multi-part download advisory:</b><br><br>"
            "<ul>"
            "<li>Best suited for <b>large files</b> (e.g., single post videos).</li>"
            "<li>When downloading a full creator feed with many small files (like images):"
            "<ul><li>May not offer significant speed benefits.</li>"
            "<li>Could potentially make the UI feel <b>choppy</b>.</li>"
            "<li>May <b>spam the process log</b> with rapid, numerous small download messages.</li></ul></li>"
            "<li>Consider using the <b>'Videos' filter</b> if downloading a creator feed to primarily target large files for multi-part.</li>"
            "</ul><br>"
            "Do you want to enable multi-part download?"
        )
        advisory_box = QMessageBox(self)
        advisory_box.setIcon(QMessageBox.Warning)
        advisory_box.setWindowTitle("Multi-part Download Advisory")
        advisory_box.setText(advisory_html)
        proceed_choice = advisory_box.addButton("Proceed Anyway", QMessageBox.AcceptRole)
        cancel_choice = advisory_box.addButton("Cancel", QMessageBox.RejectRole)
        advisory_box.setDefaultButton(proceed_choice)
        advisory_box.exec_()

        if advisory_box.clickedButton() == cancel_choice:
            # Setting is already False, so there is nothing to revert or save.
            self.log_signal.emit(" Multi-part download enabling cancelled by user.")
            return

    self.allow_multipart_download_setting = not self.allow_multipart_download_setting
    self._update_multipart_toggle_button_text()
    self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
    state_text = 'Enabled' if self.allow_multipart_download_setting else 'Disabled'
    self.log_signal.emit(f" Multi-part download set to: {state_text}")
def _update_duplicate_mode_button_text(self):
    """Make the duplicate-mode button caption match ``self.duplicate_file_mode``.

    Does nothing when the button attribute does not exist on this instance.
    """
    if not hasattr(self, 'duplicate_mode_toggle_button'):
        return
    # Only Delete has its own caption; Move and any unrecognised value both
    # display as "Duplicates: Move".
    if self.duplicate_file_mode == DUPLICATE_MODE_DELETE:
        caption = "Duplicates: Delete"
    else:
        caption = "Duplicates: Move"
    self.duplicate_mode_toggle_button.setText(caption)
def _cycle_duplicate_mode(self):
    """Switch duplicate handling between Move and Delete, then persist and log it.

    Move becomes Delete; Delete (or any other value) becomes Move.
    """
    currently_moving = self.duplicate_file_mode == DUPLICATE_MODE_MOVE_TO_SUBFOLDER
    self.duplicate_file_mode = (
        DUPLICATE_MODE_DELETE if currently_moving else DUPLICATE_MODE_MOVE_TO_SUBFOLDER
    )
    self._update_duplicate_mode_button_text()
    self.settings.setValue(DUPLICATE_FILE_MODE_KEY, self.duplicate_file_mode)
    mode_label = self.duplicate_file_mode.capitalize()
    self.log_signal.emit(f" Duplicate file handling mode changed to: '{mode_label}'")
if __name__ == '__main__':
import traceback
@ -2044,9 +2410,19 @@ if __name__ == '__main__':
else: print(f"Warning: Application icon 'Kemono.ico' not found at {icon_path}")
downloader_app_instance = DownloaderApp()
# Set a reasonable default size before showing
downloader_app_instance.resize(1150, 780) # Adjusted default size
downloader_app_instance.show()
# Center the window on the screen after it's shown and sized
downloader_app_instance._center_on_screen()
if TourDialog:
# Temporarily force the tour to be considered as "not shown"
# This ensures it appears for this run, especially for a fresh .exe
tour_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR)
tour_settings.setValue(TourDialog.TOUR_SHOWN_KEY, False)
tour_settings.sync()
print("[Main] Forcing tour to be active for this session.")
tour_result = TourDialog.run_tour_if_needed(downloader_app_instance)
if tour_result == QDialog.Accepted: print("Tour completed by user.")
elif tour_result == QDialog.Rejected: print("Tour skipped or was already shown.")

232
multipart_downloader.py Normal file
View File

@ -0,0 +1,232 @@
import os
import time
import requests
import hashlib
import http.client
import traceback
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
# Base delay, in seconds, slept before retrying a failed chunk; the chunk
# downloader doubles it for each further retry.
CHUNK_DOWNLOAD_RETRY_DELAY = 2
# Number of retries allowed after the first attempt of a chunk. Kept low so a
# problematic chunk fails quickly and the caller can fall back.
MAX_CHUNK_DOWNLOAD_RETRIES = 1
# Size, in bytes, passed to iter_content() when streaming a chunk (256 KiB).
DOWNLOAD_CHUNK_SIZE_ITER = 1024 * 256
def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, headers,
part_num, total_parts, progress_data, cancellation_event, skip_event, logger,
signals=None, api_original_filename=None): # Added signals and api_original_filename
"""Downloads a single chunk of a file and writes it to the temp file."""
if cancellation_event and cancellation_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Download cancelled before start.")
return 0, False # bytes_downloaded, success
if skip_event and skip_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event triggered before start.")
return 0, False
chunk_headers = headers.copy()
# end_byte can be -1 for 0-byte files, meaning download from start_byte to end of file (which is start_byte itself)
if end_byte != -1 : # For 0-byte files, end_byte might be -1, Range header should not be set or be 0-0
chunk_headers['Range'] = f"bytes={start_byte}-{end_byte}"
elif start_byte == 0 and end_byte == -1: # Specifically for 0-byte files
# Some servers might not like Range: bytes=0--1.
# For a 0-byte file, we might not even need a range header, or Range: bytes=0-0
# Let's try without for 0-byte, or rely on server to handle 0-0 if Content-Length was 0.
# If Content-Length was 0, the main function might handle it directly.
# This chunking logic is primarily for files > 0 bytes.
# For now, if end_byte is -1, it implies a 0-byte file, so we expect 0 bytes.
pass
bytes_this_chunk = 0
last_progress_emit_time_for_chunk = time.time()
last_speed_calc_time = time.time()
bytes_at_last_speed_calc = 0
for attempt in range(MAX_CHUNK_DOWNLOAD_RETRIES + 1):
if cancellation_event and cancellation_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during retry loop.")
return bytes_this_chunk, False
if skip_event and skip_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event during retry loop.")
return bytes_this_chunk, False
try:
if attempt > 0:
logger(f" [Chunk {part_num + 1}/{total_parts}] Retrying download (Attempt {attempt}/{MAX_CHUNK_DOWNLOAD_RETRIES})...")
time.sleep(CHUNK_DOWNLOAD_RETRY_DELAY * (2 ** (attempt - 1)))
# Reset speed calculation on retry
last_speed_calc_time = time.time()
bytes_at_last_speed_calc = bytes_this_chunk # Current progress of this chunk
# Enhanced log message for chunk start
log_msg = f" 🚀 [Chunk {part_num + 1}/{total_parts}] Starting download: bytes {start_byte}-{end_byte if end_byte != -1 else 'EOF'}"
logger(log_msg)
print(f"DEBUG_MULTIPART: {log_msg}") # Direct console print for debugging
response = requests.get(chunk_url, headers=chunk_headers, timeout=(10, 120), stream=True)
response.raise_for_status()
# For 0-byte files, if end_byte was -1, we expect 0 content.
if start_byte == 0 and end_byte == -1 and int(response.headers.get('Content-Length', 0)) == 0:
logger(f" [Chunk {part_num + 1}/{total_parts}] Confirmed 0-byte file.")
with progress_data['lock']:
progress_data['chunks_status'][part_num]['active'] = False
progress_data['chunks_status'][part_num]['speed_bps'] = 0
return 0, True
with open(temp_file_path, 'r+b') as f: # Open in read-write binary
f.seek(start_byte)
for data_segment in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE_ITER):
if cancellation_event and cancellation_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during data iteration.")
return bytes_this_chunk, False
if skip_event and skip_event.is_set():
logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event during data iteration.")
return bytes_this_chunk, False
if data_segment:
f.write(data_segment)
bytes_this_chunk += len(data_segment)
with progress_data['lock']:
# Increment both the chunk's downloaded and the overall downloaded
progress_data['total_downloaded_so_far'] += len(data_segment)
progress_data['chunks_status'][part_num]['downloaded'] = bytes_this_chunk
progress_data['chunks_status'][part_num]['active'] = True
current_time = time.time()
time_delta_speed = current_time - last_speed_calc_time
if time_delta_speed > 0.5: # Calculate speed every 0.5 seconds
bytes_delta = bytes_this_chunk - bytes_at_last_speed_calc
current_speed_bps = (bytes_delta * 8) / time_delta_speed if time_delta_speed > 0 else 0
progress_data['chunks_status'][part_num]['speed_bps'] = current_speed_bps
last_speed_calc_time = current_time
bytes_at_last_speed_calc = bytes_this_chunk
# Emit progress more frequently from within the chunk download
if current_time - last_progress_emit_time_for_chunk > 0.1: # Emit up to 10 times/sec per chunk
if signals and hasattr(signals, 'file_progress_signal'):
# Ensure we read the latest total downloaded from progress_data
# Send a copy of the chunks_status list
status_list_copy = [dict(s) for s in progress_data['chunks_status']] # Make a deep enough copy
signals.file_progress_signal.emit(api_original_filename, status_list_copy)
last_progress_emit_time_for_chunk = current_time
return bytes_this_chunk, True
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Retryable error: {e}")
if attempt == MAX_CHUNK_DOWNLOAD_RETRIES:
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Failed after {MAX_CHUNK_DOWNLOAD_RETRIES} retries.")
return bytes_this_chunk, False
except requests.exceptions.RequestException as e: # Includes 4xx/5xx errors after raise_for_status
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Non-retryable error: {e}")
return bytes_this_chunk, False
except Exception as e:
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Unexpected error: {e}\n{traceback.format_exc(limit=1)}")
return bytes_this_chunk, False
# Ensure final status is marked as inactive if loop finishes due to retries
with progress_data['lock']:
progress_data['chunks_status'][part_num]['active'] = False
progress_data['chunks_status'][part_num]['speed_bps'] = 0
return bytes_this_chunk, False # Should be unreachable
def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
                           api_original_filename, signals, cancellation_event, skip_event, logger):
    """
    Download a file as several byte ranges fetched concurrently into one ``.part`` file.

    Args:
        file_url: URL handed to every chunk worker.
        save_path: Final destination path; data is written to ``save_path + ".part"``.
        total_size: Expected size of the file in bytes (0 is treated as an empty file).
        num_parts: Number of ranges / worker threads to use; must be an int >= 1.
        headers: Request headers forwarded to the chunk workers.
        api_original_filename: Name used in log lines and progress signals.
        signals: Optional object; if it has ``file_progress_signal`` a final
            per-chunk status snapshot is emitted on it.
        cancellation_event: Optional event; when set, the download is abandoned.
        skip_event: Optional event forwarded to the chunk workers.
        logger: Callable taking a single message string.

    Returns:
        (download_successful_flag, downloaded_bytes, calculated_file_hash, temp_file_handle_or_None)
        On success the hash is the MD5 hex digest of the ``.part`` file and the
        handle is an open read-binary handle to it; the caller must close the
        handle and rename/delete the ``.part`` file. On any failure the ``.part``
        file is removed (best effort) and the hash and handle are ``None``.
    """
    # A part count below 1 would divide by zero below (and is rejected by
    # ThreadPoolExecutor); report it through the normal failure tuple instead.
    if not isinstance(num_parts, int) or num_parts < 1:
        logger(f" ⚠️ Invalid part count ({num_parts!r}) for multipart download of '{api_original_filename}'. Aborting multipart.")
        return False, 0, None, None

    logger(f"⬇️ Initializing Multi-part Download ({num_parts} parts) for: '{api_original_filename}' (Size: {total_size / (1024*1024):.2f} MB)")
    temp_file_path = save_path + ".part"
    try:
        with open(temp_file_path, 'wb') as f_temp:
            if total_size > 0:
                f_temp.truncate(total_size)  # Pre-allocate space
    except IOError as e:
        logger(f" ❌ Error creating/truncating temp file '{temp_file_path}': {e}")
        # The file may exist if open() succeeded but truncate() failed.
        _remove_part_file(temp_file_path, logger)
        return False, 0, None, None

    chunks_ranges = _plan_chunk_ranges(total_size, num_parts)
    # (0, -1) is the marker for a 0-byte file; every other range is inclusive.
    chunk_actual_sizes = [0 if (start == 0 and end == -1) else end - start + 1
                          for start, end in chunks_ranges]

    if not chunks_ranges and total_size > 0:
        logger(f" ⚠️ No valid chunk ranges for multipart download of '{api_original_filename}'. Aborting multipart.")
        _remove_part_file(temp_file_path, logger)
        return False, 0, None, None

    # Shared state mutated by the chunk workers; guarded by 'lock'.
    progress_data = {
        'total_file_size': total_size,
        'total_downloaded_so_far': 0,
        'chunks_status': [
            {'id': i, 'downloaded': 0, 'total': chunk_actual_sizes[i] if i < len(chunk_actual_sizes) else 0, 'active': False, 'speed_bps': 0.0}
            for i in range(num_parts)
        ],
        'lock': threading.Lock()
    }

    chunk_futures = []
    all_chunks_successful = True
    total_bytes_from_chunks = 0  # Verified against total_size once all chunks finish

    with ThreadPoolExecutor(max_workers=num_parts, thread_name_prefix=f"MPChunk_{api_original_filename[:10]}_") as chunk_pool:
        for i, (start, end) in enumerate(chunks_ranges):
            if cancellation_event and cancellation_event.is_set():
                all_chunks_successful = False
                break
            chunk_futures.append(chunk_pool.submit(
                _download_individual_chunk, chunk_url=file_url, temp_file_path=temp_file_path,
                start_byte=start, end_byte=end, headers=headers, part_num=i, total_parts=num_parts,
                progress_data=progress_data, cancellation_event=cancellation_event, skip_event=skip_event, logger=logger,
                signals=signals, api_original_filename=api_original_filename
            ))

        for future in as_completed(chunk_futures):
            if cancellation_event and cancellation_event.is_set():
                all_chunks_successful = False
                break
            try:
                bytes_downloaded_this_chunk, success_this_chunk = future.result()
            except Exception as e:
                # A worker that raised must fail the download, not escape
                # this function and leave the .part file behind.
                logger(f" ❌ Chunk worker for '{api_original_filename}' raised: {e}")
                all_chunks_successful = False
                continue
            total_bytes_from_chunks += bytes_downloaded_this_chunk
            if not success_this_chunk:
                all_chunks_successful = False

    if cancellation_event and cancellation_event.is_set():
        logger(f" Multi-part download for '{api_original_filename}' cancelled by main event.")
        all_chunks_successful = False

    # Leaving the executor's with-block waits for every worker, so no chunk is
    # still transferring: reset the per-chunk state before the final snapshot.
    with progress_data['lock']:
        for chunk_status in progress_data['chunks_status']:
            chunk_status['active'] = False
            chunk_status['speed_bps'] = 0
        status_list_copy = [dict(s) for s in progress_data['chunks_status']]
    if signals and hasattr(signals, 'file_progress_signal'):
        signals.file_progress_signal.emit(api_original_filename, status_list_copy)

    if all_chunks_successful and (total_bytes_from_chunks == total_size or total_size == 0):
        try:
            md5_hasher = hashlib.md5()
            with open(temp_file_path, 'rb') as f_hash:
                for buf in iter(lambda: f_hash.read(4096*10), b''):  # Larger reads for hashing
                    md5_hasher.update(buf)
            # Handed to the caller, who is responsible for closing it and for
            # renaming/deleting the .part file.
            part_file_handle = open(temp_file_path, 'rb')
        except OSError as e:
            logger(f" ❌ Could not read back temp part file '{temp_file_path}': {e}")
            _remove_part_file(temp_file_path, logger)
            return False, total_bytes_from_chunks, None, None
        logger(f" ✅ Multi-part download successful for '{api_original_filename}'. Total bytes: {total_bytes_from_chunks}")
        return True, total_bytes_from_chunks, md5_hasher.hexdigest(), part_file_handle

    logger(f" ❌ Multi-part download failed for '{api_original_filename}'. Success: {all_chunks_successful}, Bytes: {total_bytes_from_chunks}/{total_size}. Cleaning up.")
    _remove_part_file(temp_file_path, logger)
    return False, total_bytes_from_chunks, None, None


def _plan_chunk_ranges(total_size, num_parts):
    """Return inclusive (start, end) byte ranges covering ``total_size`` in up to ``num_parts`` pieces."""
    chunk_size_calc = total_size // num_parts
    ranges = []
    for i in range(num_parts):
        start = i * chunk_size_calc
        # The last part absorbs the remainder of the integer division.
        end = start + chunk_size_calc - 1 if i < num_parts - 1 else total_size - 1
        if start <= end:
            ranges.append((start, end))
        elif total_size == 0 and i == 0:
            ranges.append((0, -1))  # 0-byte file: download 0 bytes from offset 0
    return ranges


def _remove_part_file(temp_file_path, logger):
    """Delete the temporary ``.part`` file if present, logging (not raising) on failure."""
    if os.path.exists(temp_file_path):
        try:
            os.remove(temp_file_path)
        except OSError as e:
            logger(f" Failed to remove temp part file '{temp_file_path}': {e}")

17
tour.py
View File

@ -288,13 +288,15 @@ class TourDialog(QDialog):
def run_tour_if_needed(parent_app_window):
try:
settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR)
never_show_again = settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)
never_show_again_from_settings = settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)
if never_show_again:
if never_show_again_from_settings:
print(f"[Tour] Skipped: '{TourDialog.TOUR_SHOWN_KEY}' is True in settings.")
return QDialog.Rejected
tour_dialog = TourDialog(parent_app_window)
result = tour_dialog.exec_()
return result
except Exception as e:
print(f"[Tour] CRITICAL ERROR in run_tour_if_needed: {e}")
@ -305,10 +307,11 @@ if __name__ == '__main__':
app = QApplication(sys.argv)
# --- For testing: force the tour to show by resetting the flag ---
# print("[Tour Test] Resetting 'Never show again' flag for testing purposes.")
# test_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR)
# test_settings.setValue(TourDialog.TOUR_SHOWN_KEY, False) # Set to False to force tour
# test_settings.sync()
# This block ensures that if tour.py is run directly, the "Never show again" flag in QSettings is reset.
print("[Tour Direct Run] Resetting 'Never show again' flag in QSettings.")
test_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR)
test_settings.setValue(TourDialog.TOUR_SHOWN_KEY, False) # Set to False to force tour
test_settings.sync()
# --- End testing block ---
print("[Tour Test] Running tour standalone...")
@ -322,4 +325,4 @@ if __name__ == '__main__':
final_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR)
print(f"[Tour Test] Final state of '{TourDialog.TOUR_SHOWN_KEY}' in settings: {final_settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)}")
sys.exit()
sys.exit()