import sys import os import time import requests import re import threading import queue import hashlib from concurrent.futures import ThreadPoolExecutor, Future, CancelledError from PyQt5.QtGui import QIcon from PyQt5.QtWidgets import ( QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton, QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, QRadioButton, QButtonGroup, QCheckBox, QMainWindow ) from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject from urllib.parse import urlparse try: from PIL import Image except ImportError: print("ERROR: Pillow library not found. Please install it: pip install Pillow") Image = None from io import BytesIO fastapi_app = None KNOWN_NAMES = [] def clean_folder_name(name): if not isinstance(name, str): name = str(name) cleaned = re.sub(r'[^\w\s\-\_]', '', name) return cleaned.strip().replace(' ', '_') def clean_filename(name): if not isinstance(name, str): name = str(name) cleaned = re.sub(r'[^\w\s\-\_\.]', '', name) return cleaned.strip().replace(' ', '_') def extract_folder_name_from_title(title, unwanted_keywords): if not title: return 'Uncategorized' title_lower = title.lower() tokens = title_lower.split() for token in tokens: clean_token = clean_folder_name(token) if clean_token and clean_token not in unwanted_keywords: return clean_token return 'Uncategorized' def match_folders_from_title(title, known_names, unwanted_keywords): if not title: return [] cleaned_title = clean_folder_name(title.lower()) matched_cleaned_names = set() for name in known_names: cleaned_name_for_match = clean_folder_name(name.lower()) if not cleaned_name_for_match: continue if cleaned_name_for_match in cleaned_title: if cleaned_name_for_match not in unwanted_keywords: matched_cleaned_names.add(cleaned_name_for_match) return list(matched_cleaned_names) def is_image(filename): if not filename: return False return filename.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.gif')) def is_video(filename): if not filename: return False return filename.lower().endswith(('.mp4', '.mov', '.mkv', '.webm', '.avi', '.wmv')) def is_zip(filename): if not filename: return False return filename.lower().endswith('.zip') def is_rar(filename): if not filename: return False return filename.lower().endswith('.rar') def is_post_url(url): if not isinstance(url, str): return False return '/post/' in urlparse(url).path def extract_post_info(url_string): service, user_id, post_id = None, None, None if not isinstance(url_string, str) or not url_string.strip(): return None, None, None try: parsed_url = urlparse(url_string.strip()) domain = parsed_url.netloc.lower() path_parts = [part for part in parsed_url.path.strip('/').split('/') if part] is_kemono = 'kemono.su' in domain or 'kemono.party' in domain is_coomer = 'coomer.su' in domain or 'coomer.party' in domain if not (is_kemono or is_coomer): return None, None, None if len(path_parts) >= 3 and path_parts[1].lower() == 'user': service = path_parts[0] user_id = path_parts[2] if len(path_parts) >= 5 and path_parts[3].lower() == 'post': post_id = path_parts[4] return service, user_id, post_id if len(path_parts) >= 5 and path_parts[0].lower() == 'api' and path_parts[1].lower() == 'v1' and path_parts[3].lower() == 'user': service = path_parts[2] user_id = path_parts[4] if len(path_parts) >= 7 and path_parts[5].lower() == 'post': post_id = path_parts[6] return service, user_id, post_id except ValueError: print(f"Debug: ValueError parsing URL '{url_string}'") return None, None, None except Exception as e: print(f"Debug: Exception during extract_post_info for URL '{url_string}': {e}") return None, None, None return None, None, None def fetch_posts_paginated(api_url_base, headers, offset, logger): paginated_url = f'{api_url_base}?o={offset}' logger(f" Fetching: {paginated_url}") try: response = requests.get(paginated_url, headers=headers, timeout=45) response.raise_for_status() if 'application/json' not in response.headers.get('Content-Type', ''): raise RuntimeError(f"Unexpected content type received: {response.headers.get('Content-Type')}. Body: {response.text[:200]}") return response.json() except requests.exceptions.Timeout: raise RuntimeError(f"Timeout fetching page offset {offset}") except requests.exceptions.RequestException as e: err_msg = f"Error fetching page offset {offset}: {e}" if e.response is not None: err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})" raise RuntimeError(err_msg) except ValueError as e: raise RuntimeError(f"Error decoding JSON response for offset {offset}: {e}. Body: {response.text[:200]}") except Exception as e: raise RuntimeError(f"Unexpected error processing page offset {offset}: {e}") def download_from_api(api_url_input, logger=print): headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'} service, user_id, target_post_id = extract_post_info(api_url_input) if not service or not user_id: logger(f"āŒ Invalid or unrecognized URL: {api_url_input}. Cannot fetch.") return parsed_input = urlparse(api_url_input) api_domain = parsed_input.netloc if ('kemono.su' in parsed_input.netloc.lower() or 'coomer.su' in parsed_input.netloc.lower() or 'kemono.party' in parsed_input.netloc.lower() or 'coomer.party' in parsed_input.netloc.lower()) else "kemono.su" api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}" offset = 0 page = 1 processed_target_post = False while True: if target_post_id and processed_target_post: logger(f"āœ… Target post {target_post_id} found and processed. Stopping.") break logger(f"\nšŸ”„ Fetching page {page} (offset {offset}) for user {user_id} on {api_domain}...") try: posts_batch = fetch_posts_paginated(api_base_url, headers, offset, logger) if not isinstance(posts_batch, list): logger(f"āŒ API Error: Expected a list of posts, got {type(posts_batch)}. Response: {str(posts_batch)[:200]}") break except RuntimeError as e: logger(f"āŒ {e}") logger(" Aborting pagination due to error.") break except Exception as e: logger(f"āŒ Unexpected error during fetch loop: {e}") break if not posts_batch: if page == 1 and not target_post_id: logger("šŸ˜• No posts found for this creator.") elif not target_post_id: logger("āœ… Reached end of posts.") break logger(f"šŸ“¦ Found {len(posts_batch)} posts on page {page}.") if target_post_id: matching_post = next((post for post in posts_batch if str(post.get('id')) == str(target_post_id)), None) if matching_post: logger(f"šŸŽÆ Found target post {target_post_id} on page {page}.") yield [matching_post] processed_target_post = True else: logger(f" Target post {target_post_id} not found on this page.") pass else: yield posts_batch if not (target_post_id and processed_target_post): page_size = 50 offset += page_size page += 1 time.sleep(0.6) if target_post_id and not processed_target_post: logger(f"āŒ Target post ID {target_post_id} was not found for this creator.") class PostProcessorSignals(QObject): progress_signal = pyqtSignal(str) file_download_status_signal = pyqtSignal(bool) class PostProcessorWorker: def __init__(self, post_data, download_root, known_names, filter_character, unwanted_keywords, filter_mode, skip_zip, skip_rar, use_subfolders, target_post_id_from_initial_url, custom_folder_name, compress_images, download_thumbnails, service, user_id, api_url_input, cancellation_event, signals, downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock, skip_words_list=None): self.post = post_data self.download_root = download_root self.known_names = known_names self.filter_character = filter_character self.unwanted_keywords = unwanted_keywords self.filter_mode = filter_mode self.skip_zip = skip_zip self.skip_rar = skip_rar self.use_subfolders = use_subfolders self.target_post_id_from_initial_url = target_post_id_from_initial_url self.custom_folder_name = custom_folder_name self.compress_images = compress_images self.download_thumbnails = download_thumbnails self.service = service self.user_id = user_id self.api_url_input = api_url_input self.cancellation_event = cancellation_event self.signals = signals self.skip_current_file_flag = threading.Event() self.is_downloading_file = False self.current_download_path = None self.downloaded_files = downloaded_files self.downloaded_file_hashes = downloaded_file_hashes self.downloaded_files_lock = downloaded_files_lock self.downloaded_file_hashes_lock = downloaded_file_hashes_lock self.skip_words_list = skip_words_list if skip_words_list is not None else [] if self.compress_images and Image is None: self.logger("āš ļø Image compression enabled, but Pillow library is not loaded. Disabling compression.") self.compress_images = False def logger(self, message): if self.signals and hasattr(self.signals, 'progress_signal'): self.signals.progress_signal.emit(message) else: print(f"(Worker Log): {message}") def check_cancel(self): is_cancelled = self.cancellation_event.is_set() return is_cancelled def skip_file(self): pass def process(self): if self.check_cancel(): return 0, 0 total_downloaded_post = 0 total_skipped_post = 0 headers = {'User-Agent': 'Mozilla/5.0', 'Referer': f'https://{urlparse(self.api_url_input).netloc}/'} url_pattern = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+') LARGE_THUMBNAIL_THRESHOLD = 1 * 1024 * 1024 post = self.post api_title = post.get('title', '') title = api_title if api_title else 'untitled_post' post_id = post.get('id', 'unknown_id') post_file_info = post.get('file') attachments = post.get('attachments', []) post_content = post.get('content', '') is_target_post = (self.target_post_id_from_initial_url is not None) and (str(post_id) == str(self.target_post_id_from_initial_url)) self.logger(f"\n--- Processing Post {post_id} ('{title[:50]}...') (Thread: {threading.current_thread().name}) ---") if self.skip_words_list: title_lower = title.lower() for skip_word in self.skip_words_list: if skip_word.lower() in title_lower: self.logger(f" -> Skip Post (Title): Post {post_id} title ('{title[:30]}...') contains skip word '{skip_word}'. Skipping entire post.") return 0, 1 if not isinstance(attachments, list): self.logger(f"āš ļø Corrupt attachment data for post {post_id}. Skipping attachments.") attachments = [] valid_folder_paths = [] folder_decision_reason = "" api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su" if is_target_post and self.custom_folder_name and self.use_subfolders: folder_path_full = os.path.join(self.download_root, self.custom_folder_name) valid_folder_paths = [folder_path_full] folder_decision_reason = f"Using custom folder for target post: '{self.custom_folder_name}'" if not valid_folder_paths and self.use_subfolders: folder_names_for_post = [] if self.filter_character: clean_char_filter = clean_folder_name(self.filter_character.lower()) matched_names_in_title = match_folders_from_title(title, self.known_names, self.unwanted_keywords) if clean_char_filter and clean_char_filter in matched_names_in_title: folder_names_for_post = [clean_char_filter] folder_decision_reason = f"Character filter '{self.filter_character}' matched title. Using folder '{clean_char_filter}'." else: self.logger(f" -> Filter Skip Post {post_id}: Character filter '{self.filter_character}' not found in title matches ({matched_names_in_title}).") return 0, 1 else: matched_folders = match_folders_from_title(title, self.known_names, self.unwanted_keywords) if matched_folders: folder_names_for_post = matched_folders folder_decision_reason = f"Found known name(s) in title: {matched_folders}" else: extracted_folder = extract_folder_name_from_title(title, self.unwanted_keywords) folder_names_for_post = [extracted_folder] folder_decision_reason = f"No known names in title. Using derived folder: '{extracted_folder}'" for folder_name in folder_names_for_post: folder_path_full = os.path.join(self.download_root, folder_name) valid_folder_paths.append(folder_path_full) if not valid_folder_paths: valid_folder_paths = [self.download_root] if not folder_decision_reason: folder_decision_reason = "Subfolders disabled or no specific folder determined. Using root download directory." self.logger(f" Folder Decision: {folder_decision_reason}") if not valid_folder_paths: self.logger(f" ERROR: No valid folder paths determined for post {post_id}. Skipping.") return 0, 1 if post_content: try: found_links = re.findall(r'href=["\'](https?://[^"\']+)["\']', post_content) if found_links: self.logger(f"šŸ”— Links found in post content:") unique_links = sorted(list(set(found_links))) for link in unique_links[:10]: if not any(x in link for x in ['.css', '.js', 'javascript:']): self.logger(f" - {link}") if len(unique_links) > 10: self.logger(f" - ... ({len(unique_links) - 10} more links not shown)") except Exception as e: self.logger(f"āš ļø Error parsing content for links in post {post_id}: {e}") files_to_process_for_download = [] api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su" if self.download_thumbnails: self.logger(f" Mode: Attempting to download thumbnail...") self.logger(" Thumbnail download via API is disabled as the local API is not used.") self.logger(f" -> Skipping Post {post_id}: Thumbnail download requested but API is disabled.") return 0, 1 else: self.logger(f" Mode: Downloading post file/attachments.") if post_file_info and isinstance(post_file_info, dict) and post_file_info.get('path'): main_file_path = post_file_info['path'].lstrip('/') main_file_name = post_file_info.get('name') or os.path.basename(main_file_path) if main_file_name: file_url = f"https://{api_domain}/data/{main_file_path}" files_to_process_for_download.append({ 'url': file_url, 'name': main_file_name, '_is_thumbnail': False, '_source': 'post_file' }) else: self.logger(f" āš ļø Skipping main post file: Missing filename (Path: {main_file_path})") attachment_counter = 0 for idx, attachment in enumerate(attachments): if isinstance(attachment, dict) and attachment.get('path'): attach_path = attachment['path'].lstrip('/') attach_name = attachment.get('name') or os.path.basename(attach_path) if attach_name: base, ext = os.path.splitext(clean_filename(attach_name)) final_attach_name = f"{post_id}_{attachment_counter}{ext}" if base and base != f"{post_id}_{attachment_counter}": final_attach_name = f"{post_id}_{attachment_counter}_{base}{ext}" attach_url = f"https://{api_domain}/data/{attach_path}" files_to_process_for_download.append({ 'url': attach_url, 'name': final_attach_name, '_is_thumbnail': False, '_source': f'attachment_{idx+1}', '_original_name_for_log': attach_name }) attachment_counter += 1 else: self.logger(f" āš ļø Skipping attachment {idx+1}: Missing filename (Path: {attach_path})") else: self.logger(f" āš ļø Skipping invalid attachment entry {idx+1}: {str(attachment)[:100]}") if not files_to_process_for_download: self.logger(f" No files found to download for post {post_id}.") return 0, 0 self.logger(f" Files identified for download: {len(files_to_process_for_download)}") post_download_count = 0 post_skip_count = 0 local_processed_filenames = set() local_filenames_lock = threading.Lock() for file_info in files_to_process_for_download: if self.check_cancel(): break if self.skip_current_file_flag.is_set(): original_name_for_log = file_info.get('_original_name_for_log', file_info.get('name', 'unknown_file')) self.logger(f"ā­ļø File skip requested: {original_name_for_log}") post_skip_count += 1 self.skip_current_file_flag.clear() continue file_url = file_info.get('url') original_filename = file_info.get('name') is_thumbnail = file_info.get('_is_thumbnail', False) original_name_for_log = file_info.get('_original_name_for_log', original_filename) if not file_url or not original_filename: self.logger(f"āš ļø Skipping file entry due to missing URL or name: {str(file_info)[:100]}") post_skip_count += 1 continue cleaned_save_filename = clean_filename(original_filename) if self.skip_words_list: filename_lower = cleaned_save_filename.lower() file_skipped_by_word = False for skip_word in self.skip_words_list: if skip_word.lower() in filename_lower: self.logger(f" -> Skip File (Filename): File '{original_name_for_log}' contains skip word '{skip_word}'.") post_skip_count += 1 file_skipped_by_word = True break if file_skipped_by_word: continue if not self.download_thumbnails: file_skipped_by_filter = False is_img = is_image(cleaned_save_filename) is_vid = is_video(cleaned_save_filename) is_zip_file = is_zip(cleaned_save_filename) is_rar_file = is_rar(cleaned_save_filename) if self.filter_mode == 'image' and not is_img: self.logger(f" -> Filter Skip: '{original_name_for_log}' (Not image/gif)") file_skipped_by_filter = True elif self.filter_mode == 'video' and not is_vid: self.logger(f" -> Filter Skip: '{original_name_for_log}' (Not video)") file_skipped_by_filter = True elif self.skip_zip and is_zip_file: self.logger(f" -> Pref Skip: '{original_name_for_log}' (Zip)") file_skipped_by_filter = True elif self.skip_rar and is_rar_file: self.logger(f" -> Pref Skip: '{original_name_for_log}' (RAR)") file_skipped_by_filter = True if file_skipped_by_filter: post_skip_count += 1 continue file_downloaded_or_exists = False for folder_path in valid_folder_paths: if self.check_cancel(): break try: os.makedirs(folder_path, exist_ok=True) except OSError as e: self.logger(f"āŒ Error ensuring directory exists {folder_path}: {e}. Skipping path.") continue except Exception as e: self.logger(f"āŒ Unexpected error creating dir {folder_path}: {e}. Skipping path.") continue save_path = os.path.join(folder_path, cleaned_save_filename) folder_basename = os.path.basename(folder_path) with local_filenames_lock: if os.path.exists(save_path) and os.path.getsize(save_path) > 0: self.logger(f" -> Exists Skip: '{original_name_for_log}' in '{folder_basename}'") post_skip_count += 1 file_downloaded_or_exists = True with self.downloaded_files_lock: self.downloaded_files.add(cleaned_save_filename) break elif cleaned_save_filename in local_processed_filenames: self.logger(f" -> Local Skip: '{original_name_for_log}' in '{folder_basename}' (already processed in this post)") post_skip_count += 1 file_downloaded_or_exists = True with self.downloaded_files_lock: self.downloaded_files.add(cleaned_save_filename) break with self.downloaded_files_lock: if cleaned_save_filename in self.downloaded_files: self.logger(f" -> Global Filename Skip: '{original_name_for_log}' in '{folder_basename}' (filename already downloaded globally)") post_skip_count += 1 file_downloaded_or_exists = True break try: self.logger(f"ā¬‡ļø Downloading '{original_name_for_log}' to '{folder_basename}'...") self.current_download_path = save_path self.is_downloading_file = True self.signals.file_download_status_signal.emit(True) response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True) response.raise_for_status() file_content_bytes = BytesIO() downloaded_size = 0 chunk_count = 0 md5_hash = hashlib.md5() for chunk in response.iter_content(chunk_size=32 * 1024): if self.check_cancel(): break if self.skip_current_file_flag.is_set(): break if chunk: file_content_bytes.write(chunk) md5_hash.update(chunk) downloaded_size += len(chunk) chunk_count += 1 if self.check_cancel() or self.skip_current_file_flag.is_set(): self.logger(f" āš ļø Download interrupted {'(cancelled)' if self.cancellation_event.is_set() else '(skipped)'} for {original_name_for_log}.") if self.skip_current_file_flag.is_set(): post_skip_count += 1 self.skip_current_file_flag.clear() break final_save_path = save_path current_filename_for_log = cleaned_save_filename file_content_bytes.seek(0) if downloaded_size == 0 and chunk_count > 0: self.logger(f"āš ļø Warning: Downloaded 0 bytes despite receiving chunks for {original_name_for_log}. Skipping save.") post_skip_count += 1 break if downloaded_size > 0: calculated_hash = md5_hash.hexdigest() with self.downloaded_file_hashes_lock: if calculated_hash in self.downloaded_file_hashes: self.logger(f" -> Content Skip: '{original_name_for_log}' (Hash: {calculated_hash}) already downloaded.") post_skip_count += 1 file_downloaded_or_exists = True with self.downloaded_files_lock: self.downloaded_files.add(cleaned_save_filename) with local_filenames_lock: local_processed_filenames.add(cleaned_save_filename) break else: pass if not file_downloaded_or_exists: final_bytes_to_save = file_content_bytes is_img_for_compress = is_image(cleaned_save_filename) if is_img_for_compress and not is_thumbnail and self.compress_images and Image and downloaded_size > 1500 * 1024: self.logger(f" Compressing large image ({downloaded_size / 1024:.2f} KB)...") try: with Image.open(file_content_bytes) as img: original_format = img.format if img.mode == 'P': img = img.convert('RGBA') elif img.mode not in ['RGB', 'RGBA', 'L']: img = img.convert('RGB') compressed_bytes = BytesIO() img.save(compressed_bytes, format='WebP', quality=75, method=4) compressed_size = compressed_bytes.getbuffer().nbytes if compressed_size < downloaded_size * 0.90: self.logger(f" Compression success: {compressed_size / 1024:.2f} KB (WebP Q75)") compressed_bytes.seek(0) final_bytes_to_save = compressed_bytes base, _ = os.path.splitext(cleaned_save_filename) current_filename_for_log = base + '.webp' final_save_path = os.path.join(folder_path, current_filename_for_log) self.logger(f" Updated filename: {current_filename_for_log}") else: self.logger(f" Compression skipped: WebP not significantly smaller ({compressed_size / 1024:.2f} KB).") file_content_bytes.seek(0) final_bytes_to_save = file_content_bytes except Exception as comp_e: self.logger(f"āŒ Image compression failed for {original_name_for_log}: {comp_e}. Saving original.") file_content_bytes.seek(0) final_bytes_to_save = file_content_bytes final_save_path = save_path elif is_img_for_compress and not is_thumbnail and self.compress_images: self.logger(f" Skipping compression: Image size ({downloaded_size / 1024:.2f} KB) below threshold.") file_content_bytes.seek(0) final_bytes_to_save = file_content_bytes elif is_thumbnail and downloaded_size > LARGE_THUMBNAIL_THRESHOLD: self.logger(f"āš ļø Downloaded thumbnail '{current_filename_for_log}' ({downloaded_size / 1024:.2f} KB) is large.") file_content_bytes.seek(0) final_bytes_to_save = file_content_bytes else: file_content_bytes.seek(0) final_bytes_to_save = file_content_bytes save_file = False with self.downloaded_files_lock: with local_filenames_lock: if os.path.exists(final_save_path) and os.path.getsize(final_save_path) > 0: self.logger(f" -> Exists Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}'") post_skip_count += 1 file_downloaded_or_exists = True elif current_filename_for_log in self.downloaded_files: self.logger(f" -> Global Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}' (already downloaded globally)") post_skip_count += 1 file_downloaded_or_exists = True elif current_filename_for_log in local_processed_filenames: self.logger(f" -> Local Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}' (already processed in this post)") post_skip_count += 1 file_downloaded_or_exists = True else: save_file = True if save_file: try: with open(final_save_path, 'wb') as f: while True: chunk = final_bytes_to_save.read(64 * 1024) if not chunk: break f.write(chunk) with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_hash) with self.downloaded_files_lock: self.downloaded_files.add(current_filename_for_log) with local_filenames_lock: local_processed_filenames.add(current_filename_for_log) post_download_count += 1 file_downloaded_or_exists = True self.logger(f"āœ… Saved: '{current_filename_for_log}' ({downloaded_size / 1024:.1f} KB, Hash: {calculated_hash[:8]}...) in '{folder_basename}'") time.sleep(0.05) except IOError as io_err: self.logger(f"āŒ Save Fail: '{current_filename_for_log}' to '{folder_basename}'. Error: {io_err}") post_skip_count += 1 if os.path.exists(final_save_path): try: os.remove(final_save_path) except OSError: pass break except Exception as save_err: self.logger(f"āŒ Unexpected Save Error: '{current_filename_for_log}' in '{folder_basename}'. Error: {save_err}") post_skip_count += 1 if os.path.exists(final_save_path): try: os.remove(final_save_path) except OSError: pass break final_bytes_to_save.close() if file_content_bytes is not final_bytes_to_save: file_content_bytes.close() if file_downloaded_or_exists: break except requests.exceptions.RequestException as e: self.logger(f"āŒ Download Fail: {original_name_for_log}. Error: {e}") post_skip_count += 1 break except IOError as e: self.logger(f"āŒ File I/O Error: {original_name_for_log} in '{folder_basename}'. Error: {e}") post_skip_count += 1 break except Exception as e: self.logger(f"āŒ Unexpected Error during download/save for {original_name_for_log}: {e}") import traceback self.logger(f" Traceback: {traceback.format_exc(limit=2)}") post_skip_count += 1 break finally: self.is_downloading_file = False self.current_download_path = None self.signals.file_download_status_signal.emit(False) if self.check_cancel(): break if self.skip_current_file_flag.is_set(): self.skip_current_file_flag.clear() if not file_downloaded_or_exists: pass if self.check_cancel(): self.logger(f" Post {post_id} processing cancelled.") return post_download_count, post_skip_count self.logger(f" Post {post_id} Summary: Downloaded={post_download_count}, Skipped={post_skip_count}") return post_download_count, post_skip_count class DownloaderApp(QWidget): character_prompt_response_signal = pyqtSignal(bool) log_signal = pyqtSignal(str) add_character_prompt_signal = pyqtSignal(str) file_download_status_signal = pyqtSignal(bool) overall_progress_signal = pyqtSignal(int, int) finished_signal = pyqtSignal(int, int, bool) def __init__(self): super().__init__() self.config_file = "Known.txt" self.download_thread = None self.thread_pool = None self.cancellation_event = threading.Event() self.active_futures = [] self.total_posts_to_process = 0 self.processed_posts_count = 0 self.download_counter = 0 self.skip_counter = 0 self.worker_signals = PostProcessorSignals() self.prompt_mutex = QMutex() self._add_character_response = None self.downloaded_files = set() self.downloaded_files_lock = threading.Lock() self.downloaded_file_hashes = set() self.downloaded_file_hashes_lock = threading.Lock() self.load_known_names() self.setWindowTitle("Kemono Downloader v2.3 (Content Dedupe & Skip)") self.setGeometry(150, 150, 1050, 820) self.setStyleSheet(self.get_dark_theme()) self.init_ui() self._connect_signals() self.log_signal.emit("ā„¹ļø Local API server functionality has been removed.") def _connect_signals(self): self.worker_signals.progress_signal.connect(self.log) self.worker_signals.file_download_status_signal.connect(self.update_skip_button_state) self.log_signal.connect(self.log) self.add_character_prompt_signal.connect(self.prompt_add_character) self.character_prompt_response_signal.connect(self.receive_add_character_result) self.overall_progress_signal.connect(self.update_progress_display) self.finished_signal.connect(self.download_finished) self.character_search_input.textChanged.connect(self.filter_character_list) def load_known_names(self): global KNOWN_NAMES loaded_names = [] if os.path.exists(self.config_file): try: with open(self.config_file, 'r', encoding='utf-8') as f: raw_names = [line.strip() for line in f] loaded_names = sorted(list(set(filter(None, raw_names)))) log_msg = f"ā„¹ļø Loaded {len(loaded_names)} known names from {self.config_file}" except Exception as e: log_msg = f"āŒ Error loading config '{self.config_file}': {e}" QMessageBox.warning(self, "Config Load Error", f"Could not load list from {self.config_file}:\n{e}") loaded_names = [] else: log_msg = f"ā„¹ļø Config file '{self.config_file}' not found. Starting empty." loaded_names = [] KNOWN_NAMES = loaded_names if hasattr(self, 'log_output'): self.log_signal.emit(log_msg) else: print(log_msg) def save_known_names(self): global KNOWN_NAMES try: unique_sorted_names = sorted(list(set(filter(None, KNOWN_NAMES)))) with open(self.config_file, 'w', encoding='utf-8') as f: for name in unique_sorted_names: f.write(name + '\n') KNOWN_NAMES = unique_sorted_names if hasattr(self, 'log_signal'): self.log_signal.emit(f"šŸ’¾ Saved {len(unique_sorted_names)} known names to {self.config_file}") else: print(f"Saved {len(unique_sorted_names)} names to {self.config_file}") except Exception as e: log_msg = f"āŒ Error saving config '{self.config_file}': {e}" if hasattr(self, 'log_signal'): self.log_signal.emit(log_msg) else: print(log_msg) QMessageBox.warning(self, "Config Save Error", f"Could not save list to {self.config_file}:\n{e}") def closeEvent(self, event): self.save_known_names() should_exit = True is_downloading = (self.download_thread and self.download_thread.isRunning()) or (self.thread_pool is not None) if is_downloading: reply = QMessageBox.question(self, "Confirm Exit", "Download in progress. Are you sure you want to exit and cancel?", QMessageBox.Yes | QMessageBox.No, QMessageBox.No) if reply == QMessageBox.Yes: self.log_signal.emit("āš ļø Cancelling active download due to application exit...") self.cancel_download() else: should_exit = False self.log_signal.emit("ā„¹ļø Application exit cancelled.") event.ignore() return if should_exit: self.log_signal.emit("ā„¹ļø Application closing.") self.log_signal.emit("šŸ‘‹ Exiting application.") event.accept() def init_ui(self): main_layout = QHBoxLayout() left_layout = QVBoxLayout() right_layout = QVBoxLayout() left_layout.addWidget(QLabel("šŸ”— Kemono Creator/Post URL:")) self.link_input = QLineEdit() self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765") self.link_input.textChanged.connect(self.update_custom_folder_visibility) left_layout.addWidget(self.link_input) left_layout.addWidget(QLabel("šŸ“ Download Location:")) self.dir_input = QLineEdit() self.dir_input.setPlaceholderText("Select folder where downloads will be saved") self.dir_button = QPushButton("Browse...") self.dir_button.clicked.connect(self.browse_directory) dir_layout = QHBoxLayout() dir_layout.addWidget(self.dir_input, 1) dir_layout.addWidget(self.dir_button) left_layout.addLayout(dir_layout) self.custom_folder_widget = QWidget() custom_folder_layout = QVBoxLayout(self.custom_folder_widget) custom_folder_layout.setContentsMargins(0, 5, 0, 0) self.custom_folder_label = QLabel("šŸ—„ļø Custom Folder Name (Single Post Only):") self.custom_folder_input = QLineEdit() self.custom_folder_input.setPlaceholderText("Optional: Save this post to specific folder") custom_folder_layout.addWidget(self.custom_folder_label) custom_folder_layout.addWidget(self.custom_folder_input) self.custom_folder_widget.setVisible(False) left_layout.addWidget(self.custom_folder_widget) self.character_filter_widget = QWidget() character_filter_layout = QVBoxLayout(self.character_filter_widget) character_filter_layout.setContentsMargins(0, 5, 0, 0) self.character_label = QLabel("šŸŽÆ Filter by Show/Character Name:") self.character_input = QLineEdit() self.character_input.setPlaceholderText("Only download posts matching this known name in title") character_filter_layout.addWidget(self.character_label) character_filter_layout.addWidget(self.character_input) self.character_filter_widget.setVisible(True) left_layout.addWidget(self.character_filter_widget) left_layout.addWidget(QLabel("🚫 Skip Posts/Files with Words (comma-separated):")) self.skip_words_input = QLineEdit() self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview") left_layout.addWidget(self.skip_words_input) options_layout_1 = QHBoxLayout() options_layout_1.addWidget(QLabel("Filter Files:")) self.radio_group = QButtonGroup(self) self.radio_all = QRadioButton("All") self.radio_images = QRadioButton("Images/GIFs") self.radio_videos = QRadioButton("Videos") self.radio_all.setChecked(True) self.radio_group.addButton(self.radio_all) self.radio_group.addButton(self.radio_images) self.radio_group.addButton(self.radio_videos) options_layout_1.addWidget(self.radio_all) options_layout_1.addWidget(self.radio_images) options_layout_1.addWidget(self.radio_videos) options_layout_1.addStretch(1) left_layout.addLayout(options_layout_1) options_layout_2 = QHBoxLayout() self.use_subfolders_checkbox = QCheckBox("Separate Folders by Name/Title") self.use_subfolders_checkbox.setChecked(True) self.use_subfolders_checkbox.toggled.connect(self.update_ui_for_subfolders) options_layout_2.addWidget(self.use_subfolders_checkbox) self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only") self.download_thumbnails_checkbox.setChecked(False) self.download_thumbnails_checkbox.setToolTip("Thumbnail download functionality is currently limited without the API.") options_layout_2.addWidget(self.download_thumbnails_checkbox) options_layout_2.addStretch(1) left_layout.addLayout(options_layout_2) options_layout_3 = QHBoxLayout() self.skip_zip_checkbox = QCheckBox("Skip .zip") self.skip_zip_checkbox.setChecked(True) options_layout_3.addWidget(self.skip_zip_checkbox) self.skip_rar_checkbox = QCheckBox("Skip .rar") self.skip_rar_checkbox.setChecked(True) options_layout_3.addWidget(self.skip_rar_checkbox) self.compress_images_checkbox = QCheckBox("Compress Large Images (to WebP)") self.compress_images_checkbox.setChecked(False) self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).") options_layout_3.addWidget(self.compress_images_checkbox) options_layout_3.addStretch(1) left_layout.addLayout(options_layout_3) options_layout_4 = QHBoxLayout() self.use_multithreading_checkbox = QCheckBox(f"Use Multithreading ({4} Threads)") self.use_multithreading_checkbox.setChecked(True) self.use_multithreading_checkbox.setToolTip("Speeds up downloads for full creator pages.\nSingle post URLs always use one thread.") options_layout_4.addWidget(self.use_multithreading_checkbox) options_layout_4.addStretch(1) left_layout.addLayout(options_layout_4) btn_layout = QHBoxLayout() self.download_btn = QPushButton("ā¬‡ļø Start Download") self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;") self.download_btn.clicked.connect(self.start_download) self.cancel_btn = QPushButton("āŒ Cancel") self.cancel_btn.setEnabled(False) self.cancel_btn.clicked.connect(self.cancel_download) self.skip_file_btn = QPushButton("ā­ļø Skip Current File") self.skip_file_btn.setEnabled(False) self.skip_file_btn.setToolTip("Only available in single-thread mode during file download.") self.skip_file_btn.clicked.connect(self.skip_current_file) btn_layout.addWidget(self.download_btn) btn_layout.addWidget(self.cancel_btn) btn_layout.addWidget(self.skip_file_btn) left_layout.addLayout(btn_layout) left_layout.addSpacing(10) known_chars_label_layout = QHBoxLayout() self.known_chars_label = QLabel("šŸŽ­ Known Shows/Characters (for Folder Names):") self.character_search_input = QLineEdit() self.character_search_input.setPlaceholderText("Search characters...") known_chars_label_layout.addWidget(self.known_chars_label, 1) known_chars_label_layout.addWidget(self.character_search_input) left_layout.addLayout(known_chars_label_layout) self.character_list = QListWidget() self.character_list.addItems(KNOWN_NAMES) self.character_list.setSelectionMode(QListWidget.ExtendedSelection) left_layout.addWidget(self.character_list, 1) char_manage_layout = QHBoxLayout() self.new_char_input = QLineEdit() self.new_char_input.setPlaceholderText("Add new show/character name") self.add_char_button = QPushButton("āž• Add") self.delete_char_button = QPushButton("šŸ—‘ļø Delete Selected") self.add_char_button.clicked.connect(self.add_new_character) self.new_char_input.returnPressed.connect(self.add_char_button.click) self.delete_char_button.clicked.connect(self.delete_selected_character) char_manage_layout.addWidget(self.new_char_input, 2) char_manage_layout.addWidget(self.add_char_button, 1) char_manage_layout.addWidget(self.delete_char_button, 1) left_layout.addLayout(char_manage_layout) right_layout.addWidget(QLabel("šŸ“œ Progress Log:")) self.log_output = QTextEdit() self.log_output.setReadOnly(True) self.log_output.setMinimumWidth(450) self.log_output.setLineWrapMode(QTextEdit.WidgetWidth) right_layout.addWidget(self.log_output, 1) self.progress_label = QLabel("Progress: Idle") self.progress_label.setStyleSheet("padding-top: 5px; font-style: italic;") right_layout.addWidget(self.progress_label) main_layout.addLayout(left_layout, 5) main_layout.addLayout(right_layout, 4) self.setLayout(main_layout) self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked()) self.update_custom_folder_visibility() def get_dark_theme(self): return """ QWidget { background-color: #2E2E2E; color: #E0E0E0; font-family: Segoe UI, Arial, sans-serif; font-size: 10pt; } QLineEdit, QTextEdit, QListWidget { background-color: #3C3F41; border: 1px solid #5A5A5A; padding: 5px; color: #F0F0F0; border-radius: 4px; } QTextEdit { font-family: Consolas, Courier New, monospace; font-size: 9.5pt; } QPushButton { background-color: #555; color: #F0F0F0; border: 1px solid #6A6A6A; padding: 6px 12px; border-radius: 4px; min-height: 22px; } QPushButton:hover { background-color: #656565; border: 1px solid #7A7A7A; } QPushButton:pressed { background-color: #4A4A4A; } QPushButton:disabled { background-color: #404040; color: #888; border-color: #555; } QLabel { font-weight: bold; padding-top: 4px; padding-bottom: 2px; color: #C0C0C0; } QRadioButton, QCheckBox { spacing: 5px; color: #E0E0E0; padding-top: 4px; padding-bottom: 4px; } QRadioButton::indicator, QCheckBox::indicator { width: 14px; height: 14px; } QListWidget { alternate-background-color: #353535; border: 1px solid #5A5A5A; } QListWidget::item:selected { background-color: #007ACC; color: #FFFFFF; } QToolTip { background-color: #4A4A4A; color: #F0F0F0; border: 1px solid #6A6A6A; padding: 4px; border-radius: 3px; } """ def browse_directory(self): current_dir = self.dir_input.text() if os.path.isdir(self.dir_input.text()) else "" folder = QFileDialog.getExistingDirectory(self, "Select Download Folder", current_dir) if folder: self.dir_input.setText(folder) def log(self, message): try: safe_message = str(message).replace('\x00', '[NULL]') self.log_output.append(safe_message) scrollbar = self.log_output.verticalScrollBar() if scrollbar.value() >= scrollbar.maximum() - 30: scrollbar.setValue(scrollbar.maximum()) except Exception as e: print(f"GUI Log Error: {e}") print(f"Original Message: {message}") def get_filter_mode(self): if self.radio_images.isChecked(): return 'image' elif self.radio_videos.isChecked(): return 'video' return 'all' def add_new_character(self): global KNOWN_NAMES name_to_add = self.new_char_input.text().strip() if not name_to_add: QMessageBox.warning(self, "Input Error", "Name cannot be empty.") return name_lower = name_to_add.lower() is_duplicate = any(existing.lower() == name_lower for existing in KNOWN_NAMES) if not is_duplicate: KNOWN_NAMES.append(name_to_add) KNOWN_NAMES.sort(key=str.lower) self.character_list.clear() self.character_list.addItems(KNOWN_NAMES) self.filter_character_list(self.character_search_input.text()) self.log_signal.emit(f"āœ… Added '{name_to_add}' to known names list.") self.new_char_input.clear() self.save_known_names() else: QMessageBox.warning(self, "Duplicate Name", f"The name '{name_to_add}' (or similar) already exists in the list.") def delete_selected_character(self): global KNOWN_NAMES selected_items = self.character_list.selectedItems() if not selected_items: QMessageBox.warning(self, "Selection Error", "Please select one or more names to delete.") return names_to_remove = {item.text() for item in selected_items} confirm = QMessageBox.question(self, "Confirm Deletion", f"Are you sure you want to delete {len(names_to_remove)} selected name(s)?", QMessageBox.Yes | QMessageBox.No, QMessageBox.No) if confirm == QMessageBox.Yes: original_count = len(KNOWN_NAMES) KNOWN_NAMES = [n for n in KNOWN_NAMES if n not in names_to_remove] removed_count = original_count - len(KNOWN_NAMES) if removed_count > 0: self.log_signal.emit(f"šŸ—‘ļø Removed {removed_count} name(s) from the list.") self.character_list.clear() KNOWN_NAMES.sort(key=str.lower) self.character_list.addItems(KNOWN_NAMES) self.filter_character_list(self.character_search_input.text()) self.save_known_names() else: self.log_signal.emit("ā„¹ļø No names were removed (selection might have changed?).") def update_custom_folder_visibility(self, url_text=None): if url_text is None: url_text = self.link_input.text() _, _, post_id = extract_post_info(url_text.strip()) should_show = bool(post_id) and self.use_subfolders_checkbox.isChecked() self.custom_folder_widget.setVisible(should_show) if not should_show: self.custom_folder_input.clear() def update_ui_for_subfolders(self, checked): self.character_filter_widget.setVisible(checked) self.update_custom_folder_visibility() if not checked: self.character_input.clear() def filter_character_list(self, search_text): search_text = search_text.lower() for i in range(self.character_list.count()): item = self.character_list.item(i) if search_text in item.text().lower(): item.setHidden(False) else: item.setHidden(True) def update_progress_display(self, total_posts, processed_posts): if total_posts > 0: try: percent = (processed_posts / total_posts) * 100 self.progress_label.setText(f"Progress: {processed_posts} / {total_posts} posts ({percent:.1f}%)") except ZeroDivisionError: self.progress_label.setText(f"Progress: {processed_posts} / {total_posts} posts") elif processed_posts > 0: self.progress_label.setText(f"Progress: Processing post {processed_posts}...") else: self.progress_label.setText("Progress: Starting...") def start_download(self): is_running = (self.download_thread and self.download_thread.isRunning()) or (self.thread_pool is not None) if is_running: self.log_signal.emit("āš ļø Download already in progress.") QMessageBox.warning(self, "Busy", "A download is already running.") return api_url = self.link_input.text().strip() output_dir = self.dir_input.text().strip() filter_mode = self.get_filter_mode() skip_zip = self.skip_zip_checkbox.isChecked() skip_rar = self.skip_rar_checkbox.isChecked() use_subfolders = self.use_subfolders_checkbox.isChecked() compress_images = self.compress_images_checkbox.isChecked() download_thumbnails = self.download_thumbnails_checkbox.isChecked() use_multithreading = self.use_multithreading_checkbox.isChecked() num_threads = 4 raw_skip_words = self.skip_words_input.text().strip() skip_words_list = [] if raw_skip_words: skip_words_list = [word.strip() for word in raw_skip_words.split(',') if word.strip()] service, user_id, post_id_from_url = extract_post_info(api_url) if not api_url: QMessageBox.critical(self, "Input Error", "Please enter a Kemono/Coomer URL.") return if not service or not user_id: QMessageBox.critical(self, "Input Error", "Invalid or unsupported URL format.\nPlease provide a valid creator page or post URL.") self.log_signal.emit(f"āŒ Invalid URL detected: {api_url}") return if not output_dir: QMessageBox.critical(self, "Input Error", "Please select a download directory.") return if not os.path.isdir(output_dir): reply = QMessageBox.question(self, "Directory Not Found", f"The directory '{output_dir}' does not exist.\n\nCreate it?", QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) if reply == QMessageBox.Yes: try: os.makedirs(output_dir) self.log_signal.emit(f"ā„¹ļø Created download directory: {output_dir}") except Exception as e: QMessageBox.critical(self, "Directory Error", f"Could not create directory:\n{e}") self.log_signal.emit(f"āŒ Failed to create directory: {output_dir} - {e}") return else: return if compress_images and Image is None: QMessageBox.warning(self, "Dependency Missing", "Image compression requires the Pillow library, but it's not installed.\nPlease run: pip install Pillow\n\nCompression will be disabled for this session.") self.log_signal.emit("āŒ Cannot compress images: Pillow library not found.") compress_images = False filter_character = None if use_subfolders and self.character_filter_widget.isVisible(): filter_character = self.character_input.text().strip() or None custom_folder_name = None if use_subfolders and post_id_from_url and self.custom_folder_widget.isVisible(): raw_custom_name = self.custom_folder_input.text().strip() if raw_custom_name: cleaned_custom = clean_folder_name(raw_custom_name) if cleaned_custom: custom_folder_name = cleaned_custom else: QMessageBox.warning(self, "Input Warning", f"Custom folder name '{raw_custom_name}' is invalid and will be ignored.") self.log_signal.emit(f"āš ļø Invalid custom folder name ignored: {raw_custom_name}") if use_subfolders and filter_character and not post_id_from_url: clean_char_filter = clean_folder_name(filter_character.lower()) known_names_lower = {name.lower() for name in KNOWN_NAMES} if not clean_char_filter: self.log_signal.emit(f"āŒ Filter name '{filter_character}' is invalid. Aborting.") QMessageBox.critical(self, "Filter Error", "The provided filter name is invalid (contains only spaces or special characters).") return elif filter_character.lower() not in known_names_lower: reply = QMessageBox.question(self, "Add Filter Name?", f"The filter name '{filter_character}' is not in your known names list.\n\nAdd it now and continue?", QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes) if reply == QMessageBox.Yes: self.new_char_input.setText(filter_character) self.add_new_character() if filter_character.lower() not in {name.lower() for name in KNOWN_NAMES}: self.log_signal.emit(f"āš ļø Failed to add '{filter_character}' automatically. Please add manually if needed.") else: self.log_signal.emit(f"āœ… Added filter '{filter_character}' to list.") elif reply == QMessageBox.No: self.log_signal.emit(f"ā„¹ļø Proceeding without adding '{filter_character}'. Posts matching it might not be saved to a specific folder unless name is derived.") else: self.log_signal.emit("āŒ Download cancelled by user during filter check.") return self.log_output.clear() self.cancellation_event.clear() self.active_futures = [] self.total_posts_to_process = 0 self.processed_posts_count = 0 self.download_counter = 0 self.skip_counter = 0 with self.downloaded_files_lock: self.downloaded_files.clear() with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.clear() self.progress_label.setText("Progress: Initializing...") self.log_signal.emit("="*40) self.log_signal.emit(f"šŸš€ Starting Download Task @ {time.strftime('%Y-%m-%d %H:%M:%S')}") self.log_signal.emit(f" URL: {api_url}") self.log_signal.emit(f" Save Location: {output_dir}") mode = "Single Post" if post_id_from_url else "Creator Feed" self.log_signal.emit(f" Mode: {mode}") self.log_signal.emit(f" Subfolders: {'Enabled' if use_subfolders else 'Disabled'}") if use_subfolders: if custom_folder_name: self.log_signal.emit(f" Custom Folder (Post): '{custom_folder_name}'") elif filter_character: self.log_signal.emit(f" Character Filter: '{filter_character}'") else: self.log_signal.emit(f" Folder Naming: Automatic (Known Names > Title Extraction)") self.log_signal.emit(f" File Type Filter: {filter_mode}") self.log_signal.emit(f" Skip: {'.zip' if skip_zip else ''}{', ' if skip_zip and skip_rar else ''}{'.rar' if skip_rar else ''}{'None' if not (skip_zip or skip_rar) else ''}") if skip_words_list: self.log_signal.emit(f" Skip Words (Title/Filename): {', '.join(skip_words_list)}") else: self.log_signal.emit(f" Skip Words (Title/Filename): None") self.log_signal.emit(f" Compress Images: {'Enabled' if compress_images else 'Disabled'}") self.log_signal.emit(f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}") should_use_multithreading = use_multithreading and not post_id_from_url self.log_signal.emit(f" Threading: {'Multi-threaded' if should_use_multithreading else 'Single-threaded'}") self.log_signal.emit("="*40) self.set_ui_enabled(False) self.cancel_btn.setEnabled(True) try: common_args = { 'api_url': api_url, 'output_dir': output_dir, 'known_names_copy': list(KNOWN_NAMES), 'filter_character': filter_character, 'filter_mode': filter_mode, 'skip_zip': skip_zip, 'skip_rar': skip_rar, 'use_subfolders': use_subfolders, 'compress_images': compress_images, 'download_thumbnails': download_thumbnails, 'service': service, 'user_id': user_id, 'downloaded_files': self.downloaded_files, 'downloaded_files_lock': self.downloaded_files_lock, 'downloaded_file_hashes': self.downloaded_file_hashes, 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, 'skip_words_list': skip_words_list, } if should_use_multithreading: self.log_signal.emit(" Initializing multi-threaded download...") multi_args = common_args.copy() multi_args['num_threads'] = num_threads self.start_multi_threaded_download(**multi_args) else: self.log_signal.emit(" Initializing single-threaded download...") single_args = common_args.copy() single_args['custom_folder_name'] = custom_folder_name single_args['single_post_id'] = post_id_from_url self.start_single_threaded_download(**single_args) except Exception as e: self.log_signal.emit(f"āŒ CRITICAL ERROR preparing download task: {e}") import traceback self.log_signal.emit(traceback.format_exc()) QMessageBox.critical(self, "Start Error", f"Failed to start download task:\n{e}") self.download_finished(0, 0, False) def start_single_threaded_download(self, **kwargs): try: self.download_thread = DownloadThread( cancellation_event = self.cancellation_event, **kwargs ) if self.download_thread._init_failed: QMessageBox.critical(self, "Thread Error", "Failed to initialize the download thread.\nCheck the log for details.") self.download_finished(0, 0, False) return self.download_thread.progress_signal.connect(self.log_signal) self.download_thread.add_character_prompt_signal.connect(self.add_character_prompt_signal) self.download_thread.file_download_status_signal.connect(self.file_download_status_signal) self.download_thread.finished_signal.connect(self.finished_signal) self.character_prompt_response_signal.connect(self.download_thread.receive_add_character_result) self.download_thread.start() self.log_signal.emit("āœ… Single download thread started.") except Exception as e: self.log_signal.emit(f"āŒ CRITICAL ERROR starting single-thread task: {e}") import traceback self.log_signal.emit(traceback.format_exc()) QMessageBox.critical(self, "Thread Start Error", f"Failed to start download thread:\n{e}") self.download_finished(0, 0, False) def start_multi_threaded_download(self, **kwargs): num_threads = kwargs['num_threads'] self.thread_pool = ThreadPoolExecutor(max_workers=num_threads, thread_name_prefix='Downloader_') self.active_futures = [] self.processed_posts_count = 0 self.total_posts_to_process = 0 self.download_counter = 0 self.skip_counter = 0 worker_args_template = kwargs.copy() del worker_args_template['num_threads'] fetcher_thread = threading.Thread( target=self._fetch_and_queue_posts, args=(kwargs['api_url'], worker_args_template), daemon=True, name="PostFetcher" ) fetcher_thread.start() self.log_signal.emit(f"āœ… Post fetcher thread started. {num_threads} worker threads initializing...") def _fetch_and_queue_posts(self, api_url_input, worker_args_template): all_posts = [] fetch_error = False try: self.log_signal.emit(" Starting post fetch...") def fetcher_logger(msg): self.log_signal.emit(f"[Fetcher] {msg}") post_generator = download_from_api(api_url_input, logger=fetcher_logger) for posts_batch in post_generator: if self.cancellation_event.is_set(): self.log_signal.emit("āš ļø Post fetching cancelled by user.") fetch_error = True break if isinstance(posts_batch, list): all_posts.extend(posts_batch) self.total_posts_to_process = len(all_posts) if self.total_posts_to_process % 250 == 0: self.log_signal.emit(f" Fetched {self.total_posts_to_process} posts...") else: self.log_signal.emit(f"āŒ API returned non-list batch: {type(posts_batch)}. Stopping fetch.") fetch_error = True break if not fetch_error: self.log_signal.emit(f"āœ… Finished fetching. Total posts found: {self.total_posts_to_process}") except Exception as e: self.log_signal.emit(f"āŒ Unexpected Error during post fetching: {e}") import traceback self.log_signal.emit(traceback.format_exc(limit=3)) fetch_error = True if self.cancellation_event.is_set() or fetch_error: self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set()) if self.thread_pool: self.thread_pool.shutdown(wait=False, cancel_futures=True) self.thread_pool = None return if self.total_posts_to_process == 0: self.log_signal.emit("šŸ˜• No posts found or fetched successfully.") self.finished_signal.emit(0, 0, False) return self.log_signal.emit(f" Submitting {self.total_posts_to_process} post tasks to worker pool...") self.processed_posts_count = 0 self.overall_progress_signal.emit(self.total_posts_to_process, 0) common_worker_args = { 'download_root': worker_args_template['output_dir'], 'known_names': worker_args_template['known_names_copy'], 'filter_character': worker_args_template['filter_character'], 'unwanted_keywords': {'spicy', 'hd', 'nsfw', '4k', 'preview'}, 'filter_mode': worker_args_template['filter_mode'], 'skip_zip': worker_args_template['skip_zip'], 'skip_rar': worker_args_template['skip_rar'], 'use_subfolders': worker_args_template['use_subfolders'], 'target_post_id_from_initial_url': worker_args_template.get('single_post_id'), 'custom_folder_name': worker_args_template.get('custom_folder_name'), 'compress_images': worker_args_template['compress_images'], 'download_thumbnails': worker_args_template['download_thumbnails'], 'service': worker_args_template['service'], 'user_id': worker_args_template['user_id'], 'api_url_input': worker_args_template['api_url'], 'cancellation_event': self.cancellation_event, 'signals': self.worker_signals, 'downloaded_files': self.downloaded_files, 'downloaded_files_lock': self.downloaded_files_lock, 'downloaded_file_hashes': self.downloaded_file_hashes, 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, 'skip_words_list': worker_args_template['skip_words_list'], } for post_data in all_posts: if self.cancellation_event.is_set(): self.log_signal.emit("āš ļø Cancellation detected during task submission.") break if not isinstance(post_data, dict): self.log_signal.emit(f"āš ļø Skipping invalid post data item (type: {type(post_data)}).") self.processed_posts_count += 1 self.total_posts_to_process -=1 continue worker = PostProcessorWorker(post_data=post_data, **common_worker_args) try: if self.thread_pool: future = self.thread_pool.submit(worker.process) future.add_done_callback(self._handle_future_result) self.active_futures.append(future) else: self.log_signal.emit("āš ļø Thread pool shutdown before submitting all tasks.") break except RuntimeError as e: self.log_signal.emit(f"āš ļø Error submitting task (pool might be shutting down): {e}") break except Exception as e: self.log_signal.emit(f"āŒ Unexpected error submitting task: {e}") break submitted_count = len(self.active_futures) self.log_signal.emit(f" {submitted_count} / {self.total_posts_to_process} tasks submitted.") def _handle_future_result(self, future: Future): self.processed_posts_count += 1 downloaded_res, skipped_res = 0, 0 try: if future.cancelled(): pass elif future.exception(): exc = future.exception() self.log_signal.emit(f"āŒ Error in worker thread: {exc}") pass else: downloaded, skipped = future.result() downloaded_res = downloaded skipped_res = skipped with threading.Lock(): self.download_counter += downloaded_res self.skip_counter += skipped_res self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count) except Exception as e: self.log_signal.emit(f"āŒ Error in result callback handling: {e}") if self.processed_posts_count >= self.total_posts_to_process and self.total_posts_to_process > 0: if self.processed_posts_count >= self.total_posts_to_process: self.log_signal.emit("šŸ All submitted tasks have completed or failed.") cancelled = self.cancellation_event.is_set() self.finished_signal.emit(self.download_counter, self.skip_counter, cancelled) def set_ui_enabled(self, enabled): self.download_btn.setEnabled(enabled) self.link_input.setEnabled(enabled) self.dir_input.setEnabled(enabled) self.dir_button.setEnabled(enabled) self.radio_all.setEnabled(enabled) self.radio_images.setEnabled(enabled) self.radio_videos.setEnabled(enabled) self.skip_zip_checkbox.setEnabled(enabled) self.skip_rar_checkbox.setEnabled(enabled) self.use_subfolders_checkbox.setEnabled(enabled) self.compress_images_checkbox.setEnabled(enabled) self.download_thumbnails_checkbox.setEnabled(enabled) self.use_multithreading_checkbox.setEnabled(enabled) self.skip_words_input.setEnabled(enabled) self.character_search_input.setEnabled(enabled) self.new_char_input.setEnabled(enabled) self.add_char_button.setEnabled(enabled) self.delete_char_button.setEnabled(enabled) subfolders_on = self.use_subfolders_checkbox.isChecked() self.custom_folder_widget.setEnabled(enabled and subfolders_on) self.character_filter_widget.setEnabled(enabled and subfolders_on) if enabled: self.update_ui_for_subfolders(subfolders_on) self.update_custom_folder_visibility() self.cancel_btn.setEnabled(not enabled) if enabled: self.skip_file_btn.setEnabled(False) def cancel_download(self): if not self.cancel_btn.isEnabled(): return self.log_signal.emit("āš ļø Requesting cancellation...") self.cancellation_event.set() self.cancel_btn.setEnabled(False) self.progress_label.setText("Progress: Cancelling...") if self.thread_pool and self.active_futures: cancelled_count = 0 for future in self.active_futures: if future.cancel(): cancelled_count += 1 if cancelled_count > 0: self.log_signal.emit(f" Attempted to cancel {cancelled_count} pending/running tasks.") def skip_current_file(self): if self.download_thread and self.download_thread.isRunning(): self.download_thread.skip_file() elif self.thread_pool: self.log_signal.emit("ā„¹ļø Skipping individual files is not supported in multi-threaded mode.") QMessageBox.information(self, "Action Not Supported", "Skipping individual files is only available in single-threaded mode.") else: self.log_signal.emit("ā„¹ļø Skip requested, but no download is active.") def update_skip_button_state(self, is_downloading_active): can_skip = (not self.download_btn.isEnabled()) and \ (self.download_thread and self.download_thread.isRunning()) and \ is_downloading_active if self.thread_pool is not None: can_skip = False self.skip_file_btn.setEnabled(can_skip) def download_finished(self, total_downloaded, total_skipped, cancelled): self.log_signal.emit("="*40) status = "Cancelled" if cancelled else "Finished" self.log_signal.emit(f"šŸ Download {status}!") self.log_signal.emit(f" Summary: Downloaded={total_downloaded}, Skipped={total_skipped}") self.progress_label.setText(f"{status}: {total_downloaded} downloaded, {total_skipped} skipped.") self.log_signal.emit("="*40) if self.download_thread: try: self.character_prompt_response_signal.disconnect(self.download_thread.receive_add_character_result) except TypeError: pass self.download_thread = None if self.thread_pool: self.log_signal.emit(" Shutting down worker thread pool...") self.thread_pool.shutdown(wait=False, cancel_futures=True) self.thread_pool = None self.active_futures = [] self.cancellation_event.clear() self.set_ui_enabled(True) self.cancel_btn.setEnabled(False) self.skip_file_btn.setEnabled(False) def prompt_add_character(self, character_name): reply = QMessageBox.question(self, "Add Filter Name?", f"The filter name '{character_name}' is not in your known list.\n\nAdd it now and continue download?", QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) result = (reply == QMessageBox.Yes) if result: self.new_char_input.setText(character_name) if character_name.lower() not in {n.lower() for n in KNOWN_NAMES}: self.add_new_character() if character_name.lower() not in {n.lower() for n in KNOWN_NAMES}: self.log_signal.emit(f"āš ļø Failed to add '{character_name}' via prompt. Check for errors.") result = False else: self.log_signal.emit(f"ā„¹ļø Filter name '{character_name}' was already present or added.") self.character_prompt_response_signal.emit(result) def receive_add_character_result(self, result): with QMutexLocker(self.prompt_mutex): self._add_character_response = result self.log_signal.emit(f" Received prompt response: {'Yes' if result else 'No'}") class DownloadThread(QThread): progress_signal = pyqtSignal(str) add_character_prompt_signal = pyqtSignal(str) file_download_status_signal = pyqtSignal(bool) finished_signal = pyqtSignal(int, int, bool) def __init__(self, api_url, output_dir, known_names_copy, cancellation_event, single_post_id=None, filter_character=None, filter_mode='all', skip_zip=True, skip_rar=True, use_subfolders=True, custom_folder_name=None, compress_images=False, download_thumbnails=False, service=None, user_id=None, downloaded_files=None, downloaded_files_lock=None, downloaded_file_hashes=None, downloaded_file_hashes_lock=None, skip_words_list=None): super().__init__() self._init_failed = False self.api_url_input = api_url self.output_dir = output_dir self.known_names = list(known_names_copy) self.cancellation_event = cancellation_event self.initial_target_post_id = single_post_id self.filter_character = filter_character self.filter_mode = filter_mode self.skip_zip = skip_zip self.skip_rar = skip_rar self.use_subfolders = use_subfolders self.custom_folder_name = custom_folder_name self.compress_images = compress_images self.download_thumbnails = download_thumbnails self.service = service self.user_id = user_id self.skip_words_list = skip_words_list if skip_words_list is not None else [] self.downloaded_files = downloaded_files if downloaded_files is not None else set() self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock() self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set() self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock() self.skip_current_file_flag = threading.Event() self.is_downloading_file = False self.current_download_path = None self._add_character_response = None self.prompt_mutex = QMutex() if not self.service or not self.user_id: log_msg = f"āŒ Thread Init Error: Missing service ('{self.service}') or user ID ('{self.user_id}') for URL '{api_url}'" print(log_msg) try: self.progress_signal.emit(log_msg) except RuntimeError: pass self._init_failed = True def run(self): if self._init_failed: self.finished_signal.emit(0, 0, False) return unwanted_keywords = {'spicy', 'hd', 'nsfw', '4k', 'preview'} grand_total_downloaded = 0 grand_total_skipped = 0 cancelled_by_user = False try: if self.use_subfolders and self.filter_character and not self.custom_folder_name: if not self._check_and_prompt_filter_character(): self.finished_signal.emit(0, 0, False) return worker_signals_adapter = PostProcessorSignals() worker_signals_adapter.progress_signal.connect(self.progress_signal) worker_signals_adapter.file_download_status_signal.connect(self.file_download_status_signal) post_worker = PostProcessorWorker( post_data=None, download_root=self.output_dir, known_names=self.known_names, filter_character=self.filter_character, unwanted_keywords=unwanted_keywords, filter_mode=self.filter_mode, skip_zip=self.skip_zip, skip_rar=self.skip_rar, use_subfolders=self.use_subfolders, target_post_id_from_initial_url=self.initial_target_post_id, custom_folder_name=self.custom_folder_name, compress_images=self.compress_images, download_thumbnails=self.download_thumbnails, service=self.service, user_id=self.user_id, api_url_input=self.api_url_input, cancellation_event=self.cancellation_event, signals=worker_signals_adapter, downloaded_files=self.downloaded_files, downloaded_files_lock=self.downloaded_files_lock, downloaded_file_hashes=self.downloaded_file_hashes, downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, skip_words_list=self.skip_words_list, ) post_worker.skip_current_file_flag = self.skip_current_file_flag self.progress_signal.emit(" Starting post fetch...") def thread_logger(msg): self.progress_signal.emit(msg) post_generator = download_from_api(self.api_url_input, logger=thread_logger) for posts_batch in post_generator: if self.isInterruptionRequested(): self.progress_signal.emit("āš ļø Download cancelled before processing batch.") cancelled_by_user = True break for post in posts_batch: if self.isInterruptionRequested(): self.progress_signal.emit("āš ļø Download cancelled during post processing.") cancelled_by_user = True break post_worker.post = post try: downloaded, skipped = post_worker.process() grand_total_downloaded += downloaded grand_total_skipped += skipped except Exception as proc_e: post_id_err = post.get('id', 'N/A') if isinstance(post, dict) else 'N/A' self.progress_signal.emit(f"āŒ Error processing post {post_id_err}: {proc_e}") import traceback self.progress_signal.emit(traceback.format_exc(limit=2)) grand_total_skipped += 1 self.msleep(20) if cancelled_by_user: break if not cancelled_by_user: self.progress_signal.emit("āœ… Post fetching and processing complete.") except Exception as e: log_msg = f"\nāŒ An critical error occurred in download thread: {e}" self.progress_signal.emit(log_msg) import traceback tb_str = traceback.format_exc() self.progress_signal.emit("--- Traceback ---") for line in tb_str.splitlines(): self.progress_signal.emit(" " + line) self.progress_signal.emit("--- End Traceback ---") cancelled_by_user = False finally: self.finished_signal.emit(grand_total_downloaded, grand_total_skipped, cancelled_by_user) def _check_and_prompt_filter_character(self): clean_char_filter = clean_folder_name(self.filter_character.lower()) known_names_lower = {name.lower() for name in self.known_names} if not clean_char_filter: self.progress_signal.emit(f"āŒ Filter name '{self.filter_character}' is invalid. Aborting.") return False if self.filter_character.lower() not in known_names_lower: self.progress_signal.emit(f"ā“ Filter '{self.filter_character}' not found in known list.") with QMutexLocker(self.prompt_mutex): self._add_character_response = None self.add_character_prompt_signal.emit(self.filter_character) self.progress_signal.emit(" Waiting for user confirmation to add filter name...") while self._add_character_response is None: if self.isInterruptionRequested(): self.progress_signal.emit("āš ļø Cancelled while waiting for user input on filter name.") return False self.msleep(200) if self._add_character_response: self.progress_signal.emit(f"āœ… User confirmed adding '{self.filter_character}'. Continuing.") if self.filter_character not in self.known_names: self.known_names.append(self.filter_character) return True else: self.progress_signal.emit(f"āŒ User declined to add filter '{self.filter_character}'. Aborting download.") return False return True def skip_file(self): if self.isRunning() and self.is_downloading_file: self.progress_signal.emit("ā­ļø Skip requested for current file.") self.skip_current_file_flag.set() elif self.isRunning(): self.progress_signal.emit("ā„¹ļø Skip requested, but no file download active.") def receive_add_character_result(self, result): with QMutexLocker(self.prompt_mutex): self._add_character_response = result self.progress_signal.emit(f" Received prompt response: {'Yes' if result else 'No'}") def isInterruptionRequested(self): return super().isInterruptionRequested() or self.cancellation_event.is_set() if __name__ == '__main__': app = QApplication(sys.argv) app.setWindowIcon(QIcon("Kemono.ico")) from PyQt5.QtGui import QIcon app.setWindowIcon(QIcon("Kemono.ico")) qt_app = QApplication(sys.argv) qt_app.setWindowIcon(QIcon(os.path.join(os.path.dirname(__file__), 'Kemono.ico'))) downloader = DownloaderApp() downloader.show() exit_code = qt_app.exec_() print(f"Application finished with exit code: {exit_code}") sys.exit(exit_code)