diff --git a/main.py b/main.py index 61db148..f637bd9 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,6 @@ import time import requests import re import threading -# import uvicorn # Removed uvicorn import import queue import hashlib # Import hashlib for hashing from concurrent.futures import ThreadPoolExecutor, Future, CancelledError @@ -15,11 +14,8 @@ from PyQt5.QtWidgets import ( QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, QRadioButton, QButtonGroup, QCheckBox, QMainWindow ) - -# Import QObject before other Qt classes that inherit from it if needed from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject from urllib.parse import urlparse -# Import Image module correctly try: from PIL import Image except ImportError: @@ -37,27 +33,20 @@ class MainWindow(QMainWindow): self.setWindowTitle("My App") from io import BytesIO - - -# Removed try-except block for kemono_api fastapi_app = None # Set to None directly KNOWN_NAMES = [] -# --- Helper Functions --- - def clean_folder_name(name): """Removes invalid characters for folder names and replaces spaces with underscores.""" if not isinstance(name, str): name = str(name) # Ensure input is string - # Allow spaces and common separators, remove others cleaned = re.sub(r'[^\w\s\-\_]', '', name) return cleaned.strip().replace(' ', '_') def clean_filename(name): """Removes invalid characters for filenames and replaces spaces with underscores.""" if not isinstance(name, str): name = str(name) # Ensure input is string - # Allow dots for file extensions, and common filename characters cleaned = re.sub(r'[^\w\s\-\_\.]', '', name) return cleaned.strip().replace(' ', '_') @@ -72,7 +61,6 @@ def extract_folder_name_from_title(title, unwanted_keywords): tokens = title_lower.split() for token in tokens: clean_token = clean_folder_name(token) - # Check if token is not empty and not just unwanted keywords if clean_token and clean_token not in unwanted_keywords: return clean_token return 'Uncategorized' # Fallback if no suitable token found @@ -90,10 +78,7 @@ def match_folders_from_title(title, known_names, unwanted_keywords): for name in known_names: cleaned_name_for_match = clean_folder_name(name.lower()) if not cleaned_name_for_match: continue # Skip empty known names - - # Check if the cleaned known name is a substring of the cleaned title if cleaned_name_for_match in cleaned_title: - # Ensure the match itself isn't an unwanted keyword if cleaned_name_for_match not in unwanted_keywords: matched_cleaned_names.add(cleaned_name_for_match) @@ -106,7 +91,6 @@ def is_image(filename): def is_video(filename): if not filename: return False - # Consider common video formats return filename.lower().endswith(('.mp4', '.mov', '.mkv', '.webm', '.avi', '.wmv')) def is_zip(filename): @@ -120,7 +104,6 @@ def is_rar(filename): def is_post_url(url): if not isinstance(url, str): return False - # Simple check for '/post/' segment in the path return '/post/' in urlparse(url).path def extract_post_info(url_string): @@ -136,27 +119,19 @@ def extract_post_info(url_string): parsed_url = urlparse(url_string.strip()) domain = parsed_url.netloc.lower() path_parts = [part for part in parsed_url.path.strip('/').split('/') if part] - - # Check for known domains is_kemono = 'kemono.su' in domain or 'kemono.party' in domain # Added kemono.party is_coomer = 'coomer.su' in domain or 'coomer.party' in domain # Added coomer.party if not (is_kemono or is_coomer): return None, None, None # Unknown domain - - # Pattern: //user/ if len(path_parts) >= 3 and path_parts[1].lower() == 'user': service = path_parts[0] user_id = path_parts[2] - # Pattern: //user//post/ if len(path_parts) >= 5 and path_parts[3].lower() == 'post': post_id = path_parts[4] return service, user_id, post_id - - # API Pattern: /api/v1//user/ if len(path_parts) >= 5 and path_parts[0].lower() == 'api' and path_parts[1].lower() == 'v1' and path_parts[3].lower() == 'user': service = path_parts[2] user_id = path_parts[4] - # API Pattern: /api/v1//user//post/ if len(path_parts) >= 7 and path_parts[5].lower() == 'post': post_id = path_parts[6] return service, user_id, post_id @@ -167,8 +142,6 @@ def extract_post_info(url_string): except Exception as e: # Catch other unexpected errors print(f"Debug: Exception during extract_post_info for URL '{url_string}': {e}") return None, None, None - - # If no pattern matched return None, None, None @@ -179,14 +152,12 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger): try: response = requests.get(paginated_url, headers=headers, timeout=45) # Increased timeout response.raise_for_status() # Check for 4xx/5xx errors - # Check content type before parsing JSON if 'application/json' not in response.headers.get('Content-Type', ''): raise RuntimeError(f"Unexpected content type received: {response.headers.get('Content-Type')}. Body: {response.text[:200]}") return response.json() except requests.exceptions.Timeout: raise RuntimeError(f"Timeout fetching page offset {offset}") except requests.exceptions.RequestException as e: - # Provide more context on request errors err_msg = f"Error fetching page offset {offset}: {e}" if e.response is not None: err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})" @@ -246,7 +217,6 @@ def download_from_api(api_url_input, logger=print): logger(f"šŸ“¦ Found {len(posts_batch)} posts on page {page}.") if target_post_id: - # Search for the target post in this batch matching_post = next((post for post in posts_batch if str(post.get('id')) == str(target_post_id)), None) if matching_post: @@ -255,33 +225,20 @@ def download_from_api(api_url_input, logger=print): processed_target_post = True # Set flag to stop after this else: logger(f" Target post {target_post_id} not found on this page.") - # Assumption: If target_post_id is given, it should be on the first few pages. - # For now, continue pagination until the end if target not found. pass else: - # If not looking for a target post, yield the whole batch yield posts_batch - - # Increment offset only if we are not processing a target post that we just found if not (target_post_id and processed_target_post): - # Determine offset increment (Kemono uses 50, but check API docs if possible) page_size = 50 offset += page_size page += 1 time.sleep(0.6) # Slightly increased delay between page fetches - - # Final check if target post was specified but never found if target_post_id and not processed_target_post: logger(f"āŒ Target post ID {target_post_id} was not found for this creator.") - - - -# --- Worker Object for ThreadPoolExecutor --- class PostProcessorSignals(QObject): """Defines signals emitted by worker threads.""" progress_signal = pyqtSignal(str) file_download_status_signal = pyqtSignal(bool) # True=start, False=end - # No request_cancel_signal needed as workers check shared event class PostProcessorWorker: """Processes a single post within a ThreadPoolExecutor.""" @@ -292,7 +249,6 @@ class PostProcessorWorker: api_url_input, cancellation_event, signals, downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock, skip_words_list=None): # ADDED skip_words_list - # Store all arguments passed self.post = post_data self.download_root = download_root self.known_names = known_names @@ -319,8 +275,6 @@ class PostProcessorWorker: self.downloaded_files_lock = downloaded_files_lock # Use passed lock self.downloaded_file_hashes_lock = downloaded_file_hashes_lock # Use passed lock # ADDED self.skip_words_list = skip_words_list if skip_words_list is not None else [] # ADDED - - # Ensure Pillow is available if compression is enabled if self.compress_images and Image is None: self.logger("āš ļø Image compression enabled, but Pillow library is not loaded. Disabling compression.") self.compress_images = False @@ -335,13 +289,10 @@ class PostProcessorWorker: def check_cancel(self): """Checks the shared cancellation event.""" is_cancelled = self.cancellation_event.is_set() - # No need to log cancellation detection repeatedly here, parent thread handles final status. return is_cancelled def skip_file(self): """Sets the skip flag (not typically called directly on worker).""" - # This method is primarily for the single-threaded QThread context. - # Skip logic for workers relies on checking skip_current_file_flag. pass def process(self): @@ -350,10 +301,8 @@ class PostProcessorWorker: total_downloaded_post = 0 total_skipped_post = 0 - # Standard headers, consider customizing if needed headers = {'User-Agent': 'Mozilla/5.0', 'Referer': f'https://{urlparse(self.api_url_input).netloc}/'} url_pattern = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+') - # LOCAL_API_BASE = "http://127.0.0.1:8000" # Removed LOCAL_API_BASE LARGE_THUMBNAIL_THRESHOLD = 1 * 1024 * 1024 # 1MB post = self.post @@ -363,76 +312,51 @@ class PostProcessorWorker: post_file_info = post.get('file') attachments = post.get('attachments', []) post_content = post.get('content', '') - # is_target_post used for custom folder logic primarily is_target_post = (self.target_post_id_from_initial_url is not None) and (str(post_id) == str(self.target_post_id_from_initial_url)) self.logger(f"\n--- Processing Post {post_id} ('{title[:50]}...') (Thread: {threading.current_thread().name}) ---") - - # --- NEW: Skip Words Check for Post Title --- if self.skip_words_list: title_lower = title.lower() for skip_word in self.skip_words_list: if skip_word.lower() in title_lower: self.logger(f" -> Skip Post (Title): Post {post_id} title ('{title[:30]}...') contains skip word '{skip_word}'. Skipping entire post.") return 0, 1 # 0 downloaded, 1 skipped (the whole post) - # --- END NEW --- if not isinstance(attachments, list): self.logger(f"āš ļø Corrupt attachment data for post {post_id}. Skipping attachments.") attachments = [] - - # --- Determine Download Folder(s) --- valid_folder_paths = [] folder_decision_reason = "" api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su" - - # 1. Custom Folder for Single Target Post (Highest Priority if applicable) if is_target_post and self.custom_folder_name and self.use_subfolders: - # custom_folder_name should already be cleaned by GUI logic folder_path_full = os.path.join(self.download_root, self.custom_folder_name) valid_folder_paths = [folder_path_full] folder_decision_reason = f"Using custom folder for target post: '{self.custom_folder_name}'" - - - # 2. Subfolders Enabled (Character Filter or Automatic) - Only if custom wasn't used if not valid_folder_paths and self.use_subfolders: folder_names_for_post = [] # Cleaned folder names derived for this post - - # a) Character Filter Applied if self.filter_character: clean_char_filter = clean_folder_name(self.filter_character.lower()) - # Match against known names found *in this post's title* matched_names_in_title = match_folders_from_title(title, self.known_names, self.unwanted_keywords) if clean_char_filter and clean_char_filter in matched_names_in_title: - # Use only the filtered character's folder name folder_names_for_post = [clean_char_filter] folder_decision_reason = f"Character filter '{self.filter_character}' matched title. Using folder '{clean_char_filter}'." else: - # Filter specified but doesn't match this post -> SKIP POST self.logger(f" -> Filter Skip Post {post_id}: Character filter '{self.filter_character}' not found in title matches ({matched_names_in_title}).") return 0, 1 # 0 downloaded, 1 skipped (the whole post) - - # b) No Character Filter -> Automatic Naming else: matched_folders = match_folders_from_title(title, self.known_names, self.unwanted_keywords) if matched_folders: folder_names_for_post = matched_folders # Use all matched known names as folders folder_decision_reason = f"Found known name(s) in title: {matched_folders}" else: - # Try extracting a generic name from title extracted_folder = extract_folder_name_from_title(title, self.unwanted_keywords) folder_names_for_post = [extracted_folder] folder_decision_reason = f"No known names in title. Using derived folder: '{extracted_folder}'" - - # Create full paths for the determined folder names for folder_name in folder_names_for_post: folder_path_full = os.path.join(self.download_root, folder_name) valid_folder_paths.append(folder_path_full) - - - # 3. Fallback: Subfolders disabled OR no specific folder determined above if not valid_folder_paths: valid_folder_paths = [self.download_root] # Save directly to root if not folder_decision_reason: # Add reason if not already set @@ -443,48 +367,32 @@ class PostProcessorWorker: if not valid_folder_paths: self.logger(f" ERROR: No valid folder paths determined for post {post_id}. Skipping.") return 0, 1 # Skip post - - - # --- Link Extraction from Content --- if post_content: try: - # More robust link finding, avoid javascript: etc. found_links = re.findall(r'href=["\'](https?://[^"\']+)["\']', post_content) if found_links: self.logger(f"šŸ”— Links found in post content:") unique_links = sorted(list(set(found_links))) # Remove duplicates for link in unique_links[:10]: # Log max 10 links - # Basic filtering of common unwanted links if not any(x in link for x in ['.css', '.js', 'javascript:']): self.logger(f" - {link}") if len(unique_links) > 10: self.logger(f" - ... ({len(unique_links) - 10} more links not shown)") except Exception as e: self.logger(f"āš ļø Error parsing content for links in post {post_id}: {e}") - - - # --- Identify Files/Attachments/Thumbnails --- files_to_process_for_download = [] api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su" if self.download_thumbnails: - # Thumbnail download attempt (original logic, modified to not use local API) self.logger(f" Mode: Attempting to download thumbnail...") # Modified log - # The original code relied on a local API for thumbnails. - # Since we removed the API, this section needs to be adapted or removed. - # For now, we'll simulate that thumbnail download is not available without the API. self.logger(" Thumbnail download via API is disabled as the local API is not used.") - # If download_thumbnails is true, and we can't get it, then we should skip the post - # as per the original logic's intention. self.logger(f" -> Skipping Post {post_id}: Thumbnail download requested but API is disabled.") return 0, 1 # 0 downloaded, 1 skipped post else: # Normal file download mode self.logger(f" Mode: Downloading post file/attachments.") - # Process main post file ('file' field) if post_file_info and isinstance(post_file_info, dict) and post_file_info.get('path'): main_file_path = post_file_info['path'].lstrip('/') - # Use provided name or derive from path main_file_name = post_file_info.get('name') or os.path.basename(main_file_path) if main_file_name: file_url = f"https://{api_domain}/data/{main_file_path}" @@ -494,20 +402,14 @@ class PostProcessorWorker: }) else: self.logger(f" āš ļø Skipping main post file: Missing filename (Path: {main_file_path})") - - # Process attachments - # Use a counter for attachments within the same post for unique naming attachment_counter = 0 for idx, attachment in enumerate(attachments): if isinstance(attachment, dict) and attachment.get('path'): attach_path = attachment['path'].lstrip('/') attach_name = attachment.get('name') or os.path.basename(attach_path) if attach_name: - # Construct a unique name including post ID and attachment index base, ext = os.path.splitext(clean_filename(attach_name)) - # Ensure index is added consistently for attachments final_attach_name = f"{post_id}_{attachment_counter}{ext}" - # Add cleaned original name for readability/lookup if needed, but base on post+index for uniqueness if base and base != f"{post_id}_{attachment_counter}": # Avoid doubling if base is already post_id_index final_attach_name = f"{post_id}_{attachment_counter}_{base}{ext}" @@ -531,22 +433,14 @@ class PostProcessorWorker: return 0, 0 # No files, no action needed, not skipped post self.logger(f" Files identified for download: {len(files_to_process_for_download)}") - - - # --- File Download Loop --- post_download_count = 0 post_skip_count = 0 # Files skipped within this post - - # Use a local set and lock for filenames *within this post's processing* - # This is secondary to the global hash check but helps with filename conflicts within the same post local_processed_filenames = set() local_filenames_lock = threading.Lock() for file_info in files_to_process_for_download: if self.check_cancel(): break # Check cancellation before each file - - # Check skip flag (set by GUI for single-thread mode, usually not used here) if self.skip_current_file_flag.is_set(): original_name_for_log = file_info.get('_original_name_for_log', file_info.get('name', 'unknown_file')) self.logger(f"ā­ļø File skip requested: {original_name_for_log}") @@ -565,8 +459,6 @@ class PostProcessorWorker: continue cleaned_save_filename = clean_filename(original_filename) # Clean the potentially unique name - - # --- NEW: Skip Words Check for Filename --- if self.skip_words_list: filename_lower = cleaned_save_filename.lower() file_skipped_by_word = False @@ -578,10 +470,6 @@ class PostProcessorWorker: break if file_skipped_by_word: continue # Skip to next file in the post - # --- END NEW --- - - - # --- Apply File Type Filters (if not in thumbnail mode) --- if not self.download_thumbnails: # This condition will always be true now file_skipped_by_filter = False is_img = is_image(cleaned_save_filename) @@ -605,13 +493,9 @@ class PostProcessorWorker: if file_skipped_by_filter: post_skip_count += 1 continue # Skip to next file - - # --- Attempt Download to Each Valid Folder --- file_downloaded_or_exists = False for folder_path in valid_folder_paths: if self.check_cancel(): break # Check cancellation before each folder attempt - - # --- Ensure Directory Exists --- try: os.makedirs(folder_path, exist_ok=True) except OSError as e: @@ -623,15 +507,11 @@ class PostProcessorWorker: save_path = os.path.join(folder_path, cleaned_save_filename) folder_basename = os.path.basename(folder_path) # For logging - - # --- Check if File Already Exists on Disk OR Downloaded (Filename Check) --- - # Check size > 0 to avoid re-downloading empty files from previous failures with local_filenames_lock: # Use local lock for filename set within post if os.path.exists(save_path) and os.path.getsize(save_path) > 0: self.logger(f" -> Exists Skip: '{original_name_for_log}' in '{folder_basename}'") post_skip_count += 1 # Count exists as skipped for this post's summary file_downloaded_or_exists = True - # Add to global filename set just in case with self.downloaded_files_lock: self.downloaded_files.add(cleaned_save_filename) break # Don't try other folders if it exists in one valid location @@ -639,31 +519,22 @@ class PostProcessorWorker: self.logger(f" -> Local Skip: '{original_name_for_log}' in '{folder_basename}' (already processed in this post)") post_skip_count += 1 file_downloaded_or_exists = True - # Add to global filename set just in case with self.downloaded_files_lock: self.downloaded_files.add(cleaned_save_filename) break # Don't try other folders - # Global filename check (less critical with hash check, but for consistency) with self.downloaded_files_lock: if cleaned_save_filename in self.downloaded_files: self.logger(f" -> Global Filename Skip: '{original_name_for_log}' in '{folder_basename}' (filename already downloaded globally)") post_skip_count += 1 file_downloaded_or_exists = True break # Don't try other folders - - - # --- Actual Download Attempt --- try: self.logger(f"ā¬‡ļø Downloading '{original_name_for_log}' to '{folder_basename}'...") self.current_download_path = save_path # Still set the potential path self.is_downloading_file = True self.signals.file_download_status_signal.emit(True) # Signal START - - # Use stream=True for large files, adjust timeout response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True) # (connect_timeout, read_timeout) response.raise_for_status() # Check for HTTP errors - - # --- Download Content in Chunks and Calculate Hash --- file_content_bytes = BytesIO() downloaded_size = 0 chunk_count = 0 @@ -671,7 +542,6 @@ class PostProcessorWorker: for chunk in response.iter_content(chunk_size=32 * 1024): # 32KB chunks if self.check_cancel(): break # Check cancellation frequently - # Skip flag check (less relevant for worker, but for consistency) if self.skip_current_file_flag.is_set(): break if chunk: # filter out keep-alive new chunks @@ -679,21 +549,12 @@ class PostProcessorWorker: md5_hash.update(chunk) # Update hash with chunk downloaded_size += len(chunk) chunk_count += 1 - # Optional: Add progress reporting per chunk if needed - - # Check again if loop was broken by cancellation/skip if self.check_cancel() or self.skip_current_file_flag.is_set(): self.logger(f" āš ļø Download interrupted {'(cancelled)' if self.cancellation_event.is_set() else '(skipped)'} for {original_name_for_log}.") - # Clean up partial file - not needed here as we haven't saved yet - # Ensure this file is marked as skipped if interrupted by skip flag if self.skip_current_file_flag.is_set(): post_skip_count += 1 self.skip_current_file_flag.clear() - # Need to break from the folder loop as well break # Break from trying other folders - - - # --- Process Downloaded Content (Hash Check, Compression, Save) --- final_save_path = save_path # May change if compressed current_filename_for_log = cleaned_save_filename # May change file_content_bytes.seek(0) # Rewind the BytesIO object to the beginning @@ -705,52 +566,38 @@ class PostProcessorWorker: if downloaded_size > 0: calculated_hash = md5_hash.hexdigest() # Get the final hash - - # --- Content Hash Check --- with self.downloaded_file_hashes_lock: # Use lock for hash set if calculated_hash in self.downloaded_file_hashes: self.logger(f" -> Content Skip: '{original_name_for_log}' (Hash: {calculated_hash}) already downloaded.") post_skip_count += 1 file_downloaded_or_exists = True # Mark as handled - # Add filename to global set just in case filename checks are used elsewhere with self.downloaded_files_lock: self.downloaded_files.add(cleaned_save_filename) - # Add filename to local set as well with local_filenames_lock: local_processed_filenames.add(cleaned_save_filename) - # No need to save or compress, break from folder loop break else: - # Hash not found, proceed with saving and adding hash later pass if not file_downloaded_or_exists: # Only proceed if not skipped by hash check final_bytes_to_save = file_content_bytes - - # --- Image Compression --- - # Re-check if it's an image *after* download, just in case is_img_for_compress = is_image(cleaned_save_filename) if is_img_for_compress and not is_thumbnail and self.compress_images and Image and downloaded_size > 1500 * 1024: self.logger(f" Compressing large image ({downloaded_size / 1024:.2f} KB)...") try: - # Open image from bytes with Image.open(file_content_bytes) as img: original_format = img.format - # Handle palette/mode issues for saving to WebP if img.mode == 'P': img = img.convert('RGBA') elif img.mode not in ['RGB', 'RGBA', 'L']: img = img.convert('RGB') compressed_bytes = BytesIO() img.save(compressed_bytes, format='WebP', quality=75, method=4) # Adjust quality/method compressed_size = compressed_bytes.getbuffer().nbytes - - # Only save if significantly smaller (e.g., > 10% reduction) if compressed_size < downloaded_size * 0.90: self.logger(f" Compression success: {compressed_size / 1024:.2f} KB (WebP Q75)") compressed_bytes.seek(0) final_bytes_to_save = compressed_bytes - # Update filename and save path base, _ = os.path.splitext(cleaned_save_filename) current_filename_for_log = base + '.webp' final_save_path = os.path.join(folder_path, current_filename_for_log) @@ -767,7 +614,6 @@ class PostProcessorWorker: final_save_path = save_path # Ensure original path elif is_img_for_compress and not is_thumbnail and self.compress_images: - # Log reason if compression enabled but size too small self.logger(f" Skipping compression: Image size ({downloaded_size / 1024:.2f} KB) below threshold.") file_content_bytes.seek(0) final_bytes_to_save = file_content_bytes @@ -779,11 +625,6 @@ class PostProcessorWorker: else: # Ensure stream is rewound if no compression happened file_content_bytes.seek(0) final_bytes_to_save = file_content_bytes - - - # --- Save to Disk --- - # Check existence again before writing (reduce race slightly), though hash check is primary now. - # Also check filename sets again. save_file = False with self.downloaded_files_lock: # Lock for global filename set with local_filenames_lock: # Lock for local filename set @@ -806,13 +647,10 @@ class PostProcessorWorker: if save_file: try: with open(final_save_path, 'wb') as f: - # Write in chunks to handle potentially large compressed streams while True: chunk = final_bytes_to_save.read(64 * 1024) # 64KB write chunks if not chunk: break f.write(chunk) - - # File saved successfully, now add hash and filename to sets with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_hash) # ADD HASH with self.downloaded_files_lock: @@ -828,11 +666,9 @@ class PostProcessorWorker: except IOError as io_err: self.logger(f"āŒ Save Fail: '{current_filename_for_log}' to '{folder_basename}'. Error: {io_err}") post_skip_count += 1 # Count save failure as skip - # Attempt cleanup of potentially partial file if os.path.exists(final_save_path): try: os.remove(final_save_path) except OSError: pass - # Continue to next folder? Probably not useful if save failed. Break folder loop. break except Exception as save_err: self.logger(f"āŒ Unexpected Save Error: '{current_filename_for_log}' in '{folder_basename}'. Error: {save_err}") @@ -841,26 +677,16 @@ class PostProcessorWorker: try: os.remove(final_save_path) except OSError: pass break # Break folder loop on unexpected error - - # Clean up BytesIO streams final_bytes_to_save.close() - # Only close original if it's different from the one saved if file_content_bytes is not final_bytes_to_save: file_content_bytes.close() - - # If downloaded/exists/saved successfully, break from folder loop if file_downloaded_or_exists: break - - # --- Error Handling for Download Attempt --- except requests.exceptions.RequestException as e: self.logger(f"āŒ Download Fail: {original_name_for_log}. Error: {e}") - # Clean up file if it was created partially/empty - not needed with BytesIO first post_skip_count += 1 - # Break folder loop: If download failed once, it will likely fail again. break except IOError as e: - # This might happen if folder becomes inaccessible between check and write self.logger(f"āŒ File I/O Error: {original_name_for_log} in '{folder_basename}'. Error: {e}") post_skip_count += 1 break # Break folder loop @@ -872,42 +698,22 @@ class PostProcessorWorker: break # Break folder loop on unexpected error finally: - # --- Cleanup after each folder attempt --- self.is_downloading_file = False self.current_download_path = None self.signals.file_download_status_signal.emit(False) # Signal END - - # --- End of Folder Loop --- if self.check_cancel(): break # Check cancellation after trying all folders - - # Reset skip flag if it was processed if self.skip_current_file_flag.is_set(): self.skip_current_file_flag.clear() - - # If the file wasn't handled (downloaded/exists/skipped) in any folder, log it. - # The skip count should already reflect failures or explicit skips. if not file_downloaded_or_exists: - # Check if it wasn't skipped for other reasons already counted - # This log might be redundant if errors were already logged. - # self.logger(f" -> File '{original_name_for_log}' not downloaded/found in any target folder.") pass - - - # --- End of File Loop for Post --- if self.check_cancel(): self.logger(f" Post {post_id} processing cancelled.") - # Return counts accumulated *before* cancellation return post_download_count, post_skip_count self.logger(f" Post {post_id} Summary: Downloaded={post_download_count}, Skipped={post_skip_count}") return post_download_count, post_skip_count - - - -# --- Main Application Class --- class DownloaderApp(QWidget): - # Signals for cross-thread communication character_prompt_response_signal = pyqtSignal(bool) log_signal = pyqtSignal(str) add_character_prompt_signal = pyqtSignal(str) @@ -918,7 +724,6 @@ class DownloaderApp(QWidget): def __init__(self): super().__init__() - # Initialize core attributes first self.config_file = "Known.txt" self.download_thread = None self.thread_pool = None @@ -931,49 +736,29 @@ class DownloaderApp(QWidget): self.worker_signals = PostProcessorSignals() # Single instance for workers self.prompt_mutex = QMutex() self._add_character_response = None - # self.api_server = None # Removed api_server - # self.api_thread = None # Removed api_thread self.downloaded_files = set() # Shared set for tracking downloaded filenames (secondary check) self.downloaded_files_lock = threading.Lock() # Lock for filenames set self.downloaded_file_hashes = set() # Shared set for tracking downloaded file hashes (primary check) # ADDED self.downloaded_file_hashes_lock = threading.Lock() # Lock for hashes set # ADDED - - - # Load configuration *after* initializing essential attributes self.load_known_names() # Load KNOWN_NAMES global - - # Setup UI last self.setWindowTitle("Kemono Downloader v2.3 (Content Dedupe & Skip)") # Updated Title self.setGeometry(150, 150, 1050, 820) # Adjusted size for new field self.setStyleSheet(self.get_dark_theme()) self.init_ui() # Initialize UI elements - - # Connect signals self._connect_signals() - - # Start API server if configured - Removed call to start_api_server - # self.start_api_server() self.log_signal.emit("ā„¹ļø Local API server functionality has been removed.") def _connect_signals(self): """Connect all signals for clarity.""" - # Signals from worker helper (for multi-threading) self.worker_signals.progress_signal.connect(self.log) self.worker_signals.file_download_status_signal.connect(self.update_skip_button_state) - - # Internal signals for GUI updates and thread communication self.log_signal.connect(self.log) self.add_character_prompt_signal.connect(self.prompt_add_character) self.character_prompt_response_signal.connect(self.receive_add_character_result) self.overall_progress_signal.connect(self.update_progress_display) self.finished_signal.connect(self.download_finished) - - # Connect search bar signal self.character_search_input.textChanged.connect(self.filter_character_list) # CONNECTED - - - # --- Config Loading/Saving --- def load_known_names(self): """Loads known names from the config file into the global KNOWN_NAMES list.""" global KNOWN_NAMES @@ -981,7 +766,6 @@ class DownloaderApp(QWidget): if os.path.exists(self.config_file): try: with open(self.config_file, 'r', encoding='utf-8') as f: - # Read lines, strip whitespace, filter empty lines raw_names = [line.strip() for line in f] loaded_names = sorted(list(set(filter(None, raw_names)))) log_msg = f"ā„¹ļø Loaded {len(loaded_names)} known names from {self.config_file}" @@ -994,9 +778,6 @@ class DownloaderApp(QWidget): loaded_names = [] KNOWN_NAMES = loaded_names # Update global list - - # Log the message (use signal if UI ready, otherwise print) - # Check if log_output exists before emitting signal during init if hasattr(self, 'log_output'): self.log_signal.emit(log_msg) else: @@ -1007,16 +788,11 @@ class DownloaderApp(QWidget): """Saves the current global KNOWN_NAMES list to the config file.""" global KNOWN_NAMES try: - # Ensure uniqueness and sort before saving unique_sorted_names = sorted(list(set(filter(None, KNOWN_NAMES)))) with open(self.config_file, 'w', encoding='utf-8') as f: for name in unique_sorted_names: f.write(name + '\n') - - # Update global list to cleaned version (consistency) KNOWN_NAMES = unique_sorted_names - - # Use log_signal safely if hasattr(self, 'log_signal'): self.log_signal.emit(f"šŸ’¾ Saved {len(unique_sorted_names)} known names to {self.config_file}") else: @@ -1029,14 +805,10 @@ class DownloaderApp(QWidget): else: print(log_msg) QMessageBox.warning(self, "Config Save Error", f"Could not save list to {self.config_file}:\n{e}") - - # --- Event Handling --- def closeEvent(self, event): """Handles application closing: saves config, checks for running downloads.""" self.save_known_names() # Save names first should_exit = True - - # Check if download is active (either mode) is_downloading = (self.download_thread and self.download_thread.isRunning()) or (self.thread_pool is not None) if is_downloading: @@ -1046,7 +818,6 @@ class DownloaderApp(QWidget): if reply == QMessageBox.Yes: self.log_signal.emit("āš ļø Cancelling active download due to application exit...") self.cancel_download() # Request cancellation - # Allow some time for cancellation signal to propagate? Maybe not needed. else: should_exit = False self.log_signal.emit("ā„¹ļø Application exit cancelled.") @@ -1055,22 +826,13 @@ class DownloaderApp(QWidget): if should_exit: self.log_signal.emit("ā„¹ļø Application closing.") # Removed "Stopping API server..." - # self._shutdown_api_server() # Removed call to shutdown API server self.log_signal.emit("šŸ‘‹ Exiting application.") event.accept() # Allow closing - - # Removed _shutdown_api_server method - # def _shutdown_api_server(self): ... - - - # --- UI Initialization --- def init_ui(self): """Sets up all the UI widgets and layouts.""" main_layout = QHBoxLayout() left_layout = QVBoxLayout() right_layout = QVBoxLayout() - - # --- Left Side Controls --- left_layout.addWidget(QLabel("šŸ”— Kemono Creator/Post URL:")) self.link_input = QLineEdit() self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765") @@ -1086,8 +848,6 @@ class DownloaderApp(QWidget): dir_layout.addWidget(self.dir_input, 1) # Input takes more space dir_layout.addWidget(self.dir_button) left_layout.addLayout(dir_layout) - - # Custom Folder Input (Visible only for single posts + subfolders) self.custom_folder_widget = QWidget() custom_folder_layout = QVBoxLayout(self.custom_folder_widget) custom_folder_layout.setContentsMargins(0, 5, 0, 0) # Add top margin @@ -1098,8 +858,6 @@ class DownloaderApp(QWidget): custom_folder_layout.addWidget(self.custom_folder_input) self.custom_folder_widget.setVisible(False) # Initially hidden left_layout.addWidget(self.custom_folder_widget) - - # Character Filter Input (Visible only with subfolders) self.character_filter_widget = QWidget() character_filter_layout = QVBoxLayout(self.character_filter_widget) character_filter_layout.setContentsMargins(0, 5, 0, 0) # Add top margin @@ -1110,16 +868,10 @@ class DownloaderApp(QWidget): character_filter_layout.addWidget(self.character_input) self.character_filter_widget.setVisible(True) # Initially visible, controlled by subfolder checkbox left_layout.addWidget(self.character_filter_widget) - - # --- NEW: Skip Words Input Field --- left_layout.addWidget(QLabel("🚫 Skip Posts/Files with Words (comma-separated):")) self.skip_words_input = QLineEdit() self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview") left_layout.addWidget(self.skip_words_input) - # --- END NEW --- - - - # --- Options Row 1 --- options_layout_1 = QHBoxLayout() options_layout_1.addWidget(QLabel("Filter Files:")) self.radio_group = QButtonGroup(self) @@ -1127,7 +879,6 @@ class DownloaderApp(QWidget): self.radio_images = QRadioButton("Images/GIFs") self.radio_videos = QRadioButton("Videos") self.radio_all.setChecked(True) - # Add radios to group for exclusivity self.radio_group.addButton(self.radio_all) self.radio_group.addButton(self.radio_images) self.radio_group.addButton(self.radio_videos) @@ -1136,8 +887,6 @@ class DownloaderApp(QWidget): options_layout_1.addWidget(self.radio_videos) options_layout_1.addStretch(1) left_layout.addLayout(options_layout_1) - - # --- Options Row 2 (Checkboxes) --- options_layout_2 = QHBoxLayout() self.use_subfolders_checkbox = QCheckBox("Separate Folders by Name/Title") self.use_subfolders_checkbox.setChecked(True) @@ -1150,8 +899,6 @@ class DownloaderApp(QWidget): options_layout_2.addWidget(self.download_thumbnails_checkbox) options_layout_2.addStretch(1) left_layout.addLayout(options_layout_2) - - # --- Options Row 3 (Checkboxes) --- options_layout_3 = QHBoxLayout() self.skip_zip_checkbox = QCheckBox("Skip .zip") self.skip_zip_checkbox.setChecked(True) @@ -1166,8 +913,6 @@ class DownloaderApp(QWidget): options_layout_3.addWidget(self.compress_images_checkbox) options_layout_3.addStretch(1) left_layout.addLayout(options_layout_3) - - # --- Options Row 4 (Threading) --- options_layout_4 = QHBoxLayout() self.use_multithreading_checkbox = QCheckBox(f"Use Multithreading ({4} Threads)") # Use constant self.use_multithreading_checkbox.setChecked(True) # Default to on @@ -1175,9 +920,6 @@ class DownloaderApp(QWidget): options_layout_4.addWidget(self.use_multithreading_checkbox) options_layout_4.addStretch(1) left_layout.addLayout(options_layout_4) - - - # --- Action Buttons --- btn_layout = QHBoxLayout() self.download_btn = QPushButton("ā¬‡ļø Start Download") self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;") # Make prominent @@ -1194,9 +936,6 @@ class DownloaderApp(QWidget): btn_layout.addWidget(self.skip_file_btn) left_layout.addLayout(btn_layout) left_layout.addSpacing(10) # Add space before list - - - # --- Known Names List with Search --- known_chars_label_layout = QHBoxLayout() self.known_chars_label = QLabel("šŸŽ­ Known Shows/Characters (for Folder Names):") self.character_search_input = QLineEdit() # ADDED search bar @@ -1207,54 +946,38 @@ class DownloaderApp(QWidget): left_layout.addLayout(known_chars_label_layout) # Use the new layout self.character_list = QListWidget() - # Load names *after* list widget is created self.character_list.addItems(KNOWN_NAMES) self.character_list.setSelectionMode(QListWidget.ExtendedSelection) left_layout.addWidget(self.character_list, 1) # Allow list to stretch vertically - - # Add/Delete Known Names Controls char_manage_layout = QHBoxLayout() self.new_char_input = QLineEdit() self.new_char_input.setPlaceholderText("Add new show/character name") self.add_char_button = QPushButton("āž• Add") self.delete_char_button = QPushButton("šŸ—‘ļø Delete Selected") self.add_char_button.clicked.connect(self.add_new_character) - # Allow adding via Enter key self.new_char_input.returnPressed.connect(self.add_char_button.click) self.delete_char_button.clicked.connect(self.delete_selected_character) char_manage_layout.addWidget(self.new_char_input, 2) # Input wider char_manage_layout.addWidget(self.add_char_button, 1) char_manage_layout.addWidget(self.delete_char_button, 1) left_layout.addLayout(char_manage_layout) - - - # --- Right Side Log & Progress --- right_layout.addWidget(QLabel("šŸ“œ Progress Log:")) self.log_output = QTextEdit() self.log_output.setReadOnly(True) self.log_output.setMinimumWidth(450) # Ensure decent width self.log_output.setLineWrapMode(QTextEdit.WidgetWidth) # Wrap lines right_layout.addWidget(self.log_output, 1) # Log area stretches - - # Progress Label self.progress_label = QLabel("Progress: Idle") self.progress_label.setStyleSheet("padding-top: 5px; font-style: italic;") right_layout.addWidget(self.progress_label) - # Consider adding QProgressBar if desired - - - # --- Assemble Main Layout --- main_layout.addLayout(left_layout, 5) # Left side takes 5 parts width main_layout.addLayout(right_layout, 4) # Right side takes 4 parts width self.setLayout(main_layout) - - # Initial UI state updates based on defaults self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked()) self.update_custom_folder_visibility() def get_dark_theme(self): - # Dark theme CSS (improved readability slightly) return """ QWidget { background-color: #2E2E2E; /* Slightly lighter dark */ @@ -1325,10 +1048,7 @@ class DownloaderApp(QWidget): border-radius: 3px; } """ - - # --- UI Interaction Methods --- def browse_directory(self): - # Suggest last used directory? QSettings could store this. current_dir = self.dir_input.text() if os.path.isdir(self.dir_input.text()) else "" folder = QFileDialog.getExistingDirectory(self, "Select Download Folder", current_dir) if folder: @@ -1339,12 +1059,10 @@ class DownloaderApp(QWidget): try: safe_message = str(message).replace('\x00', '[NULL]') # Ensure string, sanitize nulls self.log_output.append(safe_message) - # Auto-scroll only if near the bottom scrollbar = self.log_output.verticalScrollBar() if scrollbar.value() >= scrollbar.maximum() - 30: # Threshold scrollbar.setValue(scrollbar.maximum()) except Exception as e: - # Fallback if GUI logging fails print(f"GUI Log Error: {e}") print(f"Original Message: {message}") @@ -1363,18 +1081,12 @@ class DownloaderApp(QWidget): if not name_to_add: QMessageBox.warning(self, "Input Error", "Name cannot be empty.") return - - # Check for duplicates using case-insensitive comparison - # Store original case, but compare lower-case name_lower = name_to_add.lower() is_duplicate = any(existing.lower() == name_lower for existing in KNOWN_NAMES) if not is_duplicate: - # Add the name with its original casing KNOWN_NAMES.append(name_to_add) - # Sort case-insensitively for display consistency KNOWN_NAMES.sort(key=str.lower) - # Update the list widget and apply current filter self.character_list.clear() self.character_list.addItems(KNOWN_NAMES) self.filter_character_list(self.character_search_input.text()) # Apply current filter # MODIFIED @@ -1400,31 +1112,26 @@ class DownloaderApp(QWidget): if confirm == QMessageBox.Yes: original_count = len(KNOWN_NAMES) - # Filter list, keeping names NOT in the removal set KNOWN_NAMES = [n for n in KNOWN_NAMES if n not in names_to_remove] removed_count = original_count - len(KNOWN_NAMES) if removed_count > 0: self.log_signal.emit(f"šŸ—‘ļø Removed {removed_count} name(s) from the list.") - # Update UI list (ensure sorted after removal) self.character_list.clear() KNOWN_NAMES.sort(key=str.lower) # Re-sort remaining names self.character_list.addItems(KNOWN_NAMES) self.filter_character_list(self.character_search_input.text()) # Apply current filter # MODIFIED self.save_known_names() # Save changes else: - # This shouldn't happen if items were selected, but handle just in case self.log_signal.emit("ā„¹ļø No names were removed (selection might have changed?).") def update_custom_folder_visibility(self, url_text=None): """Shows/hides the custom folder input based on URL and subfolder setting.""" - # If called by signal without text, get it from input widget if url_text is None: url_text = self.link_input.text() _, _, post_id = extract_post_info(url_text.strip()) - # Show only if it's a single post URL AND subfolders are enabled should_show = bool(post_id) and self.use_subfolders_checkbox.isChecked() self.custom_folder_widget.setVisible(should_show) @@ -1434,21 +1141,15 @@ class DownloaderApp(QWidget): def update_ui_for_subfolders(self, checked): """Updates related UI elements when 'Separate Folders' checkbox changes.""" - # Show/hide the character filter input self.character_filter_widget.setVisible(checked) - # Re-evaluate custom folder visibility (depends on both subfolders and URL type) self.update_custom_folder_visibility() - # Clear character filter input if subfolders are disabled if not checked: self.character_input.clear() - - # ADDED method to filter the character list def filter_character_list(self, search_text): """Filters the character list based on the search text.""" search_text = search_text.lower() for i in range(self.character_list.count()): item = self.character_list.item(i) - # Check if the item's text contains the search text (case-insensitive) if search_text in item.text().lower(): item.setHidden(False) else: @@ -1468,20 +1169,13 @@ class DownloaderApp(QWidget): else: self.progress_label.setText("Progress: Starting...") - - - # --- Download Logic Orchestration --- - def start_download(self): """Validates inputs and starts the download in single or multi-threaded mode.""" - # Check if already running is_running = (self.download_thread and self.download_thread.isRunning()) or (self.thread_pool is not None) if is_running: self.log_signal.emit("āš ļø Download already in progress.") QMessageBox.warning(self, "Busy", "A download is already running.") return - - # --- Gather Inputs --- api_url = self.link_input.text().strip() output_dir = self.dir_input.text().strip() filter_mode = self.get_filter_mode() @@ -1492,17 +1186,10 @@ class DownloaderApp(QWidget): download_thumbnails = self.download_thumbnails_checkbox.isChecked() use_multithreading = self.use_multithreading_checkbox.isChecked() num_threads = 4 # Define number of threads - - # --- NEW: Get Skip Words --- raw_skip_words = self.skip_words_input.text().strip() skip_words_list = [] if raw_skip_words: - # Split by comma, strip whitespace from each word, and filter out empty strings skip_words_list = [word.strip() for word in raw_skip_words.split(',') if word.strip()] - # --- END NEW --- - - - # --- Input Validation --- service, user_id, post_id_from_url = extract_post_info(api_url) if not api_url: @@ -1529,18 +1216,10 @@ class DownloaderApp(QWidget): return else: # User chose not to create return - - - - - # Pillow Check for Compression if compress_images and Image is None: QMessageBox.warning(self, "Dependency Missing", "Image compression requires the Pillow library, but it's not installed.\nPlease run: pip install Pillow\n\nCompression will be disabled for this session.") self.log_signal.emit("āŒ Cannot compress images: Pillow library not found.") compress_images = False # Disable for this run - - - # --- Gather Filters --- filter_character = None if use_subfolders and self.character_filter_widget.isVisible(): filter_character = self.character_input.text().strip() or None @@ -1555,9 +1234,6 @@ class DownloaderApp(QWidget): else: QMessageBox.warning(self, "Input Warning", f"Custom folder name '{raw_custom_name}' is invalid and will be ignored.") self.log_signal.emit(f"āš ļø Invalid custom folder name ignored: {raw_custom_name}") - - - # --- Character Filter Pre-Validation --- if use_subfolders and filter_character and not post_id_from_url: # Only validate filter if for whole creator clean_char_filter = clean_folder_name(filter_character.lower()) known_names_lower = {name.lower() for name in KNOWN_NAMES} @@ -1567,31 +1243,22 @@ class DownloaderApp(QWidget): QMessageBox.critical(self, "Filter Error", "The provided filter name is invalid (contains only spaces or special characters).") return elif filter_character.lower() not in known_names_lower: - # Prompt user to add the name before starting threads reply = QMessageBox.question(self, "Add Filter Name?", f"The filter name '{filter_character}' is not in your known names list.\n\nAdd it now and continue?", QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes) if reply == QMessageBox.Yes: - # Add the name and save (use existing method) self.new_char_input.setText(filter_character) # Pre-fill input for user convenience? No, just add it. self.add_new_character() # This adds, sorts, saves, updates UI - # Check if adding failed (e.g., duplicate detected by add_new_character) if filter_character.lower() not in {name.lower() for name in KNOWN_NAMES}: self.log_signal.emit(f"āš ļø Failed to add '{filter_character}' automatically. Please add manually if needed.") - # Don't abort here, let download proceed without guaranteed filter match yet else: self.log_signal.emit(f"āœ… Added filter '{filter_character}' to list.") elif reply == QMessageBox.No: self.log_signal.emit(f"ā„¹ļø Proceeding without adding '{filter_character}'. Posts matching it might not be saved to a specific folder unless name is derived.") - # Allow proceeding, but filter might not work as expected if name isn't known - # Filter logic inside worker will handle skipping if name doesn't match title else: # Cancel self.log_signal.emit("āŒ Download cancelled by user during filter check.") return # Abort download - - - # --- Reset State & Log Start --- self.log_output.clear() self.cancellation_event.clear() # Reset cancellation flag self.active_futures = [] @@ -1599,15 +1266,12 @@ class DownloaderApp(QWidget): self.processed_posts_count = 0 self.download_counter = 0 self.skip_counter = 0 - # Clear both downloaded sets with self.downloaded_files_lock: self.downloaded_files.clear() # Clear downloaded files set (filenames) with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.clear() # Clear downloaded file hashes set # ADDED self.progress_label.setText("Progress: Initializing...") - - # Log settings clearly before starting self.log_signal.emit("="*40) self.log_signal.emit(f"šŸš€ Starting Download Task @ {time.strftime('%Y-%m-%d %H:%M:%S')}") self.log_signal.emit(f" URL: {api_url}") @@ -1624,30 +1288,18 @@ class DownloaderApp(QWidget): self.log_signal.emit(f" Folder Naming: Automatic (Known Names > Title Extraction)") self.log_signal.emit(f" File Type Filter: {filter_mode}") self.log_signal.emit(f" Skip: {'.zip' if skip_zip else ''}{', ' if skip_zip and skip_rar else ''}{'.rar' if skip_rar else ''}{'None' if not (skip_zip or skip_rar) else ''}") - # --- NEW: Log Skip Words --- if skip_words_list: self.log_signal.emit(f" Skip Words (Title/Filename): {', '.join(skip_words_list)}") else: self.log_signal.emit(f" Skip Words (Title/Filename): None") - # --- END NEW --- self.log_signal.emit(f" Compress Images: {'Enabled' if compress_images else 'Disabled'}") self.log_signal.emit(f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}") - - - # --- Determine Execution Mode --- - # Always single-thread for single post URLs should_use_multithreading = use_multithreading and not post_id_from_url self.log_signal.emit(f" Threading: {'Multi-threaded' if should_use_multithreading else 'Single-threaded'}") self.log_signal.emit("="*40) - - - # --- Disable UI & Enable Cancel --- self.set_ui_enabled(False) self.cancel_btn.setEnabled(True) - - # --- Start Execution --- try: - # Collect arguments common to both modes common_args = { 'api_url': api_url, 'output_dir': output_dir, @@ -1661,7 +1313,6 @@ class DownloaderApp(QWidget): 'download_thumbnails': download_thumbnails, 'service': service, 'user_id': user_id, - # Pass shared state and locks 'downloaded_files': self.downloaded_files, 'downloaded_files_lock': self.downloaded_files_lock, 'downloaded_file_hashes': self.downloaded_file_hashes, # ADDED @@ -1671,14 +1322,11 @@ class DownloaderApp(QWidget): if should_use_multithreading: self.log_signal.emit(" Initializing multi-threaded download...") - # Add multi-threading specific args multi_args = common_args.copy() multi_args['num_threads'] = num_threads self.start_multi_threaded_download(**multi_args) else: - # Single post or multi-threading disabled self.log_signal.emit(" Initializing single-threaded download...") - # Add single-thread specific args single_args = common_args.copy() single_args['custom_folder_name'] = custom_folder_name single_args['single_post_id'] = post_id_from_url @@ -1689,7 +1337,6 @@ class DownloaderApp(QWidget): import traceback self.log_signal.emit(traceback.format_exc()) QMessageBox.critical(self, "Start Error", f"Failed to start download task:\n{e}") - # Ensure UI is reset if start fails critically self.download_finished(0, 0, False) # Reset UI state @@ -1698,24 +1345,17 @@ class DownloaderApp(QWidget): try: self.download_thread = DownloadThread( cancellation_event = self.cancellation_event, # Pass the shared event - # Pass all other relevant kwargs collected in start_download **kwargs ) if self.download_thread._init_failed: - # Error already logged by thread's init QMessageBox.critical(self, "Thread Error", "Failed to initialize the download thread.\nCheck the log for details.") self.download_finished(0, 0, False) # Reset UI return - - # Connect signals from this specific thread instance - # These replace the direct connections used in previous versions self.download_thread.progress_signal.connect(self.log_signal) # Use log_signal slot self.download_thread.add_character_prompt_signal.connect(self.add_character_prompt_signal) # Forward signal self.download_thread.file_download_status_signal.connect(self.file_download_status_signal) # Forward signal self.download_thread.finished_signal.connect(self.finished_signal) # Forward signal - - # Connect response signal *to* the thread instance self.character_prompt_response_signal.connect(self.download_thread.receive_add_character_result) self.download_thread.start() @@ -1739,17 +1379,8 @@ class DownloaderApp(QWidget): self.total_posts_to_process = 0 # Updated by fetcher self.download_counter = 0 self.skip_counter = 0 - - # Prepare arguments for worker threads (PostProcessorWorker) - # Remove args not needed by worker's init or handled differently worker_args_template = kwargs.copy() del worker_args_template['num_threads'] - # api_url is used by fetcher, but also needed by worker for domain/URL construction - # service/user_id are passed explicitly - # output_dir needs to be mapped to download_root - # Pass shared state explicitly - - # Start the fetcher thread fetcher_thread = threading.Thread( target=self._fetch_and_queue_posts, args=(kwargs['api_url'], worker_args_template), @@ -1766,9 +1397,7 @@ class DownloaderApp(QWidget): fetch_error = False try: self.log_signal.emit(" Starting post fetch...") - # Define logger for the generator def fetcher_logger(msg): - # Add prefix to distinguish fetcher logs if needed self.log_signal.emit(f"[Fetcher] {msg}") post_generator = download_from_api(api_url_input, logger=fetcher_logger) @@ -1778,11 +1407,9 @@ class DownloaderApp(QWidget): self.log_signal.emit("āš ļø Post fetching cancelled by user.") fetch_error = True # Treat cancellation during fetch as an error state for cleanup break - # Basic validation of batch if isinstance(posts_batch, list): all_posts.extend(posts_batch) self.total_posts_to_process = len(all_posts) - # Emit progress less frequently to avoid flooding log if self.total_posts_to_process % 250 == 0: # Log every 250 posts self.log_signal.emit(f" Fetched {self.total_posts_to_process} posts...") else: @@ -1799,13 +1426,8 @@ class DownloaderApp(QWidget): import traceback self.log_signal.emit(traceback.format_exc(limit=3)) fetch_error = True - - - # --- Handle Fetch Completion / Errors --- if self.cancellation_event.is_set() or fetch_error: - # If cancelled or error during fetch, signal completion immediately self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set()) - # Ensure pool is cleaned up if fetch fails before submitting if self.thread_pool: self.thread_pool.shutdown(wait=False, cancel_futures=True) self.thread_pool = None @@ -1816,15 +1438,9 @@ class DownloaderApp(QWidget): self.log_signal.emit("šŸ˜• No posts found or fetched successfully.") self.finished_signal.emit(0, 0, False) # Signal completion with zero counts return - - - # --- Submit Tasks to Worker Pool --- self.log_signal.emit(f" Submitting {self.total_posts_to_process} post tasks to worker pool...") self.processed_posts_count = 0 # Reset counter before submitting self.overall_progress_signal.emit(self.total_posts_to_process, 0) # Update progress display - - - # Extract arguments needed explicitly for the worker common_worker_args = { 'download_root': worker_args_template['output_dir'], # **FIXED HERE** 'known_names': worker_args_template['known_names_copy'], @@ -1834,9 +1450,7 @@ class DownloaderApp(QWidget): 'skip_zip': worker_args_template['skip_zip'], 'skip_rar': worker_args_template['skip_rar'], 'use_subfolders': worker_args_template['use_subfolders'], - # target_post_id is likely None here, but pass it for consistency 'target_post_id_from_initial_url': worker_args_template.get('single_post_id'), - # custom_folder_name is likely None here 'custom_folder_name': worker_args_template.get('custom_folder_name'), 'compress_images': worker_args_template['compress_images'], 'download_thumbnails': worker_args_template['download_thumbnails'], @@ -1845,7 +1459,6 @@ class DownloaderApp(QWidget): 'api_url_input': worker_args_template['api_url'], # Pass original URL 'cancellation_event': self.cancellation_event, 'signals': self.worker_signals, # Pass the shared signals object - # Pass shared state and locks 'downloaded_files': self.downloaded_files, 'downloaded_files_lock': self.downloaded_files_lock, 'downloaded_file_hashes': self.downloaded_file_hashes, # ADDED @@ -1863,11 +1476,7 @@ class DownloaderApp(QWidget): self.processed_posts_count += 1 # Count as processed (skipped) self.total_posts_to_process -=1 # Adjust total if skipping invalid data continue - - # Create worker with specific post and common args worker = PostProcessorWorker(post_data=post_data, **common_worker_args) - - # Submit the worker's process method to the pool try: if self.thread_pool: # Check if pool still exists future = self.thread_pool.submit(worker.process) @@ -1881,92 +1490,43 @@ class DownloaderApp(QWidget): break except Exception as e: self.log_signal.emit(f"āŒ Unexpected error submitting task: {e}") - # Decide whether to continue or break on submission error break - - - # Log after submission loop completes or breaks submitted_count = len(self.active_futures) self.log_signal.emit(f" {submitted_count} / {self.total_posts_to_process} tasks submitted.") - # If cancelled during submission, remaining tasks won't run. - # Fetcher thread's job is done. Callbacks handle the rest. - # If submission loop finished and submitted_count == total_posts_to_process, check if pool needs shutdown signal? - # No, shutdown happens in finished_signal handler or closeEvent. def _handle_future_result(self, future: Future): """(Callback) Handles results from worker threads.""" - # This runs in one of the ThreadPoolExecutor threads, use signals for UI updates self.processed_posts_count += 1 downloaded_res, skipped_res = 0, 0 # Default results try: if future.cancelled(): - # Task was cancelled before/during execution - # Count cancelled task as skipped only if it wasn't already counted within the worker - # The worker's process method should return counts before checking cancel state at the end. - # Let's assume worker reports counts correctly up to cancellation point. - # We don't increment skipped_res here, as the worker should have handled it. pass elif future.exception(): exc = future.exception() self.log_signal.emit(f"āŒ Error in worker thread: {exc}") - # Log traceback snippet if helpful - # import traceback - # self.log_signal.emit(traceback.format_exc(limit=2)) - # Count errored task as skipped only if it wasn't already handled in worker. - # Assume worker reports counts correctly up to error point. - # We don't increment skipped_res here. pass else: - # Task completed, get results downloaded, skipped = future.result() # Result from worker.process() downloaded_res = downloaded skipped_res = skipped - # Log task completion? Might be too verbose. - - # --- Safely update shared counters (using main thread or locks if needed) --- - # These counters are primarily for the final summary. - # Direct update is acceptable for progress display with potential minor races. - # A more robust way uses thread-safe counters (e.g., threading.Lock with counters) - # Let's use locks for absolute accuracy in the final count. with threading.Lock(): # Use a temporary lock for updating these counters self.download_counter += downloaded_res self.skip_counter += skipped_res - - - # --- Update Progress --- self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count) except Exception as e: - # Catch errors within the callback itself self.log_signal.emit(f"āŒ Error in result callback handling: {e}") - - # --- Check for Overall Completion --- - # This check is tricky due to callbacks running concurrently. - # Rely on the processed_posts_count reaching total_posts_to_process - # Ensure this check is race-safe if possible. Using a simple counter is okay for a check-and-signal pattern. if self.processed_posts_count >= self.total_posts_to_process and self.total_posts_to_process > 0: - # Add a small delay to ensure all pending callbacks (if any) have a chance to run - # This is heuristic and not guaranteed thread-safe completion detection. - # A more robust solution involves tracking active futures explicitly. - # Given the current structure, this simple counter check is the most practical. - # time.sleep(0.1) # Short delay before final check - - # Re-check count just in case if self.processed_posts_count >= self.total_posts_to_process: self.log_signal.emit("šŸ All submitted tasks have completed or failed.") cancelled = self.cancellation_event.is_set() - # Use the final accumulated counters (updated under lock) self.finished_signal.emit(self.download_counter, self.skip_counter, cancelled) - - - # --- UI State Management --- def set_ui_enabled(self, enabled): """Enable/disable UI controls based on download state.""" - # Controls to disable during download self.download_btn.setEnabled(enabled) self.link_input.setEnabled(enabled) self.dir_input.setEnabled(enabled) @@ -1981,47 +1541,27 @@ class DownloaderApp(QWidget): self.download_thumbnails_checkbox.setEnabled(enabled) self.use_multithreading_checkbox.setEnabled(enabled) self.skip_words_input.setEnabled(enabled) # --- NEW: Enable/disable skip words input --- - # self.character_list.setEnabled(enabled) # Keep enabled for Browse self.character_search_input.setEnabled(enabled) # ADDED self.new_char_input.setEnabled(enabled) self.add_char_button.setEnabled(enabled) self.delete_char_button.setEnabled(enabled) - - - # Enable/disable conditional controls subfolders_on = self.use_subfolders_checkbox.isChecked() self.custom_folder_widget.setEnabled(enabled and subfolders_on) self.character_filter_widget.setEnabled(enabled and subfolders_on) - - - # Update visibility if enabling UI if enabled: self.update_ui_for_subfolders(subfolders_on) self.update_custom_folder_visibility() # Update based on current URL - - # Cancel button is enabled only when download is running (UI disabled) self.cancel_btn.setEnabled(not enabled) - - # Skip button state is handled separately, reset when UI enabled if enabled: self.skip_file_btn.setEnabled(False) - - - # --- Actions --- def cancel_download(self): """Requests cancellation of the ongoing download (single or multi-thread).""" if not self.cancel_btn.isEnabled(): return # Prevent double clicks self.log_signal.emit("āš ļø Requesting cancellation...") self.cancellation_event.set() # Set the shared event - - # Disable cancel button immediately to prevent multiple signals self.cancel_btn.setEnabled(False) self.progress_label.setText("Progress: Cancelling...") - - # Optional: Try to actively cancel futures if using thread pool - # This might help stop tasks quicker if they check the future status, - # but reliance on the cancellation_event is the primary mechanism. if self.thread_pool and self.active_futures: cancelled_count = 0 for future in self.active_futures: @@ -2033,7 +1573,6 @@ class DownloaderApp(QWidget): def skip_current_file(self): """Signals the active download thread (if single-threaded) to skip the current file.""" - # Check if single-threaded mode is active if self.download_thread and self.download_thread.isRunning(): self.download_thread.skip_file() # Call method on the QThread instance elif self.thread_pool: @@ -2045,13 +1584,9 @@ class DownloaderApp(QWidget): def update_skip_button_state(self, is_downloading_active): """Enables/disables the skip button based on download state.""" - # Enable only if: download running (UI disabled), AND single-thread mode, AND file download active can_skip = (not self.download_btn.isEnabled()) and \ (self.download_thread and self.download_thread.isRunning()) and \ is_downloading_active - - # Explicitly disable if multi-threading was used for this run - # Check if thread_pool was initialized and is not None if self.thread_pool is not None: can_skip = False @@ -2060,50 +1595,27 @@ class DownloaderApp(QWidget): def download_finished(self, total_downloaded, total_skipped, cancelled): """Cleans up resources and resets UI after download completion/cancellation.""" - # Log final status self.log_signal.emit("="*40) status = "Cancelled" if cancelled else "Finished" self.log_signal.emit(f"šŸ Download {status}!") self.log_signal.emit(f" Summary: Downloaded={total_downloaded}, Skipped={total_skipped}") self.progress_label.setText(f"{status}: {total_downloaded} downloaded, {total_skipped} skipped.") self.log_signal.emit("="*40) - - - # --- Cleanup Resources --- - # QThread cleanup if self.download_thread: - # Disconnect response signal *from* the thread instance try: self.character_prompt_response_signal.disconnect(self.download_thread.receive_add_character_result) except TypeError: pass # Ignore if not connected - # QThread should exit naturally after finished signal self.download_thread = None - - - # ThreadPoolExecutor cleanup if self.thread_pool: self.log_signal.emit(" Shutting down worker thread pool...") - # Shutdown non-blockingly, attempt to cancel any remaining futures self.thread_pool.shutdown(wait=False, cancel_futures=True) self.thread_pool = None self.active_futures = [] # Clear future list - - - # Reset cancellation event for the next run self.cancellation_event.clear() - - # --- Reset UI --- self.set_ui_enabled(True) - # Ensure cancel/skip buttons are disabled self.cancel_btn.setEnabled(False) self.skip_file_btn.setEnabled(False) - - - # --- Character Prompt Handling --- - # Runs in GUI thread, triggered by signal from DownloadThread def prompt_add_character(self, character_name): - # Ensure prompt mutex is available if needed, though GUI thread access is usually safe - # with QMutexLocker(self.prompt_mutex): # Might not be strictly necessary here reply = QMessageBox.question(self, "Add Filter Name?", f"The filter name '{character_name}' is not in your known list.\n\nAdd it now and continue download?", @@ -2111,40 +1623,21 @@ class DownloaderApp(QWidget): result = (reply == QMessageBox.Yes) if result: - # Use the existing add method to handle adding, UI update, save self.new_char_input.setText(character_name) # Pre-fill for clarity if needed? No, just add. - # Find the item in the list to simulate adding it if Add button was used - # Check if already added by another mechanism if character_name.lower() not in {n.lower() for n in KNOWN_NAMES}: self.add_new_character() # Add the name - # Verify it was added successfully if character_name.lower() not in {n.lower() for n in KNOWN_NAMES}: self.log_signal.emit(f"āš ļø Failed to add '{character_name}' via prompt. Check for errors.") result = False # Treat as failure if not added else: self.log_signal.emit(f"ā„¹ļø Filter name '{character_name}' was already present or added.") - - # Signal the result back to the waiting DownloadThread self.character_prompt_response_signal.emit(result) - - - # Slot to receive the result for the waiting DownloadThread def receive_add_character_result(self, result): """Slot to receive the boolean result from the GUI prompt.""" - # This runs in the thread's event loop, triggered by the signal connection with QMutexLocker(self.prompt_mutex): self._add_character_response = result self.log_signal.emit(f" Received prompt response: {'Yes' if result else 'No'}") - - - # Removed start_api_server method - # def start_api_server(self): ... - - -# --- Modified DownloadThread for Single-Threaded Mode --- -# This class handles the entire download process when multi-threading is off or for single posts. class DownloadThread(QThread): - # Signals emitted by this thread back to the GUI progress_signal = pyqtSignal(str) add_character_prompt_signal = pyqtSignal(str) # Ask GUI to prompt user file_download_status_signal = pyqtSignal(bool) # File download start/end @@ -2177,28 +1670,18 @@ class DownloadThread(QThread): self.service = service # Use passed value self.user_id = user_id # Use passed value self.skip_words_list = skip_words_list if skip_words_list is not None else [] # --- NEW: Store skip_words_list --- - - - # Shared state and locks (passed from DownloaderApp) self.downloaded_files = downloaded_files if downloaded_files is not None else set() self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock() self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set() # ADDED self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock() # ADDED - - - # Internal state self.skip_current_file_flag = threading.Event() self.is_downloading_file = False self.current_download_path = None self._add_character_response = None # Stores response from GUI prompt self.prompt_mutex = QMutex() # Protects access to _add_character_response - - - # Basic validation in init if not self.service or not self.user_id: log_msg = f"āŒ Thread Init Error: Missing service ('{self.service}') or user ID ('{self.user_id}') for URL '{api_url}'" print(log_msg) # Print error as signals might not be connected yet - # Try emitting signal as well, might work if called after main init try: self.progress_signal.emit(log_msg) except RuntimeError: pass # Ignore if signal connection fails during init self._init_failed = True @@ -2207,7 +1690,6 @@ class DownloadThread(QThread): def run(self): """Main execution logic for the single-threaded download.""" if self._init_failed: - # Error already logged in __init__ self.finished_signal.emit(0, 0, False) # Signal completion with zero counts return @@ -2217,19 +1699,10 @@ class DownloadThread(QThread): cancelled_by_user = False try: - # --- Character Filter Pre-Check --- - # This check is necessary here because the thread runs independently if self.use_subfolders and self.filter_character and not self.custom_folder_name: if not self._check_and_prompt_filter_character(): - # If check fails (invalid, or user declines adding), abort. - # Error/reason logged inside the check method. self.finished_signal.emit(0, 0, False) # Not cancelled, aborted by validation/user return - - - # --- Setup Worker Instance --- - # Use the PostProcessorWorker logic for processing each post - # Create adapter signals object that routes back to this QThread's signals worker_signals_adapter = PostProcessorSignals() worker_signals_adapter.progress_signal.connect(self.progress_signal) # Route log messages worker_signals_adapter.file_download_status_signal.connect(self.file_download_status_signal) # Route file status @@ -2253,20 +1726,14 @@ class DownloadThread(QThread): api_url_input=self.api_url_input, cancellation_event=self.cancellation_event, # Pass the shared event signals=worker_signals_adapter, # Use the adapter signals - # Pass shared state and locks downloaded_files=self.downloaded_files, downloaded_files_lock=self.downloaded_files_lock, downloaded_file_hashes=self.downloaded_file_hashes, # ADDED downloaded_file_hashes_lock=self.downloaded_file_hashes_lock, # ADDED skip_words_list=self.skip_words_list, # --- NEW: Pass skip words to worker --- ) - # Allow worker to use this thread's skip flag directly post_worker.skip_current_file_flag = self.skip_current_file_flag - - - # --- Fetch and Process Posts --- self.progress_signal.emit(" Starting post fetch...") - # Use a local logger function that emits the progress signal def thread_logger(msg): self.progress_signal.emit(msg) @@ -2283,11 +1750,8 @@ class DownloadThread(QThread): self.progress_signal.emit("āš ļø Download cancelled during post processing.") cancelled_by_user = True break # Exit inner post loop - - # Assign post data to the worker instance post_worker.post = post try: - # Process the post using the worker's logic downloaded, skipped = post_worker.process() grand_total_downloaded += downloaded grand_total_skipped += skipped @@ -2297,20 +1761,15 @@ class DownloadThread(QThread): import traceback self.progress_signal.emit(traceback.format_exc(limit=2)) grand_total_skipped += 1 # Count post as skipped on error - - # Brief pause between posts to yield control, keep UI responsive self.msleep(20) # 20 milliseconds if cancelled_by_user: break # Exit outer batch loop as well - - # --- Finished Processing --- if not cancelled_by_user: self.progress_signal.emit("āœ… Post fetching and processing complete.") except Exception as e: - # Catch unexpected errors during the main run loop log_msg = f"\nāŒ An critical error occurred in download thread: {e}" self.progress_signal.emit(log_msg) import traceback @@ -2322,14 +1781,12 @@ class DownloadThread(QThread): cancelled_by_user = False # Not cancelled by user, but by error finally: - # Ensure finished signal is always emitted self.finished_signal.emit(grand_total_downloaded, grand_total_skipped, cancelled_by_user) def _check_and_prompt_filter_character(self): """Validates filter character and prompts user if it's not known. Returns True if OK to proceed.""" clean_char_filter = clean_folder_name(self.filter_character.lower()) - # Use current known_names list (potentially updated if added via prompt earlier) known_names_lower = {name.lower() for name in self.known_names} if not clean_char_filter: @@ -2338,39 +1795,28 @@ class DownloadThread(QThread): if self.filter_character.lower() not in known_names_lower: self.progress_signal.emit(f"ā“ Filter '{self.filter_character}' not found in known list.") - - # Reset response flag and signal GUI to ask with QMutexLocker(self.prompt_mutex): self._add_character_response = None self.add_character_prompt_signal.emit(self.filter_character) - - # Wait loop for response from GUI (via receive_add_character_result) self.progress_signal.emit(" Waiting for user confirmation to add filter name...") while self._add_character_response is None: if self.isInterruptionRequested(): # Check cancellation self.progress_signal.emit("āš ļø Cancelled while waiting for user input on filter name.") return False # Abort if cancelled self.msleep(200) # Check every 200ms - - # Process the response stored in self._add_character_response if self._add_character_response: self.progress_signal.emit(f"āœ… User confirmed adding '{self.filter_character}'. Continuing.") - # Update thread's local list if GUI added it if self.filter_character not in self.known_names: self.known_names.append(self.filter_character) return True # OK to proceed else: self.progress_signal.emit(f"āŒ User declined to add filter '{self.filter_character}'. Aborting download.") return False # User declined, abort - - # Filter character is valid and already known return True def skip_file(self): """Sets the skip flag for the currently downloading file.""" - # Check if a download is actually active (using worker's state) - # Accessing worker state directly isn't ideal. Use internal flag. if self.isRunning() and self.is_downloading_file: self.progress_signal.emit("ā­ļø Skip requested for current file.") self.skip_current_file_flag.set() # Signal the worker's process loop @@ -2380,7 +1826,6 @@ class DownloadThread(QThread): def receive_add_character_result(self, result): """Slot to receive the boolean result from the GUI prompt.""" - # This runs in the thread's event loop, triggered by the signal connection with QMutexLocker(self.prompt_mutex): self._add_character_response = result self.progress_signal.emit(f" Received prompt response: {'Yes' if result else 'No'}") @@ -2389,25 +1834,12 @@ class DownloadThread(QThread): def isInterruptionRequested(self): """Overrides QThread method to check both interruption flag and shared event.""" return super().isInterruptionRequested() or self.cancellation_event.is_set() - - - -# --- Main Execution Block --- if __name__ == '__main__': - # Set high DPI scaling attribute if needed (Qt 5.6+) - # QApplication.setAttribute(Qt.AA_EnableHighDpiScaling, True) - # QApplication.setAttribute(Qt.AA_UseHighDpiPixmaps, True) qt_app = QApplication(sys.argv) - # Optional: Apply a style like Fusion for consistency - # qt_app.setStyle('Fusion') downloader = DownloaderApp() # Create the main application window downloader.show() # Show the window - - # Start the Qt event loop exit_code = qt_app.exec_() - - # Code here runs after the application window is closed print(f"Application finished with exit code: {exit_code}") sys.exit(exit_code) # Exit the script with the application's exit code \ No newline at end of file