2025-05-06 22:49:19 +05:30

1784 lines
88 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import sys
import os
import time
import requests
import re
import threading
import queue
import hashlib
from concurrent.futures import ThreadPoolExecutor, Future, CancelledError
from PyQt5.QtGui import QIcon
from PyQt5.QtWidgets import (
QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton,
QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget,
QRadioButton, QButtonGroup, QCheckBox, QMainWindow
)
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject
from urllib.parse import urlparse
try:
from PIL import Image
except ImportError:
print("ERROR: Pillow library not found. Please install it: pip install Pillow")
Image = None
from io import BytesIO
# Module-level placeholder; nothing in this part of the file assigns it again.
# NOTE(review): presumably a leftover of the removed local API server - confirm.
fastapi_app = None
# Known show/character names used when matching post titles to folders.
# Rebound by DownloaderApp.load_known_names() and DownloaderApp.save_known_names().
KNOWN_NAMES = []
def clean_folder_name(name):
    """Return *name* reduced to characters that are safe in a folder name.

    Non-string input is converted with ``str()`` first. Every character that
    is not a word character, whitespace, hyphen or underscore is dropped, the
    result is trimmed, and the remaining spaces become underscores.
    """
    text = name if isinstance(name, str) else str(name)
    kept = re.sub(r'[^\w\s\-\_]', '', text)
    return kept.strip().replace(' ', '_')
def clean_filename(name):
    """Return *name* reduced to characters that are safe in a file name.

    Works like the folder-name cleaner but additionally keeps dots so that a
    file extension survives. Non-string input is converted with ``str()``.
    """
    text = name if isinstance(name, str) else str(name)
    kept = re.sub(r'[^\w\s\-\_\.]', '', text)
    return kept.strip().replace(' ', '_')
def extract_folder_name_from_title(title, unwanted_keywords):
    """Derive a folder name from the first usable word of *title*.

    The title is lower-cased and split on whitespace; each word is passed
    through ``clean_folder_name``. The first cleaned word that is non-empty
    and not listed in *unwanted_keywords* is returned. An empty title, or a
    title without any usable word, yields ``'Uncategorized'``.
    """
    fallback = 'Uncategorized'
    if not title:
        return fallback
    cleaned_words = (clean_folder_name(word) for word in title.lower().split())
    usable = (word for word in cleaned_words if word and word not in unwanted_keywords)
    return next(usable, fallback)
def match_folders_from_title(title, known_names, unwanted_keywords):
    """Return the cleaned known names that occur in *title*.

    Both the title and every entry of *known_names* are lower-cased and
    passed through ``clean_folder_name`` before a substring test. Names that
    clean to an empty string, or whose cleaned form is in
    *unwanted_keywords*, are ignored.

    Returns:
        A list of unique cleaned names in sorted order. Sorting makes the
        result deterministic: callers try the folders in list order, and an
        unordered set would make that order vary between runs.
    """
    if not title:
        return []
    cleaned_title = clean_folder_name(title.lower())
    matches = set()
    for name in known_names:
        candidate = clean_folder_name(name.lower())
        if not candidate:
            continue
        if candidate in cleaned_title and candidate not in unwanted_keywords:
            matches.add(candidate)
    return sorted(matches)
def is_image(filename):
    """Tell whether *filename* ends with a recognised image extension (case-insensitive)."""
    image_suffixes = ('.png', '.jpg', '.jpeg', '.webp', '.gif')
    return bool(filename) and filename.lower().endswith(image_suffixes)
def is_video(filename):
    """Tell whether *filename* ends with a recognised video extension (case-insensitive)."""
    video_suffixes = ('.mp4', '.mov', '.mkv', '.webm', '.avi', '.wmv')
    return bool(filename) and filename.lower().endswith(video_suffixes)
def is_zip(filename):
    """Tell whether *filename* ends with ``.zip`` (case-insensitive)."""
    return bool(filename) and filename.lower().endswith('.zip')
def is_rar(filename):
    """Tell whether *filename* ends with ``.rar`` (case-insensitive)."""
    return bool(filename) and filename.lower().endswith('.rar')
def is_post_url(url):
    """Tell whether *url* is a string whose path component contains ``/post/``."""
    return isinstance(url, str) and '/post/' in urlparse(url).path
def extract_post_info(url_string):
    """Split a kemono/coomer URL into ``(service, user_id, post_id)``.

    Two path layouts are recognised:

    * ``/<service>/user/<user_id>[/post/<post_id>]``
    * ``/api/v1/<service>/user/<user_id>[/post/<post_id>]``

    ``post_id`` is ``None`` when the URL has no post part. Anything else
    (non-string or blank input, an unsupported domain, an unknown layout or
    a parsing error) yields ``(None, None, None)``.
    """
    nothing = (None, None, None)
    if not (isinstance(url_string, str) and url_string.strip()):
        return nothing
    try:
        parsed = urlparse(url_string.strip())
        host = parsed.netloc.lower()
        segments = [seg for seg in parsed.path.strip('/').split('/') if seg]

        supported_hosts = ('kemono.su', 'kemono.party', 'coomer.su', 'coomer.party')
        if not any(known in host for known in supported_hosts):
            return nothing

        # Site layout: /<service>/user/<user_id>[/post/<post_id>]
        if len(segments) >= 3 and segments[1].lower() == 'user':
            has_post = len(segments) >= 5 and segments[3].lower() == 'post'
            return segments[0], segments[2], (segments[4] if has_post else None)

        # API layout: /api/v1/<service>/user/<user_id>[/post/<post_id>]
        looks_like_api = (
            len(segments) >= 5
            and segments[0].lower() == 'api'
            and segments[1].lower() == 'v1'
            and segments[3].lower() == 'user'
        )
        if looks_like_api:
            has_post = len(segments) >= 7 and segments[5].lower() == 'post'
            return segments[2], segments[4], (segments[6] if has_post else None)
    except ValueError:
        print(f"Debug: ValueError parsing URL '{url_string}'")
        return nothing
    except Exception as e:
        print(f"Debug: Exception during extract_post_info for URL '{url_string}': {e}")
        return nothing
    return nothing
def fetch_posts_paginated(api_url_base, headers, offset, logger):
    """Fetch one page of posts from the API and return the decoded JSON.

    Args:
        api_url_base: Creator endpoint URL without a query string.
        headers: HTTP headers sent with the request.
        offset: Value of the ``o`` query parameter (paging offset).
        logger: Callable taking one message string.

    Returns:
        Whatever ``response.json()`` decodes the body to.

    Raises:
        RuntimeError: On timeout, any other request failure (including HTTP
            error statuses), a non-JSON content type, or an undecodable
            body. The underlying exception, when there is one, is chained
            as ``__cause__``.
    """
    paginated_url = f'{api_url_base}?o={offset}'
    logger(f" Fetching: {paginated_url}")

    # Only the network round-trip lives in this try block, so the errors
    # raised further down are not re-caught and re-wrapped by its handlers.
    try:
        response = requests.get(paginated_url, headers=headers, timeout=45)
        response.raise_for_status()
    except requests.exceptions.Timeout as e:
        raise RuntimeError(f"Timeout fetching page offset {offset}") from e
    except requests.exceptions.RequestException as e:
        err_msg = f"Error fetching page offset {offset}: {e}"
        if e.response is not None:
            err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
        raise RuntimeError(err_msg) from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error processing page offset {offset}: {e}") from e

    content_type = response.headers.get('Content-Type', '')
    if 'application/json' not in content_type:
        raise RuntimeError(f"Unexpected content type received: {response.headers.get('Content-Type')}. Body: {response.text[:200]}")

    # Decoding is handled separately: requests' own JSONDecodeError is also a
    # RequestException, so a shared try block would report it as a fetch error.
    try:
        return response.json()
    except ValueError as e:
        raise RuntimeError(f"Error decoding JSON response for offset {offset}: {e}. Body: {response.text[:200]}") from e
def download_from_api(api_url_input, logger=print):
    """Yield batches of posts for the creator (or single post) behind a URL.

    The URL is parsed with ``extract_post_info``. For a creator URL every
    fetched page is yielded as a list of posts. For a post URL the pages are
    scanned until the post with the matching id is found, and only that post
    is yielded (as a one-element list).

    Paging stops on an empty page, on a fetch error, on a non-list response,
    or once the target post has been yielded. Progress and errors are
    reported through *logger*; nothing is raised for fetch failures.
    """
    request_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    service, user_id, target_post_id = extract_post_info(api_url_input)
    if not (service and user_id):
        logger(f"❌ Invalid or unrecognized URL: {api_url_input}. Cannot fetch.")
        return

    # Reuse the host from the input URL when it is a supported one.
    netloc = urlparse(api_url_input).netloc
    supported_hosts = ('kemono.su', 'coomer.su', 'kemono.party', 'coomer.party')
    api_domain = netloc if any(host in netloc.lower() for host in supported_hosts) else "kemono.su"
    api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}"

    posts_per_page = 50
    offset = 0
    page = 1
    target_done = False

    while True:
        if target_post_id and target_done:
            logger(f"✅ Target post {target_post_id} found and processed. Stopping.")
            break

        logger(f"\n🔄 Fetching page {page} (offset {offset}) for user {user_id} on {api_domain}...")
        try:
            posts_batch = fetch_posts_paginated(api_base_url, request_headers, offset, logger)
            if not isinstance(posts_batch, list):
                logger(f"❌ API Error: Expected a list of posts, got {type(posts_batch)}. Response: {str(posts_batch)[:200]}")
                break
        except RuntimeError as e:
            logger(f"{e}")
            logger(" Aborting pagination due to error.")
            break
        except Exception as e:
            logger(f"❌ Unexpected error during fetch loop: {e}")
            break

        if not posts_batch:
            # An empty page marks the end of the listing.
            if not target_post_id:
                if page == 1:
                    logger("😕 No posts found for this creator.")
                else:
                    logger("✅ Reached end of posts.")
            break

        logger(f"📦 Found {len(posts_batch)} posts on page {page}.")
        if not target_post_id:
            yield posts_batch
        else:
            wanted_id = str(target_post_id)
            hit = None
            for post in posts_batch:
                if str(post.get('id')) == wanted_id:
                    hit = post
                    break
            if hit:
                logger(f"🎯 Found target post {target_post_id} on page {page}.")
                yield [hit]
                target_done = True
            else:
                logger(f" Target post {target_post_id} not found on this page.")

        if not (target_post_id and target_done):
            # Advance to the next page and pause briefly between requests.
            offset += posts_per_page
            page += 1
            time.sleep(0.6)

    if target_post_id and not target_done:
        logger(f"❌ Target post ID {target_post_id} was not found for this creator.")
class PostProcessorSignals(QObject):
    """Qt signals through which a PostProcessorWorker reports to the GUI."""
    # Carries one log message string; PostProcessorWorker.logger emits it.
    progress_signal = pyqtSignal(str)
    # Emitted with True right before a file download starts and with False
    # once that download attempt has finished (successfully or not).
    file_download_status_signal = pyqtSignal(bool)
class PostProcessorWorker:
    """Processes one post: picks target folder(s), then filters, downloads,
    de-duplicates and saves the post's file and attachments.

    The shared ``downloaded_files`` / ``downloaded_file_hashes`` containers and
    their locks are supplied by the caller and are updated by ``process``.
    """

    def __init__(self, post_data, download_root, known_names, filter_character,
                 unwanted_keywords, filter_mode, skip_zip, skip_rar,
                 use_subfolders, target_post_id_from_initial_url, custom_folder_name,
                 compress_images, download_thumbnails, service, user_id,
                 api_url_input, cancellation_event, signals,
                 downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock,
                 skip_words_list=None):
        """Store the post and all download options on the instance.

        ``skip_words_list`` defaults to an empty list. Image compression is
        switched off (with a log message) when Pillow could not be imported.
        """
        self.post = post_data
        self.download_root = download_root
        self.known_names = known_names
        self.filter_character = filter_character
        self.unwanted_keywords = unwanted_keywords
        self.filter_mode = filter_mode
        self.skip_zip = skip_zip
        self.skip_rar = skip_rar
        self.use_subfolders = use_subfolders
        self.target_post_id_from_initial_url = target_post_id_from_initial_url
        self.custom_folder_name = custom_folder_name
        self.compress_images = compress_images
        self.download_thumbnails = download_thumbnails
        self.service = service
        self.user_id = user_id
        self.api_url_input = api_url_input
        self.cancellation_event = cancellation_event
        self.signals = signals
        # Set from outside to skip the file currently being handled; cleared by process().
        self.skip_current_file_flag = threading.Event()
        self.is_downloading_file = False
        self.current_download_path = None
        self.downloaded_files = downloaded_files
        self.downloaded_file_hashes = downloaded_file_hashes
        self.downloaded_files_lock = downloaded_files_lock
        self.downloaded_file_hashes_lock = downloaded_file_hashes_lock
        self.skip_words_list = skip_words_list if skip_words_list is not None else []
        # self.signals is already assigned above, so self.logger can be used here.
        if self.compress_images and Image is None:
            self.logger("⚠️ Image compression enabled, but Pillow library is not loaded. Disabling compression.")
            self.compress_images = False

    def logger(self, message):
        """Emit *message* on ``signals.progress_signal``, or print it if no usable signals object is set."""
        if self.signals and hasattr(self.signals, 'progress_signal'):
            self.signals.progress_signal.emit(message)
        else:
            print(f"(Worker Log): {message}")

    def check_cancel(self):
        """Return True when the shared cancellation event is set."""
        is_cancelled = self.cancellation_event.is_set()
        return is_cancelled

    def skip_file(self):
        """No-op placeholder; skipping is driven by ``skip_current_file_flag`` inside ``process``."""
        pass

    def process(self):
        """Download the files of ``self.post``.

        Returns:
            A ``(downloaded_count, skipped_count)`` tuple for this post.
        """
        if self.check_cancel(): return 0, 0
        # NOTE(review): total_downloaded_post, total_skipped_post and url_pattern
        # are never read again in this method.
        total_downloaded_post = 0
        total_skipped_post = 0
        headers = {'User-Agent': 'Mozilla/5.0', 'Referer': f'https://{urlparse(self.api_url_input).netloc}/'}
        url_pattern = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+')
        LARGE_THUMBNAIL_THRESHOLD = 1 * 1024 * 1024
        post = self.post
        api_title = post.get('title', '')
        title = api_title if api_title else 'untitled_post'
        post_id = post.get('id', 'unknown_id')
        post_file_info = post.get('file')
        attachments = post.get('attachments', [])
        post_content = post.get('content', '')
        is_target_post = (self.target_post_id_from_initial_url is not None) and (str(post_id) == str(self.target_post_id_from_initial_url))
        self.logger(f"\n--- Processing Post {post_id} ('{title[:50]}...') (Thread: {threading.current_thread().name}) ---")
        # --- Skip the whole post when its title contains a skip word ---
        if self.skip_words_list:
            title_lower = title.lower()
            for skip_word in self.skip_words_list:
                if skip_word.lower() in title_lower:
                    self.logger(f" -> Skip Post (Title): Post {post_id} title ('{title[:30]}...') contains skip word '{skip_word}'. Skipping entire post.")
                    return 0, 1
        if not isinstance(attachments, list):
            self.logger(f"⚠️ Corrupt attachment data for post {post_id}. Skipping attachments.")
            attachments = []
        # --- Decide which folder(s) this post is saved to ---
        valid_folder_paths = []
        folder_decision_reason = ""
        api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su"
        # A custom folder applies only to the post named in the initial URL.
        if is_target_post and self.custom_folder_name and self.use_subfolders:
            folder_path_full = os.path.join(self.download_root, self.custom_folder_name)
            valid_folder_paths = [folder_path_full]
            folder_decision_reason = f"Using custom folder for target post: '{self.custom_folder_name}'"
        if not valid_folder_paths and self.use_subfolders:
            folder_names_for_post = []
            if self.filter_character:
                # With a character filter the post is kept only if the filter
                # is one of the known names matched in the title.
                clean_char_filter = clean_folder_name(self.filter_character.lower())
                matched_names_in_title = match_folders_from_title(title, self.known_names, self.unwanted_keywords)
                if clean_char_filter and clean_char_filter in matched_names_in_title:
                    folder_names_for_post = [clean_char_filter]
                    folder_decision_reason = f"Character filter '{self.filter_character}' matched title. Using folder '{clean_char_filter}'."
                else:
                    self.logger(f" -> Filter Skip Post {post_id}: Character filter '{self.filter_character}' not found in title matches ({matched_names_in_title}).")
                    return 0, 1
            else:
                matched_folders = match_folders_from_title(title, self.known_names, self.unwanted_keywords)
                if matched_folders:
                    folder_names_for_post = matched_folders
                    folder_decision_reason = f"Found known name(s) in title: {matched_folders}"
                else:
                    extracted_folder = extract_folder_name_from_title(title, self.unwanted_keywords)
                    folder_names_for_post = [extracted_folder]
                    folder_decision_reason = f"No known names in title. Using derived folder: '{extracted_folder}'"
            for folder_name in folder_names_for_post:
                folder_path_full = os.path.join(self.download_root, folder_name)
                valid_folder_paths.append(folder_path_full)
        # Fall back to the download root when no subfolder was chosen.
        if not valid_folder_paths:
            valid_folder_paths = [self.download_root]
            if not folder_decision_reason:
                folder_decision_reason = "Subfolders disabled or no specific folder determined. Using root download directory."
        self.logger(f" Folder Decision: {folder_decision_reason}")
        if not valid_folder_paths:
            self.logger(f" ERROR: No valid folder paths determined for post {post_id}. Skipping.")
            return 0, 1
        # --- Log (up to ten) links found in the post's HTML content ---
        if post_content:
            try:
                found_links = re.findall(r'href=["\'](https?://[^"\']+)["\']', post_content)
                if found_links:
                    self.logger(f"🔗 Links found in post content:")
                    unique_links = sorted(list(set(found_links)))
                    for link in unique_links[:10]:
                        if not any(x in link for x in ['.css', '.js', 'javascript:']):
                            self.logger(f" - {link}")
                    if len(unique_links) > 10:
                        self.logger(f" - ... ({len(unique_links) - 10} more links not shown)")
            except Exception as e:
                self.logger(f"⚠️ Error parsing content for links in post {post_id}: {e}")
        # --- Collect the files to download ---
        files_to_process_for_download = []
        api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su"
        if self.download_thumbnails:
            # Thumbnail mode always skips the post, so every entry collected
            # below has '_is_thumbnail' set to False.
            self.logger(f" Mode: Attempting to download thumbnail...")
            self.logger(" Thumbnail download via API is disabled as the local API is not used.")
            self.logger(f" -> Skipping Post {post_id}: Thumbnail download requested but API is disabled.")
            return 0, 1
        else:
            self.logger(f" Mode: Downloading post file/attachments.")
            if post_file_info and isinstance(post_file_info, dict) and post_file_info.get('path'):
                main_file_path = post_file_info['path'].lstrip('/')
                main_file_name = post_file_info.get('name') or os.path.basename(main_file_path)
                if main_file_name:
                    file_url = f"https://{api_domain}/data/{main_file_path}"
                    files_to_process_for_download.append({
                        'url': file_url, 'name': main_file_name,
                        '_is_thumbnail': False, '_source': 'post_file'
                    })
                else:
                    self.logger(f" ⚠️ Skipping main post file: Missing filename (Path: {main_file_path})")
            # Attachments are renamed to "<post_id>_<counter>[_<original base>]<ext>";
            # the counter advances only for attachments that are accepted.
            attachment_counter = 0
            for idx, attachment in enumerate(attachments):
                if isinstance(attachment, dict) and attachment.get('path'):
                    attach_path = attachment['path'].lstrip('/')
                    attach_name = attachment.get('name') or os.path.basename(attach_path)
                    if attach_name:
                        base, ext = os.path.splitext(clean_filename(attach_name))
                        final_attach_name = f"{post_id}_{attachment_counter}{ext}"
                        if base and base != f"{post_id}_{attachment_counter}":
                            final_attach_name = f"{post_id}_{attachment_counter}_{base}{ext}"
                        attach_url = f"https://{api_domain}/data/{attach_path}"
                        files_to_process_for_download.append({
                            'url': attach_url, 'name': final_attach_name,
                            '_is_thumbnail': False, '_source': f'attachment_{idx+1}',
                            '_original_name_for_log': attach_name
                        })
                        attachment_counter += 1
                    else:
                        self.logger(f" ⚠️ Skipping attachment {idx+1}: Missing filename (Path: {attach_path})")
                else:
                    self.logger(f" ⚠️ Skipping invalid attachment entry {idx+1}: {str(attachment)[:100]}")
        if not files_to_process_for_download:
            self.logger(f" No files found to download for post {post_id}.")
            return 0, 0
        self.logger(f" Files identified for download: {len(files_to_process_for_download)}")
        post_download_count = 0
        post_skip_count = 0
        # Names already handled within this post; the lock is created per call.
        local_processed_filenames = set()
        local_filenames_lock = threading.Lock()
        # --- Per-file loop: filter, download, de-duplicate, save ---
        for file_info in files_to_process_for_download:
            if self.check_cancel(): break
            if self.skip_current_file_flag.is_set():
                original_name_for_log = file_info.get('_original_name_for_log', file_info.get('name', 'unknown_file'))
                self.logger(f"⏭️ File skip requested: {original_name_for_log}")
                post_skip_count += 1
                self.skip_current_file_flag.clear()
                continue
            file_url = file_info.get('url')
            original_filename = file_info.get('name')
            is_thumbnail = file_info.get('_is_thumbnail', False)
            original_name_for_log = file_info.get('_original_name_for_log', original_filename)
            if not file_url or not original_filename:
                self.logger(f"⚠️ Skipping file entry due to missing URL or name: {str(file_info)[:100]}")
                post_skip_count += 1
                continue
            cleaned_save_filename = clean_filename(original_filename)
            # Skip-word check against the cleaned file name.
            if self.skip_words_list:
                filename_lower = cleaned_save_filename.lower()
                file_skipped_by_word = False
                for skip_word in self.skip_words_list:
                    if skip_word.lower() in filename_lower:
                        self.logger(f" -> Skip File (Filename): File '{original_name_for_log}' contains skip word '{skip_word}'.")
                        post_skip_count += 1
                        file_skipped_by_word = True
                        break
                if file_skipped_by_word:
                    continue
            # File-type filter and zip/rar preferences.
            if not self.download_thumbnails:
                file_skipped_by_filter = False
                is_img = is_image(cleaned_save_filename)
                is_vid = is_video(cleaned_save_filename)
                is_zip_file = is_zip(cleaned_save_filename)
                is_rar_file = is_rar(cleaned_save_filename)
                if self.filter_mode == 'image' and not is_img:
                    self.logger(f" -> Filter Skip: '{original_name_for_log}' (Not image/gif)")
                    file_skipped_by_filter = True
                elif self.filter_mode == 'video' and not is_vid:
                    self.logger(f" -> Filter Skip: '{original_name_for_log}' (Not video)")
                    file_skipped_by_filter = True
                elif self.skip_zip and is_zip_file:
                    self.logger(f" -> Pref Skip: '{original_name_for_log}' (Zip)")
                    file_skipped_by_filter = True
                elif self.skip_rar and is_rar_file:
                    self.logger(f" -> Pref Skip: '{original_name_for_log}' (RAR)")
                    file_skipped_by_filter = True
                if file_skipped_by_filter:
                    post_skip_count += 1
                    continue
            file_downloaded_or_exists = False
            # Folders are tried in order. Apart from a folder that cannot be
            # created (which moves on to the next one), every outcome below
            # leaves this loop.
            for folder_path in valid_folder_paths:
                if self.check_cancel(): break
                try:
                    os.makedirs(folder_path, exist_ok=True)
                except OSError as e:
                    self.logger(f"❌ Error ensuring directory exists {folder_path}: {e}. Skipping path.")
                    continue
                except Exception as e:
                    self.logger(f"❌ Unexpected error creating dir {folder_path}: {e}. Skipping path.")
                    continue
                save_path = os.path.join(folder_path, cleaned_save_filename)
                folder_basename = os.path.basename(folder_path)
                # Pre-download checks: non-empty file on disk, name already
                # handled in this post, name already downloaded globally.
                with local_filenames_lock:
                    if os.path.exists(save_path) and os.path.getsize(save_path) > 0:
                        self.logger(f" -> Exists Skip: '{original_name_for_log}' in '{folder_basename}'")
                        post_skip_count += 1
                        file_downloaded_or_exists = True
                        with self.downloaded_files_lock:
                            self.downloaded_files.add(cleaned_save_filename)
                        break
                    elif cleaned_save_filename in local_processed_filenames:
                        self.logger(f" -> Local Skip: '{original_name_for_log}' in '{folder_basename}' (already processed in this post)")
                        post_skip_count += 1
                        file_downloaded_or_exists = True
                        with self.downloaded_files_lock:
                            self.downloaded_files.add(cleaned_save_filename)
                        break
                with self.downloaded_files_lock:
                    if cleaned_save_filename in self.downloaded_files:
                        self.logger(f" -> Global Filename Skip: '{original_name_for_log}' in '{folder_basename}' (filename already downloaded globally)")
                        post_skip_count += 1
                        file_downloaded_or_exists = True
                        break
                try:
                    self.logger(f"⬇️ Downloading '{original_name_for_log}' to '{folder_basename}'...")
                    self.current_download_path = save_path
                    self.is_downloading_file = True
                    self.signals.file_download_status_signal.emit(True)
                    response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True)
                    response.raise_for_status()
                    # The whole body is buffered in memory and hashed while it streams in.
                    file_content_bytes = BytesIO()
                    downloaded_size = 0
                    chunk_count = 0
                    md5_hash = hashlib.md5()
                    for chunk in response.iter_content(chunk_size=32 * 1024):
                        if self.check_cancel(): break
                        if self.skip_current_file_flag.is_set(): break
                        if chunk:
                            file_content_bytes.write(chunk)
                            md5_hash.update(chunk)
                            downloaded_size += len(chunk)
                            chunk_count += 1
                    if self.check_cancel() or self.skip_current_file_flag.is_set():
                        self.logger(f" ⚠️ Download interrupted {'(cancelled)' if self.cancellation_event.is_set() else '(skipped)'} for {original_name_for_log}.")
                        if self.skip_current_file_flag.is_set():
                            post_skip_count += 1
                            self.skip_current_file_flag.clear()
                        break
                    final_save_path = save_path
                    current_filename_for_log = cleaned_save_filename
                    file_content_bytes.seek(0)
                    # NOTE(review): chunk_count only grows for non-empty chunks, so
                    # this condition looks unreachable - confirm.
                    if downloaded_size == 0 and chunk_count > 0:
                        self.logger(f"⚠️ Warning: Downloaded 0 bytes despite receiving chunks for {original_name_for_log}. Skipping save.")
                        post_skip_count += 1
                        break
                    # Content de-duplication by MD5 across everything downloaded so far.
                    # NOTE(review): calculated_hash is assigned only when
                    # downloaded_size > 0, yet the save path below reads it; an
                    # empty response would hit an unbound (or stale) name - confirm.
                    if downloaded_size > 0:
                        calculated_hash = md5_hash.hexdigest()
                        with self.downloaded_file_hashes_lock:
                            if calculated_hash in self.downloaded_file_hashes:
                                self.logger(f" -> Content Skip: '{original_name_for_log}' (Hash: {calculated_hash}) already downloaded.")
                                post_skip_count += 1
                                file_downloaded_or_exists = True
                                with self.downloaded_files_lock:
                                    self.downloaded_files.add(cleaned_save_filename)
                                with local_filenames_lock:
                                    local_processed_filenames.add(cleaned_save_filename)
                                break
                            else:
                                pass
                    if not file_downloaded_or_exists:
                        final_bytes_to_save = file_content_bytes
                        is_img_for_compress = is_image(cleaned_save_filename)
                        # Optional WebP re-encode for images above 1500 KiB; kept
                        # only when the result is under 90% of the original size.
                        if is_img_for_compress and not is_thumbnail and self.compress_images and Image and downloaded_size > 1500 * 1024:
                            self.logger(f" Compressing large image ({downloaded_size / 1024:.2f} KB)...")
                            try:
                                with Image.open(file_content_bytes) as img:
                                    original_format = img.format
                                    if img.mode == 'P': img = img.convert('RGBA')
                                    elif img.mode not in ['RGB', 'RGBA', 'L']: img = img.convert('RGB')
                                    compressed_bytes = BytesIO()
                                    img.save(compressed_bytes, format='WebP', quality=75, method=4)
                                    compressed_size = compressed_bytes.getbuffer().nbytes
                                if compressed_size < downloaded_size * 0.90:
                                    self.logger(f" Compression success: {compressed_size / 1024:.2f} KB (WebP Q75)")
                                    compressed_bytes.seek(0)
                                    final_bytes_to_save = compressed_bytes
                                    # The saved name switches to a .webp extension.
                                    base, _ = os.path.splitext(cleaned_save_filename)
                                    current_filename_for_log = base + '.webp'
                                    final_save_path = os.path.join(folder_path, current_filename_for_log)
                                    self.logger(f" Updated filename: {current_filename_for_log}")
                                else:
                                    self.logger(f" Compression skipped: WebP not significantly smaller ({compressed_size / 1024:.2f} KB).")
                                    file_content_bytes.seek(0)
                                    final_bytes_to_save = file_content_bytes
                            except Exception as comp_e:
                                # Any compression failure falls back to the original bytes.
                                self.logger(f"❌ Image compression failed for {original_name_for_log}: {comp_e}. Saving original.")
                                file_content_bytes.seek(0)
                                final_bytes_to_save = file_content_bytes
                                final_save_path = save_path
                        elif is_img_for_compress and not is_thumbnail and self.compress_images:
                            self.logger(f" Skipping compression: Image size ({downloaded_size / 1024:.2f} KB) below threshold.")
                            file_content_bytes.seek(0)
                            final_bytes_to_save = file_content_bytes
                        elif is_thumbnail and downloaded_size > LARGE_THUMBNAIL_THRESHOLD:
                            self.logger(f"⚠️ Downloaded thumbnail '{current_filename_for_log}' ({downloaded_size / 1024:.2f} KB) is large.")
                            file_content_bytes.seek(0)
                            final_bytes_to_save = file_content_bytes
                        else:
                            file_content_bytes.seek(0)
                            final_bytes_to_save = file_content_bytes
                        # Re-check just before writing, using the final (possibly
                        # .webp) name, with both name locks held.
                        save_file = False
                        with self.downloaded_files_lock:
                            with local_filenames_lock:
                                if os.path.exists(final_save_path) and os.path.getsize(final_save_path) > 0:
                                    self.logger(f" -> Exists Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}'")
                                    post_skip_count += 1
                                    file_downloaded_or_exists = True
                                elif current_filename_for_log in self.downloaded_files:
                                    self.logger(f" -> Global Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}' (already downloaded globally)")
                                    post_skip_count += 1
                                    file_downloaded_or_exists = True
                                elif current_filename_for_log in local_processed_filenames:
                                    self.logger(f" -> Local Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}' (already processed in this post)")
                                    post_skip_count += 1
                                    file_downloaded_or_exists = True
                                else:
                                    save_file = True
                        if save_file:
                            try:
                                with open(final_save_path, 'wb') as f:
                                    while True:
                                        chunk = final_bytes_to_save.read(64 * 1024)
                                        if not chunk: break
                                        f.write(chunk)
                                # Record hash and names only after the write succeeded.
                                with self.downloaded_file_hashes_lock:
                                    self.downloaded_file_hashes.add(calculated_hash)
                                with self.downloaded_files_lock:
                                    self.downloaded_files.add(current_filename_for_log)
                                with local_filenames_lock:
                                    local_processed_filenames.add(current_filename_for_log)
                                post_download_count += 1
                                file_downloaded_or_exists = True
                                self.logger(f"✅ Saved: '{current_filename_for_log}' ({downloaded_size / 1024:.1f} KB, Hash: {calculated_hash[:8]}...) in '{folder_basename}'")
                                time.sleep(0.05)
                            except IOError as io_err:
                                self.logger(f"❌ Save Fail: '{current_filename_for_log}' to '{folder_basename}'. Error: {io_err}")
                                post_skip_count += 1
                                # Remove a partially written file, ignoring removal errors.
                                if os.path.exists(final_save_path):
                                    try: os.remove(final_save_path)
                                    except OSError: pass
                                break
                            except Exception as save_err:
                                self.logger(f"❌ Unexpected Save Error: '{current_filename_for_log}' in '{folder_basename}'. Error: {save_err}")
                                post_skip_count += 1
                                if os.path.exists(final_save_path):
                                    try: os.remove(final_save_path)
                                    except OSError: pass
                                break
                        final_bytes_to_save.close()
                        if file_content_bytes is not final_bytes_to_save:
                            file_content_bytes.close()
                    if file_downloaded_or_exists:
                        break
                except requests.exceptions.RequestException as e:
                    self.logger(f"❌ Download Fail: {original_name_for_log}. Error: {e}")
                    post_skip_count += 1
                    break
                except IOError as e:
                    self.logger(f"❌ File I/O Error: {original_name_for_log} in '{folder_basename}'. Error: {e}")
                    post_skip_count += 1
                    break
                except Exception as e:
                    self.logger(f"❌ Unexpected Error during download/save for {original_name_for_log}: {e}")
                    import traceback
                    self.logger(f" Traceback: {traceback.format_exc(limit=2)}")
                    post_skip_count += 1
                    break
                finally:
                    # Runs on every exit from the try above, including the breaks.
                    self.is_downloading_file = False
                    self.current_download_path = None
                    self.signals.file_download_status_signal.emit(False)
            if self.check_cancel(): break
            # Drop a skip request that arrived too late to affect this file.
            if self.skip_current_file_flag.is_set():
                self.skip_current_file_flag.clear()
            if not file_downloaded_or_exists:
                pass
        if self.check_cancel():
            self.logger(f" Post {post_id} processing cancelled.")
            return post_download_count, post_skip_count
        self.logger(f" Post {post_id} Summary: Downloaded={post_download_count}, Skipped={post_skip_count}")
        return post_download_count, post_skip_count
class DownloaderApp(QWidget):
    """Top-level Qt widget of the downloader GUI."""
    # Each signal below is wired to a slot in _connect_signals().
    # bool answer of the add-character prompt -> receive_add_character_result
    character_prompt_response_signal = pyqtSignal(bool)
    # one log message string -> log
    log_signal = pyqtSignal(str)
    # one string argument -> prompt_add_character
    add_character_prompt_signal = pyqtSignal(str)
    # declared here; _connect_signals() wires the worker_signals signal of the
    # same name rather than this one
    file_download_status_signal = pyqtSignal(bool)
    # two ints -> update_progress_display
    overall_progress_signal = pyqtSignal(int, int)
    # two ints and a bool -> download_finished
    finished_signal = pyqtSignal(int, int, bool)
def __init__(self):
    """Initialise state, load the known-names file, build the UI and wire signals."""
    super().__init__()

    # File that persists the known-names list.
    self.config_file = "Known.txt"

    # Download machinery.
    self.cancellation_event = threading.Event()
    self.worker_signals = PostProcessorSignals()
    self.download_thread = None
    self.thread_pool = None
    self.active_futures = []

    # Progress bookkeeping.
    self.total_posts_to_process = self.processed_posts_count = 0
    self.download_counter = self.skip_counter = 0

    # State for the add-character prompt.
    self.prompt_mutex = QMutex()
    self._add_character_response = None

    # Containers shared with workers for de-duplication, each with its lock.
    self.downloaded_files_lock = threading.Lock()
    self.downloaded_files = set()
    self.downloaded_file_hashes_lock = threading.Lock()
    self.downloaded_file_hashes = set()

    # Names are loaded before the widgets exist.
    self.load_known_names()

    self.setWindowTitle("Kemono Downloader v2.3 (Content Dedupe & Skip)")
    self.setGeometry(150, 150, 1050, 820)
    self.setStyleSheet(self.get_dark_theme())
    self.init_ui()
    self._connect_signals()
    self.log_signal.emit(" Local API server functionality has been removed.")
def _connect_signals(self):
    """Connect worker, application and widget signals to their handler methods."""
    wiring = (
        (self.worker_signals.progress_signal, self.log),
        (self.worker_signals.file_download_status_signal, self.update_skip_button_state),
        (self.log_signal, self.log),
        (self.add_character_prompt_signal, self.prompt_add_character),
        (self.character_prompt_response_signal, self.receive_add_character_result),
        (self.overall_progress_signal, self.update_progress_display),
        (self.finished_signal, self.download_finished),
        (self.character_search_input.textChanged, self.filter_character_list),
    )
    for signal, slot in wiring:
        signal.connect(slot)
def load_known_names(self):
    """Load the known-names list from ``self.config_file`` into ``KNOWN_NAMES``.

    Lines are stripped, blanks dropped, duplicates removed and the result
    sorted. A missing file or a read error leaves the list empty; a read
    error additionally shows a warning dialog. The outcome is emitted on
    ``log_signal`` once ``log_output`` exists, otherwise it is printed.
    """
    global KNOWN_NAMES
    loaded_names = []
    if not os.path.exists(self.config_file):
        log_msg = f" Config file '{self.config_file}' not found. Starting empty."
    else:
        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                stripped_lines = [line.strip() for line in f]
            loaded_names = sorted({entry for entry in stripped_lines if entry})
            log_msg = f" Loaded {len(loaded_names)} known names from {self.config_file}"
        except Exception as e:
            log_msg = f"❌ Error loading config '{self.config_file}': {e}"
            QMessageBox.warning(self, "Config Load Error", f"Could not load list from {self.config_file}:\n{e}")
            loaded_names = []
    KNOWN_NAMES = loaded_names

    # The log widget does not exist yet when this runs from __init__.
    if not hasattr(self, 'log_output'):
        print(log_msg)
    else:
        self.log_signal.emit(log_msg)
def save_known_names(self):
    """Write the de-duplicated, sorted ``KNOWN_NAMES`` to ``self.config_file``.

    One name is written per line, and ``KNOWN_NAMES`` is rebound to the
    cleaned list on success. Any failure is logged and shown in a warning
    dialog instead of being raised.
    """
    global KNOWN_NAMES

    def report(signal_text, console_text):
        # Use the log signal when the instance has one, else the console.
        if hasattr(self, 'log_signal'):
            self.log_signal.emit(signal_text)
        else:
            print(console_text)

    try:
        unique_sorted_names = sorted({name for name in KNOWN_NAMES if name})
        with open(self.config_file, 'w', encoding='utf-8') as f:
            f.writelines(name + '\n' for name in unique_sorted_names)
        KNOWN_NAMES = unique_sorted_names
        report(
            f"💾 Saved {len(unique_sorted_names)} known names to {self.config_file}",
            f"Saved {len(unique_sorted_names)} names to {self.config_file}",
        )
    except Exception as e:
        log_msg = f"❌ Error saving config '{self.config_file}': {e}"
        report(log_msg, log_msg)
        QMessageBox.warning(self, "Config Save Error", f"Could not save list to {self.config_file}:\n{e}")
def closeEvent(self, event):
    """Save the known names, then confirm before closing during a download.

    If a download thread is running or a thread pool exists, the user is
    asked to confirm. Declining ignores the close event; accepting cancels
    the download first. Otherwise the event is accepted directly.
    """
    self.save_known_names()

    thread_running = self.download_thread and self.download_thread.isRunning()
    if thread_running or (self.thread_pool is not None):
        reply = QMessageBox.question(
            self, "Confirm Exit",
            "Download in progress. Are you sure you want to exit and cancel?",
            QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
        if reply != QMessageBox.Yes:
            # The user chose to keep the application open.
            self.log_signal.emit(" Application exit cancelled.")
            event.ignore()
            return
        self.log_signal.emit("⚠️ Cancelling active download due to application exit...")
        self.cancel_download()

    self.log_signal.emit(" Application closing.")
    self.log_signal.emit("👋 Exiting application.")
    event.accept()
def init_ui(self):
    """Create all widgets and assemble the two-column main layout.

    The left column holds the inputs, option toggles, action buttons and the
    known-names list; the right column holds the progress log and the
    progress label. Widgets that other methods need are stored on ``self``.
    """
    main_layout = QHBoxLayout()
    left_layout = QVBoxLayout()
    right_layout = QVBoxLayout()
    # --- URL input; edits re-evaluate whether the custom-folder field is shown ---
    left_layout.addWidget(QLabel("🔗 Kemono Creator/Post URL:"))
    self.link_input = QLineEdit()
    self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765")
    self.link_input.textChanged.connect(self.update_custom_folder_visibility)
    left_layout.addWidget(self.link_input)
    # --- Download directory line edit with a Browse button beside it ---
    left_layout.addWidget(QLabel("📁 Download Location:"))
    self.dir_input = QLineEdit()
    self.dir_input.setPlaceholderText("Select folder where downloads will be saved")
    self.dir_button = QPushButton("Browse...")
    self.dir_button.clicked.connect(self.browse_directory)
    dir_layout = QHBoxLayout()
    dir_layout.addWidget(self.dir_input, 1)
    dir_layout.addWidget(self.dir_button)
    left_layout.addLayout(dir_layout)
    # --- Custom folder name container; created hidden ---
    self.custom_folder_widget = QWidget()
    custom_folder_layout = QVBoxLayout(self.custom_folder_widget)
    custom_folder_layout.setContentsMargins(0, 5, 0, 0)
    self.custom_folder_label = QLabel("🗄️ Custom Folder Name (Single Post Only):")
    self.custom_folder_input = QLineEdit()
    self.custom_folder_input.setPlaceholderText("Optional: Save this post to specific folder")
    custom_folder_layout.addWidget(self.custom_folder_label)
    custom_folder_layout.addWidget(self.custom_folder_input)
    self.custom_folder_widget.setVisible(False)
    left_layout.addWidget(self.custom_folder_widget)
    # --- Character/show filter container; created visible ---
    self.character_filter_widget = QWidget()
    character_filter_layout = QVBoxLayout(self.character_filter_widget)
    character_filter_layout.setContentsMargins(0, 5, 0, 0)
    self.character_label = QLabel("🎯 Filter by Show/Character Name:")
    self.character_input = QLineEdit()
    self.character_input.setPlaceholderText("Only download posts matching this known name in title")
    character_filter_layout.addWidget(self.character_label)
    character_filter_layout.addWidget(self.character_input)
    self.character_filter_widget.setVisible(True)
    left_layout.addWidget(self.character_filter_widget)
    # --- Comma-separated skip words ---
    left_layout.addWidget(QLabel("🚫 Skip Posts/Files with Words (comma-separated):"))
    self.skip_words_input = QLineEdit()
    self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview")
    left_layout.addWidget(self.skip_words_input)
    # --- Option row 1: file-type radio buttons ("All" checked initially) ---
    options_layout_1 = QHBoxLayout()
    options_layout_1.addWidget(QLabel("Filter Files:"))
    self.radio_group = QButtonGroup(self)
    self.radio_all = QRadioButton("All")
    self.radio_images = QRadioButton("Images/GIFs")
    self.radio_videos = QRadioButton("Videos")
    self.radio_all.setChecked(True)
    self.radio_group.addButton(self.radio_all)
    self.radio_group.addButton(self.radio_images)
    self.radio_group.addButton(self.radio_videos)
    options_layout_1.addWidget(self.radio_all)
    options_layout_1.addWidget(self.radio_images)
    options_layout_1.addWidget(self.radio_videos)
    options_layout_1.addStretch(1)
    left_layout.addLayout(options_layout_1)
    # --- Option row 2: subfolder toggle (checked) and thumbnails-only toggle (unchecked) ---
    options_layout_2 = QHBoxLayout()
    self.use_subfolders_checkbox = QCheckBox("Separate Folders by Name/Title")
    self.use_subfolders_checkbox.setChecked(True)
    self.use_subfolders_checkbox.toggled.connect(self.update_ui_for_subfolders)
    options_layout_2.addWidget(self.use_subfolders_checkbox)
    self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only")
    self.download_thumbnails_checkbox.setChecked(False)
    self.download_thumbnails_checkbox.setToolTip("Thumbnail download functionality is currently limited without the API.")
    options_layout_2.addWidget(self.download_thumbnails_checkbox)
    options_layout_2.addStretch(1)
    left_layout.addLayout(options_layout_2)
    # --- Option row 3: archive skipping (both checked) and image compression (unchecked) ---
    options_layout_3 = QHBoxLayout()
    self.skip_zip_checkbox = QCheckBox("Skip .zip")
    self.skip_zip_checkbox.setChecked(True)
    options_layout_3.addWidget(self.skip_zip_checkbox)
    self.skip_rar_checkbox = QCheckBox("Skip .rar")
    self.skip_rar_checkbox.setChecked(True)
    options_layout_3.addWidget(self.skip_rar_checkbox)
    self.compress_images_checkbox = QCheckBox("Compress Large Images (to WebP)")
    self.compress_images_checkbox.setChecked(False)
    self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).")
    options_layout_3.addWidget(self.compress_images_checkbox)
    options_layout_3.addStretch(1)
    left_layout.addLayout(options_layout_3)
    # --- Option row 4: multithreading toggle; the label's thread count is a literal 4 ---
    options_layout_4 = QHBoxLayout()
    self.use_multithreading_checkbox = QCheckBox(f"Use Multithreading ({4} Threads)")
    self.use_multithreading_checkbox.setChecked(True)
    self.use_multithreading_checkbox.setToolTip("Speeds up downloads for full creator pages.\nSingle post URLs always use one thread.")
    options_layout_4.addWidget(self.use_multithreading_checkbox)
    options_layout_4.addStretch(1)
    left_layout.addLayout(options_layout_4)
    # --- Action buttons: Start is enabled, Cancel and Skip start disabled ---
    btn_layout = QHBoxLayout()
    self.download_btn = QPushButton("⬇️ Start Download")
    self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;")
    self.download_btn.clicked.connect(self.start_download)
    self.cancel_btn = QPushButton("❌ Cancel")
    self.cancel_btn.setEnabled(False)
    self.cancel_btn.clicked.connect(self.cancel_download)
    self.skip_file_btn = QPushButton("⏭️ Skip Current File")
    self.skip_file_btn.setEnabled(False)
    self.skip_file_btn.setToolTip("Only available in single-thread mode during file download.")
    self.skip_file_btn.clicked.connect(self.skip_current_file)
    btn_layout.addWidget(self.download_btn)
    btn_layout.addWidget(self.cancel_btn)
    btn_layout.addWidget(self.skip_file_btn)
    left_layout.addLayout(btn_layout)
    left_layout.addSpacing(10)
    # --- Known names: header with search box, then the multi-select list ---
    known_chars_label_layout = QHBoxLayout()
    self.known_chars_label = QLabel("🎭 Known Shows/Characters (for Folder Names):")
    self.character_search_input = QLineEdit()
    self.character_search_input.setPlaceholderText("Search characters...")
    known_chars_label_layout.addWidget(self.known_chars_label, 1)
    known_chars_label_layout.addWidget(self.character_search_input)
    left_layout.addLayout(known_chars_label_layout)
    self.character_list = QListWidget()
    # Populated from the module-level KNOWN_NAMES as it is at this moment.
    self.character_list.addItems(KNOWN_NAMES)
    self.character_list.setSelectionMode(QListWidget.ExtendedSelection)
    left_layout.addWidget(self.character_list, 1)
    # --- Add / delete controls for the known-names list ---
    char_manage_layout = QHBoxLayout()
    self.new_char_input = QLineEdit()
    self.new_char_input.setPlaceholderText("Add new show/character name")
    self.add_char_button = QPushButton(" Add")
    self.delete_char_button = QPushButton("🗑️ Delete Selected")
    self.add_char_button.clicked.connect(self.add_new_character)
    # Pressing Enter in the name field triggers the Add button's click.
    self.new_char_input.returnPressed.connect(self.add_char_button.click)
    self.delete_char_button.clicked.connect(self.delete_selected_character)
    char_manage_layout.addWidget(self.new_char_input, 2)
    char_manage_layout.addWidget(self.add_char_button, 1)
    char_manage_layout.addWidget(self.delete_char_button, 1)
    left_layout.addLayout(char_manage_layout)
    # --- Right column: read-only progress log and status label ---
    right_layout.addWidget(QLabel("📜 Progress Log:"))
    self.log_output = QTextEdit()
    self.log_output.setReadOnly(True)
    self.log_output.setMinimumWidth(450)
    self.log_output.setLineWrapMode(QTextEdit.WidgetWidth)
    right_layout.addWidget(self.log_output, 1)
    self.progress_label = QLabel("Progress: Idle")
    self.progress_label.setStyleSheet("padding-top: 5px; font-style: italic;")
    right_layout.addWidget(self.progress_label)
    # Left and right columns share the width with stretch factors 5 and 4.
    main_layout.addLayout(left_layout, 5)
    main_layout.addLayout(right_layout, 4)
    self.setLayout(main_layout)
    # Bring dependent widget visibility in line with the initial checkbox/URL state.
    self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked())
    self.update_custom_folder_visibility()
def get_dark_theme(self):
    """Return the Qt style sheet text used for the dark colour scheme."""
    dark_stylesheet = """
QWidget {
background-color: #2E2E2E;
color: #E0E0E0;
font-family: Segoe UI, Arial, sans-serif;
font-size: 10pt;
}
QLineEdit, QTextEdit, QListWidget {
background-color: #3C3F41;
border: 1px solid #5A5A5A;
padding: 5px;
color: #F0F0F0;
border-radius: 4px;
}
QTextEdit {
font-family: Consolas, Courier New, monospace;
font-size: 9.5pt;
}
QPushButton {
background-color: #555;
color: #F0F0F0;
border: 1px solid #6A6A6A;
padding: 6px 12px;
border-radius: 4px;
min-height: 22px;
}
QPushButton:hover {
background-color: #656565;
border: 1px solid #7A7A7A;
}
QPushButton:pressed {
background-color: #4A4A4A;
}
QPushButton:disabled {
background-color: #404040;
color: #888;
border-color: #555;
}
QLabel {
font-weight: bold;
padding-top: 4px;
padding-bottom: 2px;
color: #C0C0C0;
}
QRadioButton, QCheckBox {
spacing: 5px;
color: #E0E0E0;
padding-top: 4px;
padding-bottom: 4px;
}
QRadioButton::indicator, QCheckBox::indicator {
width: 14px;
height: 14px;
}
QListWidget {
alternate-background-color: #353535;
border: 1px solid #5A5A5A;
}
QListWidget::item:selected {
background-color: #007ACC;
color: #FFFFFF;
}
QToolTip {
background-color: #4A4A4A;
color: #F0F0F0;
border: 1px solid #6A6A6A;
padding: 4px;
border-radius: 3px;
}
"""
    return dark_stylesheet
def browse_directory(self):
    """Open a folder picker and copy the chosen path into the directory field.

    The dialog starts at the currently typed path when that path is an
    existing directory; otherwise an empty start directory is passed.
    """
    typed_path = self.dir_input.text()
    start_dir = typed_path if os.path.isdir(typed_path) else ""
    chosen = QFileDialog.getExistingDirectory(self, "Select Download Folder", start_dir)
    if not chosen:
        # Dialog dismissed without a selection: leave the field untouched.
        return
    self.dir_input.setText(chosen)
def log(self, message):
    """Append a message to the log view and keep it scrolled near the bottom.

    NUL characters are replaced with the text ``[NULL]`` before the message
    is appended. Failures are printed to stdout together with the original
    message instead of being raised.
    """
    try:
        self.log_output.append(str(message).replace('\x00', '[NULL]'))
        bar = self.log_output.verticalScrollBar()
        # Only follow the tail when the view is within 30 units of the end.
        distance_from_end = bar.maximum() - bar.value()
        if distance_from_end <= 30:
            bar.setValue(bar.maximum())
    except Exception as e:
        print(f"GUI Log Error: {e}")
        print(f"Original Message: {message}")
def get_filter_mode(self):
    """Return ``'image'``, ``'video'`` or ``'all'`` based on the checked radio button."""
    choices = (
        (self.radio_images, 'image'),
        (self.radio_videos, 'video'),
    )
    for button, mode in choices:
        if button.isChecked():
            return mode
    # Neither specific filter is selected.
    return 'all'
def add_new_character(self):
    """Add the name typed in the "new name" field to the known-names list.

    Empty input and case-insensitive duplicates are rejected with a warning
    dialog. On success the global list is re-sorted case-insensitively, the
    list widget is rebuilt, the current search filter is re-applied, the
    input is cleared and the list is saved.
    """
    global KNOWN_NAMES
    name_to_add = self.new_char_input.text().strip()
    if not name_to_add:
        QMessageBox.warning(self, "Input Error", "Name cannot be empty.")
        return
    wanted = name_to_add.lower()
    for known in KNOWN_NAMES:
        if known.lower() == wanted:
            QMessageBox.warning(self, "Duplicate Name", f"The name '{name_to_add}' (or similar) already exists in the list.")
            return
    KNOWN_NAMES.append(name_to_add)
    KNOWN_NAMES.sort(key=str.lower)
    # Rebuild the widget from scratch, then restore the active search filter.
    self.character_list.clear()
    self.character_list.addItems(KNOWN_NAMES)
    self.filter_character_list(self.character_search_input.text())
    self.log_signal.emit(f"✅ Added '{name_to_add}' to known names list.")
    self.new_char_input.clear()
    self.save_known_names()
def delete_selected_character(self):
    """Remove the names selected in the list widget after user confirmation.

    Warns when nothing is selected. When at least one name is actually
    removed from the global list, the widget is rebuilt, the search filter is
    re-applied and the list is saved; otherwise only a log line is emitted.
    """
    global KNOWN_NAMES
    selection = self.character_list.selectedItems()
    if not selection:
        QMessageBox.warning(self, "Selection Error", "Please select one or more names to delete.")
        return
    names_to_remove = set()
    for entry in selection:
        names_to_remove.add(entry.text())
    confirm = QMessageBox.question(self, "Confirm Deletion",
                                   f"Are you sure you want to delete {len(names_to_remove)} selected name(s)?",
                                   QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
    if confirm != QMessageBox.Yes:
        return
    count_before = len(KNOWN_NAMES)
    KNOWN_NAMES = [kept for kept in KNOWN_NAMES if kept not in names_to_remove]
    removed_count = count_before - len(KNOWN_NAMES)
    if removed_count <= 0:
        self.log_signal.emit(" No names were removed (selection might have changed?).")
        return
    self.log_signal.emit(f"🗑️ Removed {removed_count} name(s) from the list.")
    self.character_list.clear()
    KNOWN_NAMES.sort(key=str.lower)
    self.character_list.addItems(KNOWN_NAMES)
    self.filter_character_list(self.character_search_input.text())
    self.save_known_names()
def update_custom_folder_visibility(self, url_text=None):
    """Show the custom-folder field only for post URLs with subfolders enabled.

    ``url_text`` defaults to the current content of the URL field. When the
    field is hidden its text is cleared as well.
    """
    text = self.link_input.text() if url_text is None else url_text
    _service, _user, post_id = extract_post_info(text.strip())
    if post_id and self.use_subfolders_checkbox.isChecked():
        self.custom_folder_widget.setVisible(True)
    else:
        self.custom_folder_widget.setVisible(False)
        self.custom_folder_input.clear()
def update_ui_for_subfolders(self, checked):
    """Sync the subfolder-dependent widgets with the checkbox state.

    The character filter follows ``checked``, the custom-folder visibility is
    re-evaluated, and the character filter text is cleared when unchecked.
    """
    self.character_filter_widget.setVisible(checked)
    self.update_custom_folder_visibility()
    if checked:
        return
    self.character_input.clear()
def filter_character_list(self, search_text):
    """Hide every list entry whose text does not contain ``search_text``.

    Matching is a case-insensitive substring test, so an empty search string
    shows every entry.
    """
    needle = search_text.lower()
    rows = self.character_list
    for entry in (rows.item(index) for index in range(rows.count())):
        entry.setHidden(needle not in entry.text().lower())
def update_progress_display(self, total_posts, processed_posts):
    """Render the post counters into the progress label.

    Args:
        total_posts: Number of posts expected; values <= 0 mean "unknown".
        processed_posts: Number of posts handled so far.

    With a known total the label shows "processed / total" and a percentage.
    Without one it shows the running post number, or a "Starting..." text
    when nothing has been processed yet.
    """
    if total_posts > 0:
        # total_posts is strictly positive here, so the division cannot
        # raise ZeroDivisionError and needs no guard.
        percent = (processed_posts / total_posts) * 100
        self.progress_label.setText(f"Progress: {processed_posts} / {total_posts} posts ({percent:.1f}%)")
    elif processed_posts > 0:
        self.progress_label.setText(f"Progress: Processing post {processed_posts}...")
    else:
        self.progress_label.setText("Progress: Starting...")
def start_download(self):
    """Validate the form, reset per-run state and launch a download task.

    Steps, in order: refuse to start while another download is active; read
    all inputs; validate the URL and output directory (offering to create
    the directory); resolve the optional character filter and custom folder
    name; reset counters and shared sets; log the run configuration; then
    start either the multi-threaded or the single-threaded path. Any
    exception while starting is logged and the UI is restored through
    ``download_finished``.
    """
    # A running QThread or an existing worker pool both count as "busy".
    is_running = (self.download_thread and self.download_thread.isRunning()) or (self.thread_pool is not None)
    if is_running:
        self.log_signal.emit("⚠️ Download already in progress.")
        QMessageBox.warning(self, "Busy", "A download is already running.")
        return
    # --- Snapshot every input widget ---
    api_url = self.link_input.text().strip()
    output_dir = self.dir_input.text().strip()
    filter_mode = self.get_filter_mode()
    skip_zip = self.skip_zip_checkbox.isChecked()
    skip_rar = self.skip_rar_checkbox.isChecked()
    use_subfolders = self.use_subfolders_checkbox.isChecked()
    compress_images = self.compress_images_checkbox.isChecked()
    download_thumbnails = self.download_thumbnails_checkbox.isChecked()
    use_multithreading = self.use_multithreading_checkbox.isChecked()
    num_threads = 4
    raw_skip_words = self.skip_words_input.text().strip()
    skip_words_list = []
    if raw_skip_words:
        skip_words_list = [word.strip() for word in raw_skip_words.split(',') if word.strip()]
    # --- Input validation (each failure shows a dialog and aborts) ---
    service, user_id, post_id_from_url = extract_post_info(api_url)
    if not api_url:
        QMessageBox.critical(self, "Input Error", "Please enter a Kemono/Coomer URL.")
        return
    if not service or not user_id:
        QMessageBox.critical(self, "Input Error", "Invalid or unsupported URL format.\nPlease provide a valid creator page or post URL.")
        self.log_signal.emit(f"❌ Invalid URL detected: {api_url}")
        return
    if not output_dir:
        QMessageBox.critical(self, "Input Error", "Please select a download directory.")
        return
    if not os.path.isdir(output_dir):
        reply = QMessageBox.question(self, "Directory Not Found",
                                     f"The directory '{output_dir}' does not exist.\n\nCreate it?",
                                     QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes)
        if reply == QMessageBox.Yes:
            try:
                os.makedirs(output_dir)
                self.log_signal.emit(f" Created download directory: {output_dir}")
            except Exception as e:
                QMessageBox.critical(self, "Directory Error", f"Could not create directory:\n{e}")
                self.log_signal.emit(f"❌ Failed to create directory: {output_dir} - {e}")
                return
        else:
            return
    # Compression silently degrades to "off" when Pillow failed to import.
    if compress_images and Image is None:
        QMessageBox.warning(self, "Dependency Missing", "Image compression requires the Pillow library, but it's not installed.\nPlease run: pip install Pillow\n\nCompression will be disabled for this session.")
        self.log_signal.emit("❌ Cannot compress images: Pillow library not found.")
        compress_images = False
    # --- Optional character filter and custom folder (subfolder mode only) ---
    filter_character = None
    if use_subfolders and self.character_filter_widget.isVisible():
        filter_character = self.character_input.text().strip() or None
    custom_folder_name = None
    if use_subfolders and post_id_from_url and self.custom_folder_widget.isVisible():
        raw_custom_name = self.custom_folder_input.text().strip()
        if raw_custom_name:
            cleaned_custom = clean_folder_name(raw_custom_name)
            if cleaned_custom:
                custom_folder_name = cleaned_custom
            else:
                QMessageBox.warning(self, "Input Warning", f"Custom folder name '{raw_custom_name}' is invalid and will be ignored.")
                self.log_signal.emit(f"⚠️ Invalid custom folder name ignored: {raw_custom_name}")
    # For creator feeds, an unknown filter name may be added to the known list first.
    if use_subfolders and filter_character and not post_id_from_url:
        clean_char_filter = clean_folder_name(filter_character.lower())
        known_names_lower = {name.lower() for name in KNOWN_NAMES}
        if not clean_char_filter:
            self.log_signal.emit(f"❌ Filter name '{filter_character}' is invalid. Aborting.")
            QMessageBox.critical(self, "Filter Error", "The provided filter name is invalid (contains only spaces or special characters).")
            return
        elif filter_character.lower() not in known_names_lower:
            reply = QMessageBox.question(self, "Add Filter Name?",
                                         f"The filter name '{filter_character}' is not in your known names list.\n\nAdd it now and continue?",
                                         QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes)
            if reply == QMessageBox.Yes:
                # Reuse the regular "add name" path by filling its input field.
                self.new_char_input.setText(filter_character)
                self.add_new_character()
                if filter_character.lower() not in {name.lower() for name in KNOWN_NAMES}:
                    self.log_signal.emit(f"⚠️ Failed to add '{filter_character}' automatically. Please add manually if needed.")
                else:
                    self.log_signal.emit(f"✅ Added filter '{filter_character}' to list.")
            elif reply == QMessageBox.No:
                self.log_signal.emit(f" Proceeding without adding '{filter_character}'. Posts matching it might not be saved to a specific folder unless name is derived.")
            else:
                self.log_signal.emit("❌ Download cancelled by user during filter check.")
                return
    # --- Reset per-run state ---
    self.log_output.clear()
    self.cancellation_event.clear()
    self.active_futures = []
    self.total_posts_to_process = 0
    self.processed_posts_count = 0
    self.download_counter = 0
    self.skip_counter = 0
    with self.downloaded_files_lock:
        self.downloaded_files.clear()
    with self.downloaded_file_hashes_lock:
        self.downloaded_file_hashes.clear()
    self.progress_label.setText("Progress: Initializing...")
    # --- Log the effective configuration ---
    self.log_signal.emit("="*40)
    self.log_signal.emit(f"🚀 Starting Download Task @ {time.strftime('%Y-%m-%d %H:%M:%S')}")
    self.log_signal.emit(f" URL: {api_url}")
    self.log_signal.emit(f" Save Location: {output_dir}")
    mode = "Single Post" if post_id_from_url else "Creator Feed"
    self.log_signal.emit(f" Mode: {mode}")
    self.log_signal.emit(f" Subfolders: {'Enabled' if use_subfolders else 'Disabled'}")
    if use_subfolders:
        if custom_folder_name:
            self.log_signal.emit(f" Custom Folder (Post): '{custom_folder_name}'")
        elif filter_character:
            self.log_signal.emit(f" Character Filter: '{filter_character}'")
        else:
            self.log_signal.emit(f" Folder Naming: Automatic (Known Names > Title Extraction)")
    self.log_signal.emit(f" File Type Filter: {filter_mode}")
    self.log_signal.emit(f" Skip: {'.zip' if skip_zip else ''}{', ' if skip_zip and skip_rar else ''}{'.rar' if skip_rar else ''}{'None' if not (skip_zip or skip_rar) else ''}")
    if skip_words_list:
        self.log_signal.emit(f" Skip Words (Title/Filename): {', '.join(skip_words_list)}")
    else:
        self.log_signal.emit(f" Skip Words (Title/Filename): None")
    self.log_signal.emit(f" Compress Images: {'Enabled' if compress_images else 'Disabled'}")
    self.log_signal.emit(f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}")
    # Single-post URLs always take the single-threaded path.
    should_use_multithreading = use_multithreading and not post_id_from_url
    self.log_signal.emit(f" Threading: {'Multi-threaded' if should_use_multithreading else 'Single-threaded'}")
    self.log_signal.emit("="*40)
    # Lock the form for the duration of the run; only Cancel stays active.
    self.set_ui_enabled(False)
    self.cancel_btn.setEnabled(True)
    try:
        # Arguments shared by both download paths. KNOWN_NAMES is copied so
        # the task works on a snapshot of the list.
        common_args = {
            'api_url': api_url,
            'output_dir': output_dir,
            'known_names_copy': list(KNOWN_NAMES),
            'filter_character': filter_character,
            'filter_mode': filter_mode,
            'skip_zip': skip_zip,
            'skip_rar': skip_rar,
            'use_subfolders': use_subfolders,
            'compress_images': compress_images,
            'download_thumbnails': download_thumbnails,
            'service': service,
            'user_id': user_id,
            'downloaded_files': self.downloaded_files,
            'downloaded_files_lock': self.downloaded_files_lock,
            'downloaded_file_hashes': self.downloaded_file_hashes,
            'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock,
            'skip_words_list': skip_words_list,
        }
        if should_use_multithreading:
            self.log_signal.emit(" Initializing multi-threaded download...")
            multi_args = common_args.copy()
            multi_args['num_threads'] = num_threads
            self.start_multi_threaded_download(**multi_args)
        else:
            self.log_signal.emit(" Initializing single-threaded download...")
            single_args = common_args.copy()
            single_args['custom_folder_name'] = custom_folder_name
            single_args['single_post_id'] = post_id_from_url
            self.start_single_threaded_download(**single_args)
    except Exception as e:
        self.log_signal.emit(f"❌ CRITICAL ERROR preparing download task: {e}")
        import traceback
        self.log_signal.emit(traceback.format_exc())
        QMessageBox.critical(self, "Start Error", f"Failed to start download task:\n{e}")
        # Restore the UI as if the run had ended with nothing downloaded.
        self.download_finished(0, 0, False)
def start_single_threaded_download(self, **kwargs):
    """Create, wire up and start the single ``DownloadThread``.

    ``kwargs`` are forwarded to ``DownloadThread`` together with the shared
    cancellation event. If the thread reports a failed initialisation, or
    anything raises while setting it up, the error is shown and the UI is
    restored via ``download_finished``.
    """
    try:
        worker_thread = DownloadThread(cancellation_event=self.cancellation_event, **kwargs)
        self.download_thread = worker_thread
        if worker_thread._init_failed:
            QMessageBox.critical(self, "Thread Error", "Failed to initialize the download thread.\nCheck the log for details.")
            self.download_finished(0, 0, False)
            return
        # Relay the thread's signals through the window's own signals.
        relays = (
            (worker_thread.progress_signal, self.log_signal),
            (worker_thread.add_character_prompt_signal, self.add_character_prompt_signal),
            (worker_thread.file_download_status_signal, self.file_download_status_signal),
            (worker_thread.finished_signal, self.finished_signal),
        )
        for thread_signal, window_signal in relays:
            thread_signal.connect(window_signal)
        # The prompt answer travels the other way: window -> thread.
        self.character_prompt_response_signal.connect(worker_thread.receive_add_character_result)
        worker_thread.start()
        self.log_signal.emit("✅ Single download thread started.")
    except Exception as e:
        import traceback
        self.log_signal.emit(f"❌ CRITICAL ERROR starting single-thread task: {e}")
        self.log_signal.emit(traceback.format_exc())
        QMessageBox.critical(self, "Thread Start Error", f"Failed to start download thread:\n{e}")
        self.download_finished(0, 0, False)
def start_multi_threaded_download(self, **kwargs):
    """Create the worker pool and start the background post-fetcher thread.

    ``kwargs`` must contain ``num_threads`` and ``api_url``. Everything
    except ``num_threads`` is handed to ``_fetch_and_queue_posts`` as the
    worker argument template. Run counters are reset before the fetcher
    starts.
    """
    num_threads = kwargs['num_threads']
    self.thread_pool = ThreadPoolExecutor(max_workers=num_threads, thread_name_prefix='Downloader_')
    self.active_futures = []
    self.processed_posts_count = self.total_posts_to_process = 0
    self.download_counter = self.skip_counter = 0
    # The fetcher does not need the pool size, only the per-worker settings.
    template = {key: value for key, value in kwargs.items() if key != 'num_threads'}
    threading.Thread(
        target=self._fetch_and_queue_posts,
        args=(kwargs['api_url'], template),
        daemon=True,
        name="PostFetcher",
    ).start()
    self.log_signal.emit(f"✅ Post fetcher thread started. {num_threads} worker threads initializing...")
def _fetch_and_queue_posts(self, api_url_input, worker_args_template):
    """Fetch all posts for the URL, then submit one worker task per post.

    Used as the target of the "PostFetcher" thread. Phases:

    1. Drain ``download_from_api`` into a list, honouring cancellation.
    2. On cancellation or a fetch error, emit ``finished_signal`` and shut
       the pool down.
    3. Discard entries that are not dicts *before* submitting anything, so
       the expected total is final by the time worker callbacks start
       comparing against it.
    4. Submit a ``PostProcessorWorker`` per post to the thread pool.
    5. If submission stopped early, shrink the expected total to what was
       actually submitted and emit ``finished_signal`` when nothing is left
       pending, so the UI is not left waiting forever.

    Args:
        api_url_input: The creator/post URL given by the user.
        worker_args_template: Settings dict built by ``start_download``.
    """
    all_posts = []
    fetch_error = False
    try:
        self.log_signal.emit(" Starting post fetch...")

        def fetcher_logger(msg):
            self.log_signal.emit(f"[Fetcher] {msg}")

        post_generator = download_from_api(api_url_input, logger=fetcher_logger)
        for posts_batch in post_generator:
            if self.cancellation_event.is_set():
                self.log_signal.emit("⚠️ Post fetching cancelled by user.")
                fetch_error = True
                break
            if isinstance(posts_batch, list):
                all_posts.extend(posts_batch)
                self.total_posts_to_process = len(all_posts)
                if self.total_posts_to_process % 250 == 0:
                    self.log_signal.emit(f" Fetched {self.total_posts_to_process} posts...")
            else:
                self.log_signal.emit(f"❌ API returned non-list batch: {type(posts_batch)}. Stopping fetch.")
                fetch_error = True
                break
        if not fetch_error:
            self.log_signal.emit(f"✅ Finished fetching. Total posts found: {self.total_posts_to_process}")
    except Exception as e:
        self.log_signal.emit(f"❌ Unexpected Error during post fetching: {e}")
        import traceback
        self.log_signal.emit(traceback.format_exc(limit=3))
        fetch_error = True
    if self.cancellation_event.is_set() or fetch_error:
        self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set())
        if self.thread_pool:
            self.thread_pool.shutdown(wait=False, cancel_futures=True)
            self.thread_pool = None
        return

    # Filter out malformed entries up front. Adjusting the counters while
    # tasks are already completing (as the loop below used to do, bumping
    # "processed" AND shrinking "total" for the same item) let the finish
    # condition fire while real tasks were still running.
    valid_posts = []
    for post_data in all_posts:
        if isinstance(post_data, dict):
            valid_posts.append(post_data)
        else:
            self.log_signal.emit(f"⚠️ Skipping invalid post data item (type: {type(post_data)}).")
    self.total_posts_to_process = len(valid_posts)

    if self.total_posts_to_process == 0:
        self.log_signal.emit("😕 No posts found or fetched successfully.")
        self.finished_signal.emit(0, 0, False)
        return
    self.log_signal.emit(f" Submitting {self.total_posts_to_process} post tasks to worker pool...")
    self.processed_posts_count = 0
    self.overall_progress_signal.emit(self.total_posts_to_process, 0)
    common_worker_args = {
        'download_root': worker_args_template['output_dir'],
        'known_names': worker_args_template['known_names_copy'],
        'filter_character': worker_args_template['filter_character'],
        'unwanted_keywords': {'spicy', 'hd', 'nsfw', '4k', 'preview'},
        'filter_mode': worker_args_template['filter_mode'],
        'skip_zip': worker_args_template['skip_zip'],
        'skip_rar': worker_args_template['skip_rar'],
        'use_subfolders': worker_args_template['use_subfolders'],
        'target_post_id_from_initial_url': worker_args_template.get('single_post_id'),
        'custom_folder_name': worker_args_template.get('custom_folder_name'),
        'compress_images': worker_args_template['compress_images'],
        'download_thumbnails': worker_args_template['download_thumbnails'],
        'service': worker_args_template['service'],
        'user_id': worker_args_template['user_id'],
        'api_url_input': worker_args_template['api_url'],
        'cancellation_event': self.cancellation_event,
        'signals': self.worker_signals,
        'downloaded_files': self.downloaded_files,
        'downloaded_files_lock': self.downloaded_files_lock,
        'downloaded_file_hashes': self.downloaded_file_hashes,
        'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock,
        'skip_words_list': worker_args_template['skip_words_list'],
    }
    submitted_count = 0
    for post_data in valid_posts:
        if self.cancellation_event.is_set():
            self.log_signal.emit("⚠️ Cancellation detected during task submission.")
            break
        # Read the attribute once: another thread may reset it to None
        # between a check and the submit call.
        pool = self.thread_pool
        if pool is None:
            self.log_signal.emit("⚠️ Thread pool shutdown before submitting all tasks.")
            break
        worker = PostProcessorWorker(post_data=post_data, **common_worker_args)
        try:
            future = pool.submit(worker.process)
            future.add_done_callback(self._handle_future_result)
            self.active_futures.append(future)
            submitted_count += 1
        except RuntimeError as e:
            self.log_signal.emit(f"⚠️ Error submitting task (pool might be shutting down): {e}")
            break
        except Exception as e:
            self.log_signal.emit(f"❌ Unexpected error submitting task: {e}")
            break
    self.log_signal.emit(f" {submitted_count} / {self.total_posts_to_process} tasks submitted.")

    if submitted_count < self.total_posts_to_process:
        # Submission stopped early. The completion callbacks only signal
        # "finished" once processed >= total, which the unsubmitted posts
        # would make unreachable, so shrink the total to what can actually
        # complete.
        self.total_posts_to_process = submitted_count
        # NOTE(review): these counters are shared with worker callbacks
        # without a common lock; in a narrow window a callback may also emit
        # the finished signal. A duplicate notification is preferred over a
        # UI that never unlocks.
        if self.processed_posts_count >= submitted_count:
            self.finished_signal.emit(self.download_counter, self.skip_counter,
                                      self.cancellation_event.is_set())
def _handle_future_result(self, future: Future):
self.processed_posts_count += 1
downloaded_res, skipped_res = 0, 0
try:
if future.cancelled():
pass
elif future.exception():
exc = future.exception()
self.log_signal.emit(f"❌ Error in worker thread: {exc}")
pass
else:
downloaded, skipped = future.result()
downloaded_res = downloaded
skipped_res = skipped
with threading.Lock():
self.download_counter += downloaded_res
self.skip_counter += skipped_res
self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count)
except Exception as e:
self.log_signal.emit(f"❌ Error in result callback handling: {e}")
if self.processed_posts_count >= self.total_posts_to_process and self.total_posts_to_process > 0:
if self.processed_posts_count >= self.total_posts_to_process:
self.log_signal.emit("🏁 All submitted tasks have completed or failed.")
cancelled = self.cancellation_event.is_set()
self.finished_signal.emit(self.download_counter, self.skip_counter, cancelled)
def set_ui_enabled(self, enabled):
    """Enable or disable the input controls as a group.

    The custom-folder and character-filter containers are only enabled when
    subfolders are switched on as well. The Cancel button always gets the
    opposite state, and re-enabling the form also refreshes the dependent
    visibility and disables the Skip button.
    """
    plain_controls = (
        self.download_btn,
        self.link_input,
        self.dir_input,
        self.dir_button,
        self.radio_all,
        self.radio_images,
        self.radio_videos,
        self.skip_zip_checkbox,
        self.skip_rar_checkbox,
        self.use_subfolders_checkbox,
        self.compress_images_checkbox,
        self.download_thumbnails_checkbox,
        self.use_multithreading_checkbox,
        self.skip_words_input,
        self.character_search_input,
        self.new_char_input,
        self.add_char_button,
        self.delete_char_button,
    )
    for control in plain_controls:
        control.setEnabled(enabled)
    subfolders_on = self.use_subfolders_checkbox.isChecked()
    for container in (self.custom_folder_widget, self.character_filter_widget):
        container.setEnabled(enabled and subfolders_on)
    if enabled:
        self.update_ui_for_subfolders(subfolders_on)
        self.update_custom_folder_visibility()
    self.cancel_btn.setEnabled(not enabled)
    if enabled:
        self.skip_file_btn.setEnabled(False)
def cancel_download(self):
    """Request cancellation of the active download.

    Does nothing unless the Cancel button is enabled. Sets the shared
    cancellation event, disables the button, updates the progress label and,
    when a worker pool with tracked futures exists, tries to cancel each
    future.
    """
    if not self.cancel_btn.isEnabled():
        return
    self.log_signal.emit("⚠️ Requesting cancellation...")
    self.cancellation_event.set()
    self.cancel_btn.setEnabled(False)
    self.progress_label.setText("Progress: Cancelling...")
    if not (self.thread_pool and self.active_futures):
        return
    # Future.cancel() returns True only when the cancellation took effect.
    cancelled_count = sum(1 for pending in self.active_futures if pending.cancel())
    if cancelled_count > 0:
        self.log_signal.emit(f" Attempted to cancel {cancelled_count} pending/running tasks.")
def skip_current_file(self):
    """Ask the running single download thread to skip its current file.

    In multi-threaded mode the request is refused with a log line and an
    information dialog; with no active download only a log line is written.
    """
    single_thread = self.download_thread
    if single_thread and single_thread.isRunning():
        single_thread.skip_file()
        return
    if self.thread_pool:
        self.log_signal.emit(" Skipping individual files is not supported in multi-threaded mode.")
        QMessageBox.information(self, "Action Not Supported", "Skipping individual files is only available in single-threaded mode.")
        return
    self.log_signal.emit(" Skip requested, but no download is active.")
def update_skip_button_state(self, is_downloading_active):
    """Enable the Skip button only while a single-thread file download runs.

    The button is enabled when all of the following hold: no worker pool
    exists, the Start button is disabled (a run is in progress), the single
    download thread is running, and ``is_downloading_active`` is truthy.

    Args:
        is_downloading_active: Whether a file transfer is currently active.
    """
    # ``a and b and c`` yields the first falsy operand, which may be None
    # when there is no download thread. Collapse every operand to a real
    # bool so setEnabled never receives None.
    single_thread_running = bool(self.download_thread and self.download_thread.isRunning())
    can_skip = (
        self.thread_pool is None
        and not self.download_btn.isEnabled()
        and single_thread_running
        and bool(is_downloading_active)
    )
    self.skip_file_btn.setEnabled(can_skip)
def download_finished(self, total_downloaded, total_skipped, cancelled):
    """Report the outcome of a run and return the window to its idle state.

    Logs a summary, detaches and drops the single download thread (if any),
    shuts down and drops the worker pool (if any), clears the tracked
    futures and the cancellation event, and re-enables the form.
    """
    separator = "="*40
    status = "Finished"
    if cancelled:
        status = "Cancelled"
    self.log_signal.emit(separator)
    self.log_signal.emit(f"🏁 Download {status}!")
    self.log_signal.emit(f" Summary: Downloaded={total_downloaded}, Skipped={total_skipped}")
    self.progress_label.setText(f"{status}: {total_downloaded} downloaded, {total_skipped} skipped.")
    self.log_signal.emit(separator)

    finished_thread = self.download_thread
    if finished_thread:
        try:
            self.character_prompt_response_signal.disconnect(finished_thread.receive_add_character_result)
        except TypeError:
            # disconnect raised TypeError; there is nothing to undo.
            pass
        self.download_thread = None

    pool = self.thread_pool
    if pool:
        self.log_signal.emit(" Shutting down worker thread pool...")
        pool.shutdown(wait=False, cancel_futures=True)
        self.thread_pool = None
        self.active_futures = []

    self.cancellation_event.clear()
    self.set_ui_enabled(True)
    for button in (self.cancel_btn, self.skip_file_btn):
        button.setEnabled(False)
def prompt_add_character(self, character_name):
    """Ask whether an unknown filter name should be added, then report back.

    The user's answer (True for Yes) is emitted on
    ``character_prompt_response_signal``. When the user agrees and the name
    is not yet known (case-insensitive), it is added through
    ``add_new_character``; if it is still missing afterwards the answer is
    downgraded to False.
    """
    reply = QMessageBox.question(self, "Add Filter Name?",
                                 f"The filter name '{character_name}' is not in your known list.\n\nAdd it now and continue download?",
                                 QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes)
    result = (reply == QMessageBox.Yes)
    if result:
        # Route the addition through the normal "add name" input field.
        self.new_char_input.setText(character_name)
        if character_name.lower() not in {n.lower() for n in KNOWN_NAMES}:
            self.add_new_character()
            # Verify the addition took effect before confirming to the caller.
            if character_name.lower() not in {n.lower() for n in KNOWN_NAMES}:
                self.log_signal.emit(f"⚠️ Failed to add '{character_name}' via prompt. Check for errors.")
                result = False
        # NOTE(review): the file's indentation was lost; this else is assumed
        # to pair with the "not yet known" check above — confirm against the
        # original source.
        else:
            self.log_signal.emit(f" Filter name '{character_name}' was already present or added.")
    self.character_prompt_response_signal.emit(result)
def receive_add_character_result(self, result):
    """Store the answer to the add-character prompt and log it.

    The stored value is written while holding ``self.prompt_mutex``.
    """
    guard = QMutexLocker(self.prompt_mutex)
    with guard:
        self._add_character_response = result
    self.log_signal.emit(f" Received prompt response: {'Yes' if result else 'No'}")
class DownloadThread(QThread):
progress_signal = pyqtSignal(str)
add_character_prompt_signal = pyqtSignal(str)
file_download_status_signal = pyqtSignal(bool)
finished_signal = pyqtSignal(int, int, bool)
def __init__(self, api_url, output_dir, known_names_copy,
             cancellation_event, single_post_id=None,
             filter_character=None, filter_mode='all', skip_zip=True, skip_rar=True,
             use_subfolders=True, custom_folder_name=None, compress_images=False,
             download_thumbnails=False, service=None, user_id=None,
             downloaded_files=None, downloaded_files_lock=None,
             downloaded_file_hashes=None, downloaded_file_hashes_lock=None,
             skip_words_list=None):
    """Store the download settings and prepare per-thread state.

    ``known_names_copy`` is copied into a new list. The shared sets and
    locks are used as given; any that are ``None`` are replaced with fresh
    private ones, and a missing skip-word list becomes an empty list. When
    ``service`` or ``user_id`` is missing, the error is printed and emitted
    (best effort) and ``_init_failed`` is set.
    """
    super().__init__()
    self._init_failed = False

    # What to download and where to put it.
    self.api_url_input = api_url
    self.service = service
    self.user_id = user_id
    self.initial_target_post_id = single_post_id
    self.output_dir = output_dir
    self.custom_folder_name = custom_folder_name
    self.use_subfolders = use_subfolders

    # Filtering and post-processing options.
    self.known_names = list(known_names_copy)
    self.filter_character = filter_character
    self.filter_mode = filter_mode
    self.skip_zip = skip_zip
    self.skip_rar = skip_rar
    self.compress_images = compress_images
    self.download_thumbnails = download_thumbnails
    if skip_words_list is None:
        skip_words_list = []
    self.skip_words_list = skip_words_list

    # De-duplication state; identity checks keep caller-supplied (possibly
    # empty) containers instead of replacing them.
    if downloaded_files is None:
        downloaded_files = set()
    if downloaded_files_lock is None:
        downloaded_files_lock = threading.Lock()
    if downloaded_file_hashes is None:
        downloaded_file_hashes = set()
    if downloaded_file_hashes_lock is None:
        downloaded_file_hashes_lock = threading.Lock()
    self.downloaded_files = downloaded_files
    self.downloaded_files_lock = downloaded_files_lock
    self.downloaded_file_hashes = downloaded_file_hashes
    self.downloaded_file_hashes_lock = downloaded_file_hashes_lock

    # Runtime control and prompt hand-off state.
    self.cancellation_event = cancellation_event
    self.skip_current_file_flag = threading.Event()
    self.is_downloading_file = False
    self.current_download_path = None
    self._add_character_response = None
    self.prompt_mutex = QMutex()

    if self.service and self.user_id:
        return
    log_msg = f"❌ Thread Init Error: Missing service ('{self.service}') or user ID ('{self.user_id}') for URL '{api_url}'"
    print(log_msg)
    try:
        self.progress_signal.emit(log_msg)
    except RuntimeError:
        pass
    self._init_failed = True
def run(self):
    """Fetch posts from the API and process them sequentially on this thread.

    Flow:
      1. If ``__init__`` flagged a failure, emit ``finished_signal(0, 0, False)``
         and stop.
      2. When sub-folders are used with a character filter and no custom
         folder name, confirm the filter via
         ``_check_and_prompt_filter_character``; stop if that returns False.
      3. Build a single ``PostProcessorWorker`` and reuse it for every post
         yielded, batch by batch, by ``download_from_api``.
      4. Check for cancellation before each batch and before each post.

    ``finished_signal`` is emitted exactly once per call, carrying
    ``(total_downloaded, total_skipped, cancelled_by_user)``. Exceptions are
    reported through ``progress_signal`` and never propagate out of the
    thread.
    """
    # Function-scope import: the module's visible top-level imports do not
    # include ``traceback``.
    import traceback

    if self._init_failed:
        self.finished_signal.emit(0, 0, False)
        return
    unwanted_keywords = {'spicy', 'hd', 'nsfw', '4k', 'preview'}
    grand_total_downloaded = 0
    grand_total_skipped = 0
    cancelled_by_user = False
    try:
        if self.use_subfolders and self.filter_character and not self.custom_folder_name:
            if not self._check_and_prompt_filter_character():
                # Do not emit finished_signal here: the ``finally`` clause
                # below already emits (0, 0, False) on this path, and a
                # second emit would notify listeners twice.
                return
        # Adapter object whose signals are re-emitted through this thread's
        # own signals.
        worker_signals_adapter = PostProcessorSignals()
        worker_signals_adapter.progress_signal.connect(self.progress_signal)
        worker_signals_adapter.file_download_status_signal.connect(self.file_download_status_signal)
        post_worker = PostProcessorWorker(
            post_data=None,
            download_root=self.output_dir,
            known_names=self.known_names,
            filter_character=self.filter_character,
            unwanted_keywords=unwanted_keywords,
            filter_mode=self.filter_mode,
            skip_zip=self.skip_zip,
            skip_rar=self.skip_rar,
            use_subfolders=self.use_subfolders,
            target_post_id_from_initial_url=self.initial_target_post_id,
            custom_folder_name=self.custom_folder_name,
            compress_images=self.compress_images,
            download_thumbnails=self.download_thumbnails,
            service=self.service,
            user_id=self.user_id,
            api_url_input=self.api_url_input,
            cancellation_event=self.cancellation_event,
            signals=worker_signals_adapter,
            downloaded_files=self.downloaded_files,
            downloaded_files_lock=self.downloaded_files_lock,
            downloaded_file_hashes=self.downloaded_file_hashes,
            downloaded_file_hashes_lock=self.downloaded_file_hashes_lock,
            skip_words_list=self.skip_words_list,
        )
        # Share the skip flag so skip_file() on this thread reaches the worker.
        post_worker.skip_current_file_flag = self.skip_current_file_flag
        self.progress_signal.emit(" Starting post fetch...")

        def thread_logger(msg):
            self.progress_signal.emit(msg)

        post_generator = download_from_api(self.api_url_input, logger=thread_logger)
        for posts_batch in post_generator:
            if self.isInterruptionRequested():
                self.progress_signal.emit("⚠️ Download cancelled before processing batch.")
                cancelled_by_user = True
                break
            for post in posts_batch:
                if self.isInterruptionRequested():
                    self.progress_signal.emit("⚠️ Download cancelled during post processing.")
                    cancelled_by_user = True
                    break
                # The one worker instance is re-pointed at each post in turn.
                post_worker.post = post
                try:
                    downloaded, skipped = post_worker.process()
                    grand_total_downloaded += downloaded
                    grand_total_skipped += skipped
                except Exception as proc_e:
                    # A failing post is logged and counted as one skip; the
                    # remaining posts are still processed.
                    post_id_err = post.get('id', 'N/A') if isinstance(post, dict) else 'N/A'
                    self.progress_signal.emit(f"❌ Error processing post {post_id_err}: {proc_e}")
                    self.progress_signal.emit(traceback.format_exc(limit=2))
                    grand_total_skipped += 1
                self.msleep(20)
            if cancelled_by_user:
                break
        if not cancelled_by_user:
            self.progress_signal.emit("✅ Post fetching and processing complete.")
    except Exception as e:
        log_msg = f"\n❌ An critical error occurred in download thread: {e}"
        self.progress_signal.emit(log_msg)
        tb_str = traceback.format_exc()
        self.progress_signal.emit("--- Traceback ---")
        for line in tb_str.splitlines():
            self.progress_signal.emit(" " + line)
        self.progress_signal.emit("--- End Traceback ---")
        # A crash is reported as an error, not as a user cancellation.
        cancelled_by_user = False
    finally:
        # Single exit point for every path that entered the ``try``.
        self.finished_signal.emit(grand_total_downloaded, grand_total_skipped, cancelled_by_user)
def _check_and_prompt_filter_character(self):
    """Validate the character filter, asking the user about unknown names.

    Returns:
        True when the filter may be used: it is already among the known
        names (case-insensitive), or the user agreed to add it.
        False when the filter cleans down to an empty name, the user
        declined, or an interruption was requested while waiting.
    """
    requested = self.filter_character
    sanitized = clean_folder_name(requested.lower())
    lowered_known = {known.lower() for known in self.known_names}

    if not sanitized:
        self.progress_signal.emit(f"❌ Filter name '{requested}' is invalid. Aborting.")
        return False

    if requested.lower() in lowered_known:
        return True

    self.progress_signal.emit(f"❓ Filter '{requested}' not found in known list.")
    # Clear any previous answer before asking again.
    with QMutexLocker(self.prompt_mutex):
        self._add_character_response = None
    self.add_character_prompt_signal.emit(requested)
    self.progress_signal.emit(" Waiting for user confirmation to add filter name...")

    # Poll until receive_add_character_result() stores an answer, staying
    # responsive to cancellation in the meantime.
    while self._add_character_response is None:
        if self.isInterruptionRequested():
            self.progress_signal.emit("⚠️ Cancelled while waiting for user input on filter name.")
            return False
        self.msleep(200)

    if not self._add_character_response:
        self.progress_signal.emit(f"❌ User declined to add filter '{requested}'. Aborting download.")
        return False

    self.progress_signal.emit(f"✅ User confirmed adding '{requested}'. Continuing.")
    if requested not in self.known_names:
        self.known_names.append(requested)
    return True
def skip_file(self):
    """Request that the file currently being downloaded be skipped.

    Does nothing when the thread is not running. When it is running but no
    file download is active, only a log message is emitted.
    """
    if not self.isRunning():
        return
    if self.is_downloading_file:
        self.progress_signal.emit("⏭️ Skip requested for current file.")
        self.skip_current_file_flag.set()
    else:
        self.progress_signal.emit(" Skip requested, but no file download active.")
def receive_add_character_result(self, result):
    """Store the user's answer to the add-filter-name prompt and log it.

    Args:
        result: Truthy when the user accepted, falsy when they declined.
            The value is written under ``prompt_mutex``; the polling loop
            in ``_check_and_prompt_filter_character`` picks it up.
    """
    with QMutexLocker(self.prompt_mutex):
        self._add_character_response = result
    answer_label = 'Yes' if result else 'No'
    self.progress_signal.emit(f" Received prompt response: {answer_label}")
def isInterruptionRequested(self):
    """Report whether this thread has been asked to stop.

    True when either the base-class interruption flag is set or the
    cancellation event supplied at construction is set.
    """
    if super().isInterruptionRequested():
        return True
    return self.cancellation_event.is_set()
if __name__ == '__main__':
    # Qt allows only one QApplication per process, so it is created exactly
    # once here. The earlier code built two instances from the same argv.
    qt_app = QApplication(sys.argv)
    # Alias kept in case other code in this module refers to the
    # application through the name ``app``.
    app = qt_app
    # Resolve the icon next to this script so start-up does not depend on
    # the current working directory.
    icon_path = os.path.join(os.path.dirname(__file__), 'Kemono.ico')
    qt_app.setWindowIcon(QIcon(icon_path))
    downloader = DownloaderApp()
    downloader.show()
    exit_code = qt_app.exec_()
    print(f"Application finished with exit code: {exit_code}")
    sys.exit(exit_code)