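# Kemono/Coomer downloader GUI (PyQt5). Fetches a creator feed or a single post through
# the sites' /api/v1/ endpoints and saves post files/attachments, with optional subfolder
# naming, skip-word filters, duplicate detection (filename + MD5) and WebP compression.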
import sys
import os
import time
import requests
import re
import threading
import queue
import hashlib
from concurrent.futures import ThreadPoolExecutor, Future, CancelledError
from PyQt5.QtGui import QIcon
from PyQt5.QtWidgets import (
QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton,
QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget,
QRadioButton, QButtonGroup, QCheckBox, QMainWindow
)
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject
from urllib.parse import urlparse
try:
from PIL import Image
except ImportError:
print("ERROR: Pillow library not found. Please install it: pip install Pillow")
Image = None
from io import BytesIO
fastapi_app = None
KNOWN_NAMES = []
def clean_folder_name(name):
if not isinstance(name, str): name = str(name)
cleaned = re.sub(r'[^\w\s\-\_]', '', name)
return cleaned.strip().replace(' ', '_')
def clean_filename(name):
if not isinstance(name, str): name = str(name)
cleaned = re.sub(r'[^\w\s\-\_\.]', '', name)
return cleaned.strip().replace(' ', '_')
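# Illustrative behaviour of the two cleaners above (hypothetical inputs): characters
# outside the allowed set are stripped and spaces become underscores, e.g.
#   clean_folder_name("My Show: Vol 2!")    -> "My_Show_Vol_2"
#   clean_filename("cover art (final).png") -> "cover_art_final.png"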
def extract_folder_name_from_title(title, unwanted_keywords):
if not title: return 'Uncategorized'
title_lower = title.lower()
tokens = title_lower.split()
for token in tokens:
clean_token = clean_folder_name(token)
if clean_token and clean_token not in unwanted_keywords:
return clean_token
return 'Uncategorized'
def match_folders_from_title(title, known_names, unwanted_keywords):
if not title: return []
cleaned_title = clean_folder_name(title.lower())
matched_cleaned_names = set()
for name in known_names:
cleaned_name_for_match = clean_folder_name(name.lower())
if not cleaned_name_for_match: continue
if cleaned_name_for_match in cleaned_title:
if cleaned_name_for_match not in unwanted_keywords:
matched_cleaned_names.add(cleaned_name_for_match)
return list(matched_cleaned_names)
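# Illustrative example (hypothetical inputs): with known_names = ["Zelda", "Samus"] and
# title "Zelda beach set (WIP)", the cleaned title contains "zelda", so ["zelda"] is
# returned; matching is a simple substring test on the cleaned, lower-cased strings.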
def is_image(filename):
if not filename: return False
return filename.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.gif'))
def is_video(filename):
if not filename: return False
return filename.lower().endswith(('.mp4', '.mov', '.mkv', '.webm', '.avi', '.wmv'))
def is_zip(filename):
if not filename: return False
return filename.lower().endswith('.zip')
def is_rar(filename):
if not filename: return False
return filename.lower().endswith('.rar')
def is_post_url(url):
if not isinstance(url, str): return False
return '/post/' in urlparse(url).path
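# e.g. is_post_url("https://kemono.su/patreon/user/12345/post/98765") is True because the
# path contains '/post/'; a bare creator URL returns False (IDs here are illustrative).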
def extract_post_info(url_string):
service, user_id, post_id = None, None, None
if not isinstance(url_string, str) or not url_string.strip():
return None, None, None
try:
parsed_url = urlparse(url_string.strip())
domain = parsed_url.netloc.lower()
path_parts = [part for part in parsed_url.path.strip('/').split('/') if part]
is_kemono = 'kemono.su' in domain or 'kemono.party' in domain
is_coomer = 'coomer.su' in domain or 'coomer.party' in domain
if not (is_kemono or is_coomer):
return None, None, None
if len(path_parts) >= 3 and path_parts[1].lower() == 'user':
service = path_parts[0]
user_id = path_parts[2]
if len(path_parts) >= 5 and path_parts[3].lower() == 'post':
post_id = path_parts[4]
return service, user_id, post_id
if len(path_parts) >= 5 and path_parts[0].lower() == 'api' and path_parts[1].lower() == 'v1' and path_parts[3].lower() == 'user':
service = path_parts[2]
user_id = path_parts[4]
if len(path_parts) >= 7 and path_parts[5].lower() == 'post':
post_id = path_parts[6]
return service, user_id, post_id
except ValueError:
print(f"Debug: ValueError parsing URL '{url_string}'")
return None, None, None
except Exception as e:
print(f"Debug: Exception during extract_post_info for URL '{url_string}': {e}")
return None, None, None
return None, None, None
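# Illustrative parses (hypothetical IDs):
#   extract_post_info("https://kemono.su/patreon/user/12345")            -> ("patreon", "12345", None)
#   extract_post_info("https://kemono.su/patreon/user/12345/post/98765") -> ("patreon", "12345", "98765")
# API-style paths (/api/v1/<service>/user/<id>[/post/<id>]) are handled by the second branch.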
def fetch_posts_paginated(api_url_base, headers, offset, logger):
paginated_url = f'{api_url_base}?o={offset}'
logger(f" Fetching: {paginated_url}")
try:
response = requests.get(paginated_url, headers=headers, timeout=45)
response.raise_for_status()
if 'application/json' not in response.headers.get('Content-Type', ''):
raise RuntimeError(f"Unexpected content type received: {response.headers.get('Content-Type')}. Body: {response.text[:200]}")
return response.json()
except requests.exceptions.Timeout:
raise RuntimeError(f"Timeout fetching page offset {offset}")
except requests.exceptions.RequestException as e:
err_msg = f"Error fetching page offset {offset}: {e}"
if e.response is not None:
err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
raise RuntimeError(err_msg)
except ValueError as e:
raise RuntimeError(f"Error decoding JSON response for offset {offset}: {e}. Body: {response.text[:200]}")
except Exception as e:
raise RuntimeError(f"Unexpected error processing page offset {offset}: {e}")
def download_from_api(api_url_input, logger=print):
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
service, user_id, target_post_id = extract_post_info(api_url_input)
if not service or not user_id:
logger(f"❌ Invalid or unrecognized URL: {api_url_input}. Cannot fetch.")
return
parsed_input = urlparse(api_url_input)
api_domain = parsed_input.netloc if ('kemono.su' in parsed_input.netloc.lower() or 'coomer.su' in parsed_input.netloc.lower() or 'kemono.party' in parsed_input.netloc.lower() or 'coomer.party' in parsed_input.netloc.lower()) else "kemono.su"
api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}"
offset = 0
page = 1
processed_target_post = False
while True:
if target_post_id and processed_target_post:
logger(f"✅ Target post {target_post_id} found and processed. Stopping.")
break
logger(f"\n🔄 Fetching page {page} (offset {offset}) for user {user_id} on {api_domain}...")
try:
posts_batch = fetch_posts_paginated(api_base_url, headers, offset, logger)
if not isinstance(posts_batch, list):
logger(f"❌ API Error: Expected a list of posts, got {type(posts_batch)}. Response: {str(posts_batch)[:200]}")
break
except RuntimeError as e:
logger(f"{e}")
logger(" Aborting pagination due to error.")
break
except Exception as e:
logger(f"❌ Unexpected error during fetch loop: {e}")
break
if not posts_batch:
if page == 1 and not target_post_id:
logger("😕 No posts found for this creator.")
elif not target_post_id:
logger("✅ Reached end of posts.")
break
logger(f"📦 Found {len(posts_batch)} posts on page {page}.")
if target_post_id:
matching_post = next((post for post in posts_batch if str(post.get('id')) == str(target_post_id)), None)
if matching_post:
logger(f"🎯 Found target post {target_post_id} on page {page}.")
yield [matching_post]
processed_target_post = True
else:
logger(f" Target post {target_post_id} not found on this page.")
pass
else:
yield posts_batch
if not (target_post_id and processed_target_post):
page_size = 50
offset += page_size
page += 1
time.sleep(0.6)
if target_post_id and not processed_target_post:
logger(f"❌ Target post ID {target_post_id} was not found for this creator.")
class PostProcessorSignals(QObject):
progress_signal = pyqtSignal(str)
file_download_status_signal = pyqtSignal(bool)
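# PostProcessorSignals is a QObject so that code running on worker threads can emit
# progress text and the "file download in progress" flag back to the GUI thread (Qt
# queues cross-thread signal delivery); PostProcessorWorker below is a plain object
# executed on a worker thread rather than a QObject itself.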
class PostProcessorWorker:
def __init__(self, post_data, download_root, known_names, filter_character,
unwanted_keywords, filter_mode, skip_zip, skip_rar,
use_subfolders, target_post_id_from_initial_url, custom_folder_name,
compress_images, download_thumbnails, service, user_id,
api_url_input, cancellation_event, signals,
downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock,
skip_words_list=None):
self.post = post_data
self.download_root = download_root
self.known_names = known_names
self.filter_character = filter_character
self.unwanted_keywords = unwanted_keywords
self.filter_mode = filter_mode
self.skip_zip = skip_zip
self.skip_rar = skip_rar
self.use_subfolders = use_subfolders
self.target_post_id_from_initial_url = target_post_id_from_initial_url
self.custom_folder_name = custom_folder_name
self.compress_images = compress_images
self.download_thumbnails = download_thumbnails
self.service = service
self.user_id = user_id
self.api_url_input = api_url_input
self.cancellation_event = cancellation_event
self.signals = signals
self.skip_current_file_flag = threading.Event()
self.is_downloading_file = False
self.current_download_path = None
self.downloaded_files = downloaded_files
self.downloaded_file_hashes = downloaded_file_hashes
self.downloaded_files_lock = downloaded_files_lock
self.downloaded_file_hashes_lock = downloaded_file_hashes_lock
self.skip_words_list = skip_words_list if skip_words_list is not None else []
if self.compress_images and Image is None:
self.logger("⚠️ Image compression enabled, but Pillow library is not loaded. Disabling compression.")
self.compress_images = False
def logger(self, message):
if self.signals and hasattr(self.signals, 'progress_signal'):
self.signals.progress_signal.emit(message)
else:
print(f"(Worker Log): {message}")
def check_cancel(self):
is_cancelled = self.cancellation_event.is_set()
return is_cancelled
def skip_file(self):
pass
def process(self):
if self.check_cancel(): return 0, 0
total_downloaded_post = 0
total_skipped_post = 0
headers = {'User-Agent': 'Mozilla/5.0', 'Referer': f'https://{urlparse(self.api_url_input).netloc}/'}
url_pattern = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+')
LARGE_THUMBNAIL_THRESHOLD = 1 * 1024 * 1024
post = self.post
api_title = post.get('title', '')
title = api_title if api_title else 'untitled_post'
post_id = post.get('id', 'unknown_id')
post_file_info = post.get('file')
attachments = post.get('attachments', [])
post_content = post.get('content', '')
is_target_post = (self.target_post_id_from_initial_url is not None) and (str(post_id) == str(self.target_post_id_from_initial_url))
self.logger(f"\n--- Processing Post {post_id} ('{title[:50]}...') (Thread: {threading.current_thread().name}) ---")
if self.skip_words_list:
title_lower = title.lower()
for skip_word in self.skip_words_list:
if skip_word.lower() in title_lower:
self.logger(f" -> Skip Post (Title): Post {post_id} title ('{title[:30]}...') contains skip word '{skip_word}'. Skipping entire post.")
return 0, 1
if not isinstance(attachments, list):
self.logger(f"⚠️ Corrupt attachment data for post {post_id}. Skipping attachments.")
attachments = []
valid_folder_paths = []
folder_decision_reason = ""
api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su"
if is_target_post and self.custom_folder_name and self.use_subfolders:
folder_path_full = os.path.join(self.download_root, self.custom_folder_name)
valid_folder_paths = [folder_path_full]
folder_decision_reason = f"Using custom folder for target post: '{self.custom_folder_name}'"
if not valid_folder_paths and self.use_subfolders:
folder_names_for_post = []
if self.filter_character:
clean_char_filter = clean_folder_name(self.filter_character.lower())
matched_names_in_title = match_folders_from_title(title, self.known_names, self.unwanted_keywords)
if clean_char_filter and clean_char_filter in matched_names_in_title:
folder_names_for_post = [clean_char_filter]
folder_decision_reason = f"Character filter '{self.filter_character}' matched title. Using folder '{clean_char_filter}'."
else:
self.logger(f" -> Filter Skip Post {post_id}: Character filter '{self.filter_character}' not found in title matches ({matched_names_in_title}).")
return 0, 1
else:
matched_folders = match_folders_from_title(title, self.known_names, self.unwanted_keywords)
if matched_folders:
folder_names_for_post = matched_folders
folder_decision_reason = f"Found known name(s) in title: {matched_folders}"
else:
extracted_folder = extract_folder_name_from_title(title, self.unwanted_keywords)
folder_names_for_post = [extracted_folder]
folder_decision_reason = f"No known names in title. Using derived folder: '{extracted_folder}'"
for folder_name in folder_names_for_post:
folder_path_full = os.path.join(self.download_root, folder_name)
valid_folder_paths.append(folder_path_full)
if not valid_folder_paths:
valid_folder_paths = [self.download_root]
if not folder_decision_reason:
folder_decision_reason = "Subfolders disabled or no specific folder determined. Using root download directory."
self.logger(f" Folder Decision: {folder_decision_reason}")
if not valid_folder_paths:
self.logger(f" ERROR: No valid folder paths determined for post {post_id}. Skipping.")
return 0, 1
if post_content:
try:
found_links = re.findall(r'href=["\'](https?://[^"\']+)["\']', post_content)
if found_links:
self.logger(f"🔗 Links found in post content:")
unique_links = sorted(list(set(found_links)))
for link in unique_links[:10]:
if not any(x in link for x in ['.css', '.js', 'javascript:']):
self.logger(f" - {link}")
if len(unique_links) > 10:
self.logger(f" - ... ({len(unique_links) - 10} more links not shown)")
except Exception as e:
self.logger(f"⚠️ Error parsing content for links in post {post_id}: {e}")
files_to_process_for_download = []
api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su"
if self.download_thumbnails:
self.logger(f" Mode: Attempting to download thumbnail...")
self.logger(" Thumbnail download via API is disabled as the local API is not used.")
self.logger(f" -> Skipping Post {post_id}: Thumbnail download requested but API is disabled.")
return 0, 1
else:
self.logger(f" Mode: Downloading post file/attachments.")
if post_file_info and isinstance(post_file_info, dict) and post_file_info.get('path'):
main_file_path = post_file_info['path'].lstrip('/')
main_file_name = post_file_info.get('name') or os.path.basename(main_file_path)
if main_file_name:
file_url = f"https://{api_domain}/data/{main_file_path}"
files_to_process_for_download.append({
'url': file_url, 'name': main_file_name,
'_is_thumbnail': False, '_source': 'post_file'
})
else:
self.logger(f" ⚠️ Skipping main post file: Missing filename (Path: {main_file_path})")
attachment_counter = 0
for idx, attachment in enumerate(attachments):
if isinstance(attachment, dict) and attachment.get('path'):
attach_path = attachment['path'].lstrip('/')
attach_name = attachment.get('name') or os.path.basename(attach_path)
if attach_name:
base, ext = os.path.splitext(clean_filename(attach_name))
final_attach_name = f"{post_id}_{attachment_counter}{ext}"
if base and base != f"{post_id}_{attachment_counter}":
final_attach_name = f"{post_id}_{attachment_counter}_{base}{ext}"
attach_url = f"https://{api_domain}/data/{attach_path}"
files_to_process_for_download.append({
'url': attach_url, 'name': final_attach_name,
'_is_thumbnail': False, '_source': f'attachment_{idx+1}',
'_original_name_for_log': attach_name
})
attachment_counter += 1
else:
self.logger(f" ⚠️ Skipping attachment {idx+1}: Missing filename (Path: {attach_path})")
else:
self.logger(f" ⚠️ Skipping invalid attachment entry {idx+1}: {str(attachment)[:100]}")
if not files_to_process_for_download:
self.logger(f" No files found to download for post {post_id}.")
return 0, 0
self.logger(f" Files identified for download: {len(files_to_process_for_download)}")
post_download_count = 0
post_skip_count = 0
local_processed_filenames = set()
local_filenames_lock = threading.Lock()
for file_info in files_to_process_for_download:
if self.check_cancel(): break
if self.skip_current_file_flag.is_set():
original_name_for_log = file_info.get('_original_name_for_log', file_info.get('name', 'unknown_file'))
self.logger(f"⏭️ File skip requested: {original_name_for_log}")
post_skip_count += 1
self.skip_current_file_flag.clear()
continue
file_url = file_info.get('url')
original_filename = file_info.get('name')
is_thumbnail = file_info.get('_is_thumbnail', False)
original_name_for_log = file_info.get('_original_name_for_log', original_filename)
if not file_url or not original_filename:
self.logger(f"⚠️ Skipping file entry due to missing URL or name: {str(file_info)[:100]}")
post_skip_count += 1
continue
cleaned_save_filename = clean_filename(original_filename)
if self.skip_words_list:
filename_lower = cleaned_save_filename.lower()
file_skipped_by_word = False
for skip_word in self.skip_words_list:
if skip_word.lower() in filename_lower:
self.logger(f" -> Skip File (Filename): File '{original_name_for_log}' contains skip word '{skip_word}'.")
post_skip_count += 1
file_skipped_by_word = True
break
if file_skipped_by_word:
continue
if not self.download_thumbnails:
file_skipped_by_filter = False
is_img = is_image(cleaned_save_filename)
is_vid = is_video(cleaned_save_filename)
is_zip_file = is_zip(cleaned_save_filename)
is_rar_file = is_rar(cleaned_save_filename)
if self.filter_mode == 'image' and not is_img:
self.logger(f" -> Filter Skip: '{original_name_for_log}' (Not image/gif)")
file_skipped_by_filter = True
elif self.filter_mode == 'video' and not is_vid:
self.logger(f" -> Filter Skip: '{original_name_for_log}' (Not video)")
file_skipped_by_filter = True
elif self.skip_zip and is_zip_file:
self.logger(f" -> Pref Skip: '{original_name_for_log}' (Zip)")
file_skipped_by_filter = True
elif self.skip_rar and is_rar_file:
self.logger(f" -> Pref Skip: '{original_name_for_log}' (RAR)")
file_skipped_by_filter = True
if file_skipped_by_filter:
post_skip_count += 1
continue
file_downloaded_or_exists = False
for folder_path in valid_folder_paths:
if self.check_cancel(): break
try:
os.makedirs(folder_path, exist_ok=True)
except OSError as e:
self.logger(f"❌ Error ensuring directory exists {folder_path}: {e}. Skipping path.")
continue
except Exception as e:
self.logger(f"❌ Unexpected error creating dir {folder_path}: {e}. Skipping path.")
continue
save_path = os.path.join(folder_path, cleaned_save_filename)
folder_basename = os.path.basename(folder_path)
with local_filenames_lock:
if os.path.exists(save_path) and os.path.getsize(save_path) > 0:
self.logger(f" -> Exists Skip: '{original_name_for_log}' in '{folder_basename}'")
post_skip_count += 1
file_downloaded_or_exists = True
with self.downloaded_files_lock:
self.downloaded_files.add(cleaned_save_filename)
break
elif cleaned_save_filename in local_processed_filenames:
self.logger(f" -> Local Skip: '{original_name_for_log}' in '{folder_basename}' (already processed in this post)")
post_skip_count += 1
file_downloaded_or_exists = True
with self.downloaded_files_lock:
self.downloaded_files.add(cleaned_save_filename)
break
with self.downloaded_files_lock:
if cleaned_save_filename in self.downloaded_files:
self.logger(f" -> Global Filename Skip: '{original_name_for_log}' in '{folder_basename}' (filename already downloaded globally)")
post_skip_count += 1
file_downloaded_or_exists = True
break
try:
self.logger(f"⬇️ Downloading '{original_name_for_log}' to '{folder_basename}'...")
self.current_download_path = save_path
self.is_downloading_file = True
self.signals.file_download_status_signal.emit(True)
response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True)
response.raise_for_status()
file_content_bytes = BytesIO()
downloaded_size = 0
chunk_count = 0
md5_hash = hashlib.md5()
for chunk in response.iter_content(chunk_size=32 * 1024):
if self.check_cancel(): break
if self.skip_current_file_flag.is_set(): break
if chunk:
file_content_bytes.write(chunk)
md5_hash.update(chunk)
downloaded_size += len(chunk)
chunk_count += 1
if self.check_cancel() or self.skip_current_file_flag.is_set():
self.logger(f" ⚠️ Download interrupted {'(cancelled)' if self.cancellation_event.is_set() else '(skipped)'} for {original_name_for_log}.")
if self.skip_current_file_flag.is_set():
post_skip_count += 1
self.skip_current_file_flag.clear()
break
final_save_path = save_path
current_filename_for_log = cleaned_save_filename
file_content_bytes.seek(0)
if downloaded_size == 0 and chunk_count > 0:
self.logger(f"⚠️ Warning: Downloaded 0 bytes despite receiving chunks for {original_name_for_log}. Skipping save.")
post_skip_count += 1
break
if downloaded_size > 0:
calculated_hash = md5_hash.hexdigest()
with self.downloaded_file_hashes_lock:
if calculated_hash in self.downloaded_file_hashes:
self.logger(f" -> Content Skip: '{original_name_for_log}' (Hash: {calculated_hash}) already downloaded.")
post_skip_count += 1
file_downloaded_or_exists = True
with self.downloaded_files_lock:
self.downloaded_files.add(cleaned_save_filename)
with local_filenames_lock:
local_processed_filenames.add(cleaned_save_filename)
break
else:
pass
if not file_downloaded_or_exists:
final_bytes_to_save = file_content_bytes
is_img_for_compress = is_image(cleaned_save_filename)
if is_img_for_compress and not is_thumbnail and self.compress_images and Image and downloaded_size > 1500 * 1024:
self.logger(f" Compressing large image ({downloaded_size / 1024:.2f} KB)...")
try:
with Image.open(file_content_bytes) as img:
original_format = img.format
if img.mode == 'P': img = img.convert('RGBA')
elif img.mode not in ['RGB', 'RGBA', 'L']: img = img.convert('RGB')
compressed_bytes = BytesIO()
img.save(compressed_bytes, format='WebP', quality=75, method=4)
compressed_size = compressed_bytes.getbuffer().nbytes
if compressed_size < downloaded_size * 0.90:
self.logger(f" Compression success: {compressed_size / 1024:.2f} KB (WebP Q75)")
compressed_bytes.seek(0)
final_bytes_to_save = compressed_bytes
base, _ = os.path.splitext(cleaned_save_filename)
current_filename_for_log = base + '.webp'
final_save_path = os.path.join(folder_path, current_filename_for_log)
self.logger(f" Updated filename: {current_filename_for_log}")
else:
self.logger(f" Compression skipped: WebP not significantly smaller ({compressed_size / 1024:.2f} KB).")
file_content_bytes.seek(0)
final_bytes_to_save = file_content_bytes
except Exception as comp_e:
self.logger(f"❌ Image compression failed for {original_name_for_log}: {comp_e}. Saving original.")
file_content_bytes.seek(0)
final_bytes_to_save = file_content_bytes
final_save_path = save_path
elif is_img_for_compress and not is_thumbnail and self.compress_images:
self.logger(f" Skipping compression: Image size ({downloaded_size / 1024:.2f} KB) below threshold.")
file_content_bytes.seek(0)
final_bytes_to_save = file_content_bytes
elif is_thumbnail and downloaded_size > LARGE_THUMBNAIL_THRESHOLD:
self.logger(f"⚠️ Downloaded thumbnail '{current_filename_for_log}' ({downloaded_size / 1024:.2f} KB) is large.")
file_content_bytes.seek(0)
final_bytes_to_save = file_content_bytes
else:
file_content_bytes.seek(0)
final_bytes_to_save = file_content_bytes
save_file = False
with self.downloaded_files_lock:
with local_filenames_lock:
if os.path.exists(final_save_path) and os.path.getsize(final_save_path) > 0:
self.logger(f" -> Exists Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}'")
post_skip_count += 1
file_downloaded_or_exists = True
elif current_filename_for_log in self.downloaded_files:
self.logger(f" -> Global Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}' (already downloaded globally)")
post_skip_count += 1
file_downloaded_or_exists = True
elif current_filename_for_log in local_processed_filenames:
self.logger(f" -> Local Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}' (already processed in this post)")
post_skip_count += 1
file_downloaded_or_exists = True
else:
save_file = True
if save_file:
try:
with open(final_save_path, 'wb') as f:
while True:
chunk = final_bytes_to_save.read(64 * 1024)
if not chunk: break
f.write(chunk)
with self.downloaded_file_hashes_lock:
self.downloaded_file_hashes.add(calculated_hash)
with self.downloaded_files_lock:
self.downloaded_files.add(current_filename_for_log)
with local_filenames_lock:
local_processed_filenames.add(current_filename_for_log)
post_download_count += 1
file_downloaded_or_exists = True
self.logger(f"✅ Saved: '{current_filename_for_log}' ({downloaded_size / 1024:.1f} KB, Hash: {calculated_hash[:8]}...) in '{folder_basename}'")
time.sleep(0.05)
except IOError as io_err:
self.logger(f"❌ Save Fail: '{current_filename_for_log}' to '{folder_basename}'. Error: {io_err}")
post_skip_count += 1
if os.path.exists(final_save_path):
try: os.remove(final_save_path)
except OSError: pass
break
except Exception as save_err:
self.logger(f"❌ Unexpected Save Error: '{current_filename_for_log}' in '{folder_basename}'. Error: {save_err}")
post_skip_count += 1
if os.path.exists(final_save_path):
try: os.remove(final_save_path)
except OSError: pass
break
final_bytes_to_save.close()
if file_content_bytes is not final_bytes_to_save:
file_content_bytes.close()
if file_downloaded_or_exists:
break
except requests.exceptions.RequestException as e:
self.logger(f"❌ Download Fail: {original_name_for_log}. Error: {e}")
post_skip_count += 1
break
except IOError as e:
self.logger(f"❌ File I/O Error: {original_name_for_log} in '{folder_basename}'. Error: {e}")
post_skip_count += 1
break
except Exception as e:
self.logger(f"❌ Unexpected Error during download/save for {original_name_for_log}: {e}")
import traceback
self.logger(f" Traceback: {traceback.format_exc(limit=2)}")
post_skip_count += 1
break
2025-05-06 22:08:27 +05:30
finally:
self.is_downloading_file = False
self.current_download_path = None
self.signals.file_download_status_signal.emit(False)
if self.check_cancel(): break
if self.skip_current_file_flag.is_set():
self.skip_current_file_flag.clear()
if not file_downloaded_or_exists:
pass
if self.check_cancel():
self.logger(f" Post {post_id} processing cancelled.")
return post_download_count, post_skip_count
self.logger(f" Post {post_id} Summary: Downloaded={post_download_count}, Skipped={post_skip_count}")
return post_download_count, post_skip_count
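# Duplicate handling used by process() above: a shared filename set skips files whose
# cleaned name was already saved in this run, and a shared MD5 set skips files whose
# downloaded bytes match an earlier download even under a different name; both sets are
# guarded by locks because several posts may be processed concurrently.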
class DownloaderApp(QWidget):
character_prompt_response_signal = pyqtSignal(bool)
log_signal = pyqtSignal(str)
add_character_prompt_signal = pyqtSignal(str)
file_download_status_signal = pyqtSignal(bool)
overall_progress_signal = pyqtSignal(int, int)
finished_signal = pyqtSignal(int, int, bool)
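# GUI-level signals: log_signal feeds the progress log, overall_progress_signal carries
# (total_posts, processed_posts), finished_signal carries two counts plus a flag
# (apparently downloaded, skipped, cancelled), and the two character-prompt signals
# round-trip the "add this filter name?" question between worker code and the GUI thread.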
def __init__(self):
super().__init__()
self.config_file = "Known.txt"
self.download_thread = None
self.thread_pool = None
self.cancellation_event = threading.Event()
self.active_futures = []
self.total_posts_to_process = 0
self.processed_posts_count = 0
self.download_counter = 0
self.skip_counter = 0
self.worker_signals = PostProcessorSignals()
self.prompt_mutex = QMutex()
self._add_character_response = None
self.downloaded_files = set()
self.downloaded_files_lock = threading.Lock()
self.downloaded_file_hashes = set()
self.downloaded_file_hashes_lock = threading.Lock()
self.load_known_names()
self.setWindowTitle("Kemono Downloader v2.3 (Content Dedupe & Skip)")
self.setGeometry(150, 150, 1050, 820)
self.setStyleSheet(self.get_dark_theme())
self.init_ui()
self._connect_signals()
self.log_signal.emit(" Local API server functionality has been removed.")
def _connect_signals(self):
self.worker_signals.progress_signal.connect(self.log)
self.worker_signals.file_download_status_signal.connect(self.update_skip_button_state)
self.log_signal.connect(self.log)
self.add_character_prompt_signal.connect(self.prompt_add_character)
self.character_prompt_response_signal.connect(self.receive_add_character_result)
self.overall_progress_signal.connect(self.update_progress_display)
self.finished_signal.connect(self.download_finished)
self.character_search_input.textChanged.connect(self.filter_character_list)
def load_known_names(self):
global KNOWN_NAMES
loaded_names = []
if os.path.exists(self.config_file):
try:
with open(self.config_file, 'r', encoding='utf-8') as f:
raw_names = [line.strip() for line in f]
loaded_names = sorted(list(set(filter(None, raw_names))))
log_msg = f" Loaded {len(loaded_names)} known names from {self.config_file}"
except Exception as e:
log_msg = f"❌ Error loading config '{self.config_file}': {e}"
QMessageBox.warning(self, "Config Load Error", f"Could not load list from {self.config_file}:\n{e}")
loaded_names = []
else:
log_msg = f" Config file '{self.config_file}' not found. Starting empty."
loaded_names = []
KNOWN_NAMES = loaded_names
if hasattr(self, 'log_output'):
self.log_signal.emit(log_msg)
else:
print(log_msg)
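# Known.txt format: one show/character name per line (UTF-8). Names are de-duplicated
# and sorted on load, and save_known_names() rewrites the file in the same form.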
def save_known_names(self):
global KNOWN_NAMES
try:
unique_sorted_names = sorted(list(set(filter(None, KNOWN_NAMES))))
with open(self.config_file, 'w', encoding='utf-8') as f:
for name in unique_sorted_names:
f.write(name + '\n')
KNOWN_NAMES = unique_sorted_names
if hasattr(self, 'log_signal'):
self.log_signal.emit(f"💾 Saved {len(unique_sorted_names)} known names to {self.config_file}")
else:
print(f"Saved {len(unique_sorted_names)} names to {self.config_file}")
except Exception as e:
log_msg = f"❌ Error saving config '{self.config_file}': {e}"
if hasattr(self, 'log_signal'):
self.log_signal.emit(log_msg)
else:
print(log_msg)
QMessageBox.warning(self, "Config Save Error", f"Could not save list to {self.config_file}:\n{e}")
def closeEvent(self, event):
self.save_known_names()
should_exit = True
is_downloading = (self.download_thread and self.download_thread.isRunning()) or (self.thread_pool is not None)
if is_downloading:
reply = QMessageBox.question(self, "Confirm Exit",
"Download in progress. Are you sure you want to exit and cancel?",
QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
if reply == QMessageBox.Yes:
self.log_signal.emit("⚠️ Cancelling active download due to application exit...")
self.cancel_download()
else:
should_exit = False
self.log_signal.emit(" Application exit cancelled.")
event.ignore()
return
if should_exit:
self.log_signal.emit(" Application closing.")
self.log_signal.emit("👋 Exiting application.")
event.accept()
def init_ui(self):
main_layout = QHBoxLayout()
left_layout = QVBoxLayout()
right_layout = QVBoxLayout()
left_layout.addWidget(QLabel("🔗 Kemono Creator/Post URL:"))
self.link_input = QLineEdit()
self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765")
self.link_input.textChanged.connect(self.update_custom_folder_visibility)
left_layout.addWidget(self.link_input)
left_layout.addWidget(QLabel("📁 Download Location:"))
self.dir_input = QLineEdit()
self.dir_input.setPlaceholderText("Select folder where downloads will be saved")
self.dir_button = QPushButton("Browse...")
self.dir_button.clicked.connect(self.browse_directory)
dir_layout = QHBoxLayout()
dir_layout.addWidget(self.dir_input, 1)
dir_layout.addWidget(self.dir_button)
left_layout.addLayout(dir_layout)
self.custom_folder_widget = QWidget()
custom_folder_layout = QVBoxLayout(self.custom_folder_widget)
custom_folder_layout.setContentsMargins(0, 5, 0, 0)
self.custom_folder_label = QLabel("🗄️ Custom Folder Name (Single Post Only):")
self.custom_folder_input = QLineEdit()
self.custom_folder_input.setPlaceholderText("Optional: Save this post to specific folder")
custom_folder_layout.addWidget(self.custom_folder_label)
custom_folder_layout.addWidget(self.custom_folder_input)
self.custom_folder_widget.setVisible(False)
left_layout.addWidget(self.custom_folder_widget)
self.character_filter_widget = QWidget()
character_filter_layout = QVBoxLayout(self.character_filter_widget)
character_filter_layout.setContentsMargins(0, 5, 0, 0)
self.character_label = QLabel("🎯 Filter by Show/Character Name:")
self.character_input = QLineEdit()
self.character_input.setPlaceholderText("Only download posts matching this known name in title")
character_filter_layout.addWidget(self.character_label)
character_filter_layout.addWidget(self.character_input)
self.character_filter_widget.setVisible(True)
left_layout.addWidget(self.character_filter_widget)
left_layout.addWidget(QLabel("🚫 Skip Posts/Files with Words (comma-separated):"))
self.skip_words_input = QLineEdit()
self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview")
left_layout.addWidget(self.skip_words_input)
options_layout_1 = QHBoxLayout()
options_layout_1.addWidget(QLabel("Filter Files:"))
self.radio_group = QButtonGroup(self)
self.radio_all = QRadioButton("All")
self.radio_images = QRadioButton("Images/GIFs")
self.radio_videos = QRadioButton("Videos")
self.radio_all.setChecked(True)
self.radio_group.addButton(self.radio_all)
self.radio_group.addButton(self.radio_images)
self.radio_group.addButton(self.radio_videos)
options_layout_1.addWidget(self.radio_all)
options_layout_1.addWidget(self.radio_images)
options_layout_1.addWidget(self.radio_videos)
options_layout_1.addStretch(1)
left_layout.addLayout(options_layout_1)
options_layout_2 = QHBoxLayout()
self.use_subfolders_checkbox = QCheckBox("Separate Folders by Name/Title")
self.use_subfolders_checkbox.setChecked(True)
self.use_subfolders_checkbox.toggled.connect(self.update_ui_for_subfolders)
options_layout_2.addWidget(self.use_subfolders_checkbox)
self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only")
self.download_thumbnails_checkbox.setChecked(False)
self.download_thumbnails_checkbox.setToolTip("Thumbnail download functionality is currently limited without the API.")
options_layout_2.addWidget(self.download_thumbnails_checkbox)
options_layout_2.addStretch(1)
left_layout.addLayout(options_layout_2)
options_layout_3 = QHBoxLayout()
self.skip_zip_checkbox = QCheckBox("Skip .zip")
self.skip_zip_checkbox.setChecked(True)
options_layout_3.addWidget(self.skip_zip_checkbox)
self.skip_rar_checkbox = QCheckBox("Skip .rar")
self.skip_rar_checkbox.setChecked(True)
options_layout_3.addWidget(self.skip_rar_checkbox)
self.compress_images_checkbox = QCheckBox("Compress Large Images (to WebP)")
self.compress_images_checkbox.setChecked(False)
self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).")
options_layout_3.addWidget(self.compress_images_checkbox)
options_layout_3.addStretch(1)
left_layout.addLayout(options_layout_3)
options_layout_4 = QHBoxLayout()
self.use_multithreading_checkbox = QCheckBox(f"Use Multithreading ({4} Threads)")
self.use_multithreading_checkbox.setChecked(True)
self.use_multithreading_checkbox.setToolTip("Speeds up downloads for full creator pages.\nSingle post URLs always use one thread.")
options_layout_4.addWidget(self.use_multithreading_checkbox)
options_layout_4.addStretch(1)
left_layout.addLayout(options_layout_4)
btn_layout = QHBoxLayout()
self.download_btn = QPushButton("⬇️ Start Download")
self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;")
self.download_btn.clicked.connect(self.start_download)
self.cancel_btn = QPushButton("❌ Cancel")
self.cancel_btn.setEnabled(False)
self.cancel_btn.clicked.connect(self.cancel_download)
self.skip_file_btn = QPushButton("⏭️ Skip Current File")
self.skip_file_btn.setEnabled(False)
self.skip_file_btn.setToolTip("Only available in single-thread mode during file download.")
self.skip_file_btn.clicked.connect(self.skip_current_file)
btn_layout.addWidget(self.download_btn)
btn_layout.addWidget(self.cancel_btn)
btn_layout.addWidget(self.skip_file_btn)
left_layout.addLayout(btn_layout)
left_layout.addSpacing(10)
known_chars_label_layout = QHBoxLayout()
self.known_chars_label = QLabel("🎭 Known Shows/Characters (for Folder Names):")
self.character_search_input = QLineEdit()
self.character_search_input.setPlaceholderText("Search characters...")
known_chars_label_layout.addWidget(self.known_chars_label, 1)
known_chars_label_layout.addWidget(self.character_search_input)
2025-05-06 22:49:19 +05:30
left_layout.addLayout(known_chars_label_layout)
self.character_list = QListWidget()
self.character_list.addItems(KNOWN_NAMES)
self.character_list.setSelectionMode(QListWidget.ExtendedSelection)
left_layout.addWidget(self.character_list, 1)
char_manage_layout = QHBoxLayout()
self.new_char_input = QLineEdit()
self.new_char_input.setPlaceholderText("Add new show/character name")
self.add_char_button = QPushButton(" Add")
self.delete_char_button = QPushButton("🗑️ Delete Selected")
self.add_char_button.clicked.connect(self.add_new_character)
self.new_char_input.returnPressed.connect(self.add_char_button.click)
self.delete_char_button.clicked.connect(self.delete_selected_character)
char_manage_layout.addWidget(self.new_char_input, 2)
char_manage_layout.addWidget(self.add_char_button, 1)
char_manage_layout.addWidget(self.delete_char_button, 1)
left_layout.addLayout(char_manage_layout)
right_layout.addWidget(QLabel("📜 Progress Log:"))
self.log_output = QTextEdit()
self.log_output.setReadOnly(True)
self.log_output.setMinimumWidth(450)
self.log_output.setLineWrapMode(QTextEdit.WidgetWidth)
right_layout.addWidget(self.log_output, 1)
self.progress_label = QLabel("Progress: Idle")
self.progress_label.setStyleSheet("padding-top: 5px; font-style: italic;")
right_layout.addWidget(self.progress_label)
main_layout.addLayout(left_layout, 5)
main_layout.addLayout(right_layout, 4)
self.setLayout(main_layout)
self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked())
self.update_custom_folder_visibility()
def get_dark_theme(self):
return """
QWidget {
background-color: #2E2E2E;
color: #E0E0E0;
font-family: Segoe UI, Arial, sans-serif;
font-size: 10pt;
}
QLineEdit, QTextEdit, QListWidget {
background-color: #3C3F41;
border: 1px solid #5A5A5A;
padding: 5px;
color: #F0F0F0;
border-radius: 4px;
}
QTextEdit {
font-family: Consolas, Courier New, monospace;
font-size: 9.5pt;
}
QPushButton {
background-color: #555;
color: #F0F0F0;
border: 1px solid #6A6A6A;
padding: 6px 12px;
border-radius: 4px;
min-height: 22px;
}
QPushButton:hover {
background-color: #656565;
border: 1px solid #7A7A7A;
}
QPushButton:pressed {
background-color: #4A4A4A;
}
QPushButton:disabled {
background-color: #404040;
color: #888;
border-color: #555;
}
QLabel {
font-weight: bold;
padding-top: 4px;
padding-bottom: 2px;
color: #C0C0C0;
}
QRadioButton, QCheckBox {
spacing: 5px;
color: #E0E0E0;
padding-top: 4px;
padding-bottom: 4px;
}
QRadioButton::indicator, QCheckBox::indicator {
width: 14px;
height: 14px;
}
QListWidget {
alternate-background-color: #353535;
border: 1px solid #5A5A5A;
}
QListWidget::item:selected {
background-color: #007ACC;
color: #FFFFFF;
}
QToolTip {
background-color: #4A4A4A;
color: #F0F0F0;
border: 1px solid #6A6A6A;
padding: 4px;
border-radius: 3px;
}
"""
def browse_directory(self):
current_dir = self.dir_input.text() if os.path.isdir(self.dir_input.text()) else ""
folder = QFileDialog.getExistingDirectory(self, "Select Download Folder", current_dir)
if folder:
self.dir_input.setText(folder)
def log(self, message):
try:
safe_message = str(message).replace('\x00', '[NULL]')
self.log_output.append(safe_message)
scrollbar = self.log_output.verticalScrollBar()
if scrollbar.value() >= scrollbar.maximum() - 30:
scrollbar.setValue(scrollbar.maximum())
except Exception as e:
print(f"GUI Log Error: {e}")
print(f"Original Message: {message}")
def get_filter_mode(self):
if self.radio_images.isChecked():
return 'image'
elif self.radio_videos.isChecked():
return 'video'
return 'all'
def add_new_character(self):
global KNOWN_NAMES
name_to_add = self.new_char_input.text().strip()
if not name_to_add:
QMessageBox.warning(self, "Input Error", "Name cannot be empty.")
return
name_lower = name_to_add.lower()
is_duplicate = any(existing.lower() == name_lower for existing in KNOWN_NAMES)
if not is_duplicate:
KNOWN_NAMES.append(name_to_add)
KNOWN_NAMES.sort(key=str.lower)
self.character_list.clear()
self.character_list.addItems(KNOWN_NAMES)
self.filter_character_list(self.character_search_input.text())
self.log_signal.emit(f"✅ Added '{name_to_add}' to known names list.")
self.new_char_input.clear()
self.save_known_names()
else:
QMessageBox.warning(self, "Duplicate Name", f"The name '{name_to_add}' (or similar) already exists in the list.")
def delete_selected_character(self):
global KNOWN_NAMES
selected_items = self.character_list.selectedItems()
if not selected_items:
QMessageBox.warning(self, "Selection Error", "Please select one or more names to delete.")
return
names_to_remove = {item.text() for item in selected_items}
confirm = QMessageBox.question(self, "Confirm Deletion",
f"Are you sure you want to delete {len(names_to_remove)} selected name(s)?",
QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
if confirm == QMessageBox.Yes:
original_count = len(KNOWN_NAMES)
KNOWN_NAMES = [n for n in KNOWN_NAMES if n not in names_to_remove]
2025-05-05 19:35:24 +05:30
removed_count = original_count - len(KNOWN_NAMES)
if removed_count > 0:
self.log_signal.emit(f"🗑️ Removed {removed_count} name(s) from the list.")
self.character_list.clear()
KNOWN_NAMES.sort(key=str.lower)
self.character_list.addItems(KNOWN_NAMES)
self.filter_character_list(self.character_search_input.text())
self.save_known_names()
else:
self.log_signal.emit(" No names were removed (selection might have changed?).")
def update_custom_folder_visibility(self, url_text=None):
if url_text is None:
url_text = self.link_input.text()
_, _, post_id = extract_post_info(url_text.strip())
should_show = bool(post_id) and self.use_subfolders_checkbox.isChecked()
self.custom_folder_widget.setVisible(should_show)
if not should_show:
self.custom_folder_input.clear()
def update_ui_for_subfolders(self, checked):
self.character_filter_widget.setVisible(checked)
self.update_custom_folder_visibility()
if not checked:
self.character_input.clear()
def filter_character_list(self, search_text):
search_text = search_text.lower()
for i in range(self.character_list.count()):
item = self.character_list.item(i)
if search_text in item.text().lower():
item.setHidden(False)
else:
item.setHidden(True)
def update_progress_display(self, total_posts, processed_posts):
if total_posts > 0:
try:
percent = (processed_posts / total_posts) * 100
self.progress_label.setText(f"Progress: {processed_posts} / {total_posts} posts ({percent:.1f}%)")
except ZeroDivisionError:
self.progress_label.setText(f"Progress: {processed_posts} / {total_posts} posts")
elif processed_posts > 0:
self.progress_label.setText(f"Progress: Processing post {processed_posts}...")
else:
self.progress_label.setText("Progress: Starting...")
def start_download(self):
is_running = (self.download_thread and self.download_thread.isRunning()) or (self.thread_pool is not None)
if is_running:
self.log_signal.emit("⚠️ Download already in progress.")
QMessageBox.warning(self, "Busy", "A download is already running.")
return
api_url = self.link_input.text().strip()
output_dir = self.dir_input.text().strip()
filter_mode = self.get_filter_mode()
skip_zip = self.skip_zip_checkbox.isChecked()
skip_rar = self.skip_rar_checkbox.isChecked()
use_subfolders = self.use_subfolders_checkbox.isChecked()
compress_images = self.compress_images_checkbox.isChecked()
download_thumbnails = self.download_thumbnails_checkbox.isChecked()
use_multithreading = self.use_multithreading_checkbox.isChecked()
num_threads = 4
raw_skip_words = self.skip_words_input.text().strip()
skip_words_list = []
if raw_skip_words:
skip_words_list = [word.strip() for word in raw_skip_words.split(',') if word.strip()]
service, user_id, post_id_from_url = extract_post_info(api_url)
if not api_url:
QMessageBox.critical(self, "Input Error", "Please enter a Kemono/Coomer URL.")
return
if not service or not user_id:
QMessageBox.critical(self, "Input Error", "Invalid or unsupported URL format.\nPlease provide a valid creator page or post URL.")
self.log_signal.emit(f"❌ Invalid URL detected: {api_url}")
return
if not output_dir:
QMessageBox.critical(self, "Input Error", "Please select a download directory.")
return
if not os.path.isdir(output_dir):
reply = QMessageBox.question(self, "Directory Not Found",
f"The directory '{output_dir}' does not exist.\n\nCreate it?",
QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes)
if reply == QMessageBox.Yes:
try:
os.makedirs(output_dir)
self.log_signal.emit(f" Created download directory: {output_dir}")
except Exception as e:
QMessageBox.critical(self, "Directory Error", f"Could not create directory:\n{e}")
self.log_signal.emit(f"❌ Failed to create directory: {output_dir} - {e}")
return
else:
return
if compress_images and Image is None:
QMessageBox.warning(self, "Dependency Missing", "Image compression requires the Pillow library, but it's not installed.\nPlease run: pip install Pillow\n\nCompression will be disabled for this session.")
self.log_signal.emit("❌ Cannot compress images: Pillow library not found.")
compress_images = False
filter_character = None
if use_subfolders and self.character_filter_widget.isVisible():
filter_character = self.character_input.text().strip() or None
custom_folder_name = None
if use_subfolders and post_id_from_url and self.custom_folder_widget.isVisible():
raw_custom_name = self.custom_folder_input.text().strip()
if raw_custom_name:
cleaned_custom = clean_folder_name(raw_custom_name)
if cleaned_custom:
custom_folder_name = cleaned_custom
else:
QMessageBox.warning(self, "Input Warning", f"Custom folder name '{raw_custom_name}' is invalid and will be ignored.")
self.log_signal.emit(f"⚠️ Invalid custom folder name ignored: {raw_custom_name}")
if use_subfolders and filter_character and not post_id_from_url:
clean_char_filter = clean_folder_name(filter_character.lower())
known_names_lower = {name.lower() for name in KNOWN_NAMES}
if not clean_char_filter:
self.log_signal.emit(f"❌ Filter name '{filter_character}' is invalid. Aborting.")
QMessageBox.critical(self, "Filter Error", "The provided filter name is invalid (contains only spaces or special characters).")
return
elif filter_character.lower() not in known_names_lower:
reply = QMessageBox.question(self, "Add Filter Name?",
f"The filter name '{filter_character}' is not in your known names list.\n\nAdd it now and continue?",
QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes)
if reply == QMessageBox.Yes:
self.new_char_input.setText(filter_character)
self.add_new_character()
if filter_character.lower() not in {name.lower() for name in KNOWN_NAMES}:
self.log_signal.emit(f"⚠️ Failed to add '{filter_character}' automatically. Please add manually if needed.")
else:
self.log_signal.emit(f"✅ Added filter '{filter_character}' to list.")
elif reply == QMessageBox.No:
self.log_signal.emit(f" Proceeding without adding '{filter_character}'. Posts matching it might not be saved to a specific folder unless name is derived.")
else:
self.log_signal.emit("❌ Download cancelled by user during filter check.")
return
self.log_output.clear()
self.cancellation_event.clear()
self.active_futures = []
self.total_posts_to_process = 0
self.processed_posts_count = 0
self.download_counter = 0
self.skip_counter = 0
with self.downloaded_files_lock:
self.downloaded_files.clear()
with self.downloaded_file_hashes_lock:
self.downloaded_file_hashes.clear()
self.progress_label.setText("Progress: Initializing...")
self.log_signal.emit("="*40)
self.log_signal.emit(f"🚀 Starting Download Task @ {time.strftime('%Y-%m-%d %H:%M:%S')}")
self.log_signal.emit(f" URL: {api_url}")
self.log_signal.emit(f" Save Location: {output_dir}")
mode = "Single Post" if post_id_from_url else "Creator Feed"
self.log_signal.emit(f" Mode: {mode}")
self.log_signal.emit(f" Subfolders: {'Enabled' if use_subfolders else 'Disabled'}")
if use_subfolders:
if custom_folder_name:
self.log_signal.emit(f" Custom Folder (Post): '{custom_folder_name}'")
elif filter_character:
self.log_signal.emit(f" Character Filter: '{filter_character}'")
else:
self.log_signal.emit(f" Folder Naming: Automatic (Known Names > Title Extraction)")
self.log_signal.emit(f" File Type Filter: {filter_mode}")
self.log_signal.emit(f" Skip: {'.zip' if skip_zip else ''}{', ' if skip_zip and skip_rar else ''}{'.rar' if skip_rar else ''}{'None' if not (skip_zip or skip_rar) else ''}")
if skip_words_list:
self.log_signal.emit(f" Skip Words (Title/Filename): {', '.join(skip_words_list)}")
else:
self.log_signal.emit(f" Skip Words (Title/Filename): None")
self.log_signal.emit(f" Compress Images: {'Enabled' if compress_images else 'Disabled'}")
self.log_signal.emit(f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}")
should_use_multithreading = use_multithreading and not post_id_from_url
self.log_signal.emit(f" Threading: {'Multi-threaded' if should_use_multithreading else 'Single-threaded'}")
self.log_signal.emit("="*40)
self.set_ui_enabled(False)
self.cancel_btn.setEnabled(True)
try:
common_args = {
'api_url': api_url,
'output_dir': output_dir,
'known_names_copy': list(KNOWN_NAMES),
'filter_character': filter_character,
'filter_mode': filter_mode,
'skip_zip': skip_zip,
'skip_rar': skip_rar,
'use_subfolders': use_subfolders,
'compress_images': compress_images,
'download_thumbnails': download_thumbnails,
'service': service,
'user_id': user_id,
'downloaded_files': self.downloaded_files,
'downloaded_files_lock': self.downloaded_files_lock,
'downloaded_file_hashes': self.downloaded_file_hashes,
'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock,
'skip_words_list': skip_words_list,
}
if should_use_multithreading:
self.log_signal.emit(" Initializing multi-threaded download...")
multi_args = common_args.copy()
multi_args['num_threads'] = num_threads
self.start_multi_threaded_download(**multi_args)
else:
self.log_signal.emit(" Initializing single-threaded download...")
single_args = common_args.copy()
single_args['custom_folder_name'] = custom_folder_name
single_args['single_post_id'] = post_id_from_url
self.start_single_threaded_download(**single_args)
except Exception as e:
self.log_signal.emit(f"❌ CRITICAL ERROR preparing download task: {e}")
import traceback
self.log_signal.emit(traceback.format_exc())
QMessageBox.critical(self, "Start Error", f"Failed to start download task:\n{e}")
self.download_finished(0, 0, False)
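    # Single-threaded path: one DownloadThread (QThread) fetches and processes posts
    # sequentially; used for single-post URLs or when multithreading is disabled.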
def start_single_threaded_download(self, **kwargs):
try:
self.download_thread = DownloadThread(
cancellation_event = self.cancellation_event,
**kwargs
)
if self.download_thread._init_failed:
QMessageBox.critical(self, "Thread Error", "Failed to initialize the download thread.\nCheck the log for details.")
self.download_finished(0, 0, False)
return
self.download_thread.progress_signal.connect(self.log_signal)
self.download_thread.add_character_prompt_signal.connect(self.add_character_prompt_signal)
self.download_thread.file_download_status_signal.connect(self.file_download_status_signal)
self.download_thread.finished_signal.connect(self.finished_signal)
self.character_prompt_response_signal.connect(self.download_thread.receive_add_character_result)
self.download_thread.start()
self.log_signal.emit("✅ Single download thread started.")
except Exception as e:
self.log_signal.emit(f"❌ CRITICAL ERROR starting single-thread task: {e}")
import traceback
self.log_signal.emit(traceback.format_exc())
QMessageBox.critical(self, "Thread Start Error", f"Failed to start download thread:\n{e}")
self.download_finished(0, 0, False)
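    # Multi-threaded path: posts are fetched on a daemon "PostFetcher" thread and each
    # post is handed to a PostProcessorWorker running in the ThreadPoolExecutor.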
def start_multi_threaded_download(self, **kwargs):
num_threads = kwargs['num_threads']
self.thread_pool = ThreadPoolExecutor(max_workers=num_threads, thread_name_prefix='Downloader_')
self.active_futures = []
self.processed_posts_count = 0
self.total_posts_to_process = 0
self.download_counter = 0
self.skip_counter = 0
worker_args_template = kwargs.copy()
del worker_args_template['num_threads']
fetcher_thread = threading.Thread(
target=self._fetch_and_queue_posts,
args=(kwargs['api_url'], worker_args_template),
daemon=True,
name="PostFetcher"
)
fetcher_thread.start()
self.log_signal.emit(f"✅ Post fetcher thread started. {num_threads} worker threads initializing...")
def _fetch_and_queue_posts(self, api_url_input, worker_args_template):
all_posts = []
fetch_error = False
try:
self.log_signal.emit(" Starting post fetch...")
def fetcher_logger(msg):
self.log_signal.emit(f"[Fetcher] {msg}")
post_generator = download_from_api(api_url_input, logger=fetcher_logger)
for posts_batch in post_generator:
if self.cancellation_event.is_set():
self.log_signal.emit("⚠️ Post fetching cancelled by user.")
fetch_error = True
break
if isinstance(posts_batch, list):
all_posts.extend(posts_batch)
self.total_posts_to_process = len(all_posts)
if self.total_posts_to_process % 250 == 0:
self.log_signal.emit(f" Fetched {self.total_posts_to_process} posts...")
else:
self.log_signal.emit(f"❌ API returned non-list batch: {type(posts_batch)}. Stopping fetch.")
fetch_error = True
break
if not fetch_error:
self.log_signal.emit(f"✅ Finished fetching. Total posts found: {self.total_posts_to_process}")
except Exception as e:
self.log_signal.emit(f"❌ Unexpected Error during post fetching: {e}")
import traceback
self.log_signal.emit(traceback.format_exc(limit=3))
fetch_error = True
if self.cancellation_event.is_set() or fetch_error:
self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set())
if self.thread_pool:
self.thread_pool.shutdown(wait=False, cancel_futures=True)
self.thread_pool = None
return
if self.total_posts_to_process == 0:
self.log_signal.emit("😕 No posts found or fetched successfully.")
self.finished_signal.emit(0, 0, False)
return
self.log_signal.emit(f" Submitting {self.total_posts_to_process} post tasks to worker pool...")
self.processed_posts_count = 0
self.overall_progress_signal.emit(self.total_posts_to_process, 0)
common_worker_args = {
'download_root': worker_args_template['output_dir'],
'known_names': worker_args_template['known_names_copy'],
'filter_character': worker_args_template['filter_character'],
'unwanted_keywords': {'spicy', 'hd', 'nsfw', '4k', 'preview'},
'filter_mode': worker_args_template['filter_mode'],
'skip_zip': worker_args_template['skip_zip'],
'skip_rar': worker_args_template['skip_rar'],
'use_subfolders': worker_args_template['use_subfolders'],
'target_post_id_from_initial_url': worker_args_template.get('single_post_id'),
'custom_folder_name': worker_args_template.get('custom_folder_name'),
'compress_images': worker_args_template['compress_images'],
'download_thumbnails': worker_args_template['download_thumbnails'],
'service': worker_args_template['service'],
'user_id': worker_args_template['user_id'],
'api_url_input': worker_args_template['api_url'],
'cancellation_event': self.cancellation_event,
'signals': self.worker_signals,
'downloaded_files': self.downloaded_files,
'downloaded_files_lock': self.downloaded_files_lock,
'downloaded_file_hashes': self.downloaded_file_hashes,
'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock,
'skip_words_list': worker_args_template['skip_words_list'],
}
for post_data in all_posts:
if self.cancellation_event.is_set():
self.log_signal.emit("⚠️ Cancellation detected during task submission.")
break
if not isinstance(post_data, dict):
self.log_signal.emit(f"⚠️ Skipping invalid post data item (type: {type(post_data)}).")
self.processed_posts_count += 1
self.total_posts_to_process -=1
continue
worker = PostProcessorWorker(post_data=post_data, **common_worker_args)
try:
if self.thread_pool:
future = self.thread_pool.submit(worker.process)
future.add_done_callback(self._handle_future_result)
self.active_futures.append(future)
else:
self.log_signal.emit("⚠️ Thread pool shutdown before submitting all tasks.")
break
except RuntimeError as e:
self.log_signal.emit(f"⚠️ Error submitting task (pool might be shutting down): {e}")
break
except Exception as e:
self.log_signal.emit(f"❌ Unexpected error submitting task: {e}")
break
submitted_count = len(self.active_futures)
self.log_signal.emit(f" {submitted_count} / {self.total_posts_to_process} tasks submitted.")
def _handle_future_result(self, future: Future):
self.processed_posts_count += 1
downloaded_res, skipped_res = 0, 0
try:
if future.cancelled():
pass
elif future.exception():
exc = future.exception()
self.log_signal.emit(f"❌ Error in worker thread: {exc}")
pass
else:
downloaded, skipped = future.result()
downloaded_res = downloaded
skipped_res = skipped
            # A threading.Lock() created fresh on every call would be private to this
            # callback and provide no real mutual exclusion, so reuse the existing
            # shared lock while updating the counters from worker threads.
            with self.downloaded_files_lock:
                self.download_counter += downloaded_res
                self.skip_counter += skipped_res
self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count)
except Exception as e:
self.log_signal.emit(f"❌ Error in result callback handling: {e}")
        if self.total_posts_to_process > 0 and self.processed_posts_count >= self.total_posts_to_process:
            self.log_signal.emit("🏁 All submitted tasks have completed or failed.")
            cancelled = self.cancellation_event.is_set()
            self.finished_signal.emit(self.download_counter, self.skip_counter, cancelled)
def set_ui_enabled(self, enabled):
self.download_btn.setEnabled(enabled)
self.link_input.setEnabled(enabled)
self.dir_input.setEnabled(enabled)
self.dir_button.setEnabled(enabled)
self.radio_all.setEnabled(enabled)
self.radio_images.setEnabled(enabled)
self.radio_videos.setEnabled(enabled)
self.skip_zip_checkbox.setEnabled(enabled)
self.skip_rar_checkbox.setEnabled(enabled)
self.use_subfolders_checkbox.setEnabled(enabled)
self.compress_images_checkbox.setEnabled(enabled)
self.download_thumbnails_checkbox.setEnabled(enabled)
self.use_multithreading_checkbox.setEnabled(enabled)
self.skip_words_input.setEnabled(enabled)
self.character_search_input.setEnabled(enabled)
self.new_char_input.setEnabled(enabled)
self.add_char_button.setEnabled(enabled)
self.delete_char_button.setEnabled(enabled)
subfolders_on = self.use_subfolders_checkbox.isChecked()
self.custom_folder_widget.setEnabled(enabled and subfolders_on)
self.character_filter_widget.setEnabled(enabled and subfolders_on)
if enabled:
self.update_ui_for_subfolders(subfolders_on)
self.update_custom_folder_visibility()
self.cancel_btn.setEnabled(not enabled)
if enabled:
self.skip_file_btn.setEnabled(False)
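    # Cancellation is cooperative: set the shared event, then try to cancel any
    # futures that have not started yet; running workers stop at their next check.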
def cancel_download(self):
if not self.cancel_btn.isEnabled(): return
self.log_signal.emit("⚠️ Requesting cancellation...")
self.cancellation_event.set()
self.cancel_btn.setEnabled(False)
self.progress_label.setText("Progress: Cancelling...")
if self.thread_pool and self.active_futures:
cancelled_count = 0
for future in self.active_futures:
if future.cancel():
cancelled_count += 1
if cancelled_count > 0:
self.log_signal.emit(f" Attempted to cancel {cancelled_count} pending/running tasks.")
def skip_current_file(self):
if self.download_thread and self.download_thread.isRunning():
self.download_thread.skip_file()
elif self.thread_pool:
self.log_signal.emit(" Skipping individual files is not supported in multi-threaded mode.")
QMessageBox.information(self, "Action Not Supported", "Skipping individual files is only available in single-threaded mode.")
else:
self.log_signal.emit(" Skip requested, but no download is active.")
def update_skip_button_state(self, is_downloading_active):
can_skip = (not self.download_btn.isEnabled()) and \
(self.download_thread and self.download_thread.isRunning()) and \
is_downloading_active
if self.thread_pool is not None:
can_skip = False
self.skip_file_btn.setEnabled(can_skip)
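    # Common teardown for both modes: disconnect the prompt signal, shut down the
    # pool, reset the cancellation event, and re-enable the UI.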
def download_finished(self, total_downloaded, total_skipped, cancelled):
self.log_signal.emit("="*40)
status = "Cancelled" if cancelled else "Finished"
self.log_signal.emit(f"🏁 Download {status}!")
self.log_signal.emit(f" Summary: Downloaded={total_downloaded}, Skipped={total_skipped}")
self.progress_label.setText(f"{status}: {total_downloaded} downloaded, {total_skipped} skipped.")
self.log_signal.emit("="*40)
if self.download_thread:
try:
self.character_prompt_response_signal.disconnect(self.download_thread.receive_add_character_result)
except TypeError: pass
self.download_thread = None
if self.thread_pool:
self.log_signal.emit(" Shutting down worker thread pool...")
self.thread_pool.shutdown(wait=False, cancel_futures=True)
self.thread_pool = None
self.active_futures = []
self.cancellation_event.clear()
self.set_ui_enabled(True)
self.cancel_btn.setEnabled(False)
self.skip_file_btn.setEnabled(False)
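    # Runs on the GUI thread in response to add_character_prompt_signal; the user's
    # answer is sent back to the worker via character_prompt_response_signal.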
def prompt_add_character(self, character_name):
reply = QMessageBox.question(self, "Add Filter Name?",
f"The filter name '{character_name}' is not in your known list.\n\nAdd it now and continue download?",
QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes)
result = (reply == QMessageBox.Yes)
if result:
self.new_char_input.setText(character_name)
if character_name.lower() not in {n.lower() for n in KNOWN_NAMES}:
self.add_new_character()
if character_name.lower() not in {n.lower() for n in KNOWN_NAMES}:
self.log_signal.emit(f"⚠️ Failed to add '{character_name}' via prompt. Check for errors.")
result = False
else:
self.log_signal.emit(f" Filter name '{character_name}' was already present or added.")
self.character_prompt_response_signal.emit(result)
def receive_add_character_result(self, result):
with QMutexLocker(self.prompt_mutex):
self._add_character_response = result
self.log_signal.emit(f" Received prompt response: {'Yes' if result else 'No'}")
class DownloadThread(QThread):
progress_signal = pyqtSignal(str)
add_character_prompt_signal = pyqtSignal(str)
file_download_status_signal = pyqtSignal(bool)
finished_signal = pyqtSignal(int, int, bool)
def __init__(self, api_url, output_dir, known_names_copy,
cancellation_event, single_post_id=None,
filter_character=None, filter_mode='all', skip_zip=True, skip_rar=True,
use_subfolders=True, custom_folder_name=None, compress_images=False,
download_thumbnails=False, service=None, user_id=None,
downloaded_files=None, downloaded_files_lock=None,
downloaded_file_hashes=None, downloaded_file_hashes_lock=None,
skip_words_list=None):
super().__init__()
self._init_failed = False
self.api_url_input = api_url
self.output_dir = output_dir
self.known_names = list(known_names_copy)
self.cancellation_event = cancellation_event
self.initial_target_post_id = single_post_id
self.filter_character = filter_character
self.filter_mode = filter_mode
self.skip_zip = skip_zip
self.skip_rar = skip_rar
self.use_subfolders = use_subfolders
self.custom_folder_name = custom_folder_name
self.compress_images = compress_images
self.download_thumbnails = download_thumbnails
self.service = service
self.user_id = user_id
self.skip_words_list = skip_words_list if skip_words_list is not None else []
self.downloaded_files = downloaded_files if downloaded_files is not None else set()
self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock()
self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set()
self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock()
self.skip_current_file_flag = threading.Event()
self.is_downloading_file = False
self.current_download_path = None
self._add_character_response = None
self.prompt_mutex = QMutex()
if not self.service or not self.user_id:
log_msg = f"❌ Thread Init Error: Missing service ('{self.service}') or user ID ('{self.user_id}') for URL '{api_url}'"
print(log_msg)
try: self.progress_signal.emit(log_msg)
except RuntimeError: pass
self._init_failed = True
def run(self):
if self._init_failed:
self.finished_signal.emit(0, 0, False)
return
unwanted_keywords = {'spicy', 'hd', 'nsfw', '4k', 'preview'}
grand_total_downloaded = 0
grand_total_skipped = 0
cancelled_by_user = False
try:
if self.use_subfolders and self.filter_character and not self.custom_folder_name:
if not self._check_and_prompt_filter_character():
self.finished_signal.emit(0, 0, False)
return
worker_signals_adapter = PostProcessorSignals()
worker_signals_adapter.progress_signal.connect(self.progress_signal)
worker_signals_adapter.file_download_status_signal.connect(self.file_download_status_signal)
post_worker = PostProcessorWorker(
post_data=None,
download_root=self.output_dir,
known_names=self.known_names,
filter_character=self.filter_character,
unwanted_keywords=unwanted_keywords,
filter_mode=self.filter_mode,
skip_zip=self.skip_zip,
skip_rar=self.skip_rar,
use_subfolders=self.use_subfolders,
target_post_id_from_initial_url=self.initial_target_post_id,
custom_folder_name=self.custom_folder_name,
compress_images=self.compress_images,
download_thumbnails=self.download_thumbnails,
service=self.service,
user_id=self.user_id,
api_url_input=self.api_url_input,
cancellation_event=self.cancellation_event,
signals=worker_signals_adapter,
downloaded_files=self.downloaded_files,
downloaded_files_lock=self.downloaded_files_lock,
downloaded_file_hashes=self.downloaded_file_hashes,
downloaded_file_hashes_lock=self.downloaded_file_hashes_lock,
skip_words_list=self.skip_words_list,
)
post_worker.skip_current_file_flag = self.skip_current_file_flag
self.progress_signal.emit(" Starting post fetch...")
def thread_logger(msg):
self.progress_signal.emit(msg)
post_generator = download_from_api(self.api_url_input, logger=thread_logger)
for posts_batch in post_generator:
if self.isInterruptionRequested():
self.progress_signal.emit("⚠️ Download cancelled before processing batch.")
cancelled_by_user = True
break
for post in posts_batch:
if self.isInterruptionRequested():
self.progress_signal.emit("⚠️ Download cancelled during post processing.")
cancelled_by_user = True
break
post_worker.post = post
try:
downloaded, skipped = post_worker.process()
grand_total_downloaded += downloaded
grand_total_skipped += skipped
except Exception as proc_e:
post_id_err = post.get('id', 'N/A') if isinstance(post, dict) else 'N/A'
self.progress_signal.emit(f"❌ Error processing post {post_id_err}: {proc_e}")
import traceback
self.progress_signal.emit(traceback.format_exc(limit=2))
grand_total_skipped += 1
self.msleep(20)
if cancelled_by_user:
break
if not cancelled_by_user:
self.progress_signal.emit("✅ Post fetching and processing complete.")
except Exception as e:
            log_msg = f"\n❌ A critical error occurred in download thread: {e}"
self.progress_signal.emit(log_msg)
import traceback
tb_str = traceback.format_exc()
self.progress_signal.emit("--- Traceback ---")
for line in tb_str.splitlines():
self.progress_signal.emit(" " + line)
self.progress_signal.emit("--- End Traceback ---")
cancelled_by_user = False
finally:
self.finished_signal.emit(grand_total_downloaded, grand_total_skipped, cancelled_by_user)
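    # Blocks this worker thread (polling with msleep) until the GUI thread answers
    # the add-filter prompt via receive_add_character_result, or until cancellation.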
def _check_and_prompt_filter_character(self):
clean_char_filter = clean_folder_name(self.filter_character.lower())
known_names_lower = {name.lower() for name in self.known_names}
if not clean_char_filter:
self.progress_signal.emit(f"❌ Filter name '{self.filter_character}' is invalid. Aborting.")
return False
if self.filter_character.lower() not in known_names_lower:
self.progress_signal.emit(f"❓ Filter '{self.filter_character}' not found in known list.")
with QMutexLocker(self.prompt_mutex):
self._add_character_response = None
self.add_character_prompt_signal.emit(self.filter_character)
self.progress_signal.emit(" Waiting for user confirmation to add filter name...")
while self._add_character_response is None:
if self.isInterruptionRequested():
self.progress_signal.emit("⚠️ Cancelled while waiting for user input on filter name.")
return False
self.msleep(200)
if self._add_character_response:
self.progress_signal.emit(f"✅ User confirmed adding '{self.filter_character}'. Continuing.")
if self.filter_character not in self.known_names:
self.known_names.append(self.filter_character)
return True
else:
self.progress_signal.emit(f"❌ User declined to add filter '{self.filter_character}'. Aborting download.")
return False
return True
def skip_file(self):
if self.isRunning() and self.is_downloading_file:
self.progress_signal.emit("⏭️ Skip requested for current file.")
self.skip_current_file_flag.set()
elif self.isRunning():
self.progress_signal.emit(" Skip requested, but no file download active.")
def receive_add_character_result(self, result):
with QMutexLocker(self.prompt_mutex):
self._add_character_response = result
self.progress_signal.emit(f" Received prompt response: {'Yes' if result else 'No'}")
def isInterruptionRequested(self):
return super().isInterruptionRequested() or self.cancellation_event.is_set()
if __name__ == '__main__':
    qt_app = QApplication(sys.argv)
    # Build the icon path from the script location so it resolves regardless of the
    # current working directory.
    qt_app.setWindowIcon(QIcon(os.path.join(os.path.dirname(__file__), 'Kemono.ico')))
downloader = DownloaderApp()
downloader.show()
exit_code = qt_app.exec_()
print(f"Application finished with exit code: {exit_code}")
sys.exit(exit_code)