Kemono-Downloader/downloader_utils.py
2025-05-10 23:59:00 +05:30

1189 lines
61 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import time
import requests
import re
import threading
import queue
import hashlib
import http.client
import traceback
from concurrent.futures import ThreadPoolExecutor, Future, CancelledError, as_completed
import html
from PyQt5.QtCore import QObject, pyqtSignal, QThread, QMutex, QMutexLocker
from urllib.parse import urlparse
try:
from PIL import Image
except ImportError:
print("ERROR: Pillow library not found. Please install it: pip install Pillow")
Image = None
from io import BytesIO
STYLE_POST_TITLE = "post_title"
STYLE_ORIGINAL_NAME = "original_name"
SKIP_SCOPE_FILES = "files"
SKIP_SCOPE_POSTS = "posts"
SKIP_SCOPE_BOTH = "both"
CHAR_SCOPE_TITLE = "title"
CHAR_SCOPE_FILES = "files"
CHAR_SCOPE_BOTH = "both"
fastapi_app = None
KNOWN_NAMES = []
IMAGE_EXTENSIONS = {
'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp',
'.heic', '.heif', '.svg', '.ico', '.jfif', '.pjpeg', '.pjp', '.avif'
}
VIDEO_EXTENSIONS = {
'.mp4', '.mov', '.mkv', '.webm', '.avi', '.wmv', '.flv', '.mpeg',
'.mpg', '.m4v', '.3gp', '.ogv', '.ts', '.vob'
}
ARCHIVE_EXTENSIONS = {
'.zip', '.rar', '.7z', '.tar', '.gz', '.bz2'
}
def is_title_match_for_character(post_title, character_name_filter):
if not post_title or not character_name_filter:
return False
pattern = r"(?i)\b" + re.escape(character_name_filter) + r"\b"
return bool(re.search(pattern, post_title))
def is_filename_match_for_character(filename, character_name_filter):
if not filename or not character_name_filter:
return False
return character_name_filter.lower() in filename.lower()
def clean_folder_name(name):
if not isinstance(name, str): name = str(name)
cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name)
cleaned = cleaned.strip()
cleaned = re.sub(r'\s+', '_', cleaned)
return cleaned if cleaned else "untitled_folder"
def clean_filename(name):
if not isinstance(name, str): name = str(name)
cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name)
cleaned = cleaned.strip()
cleaned = re.sub(r'\s+', '_', cleaned)
return cleaned if cleaned else "untitled_file"
def extract_folder_name_from_title(title, unwanted_keywords):
if not title: return 'Uncategorized'
title_lower = title.lower()
tokens = re.findall(r'\b[\w\-]+\b', title_lower)
for token in tokens:
clean_token = clean_folder_name(token)
if clean_token and clean_token.lower() not in unwanted_keywords:
return clean_token
cleaned_full_title = clean_folder_name(title)
return cleaned_full_title if cleaned_full_title else 'Uncategorized'
def match_folders_from_title(title, names_to_match, unwanted_keywords):
if not title or not names_to_match: return []
title_lower = title.lower()
matched_cleaned_names = set()
sorted_names_to_match = sorted(names_to_match, key=len, reverse=True)
for name in sorted_names_to_match:
name_lower = name.lower()
if not name_lower: continue
pattern = r'\b' + re.escape(name_lower) + r'\b'
if re.search(pattern, title_lower):
cleaned_name_for_folder = clean_folder_name(name)
if cleaned_name_for_folder.lower() not in unwanted_keywords:
matched_cleaned_names.add(cleaned_name_for_folder)
return sorted(list(matched_cleaned_names))
def is_image(filename):
if not filename: return False
_, ext = os.path.splitext(filename)
return ext.lower() in IMAGE_EXTENSIONS
def is_video(filename):
if not filename: return False
_, ext = os.path.splitext(filename)
return ext.lower() in VIDEO_EXTENSIONS
def is_zip(filename):
if not filename: return False
return filename.lower().endswith('.zip')
def is_rar(filename):
if not filename: return False
return filename.lower().endswith('.rar')
def is_archive(filename):
if not filename: return False
_, ext = os.path.splitext(filename)
return ext.lower() in ARCHIVE_EXTENSIONS
def is_post_url(url):
if not isinstance(url, str): return False
return '/post/' in urlparse(url).path
def extract_post_info(url_string):
service, user_id, post_id = None, None, None
if not isinstance(url_string, str) or not url_string.strip(): return None, None, None
try:
parsed_url = urlparse(url_string.strip())
domain = parsed_url.netloc.lower()
is_kemono = any(d in domain for d in ['kemono.su', 'kemono.party'])
is_coomer = any(d in domain for d in ['coomer.su', 'coomer.party'])
if not (is_kemono or is_coomer): return None, None, None
path_parts = [part for part in parsed_url.path.strip('/').split('/') if part]
if len(path_parts) >= 3 and path_parts[1].lower() == 'user':
service = path_parts[0]
user_id = path_parts[2]
if len(path_parts) >= 5 and path_parts[3].lower() == 'post':
post_id = path_parts[4]
return service, user_id, post_id
if len(path_parts) >= 5 and path_parts[0].lower() == 'api' and \
path_parts[1].lower() == 'v1' and path_parts[3].lower() == 'user':
service = path_parts[2]
user_id = path_parts[4]
if len(path_parts) >= 7 and path_parts[5].lower() == 'post':
post_id = path_parts[6]
return service, user_id, post_id
except Exception as e:
print(f"Debug: Exception during extract_post_info for URL '{url_string}': {e}")
return None, None, None
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None):
if cancellation_event and cancellation_event.is_set():
logger(" Fetch cancelled before request.")
raise RuntimeError("Fetch operation cancelled by user.")
paginated_url = f'{api_url_base}?o={offset}'
logger(f" Fetching: {paginated_url} (Page approx. {offset // 50 + 1})")
try:
response = requests.get(paginated_url, headers=headers, timeout=(10, 60))
response.raise_for_status()
if 'application/json' not in response.headers.get('Content-Type', '').lower():
logger(f"⚠️ Unexpected content type from API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}")
return []
return response.json()
except requests.exceptions.Timeout:
raise RuntimeError(f"Timeout fetching offset {offset} from {paginated_url}")
except requests.exceptions.RequestException as e:
err_msg = f"Error fetching offset {offset} from {paginated_url}: {e}"
if e.response is not None:
err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
raise RuntimeError(err_msg)
except ValueError as e:
raise RuntimeError(f"Error decoding JSON from offset {offset} ({paginated_url}): {e}. Response text: {response.text[:200]}")
except Exception as e:
raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}")
def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False, cancellation_event=None):
headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
service, user_id, target_post_id = extract_post_info(api_url_input)
if cancellation_event and cancellation_event.is_set():
logger(" Download_from_api cancelled at start.")
return
if not service or not user_id:
logger(f"❌ Invalid URL or could not extract service/user: {api_url_input}")
return
if target_post_id and (start_page or end_page):
logger("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).")
start_page = end_page = None
is_creator_feed_for_manga = manga_mode and not target_post_id
parsed_input = urlparse(api_url_input)
api_domain = parsed_input.netloc
if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
logger(f"⚠️ Unrecognized domain '{api_domain}'. Defaulting to kemono.su for API calls.")
api_domain = "kemono.su"
api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}"
page_size = 50
if is_creator_feed_for_manga:
logger(" Manga Mode: Fetching all posts to reverse order (oldest posts processed first)...")
all_posts_for_manga_mode = []
current_offset_manga = 0
while True:
if cancellation_event and cancellation_event.is_set():
logger(" Manga mode post fetching cancelled.")
break
try:
posts_batch_manga = fetch_posts_paginated(api_base_url, headers, current_offset_manga, logger, cancellation_event)
if not isinstance(posts_batch_manga, list):
logger(f"❌ API Error (Manga Mode): Expected list of posts, got {type(posts_batch_manga)}.")
break
if not posts_batch_manga:
logger("✅ Reached end of posts (Manga Mode fetch all).")
break
all_posts_for_manga_mode.extend(posts_batch_manga)
current_offset_manga += len(posts_batch_manga)
time.sleep(0.6)
except RuntimeError as e:
if "cancelled by user" in str(e).lower():
logger(f" Manga mode pagination stopped due to cancellation: {e}")
else:
logger(f"{e}\n Aborting manga mode pagination.")
break
except Exception as e:
logger(f"❌ Unexpected error during manga mode fetch: {e}")
traceback.print_exc()
break
if cancellation_event and cancellation_event.is_set(): return
if all_posts_for_manga_mode:
logger(f" Manga Mode: Fetched {len(all_posts_for_manga_mode)} total posts. Reversing order...")
all_posts_for_manga_mode.reverse()
for i in range(0, len(all_posts_for_manga_mode), page_size):
if cancellation_event and cancellation_event.is_set():
logger(" Manga mode post yielding cancelled.")
break
yield all_posts_for_manga_mode[i:i + page_size]
else:
logger(" Manga Mode: No posts found to process.")
return
current_page_num = 1
current_offset = 0
processed_target_post_flag = False
if start_page and start_page > 1 and not target_post_id:
current_offset = (start_page - 1) * page_size
current_page_num = start_page
logger(f" Starting from page {current_page_num} (calculated offset {current_offset}).")
while True:
if cancellation_event and cancellation_event.is_set():
logger(" Post fetching loop cancelled.")
break
if target_post_id and processed_target_post_flag:
break
if not target_post_id and end_page and current_page_num > end_page:
logger(f"✅ Reached specified end page ({end_page}) for creator feed. Stopping.")
break
try:
posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event)
if not isinstance(posts_batch, list):
logger(f"❌ API Error: Expected list of posts, got {type(posts_batch)} at page {current_page_num} (offset {current_offset}).")
break
except RuntimeError as e:
if "cancelled by user" in str(e).lower():
logger(f" Pagination stopped due to cancellation: {e}")
else:
logger(f"{e}\n Aborting pagination at page {current_page_num} (offset {current_offset}).")
break
except Exception as e:
logger(f"❌ Unexpected error fetching page {current_page_num} (offset {current_offset}): {e}")
traceback.print_exc()
break
if not posts_batch:
if target_post_id and not processed_target_post_flag:
logger(f"❌ Target post {target_post_id} not found after checking all available pages (API returned no more posts at offset {current_offset}).")
elif not target_post_id:
if current_page_num == (start_page or 1):
logger(f"😕 No posts found on the first page checked (page {current_page_num}, offset {current_offset}).")
else:
logger(f"✅ Reached end of posts (no more content from API at offset {current_offset}).")
break
if target_post_id and not processed_target_post_flag:
matching_post = next((p for p in posts_batch if str(p.get('id')) == str(target_post_id)), None)
if matching_post:
logger(f"🎯 Found target post {target_post_id} on page {current_page_num} (offset {current_offset}).")
yield [matching_post]
processed_target_post_flag = True
elif not target_post_id:
yield posts_batch
if processed_target_post_flag:
break
current_offset += len(posts_batch)
current_page_num += 1
time.sleep(0.6)
if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()):
logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).")
def get_link_platform(url):
try:
domain = urlparse(url).netloc.lower()
if 'drive.google.com' in domain: return 'google drive'
if 'mega.nz' in domain or 'mega.io' in domain: return 'mega'
if 'dropbox.com' in domain: return 'dropbox'
if 'patreon.com' in domain: return 'patreon'
if 'instagram.com' in domain: return 'instagram'
if 'twitter.com' in domain or 'x.com' in domain: return 'twitter/x'
if 'discord.gg' in domain or 'discord.com/invite' in domain: return 'discord invite'
if 'pixiv.net' in domain: return 'pixiv'
if 'kemono.su' in domain or 'kemono.party' in domain: return 'kemono'
if 'coomer.su' in domain or 'coomer.party' in domain: return 'coomer'
parts = domain.split('.')
if len(parts) >= 2:
if parts[-2] not in ['com', 'org', 'net', 'gov', 'edu', 'co'] or len(parts) == 2:
return parts[-2]
elif len(parts) >= 3 and parts[-3] not in ['com', 'org', 'net', 'gov', 'edu', 'co']:
return parts[-3]
else:
return domain
return 'external'
except Exception: return 'unknown'
class PostProcessorSignals(QObject):
progress_signal = pyqtSignal(str)
file_download_status_signal = pyqtSignal(bool)
external_link_signal = pyqtSignal(str, str, str, str)
file_progress_signal = pyqtSignal(str, int, int)
class PostProcessorWorker:
def __init__(self, post_data, download_root, known_names,
filter_character_list,
unwanted_keywords, filter_mode, skip_zip, skip_rar,
use_subfolders, use_post_subfolders, target_post_id_from_initial_url, custom_folder_name,
compress_images, download_thumbnails, service, user_id,
api_url_input, cancellation_event, signals,
downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock,
skip_words_list=None,
skip_words_scope=SKIP_SCOPE_FILES,
show_external_links=False,
extract_links_only=False,
num_file_threads=4, skip_current_file_flag=None,
manga_mode_active=False,
manga_filename_style=STYLE_POST_TITLE,
char_filter_scope=CHAR_SCOPE_FILES
):
self.post = post_data
self.download_root = download_root
self.known_names = known_names
self.filter_character_list = filter_character_list if filter_character_list else []
self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else set()
self.filter_mode = filter_mode
self.skip_zip = skip_zip
self.skip_rar = skip_rar
self.use_subfolders = use_subfolders
self.use_post_subfolders = use_post_subfolders
self.target_post_id_from_initial_url = target_post_id_from_initial_url
self.custom_folder_name = custom_folder_name
self.compress_images = compress_images
self.download_thumbnails = download_thumbnails
self.service = service
self.user_id = user_id
self.api_url_input = api_url_input
self.cancellation_event = cancellation_event
self.signals = signals
self.skip_current_file_flag = skip_current_file_flag
self.downloaded_files = downloaded_files if downloaded_files is not None else set()
self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set()
self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock()
self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock()
self.skip_words_list = skip_words_list if skip_words_list is not None else []
self.skip_words_scope = skip_words_scope
self.show_external_links = show_external_links
self.extract_links_only = extract_links_only
self.num_file_threads = num_file_threads
self.manga_mode_active = manga_mode_active
self.manga_filename_style = manga_filename_style
self.char_filter_scope = char_filter_scope
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found.")
self.compress_images = False
def logger(self, message):
if self.signals and hasattr(self.signals, 'progress_signal'):
self.signals.progress_signal.emit(message)
else:
print(f"(Worker Log - No Signal): {message}")
def check_cancel(self):
return self.cancellation_event.is_set()
def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event,
post_title="", file_index_in_post=0, num_files_in_this_post=1):
was_original_name_kept_flag = False
final_filename_saved_for_return = ""
if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False
file_url = file_info.get('url')
api_original_filename = file_info.get('_original_name_for_log', file_info.get('name'))
final_filename_saved_for_return = api_original_filename
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_FILES or self.skip_words_scope == SKIP_SCOPE_BOTH):
filename_to_check_for_skip_words = api_original_filename.lower()
for skip_word in self.skip_words_list:
if skip_word.lower() in filename_to_check_for_skip_words:
self.logger(f" -> Skip File (Keyword in Original Name '{skip_word}'): '{api_original_filename}'. Scope: {self.skip_words_scope}")
return 0, 1, api_original_filename, False
original_filename_cleaned_base, original_ext = os.path.splitext(clean_filename(api_original_filename))
if not original_ext.startswith('.'): original_ext = '.' + original_ext if original_ext else ''
filename_to_save = ""
if self.manga_mode_active:
if self.manga_filename_style == STYLE_ORIGINAL_NAME:
filename_to_save = clean_filename(api_original_filename)
was_original_name_kept_flag = True
elif self.manga_filename_style == STYLE_POST_TITLE:
if post_title and post_title.strip():
cleaned_post_title_base = clean_filename(post_title.strip())
if num_files_in_this_post > 1:
if file_index_in_post == 0:
filename_to_save = f"{cleaned_post_title_base}{original_ext}"
was_original_name_kept_flag = False
else:
filename_to_save = clean_filename(api_original_filename)
was_original_name_kept_flag = True
else:
filename_to_save = f"{cleaned_post_title_base}{original_ext}"
was_original_name_kept_flag = False
else:
filename_to_save = clean_filename(api_original_filename)
was_original_name_kept_flag = False
self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save}'.")
else:
self.logger(f"⚠️ Manga mode: Unknown filename style '{self.manga_filename_style}'. Defaulting to original filename for '{api_original_filename}'.")
filename_to_save = clean_filename(api_original_filename)
was_original_name_kept_flag = False
if filename_to_save:
counter = 1
base_name_coll, ext_coll = os.path.splitext(filename_to_save)
temp_filename_for_collision_check = filename_to_save
while os.path.exists(os.path.join(target_folder_path, temp_filename_for_collision_check)):
if self.manga_filename_style == STYLE_POST_TITLE and file_index_in_post == 0 and num_files_in_this_post > 1:
temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}"
else:
temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}"
counter += 1
if temp_filename_for_collision_check != filename_to_save:
filename_to_save = temp_filename_for_collision_check
else:
filename_to_save = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}"
self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save}'.")
was_original_name_kept_flag = False
else:
filename_to_save = clean_filename(api_original_filename)
was_original_name_kept_flag = False
counter = 1
base_name_coll, ext_coll = os.path.splitext(filename_to_save)
temp_filename_for_collision_check = filename_to_save
while os.path.exists(os.path.join(target_folder_path, temp_filename_for_collision_check)):
temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}"
counter += 1
if temp_filename_for_collision_check != filename_to_save:
filename_to_save = temp_filename_for_collision_check
final_filename_for_sets_and_saving = filename_to_save
final_filename_saved_for_return = final_filename_for_sets_and_saving
if not self.download_thumbnails:
is_img_type = is_image(api_original_filename)
is_vid_type = is_video(api_original_filename)
is_archive_type = is_archive(api_original_filename)
if self.filter_mode == 'archive':
if not is_archive_type:
self.logger(f" -> Filter Skip (Archive Mode): '{api_original_filename}' (Not an Archive).")
return 0, 1, api_original_filename, False
elif self.filter_mode == 'image':
if not is_img_type:
self.logger(f" -> Filter Skip: '{api_original_filename}' (Not Image).")
return 0, 1, api_original_filename, False
elif self.filter_mode == 'video':
if not is_vid_type:
self.logger(f" -> Filter Skip: '{api_original_filename}' (Not Video).")
return 0, 1, api_original_filename, False
if self.skip_zip and is_zip(api_original_filename):
self.logger(f" -> Pref Skip: '{api_original_filename}' (ZIP).")
return 0, 1, api_original_filename, False
if self.skip_rar and is_rar(api_original_filename):
self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).")
return 0, 1, api_original_filename, False
target_folder_basename = os.path.basename(target_folder_path)
current_save_path = os.path.join(target_folder_path, final_filename_for_sets_and_saving)
if os.path.exists(current_save_path) and os.path.getsize(current_save_path) > 0:
self.logger(f" -> Exists (Path): '{final_filename_for_sets_and_saving}' in '{target_folder_basename}'.")
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving)
return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag
with self.downloaded_files_lock:
if final_filename_for_sets_and_saving in self.downloaded_files:
self.logger(f" -> Global Skip (Filename): '{final_filename_for_sets_and_saving}' already recorded this session.")
return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag
max_retries = 3
retry_delay = 5
downloaded_size_bytes = 0
calculated_file_hash = None
file_content_bytes = None
total_size_bytes = 0
download_successful_flag = False
for attempt_num in range(max_retries + 1):
if self.check_cancel() or (skip_event and skip_event.is_set()):
break
try:
if attempt_num > 0:
self.logger(f" Retrying '{api_original_filename}' (Attempt {attempt_num}/{max_retries})...")
time.sleep(retry_delay * (2**(attempt_num - 1)))
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
self.signals.file_download_status_signal.emit(True)
response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True)
response.raise_for_status()
current_total_size_bytes_from_headers = int(response.headers.get('Content-Length', 0))
if attempt_num == 0:
total_size_bytes = current_total_size_bytes_from_headers
size_str = f"{total_size_bytes / (1024 * 1024):.2f} MB" if total_size_bytes > 0 else "unknown size"
self.logger(f"⬇️ Downloading: '{api_original_filename}' (Size: {size_str}) [Saving as: '{final_filename_for_sets_and_saving}']")
current_attempt_total_size = total_size_bytes
file_content_buffer = BytesIO()
current_attempt_downloaded_bytes = 0
md5_hasher = hashlib.md5()
last_progress_time = time.time()
for chunk in response.iter_content(chunk_size=1 * 1024 * 1024):
if self.check_cancel() or (skip_event and skip_event.is_set()):
break
if chunk:
file_content_buffer.write(chunk)
md5_hasher.update(chunk)
current_attempt_downloaded_bytes += len(chunk)
if time.time() - last_progress_time > 1 and current_attempt_total_size > 0 and \
self.signals and hasattr(self.signals, 'file_progress_signal'):
self.signals.file_progress_signal.emit(
api_original_filename,
current_attempt_downloaded_bytes,
current_attempt_total_size
)
last_progress_time = time.time()
if self.check_cancel() or (skip_event and skip_event.is_set()):
if file_content_buffer: file_content_buffer.close()
break
if current_attempt_downloaded_bytes > 0 or (current_attempt_total_size == 0 and response.status_code == 200):
calculated_file_hash = md5_hasher.hexdigest()
downloaded_size_bytes = current_attempt_downloaded_bytes
if file_content_bytes: file_content_bytes.close()
file_content_bytes = file_content_buffer
file_content_bytes.seek(0)
download_successful_flag = True
break
else:
if file_content_buffer: file_content_buffer.close()
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
self.logger(f" ❌ Download Error (Retryable): {api_original_filename}. Error: {e}")
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
except requests.exceptions.RequestException as e:
self.logger(f" ❌ Download Error (Non-Retryable): {api_original_filename}. Error: {e}")
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
break
except Exception as e:
self.logger(f" ❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}")
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
break
finally:
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
self.signals.file_download_status_signal.emit(False)
if self.signals and hasattr(self.signals, 'file_progress_signal'):
final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes
self.signals.file_progress_signal.emit(api_original_filename, downloaded_size_bytes, final_total_for_progress)
if self.check_cancel() or (skip_event and skip_event.is_set()):
self.logger(f" ⚠️ Download interrupted for {api_original_filename}.")
if file_content_bytes: file_content_bytes.close()
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
if not download_successful_flag:
self.logger(f"❌ Download failed for '{api_original_filename}' after {max_retries + 1} attempts.")
if file_content_bytes: file_content_bytes.close()
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
with self.downloaded_file_hashes_lock:
if calculated_file_hash in self.downloaded_file_hashes:
self.logger(f" -> Content Skip (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...) already downloaded this session.")
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving)
if file_content_bytes: file_content_bytes.close()
return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag
bytes_to_write = file_content_bytes
final_filename_after_processing = final_filename_for_sets_and_saving
current_save_path_final = current_save_path
is_img_for_compress_check = is_image(api_original_filename)
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...")
try:
bytes_to_write.seek(0)
with Image.open(bytes_to_write) as img_obj:
if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA')
elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB')
compressed_bytes_io = BytesIO()
img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4)
compressed_size = compressed_bytes_io.getbuffer().nbytes
if compressed_size < downloaded_size_bytes * 0.9:
self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.")
bytes_to_write.close()
bytes_to_write = compressed_bytes_io
bytes_to_write.seek(0)
base_name_orig, _ = os.path.splitext(final_filename_for_sets_and_saving)
final_filename_after_processing = base_name_orig + '.webp'
current_save_path_final = os.path.join(target_folder_path, final_filename_after_processing)
self.logger(f" Updated filename (compressed): {final_filename_after_processing}")
else:
self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0)
except Exception as comp_e:
self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); bytes_to_write.seek(0)
final_filename_saved_for_return = final_filename_after_processing
if final_filename_after_processing != final_filename_for_sets_and_saving and \
os.path.exists(current_save_path_final) and os.path.getsize(current_save_path_final) > 0:
self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{target_folder_basename}'.")
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing)
bytes_to_write.close()
return 0, 1, final_filename_after_processing, was_original_name_kept_flag
try:
os.makedirs(os.path.dirname(current_save_path_final), exist_ok=True)
with open(current_save_path_final, 'wb') as f_out:
f_out.write(bytes_to_write.getvalue())
with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash)
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing)
self.logger(f"✅ Saved: '{final_filename_after_processing}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{target_folder_basename}'")
time.sleep(0.05)
return 1, 0, final_filename_after_processing, was_original_name_kept_flag
except Exception as save_err:
self.logger(f"❌ Save Fail for '{final_filename_after_processing}': {save_err}")
if os.path.exists(current_save_path_final):
try: os.remove(current_save_path_final);
except OSError: self.logger(f" -> Failed to remove partially saved file: {current_save_path_final}")
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
finally:
if bytes_to_write: bytes_to_write.close()
def process(self):
if self.check_cancel(): return 0, 0, []
kept_original_filenames_for_log = []
total_downloaded_this_post = 0
total_skipped_this_post = 0
parsed_api_url = urlparse(self.api_url_input)
referer_url = f"https://{parsed_api_url.netloc}/"
headers = {'User-Agent': 'Mozilla/5.0', 'Referer': referer_url, 'Accept': '*/*'}
link_pattern = re.compile(r"""<a\s+.*?href=["'](https?://[^"']+)["'][^>]*>(.*?)</a>""",
re.IGNORECASE | re.DOTALL)
post_data = self.post
post_title = post_data.get('title', '') or 'untitled_post'
post_id = post_data.get('id', 'unknown_id')
post_main_file_info = post_data.get('file')
post_attachments = post_data.get('attachments', [])
post_content_html = post_data.get('content', '')
self.logger(f"\n--- Processing Post {post_id} ('{post_title[:50]}...') (Thread: {threading.current_thread().name}) ---")
num_potential_files_in_post = len(post_attachments or []) + (1 if post_main_file_info and post_main_file_info.get('path') else 0)
post_is_candidate_by_title_char_match = False
char_filter_that_matched_title = None
if self.filter_character_list and \
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH):
for char_name in self.filter_character_list:
if is_title_match_for_character(post_title, char_name):
post_is_candidate_by_title_char_match = True
char_filter_that_matched_title = char_name
self.logger(f" Post title matches char filter '{char_name}' (Scope: {self.char_filter_scope}). Post is candidate.")
break
if self.filter_character_list and self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
return 0, num_potential_files_in_post, []
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
post_title_lower = post_title.lower()
for skip_word in self.skip_words_list:
if skip_word.lower() in post_title_lower:
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'. Scope: {self.skip_words_scope}")
return 0, num_potential_files_in_post, []
if not self.extract_links_only and self.manga_mode_active and self.filter_character_list and \
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and \
not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title[:50]}' doesn't match filters.")
return 0, num_potential_files_in_post, []
if not isinstance(post_attachments, list):
self.logger(f"⚠️ Corrupt attachment data for post {post_id} (expected list, got {type(post_attachments)}). Skipping attachments.")
post_attachments = []
base_folder_names_for_post_content = []
if not self.extract_links_only and self.use_subfolders:
if post_is_candidate_by_title_char_match and char_filter_that_matched_title:
base_folder_names_for_post_content = [clean_folder_name(char_filter_that_matched_title)]
else:
derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
if derived_folders:
base_folder_names_for_post_content.extend(derived_folders)
else:
base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
self.logger(f" Base folder name(s) for post content (if title matched char or generic): {', '.join(base_folder_names_for_post_content)}")
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
for folder_name_to_check in base_folder_names_for_post_content:
if not folder_name_to_check: continue
if any(skip_word.lower() in folder_name_to_check.lower() for skip_word in self.skip_words_list):
matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in folder_name_to_check.lower()), "unknown_skip_word")
self.logger(f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check}' contains '{matched_skip}'.")
return 0, num_potential_files_in_post, []
if (self.show_external_links or self.extract_links_only) and post_content_html:
try:
unique_links_data = {}
for match in link_pattern.finditer(post_content_html):
link_url = match.group(1).strip()
link_inner_text = match.group(2)
if not any(ext in link_url.lower() for ext in ['.css', '.js', '.ico', '.xml', '.svg']) \
and not link_url.startswith('javascript:') \
and link_url not in unique_links_data:
clean_link_text = re.sub(r'<.*?>', '', link_inner_text)
clean_link_text = html.unescape(clean_link_text).strip()
display_text = clean_link_text if clean_link_text else "[Link]"
unique_links_data[link_url] = display_text
links_emitted_count = 0
scraped_platforms = {'kemono', 'coomer', 'patreon'}
for link_url, link_text in unique_links_data.items():
platform = get_link_platform(link_url)
if platform not in scraped_platforms:
if self.signals and hasattr(self.signals, 'external_link_signal'):
self.signals.external_link_signal.emit(post_title, link_text, link_url, platform)
links_emitted_count +=1
if links_emitted_count > 0: self.logger(f" 🔗 Found {links_emitted_count} potential external link(s) in post content.")
except Exception as e: self.logger(f"⚠️ Error parsing post content for links: {e}\n{traceback.format_exc(limit=2)}")
if self.extract_links_only:
self.logger(f" Extract Links Only mode: Finished processing post {post_id} for links.")
return 0, 0, []
all_files_from_post_api = []
api_file_domain = urlparse(self.api_url_input).netloc
if not api_file_domain or not any(d in api_file_domain.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
api_file_domain = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'):
file_path = post_main_file_info['path'].lstrip('/')
original_api_name = post_main_file_info.get('name') or os.path.basename(file_path)
if original_api_name:
all_files_from_post_api.append({
'url': f"https://{api_file_domain}{file_path}" if file_path.startswith('/') else f"https://{api_file_domain}/data/{file_path}",
'name': original_api_name,
'_original_name_for_log': original_api_name,
'_is_thumbnail': self.download_thumbnails and is_image(original_api_name)
})
else: self.logger(f" ⚠️ Skipping main file for post {post_id}: Missing name (Path: {file_path})")
for idx, att_info in enumerate(post_attachments):
if isinstance(att_info, dict) and att_info.get('path'):
att_path = att_info['path'].lstrip('/')
original_api_att_name = att_info.get('name') or os.path.basename(att_path)
if original_api_att_name:
all_files_from_post_api.append({
'url': f"https://{api_file_domain}{att_path}" if att_path.startswith('/') else f"https://{api_file_domain}/data/{att_path}",
'name': original_api_att_name,
'_original_name_for_log': original_api_att_name,
'_is_thumbnail': self.download_thumbnails and is_image(original_api_att_name)
})
else: self.logger(f" ⚠️ Skipping attachment {idx+1} for post {post_id}: Missing name (Path: {att_path})")
else: self.logger(f" ⚠️ Skipping invalid attachment {idx+1} for post {post_id}: {str(att_info)[:100]}")
if self.download_thumbnails:
all_files_from_post_api = [finfo for finfo in all_files_from_post_api if finfo['_is_thumbnail']]
if not all_files_from_post_api:
self.logger(f" -> No image thumbnails found for post {post_id} in thumbnail-only mode.")
return 0, 0, []
if not all_files_from_post_api:
self.logger(f" No files found to download for post {post_id}.")
return 0, 0, []
files_to_download_info_list = []
processed_original_filenames_in_this_post = set()
for file_info in all_files_from_post_api:
current_api_original_filename = file_info.get('_original_name_for_log')
if current_api_original_filename in processed_original_filenames_in_this_post:
self.logger(f" -> Skip Duplicate Original Name (within post {post_id}): '{current_api_original_filename}' already processed/listed for this post.")
total_skipped_this_post += 1
else:
files_to_download_info_list.append(file_info)
if current_api_original_filename:
processed_original_filenames_in_this_post.add(current_api_original_filename)
if not files_to_download_info_list:
self.logger(f" All files for post {post_id} were duplicate original names or skipped earlier.")
return 0, total_skipped_this_post, []
num_files_in_this_post_for_naming = len(files_to_download_info_list)
self.logger(f" Identified {num_files_in_this_post_for_naming} unique original file(s) for potential download from post {post_id}.")
with ThreadPoolExecutor(max_workers=self.num_file_threads, thread_name_prefix=f'P{post_id}File_') as file_pool:
futures_list = []
for file_idx, file_info_to_dl in enumerate(files_to_download_info_list):
if self.check_cancel(): break
current_api_original_filename = file_info_to_dl.get('_original_name_for_log')
file_is_candidate_by_char_filter_scope = False
char_filter_that_matched_file = None
if not self.filter_character_list:
file_is_candidate_by_char_filter_scope = True
elif self.char_filter_scope == CHAR_SCOPE_FILES:
for char_name in self.filter_character_list:
if is_filename_match_for_character(current_api_original_filename, char_name):
file_is_candidate_by_char_filter_scope = True
char_filter_that_matched_file = char_name
break
elif self.char_filter_scope == CHAR_SCOPE_TITLE:
if post_is_candidate_by_title_char_match:
file_is_candidate_by_char_filter_scope = True
elif self.char_filter_scope == CHAR_SCOPE_BOTH:
if post_is_candidate_by_title_char_match:
file_is_candidate_by_char_filter_scope = True
else:
for char_name in self.filter_character_list:
if is_filename_match_for_character(current_api_original_filename, char_name):
file_is_candidate_by_char_filter_scope = True
char_filter_that_matched_file = char_name
break
if not file_is_candidate_by_char_filter_scope:
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
total_skipped_this_post += 1
continue
current_path_for_file = self.download_root
if self.use_subfolders:
char_title_subfolder_name = None
if self.target_post_id_from_initial_url and self.custom_folder_name:
char_title_subfolder_name = self.custom_folder_name
elif char_filter_that_matched_title:
char_title_subfolder_name = clean_folder_name(char_filter_that_matched_title)
elif char_filter_that_matched_file:
char_title_subfolder_name = clean_folder_name(char_filter_that_matched_file)
elif base_folder_names_for_post_content:
char_title_subfolder_name = base_folder_names_for_post_content[0]
if char_title_subfolder_name:
current_path_for_file = os.path.join(current_path_for_file, char_title_subfolder_name)
if self.use_post_subfolders:
cleaned_title_for_subfolder = clean_folder_name(post_title)
post_specific_subfolder_name = f"{post_id}_{cleaned_title_for_subfolder}" if cleaned_title_for_subfolder else f"{post_id}_untitled"
current_path_for_file = os.path.join(current_path_for_file, post_specific_subfolder_name)
target_folder_path_for_this_file = current_path_for_file
futures_list.append(file_pool.submit(
self._download_single_file,
file_info_to_dl,
target_folder_path_for_this_file,
headers,
post_id,
self.skip_current_file_flag,
post_title,
file_idx,
num_files_in_this_post_for_naming
))
for future in as_completed(futures_list):
if self.check_cancel():
for f_to_cancel in futures_list:
if not f_to_cancel.done():
f_to_cancel.cancel()
break
try:
dl_count, skip_count, actual_filename_saved, original_kept_flag = future.result()
total_downloaded_this_post += dl_count
total_skipped_this_post += skip_count
if original_kept_flag and dl_count > 0 and actual_filename_saved:
kept_original_filenames_for_log.append(actual_filename_saved)
except CancelledError:
self.logger(f" File download task for post {post_id} was cancelled.")
total_skipped_this_post += 1
except Exception as exc_f:
self.logger(f"❌ File download task for post {post_id} resulted in error: {exc_f}")
total_skipped_this_post += 1
if self.signals and hasattr(self.signals, 'file_progress_signal'):
self.signals.file_progress_signal.emit("", 0, 0)
if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled.");
else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}")
return total_downloaded_this_post, total_skipped_this_post, kept_original_filenames_for_log
class DownloadThread(QThread):
progress_signal = pyqtSignal(str)
add_character_prompt_signal = pyqtSignal(str)
file_download_status_signal = pyqtSignal(bool)
finished_signal = pyqtSignal(int, int, bool, list)
external_link_signal = pyqtSignal(str, str, str, str)
file_progress_signal = pyqtSignal(str, int, int)
def __init__(self, api_url_input, output_dir, known_names_copy,
cancellation_event,
filter_character_list=None,
filter_mode='all', skip_zip=True, skip_rar=True,
use_subfolders=True, use_post_subfolders=False, custom_folder_name=None, compress_images=False,
download_thumbnails=False, service=None, user_id=None,
downloaded_files=None, downloaded_file_hashes=None, downloaded_files_lock=None, downloaded_file_hashes_lock=None,
skip_words_list=None,
skip_words_scope=SKIP_SCOPE_FILES,
show_external_links=False,
extract_links_only=False,
num_file_threads_for_worker=1,
skip_current_file_flag=None,
start_page=None, end_page=None,
target_post_id_from_initial_url=None,
manga_mode_active=False,
unwanted_keywords=None,
manga_filename_style=STYLE_POST_TITLE,
char_filter_scope=CHAR_SCOPE_FILES
):
super().__init__()
self.api_url_input = api_url_input
self.output_dir = output_dir
self.known_names = list(known_names_copy)
self.cancellation_event = cancellation_event
self.skip_current_file_flag = skip_current_file_flag
self.initial_target_post_id = target_post_id_from_initial_url
self.filter_character_list = filter_character_list if filter_character_list else []
self.filter_mode = filter_mode
self.skip_zip = skip_zip
self.skip_rar = skip_rar
self.use_subfolders = use_subfolders
self.use_post_subfolders = use_post_subfolders
self.custom_folder_name = custom_folder_name
self.compress_images = compress_images
self.download_thumbnails = download_thumbnails
self.service = service
self.user_id = user_id
self.skip_words_list = skip_words_list if skip_words_list is not None else []
self.skip_words_scope = skip_words_scope
self.downloaded_files = downloaded_files
self.downloaded_files_lock = downloaded_files_lock
self.downloaded_file_hashes = downloaded_file_hashes
self.downloaded_file_hashes_lock = downloaded_file_hashes_lock
self._add_character_response = None
self.prompt_mutex = QMutex()
self.show_external_links = show_external_links
self.extract_links_only = extract_links_only
self.num_file_threads_for_worker = num_file_threads_for_worker
self.start_page = start_page
self.end_page = end_page
self.manga_mode_active = manga_mode_active
self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else \
{'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'}
self.manga_filename_style = manga_filename_style
self.char_filter_scope = char_filter_scope
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
self.compress_images = False
def logger(self, message):
self.progress_signal.emit(str(message))
def isInterruptionRequested(self):
return self.cancellation_event.is_set() or super().isInterruptionRequested()
def skip_file(self):
if self.isRunning() and self.skip_current_file_flag:
self.logger("⏭️ Skip requested for current file (single-thread mode).")
self.skip_current_file_flag.set()
else: self.logger(" Skip file: No download active or skip flag not available for current context.")
def run(self):
grand_total_downloaded_files = 0
grand_total_skipped_files = 0
grand_list_of_kept_original_filenames = []
was_process_cancelled = False
worker_signals_obj = PostProcessorSignals()
try:
worker_signals_obj.progress_signal.connect(self.progress_signal)
worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
self.logger(" Starting post fetch (single-threaded download process)...")
post_generator = download_from_api(
self.api_url_input,
logger=self.logger,
start_page=self.start_page,
end_page=self.end_page,
manga_mode=self.manga_mode_active,
cancellation_event=self.cancellation_event
)
for posts_batch_data in post_generator:
if self.isInterruptionRequested(): was_process_cancelled = True; break
for individual_post_data in posts_batch_data:
if self.isInterruptionRequested(): was_process_cancelled = True; break
post_processing_worker = PostProcessorWorker(
post_data=individual_post_data,
download_root=self.output_dir,
known_names=self.known_names,
filter_character_list=self.filter_character_list,
unwanted_keywords=self.unwanted_keywords,
filter_mode=self.filter_mode,
skip_zip=self.skip_zip, skip_rar=self.skip_rar,
use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders,
target_post_id_from_initial_url=self.initial_target_post_id,
custom_folder_name=self.custom_folder_name,
compress_images=self.compress_images, download_thumbnails=self.download_thumbnails,
service=self.service, user_id=self.user_id,
api_url_input=self.api_url_input,
cancellation_event=self.cancellation_event,
signals=worker_signals_obj,
downloaded_files=self.downloaded_files,
downloaded_file_hashes=self.downloaded_file_hashes,
downloaded_files_lock=self.downloaded_files_lock,
downloaded_file_hashes_lock=self.downloaded_file_hashes_lock,
skip_words_list=self.skip_words_list,
skip_words_scope=self.skip_words_scope,
show_external_links=self.show_external_links,
extract_links_only=self.extract_links_only,
num_file_threads=self.num_file_threads_for_worker,
skip_current_file_flag=self.skip_current_file_flag,
manga_mode_active=self.manga_mode_active,
manga_filename_style=self.manga_filename_style,
char_filter_scope=self.char_filter_scope
)
try:
dl_count, skip_count, kept_originals_this_post = post_processing_worker.process()
grand_total_downloaded_files += dl_count
grand_total_skipped_files += skip_count
if kept_originals_this_post:
grand_list_of_kept_original_filenames.extend(kept_originals_this_post)
except Exception as proc_err:
post_id_for_err = individual_post_data.get('id', 'N/A')
self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}")
traceback.print_exc()
num_potential_files_est = len(individual_post_data.get('attachments', [])) + \
(1 if individual_post_data.get('file') else 0)
grand_total_skipped_files += num_potential_files_est
if self.skip_current_file_flag and self.skip_current_file_flag.is_set():
self.skip_current_file_flag.clear()
self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
self.msleep(10)
if was_process_cancelled: break
if not was_process_cancelled and not self.isInterruptionRequested():
self.logger("✅ All posts processed or end of content reached by DownloadThread.")
except Exception as main_thread_err:
self.logger(f"\n❌ Critical error within DownloadThread run loop: {main_thread_err}")
traceback.print_exc()
if not self.isInterruptionRequested(): was_process_cancelled = False
finally:
try:
if worker_signals_obj:
worker_signals_obj.progress_signal.disconnect(self.progress_signal)
worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
worker_signals_obj.external_link_signal.disconnect(self.external_link_signal)
worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
except (TypeError, RuntimeError) as e:
self.logger(f" Note during DownloadThread signal disconnection: {e}")
self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)
def receive_add_character_result(self, result):
with QMutexLocker(self.prompt_mutex):
self._add_character_response = result
self.logger(f" (DownloadThread) Received character prompt response: {'Yes (added/confirmed)' if result else 'No (declined/failed)'}")