From 78357df07fe923c871b1e30f8c5d4baaa978e3ba Mon Sep 17 00:00:00 2001
From: Yuvi9587 <114073886+Yuvi9587@users.noreply.github.com>
Date: Thu, 29 May 2025 08:50:01 +0100
Subject: [PATCH] Commit
---
downloader_utils.py | 323 ++-----------------------------------------
main.py | 329 ++++++++++++++++++++++++++------------------
2 files changed, 208 insertions(+), 444 deletions(-)
diff --git a/downloader_utils.py b/downloader_utils.py
index 7ffd68f..7552d08 100644
--- a/downloader_utils.py
+++ b/downloader_utils.py
@@ -9,7 +9,6 @@ import http.client
import traceback
from concurrent.futures import ThreadPoolExecutor, Future, CancelledError, as_completed
import html
-
from PyQt5.QtCore import QObject, pyqtSignal, QThread, QMutex, QMutexLocker
from urllib.parse import urlparse
try:
@@ -17,7 +16,6 @@ try:
except ImportError:
print("ERROR: Pillow library not found. Please install it: pip install Pillow")
Image = None
-
try:
from multipart_downloader import download_file_in_parts
MULTIPART_DOWNLOADER_AVAILABLE = True
@@ -25,37 +23,27 @@ except ImportError as e:
print(f"Warning: multipart_downloader.py not found or import error: {e}. Multi-part downloads will be disabled.")
MULTIPART_DOWNLOADER_AVAILABLE = False
def download_file_in_parts(*args, **kwargs): return False, 0, None, None # Dummy function
-
from io import BytesIO
-
STYLE_POST_TITLE = "post_title"
STYLE_ORIGINAL_NAME = "original_name"
STYLE_DATE_BASED = "date_based" # For manga date-based sequential naming
MANGA_DATE_PREFIX_DEFAULT = "" # Default for the new prefix
STYLE_POST_TITLE_GLOBAL_NUMBERING = "post_title_global_numbering" # For manga post title + global counter
-
SKIP_SCOPE_FILES = "files"
SKIP_SCOPE_POSTS = "posts"
SKIP_SCOPE_BOTH = "both"
-
CHAR_SCOPE_TITLE = "title"
CHAR_SCOPE_FILES = "files"
CHAR_SCOPE_BOTH = "both"
CHAR_SCOPE_COMMENTS = "comments"
-
FILE_DOWNLOAD_STATUS_SUCCESS = "success"
FILE_DOWNLOAD_STATUS_SKIPPED = "skipped"
FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER = "failed_retry_later"
-
fastapi_app = None
KNOWN_NAMES = [] # This will now store dicts: {'name': str, 'is_group': bool, 'aliases': list[str]}
-
MIN_SIZE_FOR_MULTIPART_DOWNLOAD = 10 * 1024 * 1024 # 10 MB - Stays the same
MAX_PARTS_FOR_MULTIPART_DOWNLOAD = 15 # Max concurrent connections for a single file
-# Max length for a single filename or folder name component to ensure cross-OS compatibility
-# Windows MAX_PATH is 260 for the full path. Individual components are usually shorter.
MAX_FILENAME_COMPONENT_LENGTH = 150
-
IMAGE_EXTENSIONS = {
'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp',
'.heic', '.heif', '.svg', '.ico', '.jfif', '.pjpeg', '.pjp', '.avif'
@@ -71,7 +59,13 @@ AUDIO_EXTENSIONS = {
'.mp3', '.wav', '.aac', '.flac', '.ogg', '.wma', '.m4a', '.opus',
'.aiff', '.ape', '.mid', '.midi'
}
-
+FOLDER_NAME_STOP_WORDS = {
+ "a", "alone", "am", "an", "and", "at", "be", "blues", "but", "by", "com",
+ "for", "grown", "hard", "he", "her", "his", "hitting", "i", "im", "in", "is", "it", "its",
+ "me", "much", "my", "net", "not", "of", "on", "or", "org", "our", "please",
+ "right", "s", "she", "so", "technically", "tell", "the", "their", "they", "this",
+ "to", "ve", "was", "we", "well", "were", "with", "www", "year", "you", "your",
+}
def parse_cookie_string(cookie_string):
"""Parses a 'name=value; name2=value2' cookie string into a dict."""
cookies = {}
@@ -84,7 +78,6 @@ def parse_cookie_string(cookie_string):
if name: # Ensure name is not empty
cookies[name] = value
return cookies if cookies else None
-
def load_cookies_from_netscape_file(filepath, logger_func):
"""Loads cookies from a Netscape-formatted cookies.txt file."""
cookies = {}
@@ -108,86 +101,62 @@ def load_cookies_from_netscape_file(filepath, logger_func):
except Exception as e:
logger_func(f" 🍪 Error parsing cookie file '{os.path.basename(filepath)}': {e}")
return None
-
def is_title_match_for_character(post_title, character_name_filter):
if not post_title or not character_name_filter:
return False
safe_filter = str(character_name_filter).strip()
if not safe_filter:
return False
-
pattern = r"(?i)\b" + re.escape(safe_filter) + r"\b"
match_result = bool(re.search(pattern, post_title))
return match_result
-
def is_filename_match_for_character(filename, character_name_filter):
if not filename or not character_name_filter:
return False
-
safe_filter = str(character_name_filter).strip().lower()
if not safe_filter:
return False
-
match_result = safe_filter in filename.lower()
return match_result
-
-
def clean_folder_name(name):
if not isinstance(name, str): name = str(name)
cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name)
- cleaned = cleaned.strip()
- cleaned = re.sub(r'\s+', ' ', cleaned)
-
+ cleaned = cleaned.strip() # Initial strip
+ cleaned = re.sub(r'\s+', ' ', cleaned) # Condense multiple spaces
+ if cleaned: # Only process if not empty
+ words = cleaned.split(' ')
+ filtered_words = [word for word in words if word.lower() not in FOLDER_NAME_STOP_WORDS and word]
+ cleaned = ' '.join(filtered_words)
+ cleaned = cleaned.strip() # Strip again in case stop words were at ends
if not cleaned: # If empty after initial cleaning
return "untitled_folder"
-
- # Truncate if too long
if len(cleaned) > MAX_FILENAME_COMPONENT_LENGTH:
cleaned = cleaned[:MAX_FILENAME_COMPONENT_LENGTH]
- # After truncation, it's possible a new trailing space/dot is at the end
- # or an existing one remains. So, strip them using the loop below.
-
- # Strip trailing dots/spaces (original logic, now applied to potentially truncated name)
temp_name = cleaned
while len(temp_name) > 0 and (temp_name.endswith('.') or temp_name.endswith(' ')):
temp_name = temp_name[:-1]
-
return temp_name if temp_name else "untitled_folder"
-
-
def clean_filename(name):
if not isinstance(name, str): name = str(name)
cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name)
cleaned = cleaned.strip() # Remove leading/trailing spaces first
cleaned = re.sub(r'\s+', ' ', cleaned) # Replace multiple internal spaces with a single space
-
if not cleaned: return "untitled_file"
-
base_name, ext = os.path.splitext(cleaned)
-
- # Calculate max length for base_name, reserving space for the extension
max_base_len = MAX_FILENAME_COMPONENT_LENGTH - len(ext)
-
if len(base_name) > max_base_len:
if max_base_len > 0: # If there's space for at least some of the base name
base_name = base_name[:max_base_len]
else: # No space for base name (extension is too long or fills the entire allowed space)
- # In this case, we have to truncate the original 'cleaned' string,
- # which might cut into the extension, but it's necessary to meet the length.
return cleaned[:MAX_FILENAME_COMPONENT_LENGTH] if cleaned else "untitled_file"
-
final_name = base_name + ext
- # Ensure the final reconstructed name isn't empty (e.g. if base_name became empty and ext was also empty)
return final_name if final_name else "untitled_file"
-
-
def strip_html_tags(html_text):
if not html_text: return ""
text = html.unescape(html_text)
clean_pattern = re.compile('<.*?>')
cleaned_text = re.sub(clean_pattern, '', text)
return cleaned_text.strip()
-
def extract_folder_name_from_title(title, unwanted_keywords):
if not title: return 'Uncategorized'
title_lower = title.lower()
@@ -198,8 +167,6 @@ def extract_folder_name_from_title(title, unwanted_keywords):
return clean_token
cleaned_full_title = clean_folder_name(title)
return cleaned_full_title if cleaned_full_title else 'Uncategorized'
-
-
def match_folders_from_title(title, names_to_match, unwanted_keywords):
"""
Matches folder names from a title based on a list of known name objects.
@@ -210,13 +177,11 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
title_lower = title.lower()
matched_cleaned_names = set()
sorted_name_objects = sorted(names_to_match, key=lambda x: len(x.get("name", "")), reverse=True)
-
for name_obj in sorted_name_objects:
primary_folder_name = name_obj.get("name")
aliases = name_obj.get("aliases", [])
if not primary_folder_name or not aliases:
continue
-
for alias in aliases:
alias_lower = alias.lower()
if not alias_lower: continue
@@ -227,46 +192,31 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
matched_cleaned_names.add(cleaned_primary_name)
break # Found a match for this primary name via one of its aliases
return sorted(list(matched_cleaned_names))
-
-
def is_image(filename):
if not filename: return False
_, ext = os.path.splitext(filename)
return ext.lower() in IMAGE_EXTENSIONS
-
-
def is_video(filename):
if not filename: return False
_, ext = os.path.splitext(filename)
return ext.lower() in VIDEO_EXTENSIONS
-
-
def is_zip(filename):
if not filename: return False
return filename.lower().endswith('.zip')
-
-
def is_rar(filename):
if not filename: return False
return filename.lower().endswith('.rar')
-
def is_archive(filename):
if not filename: return False
_, ext = os.path.splitext(filename)
return ext.lower() in ARCHIVE_EXTENSIONS
-
def is_audio(filename):
if not filename: return False
_, ext = os.path.splitext(filename)
return ext.lower() in AUDIO_EXTENSIONS
-
-
-
def is_post_url(url):
if not isinstance(url, str): return False
return '/post/' in urlparse(url).path
-
-
def extract_post_info(url_string):
service, user_id, post_id = None, None, None
if not isinstance(url_string, str) or not url_string.strip(): return None, None, None
@@ -275,18 +225,14 @@ def extract_post_info(url_string):
domain = parsed_url.netloc.lower()
is_kemono = any(d in domain for d in ['kemono.su', 'kemono.party'])
is_coomer = any(d in domain for d in ['coomer.su', 'coomer.party'])
-
if not (is_kemono or is_coomer): return None, None, None
-
path_parts = [part for part in parsed_url.path.strip('/').split('/') if part]
-
if len(path_parts) >= 3 and path_parts[1].lower() == 'user':
service = path_parts[0]
user_id = path_parts[2]
if len(path_parts) >= 5 and path_parts[3].lower() == 'post':
post_id = path_parts[4]
return service, user_id, post_id
-
if len(path_parts) >= 5 and path_parts[0].lower() == 'api' and \
path_parts[1].lower() == 'v1' and path_parts[3].lower() == 'user':
service = path_parts[2]
@@ -294,18 +240,13 @@ def extract_post_info(url_string):
if len(path_parts) >= 7 and path_parts[5].lower() == 'post':
post_id = path_parts[6]
return service, user_id, post_id
-
except Exception as e:
print(f"Debug: Exception during extract_post_info for URL '{url_string}': {e}")
return None, None, None
-
-
def prepare_cookies_for_request(use_cookie_flag, cookie_text_input, selected_cookie_file_path, app_base_dir, logger_func):
"""Prepares a cookie dictionary from text input or cookies.txt file."""
if not use_cookie_flag:
return None
-
- # Attempt 1: Selected cookie file
if selected_cookie_file_path:
logger_func(f" 🍪 Attempting to load cookies from selected file: '{os.path.basename(selected_cookie_file_path)}'...")
cookies = load_cookies_from_netscape_file(selected_cookie_file_path, logger_func)
@@ -313,13 +254,7 @@ def prepare_cookies_for_request(use_cookie_flag, cookie_text_input, selected_coo
return cookies
else:
logger_func(f" ⚠️ Failed to load cookies from selected file: '{os.path.basename(selected_cookie_file_path)}'. Trying other methods.")
- # Fall through if selected file is invalid or not found
-
- # Attempt 2: Default cookies.txt in app directory
- # This is tried if no specific file was selected OR if the selected file was provided but failed to load.
if app_base_dir: # Only proceed if app_base_dir is available
- # Avoid re-logging "not found" or "failed" if a selected_cookie_file_path was already attempted and failed.
- # Only log the attempt for default if no selected_cookie_file_path was given.
default_cookies_path = os.path.join(app_base_dir, "cookies.txt")
if os.path.exists(default_cookies_path): # Only attempt if it exists
if not selected_cookie_file_path: # Log attempt only if we didn't just try a selected file
@@ -329,9 +264,6 @@ def prepare_cookies_for_request(use_cookie_flag, cookie_text_input, selected_coo
return cookies
elif not selected_cookie_file_path: # Log failure only if we tried default as primary file method
logger_func(f" ⚠️ Failed to load cookies from default file: '{os.path.basename(default_cookies_path)}'. Trying text input.")
- # Fall through if default file is invalid or not found
-
- # Attempt 3: Cookies from UI text input
if cookie_text_input:
logger_func(" 🍪 Using cookies from UI text input (as file methods failed or were not applicable).")
cookies = parse_cookie_string(cookie_text_input)
@@ -339,15 +271,12 @@ def prepare_cookies_for_request(use_cookie_flag, cookie_text_input, selected_coo
return cookies
else:
logger_func(" ⚠️ UI cookie text input was provided but was empty or invalid.")
-
logger_func(" 🍪 Cookie usage enabled, but no valid cookies found from any source (selected file, default file, or text input).")
return None
-
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
if cancellation_event and cancellation_event.is_set(): # type: ignore
logger(" Fetch cancelled before request.")
raise RuntimeError("Fetch operation cancelled by user.")
-
if pause_event and pause_event.is_set(): # type: ignore
logger(" Post fetching paused...")
while pause_event.is_set():
@@ -356,7 +285,6 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
raise RuntimeError("Fetch operation cancelled by user.")
time.sleep(0.5)
logger(" Post fetching resumed.")
-
paginated_url = f'{api_url_base}?o={offset}'
logger(f" Fetching: {paginated_url} (Page approx. {offset // 50 + 1})")
try:
@@ -380,12 +308,10 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
raise RuntimeError(f"Error decoding JSON from offset {offset} ({paginated_url}): {e}. Response text: {response.text[:200]}")
except Exception as e:
raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}")
-
def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
if cancellation_event and cancellation_event.is_set():
logger(" Comment fetch cancelled before request.")
raise RuntimeError("Comment fetch operation cancelled by user.")
-
if pause_event and pause_event.is_set(): # type: ignore
logger(" Comment fetching paused...")
while pause_event.is_set():
@@ -394,7 +320,6 @@ def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger,
raise RuntimeError("Comment fetch operation cancelled by user.")
time.sleep(0.5)
logger(" Comment fetching resumed.")
-
comments_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}/comments"
logger(f" Fetching comments: {comments_api_url}")
try:
@@ -418,29 +343,21 @@ def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger,
raise RuntimeError(f"Error decoding JSON from comments API for post {post_id} ({comments_api_url}): {e}. Response text: {response.text[:200]}")
except Exception as e:
raise RuntimeError(f"Unexpected error fetching comments for post {post_id} ({comments_api_url}): {e}")
-
def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False,
cancellation_event=None, pause_event=None, use_cookie=False, cookie_text="", selected_cookie_file=None, app_base_dir=None):
headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
service, user_id, target_post_id = extract_post_info(api_url_input)
-
if cancellation_event and cancellation_event.is_set():
logger(" Download_from_api cancelled at start.")
return
-
- # --- Moved Up: Parse api_domain and prepare cookies early ---
parsed_input_url_for_domain = urlparse(api_url_input)
api_domain = parsed_input_url_for_domain.netloc
if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
logger(f"⚠️ Unrecognized domain '{api_domain}' from input URL. Defaulting to kemono.su for API calls.")
api_domain = "kemono.su" # Default domain if input is unusual
-
cookies_for_api = None
if use_cookie and app_base_dir: # app_base_dir is needed for cookies.txt path
cookies_for_api = prepare_cookies_for_request(use_cookie, cookie_text, selected_cookie_file, app_base_dir, logger)
- # --- End Moved Up ---
-
- # --- New: Attempt direct fetch for specific post URL first ---
if target_post_id:
direct_post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{target_post_id}"
logger(f" Attempting direct fetch for target post: {direct_post_api_url}")
@@ -448,22 +365,15 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
direct_response = requests.get(direct_post_api_url, headers=headers, timeout=(10, 30), cookies=cookies_for_api)
direct_response.raise_for_status()
direct_post_data = direct_response.json()
-
- # The direct endpoint might return a single post object or a list containing one post.
- # Check if it's a list and take the first item, or use the object directly.
if isinstance(direct_post_data, list) and direct_post_data:
direct_post_data = direct_post_data[0]
-
- # Check if the data is a dict and contains a 'post' key (new format)
if isinstance(direct_post_data, dict) and 'post' in direct_post_data and isinstance(direct_post_data['post'], dict):
direct_post_data = direct_post_data['post'] # Extract the nested post data
-
if isinstance(direct_post_data, dict) and direct_post_data.get('id') == target_post_id: # Now check the extracted/direct dict
logger(f" ✅ Direct fetch successful for post {target_post_id}.")
yield [direct_post_data] # Yield the single post data as a list
return # Exit the generator, no need to paginate
else:
- # Log more details about the unexpected response
response_type = type(direct_post_data).__name__
response_snippet = str(direct_post_data)[:200] # Log first 200 chars
logger(f" ⚠️ Direct fetch for post {target_post_id} returned unexpected data (Type: {response_type}, Snippet: '{response_snippet}'). Falling back to pagination.")
@@ -471,40 +381,25 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
logger(f" ⚠️ Direct fetch failed for post {target_post_id}: {e}. Falling back to pagination.")
except Exception as e:
logger(f" ⚠️ Unexpected error during direct fetch for post {target_post_id}: {e}. Falling back to pagination.")
- # --- End New: Attempt direct fetch ---
-
if not service or not user_id:
logger(f"❌ Invalid URL or could not extract service/user: {api_url_input}")
return
-
if target_post_id and (start_page or end_page):
logger("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).")
- # start_page = end_page = None # Keep these potentially for the fallback pagination
-
is_creator_feed_for_manga = manga_mode and not target_post_id
-
- # api_domain is already parsed and validated above
-
api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}"
-
-
page_size = 50
-
if is_creator_feed_for_manga:
logger(" Manga Mode: Fetching posts to sort by date (oldest processed first)...")
all_posts_for_manga_mode = []
-
current_offset_manga = 0
- # Determine starting page and offset for manga mode
if start_page and start_page > 1:
current_offset_manga = (start_page - 1) * page_size
logger(f" Manga Mode: Starting fetch from page {start_page} (offset {current_offset_manga}).")
elif start_page: # start_page is 1
logger(f" Manga Mode: Starting fetch from page 1 (offset 0).")
-
if end_page:
logger(f" Manga Mode: Will fetch up to page {end_page}.")
-
while True:
if pause_event and pause_event.is_set():
logger(" Manga mode post fetching paused...") # type: ignore
@@ -517,7 +412,6 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
if cancellation_event and cancellation_event.is_set():
logger(" Manga mode post fetching cancelled.")
break
-
current_page_num_manga = (current_offset_manga // page_size) + 1
if end_page and current_page_num_manga > end_page:
logger(f" Manga Mode: Reached specified end page ({end_page}). Stopping post fetch.")
@@ -547,17 +441,13 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
logger(f"❌ Unexpected error during manga mode fetch: {e}")
traceback.print_exc()
break
-
if cancellation_event and cancellation_event.is_set(): return
-
if all_posts_for_manga_mode:
logger(f" Manga Mode: Fetched {len(all_posts_for_manga_mode)} total posts. Sorting by publication date (oldest first)...")
- # ... (rest of sorting and yielding logic for manga mode remains the same) ...
def sort_key_tuple(post):
published_date_str = post.get('published')
added_date_str = post.get('added')
post_id_str = post.get('id', "0")
-
primary_sort_val = "0000-00-00T00:00:00" # Default for missing dates (effectively oldest)
if published_date_str:
primary_sort_val = published_date_str
@@ -566,33 +456,26 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
primary_sort_val = added_date_str
else:
logger(f" ⚠️ Post ID {post_id_str} missing both 'published' and 'added' dates. Placing at start of sort (using default earliest date).")
-
secondary_sort_val = 0 # Default for non-integer IDs
try:
secondary_sort_val = int(post_id_str)
except ValueError:
logger(f" ⚠️ Post ID '{post_id_str}' is not a valid integer for secondary sorting, using 0.")
-
return (primary_sort_val, secondary_sort_val)
-
all_posts_for_manga_mode.sort(key=sort_key_tuple) # Sorts ascending by (date, id)
-
for i in range(0, len(all_posts_for_manga_mode), page_size):
if cancellation_event and cancellation_event.is_set():
logger(" Manga mode post yielding cancelled.")
break
yield all_posts_for_manga_mode[i:i + page_size]
return
-
current_page_num = 1
current_offset = 0
processed_target_post_flag = False
-
if start_page and start_page > 1 and not target_post_id: # Only apply start_page if not targeting a specific post directly
current_offset = (start_page - 1) * page_size
current_page_num = start_page
logger(f" Starting from page {current_page_num} (calculated offset {current_offset}).")
-
while True:
if pause_event and pause_event.is_set():
logger(" Post fetching loop paused...") # type: ignore
@@ -605,14 +488,11 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
if cancellation_event and cancellation_event.is_set():
logger(" Post fetching loop cancelled.")
break
-
if target_post_id and processed_target_post_flag:
break
-
if not target_post_id and end_page and current_page_num > end_page:
logger(f"✅ Reached specified end page ({end_page}) for creator feed. Stopping.")
break
-
try:
posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event, pause_event, cookies_dict=cookies_for_api)
if not isinstance(posts_batch, list):
@@ -628,7 +508,6 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
logger(f"❌ Unexpected error fetching page {current_page_num} (offset {current_offset}): {e}")
traceback.print_exc()
break
-
if not posts_batch:
if target_post_id and not processed_target_post_flag: # Only log this if we were searching for a specific post
logger(f"❌ Target post {target_post_id} not found after checking all available pages (API returned no more posts at offset {current_offset}).")
@@ -638,7 +517,6 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
else:
logger(f"✅ Reached end of posts (no more content from API at offset {current_offset}).")
break
-
if target_post_id and not processed_target_post_flag:
matching_post = next((p for p in posts_batch if str(p.get('id')) == str(target_post_id)), None)
if matching_post:
@@ -647,18 +525,13 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
processed_target_post_flag = True
elif not target_post_id:
yield posts_batch
-
if processed_target_post_flag:
break
-
current_offset += page_size # Increment by page_size for the next API call's 'o' parameter
current_page_num += 1
time.sleep(0.6)
-
if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()):
logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).")
-
-
def get_link_platform(url):
try:
domain = urlparse(url).netloc.lower()
@@ -672,7 +545,6 @@ def get_link_platform(url):
if 'pixiv.net' in domain: return 'pixiv'
if 'kemono.su' in domain or 'kemono.party' in domain: return 'kemono'
if 'coomer.su' in domain or 'coomer.party' in domain: return 'coomer'
-
parts = domain.split('.')
if len(parts) >= 2:
if parts[-2] not in ['com', 'org', 'net', 'gov', 'edu', 'co'] or len(parts) == 2:
@@ -683,16 +555,12 @@ def get_link_platform(url):
return domain
return 'external'
except Exception: return 'unknown'
-
-
class PostProcessorSignals(QObject):
progress_signal = pyqtSignal(str)
file_download_status_signal = pyqtSignal(bool)
external_link_signal = pyqtSignal(str, str, str, str)
file_progress_signal = pyqtSignal(str, object)
missed_character_post_signal = pyqtSignal(str, str) # New: post_title, reason
-
-
class PostProcessorWorker:
def __init__(self, post_data, download_root, known_names,
filter_character_list, emitter, # Changed signals to emitter
@@ -745,18 +613,15 @@ class PostProcessorWorker:
if not self.emitter:
raise ValueError("PostProcessorWorker requires an emitter (signals object or queue).")
self.skip_current_file_flag = skip_current_file_flag
-
self.downloaded_files = downloaded_files if downloaded_files is not None else set()
self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else set()
self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock()
self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock()
-
self.skip_words_list = skip_words_list if skip_words_list is not None else []
self.skip_words_scope = skip_words_scope
self.show_external_links = show_external_links
self.extract_links_only = extract_links_only
self.num_file_threads = num_file_threads
-
self.manga_mode_active = manga_mode_active
self.manga_filename_style = manga_filename_style
self.char_filter_scope = char_filter_scope
@@ -771,12 +636,9 @@ class PostProcessorWorker:
self.use_cookie = use_cookie # Store cookie setting
self.override_output_dir = override_output_dir # Store the override directory
self.scan_content_for_images = scan_content_for_images # Store new flag
-
if self.compress_images and Image is None:
- # type: ignore
self.logger("⚠️ Image compression disabled: Pillow library not found.")
self.compress_images = False
-
def _emit_signal(self, signal_type_str, *payload_args):
"""Helper to emit signal either directly or via queue."""
if isinstance(self.emitter, queue.Queue):
@@ -786,13 +648,10 @@ class PostProcessorWorker:
signal_attr.emit(*payload_args)
else:
print(f"(Worker Log - Unrecognized Emitter for {signal_type_str}): {payload_args[0] if payload_args else ''}")
-
def logger(self, message):
self._emit_signal('progress', message)
-
def check_cancel(self):
return self.cancellation_event.is_set()
-
def _check_pause(self, context_message="Operation"):
if self.pause_event and self.pause_event.is_set(): # type: ignore
self.logger(f" {context_message} paused...")
@@ -803,19 +662,16 @@ class PostProcessorWorker:
time.sleep(0.5)
if not self.check_cancel(): self.logger(f" {context_message} resumed.")
return False # Not cancelled during pause
-
def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event, # skip_event is threading.Event
post_title="", file_index_in_post=0, num_files_in_this_post=1,
manga_date_file_counter_ref=None): # Added manga_date_file_counter_ref
was_original_name_kept_flag = False
manga_global_file_counter_ref = None # Placeholder, will be passed from process()
final_filename_saved_for_return = ""
-
def _get_current_character_filters(self):
if self.dynamic_filter_holder:
return self.dynamic_filter_holder.get_filters()
return self.filter_character_list_objects_initial
-
def _download_single_file(self, file_info, target_folder_path, headers, original_post_id_for_log, skip_event,
post_title="", file_index_in_post=0, num_files_in_this_post=1, # Added manga_date_file_counter_ref
manga_date_file_counter_ref=None,
@@ -824,17 +680,14 @@ class PostProcessorWorker:
was_original_name_kept_flag = False
final_filename_saved_for_return = ""
retry_later_details = None # For storing info if retryable failure
-
if self._check_pause(f"File download prep for '{file_info.get('name', 'unknown file')}'"): return 0, 1, "", False
if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False
-
file_url = file_info.get('url')
cookies_to_use_for_file = None
if self.use_cookie: # This flag comes from the checkbox
cookies_to_use_for_file = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger)
api_original_filename = file_info.get('_original_name_for_log', file_info.get('name'))
filename_to_save_in_main_path = ""
-
if forced_filename_override:
filename_to_save_in_main_path = forced_filename_override
self.logger(f" Retrying with forced filename: '{filename_to_save_in_main_path}'")
@@ -845,21 +698,17 @@ class PostProcessorWorker:
if skip_word.lower() in filename_to_check_for_skip_words:
self.logger(f" -> Skip File (Keyword in Original Name '{skip_word}'): '{api_original_filename}'. Scope: {self.skip_words_scope}")
return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
-
original_filename_cleaned_base, original_ext = os.path.splitext(clean_filename(api_original_filename))
if not original_ext.startswith('.'): original_ext = '.' + original_ext if original_ext else ''
-
if self.manga_mode_active: # Note: duplicate_file_mode is overridden to "Delete" in main.py if manga_mode is on
if self.manga_filename_style == STYLE_ORIGINAL_NAME:
filename_to_save_in_main_path = clean_filename(api_original_filename)
- # Apply prefix if provided for Original Name style
if self.manga_date_prefix and self.manga_date_prefix.strip():
cleaned_prefix = clean_filename(self.manga_date_prefix.strip())
if cleaned_prefix:
filename_to_save_in_main_path = f"{cleaned_prefix} {filename_to_save_in_main_path}"
else:
self.logger(f"⚠️ Manga Original Name Mode: Provided prefix '{self.manga_date_prefix}' was empty after cleaning. Using original name only.")
-
was_original_name_kept_flag = True
elif self.manga_filename_style == STYLE_POST_TITLE:
if post_title and post_title.strip():
@@ -877,14 +726,12 @@ class PostProcessorWorker:
self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save_in_main_path}'.")
elif self.manga_filename_style == STYLE_DATE_BASED:
current_thread_name = threading.current_thread().name
-
if manga_date_file_counter_ref is not None and len(manga_date_file_counter_ref) == 2:
counter_val_for_filename = -1
counter_lock = manga_date_file_counter_ref[1]
with counter_lock:
counter_val_for_filename = manga_date_file_counter_ref[0]
manga_date_file_counter_ref[0] += 1
-
base_numbered_name = f"{counter_val_for_filename:03d}"
if self.manga_date_prefix and self.manga_date_prefix.strip():
cleaned_prefix = clean_filename(self.manga_date_prefix.strip())
@@ -905,7 +752,6 @@ class PostProcessorWorker:
with counter_lock:
counter_val_for_filename = manga_global_file_counter_ref[0]
manga_global_file_counter_ref[0] += 1
-
cleaned_post_title_base_for_global = clean_filename(post_title.strip() if post_title and post_title.strip() else "post")
filename_to_save_in_main_path = f"{cleaned_post_title_base_for_global}_{counter_val_for_filename:03d}{original_ext}"
else:
@@ -914,7 +760,6 @@ class PostProcessorWorker:
else:
                self.logger(f"⚠️ Manga mode: Unknown filename style '{self.manga_filename_style}'. Defaulting to original filename for '{api_original_filename}'.")
filename_to_save_in_main_path = clean_filename(api_original_filename)
-
if not filename_to_save_in_main_path:
filename_to_save_in_main_path = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}"
                self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save_in_main_path}'.")
@@ -922,7 +767,6 @@ class PostProcessorWorker:
else:
filename_to_save_in_main_path = clean_filename(api_original_filename)
was_original_name_kept_flag = False
-
if self.remove_from_filename_words_list and filename_to_save_in_main_path:
base_name_for_removal, ext_for_removal = os.path.splitext(filename_to_save_in_main_path)
modified_base_name = base_name_for_removal
@@ -930,7 +774,6 @@ class PostProcessorWorker:
if not word_to_remove: continue
pattern = re.compile(re.escape(word_to_remove), re.IGNORECASE)
modified_base_name = pattern.sub("", modified_base_name)
- # After removals, normalize all seps (underscore, dot, multiple spaces, hyphen) to a single space, then strip.
modified_base_name = re.sub(r'[_.\s-]+', ' ', modified_base_name) # Convert all separators to spaces
modified_base_name = re.sub(r'\s+', ' ', modified_base_name) # Condense multiple spaces to one
modified_base_name = modified_base_name.strip() # Remove leading/trailing spaces
@@ -938,13 +781,11 @@ class PostProcessorWorker:
filename_to_save_in_main_path = modified_base_name + ext_for_removal
else:
filename_to_save_in_main_path = base_name_for_removal + ext_for_removal
-
if not self.download_thumbnails:
is_img_type = is_image(api_original_filename)
is_vid_type = is_video(api_original_filename)
is_archive_type = is_archive(api_original_filename)
is_audio_type = is_audio(api_original_filename)
-
if self.filter_mode == 'archive':
if not is_archive_type:
self.logger(f" -> Filter Skip (Archive Mode): '{api_original_filename}' (Not an Archive).")
@@ -961,7 +802,6 @@ class PostProcessorWorker:
if not is_audio_type:
self.logger(f" -> Filter Skip: '{api_original_filename}' (Not Audio).")
return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
-
if self.skip_zip and is_zip(api_original_filename):
self.logger(f" -> Pref Skip: '{api_original_filename}' (ZIP).")
return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
@@ -981,7 +821,6 @@ class PostProcessorWorker:
total_size_bytes = 0
download_successful_flag = False
last_exception_for_retry_later = None
-
for attempt_num_single_stream in range(max_retries + 1):
if self._check_pause(f"File download attempt for '{api_original_filename}'"): break
if self.check_cancel() or (skip_event and skip_event.is_set()): break
@@ -989,18 +828,14 @@ class PostProcessorWorker:
if attempt_num_single_stream > 0:
self.logger(f" Retrying download for '{api_original_filename}' (Overall Attempt {attempt_num_single_stream + 1}/{max_retries + 1})...")
time.sleep(retry_delay * (2**(attempt_num_single_stream - 1)))
-
self._emit_signal('file_download_status', True)
-
response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True, cookies=cookies_to_use_for_file)
response.raise_for_status()
total_size_bytes = int(response.headers.get('Content-Length', 0))
-
num_parts_for_file = min(self.num_file_threads, MAX_PARTS_FOR_MULTIPART_DOWNLOAD)
attempt_multipart = (self.allow_multipart_download and MULTIPART_DOWNLOADER_AVAILABLE and
num_parts_for_file > 1 and total_size_bytes > MIN_SIZE_FOR_MULTIPART_DOWNLOAD and
'bytes' in response.headers.get('Accept-Ranges', '').lower())
-
if self._check_pause(f"Multipart decision for '{api_original_filename}'"): break # Check pause before potentially long operation
if attempt_multipart:
response.close()
@@ -1023,13 +858,11 @@ class PostProcessorWorker:
self.logger(f" Multi-part download attempt failed for '{api_original_filename}'. Retrying with single stream.")
else:
download_successful_flag = False; break
-
                self.logger(f"⬇️ Downloading (Single Stream): '{api_original_filename}' (Size: {total_size_bytes / (1024*1024):.2f} MB if known) [Base Name: '{filename_to_save_in_main_path}']")
file_content_buffer = BytesIO()
current_attempt_downloaded_bytes = 0
md5_hasher = hashlib.md5()
last_progress_time = time.time()
-
for chunk in response.iter_content(chunk_size=1 * 1024 * 1024):
if self._check_pause(f"Chunk download for '{api_original_filename}'"): break
if self.check_cancel() or (skip_event and skip_event.is_set()): break
@@ -1039,10 +872,8 @@ class PostProcessorWorker:
if time.time() - last_progress_time > 1 and total_size_bytes > 0:
self._emit_signal('file_progress', api_original_filename, (current_attempt_downloaded_bytes, total_size_bytes))
last_progress_time = time.time()
-
if self.check_cancel() or (skip_event and skip_event.is_set()) or (self.pause_event and self.pause_event.is_set()):
if file_content_buffer: file_content_buffer.close(); break
-
if current_attempt_downloaded_bytes > 0 or (total_size_bytes == 0 and response.status_code == 200):
calculated_file_hash = md5_hasher.hexdigest()
downloaded_size_bytes = current_attempt_downloaded_bytes
@@ -1051,7 +882,6 @@ class PostProcessorWorker:
download_successful_flag = True; break
else:
if file_content_buffer: file_content_buffer.close()
-
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
                self.logger(f"   ❌ Download Error (Retryable): {api_original_filename}. Error: {e}")
last_exception_for_retry_later = e # Store this specific exception
@@ -1072,12 +902,10 @@ class PostProcessorWorker:
self._emit_signal('file_download_status', False)
final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes
self._emit_signal('file_progress', api_original_filename, (downloaded_size_bytes, final_total_for_progress))
-
if self.check_cancel() or (skip_event and skip_event.is_set()) or (self.pause_event and self.pause_event.is_set() and not download_successful_flag):
            self.logger(f"   ⚠️ Download process interrupted for {api_original_filename}.")
if file_content_bytes: file_content_bytes.close()
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
-
if not download_successful_flag:
            self.logger(f"❌ Download failed for '{api_original_filename}' after {max_retries + 1} attempts.")
if file_content_bytes: file_content_bytes.close()
@@ -1097,7 +925,6 @@ class PostProcessorWorker:
}
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, retry_later_details
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None # Generic failure
-
if self._check_pause(f"Post-download hash check for '{api_original_filename}'"): return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
with self.downloaded_file_hashes_lock:
if calculated_file_hash in self.downloaded_file_hashes:
@@ -1112,7 +939,6 @@ class PostProcessorWorker:
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
effective_save_folder = target_folder_path # Default: main character/post folder
filename_after_styling_and_word_removal = filename_to_save_in_main_path
-
try: # Ensure the chosen save folder (main or Duplicate) exists
os.makedirs(effective_save_folder, exist_ok=True)
except OSError as e:
@@ -1124,7 +950,6 @@ class PostProcessorWorker:
return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
data_to_write_after_compression = file_content_bytes
filename_after_compression = filename_after_styling_and_word_removal
-
is_img_for_compress_check = is_image(api_original_filename)
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...")
@@ -1137,7 +962,6 @@ class PostProcessorWorker:
compressed_bytes_io = BytesIO()
img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4)
compressed_size = compressed_bytes_io.getbuffer().nbytes
-
if compressed_size < downloaded_size_bytes * 0.9: # If significantly smaller
self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.")
data_to_write_after_compression = compressed_bytes_io; data_to_write_after_compression.seek(0)
@@ -1161,9 +985,7 @@ class PostProcessorWorker:
self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')")
if self._check_pause(f"File saving for '{final_filename_on_disk}'"): return 0, 1, final_filename_on_disk, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
final_save_path = os.path.join(effective_save_folder, final_filename_on_disk)
-
try:
-
if data_to_write_after_compression is file_content_bytes and not isinstance(file_content_bytes, BytesIO):
original_part_file_actual_path = file_content_bytes.name
file_content_bytes.close() # Close handle first
@@ -1178,7 +1000,6 @@ class PostProcessorWorker:
if os.path.exists(original_part_file_actual_path):
try: os.remove(original_part_file_actual_path)
except OSError as e_rem: self.logger(f" -> Failed to remove .part after compression: {e_rem}")
-
with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash)
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Track by logical name
final_filename_saved_for_return = final_filename_on_disk
@@ -1199,43 +1020,33 @@ class PostProcessorWorker:
if not file_content_bytes.closed: # Check if already closed
file_content_bytes.close()
except Exception: pass # Ignore errors on close if already handled
-
-
def process(self):
if self._check_pause(f"Post processing for ID {self.post.get('id', 'N/A')}"): return 0,0,[], []
if self.check_cancel(): return 0, 0, [], []
current_character_filters = self._get_current_character_filters()
-
kept_original_filenames_for_log = []
retryable_failures_this_post = [] # New list to store retryable failure details
total_downloaded_this_post = 0
total_skipped_this_post = 0
-
parsed_api_url = urlparse(self.api_url_input)
referer_url = f"https://{parsed_api_url.netloc}/"
headers = {'User-Agent': 'Mozilla/5.0', 'Referer': referer_url, 'Accept': '*/*'}
-
link_pattern = re.compile(r"""]*>(.*?)""",
re.IGNORECASE | re.DOTALL)
-
post_data = self.post
post_title = post_data.get('title', '') or 'untitled_post'
post_id = post_data.get('id', 'unknown_id')
post_main_file_info = post_data.get('file')
post_attachments = post_data.get('attachments', [])
post_content_html = post_data.get('content', '')
-
self.logger(f"\n--- Processing Post {post_id} ('{post_title[:50]}...') (Thread: {threading.current_thread().name}) ---")
-
num_potential_files_in_post = len(post_attachments or []) + (1 if post_main_file_info and post_main_file_info.get('path') else 0)
-
post_is_candidate_by_title_char_match = False
char_filter_that_matched_title = None
post_is_candidate_by_comment_char_match = False
post_is_candidate_by_file_char_match_in_comment_scope = False
char_filter_that_matched_file_in_comment_scope = None
char_filter_that_matched_comment = None
-
if current_character_filters and \
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH):
if self._check_pause(f"Character title filter for post {post_id}"): return 0, num_potential_files_in_post, [], []
@@ -1245,9 +1056,7 @@ class PostProcessorWorker:
if filter_item_obj["is_group"]:
if filter_item_obj["name"] not in terms_to_check_for_title:
terms_to_check_for_title.append(filter_item_obj["name"])
-
unique_terms_for_title_check = list(set(terms_to_check_for_title))
-
for term_to_match in unique_terms_for_title_check:
match_found_for_term = is_title_match_for_character(post_title, term_to_match)
if match_found_for_term:
@@ -1260,19 +1069,15 @@ class PostProcessorWorker:
api_file_domain_for_char_check = urlparse(self.api_url_input).netloc
if not api_file_domain_for_char_check or not any(d in api_file_domain_for_char_check.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
api_file_domain_for_char_check = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
-
if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'):
original_api_name = post_main_file_info.get('name') or os.path.basename(post_main_file_info['path'].lstrip('/'))
if original_api_name:
all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_name})
-
for att_info in post_attachments:
if isinstance(att_info, dict) and att_info.get('path'):
original_api_att_name = att_info.get('name') or os.path.basename(att_info['path'].lstrip('/'))
if original_api_att_name:
all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_att_name})
-
-
if current_character_filters and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
self.logger(f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id}'.")
if self._check_pause(f"File check (comments scope) for post {post_id}"): return 0, num_potential_files_in_post, [], []
@@ -1284,7 +1089,6 @@ class PostProcessorWorker:
terms_to_check = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check:
terms_to_check.append(filter_item_obj["name"])
-
for term_to_match in terms_to_check:
if is_filename_match_for_character(current_api_original_filename_for_check, term_to_match):
post_is_candidate_by_file_char_match_in_comment_scope = True
@@ -1294,7 +1098,6 @@ class PostProcessorWorker:
if post_is_candidate_by_file_char_match_in_comment_scope: break
if post_is_candidate_by_file_char_match_in_comment_scope: break
self.logger(f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope}")
-
if current_character_filters and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
if not post_is_candidate_by_file_char_match_in_comment_scope:
if self._check_pause(f"Comment check for post {post_id}"): return 0, num_potential_files_in_post, [], []
@@ -1305,7 +1108,6 @@ class PostProcessorWorker:
if not any(d in api_domain_for_comments.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
                    self.logger(f"⚠️ Unrecognized domain '{api_domain_for_comments}' for comment API. Defaulting based on service.")
api_domain_for_comments = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
-
comments_data = fetch_post_comments(
api_domain_for_comments, self.service, self.user_id, post_id,
headers, self.logger, self.cancellation_event, self.pause_event, # Pass pause_event
@@ -1319,15 +1121,12 @@ class PostProcessorWorker:
if self.check_cancel(): break
raw_comment_content = comment_item.get('content', '')
if not raw_comment_content: continue
-
cleaned_comment_text = strip_html_tags(raw_comment_content)
if not cleaned_comment_text.strip(): continue
-
for filter_item_obj in current_character_filters:
terms_to_check_comment = list(filter_item_obj["aliases"])
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_comment:
terms_to_check_comment.append(filter_item_obj["name"])
-
for term_to_match_comment in terms_to_check_comment:
if is_title_match_for_character(cleaned_comment_text, term_to_match_comment): # Re-use title matcher
post_is_candidate_by_comment_char_match = True
@@ -1339,7 +1138,6 @@ class PostProcessorWorker:
if post_is_candidate_by_comment_char_match: break
else:
self.logger(f" No comments found or fetched for post {post_id} to check against character filters.")
-
except RuntimeError as e_fetch_comment:
                self.logger(f"   ⚠️ Error fetching or processing comments for post {post_id}: {e_fetch_comment}")
except Exception as e_generic_comment:
@@ -1359,7 +1157,6 @@ class PostProcessorWorker:
if self.emitter and hasattr(self.emitter, 'missed_character_post_signal'): # Check emitter
self._emit_signal('missed_character_post', post_title, "No character match in files or comments (Comments scope)")
return 0, num_potential_files_in_post, [], []
-
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
if self._check_pause(f"Skip words (post title) for post {post_id}"): return 0, num_potential_files_in_post, [], []
post_title_lower = post_title.lower()
@@ -1367,24 +1164,20 @@ class PostProcessorWorker:
if skip_word.lower() in post_title_lower:
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'. Scope: {self.skip_words_scope}")
return 0, num_potential_files_in_post, [], []
-
if not self.extract_links_only and self.manga_mode_active and current_character_filters and \
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and \
not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title[:50]}' doesn't match filters.")
self._emit_signal('missed_character_post', post_title, "Manga Mode: No title match for character filter (Title/Both scope)")
return 0, num_potential_files_in_post, [], []
-
if not isinstance(post_attachments, list):
            self.logger(f"⚠️ Corrupt attachment data for post {post_id} (expected list, got {type(post_attachments)}). Skipping attachments.")
post_attachments = []
-
base_folder_names_for_post_content = []
if not self.extract_links_only and self.use_subfolders:
if self._check_pause(f"Subfolder determination for post {post_id}"): return 0, num_potential_files_in_post, []
primary_char_filter_for_folder = None # type: ignore
log_reason_for_folder = ""
-
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment:
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope:
primary_char_filter_for_folder = char_filter_that_matched_file_in_comment_scope
@@ -1407,7 +1200,6 @@ class PostProcessorWorker:
if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
self.logger(f" Base folder name(s) for post content (Generic title parsing - no char filters): {', '.join(base_folder_names_for_post_content)}")
-
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
if self._check_pause(f"Folder keyword skip check for post {post_id}"): return 0, num_potential_files_in_post, []
for folder_name_to_check in base_folder_names_for_post_content: # type: ignore
@@ -1416,7 +1208,6 @@ class PostProcessorWorker:
matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in folder_name_to_check.lower()), "unknown_skip_word")
self.logger(f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check}' contains '{matched_skip}'.")
return 0, num_potential_files_in_post, [], []
-
if (self.show_external_links or self.extract_links_only) and post_content_html: # type: ignore
if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], []
try:
@@ -1425,39 +1216,29 @@ class PostProcessorWorker:
link_url = match.group(1).strip()
link_url = html.unescape(link_url) # Decode HTML entities in the URL
link_inner_text = match.group(2)
-
if not any(ext in link_url.lower() for ext in ['.css', '.js', '.ico', '.xml', '.svg']) \
and not link_url.startswith('javascript:') \
and link_url not in unique_links_data:
-
clean_link_text = re.sub(r'<.*?>', '', link_inner_text)
clean_link_text = html.unescape(clean_link_text).strip()
-
display_text = clean_link_text if clean_link_text else "[Link]"
unique_links_data[link_url] = display_text
-
links_emitted_count = 0
scraped_platforms = {'kemono', 'coomer', 'patreon'}
-
for link_url, link_text in unique_links_data.items():
platform = get_link_platform(link_url)
if platform not in scraped_platforms:
self._emit_signal('external_link', post_title, link_text, link_url, platform)
links_emitted_count +=1
-
                if links_emitted_count > 0: self.logger(f"   🔗 Found {links_emitted_count} potential external link(s) in post content.")
            except Exception as e: self.logger(f"⚠️ Error parsing post content for links: {e}\n{traceback.format_exc(limit=2)}")
-
if self.extract_links_only:
self.logger(f" Extract Links Only mode: Finished processing post {post_id} for links.")
return 0, 0, [], []
-
all_files_from_post_api = []
api_file_domain = urlparse(self.api_url_input).netloc
if not api_file_domain or not any(d in api_file_domain.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
api_file_domain = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
-
-
if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'):
file_path = post_main_file_info['path'].lstrip('/')
original_api_name = post_main_file_info.get('name') or os.path.basename(file_path)
@@ -1469,7 +1250,6 @@ class PostProcessorWorker:
'_is_thumbnail': is_image(original_api_name) # Mark if it's an image from API
})
            else: self.logger(f"   ⚠️ Skipping main file for post {post_id}: Missing name (Path: {file_path})")
-
for idx, att_info in enumerate(post_attachments):
if isinstance(att_info, dict) and att_info.get('path'): # Ensure att_info is a dict
att_path = att_info['path'].lstrip('/')
@@ -1483,30 +1263,20 @@ class PostProcessorWorker:
})
                else: self.logger(f"   ⚠️ Skipping attachment {idx+1} for post {post_id}: Missing name (Path: {att_path})")
            else: self.logger(f"   ⚠️ Skipping invalid attachment {idx+1} for post {post_id}: {str(att_info)[:100]}")
-
- # --- New: Scan post content for additional image URLs if enabled ---
if self.scan_content_for_images and post_content_html and not self.extract_links_only: # This block was duplicated, ensure only one exists
self.logger(f" Scanning post content for additional image URLs (Post ID: {post_id})...")
-
parsed_input_url = urlparse(self.api_url_input)
base_url_for_relative_paths = f"{parsed_input_url.scheme}://{parsed_input_url.netloc}"
img_ext_pattern = "|".join(ext.lstrip('.') for ext in IMAGE_EXTENSIONS)
-
- # 1. Regex for direct absolute image URLs in text
direct_url_pattern_str = r"""(?i)\b(https?://[^\s"'<>\[\]\{\}\|\^\\^~\[\]`]+\.(?:""" + img_ext_pattern + r"""))\b"""
- # 2. Regex for tags (captures src content)
img_tag_src_pattern_str = r"""
]*?src\s*=\s*["']([^"']+)["']"""
-
found_image_sources = set()
-
for direct_url_match in re.finditer(direct_url_pattern_str, post_content_html):
found_image_sources.add(direct_url_match.group(1))
-
for img_tag_match in re.finditer(img_tag_src_pattern_str, post_content_html, re.IGNORECASE):
src_attr = img_tag_match.group(1).strip()
src_attr = html.unescape(src_attr)
if not src_attr: continue
-
resolved_src_url = ""
if src_attr.startswith(('http://', 'https://')):
resolved_src_url = src_attr
@@ -1514,16 +1284,13 @@ class PostProcessorWorker:
resolved_src_url = f"{parsed_input_url.scheme}:{src_attr}"
elif src_attr.startswith('/'):
resolved_src_url = f"{base_url_for_relative_paths}{src_attr}"
-
if resolved_src_url:
parsed_resolved_url = urlparse(resolved_src_url)
if any(parsed_resolved_url.path.lower().endswith(ext) for ext in IMAGE_EXTENSIONS):
found_image_sources.add(resolved_src_url)
-
if found_image_sources:
self.logger(f" Found {len(found_image_sources)} potential image URLs/sources in content.")
existing_urls_in_api_list = {f_info['url'] for f_info in all_files_from_post_api}
-
for found_url in found_image_sources: # Iterate over the unique, resolved URLs
if self.check_cancel(): break
if found_url in existing_urls_in_api_list:
@@ -1535,7 +1302,6 @@ class PostProcessorWorker:
if not url_filename or not is_image(url_filename):
self.logger(f" Skipping URL from content (no filename part or not an image extension): {found_url[:70]}...")
continue
-
self.logger(f" Adding image from content: {url_filename} (URL: {found_url[:70]}...)")
all_files_from_post_api.append({
'url': found_url,
@@ -1549,42 +1315,28 @@ class PostProcessorWorker:
self.logger(f" Error processing URL from content '{found_url[:70]}...': {e_url_parse}")
else:
self.logger(f" No additional image URLs found in post content scan for post {post_id}.")
- # --- End of new content scanning logic ---
-
- # --- Final filtering based on download_thumbnails and scan_content_for_images flags ---
if self.download_thumbnails:
if self.scan_content_for_images:
- # Both "Download Thumbnails Only" AND "Scan Content for Images" are checked.
- # Prioritize images from content scan.
self.logger(f" Mode: 'Download Thumbnails Only' + 'Scan Content for Images' active. Prioritizing images from content scan for post {post_id}.")
all_files_from_post_api = [finfo for finfo in all_files_from_post_api if finfo.get('_from_content_scan')]
if not all_files_from_post_api:
self.logger(f" -> No images found via content scan for post {post_id} in this combined mode.")
return 0, 0, [], [] # No files to download for this post
else:
- # Only "Download Thumbnails Only" is checked. Filter for API thumbnails.
self.logger(f" Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id}.")
all_files_from_post_api = [finfo for finfo in all_files_from_post_api if finfo.get('_is_thumbnail')]
if not all_files_from_post_api:
self.logger(f" -> No API image thumbnails found for post {post_id} in thumbnail-only mode.")
return 0, 0, [], [] # No files to download for this post
- # If self.download_thumbnails is False, all_files_from_post_api remains as is.
- # It will contain all API files (images marked with _is_thumbnail: True, others False)
- # and potentially content-scanned images (marked with _from_content_scan: True).
-
if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED:
def natural_sort_key_for_files(file_api_info):
name = file_api_info.get('_original_name_for_log', '').lower()
return [int(text) if text.isdigit() else text for text in re.split('([0-9]+)', name)]
-
all_files_from_post_api.sort(key=natural_sort_key_for_files)
self.logger(f" Manga Date Mode: Sorted {len(all_files_from_post_api)} files within post {post_id} by original name for sequential numbering.")
-
-
if not all_files_from_post_api:
self.logger(f" No files found to download for post {post_id}.")
return 0, 0, [], []
-
files_to_download_info_list = []
processed_original_filenames_in_this_post = set()
for file_info in all_files_from_post_api:
@@ -1596,27 +1348,19 @@ class PostProcessorWorker:
files_to_download_info_list.append(file_info)
if current_api_original_filename:
processed_original_filenames_in_this_post.add(current_api_original_filename)
-
if not files_to_download_info_list:
self.logger(f" All files for post {post_id} were duplicate original names or skipped earlier.")
return 0, total_skipped_this_post, [], []
-
-
num_files_in_this_post_for_naming = len(files_to_download_info_list)
self.logger(f" Identified {num_files_in_this_post_for_naming} unique original file(s) for potential download from post {post_id}.")
-
-
with ThreadPoolExecutor(max_workers=self.num_file_threads, thread_name_prefix=f'P{post_id}File_') as file_pool:
futures_list = []
for file_idx, file_info_to_dl in enumerate(files_to_download_info_list):
if self._check_pause(f"File processing loop for post {post_id}, file {file_idx}"): break
if self.check_cancel(): break
-
current_api_original_filename = file_info_to_dl.get('_original_name_for_log')
-
file_is_candidate_by_char_filter_scope = False
char_filter_info_that_matched_file = None
-
if not current_character_filters:
file_is_candidate_by_char_filter_scope = True
else:
@@ -1626,7 +1370,6 @@ class PostProcessorWorker:
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_for_file:
terms_to_check_for_file.append(filter_item_obj["name"])
unique_terms_for_file_check = list(set(terms_to_check_for_file))
-
for term_to_match in unique_terms_for_file_check:
if is_filename_match_for_character(current_api_original_filename, term_to_match):
file_is_candidate_by_char_filter_scope = True
@@ -1650,7 +1393,6 @@ class PostProcessorWorker:
if filter_item_obj_both_file["is_group"] and filter_item_obj_both_file["name"] not in terms_to_check_for_file_both:
terms_to_check_for_file_both.append(filter_item_obj_both_file["name"])
unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both))
-
for term_to_match in unique_terms_for_file_both_check:
if is_filename_match_for_character(current_api_original_filename, term_to_match):
file_is_candidate_by_char_filter_scope = True
@@ -1667,14 +1409,11 @@ class PostProcessorWorker:
file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_comment # Use the filter that matched comments
self.logger(f" File '{current_api_original_filename}' is candidate because post comments matched char filter (Overall Scope: Comments).")
-
if not file_is_candidate_by_char_filter_scope:
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
total_skipped_this_post += 1
continue
-
current_path_for_file = self.override_output_dir if self.override_output_dir else self.download_root # Use override if provided
-
if self.use_subfolders:
char_title_subfolder_name = None
if self.target_post_id_from_initial_url and self.custom_folder_name:
@@ -1685,17 +1424,13 @@ class PostProcessorWorker:
char_title_subfolder_name = clean_folder_name(char_filter_that_matched_title["name"])
elif base_folder_names_for_post_content:
char_title_subfolder_name = base_folder_names_for_post_content[0]
-
if char_title_subfolder_name:
current_path_for_file = os.path.join(current_path_for_file, char_title_subfolder_name)
-
if self.use_post_subfolders:
cleaned_title_for_subfolder = clean_folder_name(post_title)
post_specific_subfolder_name = cleaned_title_for_subfolder # Use only the cleaned title
current_path_for_file = os.path.join(current_path_for_file, post_specific_subfolder_name)
-
target_folder_path_for_this_file = current_path_for_file
-
manga_date_counter_to_pass = None
manga_global_counter_to_pass = None
if self.manga_mode_active:
@@ -1703,7 +1438,6 @@ class PostProcessorWorker:
manga_date_counter_to_pass = self.manga_date_file_counter_ref
elif self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING:
manga_global_counter_to_pass = self.manga_global_file_counter_ref if self.manga_global_file_counter_ref is not None else self.manga_date_file_counter_ref
-
futures_list.append(file_pool.submit(
self._download_single_file,
file_info_to_dl,
@@ -1717,7 +1451,6 @@ class PostProcessorWorker:
file_index_in_post=file_idx, # Changed to keyword argument
num_files_in_this_post=num_files_in_this_post_for_naming # Changed to keyword argument
))
-
for future in as_completed(futures_list):
if self.check_cancel():
for f_to_cancel in futures_list:
@@ -1739,13 +1472,9 @@ class PostProcessorWorker:
                    self.logger(f"❌ File download task for post {post_id} resulted in error: {exc_f}")
total_skipped_this_post += 1
self._emit_signal('file_progress', "", None)
-
if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled.");
else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}")
-
return total_downloaded_this_post, total_skipped_this_post, kept_original_filenames_for_log, retryable_failures_this_post
-
-
class DownloadThread(QThread):
progress_signal = pyqtSignal(str) # Already QObject, no need to change
add_character_prompt_signal = pyqtSignal(str)
@@ -1755,8 +1484,6 @@ class DownloadThread(QThread):
file_progress_signal = pyqtSignal(str, object)
retryable_file_failed_signal = pyqtSignal(list) # New: list of retry_details dicts
missed_character_post_signal = pyqtSignal(str, str) # New: post_title, reason
-
-
def __init__(self, api_url_input, output_dir, known_names_copy,
cancellation_event,
pause_event, filter_character_list=None, dynamic_character_filter_holder=None, # Added pause_event and holder
@@ -1814,10 +1541,8 @@ class DownloadThread(QThread):
self.downloaded_files_lock = downloaded_files_lock
self.downloaded_file_hashes = downloaded_file_hashes
self.downloaded_file_hashes_lock = downloaded_file_hashes_lock
-
self._add_character_response = None
self.prompt_mutex = QMutex()
-
self.show_external_links = show_external_links
self.extract_links_only = extract_links_only
self.num_file_threads_for_worker = num_file_threads_for_worker
@@ -1842,13 +1567,10 @@ class DownloadThread(QThread):
if self.compress_images and Image is None:
self.logger("â ī¸ Image compression disabled: Pillow library not found (DownloadThread).")
self.compress_images = False
-
def logger(self, message):
self.progress_signal.emit(str(message))
-
def isInterruptionRequested(self):
return self.cancellation_event.is_set() or super().isInterruptionRequested()
-
def _check_pause_self(self, context_message="DownloadThread operation"):
if self.pause_event and self.pause_event.is_set():
self.logger(f" {context_message} paused...")
@@ -1859,14 +1581,11 @@ class DownloadThread(QThread):
time.sleep(0.5)
if not self.isInterruptionRequested(): self.logger(f" {context_message} resumed.")
return False
-
def skip_file(self):
if self.isRunning() and self.skip_current_file_flag:
self.logger("âī¸ Skip requested for current file (single-thread mode).")
self.skip_current_file_flag.set()
else: self.logger("âšī¸ Skip file: No download active or skip flag not available for current context.")
-
-
def run(self):
grand_total_downloaded_files = 0
grand_total_skipped_files = 0
@@ -1883,7 +1602,6 @@ class DownloadThread(QThread):
elif self.service and self.user_id:
creator_based_folder_name = clean_folder_name(self.user_id)
series_scan_dir = os.path.join(series_scan_dir, creator_based_folder_name)
-
highest_num = 0
if os.path.isdir(series_scan_dir):
self.logger(f"âšī¸ [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir}'...")
@@ -1895,12 +1613,8 @@ class DownloadThread(QThread):
current_manga_date_file_counter_ref = [highest_num + 1, threading.Lock()]
self.logger(f"âšī¸ [Thread] Manga Date Mode: Initialized counter at {current_manga_date_file_counter_ref[0]}.")
elif self.manga_mode_active and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING and not self.extract_links_only and current_manga_date_file_counter_ref is None: # Use current_manga_date_file_counter_ref for STYLE_POST_TITLE_GLOBAL_NUMBERING as well
- # For global numbering, we always start from 1 for the session unless a ref is passed.
- # If you need to resume global numbering across sessions, similar scanning logic would be needed.
- # For now, it starts at 1 per session if no ref is provided.
current_manga_date_file_counter_ref = [1, threading.Lock()] # Start global numbering at 1
self.logger(f"âšī¸ [Thread] Manga Title+GlobalNum Mode: Initialized counter at {current_manga_date_file_counter_ref[0]}.")
-
worker_signals_obj = PostProcessorSignals()
try:
worker_signals_obj.progress_signal.connect(self.progress_signal)
@@ -1908,7 +1622,6 @@ class DownloadThread(QThread):
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
-
self.logger(" Starting post fetch (single-threaded download process)...")
post_generator = download_from_api(
self.api_url_input,
@@ -1923,14 +1636,12 @@ class DownloadThread(QThread):
selected_cookie_file=self.selected_cookie_file,
app_base_dir=self.app_base_dir
)
-
for posts_batch_data in post_generator:
if self._check_pause_self("Post batch processing"): was_process_cancelled = True; break
if self.isInterruptionRequested(): was_process_cancelled = True; break
for individual_post_data in posts_batch_data:
if self._check_pause_self(f"Individual post processing for {individual_post_data.get('id', 'N/A')}"): was_process_cancelled = True; break
if self.isInterruptionRequested(): was_process_cancelled = True; break
-
post_processing_worker = PostProcessorWorker(
post_data=individual_post_data,
download_root=self.output_dir,
@@ -1989,17 +1700,13 @@ class DownloadThread(QThread):
num_potential_files_est = len(individual_post_data.get('attachments', [])) + \
(1 if individual_post_data.get('file') else 0)
grand_total_skipped_files += num_potential_files_est
-
if self.skip_current_file_flag and self.skip_current_file_flag.is_set():
self.skip_current_file_flag.clear()
self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
-
self.msleep(10)
if was_process_cancelled: break
-
if not was_process_cancelled and not self.isInterruptionRequested():
self.logger("â
All posts processed or end of content reached by DownloadThread.")
-
except Exception as main_thread_err:
self.logger(f"\nâ Critical error within DownloadThread run loop: {main_thread_err}")
traceback.print_exc()
@@ -2014,9 +1721,7 @@ class DownloadThread(QThread):
worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal)
except (TypeError, RuntimeError) as e:
self.logger(f"âšī¸ Note during DownloadThread signal disconnection: {e}")
-
self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)
-
def receive_add_character_result(self, result):
with QMutexLocker(self.prompt_mutex):
self._add_character_response = result
diff --git a/main.py b/main.py
index 14a67fc..d50f950 100644
--- a/main.py
+++ b/main.py
@@ -329,8 +329,8 @@ class FavoriteArtistsDialog(QDialog):
self.selected_artist_urls = []
self.setWindowTitle("Favorite Artists")
- self.setModal(True)
- self.setMinimumSize(500, 600)
+ self.setModal(True) # type: ignore
+ self.setMinimumSize(500, 500) # Reduced minimum height
if hasattr(self.parent_app, 'get_dark_theme'):
self.setStyleSheet(self.parent_app.get_dark_theme())
@@ -349,6 +349,7 @@ class FavoriteArtistsDialog(QDialog):
self.search_input.textChanged.connect(self._filter_artist_list_display)
main_layout.addWidget(self.search_input)
+
self.artist_list_widget = QListWidget()
self.artist_list_widget.setStyleSheet("""
QListWidget::item {
@@ -357,7 +358,14 @@ class FavoriteArtistsDialog(QDialog):
padding-bottom: 4px;
}""")
main_layout.addWidget(self.artist_list_widget)
+ self.artist_list_widget.setAlternatingRowColors(True)
+ # Initially hide list and search until content is loaded
+ self.search_input.setVisible(False)
+ self.artist_list_widget.setVisible(False)
+
+ self.status_label.setText("âŗ Loading favorite artists...") # Initial loading message
+ self.status_label.setAlignment(Qt.AlignCenter)
combined_buttons_layout = QHBoxLayout()
self.select_all_button = QPushButton("Select All")
@@ -390,6 +398,11 @@ class FavoriteArtistsDialog(QDialog):
else:
print(f"[FavArtistsDialog] {message}")
+ def _show_content_elements(self, show):
+ """Helper to show/hide content-related widgets."""
+ self.search_input.setVisible(show)
+ self.artist_list_widget.setVisible(show)
+
def _fetch_favorite_artists(self):
fav_url = "https://kemono.su/api/v1/account/favorites?type=artist"
self._logger(f"Attempting to fetch favorite artists from: {fav_url}")
@@ -404,8 +417,10 @@ class FavoriteArtistsDialog(QDialog):
if self.cookies_config['use_cookie'] and not cookies_dict:
self.status_label.setText("Error: Cookies enabled but could not be loaded. Cannot fetch favorites.")
+ self._show_content_elements(False)
self._logger("Error: Cookies enabled but could not be loaded.")
QMessageBox.warning(self, "Cookie Error", "Cookies are enabled, but no valid cookies could be loaded. Please check your cookie settings or file.")
+ self.download_button.setEnabled(False)
return
try:
@@ -417,6 +432,7 @@ class FavoriteArtistsDialog(QDialog):
if not isinstance(artists_data, list):
self.status_label.setText("Error: API did not return a list of artists.")
+ self._show_content_elements(False)
self._logger(f"Error: Expected a list from API, got {type(artists_data)}")
QMessageBox.critical(self, "API Error", "The favorite artists API did not return the expected data format (list).")
return
@@ -435,17 +451,28 @@ class FavoriteArtistsDialog(QDialog):
self.all_fetched_artists.sort(key=lambda x: x['name'].lower())
self._populate_artist_list_widget()
- self.status_label.setText(f"{len(self.all_fetched_artists)} favorite artist(s) found.")
- self.download_button.setEnabled(len(self.all_fetched_artists) > 0)
+
+ if self.all_fetched_artists:
+ self.status_label.setText(f"Found {len(self.all_fetched_artists)} favorite artist(s).")
+ self._show_content_elements(True)
+ self.download_button.setEnabled(True)
+ else:
+ self.status_label.setText("No favorite artists found.")
+ self._show_content_elements(False)
+ self.download_button.setEnabled(False)
except requests.exceptions.RequestException as e:
self.status_label.setText(f"Error fetching favorites: {e}")
+ self._show_content_elements(False)
self._logger(f"Error fetching favorites: {e}")
QMessageBox.critical(self, "Fetch Error", f"Could not fetch favorite artists: {e}")
+ self.download_button.setEnabled(False)
except Exception as e:
self.status_label.setText(f"An unexpected error occurred: {e}")
+ self._show_content_elements(False)
self._logger(f"Unexpected error: {e}")
QMessageBox.critical(self, "Error", f"An unexpected error occurred: {e}")
+ self.download_button.setEnabled(False)
def _populate_artist_list_widget(self, artists_to_display=None):
self.artist_list_widget.clear()
@@ -498,12 +525,11 @@ class FavoritePostsFetcherThread(QThread):
progress_bar_update = pyqtSignal(int, int) # value, maximum
finished = pyqtSignal(list, str) # list of posts, error message (or None)
- def __init__(self, cookies_config, parent_logger_func, parent_get_domain_func):
+ def __init__(self, cookies_config, parent_logger_func): # Removed parent_get_domain_func
super().__init__()
self.cookies_config = cookies_config
self.parent_logger_func = parent_logger_func
- self.parent_get_domain_func = parent_get_domain_func
- self.cancellation_event = threading.Event() # For potential future cancellation
+ self.cancellation_event = threading.Event()
def _logger(self, message):
self.parent_logger_func(f"[FavPostsFetcherThread] {message}")
@@ -553,53 +579,9 @@ class FavoritePostsFetcherThread(QThread):
else:
self._logger(f"Warning: Skipping favorite post entry due to missing data: {post_entry}")
- unique_creators = {}
- for post_data in all_fetched_posts_temp:
- creator_key = (post_data['service'], post_data['creator_id'])
- if creator_key not in unique_creators:
- unique_creators[creator_key] = None
-
- creator_name_cache_local = {}
- if unique_creators:
- self.status_update.emit(f"Found {len(all_fetched_posts_temp)} posts. Fetching {len(unique_creators)} unique creator names...")
- self.progress_bar_update.emit(0, len(unique_creators)) # Set max for creator name fetching
-
- fetched_names_count = 0
- total_unique_creators = len(unique_creators)
- for (service, creator_id_val) in unique_creators.keys():
- if self.cancellation_event.is_set():
- self.finished.emit([], "Fetching cancelled.")
- return
-
- creator_api_url = f"https://{self.parent_get_domain_func(service)}/api/v1/{service}/user/{creator_id_val}?o=0"
- try:
- creator_response = requests.get(creator_api_url, headers=headers, cookies=cookies_dict, timeout=10)
- creator_response.raise_for_status()
- creator_info_list = creator_response.json()
- if isinstance(creator_info_list, list) and creator_info_list:
- creator_name_from_api = creator_info_list[0].get("user_name")
- creator_name = html.unescape(creator_name_from_api.strip()) if creator_name_from_api else creator_id_val
- creator_name_cache_local[(service, creator_id_val)] = creator_name
- fetched_names_count += 1
- self.status_update.emit(f"Fetched {fetched_names_count}/{total_unique_creators} creator names...")
- self.progress_bar_update.emit(fetched_names_count, total_unique_creators)
- else:
- self._logger(f"Warning: Could not get name for {service}/{creator_id_val}. API response not a list or empty.")
- creator_name_cache_local[(service, creator_id_val)] = creator_id_val
- time.sleep(0.1) # Be polite
- except requests.exceptions.RequestException as e_creator:
- self._logger(f"Error fetching name for {service}/{creator_id_val}: {e_creator}")
- creator_name_cache_local[(service, creator_id_val)] = creator_id_val # Fallback
- except Exception as e_gen_creator:
- self._logger(f"Unexpected error fetching name for {service}/{creator_id_val}: {e_gen_creator}")
- creator_name_cache_local[(service, creator_id_val)] = creator_id_val # Fallback
-
- for post_data in all_fetched_posts_temp:
- post_data['creator_name'] = creator_name_cache_local.get(
- (post_data['service'], post_data['creator_id']), post_data['creator_id']
- )
-
- all_fetched_posts_temp.sort(key=lambda x: x.get('added_date', ''), reverse=True)
+ # Creator name fetching logic removed.
+ # Sort by service, then creator_id, then date for consistent grouping
+ all_fetched_posts_temp.sort(key=lambda x: (x.get('service','').lower(), x.get('creator_id','').lower(), x.get('added_date', '')), reverse=False)
self.finished.emit(all_fetched_posts_temp, None)
except requests.exceptions.RequestException as e:
@@ -627,25 +609,27 @@ class PostListItemWidget(QWidget):
self.layout.addWidget(self.info_label, 1)
self._setup_display_text()
-
def _setup_display_text(self):
- creator_display = self.post_data.get('creator_name', self.post_data.get('creator_id', 'N/A'))
- post_title_text = self.post_data.get('title', 'Untitled Post')
+ suffix_plain = self.post_data.get('suffix_for_display', "") # Changed from prefix_for_display
+ title_plain = self.post_data.get('title', 'Untitled Post')
- known_char_name = self.parent_dialog._find_known_character_in_title(post_title_text)
- known_line_text = f"Known - {known_char_name}" if known_char_name else "Known - "
-
- service_val = self.post_data.get('service', 'N/A').capitalize()
- added_date_str = self.post_data.get('added_date', 'N/A')
- added_date_formatted = added_date_str.split('T')[0] if added_date_str and 'T' in added_date_str else added_date_str
- details_line_text = f"{service_val} - Added: {added_date_formatted}"
+ # Escape them for HTML display
+ escaped_suffix = html.escape(suffix_plain) # Changed from escaped_prefix
+ escaped_title = html.escape(title_plain)
- line1_html = f"{html.escape(creator_display)} - {html.escape(post_title_text)}"
- line2_html = html.escape(known_line_text)
- line3_html = html.escape(details_line_text)
-
- display_html = f"{line1_html}
{line2_html}
{line3_html}"
- self.info_label.setText(display_html)
+ # Styles
+ p_style_paragraph = "font-size:10.5pt; margin:0; padding:0;" # Base paragraph style (size, margins)
+ title_span_style = "font-weight:bold; color:#E0E0E0;" # Style for the title part (bold, bright white)
+ suffix_span_style = "color:#999999; font-weight:normal; font-size:9.5pt;" # Style for the suffix (dimmer gray, normal weight, slightly smaller)
+
+ if escaped_suffix:
+ # Title part is bold and bright, suffix part is normal weight and dimmer
+ display_html_content = f"
{escaped_title}{escaped_suffix}
" + else: + # Only title part + display_html_content = f"{escaped_title}
" + + self.info_label.setText(display_html_content) def isChecked(self): return self.checkbox.isChecked() def setCheckState(self, state): self.checkbox.setCheckState(state) @@ -660,11 +644,12 @@ class FavoritePostsDialog(QDialog): self.all_fetched_posts = [] self.selected_posts_data = [] self.known_names_list_ref = known_names_list_ref # Store reference to global KNOWN_NAMES + self.displayable_grouped_posts = {} # For storing posts grouped by artist self.fetcher_thread = None # For the worker thread - self.setWindowTitle("Favorite Posts") - self.setModal(True) - self.setMinimumSize(600, 600) # Slightly wider for post titles + self.setWindowTitle("Favorite Posts") # type: ignore + self.setModal(True) # type: ignore + self.setMinimumSize(600, 600) # Reduced minimum height if hasattr(self.parent_app, 'get_dark_theme'): self.setStyleSheet(self.parent_app.get_dark_theme()) @@ -695,6 +680,7 @@ class FavoritePostsDialog(QDialog): padding-top: 4px; padding-bottom: 4px; }""") + self.post_list_widget.setAlternatingRowColors(True) main_layout.addWidget(self.post_list_widget) combined_buttons_layout = QHBoxLayout() @@ -731,11 +717,10 @@ class FavoritePostsDialog(QDialog): self.fetcher_thread = FavoritePostsFetcherThread( self.cookies_config, self.parent_app.log_signal.emit, # Pass parent's logger - self._get_domain_for_service # Pass method reference - ) - self.fetcher_thread.progress_bar_update.connect(self._set_progress_bar_value) + ) # Removed _get_domain_for_service self.fetcher_thread.status_update.connect(self.status_label.setText) self.fetcher_thread.finished.connect(self._on_fetch_completed) + self.fetcher_thread.progress_bar_update.connect(self._set_progress_bar_value) # Connect the missing signal self.progress_bar.setVisible(True) self.fetcher_thread.start() @@ -757,94 +742,142 @@ class FavoritePostsDialog(QDialog): self.progress_bar.setVisible(False) self.all_fetched_posts = fetched_posts_list - self._populate_post_list_widget() + self._populate_post_list_widget() # 
This will now group and display self.status_label.setText(f"{len(self.all_fetched_posts)} favorite post(s) found.") self.download_button.setEnabled(len(self.all_fetched_posts) > 0) - + if self.fetcher_thread: self.fetcher_thread.quit() self.fetcher_thread.wait() self.fetcher_thread = None - - def _get_domain_for_service(self, service_name): - # Basic heuristic, might need refinement if more domains are supported - if service_name and "coomer" in service_name.lower(): # e.g. if service is 'coomer_onlyfans' - return "coomer.su" # Or coomer.party - return "kemono.su" # Default - - def _find_known_character_in_title(self, post_title): - if not post_title or not self.known_names_list_ref: + def _find_best_known_name_match_in_title(self, title_raw): + if not title_raw or not self.known_names_list_ref: return None - - # Sort by length of primary name to prioritize more specific matches. - sorted_known_names = sorted(self.known_names_list_ref, key=lambda x: len(x.get("name", "")), reverse=True) - for known_entry in sorted_known_names: - aliases_to_check = known_entry.get("aliases", []) - if not aliases_to_check and known_entry.get("name"): - aliases_to_check = [known_entry.get("name")] + title_lower = title_raw.lower() + best_match_known_name_primary = None + longest_match_len = 0 - for alias in aliases_to_check: - if not alias: + for known_entry in self.known_names_list_ref: + aliases_to_check = set() + # Add all explicit aliases from the known entry + for alias_val in known_entry.get("aliases", []): + aliases_to_check.add(alias_val) + # For non-group entries, the primary name is also a key alias + if not known_entry.get("is_group", False): + aliases_to_check.add(known_entry["name"]) + + # Sort this entry's aliases by length (longest first) + # to prioritize more specific aliases within the same known_entry + sorted_aliases_for_entry = sorted(list(aliases_to_check), key=len, reverse=True) + + for alias in sorted_aliases_for_entry: + alias_lower = alias.lower() + if not 
alias_lower: continue - pattern = r"(?i)\b" + re.escape(alias) + r"\b" - if re.search(pattern, post_title): - return known_entry.get("name") - return None + + # Check for whole word match using regex + if re.search(r'\b' + re.escape(alias_lower) + r'\b', title_lower): + if len(alias_lower) > longest_match_len: + longest_match_len = len(alias_lower) + best_match_known_name_primary = known_entry["name"] # Store the primary name + # Since aliases for this entry are sorted by length, first match is the best for this entry + break # Move to the next known_entry + return best_match_known_name_primary def _populate_post_list_widget(self, posts_to_display=None): self.post_list_widget.clear() - source_list = posts_to_display if posts_to_display is not None else self.all_fetched_posts - for post_data in source_list: - creator_display = post_data.get('creator_name', post_data.get('creator_id', 'N/A')) # Use creator_name - post_title_text = post_data.get('title', 'Untitled Post') - # The HTML generation is now inside PostListItemWidget + + source_list_for_grouping = posts_to_display if posts_to_display is not None else self.all_fetched_posts - list_item = QListWidgetItem(self.post_list_widget) # Parent it to the list widget - custom_widget = PostListItemWidget(post_data, self) # Pass self (FavoritePostsDialog) - - list_item.setSizeHint(custom_widget.sizeHint()) # Set size hint for the QListWidgetItem - list_item.setData(Qt.UserRole, post_data) - self.post_list_widget.addItem(list_item) - self.post_list_widget.setItemWidget(list_item, custom_widget) # Set the custom widget + # Group posts by (service, creator_id) + grouped_posts = {} + for post in source_list_for_grouping: + service = post.get('service', 'unknown_service') + creator_id = post.get('creator_id', 'unknown_id') + group_key = (service, creator_id) # Use tuple as key + if group_key not in grouped_posts: + grouped_posts[group_key] = [] + grouped_posts[group_key].append(post) + + sorted_group_keys = 
sorted(grouped_posts.keys(), key=lambda x: (x[0].lower(), x[1].lower())) + + self.displayable_grouped_posts = { + key: sorted(grouped_posts[key], key=lambda p: p.get('added_date', ''), reverse=True) + for key in sorted_group_keys + } + for service, creator_id_val in sorted_group_keys: + artist_name = f"{service.capitalize()} / {creator_id_val}" # Display service and ID + # Add artist header item + artist_header_item = QListWidgetItem(f"đ¨ {artist_name}") + artist_header_item.setFlags(Qt.NoItemFlags) # Not selectable, not checkable + font = artist_header_item.font() + font.setBold(True) + font.setPointSize(font.pointSize() + 1) # Make it a bit larger + artist_header_item.setFont(font) + artist_header_item.setForeground(Qt.cyan) # Style for header + self.post_list_widget.addItem(artist_header_item) + + # Add post items for this artist + for post_data in self.displayable_grouped_posts[(service, creator_id_val)]: + post_title_raw = post_data.get('title', 'Untitled Post') + + # Find if a known name is in the title and prepare prefix + found_known_name_primary = self._find_best_known_name_match_in_title(post_title_raw) + + plain_text_title_for_list_item = post_title_raw + if found_known_name_primary: + suffix_text = f" [Known - {found_known_name_primary}]" # Changed to suffix format + post_data['suffix_for_display'] = suffix_text # Store as suffix_for_display + plain_text_title_for_list_item = post_title_raw + suffix_text # Append suffix + else: + post_data.pop('suffix_for_display', None) # Ensure suffix key is removed if no match + + list_item = QListWidgetItem(self.post_list_widget) # Parent it + list_item.setText(plain_text_title_for_list_item) # Use plain text (possibly prefixed) + list_item.setFlags(list_item.flags() | Qt.ItemIsUserCheckable) + list_item.setCheckState(Qt.Unchecked) + list_item.setData(Qt.UserRole, post_data) # Store full data for this post + self.post_list_widget.addItem(list_item) def _filter_post_list_display(self): search_text = 
self.search_input.text().lower().strip() if not search_text: - self._populate_post_list_widget() + self._populate_post_list_widget(self.all_fetched_posts) # Repopulate with all, which will group return - filtered_posts = [ - post for post in self.all_fetched_posts - if search_text in post['title'].lower() or \ - search_text in post.get('creator_name', post.get('creator_id', '')).lower() or \ - search_text in post['service'].lower() - ] - self._populate_post_list_widget(filtered_posts) + filtered_posts_to_group = [] + for post in self.all_fetched_posts: + # Check if search text matches post title, creator name, creator ID, or service + matches_post_title = search_text in post.get('title', '').lower() + matches_creator_name = False # Creator name is no longer fetched + matches_creator_id = search_text in post.get('creator_id', '').lower() + matches_service = search_text in post['service'].lower() + + if matches_post_title or matches_creator_name or matches_creator_id or matches_service: + filtered_posts_to_group.append(post) + + self._populate_post_list_widget(filtered_posts_to_group) # Repopulate with filtered, which will group def _select_all_items(self): for i in range(self.post_list_widget.count()): item = self.post_list_widget.item(i) - widget = self.post_list_widget.itemWidget(item) - if widget and hasattr(widget, 'setCheckState'): - widget.setCheckState(Qt.Checked) + if item and item.flags() & Qt.ItemIsUserCheckable: # Only check actual post items + item.setCheckState(Qt.Checked) def _deselect_all_items(self): for i in range(self.post_list_widget.count()): item = self.post_list_widget.item(i) - widget = self.post_list_widget.itemWidget(item) - if widget and hasattr(widget, 'setCheckState'): - widget.setCheckState(Qt.Unchecked) + if item and item.flags() & Qt.ItemIsUserCheckable: # Only uncheck actual post items + item.setCheckState(Qt.Unchecked) def _accept_selection_action(self): self.selected_posts_data = [] for i in range(self.post_list_widget.count()): 
item = self.post_list_widget.item(i) - widget = self.post_list_widget.itemWidget(item) # Get the custom widget - if widget and hasattr(widget, 'isChecked') and widget.isChecked(): - # Retrieve post_data from the custom widget or the item's UserRole - post_data_for_download = widget.get_post_data() if hasattr(widget, 'get_post_data') else item.data(Qt.UserRole) + if item and item.checkState() == Qt.Checked: + post_data_for_download = item.data(Qt.UserRole) self.selected_posts_data.append(post_data_for_download) if not self.selected_posts_data: @@ -855,6 +888,7 @@ class FavoritePostsDialog(QDialog): def get_selected_posts(self): return self.selected_posts_data + class HelpGuideDialog(QDialog): """A multi-page dialog for displaying the feature guide.""" def __init__(self, steps_data, parent=None): @@ -1543,7 +1577,19 @@ class DownloaderApp(QWidget): self.log_signal.emit("âšī¸ Local API server functionality has been removed.") self.log_signal.emit("âšī¸ 'Skip Current File' button has been removed.") if hasattr(self, 'character_input'): - self.character_input.setToolTip("Names, comma-separated. Group aliases: (alias1, alias2, alias3) becomes folder name 'alias1 alias2 alias3' (after cleaning).\nAll names in the group are used as aliases for matching.\nE.g., yor, (Boa, Hancock, Snake Princess)") + self.character_input.setToolTip("Enter character names (comma-separated). 
Supports advanced grouping and affects folder naming " + "if 'Separate Folders' is enabled.\n\n" + "Examples:\n" + "- Nami â Matches 'Nami', creates folder 'Nami'.\n" + "- (Ulti, Vivi) â Matches either, folder 'Ulti Vivi', adds both to Known.txt separately.\n" + "- (Boa, Hancock)~ â Matches either, folder 'Boa Hancock', adds as one group in Known.txt.\n\n" + "Names are treated as aliases for matching.\n\n" + "Filter Modes (button cycles):\n" + "- Files: Filters by filename.\n" + "- Title: Filters by post title.\n" + "- Both: Title first, then filename.\n" + "- Comments (Beta): Filename first, then post comments." + ) self.log_signal.emit(f"âšī¸ Manga filename style loaded: '{self.manga_filename_style}'") self.log_signal.emit(f"âšī¸ Skip words scope loaded: '{self.skip_words_scope}'") self.log_signal.emit(f"âšī¸ Character filter scope set to default: '{self.char_filter_scope}'") @@ -1949,10 +1995,19 @@ class DownloaderApp(QWidget): self.character_input = QLineEdit() self.character_input.setPlaceholderText("e.g., Tifa, Aerith, (Cloud, Zack)") self.character_input.setToolTip( - self._get_tooltip_for_character_input() + "Enter character names, comma-separated (e.g., Tifa, Aerith).\n" + "Group aliases for a combined folder name: (alias1, alias2, alias3) becomes folder 'alias1 alias2 alias3'.\n" + "All names in the group are used as aliases for matching content.\n\n" + "The 'Filter: [Type]' button next to this input cycles how this filter applies:\n" + "- Filter: Files: Checks individual filenames. Only matching files are downloaded.\n" + "- Filter: Title: Checks post titles. All files from a matching post are downloaded.\n" + "- Filter: Both: Checks post title first. If no match, then checks filenames.\n" + "- Filter: Comments (Beta): Checks filenames first. If no match, then checks post comments.\n\n" + "This filter also influences folder naming if 'Separate Folders by Name/Title' is enabled." 
) char_input_and_button_layout.addWidget(self.character_input, 3) + self.char_filter_scope_toggle_button = QPushButton() self._update_char_filter_scope_button_text() self.char_filter_scope_toggle_button.setStyleSheet("padding: 6px 10px;") @@ -1998,13 +2053,17 @@ class DownloaderApp(QWidget): skip_input_and_button_layout.setContentsMargins(0, 0, 0, 0) skip_input_and_button_layout.setSpacing(10) self.skip_words_input = QLineEdit() + # Updated tooltip for skip_words_input self.skip_words_input.setToolTip( - "Enter words, comma-separated, to skip downloading certain files or posts.\n" - "The 'Scope' button determines if this applies to file names, post titles, or both.\n" - "Example: WIP, sketch, preview, text post" + "Enter words, comma-separated, to skip downloading certain content (e.g., WIP, sketch, preview).\n\n" + "The 'Scope: [Type]' button next to this input cycles how this filter applies:\n" + "- Scope: Files: Skips individual files if their names contain any of these words.\n" + "- Scope: Posts: Skips entire posts if their titles contain any of these words.\n" + "- Scope: Both: Applies both (post title first, then individual files if post title is okay)." ) self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview") skip_input_and_button_layout.addWidget(self.skip_words_input, 1) + self.skip_scope_toggle_button = QPushButton() self._update_skip_scope_button_text() self.skip_scope_toggle_button.setStyleSheet("padding: 6px 10px;")