This commit is contained in:
Yuvi9587
2025-06-03 18:07:20 +01:00
parent 10b2ec666f
commit cdf4e9bdfb
2 changed files with 63 additions and 299 deletions

View File

@@ -67,33 +67,6 @@ FOLDER_NAME_STOP_WORDS = {
"right", "s", "she", "so", "technically", "tell", "the", "their", "they", "this", "right", "s", "she", "so", "technically", "tell", "the", "their", "they", "this",
"to", "ve", "was", "we", "well", "were", "with", "www", "year", "you", "your", "to", "ve", "was", "we", "well", "were", "with", "www", "year", "you", "your",
} }
# Keywords to avoid for folder names if the UI filter is empty.
# Every entry is lowercase and appears exactly once; entries are grouped by
# theme so a duplicate or a gap is easy to spot when the list is edited.
DEFAULT_UNWANTED_FOLDER_KEYWORDS_FOR_GENERIC_NAMING = {
    # Post categories / announcements
    "fan-art", "fanart", "request", "requests", "poll", "holiday", "commission", "commissions",
    # Month abbreviations ("may" doubles as the full month name)
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec",
    # Full month names
    "january", "february", "march", "april", "june",
    "july", "august", "september", "october", "november", "december",
    # Weekday abbreviations
    "mon", "tue", "wed", "thu", "fri", "sat", "sun",
    # Ordinals
    "first", "second", "third", "fourth", "fifth",
    # Assorted generic title words
    "couple", "cuff", "cuffs", "flash", "etc", "futa",
    "late", "early", "form", "post", "dawn", "dark",
    "open", "close", "batch", "winner", "loser",
    "big", "small", "another", "other", "some", "more", "new", "old",
    "let", "lot", "better", "image", "video", "art", "drawing", "sketch",
    # Colours
    "red", "blue", "green", "black", "white", "yellow", "pink", "purple",
    "orange", "brown", "gray", "grey", "silver", "gold",
    # Content tags ("nsfw" lives in BASE_UNWANTED_KEYWORDS_FOR_FOLDERS)
    "web", "cum", "sfw",
    # Numbers as words, one through twenty
    "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
    "eleven", "twelve", "thirteen", "fourteen", "fifteen",
    "sixteen", "seventeen", "eighteen", "nineteen", "twenty",
    # Numbers as digits, 0 through 20
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
}

# Keywords that are never acceptable as folder names.
BASE_UNWANTED_KEYWORDS_FOR_FOLDERS = {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'}

# Parts to strip from filename tokens for folder naming.
FILENAME_TOKEN_STRIPPABLE_PARTS = {"web", "cum", "nsfw", "sfw"}
def parse_cookie_string(cookie_string): def parse_cookie_string(cookie_string):
"""Parses a 'name=value; name2=value2' cookie string into a dict.""" """Parses a 'name=value; name2=value2' cookie string into a dict."""
cookies = {} cookies = {}
@@ -188,30 +161,13 @@ def strip_html_tags(html_text):
def extract_folder_name_from_title(title, unwanted_keywords):
    """Derive a folder name from a post title.

    The first word of the title that survives ``clean_folder_name`` and is
    not an unwanted keyword wins. If no single word qualifies, the whole
    cleaned title is used, provided at least one of its words is not an
    unwanted keyword.

    Args:
        title: Post title; may be empty or ``None``.
        unwanted_keywords: Container of lowercase words that must not be
            used as a folder name.

    Returns:
        The chosen folder name, or ``'Uncategorized'`` when the title is
        empty, cleans to nothing, or consists only of unwanted keywords.
    """
    if not title:
        return 'Uncategorized'

    word_pattern = r'\b[\w\-]+\b'

    # Pass 1: first individual word that is usable on its own.
    for token in re.findall(word_pattern, title.lower()):
        clean_token = clean_folder_name(token)
        if clean_token and clean_token.lower() not in unwanted_keywords:
            return clean_token

    # Pass 2: fall back to the whole title.
    cleaned_full_title = clean_folder_name(title)
    if not cleaned_full_title:
        return 'Uncategorized'

    # Re-tokenize the cleaned title: it is only acceptable when it contains at
    # least one wanted word. A title that tokenizes to nothing (e.g. "...")
    # yields an empty list, so any() is False and we fall through.
    remaining_tokens = re.findall(word_pattern, cleaned_full_title.lower())
    if any(token not in unwanted_keywords for token in remaining_tokens):
        return cleaned_full_title
    return 'Uncategorized'
def match_folders_from_title(title, names_to_match, unwanted_keywords): def match_folders_from_title(title, names_to_match, unwanted_keywords):
""" """
Matches folder names from a title based on a list of known name objects. Matches folder names from a title based on a list of known name objects.
@@ -237,41 +193,6 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
matched_cleaned_names.add(cleaned_primary_name) matched_cleaned_names.add(cleaned_primary_name)
break # Found a match for this primary name via one of its aliases break # Found a match for this primary name via one of its aliases
return sorted(list(matched_cleaned_names)) return sorted(list(matched_cleaned_names))
def extract_folder_name_from_filename_tokens(filename, unwanted_keywords, strippable_suffixes_prefixes):
    """Extract a folder name from the first suitable word of a filename.

    The extension is dropped and the base name is split into word tokens.
    Each token has the known strippable suffixes removed (case-insensitive),
    is passed through ``clean_folder_name``, and is accepted if it contains
    at least four alphabetic characters and is not an unwanted keyword.

    Args:
        filename: File name, with or without extension; may be empty/``None``.
        unwanted_keywords: Container of lowercase words that must not be
            returned as a folder name.
        strippable_suffixes_prefixes: Iterable of strings to strip from the
            end of each token. Despite the name, only suffixes are stripped.

    Returns:
        The first accepted cleaned token, or ``None`` if there is none.
    """
    if not filename:
        return None

    # Extract base name without extension.
    base_name, _ = os.path.splitext(filename)
    if not base_name:
        return None

    # Try longer suffixes first, in a fixed order. The caller passes a set,
    # whose iteration order for strings varies between interpreter runs; with
    # overlapping parts such as "sfw"/"nsfw" that made "Lunansfw" come out as
    # either "Luna" or "Lunan". Empty parts are skipped because token[:-0]
    # would erase the whole token.
    suffixes = sorted(
        {part.lower() for part in strippable_suffixes_prefixes if part},
        key=lambda part: (-len(part), part),
    )

    for token_candidate in re.findall(r'\b[\w\-]+\b', base_name):
        modified_token = token_candidate
        for suffix in suffixes:
            if modified_token.lower().endswith(suffix):
                modified_token = modified_token[:-len(suffix)]

        cleaned_token = clean_folder_name(modified_token)
        if not cleaned_token or cleaned_token.lower() in unwanted_keywords:
            continue

        # Require some real text so short codes and numbers are not used.
        if sum(1 for char in cleaned_token if char.isalpha()) >= 4:
            return cleaned_token  # First valid token wins.

    return None
def is_image(filename): def is_image(filename):
if not filename: return False if not filename: return False
_, ext = os.path.splitext(filename) _, ext = os.path.splitext(filename)
@@ -1121,15 +1042,6 @@ class PostProcessorWorker:
permanent_failures_this_post = [] # New list for permanent failures permanent_failures_this_post = [] # New list for permanent failures
total_downloaded_this_post = 0 total_downloaded_this_post = 0
total_skipped_this_post = 0 total_skipped_this_post = 0
# Determine effective unwanted keywords for folder naming
effective_unwanted_keywords_for_folders = set(BASE_UNWANTED_KEYWORDS_FOR_FOLDERS) # Start with base
if not current_character_filters: # UI filter is empty
self.logger(" UI 'Filter by Character(s)' is empty. Applying extended unwanted keywords for folder naming.")
effective_unwanted_keywords_for_folders.update(DEFAULT_UNWANTED_FOLDER_KEYWORDS_FOR_GENERIC_NAMING)
else:
self.logger(f" UI 'Filter by Character(s)' is NOT empty. Using base unwanted keywords for folder naming: {effective_unwanted_keywords_for_folders}")
parsed_api_url = urlparse(self.api_url_input) parsed_api_url = urlparse(self.api_url_input)
referer_url = f"https://{parsed_api_url.netloc}/" referer_url = f"https://{parsed_api_url.netloc}/"
headers = {'User-Agent': 'Mozilla/5.0', 'Referer': referer_url, 'Accept': '*/*'} headers = {'User-Agent': 'Mozilla/5.0', 'Referer': referer_url, 'Accept': '*/*'}
@@ -1279,135 +1191,41 @@ class PostProcessorWorker:
if not self.extract_links_only and self.use_subfolders: if not self.extract_links_only and self.use_subfolders:
if self._check_pause(f"Subfolder determination for post {post_id}"): return 0, num_potential_files_in_post, [] if self._check_pause(f"Subfolder determination for post {post_id}"): return 0, num_potential_files_in_post, []
primary_char_filter_for_folder = None # type: ignore primary_char_filter_for_folder = None # type: ignore
folder_name_from_ui_post_match = None # Will store the cleaned folder name if a UI filter matches the post
log_reason_for_folder = "" log_reason_for_folder = ""
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment:
# Only consider UI filters if they are actually provided by the user if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope:
if current_character_filters: primary_char_filter_for_folder = char_filter_that_matched_file_in_comment_scope
# Check if a UI-provided character filter matched the post at a post-level (Title or Comment scope) log_reason_for_folder = "Matched char filter in filename (Comments scope)"
if self.char_filter_scope == CHAR_SCOPE_COMMENTS: elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment: # Fallback to comment match
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope: primary_char_filter_for_folder = char_filter_that_matched_comment
folder_name_from_ui_post_match = clean_folder_name(char_filter_that_matched_file_in_comment_scope["name"]) log_reason_for_folder = "Matched char filter in comments (Comments scope, no file match)"
log_reason_for_folder = "UI Filter: Matched char filter in filename (Comments scope)" elif (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and char_filter_that_matched_title: # Existing logic for other scopes
elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment: primary_char_filter_for_folder = char_filter_that_matched_title
folder_name_from_ui_post_match = clean_folder_name(char_filter_that_matched_comment["name"]) log_reason_for_folder = "Matched char filter in title"
log_reason_for_folder = "UI Filter: Matched char filter in comments (Comments scope, no file match)" if primary_char_filter_for_folder:
elif (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and char_filter_that_matched_title: base_folder_names_for_post_content = [clean_folder_name(primary_char_filter_for_folder["name"])]
folder_name_from_ui_post_match = clean_folder_name(char_filter_that_matched_title["name"])
log_reason_for_folder = "UI Filter: Matched char filter in title"
# Note: CHAR_SCOPE_FILES from UI filter doesn't set a post-level folder here; it's handled per-file.
if folder_name_from_ui_post_match:
base_folder_names_for_post_content = [folder_name_from_ui_post_match]
self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}") self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}")
elif not current_character_filters: # No char filters defined, use generic logic
# If no post-level UI filter match, OR if UI filters are empty, try Known.txt derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
if not base_folder_names_for_post_content and self.known_names:
derived_folders = match_folders_from_title(post_title, self.known_names, effective_unwanted_keywords_for_folders)
if derived_folders: if derived_folders:
base_folder_names_for_post_content.extend(derived_folders) base_folder_names_for_post_content.extend(match_folders_from_title(post_title, KNOWN_NAMES, self.unwanted_keywords))
log_msg_known_txt = "Matched from Known.txt"
if not current_character_filters: # UI filter input was empty
log_msg_known_txt += " (UI filter was empty)"
else: # UI filters were present but didn't result in a post-level folder name
log_msg_known_txt += " (No post-level UI filter match from UI)"
self.logger(f" Base folder name(s) for post content ({log_msg_known_txt}): {', '.join(base_folder_names_for_post_content)}")
# If still no folder name (e.g., no UI match, Known.txt empty or no match from Known.txt), fallback to generic title extraction.
if not base_folder_names_for_post_content:
generic_folder_name_from_title = extract_folder_name_from_title(post_title, effective_unwanted_keywords_for_folders)
# If UI filter is empty AND title was generic/unwanted AND Known.txt exists, try Known.txt against filenames
if not current_character_filters and generic_folder_name_from_title.lower() == 'uncategorized' and self.known_names:
self.logger(" Title was generic/unwanted (UI filter empty). Trying Known.txt against filenames...")
found_match_from_filename = False
# all_files_from_post_api_for_char_check is populated earlier and contains {'_original_name_for_log': ...}
for file_info_item in all_files_from_post_api_for_char_check: # Ensure this list is available
current_api_original_filename_for_check = file_info_item.get('_original_name_for_log')
if not current_api_original_filename_for_check: continue
derived_folders_from_filename = match_folders_from_title(
current_api_original_filename_for_check,
self.known_names,
effective_unwanted_keywords_for_folders # Use the same unwanted keywords for consistency
)
if derived_folders_from_filename:
base_folder_names_for_post_content.extend(derived_folders_from_filename)
self.logger(f" Base folder name(s) for post content (Known.txt matched filename '{current_api_original_filename_for_check}'): {', '.join(base_folder_names_for_post_content)}")
found_match_from_filename = True
break # Stop after first filename match that yields folder(s)
if not found_match_from_filename:
self.logger(" Known.txt did not match any filenames after generic title check.")
# If Known.txt vs Filenames didn't work (and still under generic title + empty UI filter), try Filename Token Extraction
if not base_folder_names_for_post_content and not current_character_filters and generic_folder_name_from_title.lower() == 'uncategorized' and all_files_from_post_api_for_char_check:
self.logger(" Known.txt vs filenames failed or N/A. Trying filename token extraction (min 4 alpha chars)...")
found_match_from_filename_token = False
for file_info_item in all_files_from_post_api_for_char_check:
current_api_original_filename_for_check = file_info_item.get('_original_name_for_log')
if not current_api_original_filename_for_check: continue
folder_from_filename_token = extract_folder_name_from_filename_tokens(
current_api_original_filename_for_check,
effective_unwanted_keywords_for_folders,
FILENAME_TOKEN_STRIPPABLE_PARTS # Pass the new set
)
if folder_from_filename_token: # extract_folder_name_from_filename_tokens returns a single string or None
base_folder_names_for_post_content.append(folder_from_filename_token)
self.logger(f" Base folder name(s) for post content (Filename token '{folder_from_filename_token}' from '{current_api_original_filename_for_check}'): {', '.join(base_folder_names_for_post_content)}")
found_match_from_filename_token = True
break # First suitable token wins
if not found_match_from_filename_token:
self.logger(" Filename token extraction did not yield a folder name.")
# If, after the above filename check (if it ran), we still don't have a folder,
# OR if the title wasn't generic/unwanted, OR if UI filter was NOT empty,
# then consider using the generic_folder_name_from_title (if it's valid).
if not base_folder_names_for_post_content:
if generic_folder_name_from_title and generic_folder_name_from_title.lower() != 'uncategorized':
base_folder_names_for_post_content.append(generic_folder_name_from_title)
self.logger(f" Base folder name(s) for post content (Generic title parsing - no specific filter match from UI/Known.txt(title/filename)/FilenameToken): {', '.join(base_folder_names_for_post_content)}")
else: # generic_folder_name_from_title was 'uncategorized' and filename check (if ran) didn't yield anything
self.logger(f" Base folder name(s) for post content (Generic title parsing resulted in 'uncategorized', no match from Known.txt(vs filename) or FilenameToken): N/A")
# Final cleanup: ensure there's at least one valid folder name.
base_folder_names_for_post_content = [name for name in base_folder_names_for_post_content if name and name.strip()]
if not base_folder_names_for_post_content:
ultimate_fallback_candidate = clean_folder_name(post_title if post_title else "untitled_creator_content")
if not current_character_filters and ultimate_fallback_candidate.lower() in effective_unwanted_keywords_for_folders:
base_folder_names_for_post_content = ["general_content"] # A very generic, safe fallback
else: else:
base_folder_names_for_post_content = [ultimate_fallback_candidate if ultimate_fallback_candidate else "general_content"] base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
self.logger(f" Base folder name(s) for post content (Ultimate fallback): {', '.join(base_folder_names_for_post_content)}") if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
self.logger(f" Base folder name(s) for post content (Generic title parsing - no char filters): {', '.join(base_folder_names_for_post_content)}")
if not self.extract_links_only and self.use_subfolders and self.skip_words_list: if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
if self._check_pause(f"Folder keyword skip check for post {post_id}"): return 0, num_potential_files_in_post, [] if self._check_pause(f"Folder keyword skip check for post {post_id}"): return 0, num_potential_files_in_post, []
for folder_name_to_check in base_folder_names_for_post_content: # type: ignore for folder_name_to_check in base_folder_names_for_post_content: # type: ignore
if not folder_name_to_check: continue if not folder_name_to_check: continue
if any(skip_word.lower() in folder_name_to_check.lower() for skip_word in self.skip_words_list): if any(skip_word.lower() in folder_name_to_check.lower() for skip_word in self.skip_words_list):
matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in folder_name_to_check.lower()), "unknown_skip_word") # type: ignore matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in folder_name_to_check.lower()), "unknown_skip_word") # type: ignore
# Determine if the special per-file character folder logic should be activated for this post self.logger(f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check}' contains '{matched_skip}'.")
is_ui_filter_empty_for_per_file_logic = not current_character_filters return 0, num_potential_files_in_post, [], [], []
# Check if the determined base_folder_names_for_post_content are generic
# This implies that neither UI filters nor Known.txt (against title) yielded a specific folder.
is_base_folder_generic_for_per_file_logic = False
if not base_folder_names_for_post_content:
is_base_folder_generic_for_per_file_logic = True
elif len(base_folder_names_for_post_content) == 1 and \
base_folder_names_for_post_content[0].lower() == 'uncategorized': # A common generic fallback
is_base_folder_generic_for_per_file_logic = True
activate_per_file_character_folder_logic = (
is_ui_filter_empty_for_per_file_logic and
is_base_folder_generic_for_per_file_logic and
self.use_subfolders and # User wants subfolders
not self.extract_links_only # Not in links-only mode
)
if activate_per_file_character_folder_logic:
self.logger(f" Per-file character folder logic activated for post {post_id} (UI filter empty, generic post title/folder).")
if (self.show_external_links or self.extract_links_only) and post_content_html: # type: ignore if (self.show_external_links or self.extract_links_only) and post_content_html: # type: ignore
if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], [] if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], []
try: try:
mega_key_pattern = re.compile(r'\b([a-zA-Z0-9_-]{22,43})\b') # Adjusted for typical Mega key lengths mega_key_pattern = re.compile(r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b') # type: ignore
unique_links_data = {} unique_links_data = {}
for match in link_pattern.finditer(post_content_html): for match in link_pattern.finditer(post_content_html):
link_url = match.group(1).strip() link_url = match.group(1).strip()
@@ -1572,16 +1390,11 @@ class PostProcessorWorker:
if self._check_pause(f"File processing loop for post {post_id}, file {file_idx}"): break if self._check_pause(f"File processing loop for post {post_id}, file {file_idx}"): break
if self.check_cancel(): break if self.check_cancel(): break
current_api_original_filename = file_info_to_dl.get('_original_name_for_log') current_api_original_filename = file_info_to_dl.get('_original_name_for_log')
file_is_candidate_by_char_filter_scope = False file_is_candidate_by_char_filter_scope = False
char_filter_info_that_matched_file = None char_filter_info_that_matched_file = None
if not current_character_filters: if not current_character_filters:
file_is_candidate_by_char_filter_scope = True file_is_candidate_by_char_filter_scope = True
else: else:
# This block determines if the file is a candidate based on the *overall post/comment/file filter scope*
# It's important for deciding if the file should be downloaded *at all* if UI filters are present.
# The new per-file logic for folder naming is separate but related.
if self.char_filter_scope == CHAR_SCOPE_FILES: if self.char_filter_scope == CHAR_SCOPE_FILES:
for filter_item_obj in current_character_filters: for filter_item_obj in current_character_filters:
terms_to_check_for_file = list(filter_item_obj["aliases"]) terms_to_check_for_file = list(filter_item_obj["aliases"])
@@ -1594,9 +1407,7 @@ class PostProcessorWorker:
char_filter_info_that_matched_file = filter_item_obj char_filter_info_that_matched_file = filter_item_obj
self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Files.") self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Files.")
break break
if file_is_candidate_by_char_filter_scope: break if file_is_candidate_by_char_filter_scope: break
# ... (rest of the existing char_filter_scope logic for CHAR_SCOPE_TITLE, CHAR_SCOPE_BOTH, CHAR_SCOPE_COMMENTS) ...
elif self.char_filter_scope == CHAR_SCOPE_TITLE: elif self.char_filter_scope == CHAR_SCOPE_TITLE:
if post_is_candidate_by_title_char_match: if post_is_candidate_by_title_char_match:
file_is_candidate_by_char_filter_scope = True file_is_candidate_by_char_filter_scope = True
@@ -1621,66 +1432,31 @@ class PostProcessorWorker:
break break
if file_is_candidate_by_char_filter_scope: break if file_is_candidate_by_char_filter_scope: break
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS: elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
if post_is_candidate_by_file_char_match_in_comment_scope: if post_is_candidate_by_file_char_match_in_comment_scope: # Post was candidate due to a file match
file_is_candidate_by_char_filter_scope = True file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_file_in_comment_scope char_filter_info_that_matched_file = char_filter_that_matched_file_in_comment_scope # Use the filter that matched a file in the post
self.logger(f" File '{current_api_original_filename}' is candidate because a file in this post matched char filter (Overall Scope: Comments).") self.logger(f" File '{current_api_original_filename}' is candidate because a file in this post matched char filter (Overall Scope: Comments).")
elif post_is_candidate_by_comment_char_match: elif post_is_candidate_by_comment_char_match: # Post was candidate due to comment match (no file match for post)
file_is_candidate_by_char_filter_scope = True file_is_candidate_by_char_filter_scope = True
char_filter_info_that_matched_file = char_filter_that_matched_comment char_filter_info_that_matched_file = char_filter_that_matched_comment # Use the filter that matched comments
self.logger(f" File '{current_api_original_filename}' is candidate because post comments matched char filter (Overall Scope: Comments).") self.logger(f" File '{current_api_original_filename}' is candidate because post comments matched char filter (Overall Scope: Comments).")
if not file_is_candidate_by_char_filter_scope: if not file_is_candidate_by_char_filter_scope:
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.") self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
total_skipped_this_post += 1 total_skipped_this_post += 1
continue continue
current_path_for_file = self.override_output_dir if self.override_output_dir else self.download_root # Use override if provided
# Determine the target subfolder for *this specific file* if self.use_subfolders:
target_path_subfolder_component_for_this_file = None char_title_subfolder_name = None
log_reason_for_file_subfolder = "Default post-level folder"
if activate_per_file_character_folder_logic:
base_name_for_file_logic, _ = os.path.splitext(current_api_original_filename)
stripped_base_name_for_file_logic = base_name_for_file_logic
for keyword_to_strip in FILENAME_TOKEN_STRIPPABLE_PARTS:
if stripped_base_name_for_file_logic.lower().endswith(keyword_to_strip.lower()):
stripped_base_name_for_file_logic = stripped_base_name_for_file_logic[:-len(keyword_to_strip)]
stripped_base_name_for_file_logic = stripped_base_name_for_file_logic.strip()
PotentialCharacterName_for_file = clean_folder_name(stripped_base_name_for_file_logic)
if PotentialCharacterName_for_file and self.known_names:
for known_entry in self.known_names:
primary_known_name = known_entry['name']
aliases_to_check = set(known_entry.get("aliases", []))
if not known_entry.get("is_group", False): # For non-groups, primary name is also an alias
aliases_to_check.add(primary_known_name)
if any(PotentialCharacterName_for_file.lower() == alias.lower() for alias in aliases_to_check):
character_subfolder_candidate = clean_folder_name(primary_known_name)
if character_subfolder_candidate and \
character_subfolder_candidate.lower() not in effective_unwanted_keywords_for_folders:
target_path_subfolder_component_for_this_file = character_subfolder_candidate
log_reason_for_file_subfolder = f"File '{current_api_original_filename}' matched Known Name '{primary_known_name}'"
self.logger(f" {log_reason_for_file_subfolder}. Using subfolder: '{target_path_subfolder_component_for_this_file}'")
break
# If per-file logic didn't find a specific character folder, or wasn't active,
# fall back to the general post-level folder determination.
if target_path_subfolder_component_for_this_file is None and self.use_subfolders:
if self.target_post_id_from_initial_url and self.custom_folder_name: if self.target_post_id_from_initial_url and self.custom_folder_name:
target_path_subfolder_component_for_this_file = self.custom_folder_name char_title_subfolder_name = self.custom_folder_name
log_reason_for_file_subfolder = "Custom folder name for single post" elif char_filter_info_that_matched_file:
elif char_filter_info_that_matched_file: # This is from the UI filter check earlier char_title_subfolder_name = clean_folder_name(char_filter_info_that_matched_file["name"])
target_path_subfolder_component_for_this_file = clean_folder_name(char_filter_info_that_matched_file["name"]) elif char_filter_that_matched_title:
log_reason_for_file_subfolder = f"UI Filter matched ({char_filter_info_that_matched_file['name']})" char_title_subfolder_name = clean_folder_name(char_filter_that_matched_title["name"])
elif base_folder_names_for_post_content: # From Known.txt on title or generic title extraction elif base_folder_names_for_post_content:
target_path_subfolder_component_for_this_file = base_folder_names_for_post_content[0] char_title_subfolder_name = base_folder_names_for_post_content[0]
log_reason_for_file_subfolder = f"Post-level folder derived from title/Known.txt ('{base_folder_names_for_post_content[0]}')" if char_title_subfolder_name:
# If still None, it means no subfolder is applicable based on these rules. current_path_for_file = os.path.join(current_path_for_file, char_title_subfolder_name)
# Construct the full path
current_path_for_file = self.override_output_dir if self.override_output_dir else self.download_root # Use override if provided
if target_path_subfolder_component_for_this_file: # If a subfolder name was determined
current_path_for_file = os.path.join(current_path_for_file, target_path_subfolder_component_for_this_file)
if self.use_post_subfolders: if self.use_post_subfolders:
cleaned_title_for_subfolder = clean_folder_name(post_title) cleaned_title_for_subfolder = clean_folder_name(post_title)
post_specific_subfolder_name = cleaned_title_for_subfolder # Use only the cleaned title post_specific_subfolder_name = cleaned_title_for_subfolder # Use only the cleaned title
@@ -1688,8 +1464,11 @@ class PostProcessorWorker:
target_folder_path_for_this_file = current_path_for_file target_folder_path_for_this_file = current_path_for_file
manga_date_counter_to_pass = None manga_date_counter_to_pass = None
manga_global_counter_to_pass = None manga_global_counter_to_pass = None
if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED: manga_date_counter_to_pass = self.manga_date_file_counter_ref if self.manga_mode_active:
if self.manga_mode_active and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING: manga_global_counter_to_pass = self.manga_global_file_counter_ref if self.manga_filename_style == STYLE_DATE_BASED:
manga_date_counter_to_pass = self.manga_date_file_counter_ref
elif self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING:
manga_global_counter_to_pass = self.manga_global_file_counter_ref if self.manga_global_file_counter_ref is not None else self.manga_date_file_counter_ref
futures_list.append(file_pool.submit( futures_list.append(file_pool.submit(
self._download_single_file, self._download_single_file,
file_info_to_dl, file_info_to_dl,

47
main.py
View File

@@ -842,11 +842,10 @@ class KnownNamesFilterDialog(QDialog):
class FavoriteArtistsDialog(QDialog): class FavoriteArtistsDialog(QDialog):
"""Dialog to display and select favorite artists.""" """Dialog to display and select favorite artists."""
def __init__(self, parent_app, cookies_config, target_service_domain): def __init__(self, parent_app, cookies_config):
super().__init__(parent_app) super().__init__(parent_app)
self.parent_app = parent_app self.parent_app = parent_app
self.cookies_config = cookies_config self.cookies_config = cookies_config
self.target_service_domain = target_service_domain # Store the target domain
self.all_fetched_artists = [] self.all_fetched_artists = []
self.selected_artist_urls = [] self.selected_artist_urls = []
@@ -924,8 +923,9 @@ class FavoriteArtistsDialog(QDialog):
self.artist_list_widget.setVisible(show) self.artist_list_widget.setVisible(show)
def _fetch_favorite_artists(self): def _fetch_favorite_artists(self):
fav_url = f"https://{self.target_service_domain}/api/v1/account/favorites?type=artist" fav_url = "https://kemono.su/api/v1/account/favorites?type=artist"
self._logger(f"Attempting to fetch favorite artists from: {fav_url} (Targeting: {self.target_service_domain})") self._logger(f"Attempting to fetch favorite artists from: {fav_url}")
cookies_dict = prepare_cookies_for_request( cookies_dict = prepare_cookies_for_request(
self.cookies_config['use_cookie'], self.cookies_config['use_cookie'],
self.cookies_config['cookie_text'], self.cookies_config['cookie_text'],
@@ -964,7 +964,7 @@ class FavoriteArtistsDialog(QDialog):
artist_service = artist_entry.get("service") artist_service = artist_entry.get("service")
if artist_id and artist_name and artist_service: if artist_id and artist_name and artist_service:
full_url = f"https://{self.target_service_domain}/{artist_service}/user/{artist_id}" full_url = f"https://kemono.su/{artist_service}/user/{artist_id}"
self.all_fetched_artists.append({'name': artist_name, 'url': full_url, 'service': artist_service}) self.all_fetched_artists.append({'name': artist_name, 'url': full_url, 'service': artist_service})
else: else:
self._logger(f"Warning: Skipping favorite artist entry due to missing data: {artist_entry}") self._logger(f"Warning: Skipping favorite artist entry due to missing data: {artist_entry}")
@@ -1045,19 +1045,18 @@ class FavoritePostsFetcherThread(QThread):
progress_bar_update = pyqtSignal(int, int) # value, maximum progress_bar_update = pyqtSignal(int, int) # value, maximum
finished = pyqtSignal(list, str) # list of posts, error message (or None) finished = pyqtSignal(list, str) # list of posts, error message (or None)
def __init__(self, cookies_config, parent_logger_func, target_service_domain): def __init__(self, cookies_config, parent_logger_func): # Removed parent_get_domain_func
super().__init__() super().__init__()
self.cookies_config = cookies_config self.cookies_config = cookies_config
self.parent_logger_func = parent_logger_func self.parent_logger_func = parent_logger_func
self.target_service_domain = target_service_domain
self.cancellation_event = threading.Event() self.cancellation_event = threading.Event()
def _logger(self, message): def _logger(self, message):
self.parent_logger_func(f"[FavPostsFetcherThread] {message}") self.parent_logger_func(f"[FavPostsFetcherThread] {message}")
def run(self): def run(self):
fav_url = f"https://{self.target_service_domain}/api/v1/account/favorites?type=post" fav_url = "https://kemono.su/api/v1/account/favorites?type=post"
self._logger(f"Attempting to fetch favorite posts from: {fav_url} (Targeting: {self.target_service_domain})") self._logger(f"Attempting to fetch favorite posts from: {fav_url}")
self.status_update.emit("Fetching list of favorite posts...") self.status_update.emit("Fetching list of favorite posts...")
self.progress_bar_update.emit(0, 0) # Indeterminate state for initial fetch self.progress_bar_update.emit(0, 0) # Indeterminate state for initial fetch
@@ -1147,11 +1146,10 @@ class PostListItemWidget(QWidget):
class FavoritePostsDialog(QDialog): class FavoritePostsDialog(QDialog):
"""Dialog to display and select favorite posts.""" """Dialog to display and select favorite posts."""
def __init__(self, parent_app, cookies_config, known_names_list_ref, target_service_domain): def __init__(self, parent_app, cookies_config, known_names_list_ref):
super().__init__(parent_app) super().__init__(parent_app)
self.parent_app = parent_app self.parent_app = parent_app
self.cookies_config = cookies_config self.cookies_config = cookies_config
self.target_service_domain = target_service_domain # Store the target domain
self.all_fetched_posts = [] self.all_fetched_posts = []
self.selected_posts_data = [] self.selected_posts_data = []
self.known_names_list_ref = known_names_list_ref # Store reference to global KNOWN_NAMES self.known_names_list_ref = known_names_list_ref # Store reference to global KNOWN_NAMES
@@ -1269,9 +1267,8 @@ class FavoritePostsDialog(QDialog):
self.fetcher_thread = FavoritePostsFetcherThread( self.fetcher_thread = FavoritePostsFetcherThread(
self.cookies_config, self.cookies_config,
self.parent_app.log_signal.emit, # Pass parent's logger, self.parent_app.log_signal.emit, # Pass parent's logger
self.target_service_domain # Pass the target domain ) # Removed _get_domain_for_service
)
self.fetcher_thread.status_update.connect(self.status_label.setText) self.fetcher_thread.status_update.connect(self.status_label.setText)
self.fetcher_thread.finished.connect(self._on_fetch_completed) self.fetcher_thread.finished.connect(self._on_fetch_completed)
self.fetcher_thread.progress_bar_update.connect(self._set_progress_bar_value) # Connect the missing signal self.fetcher_thread.progress_bar_update.connect(self._set_progress_bar_value) # Connect the missing signal
@@ -1465,7 +1462,7 @@ class FavoritePostsDialog(QDialog):
item = self.post_list_widget.item(i) item = self.post_list_widget.item(i)
if item and item.checkState() == Qt.Checked: if item and item.checkState() == Qt.Checked:
post_data_for_download = item.data(Qt.UserRole) post_data_for_download = item.data(Qt.UserRole)
self.selected_posts_data.append(post_data_for_download) self.selected_posts_data.append(post_data_for_download)
if not self.selected_posts_data: if not self.selected_posts_data:
QMessageBox.information(self, "No Selection", "Please select at least one post to download.") QMessageBox.information(self, "No Selection", "Please select at least one post to download.")
@@ -6330,13 +6327,7 @@ class DownloaderApp(QWidget):
'app_base_dir': self.app_base_dir 'app_base_dir': self.app_base_dir
} }
cookie_text_lower = self.cookie_text_input.text().lower() if hasattr(self, 'cookie_text_input') else "" dialog = FavoriteArtistsDialog(self, cookies_config)
selected_path_lower = self.selected_cookie_filepath.lower() if self.selected_cookie_filepath else ""
target_service_domain = "kemono.su" # Default
if "coomer.su" in cookie_text_lower or "coomer.party" in cookie_text_lower or \
("coomer" in selected_path_lower and ".txt" in selected_path_lower): # Check if "coomer" is in the filename part
target_service_domain = "coomer.su"
dialog = FavoriteArtistsDialog(self, cookies_config, target_service_domain)
if dialog.exec_() == QDialog.Accepted: if dialog.exec_() == QDialog.Accepted:
selected_artists = dialog.get_selected_artists() # Changed method name selected_artists = dialog.get_selected_artists() # Changed method name
if selected_artists: if selected_artists:
@@ -6386,20 +6377,14 @@ class DownloaderApp(QWidget):
cookie_help_dialog.exec_() cookie_help_dialog.exec_()
return # Don't proceed to show FavoritePostsDialog if cookies are needed but not found return # Don't proceed to show FavoritePostsDialog if cookies are needed but not found
cookie_text_lower = self.cookie_text_input.text().lower() if hasattr(self, 'cookie_text_input') else "" dialog = FavoritePostsDialog(self, cookies_config, KNOWN_NAMES) # Pass KNOWN_NAMES
selected_path_lower = self.selected_cookie_filepath.lower() if self.selected_cookie_filepath else ""
target_service_domain_for_posts = "kemono.su" # Default
if "coomer.su" in cookie_text_lower or "coomer.party" in cookie_text_lower or \
("coomer" in selected_path_lower and ".txt" in selected_path_lower):
target_service_domain_for_posts = "coomer.su"
dialog = FavoritePostsDialog(self, cookies_config, KNOWN_NAMES, target_service_domain_for_posts) # Pass KNOWN_NAMES and target_service_domain
if dialog.exec_() == QDialog.Accepted: if dialog.exec_() == QDialog.Accepted:
selected_posts = dialog.get_selected_posts() selected_posts = dialog.get_selected_posts()
if selected_posts: if selected_posts:
self.log_signal.emit(f" Queuing {len(selected_posts)} favorite post(s) for download.") self.log_signal.emit(f" Queuing {len(selected_posts)} favorite post(s) for download.")
for post_data in selected_posts: for post_data in selected_posts:
direct_post_url = f"https://{target_service_domain_for_posts}/{post_data['service']}/user/{post_data['creator_id']}/post/{post_data['post_id']}" domain = "kemono.su" # Or determine from service/parent app settings
direct_post_url = f"https://{domain}/{post_data['service']}/user/{post_data['creator_id']}/post/{post_data['post_id']}"
queue_item = { queue_item = {
'url': direct_post_url, 'url': direct_post_url,