mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
Commit
This commit is contained in:
@@ -67,33 +67,6 @@ FOLDER_NAME_STOP_WORDS = {
|
|||||||
"right", "s", "she", "so", "technically", "tell", "the", "their", "they", "this",
|
"right", "s", "she", "so", "technically", "tell", "the", "their", "they", "this",
|
||||||
"to", "ve", "was", "we", "well", "were", "with", "www", "year", "you", "your",
|
"to", "ve", "was", "we", "well", "were", "with", "www", "year", "you", "your",
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFAULT_UNWANTED_FOLDER_KEYWORDS_FOR_GENERIC_NAMING = { # Keywords to avoid for folder names if UI filter is empty
|
|
||||||
"fan-art", "fanart", "request", "requests", "poll", "holiday", "commission", "commissions",
|
|
||||||
"jan", "feb", "mar", "apr", "may", "jun",
|
|
||||||
"jul", "aug", "sep", "oct", "nov", "dec",
|
|
||||||
"january", "february", "march", "april", "may", "june", # Full month names (some were already here)
|
|
||||||
"july", "august", "september", "october", "november", "december", # Full month names
|
|
||||||
"mon", "tue", "wed", "thu", "fri", "sat", "sun",
|
|
||||||
"couple", "cuff", "cuffs", "flash", "first", "second", "third", "fourth", "fifth", "etc", "futa", # Added "cuffs"
|
|
||||||
"late", "early", "form", "post", "dawn", "dark", # Added "late", "early", "form", "dawn", "dark". "post", "first" were already effectively covered or present.
|
|
||||||
"red", "blue", "green", "black", "white", "yellow", "pink", "purple", "orange", "brown", "gray", "grey", "silver", "gold",
|
|
||||||
"open", "close", "batch", "winner", "loser", # Added new words
|
|
||||||
"web", "cum", "sfw", # Added per new request ("nsfw" is in BASE_UNWANTED_KEYWORDS_FOR_FOLDERS)
|
|
||||||
"big", "small", "another", "other", "some", "more", "new", "old",
|
|
||||||
"one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", # Numbers as words
|
|
||||||
"eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen", "twenty", # Added 11-20 as words
|
|
||||||
"1", "2", "3", "4", "5", "6", "7", "8", "9", "0", # Numbers as digits
|
|
||||||
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", # Added 10-20 as digits
|
|
||||||
"let", "lot", "better", "post", "image", "video", "art", "drawing", "sketch"
|
|
||||||
}
|
|
||||||
|
|
||||||
BASE_UNWANTED_KEYWORDS_FOR_FOLDERS = {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'}
|
|
||||||
|
|
||||||
FILENAME_TOKEN_STRIPPABLE_PARTS = {"web", "cum", "nsfw", "sfw"} # Parts to strip from filename tokens for folder naming
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_cookie_string(cookie_string):
|
def parse_cookie_string(cookie_string):
|
||||||
"""Parses a 'name=value; name2=value2' cookie string into a dict."""
|
"""Parses a 'name=value; name2=value2' cookie string into a dict."""
|
||||||
cookies = {}
|
cookies = {}
|
||||||
@@ -188,30 +161,13 @@ def strip_html_tags(html_text):
|
|||||||
def extract_folder_name_from_title(title, unwanted_keywords):
|
def extract_folder_name_from_title(title, unwanted_keywords):
|
||||||
if not title: return 'Uncategorized'
|
if not title: return 'Uncategorized'
|
||||||
title_lower = title.lower()
|
title_lower = title.lower()
|
||||||
tokens = re.findall(r'\b[\w\-]+\b', title_lower) # Finds words
|
tokens = re.findall(r'\b[\w\-]+\b', title_lower)
|
||||||
for token in tokens:
|
for token in tokens:
|
||||||
clean_token = clean_folder_name(token) # clean_folder_name also removes FOLDER_NAME_STOP_WORDS
|
clean_token = clean_folder_name(token)
|
||||||
if clean_token and clean_token.lower() not in unwanted_keywords:
|
if clean_token and clean_token.lower() not in unwanted_keywords:
|
||||||
return clean_token
|
return clean_token
|
||||||
# If no single valid token, try the full title
|
cleaned_full_title = clean_folder_name(title)
|
||||||
cleaned_full_title = clean_folder_name(title) # This already applies FOLDER_NAME_STOP_WORDS
|
return cleaned_full_title if cleaned_full_title else 'Uncategorized'
|
||||||
|
|
||||||
if cleaned_full_title:
|
|
||||||
# Now, check if this cleaned_full_title is composed *entirely* of unwanted_keywords
|
|
||||||
# Re-tokenize the cleaned_full_title for this check
|
|
||||||
cleaned_title_tokens = re.findall(r'\b[\w\-]+\b', cleaned_full_title.lower())
|
|
||||||
if not cleaned_title_tokens: # If cleaned_full_title became empty after tokenizing (e.g., was just "...")
|
|
||||||
return 'Uncategorized'
|
|
||||||
|
|
||||||
all_tokens_unwanted = True
|
|
||||||
for c_token in cleaned_title_tokens:
|
|
||||||
if c_token not in unwanted_keywords: # c_token is already lowercased
|
|
||||||
all_tokens_unwanted = False
|
|
||||||
break
|
|
||||||
if not all_tokens_unwanted:
|
|
||||||
return cleaned_full_title # It contains at least one desired token
|
|
||||||
|
|
||||||
return 'Uncategorized' # Fallback if everything is unwanted or title cleans to empty
|
|
||||||
def match_folders_from_title(title, names_to_match, unwanted_keywords):
|
def match_folders_from_title(title, names_to_match, unwanted_keywords):
|
||||||
"""
|
"""
|
||||||
Matches folder names from a title based on a list of known name objects.
|
Matches folder names from a title based on a list of known name objects.
|
||||||
@@ -237,41 +193,6 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
|
|||||||
matched_cleaned_names.add(cleaned_primary_name)
|
matched_cleaned_names.add(cleaned_primary_name)
|
||||||
break # Found a match for this primary name via one of its aliases
|
break # Found a match for this primary name via one of its aliases
|
||||||
return sorted(list(matched_cleaned_names))
|
return sorted(list(matched_cleaned_names))
|
||||||
|
|
||||||
def extract_folder_name_from_filename_tokens(filename, unwanted_keywords, strippable_suffixes_prefixes):
|
|
||||||
"""
|
|
||||||
Extracts a folder name from a filename by finding the first token
|
|
||||||
with 4 or more alphabetic characters that is not in unwanted_keywords,
|
|
||||||
after attempting to strip common suffixes/prefixes.
|
|
||||||
"""
|
|
||||||
if not filename:
|
|
||||||
return None
|
|
||||||
# Extract base name without extension
|
|
||||||
base_name, _ = os.path.splitext(filename)
|
|
||||||
if not base_name:
|
|
||||||
return None
|
|
||||||
|
|
||||||
tokens = re.findall(r'\b[\w\-]+\b', base_name) # Finds words
|
|
||||||
|
|
||||||
for token_candidate in tokens:
|
|
||||||
modified_token = token_candidate
|
|
||||||
for part in strippable_suffixes_prefixes:
|
|
||||||
# Case-insensitive suffix stripping
|
|
||||||
if modified_token.lower().endswith(part.lower()):
|
|
||||||
modified_token = modified_token[:-len(part)]
|
|
||||||
# Case-insensitive prefix stripping (can be added if needed for other words)
|
|
||||||
# elif modified_token.lower().startswith(part.lower()):
|
|
||||||
# modified_token = modified_token[len(part):]
|
|
||||||
|
|
||||||
# Clean the (potentially) modified token
|
|
||||||
cleaned_token = clean_folder_name(modified_token)
|
|
||||||
|
|
||||||
# Validate the final cleaned token
|
|
||||||
alpha_chars_count = sum(1 for char in cleaned_token if char.isalpha())
|
|
||||||
if alpha_chars_count >= 4:
|
|
||||||
if cleaned_token and cleaned_token.lower() not in unwanted_keywords:
|
|
||||||
return cleaned_token # Return the first valid one
|
|
||||||
return None
|
|
||||||
def is_image(filename):
|
def is_image(filename):
|
||||||
if not filename: return False
|
if not filename: return False
|
||||||
_, ext = os.path.splitext(filename)
|
_, ext = os.path.splitext(filename)
|
||||||
@@ -1121,15 +1042,6 @@ class PostProcessorWorker:
|
|||||||
permanent_failures_this_post = [] # New list for permanent failures
|
permanent_failures_this_post = [] # New list for permanent failures
|
||||||
total_downloaded_this_post = 0
|
total_downloaded_this_post = 0
|
||||||
total_skipped_this_post = 0
|
total_skipped_this_post = 0
|
||||||
|
|
||||||
# Determine effective unwanted keywords for folder naming
|
|
||||||
effective_unwanted_keywords_for_folders = set(BASE_UNWANTED_KEYWORDS_FOR_FOLDERS) # Start with base
|
|
||||||
if not current_character_filters: # UI filter is empty
|
|
||||||
self.logger(" UI 'Filter by Character(s)' is empty. Applying extended unwanted keywords for folder naming.")
|
|
||||||
effective_unwanted_keywords_for_folders.update(DEFAULT_UNWANTED_FOLDER_KEYWORDS_FOR_GENERIC_NAMING)
|
|
||||||
else:
|
|
||||||
self.logger(f" UI 'Filter by Character(s)' is NOT empty. Using base unwanted keywords for folder naming: {effective_unwanted_keywords_for_folders}")
|
|
||||||
|
|
||||||
parsed_api_url = urlparse(self.api_url_input)
|
parsed_api_url = urlparse(self.api_url_input)
|
||||||
referer_url = f"https://{parsed_api_url.netloc}/"
|
referer_url = f"https://{parsed_api_url.netloc}/"
|
||||||
headers = {'User-Agent': 'Mozilla/5.0', 'Referer': referer_url, 'Accept': '*/*'}
|
headers = {'User-Agent': 'Mozilla/5.0', 'Referer': referer_url, 'Accept': '*/*'}
|
||||||
@@ -1279,135 +1191,41 @@ class PostProcessorWorker:
|
|||||||
if not self.extract_links_only and self.use_subfolders:
|
if not self.extract_links_only and self.use_subfolders:
|
||||||
if self._check_pause(f"Subfolder determination for post {post_id}"): return 0, num_potential_files_in_post, []
|
if self._check_pause(f"Subfolder determination for post {post_id}"): return 0, num_potential_files_in_post, []
|
||||||
primary_char_filter_for_folder = None # type: ignore
|
primary_char_filter_for_folder = None # type: ignore
|
||||||
folder_name_from_ui_post_match = None # Will store the cleaned folder name if a UI filter matches the post
|
|
||||||
log_reason_for_folder = ""
|
log_reason_for_folder = ""
|
||||||
|
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment:
|
||||||
# Only consider UI filters if they are actually provided by the user
|
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope:
|
||||||
if current_character_filters:
|
primary_char_filter_for_folder = char_filter_that_matched_file_in_comment_scope
|
||||||
# Check if a UI-provided character filter matched the post at a post-level (Title or Comment scope)
|
log_reason_for_folder = "Matched char filter in filename (Comments scope)"
|
||||||
if self.char_filter_scope == CHAR_SCOPE_COMMENTS:
|
elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment: # Fallback to comment match
|
||||||
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope:
|
primary_char_filter_for_folder = char_filter_that_matched_comment
|
||||||
folder_name_from_ui_post_match = clean_folder_name(char_filter_that_matched_file_in_comment_scope["name"])
|
log_reason_for_folder = "Matched char filter in comments (Comments scope, no file match)"
|
||||||
log_reason_for_folder = "UI Filter: Matched char filter in filename (Comments scope)"
|
elif (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and char_filter_that_matched_title: # Existing logic for other scopes
|
||||||
elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment:
|
primary_char_filter_for_folder = char_filter_that_matched_title
|
||||||
folder_name_from_ui_post_match = clean_folder_name(char_filter_that_matched_comment["name"])
|
log_reason_for_folder = "Matched char filter in title"
|
||||||
log_reason_for_folder = "UI Filter: Matched char filter in comments (Comments scope, no file match)"
|
if primary_char_filter_for_folder:
|
||||||
elif (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and char_filter_that_matched_title:
|
base_folder_names_for_post_content = [clean_folder_name(primary_char_filter_for_folder["name"])]
|
||||||
folder_name_from_ui_post_match = clean_folder_name(char_filter_that_matched_title["name"])
|
|
||||||
log_reason_for_folder = "UI Filter: Matched char filter in title"
|
|
||||||
# Note: CHAR_SCOPE_FILES from UI filter doesn't set a post-level folder here; it's handled per-file.
|
|
||||||
|
|
||||||
if folder_name_from_ui_post_match:
|
|
||||||
base_folder_names_for_post_content = [folder_name_from_ui_post_match]
|
|
||||||
self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}")
|
self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}")
|
||||||
|
elif not current_character_filters: # No char filters defined, use generic logic
|
||||||
# If no post-level UI filter match, OR if UI filters are empty, try Known.txt
|
derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
|
||||||
if not base_folder_names_for_post_content and self.known_names:
|
|
||||||
derived_folders = match_folders_from_title(post_title, self.known_names, effective_unwanted_keywords_for_folders)
|
|
||||||
if derived_folders:
|
if derived_folders:
|
||||||
base_folder_names_for_post_content.extend(derived_folders)
|
base_folder_names_for_post_content.extend(match_folders_from_title(post_title, KNOWN_NAMES, self.unwanted_keywords))
|
||||||
log_msg_known_txt = "Matched from Known.txt"
|
|
||||||
if not current_character_filters: # UI filter input was empty
|
|
||||||
log_msg_known_txt += " (UI filter was empty)"
|
|
||||||
else: # UI filters were present but didn't result in a post-level folder name
|
|
||||||
log_msg_known_txt += " (No post-level UI filter match from UI)"
|
|
||||||
self.logger(f" Base folder name(s) for post content ({log_msg_known_txt}): {', '.join(base_folder_names_for_post_content)}")
|
|
||||||
|
|
||||||
# If still no folder name (e.g., no UI match, Known.txt empty or no match from Known.txt), fallback to generic title extraction.
|
|
||||||
if not base_folder_names_for_post_content:
|
|
||||||
generic_folder_name_from_title = extract_folder_name_from_title(post_title, effective_unwanted_keywords_for_folders)
|
|
||||||
|
|
||||||
# If UI filter is empty AND title was generic/unwanted AND Known.txt exists, try Known.txt against filenames
|
|
||||||
if not current_character_filters and generic_folder_name_from_title.lower() == 'uncategorized' and self.known_names:
|
|
||||||
self.logger(" Title was generic/unwanted (UI filter empty). Trying Known.txt against filenames...")
|
|
||||||
found_match_from_filename = False
|
|
||||||
# all_files_from_post_api_for_char_check is populated earlier and contains {'_original_name_for_log': ...}
|
|
||||||
for file_info_item in all_files_from_post_api_for_char_check: # Ensure this list is available
|
|
||||||
current_api_original_filename_for_check = file_info_item.get('_original_name_for_log')
|
|
||||||
if not current_api_original_filename_for_check: continue
|
|
||||||
|
|
||||||
derived_folders_from_filename = match_folders_from_title(
|
|
||||||
current_api_original_filename_for_check,
|
|
||||||
self.known_names,
|
|
||||||
effective_unwanted_keywords_for_folders # Use the same unwanted keywords for consistency
|
|
||||||
)
|
|
||||||
if derived_folders_from_filename:
|
|
||||||
base_folder_names_for_post_content.extend(derived_folders_from_filename)
|
|
||||||
self.logger(f" Base folder name(s) for post content (Known.txt matched filename '{current_api_original_filename_for_check}'): {', '.join(base_folder_names_for_post_content)}")
|
|
||||||
found_match_from_filename = True
|
|
||||||
break # Stop after first filename match that yields folder(s)
|
|
||||||
if not found_match_from_filename:
|
|
||||||
self.logger(" Known.txt did not match any filenames after generic title check.")
|
|
||||||
|
|
||||||
# If Known.txt vs Filenames didn't work (and still under generic title + empty UI filter), try Filename Token Extraction
|
|
||||||
if not base_folder_names_for_post_content and not current_character_filters and generic_folder_name_from_title.lower() == 'uncategorized' and all_files_from_post_api_for_char_check:
|
|
||||||
self.logger(" Known.txt vs filenames failed or N/A. Trying filename token extraction (min 4 alpha chars)...")
|
|
||||||
found_match_from_filename_token = False
|
|
||||||
for file_info_item in all_files_from_post_api_for_char_check:
|
|
||||||
current_api_original_filename_for_check = file_info_item.get('_original_name_for_log')
|
|
||||||
if not current_api_original_filename_for_check: continue
|
|
||||||
|
|
||||||
folder_from_filename_token = extract_folder_name_from_filename_tokens(
|
|
||||||
current_api_original_filename_for_check,
|
|
||||||
effective_unwanted_keywords_for_folders,
|
|
||||||
FILENAME_TOKEN_STRIPPABLE_PARTS # Pass the new set
|
|
||||||
)
|
|
||||||
if folder_from_filename_token: # extract_folder_name_from_filename_tokens returns a single string or None
|
|
||||||
base_folder_names_for_post_content.append(folder_from_filename_token)
|
|
||||||
self.logger(f" Base folder name(s) for post content (Filename token '{folder_from_filename_token}' from '{current_api_original_filename_for_check}'): {', '.join(base_folder_names_for_post_content)}")
|
|
||||||
found_match_from_filename_token = True
|
|
||||||
break # First suitable token wins
|
|
||||||
if not found_match_from_filename_token:
|
|
||||||
self.logger(" Filename token extraction did not yield a folder name.")
|
|
||||||
|
|
||||||
# If, after the above filename check (if it ran), we still don't have a folder,
|
|
||||||
# OR if the title wasn't generic/unwanted, OR if UI filter was NOT empty,
|
|
||||||
# then consider using the generic_folder_name_from_title (if it's valid).
|
|
||||||
if not base_folder_names_for_post_content:
|
|
||||||
if generic_folder_name_from_title and generic_folder_name_from_title.lower() != 'uncategorized':
|
|
||||||
base_folder_names_for_post_content.append(generic_folder_name_from_title)
|
|
||||||
self.logger(f" Base folder name(s) for post content (Generic title parsing - no specific filter match from UI/Known.txt(title/filename)/FilenameToken): {', '.join(base_folder_names_for_post_content)}")
|
|
||||||
else: # generic_folder_name_from_title was 'uncategorized' and filename check (if ran) didn't yield anything
|
|
||||||
self.logger(f" Base folder name(s) for post content (Generic title parsing resulted in 'uncategorized', no match from Known.txt(vs filename) or FilenameToken): N/A")
|
|
||||||
|
|
||||||
# Final cleanup: ensure there's at least one valid folder name.
|
|
||||||
base_folder_names_for_post_content = [name for name in base_folder_names_for_post_content if name and name.strip()]
|
|
||||||
if not base_folder_names_for_post_content:
|
|
||||||
ultimate_fallback_candidate = clean_folder_name(post_title if post_title else "untitled_creator_content")
|
|
||||||
if not current_character_filters and ultimate_fallback_candidate.lower() in effective_unwanted_keywords_for_folders:
|
|
||||||
base_folder_names_for_post_content = ["general_content"] # A very generic, safe fallback
|
|
||||||
else:
|
else:
|
||||||
base_folder_names_for_post_content = [ultimate_fallback_candidate if ultimate_fallback_candidate else "general_content"]
|
base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
|
||||||
self.logger(f" Base folder name(s) for post content (Ultimate fallback): {', '.join(base_folder_names_for_post_content)}")
|
if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
|
||||||
|
base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
|
||||||
|
self.logger(f" Base folder name(s) for post content (Generic title parsing - no char filters): {', '.join(base_folder_names_for_post_content)}")
|
||||||
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
|
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
|
||||||
if self._check_pause(f"Folder keyword skip check for post {post_id}"): return 0, num_potential_files_in_post, []
|
if self._check_pause(f"Folder keyword skip check for post {post_id}"): return 0, num_potential_files_in_post, []
|
||||||
for folder_name_to_check in base_folder_names_for_post_content: # type: ignore
|
for folder_name_to_check in base_folder_names_for_post_content: # type: ignore
|
||||||
if not folder_name_to_check: continue
|
if not folder_name_to_check: continue
|
||||||
if any(skip_word.lower() in folder_name_to_check.lower() for skip_word in self.skip_words_list):
|
if any(skip_word.lower() in folder_name_to_check.lower() for skip_word in self.skip_words_list):
|
||||||
matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in folder_name_to_check.lower()), "unknown_skip_word") # type: ignore
|
matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in folder_name_to_check.lower()), "unknown_skip_word") # type: ignore
|
||||||
# Determine if the special per-file character folder logic should be activated for this post
|
self.logger(f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check}' contains '{matched_skip}'.")
|
||||||
is_ui_filter_empty_for_per_file_logic = not current_character_filters
|
return 0, num_potential_files_in_post, [], [], []
|
||||||
# Check if the determined base_folder_names_for_post_content are generic
|
|
||||||
# This implies that neither UI filters nor Known.txt (against title) yielded a specific folder.
|
|
||||||
is_base_folder_generic_for_per_file_logic = False
|
|
||||||
if not base_folder_names_for_post_content:
|
|
||||||
is_base_folder_generic_for_per_file_logic = True
|
|
||||||
elif len(base_folder_names_for_post_content) == 1 and \
|
|
||||||
base_folder_names_for_post_content[0].lower() == 'uncategorized': # A common generic fallback
|
|
||||||
is_base_folder_generic_for_per_file_logic = True
|
|
||||||
|
|
||||||
activate_per_file_character_folder_logic = (
|
|
||||||
is_ui_filter_empty_for_per_file_logic and
|
|
||||||
is_base_folder_generic_for_per_file_logic and
|
|
||||||
self.use_subfolders and # User wants subfolders
|
|
||||||
not self.extract_links_only # Not in links-only mode
|
|
||||||
)
|
|
||||||
if activate_per_file_character_folder_logic:
|
|
||||||
self.logger(f" ℹ️ Per-file character folder logic activated for post {post_id} (UI filter empty, generic post title/folder).")
|
|
||||||
if (self.show_external_links or self.extract_links_only) and post_content_html: # type: ignore
|
if (self.show_external_links or self.extract_links_only) and post_content_html: # type: ignore
|
||||||
if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], []
|
if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], []
|
||||||
try:
|
try:
|
||||||
mega_key_pattern = re.compile(r'\b([a-zA-Z0-9_-]{22,43})\b') # Adjusted for typical Mega key lengths
|
mega_key_pattern = re.compile(r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b') # type: ignore
|
||||||
unique_links_data = {}
|
unique_links_data = {}
|
||||||
for match in link_pattern.finditer(post_content_html):
|
for match in link_pattern.finditer(post_content_html):
|
||||||
link_url = match.group(1).strip()
|
link_url = match.group(1).strip()
|
||||||
@@ -1572,16 +1390,11 @@ class PostProcessorWorker:
|
|||||||
if self._check_pause(f"File processing loop for post {post_id}, file {file_idx}"): break
|
if self._check_pause(f"File processing loop for post {post_id}, file {file_idx}"): break
|
||||||
if self.check_cancel(): break
|
if self.check_cancel(): break
|
||||||
current_api_original_filename = file_info_to_dl.get('_original_name_for_log')
|
current_api_original_filename = file_info_to_dl.get('_original_name_for_log')
|
||||||
|
|
||||||
file_is_candidate_by_char_filter_scope = False
|
file_is_candidate_by_char_filter_scope = False
|
||||||
char_filter_info_that_matched_file = None
|
char_filter_info_that_matched_file = None
|
||||||
|
|
||||||
if not current_character_filters:
|
if not current_character_filters:
|
||||||
file_is_candidate_by_char_filter_scope = True
|
file_is_candidate_by_char_filter_scope = True
|
||||||
else:
|
else:
|
||||||
# This block determines if the file is a candidate based on the *overall post/comment/file filter scope*
|
|
||||||
# It's important for deciding if the file should be downloaded *at all* if UI filters are present.
|
|
||||||
# The new per-file logic for folder naming is separate but related.
|
|
||||||
if self.char_filter_scope == CHAR_SCOPE_FILES:
|
if self.char_filter_scope == CHAR_SCOPE_FILES:
|
||||||
for filter_item_obj in current_character_filters:
|
for filter_item_obj in current_character_filters:
|
||||||
terms_to_check_for_file = list(filter_item_obj["aliases"])
|
terms_to_check_for_file = list(filter_item_obj["aliases"])
|
||||||
@@ -1594,9 +1407,7 @@ class PostProcessorWorker:
|
|||||||
char_filter_info_that_matched_file = filter_item_obj
|
char_filter_info_that_matched_file = filter_item_obj
|
||||||
self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Files.")
|
self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Files.")
|
||||||
break
|
break
|
||||||
|
|
||||||
if file_is_candidate_by_char_filter_scope: break
|
if file_is_candidate_by_char_filter_scope: break
|
||||||
# ... (rest of the existing char_filter_scope logic for CHAR_SCOPE_TITLE, CHAR_SCOPE_BOTH, CHAR_SCOPE_COMMENTS) ...
|
|
||||||
elif self.char_filter_scope == CHAR_SCOPE_TITLE:
|
elif self.char_filter_scope == CHAR_SCOPE_TITLE:
|
||||||
if post_is_candidate_by_title_char_match:
|
if post_is_candidate_by_title_char_match:
|
||||||
file_is_candidate_by_char_filter_scope = True
|
file_is_candidate_by_char_filter_scope = True
|
||||||
@@ -1621,66 +1432,31 @@ class PostProcessorWorker:
|
|||||||
break
|
break
|
||||||
if file_is_candidate_by_char_filter_scope: break
|
if file_is_candidate_by_char_filter_scope: break
|
||||||
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
|
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
|
||||||
if post_is_candidate_by_file_char_match_in_comment_scope:
|
if post_is_candidate_by_file_char_match_in_comment_scope: # Post was candidate due to a file match
|
||||||
file_is_candidate_by_char_filter_scope = True
|
file_is_candidate_by_char_filter_scope = True
|
||||||
char_filter_info_that_matched_file = char_filter_that_matched_file_in_comment_scope
|
char_filter_info_that_matched_file = char_filter_that_matched_file_in_comment_scope # Use the filter that matched a file in the post
|
||||||
self.logger(f" File '{current_api_original_filename}' is candidate because a file in this post matched char filter (Overall Scope: Comments).")
|
self.logger(f" File '{current_api_original_filename}' is candidate because a file in this post matched char filter (Overall Scope: Comments).")
|
||||||
elif post_is_candidate_by_comment_char_match:
|
elif post_is_candidate_by_comment_char_match: # Post was candidate due to comment match (no file match for post)
|
||||||
file_is_candidate_by_char_filter_scope = True
|
file_is_candidate_by_char_filter_scope = True
|
||||||
char_filter_info_that_matched_file = char_filter_that_matched_comment
|
char_filter_info_that_matched_file = char_filter_that_matched_comment # Use the filter that matched comments
|
||||||
self.logger(f" File '{current_api_original_filename}' is candidate because post comments matched char filter (Overall Scope: Comments).")
|
self.logger(f" File '{current_api_original_filename}' is candidate because post comments matched char filter (Overall Scope: Comments).")
|
||||||
|
|
||||||
if not file_is_candidate_by_char_filter_scope:
|
if not file_is_candidate_by_char_filter_scope:
|
||||||
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
|
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
|
||||||
total_skipped_this_post += 1
|
total_skipped_this_post += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Determine the target subfolder for *this specific file*
|
|
||||||
target_path_subfolder_component_for_this_file = None
|
|
||||||
log_reason_for_file_subfolder = "Default post-level folder"
|
|
||||||
|
|
||||||
if activate_per_file_character_folder_logic:
|
|
||||||
base_name_for_file_logic, _ = os.path.splitext(current_api_original_filename)
|
|
||||||
stripped_base_name_for_file_logic = base_name_for_file_logic
|
|
||||||
for keyword_to_strip in FILENAME_TOKEN_STRIPPABLE_PARTS:
|
|
||||||
if stripped_base_name_for_file_logic.lower().endswith(keyword_to_strip.lower()):
|
|
||||||
stripped_base_name_for_file_logic = stripped_base_name_for_file_logic[:-len(keyword_to_strip)]
|
|
||||||
stripped_base_name_for_file_logic = stripped_base_name_for_file_logic.strip()
|
|
||||||
PotentialCharacterName_for_file = clean_folder_name(stripped_base_name_for_file_logic)
|
|
||||||
|
|
||||||
if PotentialCharacterName_for_file and self.known_names:
|
|
||||||
for known_entry in self.known_names:
|
|
||||||
primary_known_name = known_entry['name']
|
|
||||||
aliases_to_check = set(known_entry.get("aliases", []))
|
|
||||||
if not known_entry.get("is_group", False): # For non-groups, primary name is also an alias
|
|
||||||
aliases_to_check.add(primary_known_name)
|
|
||||||
|
|
||||||
if any(PotentialCharacterName_for_file.lower() == alias.lower() for alias in aliases_to_check):
|
|
||||||
character_subfolder_candidate = clean_folder_name(primary_known_name)
|
|
||||||
if character_subfolder_candidate and \
|
|
||||||
character_subfolder_candidate.lower() not in effective_unwanted_keywords_for_folders:
|
|
||||||
target_path_subfolder_component_for_this_file = character_subfolder_candidate
|
|
||||||
log_reason_for_file_subfolder = f"File '{current_api_original_filename}' matched Known Name '{primary_known_name}'"
|
|
||||||
self.logger(f" {log_reason_for_file_subfolder}. Using subfolder: '{target_path_subfolder_component_for_this_file}'")
|
|
||||||
break
|
|
||||||
|
|
||||||
# If per-file logic didn't find a specific character folder, or wasn't active,
|
|
||||||
# fall back to the general post-level folder determination.
|
|
||||||
if target_path_subfolder_component_for_this_file is None and self.use_subfolders:
|
|
||||||
if self.target_post_id_from_initial_url and self.custom_folder_name:
|
|
||||||
target_path_subfolder_component_for_this_file = self.custom_folder_name
|
|
||||||
log_reason_for_file_subfolder = "Custom folder name for single post"
|
|
||||||
elif char_filter_info_that_matched_file: # This is from the UI filter check earlier
|
|
||||||
target_path_subfolder_component_for_this_file = clean_folder_name(char_filter_info_that_matched_file["name"])
|
|
||||||
log_reason_for_file_subfolder = f"UI Filter matched ({char_filter_info_that_matched_file['name']})"
|
|
||||||
elif base_folder_names_for_post_content: # From Known.txt on title or generic title extraction
|
|
||||||
target_path_subfolder_component_for_this_file = base_folder_names_for_post_content[0]
|
|
||||||
log_reason_for_file_subfolder = f"Post-level folder derived from title/Known.txt ('{base_folder_names_for_post_content[0]}')"
|
|
||||||
# If still None, it means no subfolder is applicable based on these rules.
|
|
||||||
# Construct the full path
|
|
||||||
current_path_for_file = self.override_output_dir if self.override_output_dir else self.download_root # Use override if provided
|
current_path_for_file = self.override_output_dir if self.override_output_dir else self.download_root # Use override if provided
|
||||||
if target_path_subfolder_component_for_this_file: # If a subfolder name was determined
|
if self.use_subfolders:
|
||||||
current_path_for_file = os.path.join(current_path_for_file, target_path_subfolder_component_for_this_file)
|
char_title_subfolder_name = None
|
||||||
|
if self.target_post_id_from_initial_url and self.custom_folder_name:
|
||||||
|
char_title_subfolder_name = self.custom_folder_name
|
||||||
|
elif char_filter_info_that_matched_file:
|
||||||
|
char_title_subfolder_name = clean_folder_name(char_filter_info_that_matched_file["name"])
|
||||||
|
elif char_filter_that_matched_title:
|
||||||
|
char_title_subfolder_name = clean_folder_name(char_filter_that_matched_title["name"])
|
||||||
|
elif base_folder_names_for_post_content:
|
||||||
|
char_title_subfolder_name = base_folder_names_for_post_content[0]
|
||||||
|
if char_title_subfolder_name:
|
||||||
|
current_path_for_file = os.path.join(current_path_for_file, char_title_subfolder_name)
|
||||||
if self.use_post_subfolders:
|
if self.use_post_subfolders:
|
||||||
cleaned_title_for_subfolder = clean_folder_name(post_title)
|
cleaned_title_for_subfolder = clean_folder_name(post_title)
|
||||||
post_specific_subfolder_name = cleaned_title_for_subfolder # Use only the cleaned title
|
post_specific_subfolder_name = cleaned_title_for_subfolder # Use only the cleaned title
|
||||||
@@ -1688,8 +1464,11 @@ class PostProcessorWorker:
|
|||||||
target_folder_path_for_this_file = current_path_for_file
|
target_folder_path_for_this_file = current_path_for_file
|
||||||
manga_date_counter_to_pass = None
|
manga_date_counter_to_pass = None
|
||||||
manga_global_counter_to_pass = None
|
manga_global_counter_to_pass = None
|
||||||
if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED: manga_date_counter_to_pass = self.manga_date_file_counter_ref
|
if self.manga_mode_active:
|
||||||
if self.manga_mode_active and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING: manga_global_counter_to_pass = self.manga_global_file_counter_ref
|
if self.manga_filename_style == STYLE_DATE_BASED:
|
||||||
|
manga_date_counter_to_pass = self.manga_date_file_counter_ref
|
||||||
|
elif self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING:
|
||||||
|
manga_global_counter_to_pass = self.manga_global_file_counter_ref if self.manga_global_file_counter_ref is not None else self.manga_date_file_counter_ref
|
||||||
futures_list.append(file_pool.submit(
|
futures_list.append(file_pool.submit(
|
||||||
self._download_single_file,
|
self._download_single_file,
|
||||||
file_info_to_dl,
|
file_info_to_dl,
|
||||||
|
|||||||
45
main.py
45
main.py
@@ -842,11 +842,10 @@ class KnownNamesFilterDialog(QDialog):
|
|||||||
|
|
||||||
class FavoriteArtistsDialog(QDialog):
|
class FavoriteArtistsDialog(QDialog):
|
||||||
"""Dialog to display and select favorite artists."""
|
"""Dialog to display and select favorite artists."""
|
||||||
def __init__(self, parent_app, cookies_config, target_service_domain):
|
def __init__(self, parent_app, cookies_config):
|
||||||
super().__init__(parent_app)
|
super().__init__(parent_app)
|
||||||
self.parent_app = parent_app
|
self.parent_app = parent_app
|
||||||
self.cookies_config = cookies_config
|
self.cookies_config = cookies_config
|
||||||
self.target_service_domain = target_service_domain # Store the target domain
|
|
||||||
self.all_fetched_artists = []
|
self.all_fetched_artists = []
|
||||||
self.selected_artist_urls = []
|
self.selected_artist_urls = []
|
||||||
|
|
||||||
@@ -924,8 +923,9 @@ class FavoriteArtistsDialog(QDialog):
|
|||||||
self.artist_list_widget.setVisible(show)
|
self.artist_list_widget.setVisible(show)
|
||||||
|
|
||||||
def _fetch_favorite_artists(self):
|
def _fetch_favorite_artists(self):
|
||||||
fav_url = f"https://{self.target_service_domain}/api/v1/account/favorites?type=artist"
|
fav_url = "https://kemono.su/api/v1/account/favorites?type=artist"
|
||||||
self._logger(f"Attempting to fetch favorite artists from: {fav_url} (Targeting: {self.target_service_domain})")
|
self._logger(f"Attempting to fetch favorite artists from: {fav_url}")
|
||||||
|
|
||||||
cookies_dict = prepare_cookies_for_request(
|
cookies_dict = prepare_cookies_for_request(
|
||||||
self.cookies_config['use_cookie'],
|
self.cookies_config['use_cookie'],
|
||||||
self.cookies_config['cookie_text'],
|
self.cookies_config['cookie_text'],
|
||||||
@@ -964,7 +964,7 @@ class FavoriteArtistsDialog(QDialog):
|
|||||||
artist_service = artist_entry.get("service")
|
artist_service = artist_entry.get("service")
|
||||||
|
|
||||||
if artist_id and artist_name and artist_service:
|
if artist_id and artist_name and artist_service:
|
||||||
full_url = f"https://{self.target_service_domain}/{artist_service}/user/{artist_id}"
|
full_url = f"https://kemono.su/{artist_service}/user/{artist_id}"
|
||||||
self.all_fetched_artists.append({'name': artist_name, 'url': full_url, 'service': artist_service})
|
self.all_fetched_artists.append({'name': artist_name, 'url': full_url, 'service': artist_service})
|
||||||
else:
|
else:
|
||||||
self._logger(f"Warning: Skipping favorite artist entry due to missing data: {artist_entry}")
|
self._logger(f"Warning: Skipping favorite artist entry due to missing data: {artist_entry}")
|
||||||
@@ -1045,19 +1045,18 @@ class FavoritePostsFetcherThread(QThread):
|
|||||||
progress_bar_update = pyqtSignal(int, int) # value, maximum
|
progress_bar_update = pyqtSignal(int, int) # value, maximum
|
||||||
finished = pyqtSignal(list, str) # list of posts, error message (or None)
|
finished = pyqtSignal(list, str) # list of posts, error message (or None)
|
||||||
|
|
||||||
def __init__(self, cookies_config, parent_logger_func, target_service_domain):
|
def __init__(self, cookies_config, parent_logger_func): # Removed parent_get_domain_func
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.cookies_config = cookies_config
|
self.cookies_config = cookies_config
|
||||||
self.parent_logger_func = parent_logger_func
|
self.parent_logger_func = parent_logger_func
|
||||||
self.target_service_domain = target_service_domain
|
|
||||||
self.cancellation_event = threading.Event()
|
self.cancellation_event = threading.Event()
|
||||||
|
|
||||||
def _logger(self, message):
|
def _logger(self, message):
|
||||||
self.parent_logger_func(f"[FavPostsFetcherThread] {message}")
|
self.parent_logger_func(f"[FavPostsFetcherThread] {message}")
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
fav_url = f"https://{self.target_service_domain}/api/v1/account/favorites?type=post"
|
fav_url = "https://kemono.su/api/v1/account/favorites?type=post"
|
||||||
self._logger(f"Attempting to fetch favorite posts from: {fav_url} (Targeting: {self.target_service_domain})")
|
self._logger(f"Attempting to fetch favorite posts from: {fav_url}")
|
||||||
self.status_update.emit("Fetching list of favorite posts...")
|
self.status_update.emit("Fetching list of favorite posts...")
|
||||||
self.progress_bar_update.emit(0, 0) # Indeterminate state for initial fetch
|
self.progress_bar_update.emit(0, 0) # Indeterminate state for initial fetch
|
||||||
|
|
||||||
@@ -1147,11 +1146,10 @@ class PostListItemWidget(QWidget):
|
|||||||
|
|
||||||
class FavoritePostsDialog(QDialog):
|
class FavoritePostsDialog(QDialog):
|
||||||
"""Dialog to display and select favorite posts."""
|
"""Dialog to display and select favorite posts."""
|
||||||
def __init__(self, parent_app, cookies_config, known_names_list_ref, target_service_domain):
|
def __init__(self, parent_app, cookies_config, known_names_list_ref):
|
||||||
super().__init__(parent_app)
|
super().__init__(parent_app)
|
||||||
self.parent_app = parent_app
|
self.parent_app = parent_app
|
||||||
self.cookies_config = cookies_config
|
self.cookies_config = cookies_config
|
||||||
self.target_service_domain = target_service_domain # Store the target domain
|
|
||||||
self.all_fetched_posts = []
|
self.all_fetched_posts = []
|
||||||
self.selected_posts_data = []
|
self.selected_posts_data = []
|
||||||
self.known_names_list_ref = known_names_list_ref # Store reference to global KNOWN_NAMES
|
self.known_names_list_ref = known_names_list_ref # Store reference to global KNOWN_NAMES
|
||||||
@@ -1269,9 +1267,8 @@ class FavoritePostsDialog(QDialog):
|
|||||||
|
|
||||||
self.fetcher_thread = FavoritePostsFetcherThread(
|
self.fetcher_thread = FavoritePostsFetcherThread(
|
||||||
self.cookies_config,
|
self.cookies_config,
|
||||||
self.parent_app.log_signal.emit, # Pass parent's logger,
|
self.parent_app.log_signal.emit, # Pass parent's logger
|
||||||
self.target_service_domain # Pass the target domain
|
) # Removed _get_domain_for_service
|
||||||
)
|
|
||||||
self.fetcher_thread.status_update.connect(self.status_label.setText)
|
self.fetcher_thread.status_update.connect(self.status_label.setText)
|
||||||
self.fetcher_thread.finished.connect(self._on_fetch_completed)
|
self.fetcher_thread.finished.connect(self._on_fetch_completed)
|
||||||
self.fetcher_thread.progress_bar_update.connect(self._set_progress_bar_value) # Connect the missing signal
|
self.fetcher_thread.progress_bar_update.connect(self._set_progress_bar_value) # Connect the missing signal
|
||||||
@@ -6330,13 +6327,7 @@ class DownloaderApp(QWidget):
|
|||||||
'app_base_dir': self.app_base_dir
|
'app_base_dir': self.app_base_dir
|
||||||
}
|
}
|
||||||
|
|
||||||
cookie_text_lower = self.cookie_text_input.text().lower() if hasattr(self, 'cookie_text_input') else ""
|
dialog = FavoriteArtistsDialog(self, cookies_config)
|
||||||
selected_path_lower = self.selected_cookie_filepath.lower() if self.selected_cookie_filepath else ""
|
|
||||||
target_service_domain = "kemono.su" # Default
|
|
||||||
if "coomer.su" in cookie_text_lower or "coomer.party" in cookie_text_lower or \
|
|
||||||
("coomer" in selected_path_lower and ".txt" in selected_path_lower): # Check if "coomer" is in the filename part
|
|
||||||
target_service_domain = "coomer.su"
|
|
||||||
dialog = FavoriteArtistsDialog(self, cookies_config, target_service_domain)
|
|
||||||
if dialog.exec_() == QDialog.Accepted:
|
if dialog.exec_() == QDialog.Accepted:
|
||||||
selected_artists = dialog.get_selected_artists() # Changed method name
|
selected_artists = dialog.get_selected_artists() # Changed method name
|
||||||
if selected_artists:
|
if selected_artists:
|
||||||
@@ -6386,20 +6377,14 @@ class DownloaderApp(QWidget):
|
|||||||
cookie_help_dialog.exec_()
|
cookie_help_dialog.exec_()
|
||||||
return # Don't proceed to show FavoritePostsDialog if cookies are needed but not found
|
return # Don't proceed to show FavoritePostsDialog if cookies are needed but not found
|
||||||
|
|
||||||
cookie_text_lower = self.cookie_text_input.text().lower() if hasattr(self, 'cookie_text_input') else ""
|
dialog = FavoritePostsDialog(self, cookies_config, KNOWN_NAMES) # Pass KNOWN_NAMES
|
||||||
selected_path_lower = self.selected_cookie_filepath.lower() if self.selected_cookie_filepath else ""
|
|
||||||
target_service_domain_for_posts = "kemono.su" # Default
|
|
||||||
if "coomer.su" in cookie_text_lower or "coomer.party" in cookie_text_lower or \
|
|
||||||
("coomer" in selected_path_lower and ".txt" in selected_path_lower):
|
|
||||||
target_service_domain_for_posts = "coomer.su"
|
|
||||||
|
|
||||||
dialog = FavoritePostsDialog(self, cookies_config, KNOWN_NAMES, target_service_domain_for_posts) # Pass KNOWN_NAMES and target_service_domain
|
|
||||||
if dialog.exec_() == QDialog.Accepted:
|
if dialog.exec_() == QDialog.Accepted:
|
||||||
selected_posts = dialog.get_selected_posts()
|
selected_posts = dialog.get_selected_posts()
|
||||||
if selected_posts:
|
if selected_posts:
|
||||||
self.log_signal.emit(f"ℹ️ Queuing {len(selected_posts)} favorite post(s) for download.")
|
self.log_signal.emit(f"ℹ️ Queuing {len(selected_posts)} favorite post(s) for download.")
|
||||||
for post_data in selected_posts:
|
for post_data in selected_posts:
|
||||||
direct_post_url = f"https://{target_service_domain_for_posts}/{post_data['service']}/user/{post_data['creator_id']}/post/{post_data['post_id']}"
|
domain = "kemono.su" # Or determine from service/parent app settings
|
||||||
|
direct_post_url = f"https://{domain}/{post_data['service']}/user/{post_data['creator_id']}/post/{post_data['post_id']}"
|
||||||
|
|
||||||
queue_item = {
|
queue_item = {
|
||||||
'url': direct_post_url,
|
'url': direct_post_url,
|
||||||
|
|||||||
Reference in New Issue
Block a user