def match_folders_from_filename_enhanced(filename, names_to_match, unwanted_keywords):
    """Return the known folder names whose aliases are a prefix of *filename*.

    The comparison is case-insensitive and uses the raw filename: it is only
    lowercased, no other cleanup is applied before matching.

    Args:
        filename: File name to inspect. Falsy values yield an empty result.
        names_to_match: Iterable of dicts with a ``"name"`` key (the primary
            folder name) and an optional ``"aliases"`` key (iterable of
            strings). Only aliases are compared against the filename; the
            primary name matches only if it is also listed as an alias.
        unwanted_keywords: Container of lowercase keywords. An entry whose
            cleaned primary name (lowercased) is in this container is
            ignored. ``None`` is treated as "no unwanted keywords".

    Returns:
        Alphabetically sorted list of the cleaned primary folder names that
        have at least one non-empty alias the filename starts with.
    """
    if not filename or not names_to_match:
        return []

    filename_lower = filename.lower()
    unwanted = unwanted_keywords or ()
    matched_primary_names = set()

    for name_obj in names_to_match:
        primary_folder_name = name_obj.get("name")
        if not primary_folder_name:
            continue

        # Clean the primary name once per entry; entries that clean down to
        # nothing, or to an unwanted keyword, never produce a folder.
        cleaned_primary_name = clean_folder_name(primary_folder_name)
        if not cleaned_primary_name or cleaned_primary_name.lower() in unwanted:
            continue

        # Every matching alias contributes its primary name and the result is
        # sorted alphabetically, so the order aliases are tested in does not
        # matter. Empty or non-string aliases are skipped rather than raising.
        alias_prefixes = tuple(
            alias.lower()
            for alias in (name_obj.get("aliases") or ())
            if isinstance(alias, str) and alias
        )
        if alias_prefixes and filename_lower.startswith(alias_prefixes):
            matched_primary_names.add(cleaned_primary_name)

    return sorted(matched_primary_names)