mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-17 15:36:51 +00:00
Commit
This commit is contained in:
parent
169ded3fd8
commit
9563ce82db
@ -1810,6 +1810,31 @@ class PostProcessorWorker:
|
|||||||
|
|
||||||
if not all_files_from_post_api:
|
if not all_files_from_post_api:
|
||||||
self.logger(f" No files found to download for post {post_id}.")
|
self.logger(f" No files found to download for post {post_id}.")
|
||||||
|
if not self.extract_links_only and should_create_post_subfolder:
|
||||||
|
path_to_check_for_emptiness = determined_post_save_path_for_history
|
||||||
|
try:
|
||||||
|
if os.path.isdir(path_to_check_for_emptiness):
|
||||||
|
dir_contents = os.listdir(path_to_check_for_emptiness)
|
||||||
|
# Check if the directory is empty OR only contains our ID file
|
||||||
|
is_effectively_empty = True
|
||||||
|
if dir_contents:
|
||||||
|
if not all(f.startswith('.postid_') for f in dir_contents):
|
||||||
|
is_effectively_empty = False
|
||||||
|
|
||||||
|
if is_effectively_empty:
|
||||||
|
self.logger(f" 🗑️ Removing empty post-specific subfolder (post had no files): '{path_to_check_for_emptiness}'")
|
||||||
|
if dir_contents:
|
||||||
|
for id_file in dir_contents:
|
||||||
|
if id_file.startswith('.postid_'):
|
||||||
|
try:
|
||||||
|
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
|
||||||
|
except OSError as e_rm_id:
|
||||||
|
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
|
||||||
|
os.rmdir(path_to_check_for_emptiness)
|
||||||
|
except OSError as e_rmdir:
|
||||||
|
self.logger(f" ⚠️ Could not remove effectively empty subfolder (no files) '{path_to_check_for_emptiness}': {e_rmdir}")
|
||||||
|
# --- END NEW CLEANUP LOGIC ---
|
||||||
|
|
||||||
history_data_for_no_files_post = {
|
history_data_for_no_files_post = {
|
||||||
'post_title': post_title,
|
'post_title': post_title,
|
||||||
'post_id': post_id,
|
'post_id': post_id,
|
||||||
@ -1823,7 +1848,7 @@ class PostProcessorWorker:
|
|||||||
result_tuple = (0, 0, [], [], [], history_data_for_no_files_post, None)
|
result_tuple = (0, 0, [], [], [], history_data_for_no_files_post, None)
|
||||||
self._emit_signal('worker_finished', result_tuple)
|
self._emit_signal('worker_finished', result_tuple)
|
||||||
return result_tuple
|
return result_tuple
|
||||||
|
|
||||||
files_to_download_info_list = []
|
files_to_download_info_list = []
|
||||||
processed_original_filenames_in_this_post = set()
|
processed_original_filenames_in_this_post = set()
|
||||||
if self.keep_in_post_duplicates:
|
if self.keep_in_post_duplicates:
|
||||||
@ -2052,9 +2077,27 @@ class PostProcessorWorker:
|
|||||||
if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
|
if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
|
||||||
path_to_check_for_emptiness = determined_post_save_path_for_history
|
path_to_check_for_emptiness = determined_post_save_path_for_history
|
||||||
try:
|
try:
|
||||||
if os.path.isdir(path_to_check_for_emptiness) and not os.listdir(path_to_check_for_emptiness):
|
if os.path.isdir(path_to_check_for_emptiness):
|
||||||
self.logger(f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness}'")
|
dir_contents = os.listdir(path_to_check_for_emptiness)
|
||||||
os.rmdir(path_to_check_for_emptiness)
|
# Check if the directory is empty OR only contains our ID file
|
||||||
|
is_effectively_empty = True
|
||||||
|
if dir_contents:
|
||||||
|
# If there are files, check if ALL of them are .postid files
|
||||||
|
if not all(f.startswith('.postid_') for f in dir_contents):
|
||||||
|
is_effectively_empty = False
|
||||||
|
|
||||||
|
if is_effectively_empty:
|
||||||
|
self.logger(f" 🗑️ Removing empty post-specific subfolder (no files downloaded): '{path_to_check_for_emptiness}'")
|
||||||
|
# We must first remove the ID file(s) before removing the dir
|
||||||
|
if dir_contents:
|
||||||
|
for id_file in dir_contents:
|
||||||
|
if id_file.startswith('.postid_'):
|
||||||
|
try:
|
||||||
|
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
|
||||||
|
except OSError as e_rm_id:
|
||||||
|
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
|
||||||
|
|
||||||
|
os.rmdir(path_to_check_for_emptiness) # Now the rmdir should work
|
||||||
except OSError as e_rmdir:
|
except OSError as e_rmdir:
|
||||||
self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
|
self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
|
||||||
|
|
||||||
@ -2066,11 +2109,29 @@ class PostProcessorWorker:
|
|||||||
if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
|
if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
|
||||||
path_to_check_for_emptiness = determined_post_save_path_for_history
|
path_to_check_for_emptiness = determined_post_save_path_for_history
|
||||||
try:
|
try:
|
||||||
if os.path.isdir(path_to_check_for_emptiness) and not os.listdir(path_to_check_for_emptiness):
|
if os.path.isdir(path_to_check_for_emptiness):
|
||||||
self.logger(f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness}'")
|
dir_contents = os.listdir(path_to_check_for_emptiness)
|
||||||
os.rmdir(path_to_check_for_emptiness)
|
# Check if the directory is empty OR only contains our ID file
|
||||||
|
is_effectively_empty = True
|
||||||
|
if dir_contents:
|
||||||
|
# If there are files, check if ALL of them are .postid files
|
||||||
|
if not all(f.startswith('.postid_') for f in dir_contents):
|
||||||
|
is_effectively_empty = False
|
||||||
|
|
||||||
|
if is_effectively_empty:
|
||||||
|
self.logger(f" 🗑️ Removing empty post-specific subfolder (no files downloaded): '{path_to_check_for_emptiness}'")
|
||||||
|
# We must first remove the ID file(s) before removing the dir
|
||||||
|
if dir_contents:
|
||||||
|
for id_file in dir_contents:
|
||||||
|
if id_file.startswith('.postid_'):
|
||||||
|
try:
|
||||||
|
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
|
||||||
|
except OSError as e_rm_id:
|
||||||
|
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
|
||||||
|
|
||||||
|
os.rmdir(path_to_check_for_emptiness) # Now the rmdir should work
|
||||||
except OSError as e_rmdir:
|
except OSError as e_rmdir:
|
||||||
self.logger(f" ⚠️ Could not remove potentially empty subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
|
self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
|
||||||
|
|
||||||
self._emit_signal('worker_finished', result_tuple)
|
self._emit_signal('worker_finished', result_tuple)
|
||||||
return result_tuple
|
return result_tuple
|
||||||
|
|||||||
@ -205,6 +205,8 @@ def match_folders_from_filename_enhanced(filename, names_to_match, unwanted_keyw
|
|||||||
"""
|
"""
|
||||||
Matches folder names from a filename, prioritizing longer and more specific aliases.
|
Matches folder names from a filename, prioritizing longer and more specific aliases.
|
||||||
It returns immediately after finding the first (longest) match.
|
It returns immediately after finding the first (longest) match.
|
||||||
|
MODIFIED: Prioritizes boundary-aware matches for Latin characters,
|
||||||
|
falls back to substring search for CJK compatibility.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
filename (str): The filename to check.
|
filename (str): The filename to check.
|
||||||
@ -230,10 +232,9 @@ def match_folders_from_filename_enhanced(filename, names_to_match, unwanted_keyw
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
for alias in name_obj.get("aliases", []):
|
for alias in name_obj.get("aliases", []):
|
||||||
# <<< MODIFICATION: Ensure alias is not empty before converting to lower case >>>
|
|
||||||
if alias: # Check if alias is not None and not an empty string
|
if alias: # Check if alias is not None and not an empty string
|
||||||
alias_lower_val = alias.lower()
|
alias_lower_val = alias.lower()
|
||||||
if alias_lower_val: # Check again after lowercasing (handles case where alias might be just spaces)
|
if alias_lower_val: # Check again after lowercasing
|
||||||
alias_map_to_primary.append((alias_lower_val, cleaned_primary_name))
|
alias_map_to_primary.append((alias_lower_val, cleaned_primary_name))
|
||||||
|
|
||||||
# Sort by alias length, descending, to match longer aliases first
|
# Sort by alias length, descending, to match longer aliases first
|
||||||
@ -241,9 +242,33 @@ def match_folders_from_filename_enhanced(filename, names_to_match, unwanted_keyw
|
|||||||
|
|
||||||
# Return the FIRST match found, which will be the longest
|
# Return the FIRST match found, which will be the longest
|
||||||
for alias_lower, primary_name_for_alias in alias_map_to_primary:
|
for alias_lower, primary_name_for_alias in alias_map_to_primary:
|
||||||
if alias_lower in filename_lower:
|
try:
|
||||||
# Found the longest possible alias that is a substring. Return immediately.
|
# 1. Attempt boundary-aware match first (good for English/Latin)
|
||||||
return [primary_name_for_alias]
|
# Matches alias if it's at the start/end or surrounded by common separators
|
||||||
|
# We use word boundaries (\b) and also check for common non-word separators like +_-
|
||||||
|
pattern = r'(?:^|[\s_+-])' + re.escape(alias_lower) + r'(?:[\s_+-]|$)'
|
||||||
|
|
||||||
|
if re.search(pattern, filename_lower):
|
||||||
|
# Found a precise, boundary-aware match. This is the best case.
|
||||||
|
return [primary_name_for_alias]
|
||||||
|
|
||||||
|
# 2. Fallback: Simple substring check (for CJK or other cases)
|
||||||
|
# This executes ONLY if the boundary match above failed.
|
||||||
|
# We check if the alias contains CJK OR if the filename does.
|
||||||
|
# This avoids applying the simple 'in' check for Latin-only aliases in Latin-only filenames.
|
||||||
|
elif (contains_cjk(alias_lower) or contains_cjk(filename_lower)) and alias_lower in filename_lower:
|
||||||
|
# This is the fallback for CJK compatibility.
|
||||||
|
return [primary_name_for_alias]
|
||||||
|
|
||||||
|
# If alias is "ul" and filename is "sin+título":
|
||||||
|
# 1. re.search(r'(?:^|[\s_+-])ul(?:[\s_+-]|$)', "sin+título") -> Fails (good)
|
||||||
|
# 2. contains_cjk("ul") -> False
|
||||||
|
# 3. contains_cjk("sin+título") -> False
|
||||||
|
# 4. No match is found for "ul". (correct)
|
||||||
|
|
||||||
|
except re.error as e:
|
||||||
|
print(f"Regex error matching alias '{alias_lower}' in filename '{filename_lower}': {e}")
|
||||||
|
continue # Skip this alias if regex fails
|
||||||
|
|
||||||
# If the loop finishes without any matches, return an empty list.
|
# If the loop finishes without any matches, return an empty list.
|
||||||
return []
|
return []
|
||||||
Loading…
x
Reference in New Issue
Block a user