mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
commit
This commit is contained in:
@@ -67,6 +67,24 @@ FOLDER_NAME_STOP_WORDS = {
|
||||
"right", "s", "she", "so", "technically", "tell", "the", "their", "they", "this",
|
||||
"to", "ve", "was", "we", "well", "were", "with", "www", "year", "you", "your",
|
||||
}
|
||||
|
||||
CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS = {
|
||||
"poll", "cover", "fan-art", "fanart", "requests", "request", "holiday",
|
||||
# Numbers 1-20 (as strings and words)
|
||||
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
|
||||
"11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
|
||||
"one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
|
||||
"eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen",
|
||||
"eighteen", "nineteen", "twenty",
|
||||
# Months (short and long)
|
||||
"jan", "january", "feb", "february", "mar", "march", "apr", "april",
|
||||
"may", "jun", "june", "jul", "july", "aug", "august", "sep", "september",
|
||||
"oct", "october", "nov", "november", "dec", "december",
|
||||
# Weekdays (short and long)
|
||||
"mon", "monday", "tue", "tuesday", "wed", "wednesday", "thu", "thursday",
|
||||
"fri", "friday", "sat", "saturday", "sun", "sunday"
|
||||
}
|
||||
|
||||
def parse_cookie_string(cookie_string):
|
||||
"""Parses a 'name=value; name2=value2' cookie string into a dict."""
|
||||
cookies = {}
|
||||
@@ -588,6 +606,7 @@ class PostProcessorWorker:
|
||||
manga_date_prefix=MANGA_DATE_PREFIX_DEFAULT, # New parameter for date-based prefix
|
||||
manga_date_file_counter_ref=None, # New parameter for date-based manga naming
|
||||
scan_content_for_images=False, # New flag for scanning HTML content
|
||||
creator_download_folder_ignore_words=None, # New: For ignoring specific words for folder names
|
||||
manga_global_file_counter_ref=None, # New parameter for global numbering
|
||||
): # type: ignore
|
||||
self.post = post_data # type: ignore
|
||||
@@ -637,7 +656,9 @@ class PostProcessorWorker:
|
||||
self.use_cookie = use_cookie # Store cookie setting
|
||||
self.override_output_dir = override_output_dir # Store the override directory
|
||||
self.scan_content_for_images = scan_content_for_images # Store new flag
|
||||
self.creator_download_folder_ignore_words = creator_download_folder_ignore_words # Store new ignore words
|
||||
if self.compress_images and Image is None:
|
||||
# self.logger is not available yet, PostProcessorSignals.progress_signal.emit can be used or print
|
||||
self.logger("⚠️ Image compression disabled: Pillow library not found.")
|
||||
self.compress_images = False
|
||||
def _emit_signal(self, signal_type_str, *payload_args):
|
||||
@@ -1052,6 +1073,13 @@ class PostProcessorWorker:
|
||||
post_id = post_data.get('id', 'unknown_id')
|
||||
post_main_file_info = post_data.get('file')
|
||||
post_attachments = post_data.get('attachments', [])
|
||||
|
||||
effective_unwanted_keywords_for_folder_naming = self.unwanted_keywords.copy()
|
||||
is_full_creator_download_no_char_filter = not self.target_post_id_from_initial_url and not current_character_filters
|
||||
if is_full_creator_download_no_char_filter and self.creator_download_folder_ignore_words:
|
||||
self.logger(f" Applying creator download specific folder ignore words ({len(self.creator_download_folder_ignore_words)} words).")
|
||||
effective_unwanted_keywords_for_folder_naming.update(self.creator_download_folder_ignore_words)
|
||||
|
||||
post_content_html = post_data.get('content', '')
|
||||
self.logger(f"\n--- Processing Post {post_id} ('{post_title[:50]}...') (Thread: {threading.current_thread().name}) ---")
|
||||
num_potential_files_in_post = len(post_attachments or []) + (1 if post_main_file_info and post_main_file_info.get('path') else 0)
|
||||
@@ -1204,16 +1232,48 @@ class PostProcessorWorker:
|
||||
log_reason_for_folder = "Matched char filter in title"
|
||||
if primary_char_filter_for_folder:
|
||||
base_folder_names_for_post_content = [clean_folder_name(primary_char_filter_for_folder["name"])]
|
||||
cleaned_primary_folder_name = clean_folder_name(primary_char_filter_for_folder["name"])
|
||||
if cleaned_primary_folder_name.lower() in effective_unwanted_keywords_for_folder_naming and cleaned_primary_folder_name.lower() != "untitled_folder":
|
||||
self.logger(f" ⚠️ Primary char filter folder name '{cleaned_primary_folder_name}' is in ignore list. Using generic name.")
|
||||
base_folder_names_for_post_content = ["Generic Post Content"]
|
||||
else:
|
||||
base_folder_names_for_post_content = [cleaned_primary_folder_name]
|
||||
self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}")
|
||||
elif not current_character_filters: # No char filters defined, use generic logic
|
||||
derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
|
||||
if derived_folders:
|
||||
base_folder_names_for_post_content.extend(match_folders_from_title(post_title, KNOWN_NAMES, self.unwanted_keywords))
|
||||
# 1. Try to match folder names from Known.txt using the post title
|
||||
derived_folders_from_known_txt = match_folders_from_title(
|
||||
post_title,
|
||||
self.known_names,
|
||||
effective_unwanted_keywords_for_folder_naming
|
||||
)
|
||||
|
||||
# Filter out any "untitled_folder" that might come from Known.txt if the primary name was problematic,
|
||||
# and also filter empty strings.
|
||||
valid_derived_folders = [
|
||||
name for name in derived_folders_from_known_txt
|
||||
if name and name.strip() and name.lower() != "untitled_folder"
|
||||
]
|
||||
|
||||
if valid_derived_folders:
|
||||
base_folder_names_for_post_content.extend(valid_derived_folders)
|
||||
self.logger(f" Base folder name(s) for post content (Derived from Known.txt & Post Title): {', '.join(base_folder_names_for_post_content)}")
|
||||
else:
|
||||
base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
|
||||
if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
|
||||
base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
|
||||
self.logger(f" Base folder name(s) for post content (Generic title parsing - no char filters): {', '.join(base_folder_names_for_post_content)}")
|
||||
# 2. If no valid folders from Known.txt, fall back to extracting from title directly.
|
||||
extracted_folder_name = extract_folder_name_from_title(
|
||||
post_title,
|
||||
effective_unwanted_keywords_for_folder_naming
|
||||
)
|
||||
base_folder_names_for_post_content.append(extracted_folder_name)
|
||||
self.logger(f" Base folder name(s) for post content (Generic title parsing - no valid Known.txt match): {', '.join(base_folder_names_for_post_content)}")
|
||||
|
||||
# 3. Final cleanup: Ensure list is not empty and contains valid, non-empty strings.
|
||||
base_folder_names_for_post_content = [
|
||||
name for name in base_folder_names_for_post_content if name and name.strip()
|
||||
]
|
||||
if not base_folder_names_for_post_content:
|
||||
final_fallback_name = clean_folder_name(post_title if post_title and post_title.strip() else "Generic Post Content")
|
||||
base_folder_names_for_post_content = [final_fallback_name]
|
||||
self.logger(f" Fallback folder name due to all derivations failing: {final_fallback_name}")
|
||||
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
|
||||
if self._check_pause(f"Folder keyword skip check for post {post_id}"): return 0, num_potential_files_in_post, []
|
||||
for folder_name_to_check in base_folder_names_for_post_content: # type: ignore
|
||||
@@ -1547,6 +1607,7 @@ class DownloadThread(QThread):
|
||||
manga_global_file_counter_ref=None, # New parameter for global numbering
|
||||
use_cookie=False, # Added: Expected by main.py
|
||||
scan_content_for_images=False, # Added new flag
|
||||
creator_download_folder_ignore_words=None, # Added for DownloadThread
|
||||
cookie_text="", # Added: Expected by main.py
|
||||
):
|
||||
super().__init__()
|
||||
@@ -1597,6 +1658,7 @@ class DownloadThread(QThread):
|
||||
self.override_output_dir = override_output_dir # Store override dir
|
||||
self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store for passing to worker by DownloadThread
|
||||
self.scan_content_for_images = scan_content_for_images # Store new flag
|
||||
self.creator_download_folder_ignore_words = creator_download_folder_ignore_words # Store new ignore words
|
||||
self.manga_global_file_counter_ref = manga_global_file_counter_ref # Store for global numbering
|
||||
if self.compress_images and Image is None:
|
||||
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
|
||||
@@ -1718,6 +1780,7 @@ class DownloadThread(QThread):
|
||||
use_cookie=self.use_cookie, # Pass cookie setting to worker
|
||||
manga_date_file_counter_ref=current_manga_date_file_counter_ref, # Pass the calculated or passed-in ref
|
||||
scan_content_for_images=self.scan_content_for_images, # Pass new flag
|
||||
creator_download_folder_ignore_words=self.creator_download_folder_ignore_words, # Pass new ignore words
|
||||
)
|
||||
try:
|
||||
dl_count, skip_count, kept_originals_this_post, retryable_failures, permanent_failures = post_processing_worker.process()
|
||||
|
||||
46
main.py
46
main.py
@@ -59,7 +59,8 @@ try:
|
||||
CHAR_SCOPE_COMMENTS,
|
||||
FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER,
|
||||
STYLE_DATE_BASED,
|
||||
STYLE_POST_TITLE_GLOBAL_NUMBERING
|
||||
STYLE_POST_TITLE_GLOBAL_NUMBERING,
|
||||
CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS # Added import
|
||||
|
||||
)
|
||||
print("Successfully imported names from downloader_utils.")
|
||||
@@ -93,6 +94,7 @@ except ImportError as e:
|
||||
FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER = "failed_retry_later"
|
||||
STYLE_DATE_BASED = "date_based"
|
||||
STYLE_POST_TITLE_GLOBAL_NUMBERING = "post_title_global_numbering"
|
||||
CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS = set() # Mock for import error
|
||||
|
||||
except Exception as e:
|
||||
print(f"--- UNEXPECTED IMPORT ERROR ---")
|
||||
@@ -3077,8 +3079,12 @@ class DownloaderApp(QWidget):
|
||||
|
||||
|
||||
self.main_splitter.addWidget(left_panel_widget)
|
||||
self.main_splitter.addWidget(right_panel_widget)
|
||||
initial_width = self.width()
|
||||
self.main_splitter.addWidget(right_panel_widget) # type: ignore
|
||||
# Ensure the window has a size before calculating splitter sizes
|
||||
if self.width() == 0 or self.height() == 0: # Default size if not shown yet
|
||||
initial_width = 1024 # A reasonable default
|
||||
else:
|
||||
initial_width = self.width()
|
||||
left_width = int(initial_width * 0.35)
|
||||
right_width = initial_width - left_width
|
||||
self.main_splitter.setSizes([left_width, right_width])
|
||||
@@ -4476,6 +4482,16 @@ class DownloaderApp(QWidget):
|
||||
QMessageBox.critical(self, "Input Error", "Invalid or unsupported URL format.")
|
||||
return False # Indicate failure to start
|
||||
|
||||
creator_folder_ignore_words_for_run = None
|
||||
is_full_creator_download = not post_id_from_url
|
||||
# Use actual_filters_to_use_for_run which is populated after parsing character_input
|
||||
# This check needs to happen *after* actual_filters_to_use_for_run is determined.
|
||||
# We will move this logic block down.
|
||||
# if is_full_creator_download and character_filters_are_empty:
|
||||
# creator_folder_ignore_words_for_run = CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS
|
||||
# log_messages.append(f" Creator Download (No Char Filter): Applying default folder name ignore list ({len(creator_folder_ignore_words_for_run)} words).")
|
||||
|
||||
|
||||
|
||||
if compress_images and Image is None:
|
||||
QMessageBox.warning(self, "Missing Dependency", "Pillow library (for image compression) not found. Compression will be disabled.")
|
||||
@@ -4635,6 +4651,13 @@ class DownloaderApp(QWidget):
|
||||
else:
|
||||
self.log_signal.emit("⚠️ Proceeding with Manga Mode without a specific title filter.")
|
||||
self.dynamic_character_filter_holder.set_filters(actual_filters_to_use_for_run)
|
||||
|
||||
# Determine creator_folder_ignore_words_for_run *after* actual_filters_to_use_for_run is set
|
||||
creator_folder_ignore_words_for_run = None
|
||||
character_filters_are_empty = not actual_filters_to_use_for_run # Now this is accurate
|
||||
if is_full_creator_download and character_filters_are_empty: # is_full_creator_download defined earlier
|
||||
creator_folder_ignore_words_for_run = CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS
|
||||
log_messages.append(f" Creator Download (No Char Filter): Applying default folder name ignore list ({len(creator_folder_ignore_words_for_run)} words).")
|
||||
|
||||
custom_folder_name_cleaned = None
|
||||
if use_subfolders and post_id_from_url and self.custom_folder_widget and self.custom_folder_widget.isVisible() and not extract_links_only:
|
||||
@@ -4747,8 +4770,8 @@ class DownloaderApp(QWidget):
|
||||
should_use_multithreading_for_posts = use_multithreading_enabled_by_checkbox and not post_id_from_url
|
||||
if manga_mode and (self.manga_filename_style == STYLE_DATE_BASED or self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING) and not post_id_from_url:
|
||||
enforced_by_style = "Date Mode" if self.manga_filename_style == STYLE_DATE_BASED else "Title+GlobalNum Mode"
|
||||
log_messages.append(f" Threading: Single-threaded (posts) - Enforced by Manga {enforced_by_style}")
|
||||
should_use_multithreading_for_posts = False # Ensure this reflects the forced state
|
||||
log_messages.append(f" Threading: Single-threaded (posts) - Enforced by Manga {enforced_by_style} (Actual workers: {effective_num_post_workers if effective_num_post_workers > 1 else 1})")
|
||||
else:
|
||||
log_messages.append(f" Threading: {'Multi-threaded (posts)' if should_use_multithreading_for_posts else 'Single-threaded (posts)'}")
|
||||
if should_use_multithreading_for_posts:
|
||||
@@ -4757,8 +4780,10 @@ class DownloaderApp(QWidget):
|
||||
for msg in log_messages: self.log_signal.emit(msg)
|
||||
|
||||
self.set_ui_enabled(False)
|
||||
|
||||
unwanted_keywords_for_folders = {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'}
|
||||
|
||||
# Use the global FOLDER_NAME_STOP_WORDS from downloader_utils
|
||||
from downloader_utils import FOLDER_NAME_STOP_WORDS
|
||||
# unwanted_keywords_for_folders = {'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'} # Old specific set
|
||||
|
||||
args_template = {
|
||||
'api_url_input': api_url,
|
||||
@@ -4790,8 +4815,8 @@ class DownloaderApp(QWidget):
|
||||
'end_page': end_page,
|
||||
'target_post_id_from_initial_url': post_id_from_url,
|
||||
'custom_folder_name': custom_folder_name_cleaned,
|
||||
'manga_mode_active': manga_mode,
|
||||
'unwanted_keywords': unwanted_keywords_for_folders,
|
||||
'manga_mode_active': manga_mode, # type: ignore
|
||||
'unwanted_keywords': FOLDER_NAME_STOP_WORDS, # Pass the global set
|
||||
'cancellation_event': self.cancellation_event,
|
||||
'manga_date_prefix': manga_date_prefix_text, # NEW ARGUMENT
|
||||
'dynamic_character_filter_holder': self.dynamic_character_filter_holder, # Pass the holder
|
||||
@@ -4806,6 +4831,7 @@ class DownloaderApp(QWidget):
|
||||
'manga_global_file_counter_ref': manga_global_file_counter_ref_for_thread, # Pass new counter
|
||||
'app_base_dir': app_base_dir_for_cookies, # Pass app base dir
|
||||
'use_cookie': use_cookie_for_this_run, # Pass the potentially modified cookie setting
|
||||
'creator_download_folder_ignore_words': creator_folder_ignore_words_for_run, # New
|
||||
}
|
||||
|
||||
args_template['override_output_dir'] = override_output_dir # Pass override dir in template
|
||||
@@ -5044,7 +5070,9 @@ class DownloaderApp(QWidget):
|
||||
'num_file_threads', 'skip_current_file_flag', 'manga_date_file_counter_ref', 'scan_content_for_images', # Added scan_content_for_images
|
||||
'manga_mode_active', 'manga_filename_style', 'manga_date_prefix', # ADD manga_date_prefix
|
||||
'manga_global_file_counter_ref' # Add new counter here
|
||||
]
|
||||
, 'creator_download_folder_ignore_words' # Add new ignore words list
|
||||
] # type: ignore
|
||||
|
||||
ppw_optional_keys_with_defaults = {
|
||||
'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'remove_from_filename_words_list',
|
||||
'show_external_links', 'extract_links_only', 'duplicate_file_mode', # Added duplicate_file_mode here
|
||||
|
||||
Reference in New Issue
Block a user