diff --git a/downloader_utils.py b/downloader_utils.py index cce762b..a2e2660 100644 --- a/downloader_utils.py +++ b/downloader_utils.py @@ -649,6 +649,7 @@ class PostProcessorWorker: app_base_dir=None, # New parameter for app's base directory manga_date_prefix=MANGA_DATE_PREFIX_DEFAULT, # New parameter for date-based prefix manga_date_file_counter_ref=None, # New parameter for date-based manga naming + scan_content_for_images=False, # New flag for scanning HTML content manga_global_file_counter_ref=None, # New parameter for global numbering ): # type: ignore self.post = post_data @@ -699,6 +700,7 @@ class PostProcessorWorker: self.manga_date_prefix = manga_date_prefix # Store the prefix self.manga_global_file_counter_ref = manga_global_file_counter_ref # Store global counter self.use_cookie = use_cookie # Store cookie setting + self.scan_content_for_images = scan_content_for_images # Store new flag if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found.") @@ -1386,14 +1388,14 @@ class PostProcessorWorker: if original_api_name: all_files_from_post_api.append({ 'url': f"https://{api_file_domain}{file_path}" if file_path.startswith('/') else f"https://{api_file_domain}/data/{file_path}", - 'name': original_api_name, + 'name': original_api_name, # This is the cleaned/API provided name '_original_name_for_log': original_api_name, - '_is_thumbnail': self.download_thumbnails and is_image(original_api_name) + '_is_thumbnail': is_image(original_api_name) # Mark if it's an image from API }) else: self.logger(f" ⚠️ Skipping main file for post {post_id}: Missing name (Path: {file_path})") for idx, att_info in enumerate(post_attachments): - if isinstance(att_info, dict) and att_info.get('path'): + if isinstance(att_info, dict) and att_info.get('path'): # Ensure att_info is a dict att_path = att_info['path'].lstrip('/') original_api_att_name = att_info.get('name') or os.path.basename(att_path) if original_api_att_name: @@ 
-1401,16 +1403,99 @@ class PostProcessorWorker: 'url': f"https://{api_file_domain}{att_path}" if att_path.startswith('/') else f"https://{api_file_domain}/data/{att_path}", 'name': original_api_att_name, '_original_name_for_log': original_api_att_name, - '_is_thumbnail': self.download_thumbnails and is_image(original_api_att_name) + '_is_thumbnail': is_image(original_api_att_name) # Mark if it's an image from API }) else: self.logger(f" ⚠️ Skipping attachment {idx+1} for post {post_id}: Missing name (Path: {att_path})") else: self.logger(f" ⚠️ Skipping invalid attachment {idx+1} for post {post_id}: {str(att_info)[:100]}") + # --- New: Scan post content for additional image URLs if enabled --- + if self.scan_content_for_images and post_content_html and not self.extract_links_only: # This block was duplicated, ensure only one exists + self.logger(f" Scanning post content for additional image URLs (Post ID: {post_id})...") + + parsed_input_url = urlparse(self.api_url_input) + base_url_for_relative_paths = f"{parsed_input_url.scheme}://{parsed_input_url.netloc}" + img_ext_pattern = "|".join(ext.lstrip('.') for ext in IMAGE_EXTENSIONS) + + # 1. Regex for direct absolute image URLs in text + direct_url_pattern_str = r"""(?i)\b(https?://[^\s"'<>\[\]\{\}\|\^\\^~\[\]`]+\.(?:""" + img_ext_pattern + r"""))\b""" + # 2. 
Regex for tags (captures src content) + img_tag_src_pattern_str = r"""]*?src\s*=\s*["']([^"']+)["']""" + + found_image_sources = set() + + for direct_url_match in re.finditer(direct_url_pattern_str, post_content_html): + found_image_sources.add(direct_url_match.group(1)) + + for img_tag_match in re.finditer(img_tag_src_pattern_str, post_content_html, re.IGNORECASE): + src_attr = img_tag_match.group(1).strip() + src_attr = html.unescape(src_attr) + if not src_attr: continue + + resolved_src_url = "" + if src_attr.startswith(('http://', 'https://')): + resolved_src_url = src_attr + elif src_attr.startswith('//'): + resolved_src_url = f"{parsed_input_url.scheme}:{src_attr}" + elif src_attr.startswith('/'): + resolved_src_url = f"{base_url_for_relative_paths}{src_attr}" + + if resolved_src_url: + parsed_resolved_url = urlparse(resolved_src_url) + if any(parsed_resolved_url.path.lower().endswith(ext) for ext in IMAGE_EXTENSIONS): + found_image_sources.add(resolved_src_url) + + if found_image_sources: + self.logger(f" Found {len(found_image_sources)} potential image URLs/sources in content.") + existing_urls_in_api_list = {f_info['url'] for f_info in all_files_from_post_api} + + for found_url in found_image_sources: # Iterate over the unique, resolved URLs + if self.check_cancel(): break + if found_url in existing_urls_in_api_list: + self.logger(f" Skipping URL from content (already in API list or previously added from content): {found_url[:70]}...") + continue + try: + parsed_found_url = urlparse(found_url) + url_filename = os.path.basename(parsed_found_url.path) + if not url_filename or not is_image(url_filename): + self.logger(f" Skipping URL from content (no filename part or not an image extension): {found_url[:70]}...") + continue + + self.logger(f" Adding image from content: {url_filename} (URL: {found_url[:70]}...)") + all_files_from_post_api.append({ + 'url': found_url, + 'name': url_filename, + '_original_name_for_log': url_filename, + '_is_thumbnail': False, # 
Images from content are not API thumbnails + '_from_content_scan': True + }) + existing_urls_in_api_list.add(found_url) + except Exception as e_url_parse: + self.logger(f" Error processing URL from content '{found_url[:70]}...': {e_url_parse}") + else: + self.logger(f" No additional image URLs found in post content scan for post {post_id}.") + # --- End of new content scanning logic --- + + # --- Final filtering based on download_thumbnails and scan_content_for_images flags --- if self.download_thumbnails: - all_files_from_post_api = [finfo for finfo in all_files_from_post_api if finfo['_is_thumbnail']] - if not all_files_from_post_api: - self.logger(f" -> No image thumbnails found for post {post_id} in thumbnail-only mode.") - return 0, 0, [], [] + if self.scan_content_for_images: + # Both "Download Thumbnails Only" AND "Scan Content for Images" are checked. + # Prioritize images from content scan. + self.logger(f" Mode: 'Download Thumbnails Only' + 'Scan Content for Images' active. Prioritizing images from content scan for post {post_id}.") + all_files_from_post_api = [finfo for finfo in all_files_from_post_api if finfo.get('_from_content_scan')] + if not all_files_from_post_api: + self.logger(f" -> No images found via content scan for post {post_id} in this combined mode.") + return 0, 0, [], [] # No files to download for this post + else: + # Only "Download Thumbnails Only" is checked. Filter for API thumbnails. + self.logger(f" Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id}.") + all_files_from_post_api = [finfo for finfo in all_files_from_post_api if finfo.get('_is_thumbnail')] + if not all_files_from_post_api: + self.logger(f" -> No API image thumbnails found for post {post_id} in thumbnail-only mode.") + return 0, 0, [], [] # No files to download for this post + # If self.download_thumbnails is False, all_files_from_post_api remains as is. 
+ # It will contain all API files (images marked with _is_thumbnail: True, others False) + # and potentially content-scanned images (marked with _from_content_scan: True). + if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED: def natural_sort_key_for_files(file_api_info): name = file_api_info.get('_original_name_for_log', '').lower() @@ -1623,6 +1708,7 @@ class DownloadThread(QThread): manga_date_file_counter_ref=None, # New parameter manga_global_file_counter_ref=None, # New parameter for global numbering use_cookie=False, # Added: Expected by main.py + scan_content_for_images=False, # Added new flag cookie_text="", # Added: Expected by main.py ): super().__init__() @@ -1673,6 +1759,7 @@ class DownloadThread(QThread): self.cookie_text = cookie_text # Store cookie text self.use_cookie = use_cookie # Store cookie setting self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store for passing to worker by DownloadThread + self.scan_content_for_images = scan_content_for_images # Store new flag self.manga_global_file_counter_ref = manga_global_file_counter_ref # Store for global numbering if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") @@ -1806,6 +1893,7 @@ class DownloadThread(QThread): manga_global_file_counter_ref=self.manga_global_file_counter_ref, # Pass the ref use_cookie=self.use_cookie, # Pass cookie setting to worker manga_date_file_counter_ref=current_manga_date_file_counter_ref, # Pass the calculated or passed-in ref + scan_content_for_images=self.scan_content_for_images, # Pass new flag ) try: dl_count, skip_count, kept_originals_this_post, retryable_failures = post_processing_worker.process() diff --git a/main.py b/main.py index 2d53590..a935665 100644 --- a/main.py +++ b/main.py @@ -57,6 +57,7 @@ try: FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, STYLE_DATE_BASED, # Import new manga style STYLE_POST_TITLE_GLOBAL_NUMBERING # Import new 
manga style + # IMAGE_EXTENSIONS will be used from downloader_utils directly ) print("Successfully imported names from downloader_utils.") except ImportError as e: @@ -124,6 +125,7 @@ ALLOW_MULTIPART_DOWNLOAD_KEY = "allowMultipartDownloadV1" USE_COOKIE_KEY = "useCookieV1" # New setting key COOKIE_TEXT_KEY = "cookieTextV1" # New setting key for cookie text CHAR_FILTER_SCOPE_KEY = "charFilterScopeV1" +SCAN_CONTENT_IMAGES_KEY = "scanContentForImagesV1" # New setting key CONFIRM_ADD_ALL_ACCEPTED = 1 CONFIRM_ADD_ALL_SKIP_ADDING = 2 @@ -815,6 +817,12 @@ class DownloaderApp(QWidget): self.prompt_mutex = QMutex() self._add_character_response = None + # Store original tooltips for dynamic updates. Label changed, tooltip content remains valid. + self._original_scan_content_tooltip = ("If checked, the downloader will scan the HTML content of posts for image URLs (from tags or direct links).\n" + "This includes resolving relative paths from tags to full URLs.\n" + "Relative paths in tags (e.g., /data/image.jpg) will be resolved to full URLs.\n" + "Useful for cases where images are in the post description but not in the API's file/attachment list.") + self.downloaded_files = set() self.downloaded_files_lock = threading.Lock() self.downloaded_file_hashes = set() @@ -857,6 +865,7 @@ class DownloaderApp(QWidget): self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_FILES, type=str) # Default to Files self.allow_multipart_download_setting = False self.use_cookie_setting = False # Always default to False on launch + self.scan_content_images_setting = self.settings.value(SCAN_CONTENT_IMAGES_KEY, False, type=bool) # Load new setting self.cookie_text_setting = "" # Always default to empty on launch print(f"ℹ️ Known.txt will be loaded/saved at: {self.config_file}") @@ -878,6 +887,7 @@ class DownloaderApp(QWidget): self.log_signal.emit(f"ℹ️ Multi-part download defaults to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'} on launch") 
self.log_signal.emit(f"ℹ️ Cookie text defaults to: Empty on launch") self.log_signal.emit(f"ℹ️ 'Use Cookie' setting defaults to: Disabled on launch") + self.log_signal.emit(f"ℹ️ Scan post content for images defaults to: {'Enabled' if self.scan_content_images_setting else 'Disabled'}") def _get_tooltip_for_character_input(self): return ( @@ -902,6 +912,8 @@ class DownloaderApp(QWidget): self.cookie_browse_button.clicked.connect(self._browse_cookie_file) if hasattr(self, 'cookie_text_input'): # Connect text changed for manual clear detection self.cookie_text_input.textChanged.connect(self._handle_cookie_text_manual_change) + if hasattr(self, 'download_thumbnails_checkbox'): # Connect the new handler + self.download_thumbnails_checkbox.toggled.connect(self._handle_thumbnail_mode_change) self.gui_update_timer.timeout.connect(self._process_worker_queue) self.gui_update_timer.start(100) # Check queue every 100ms self.log_signal.connect(self.handle_main_log) @@ -1123,6 +1135,7 @@ class DownloaderApp(QWidget): self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope) self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting) self.settings.setValue(COOKIE_TEXT_KEY, self.cookie_text_input.text() if hasattr(self, 'cookie_text_input') else "") + self.settings.setValue(SCAN_CONTENT_IMAGES_KEY, self.scan_content_images_checkbox.isChecked() if hasattr(self, 'scan_content_images_checkbox') else False) self.settings.setValue(USE_COOKIE_KEY, self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False) self.settings.sync() @@ -1371,12 +1384,23 @@ class DownloaderApp(QWidget): row1_layout.addWidget(self.skip_rar_checkbox) self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only") self.download_thumbnails_checkbox.setChecked(False) - self.download_thumbnails_checkbox.setToolTip("Thumbnail download functionality is currently limited without the API.") + self.download_thumbnails_checkbox.setToolTip( + 
"Downloads small preview images from the API instead of full-sized files (if available).\n" + "If 'Scan Post Content for Image URLs' is also checked, this mode will *only* download images found by the content scan (ignoring API thumbnails)." + ) row1_layout.addWidget(self.download_thumbnails_checkbox) + + self.scan_content_images_checkbox = QCheckBox("Scan Content for Images") # Shortened Label + self.scan_content_images_checkbox.setToolTip( + self._original_scan_content_tooltip) # Use stored original tooltip + self.scan_content_images_checkbox.setChecked(self.scan_content_images_setting) # Set from loaded setting + row1_layout.addWidget(self.scan_content_images_checkbox) # Added to row1_layout + self.compress_images_checkbox = QCheckBox("Compress Large Images (to WebP)") self.compress_images_checkbox.setChecked(False) self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).") row1_layout.addWidget(self.compress_images_checkbox) + row1_layout.addStretch(1) checkboxes_group_layout.addLayout(row1_layout) @@ -1464,6 +1488,7 @@ class DownloaderApp(QWidget): self.manga_mode_checkbox = QCheckBox("Manga/Comic Mode") self.manga_mode_checkbox.setToolTip("Downloads posts from oldest to newest and renames files based on post title (for creator feeds only).") self.manga_mode_checkbox.setChecked(False) + advanced_row2_layout.addWidget(self.manga_mode_checkbox) # Keep manga mode checkbox here advanced_row2_layout.addStretch(1) @@ -1684,6 +1709,8 @@ class DownloaderApp(QWidget): self._update_skip_scope_button_text() self._update_char_filter_scope_button_text() self._update_multithreading_for_date_mode() # Ensure correct initial state + if hasattr(self, 'download_thumbnails_checkbox'): # Set initial state for scan_content checkbox based on thumbnail checkbox + self._handle_thumbnail_mode_change(self.download_thumbnails_checkbox.isChecked()) def _browse_cookie_file(self): """Opens a file dialog to select a cookie file.""" @@ -2838,6 
+2865,7 @@ class DownloaderApp(QWidget): raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else "" allow_multipart = self.allow_multipart_download_setting # Use the internal setting remove_from_filename_words_list = [word.strip() for word in raw_remove_filename_words.split(',') if word.strip()] + scan_content_for_images = self.scan_content_images_checkbox.isChecked() if hasattr(self, 'scan_content_images_checkbox') else False use_cookie_from_checkbox = self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False app_base_dir_for_cookies = os.path.dirname(self.config_file) # Directory of Known.txt cookie_text_from_input = self.cookie_text_input.text().strip() if hasattr(self, 'cookie_text_input') and use_cookie_from_checkbox else "" @@ -3141,6 +3169,7 @@ class DownloaderApp(QWidget): f" Compress Images: {'Enabled' if compress_images else 'Disabled'}", f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}" # Removed duplicate file handling log ]) + log_messages.append(f" Scan Post Content for Images: {'Enabled' if scan_content_for_images else 'Disabled'}") else: log_messages.append(f" Mode: Extracting Links Only") @@ -3210,6 +3239,7 @@ class DownloaderApp(QWidget): 'manga_date_prefix': manga_date_prefix_text, # NEW ARGUMENT 'dynamic_character_filter_holder': self.dynamic_character_filter_holder, # Pass the holder 'pause_event': self.pause_event, # Explicitly add pause_event here + 'scan_content_for_images': scan_content_for_images, # Pass new flag 'manga_filename_style': self.manga_filename_style, 'num_file_threads_for_worker': effective_num_file_threads_per_worker, 'manga_date_file_counter_ref': manga_date_file_counter_ref_for_thread, @@ -3240,7 +3270,7 @@ class DownloaderApp(QWidget): 'start_page', 'end_page', 'target_post_id_from_initial_url', 'manga_date_file_counter_ref', 'manga_global_file_counter_ref', 'manga_date_prefix', # Pass new counter and 
prefix for single thread mode - 'manga_mode_active', 'unwanted_keywords', 'manga_filename_style', + 'manga_mode_active', 'unwanted_keywords', 'manga_filename_style', 'scan_content_for_images', # Added scan_content_for_images 'allow_multipart_download', 'use_cookie', 'cookie_text', 'app_base_dir', 'selected_cookie_file' # Added selected_cookie_file ] args_template['skip_current_file_flag'] = None @@ -3435,7 +3465,7 @@ class DownloaderApp(QWidget): 'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list', 'dynamic_character_filter_holder', # Added holder 'skip_words_list', 'skip_words_scope', 'char_filter_scope', 'show_external_links', 'extract_links_only', 'allow_multipart_download', 'use_cookie', 'cookie_text', 'app_base_dir', 'selected_cookie_file', # Added selected_cookie_file - 'num_file_threads', 'skip_current_file_flag', 'manga_date_file_counter_ref', + 'num_file_threads', 'skip_current_file_flag', 'manga_date_file_counter_ref', 'scan_content_for_images', # Added scan_content_for_images 'manga_mode_active', 'manga_filename_style', 'manga_date_prefix', # ADD manga_date_prefix 'manga_global_file_counter_ref' # Add new counter here ] @@ -3573,7 +3603,8 @@ class DownloaderApp(QWidget): self.manga_rename_toggle_button, # Visibility handled by update_ui_for_manga_mode self.cookie_browse_button, # Add cookie browse button self.multipart_toggle_button, - self.cookie_text_input, # Add cookie text input + self.cookie_text_input, # Add cookie text input, + self.scan_content_images_checkbox, # Add scan content checkbox self.use_cookie_checkbox, # Add cookie checkbox here self.external_links_checkbox ] @@ -3588,7 +3619,7 @@ class DownloaderApp(QWidget): self.skip_words_input, self.skip_scope_toggle_button, self.remove_from_filename_input, self.radio_all, self.radio_images, self.radio_videos, self.radio_only_archives, self.radio_only_links, self.skip_zip_checkbox, self.skip_rar_checkbox, self.download_thumbnails_checkbox, 
self.compress_images_checkbox, - self.use_subfolders_checkbox, self.use_subfolder_per_post_checkbox, + self.use_subfolders_checkbox, self.use_subfolder_per_post_checkbox, self.scan_content_images_checkbox, # Added scan_content_images_checkbox self.use_multithreading_checkbox, self.thread_count_input, self.thread_count_label, self.external_links_checkbox, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.use_cookie_checkbox, self.cookie_text_input, self.cookie_browse_button, self.multipart_toggle_button, self.radio_only_audio, # Added radio_only_audio @@ -3679,6 +3710,7 @@ class DownloaderApp(QWidget): self.skip_zip_checkbox.setChecked(True); self.skip_rar_checkbox.setChecked(True); self.download_thumbnails_checkbox.setChecked(False); self.compress_images_checkbox.setChecked(False); self.use_subfolders_checkbox.setChecked(True); self.use_subfolder_per_post_checkbox.setChecked(False); self.use_multithreading_checkbox.setChecked(True); + if hasattr(self, 'scan_content_images_checkbox'): self.scan_content_images_checkbox.setChecked(False) # Reset new checkbox self.external_links_checkbox.setChecked(False) if self.manga_mode_checkbox: self.manga_mode_checkbox.setChecked(False) if hasattr(self, 'use_cookie_checkbox'): self.use_cookie_checkbox.setChecked(self.use_cookie_setting) # Reset to loaded or False @@ -3693,7 +3725,7 @@ class DownloaderApp(QWidget): if hasattr(self, 'manga_date_prefix_input'): self.manga_date_prefix_input.clear() # Clear prefix input - self.char_filter_scope = CHAR_SCOPE_TITLE # Default + self.char_filter_scope = CHAR_SCOPE_FILES # Default to Files on soft reset self._update_char_filter_scope_button_text() self.manga_filename_style = STYLE_POST_TITLE # Reset to app default @@ -3827,6 +3859,24 @@ class DownloaderApp(QWidget): self.set_ui_enabled(True) # Full UI reset if not retrying + def _handle_thumbnail_mode_change(self, thumbnails_checked): + """Handles UI changes when 'Download Thumbnails Only' is toggled.""" + if not 
hasattr(self, 'scan_content_images_checkbox'): + return + + if thumbnails_checked: + self.scan_content_images_checkbox.setChecked(True) + self.scan_content_images_checkbox.setEnabled(False) + self.scan_content_images_checkbox.setToolTip( + "Automatically enabled and locked because 'Download Thumbnails Only' is active.\n" + "In this mode, only images found by content scanning will be downloaded." + ) + else: + self.scan_content_images_checkbox.setEnabled(True) + # Revert to unchecked when thumbnail mode is off. User can manually re-check if desired. + self.scan_content_images_checkbox.setChecked(False) + self.scan_content_images_checkbox.setToolTip(self._original_scan_content_tooltip) + def _start_failed_files_retry_session(self): self.log_signal.emit(f"🔄 Starting retry session for {len(self.retryable_failed_files_info)} file(s)...") self.set_ui_enabled(False) # Disable UI, but cancel button will be enabled @@ -3900,6 +3950,12 @@ class DownloaderApp(QWidget): 'api_url_input': job_details.get('api_url_input', ''), # Original post's API URL 'manga_mode_active': job_details.get('manga_mode_active_for_file', False), 'manga_filename_style': job_details.get('manga_filename_style_for_file', STYLE_POST_TITLE), + # Ensure scan_content_for_images is passed if it's part of common_args or needed + 'scan_content_for_images': common_args.get('scan_content_for_images', False), + 'use_cookie': common_args.get('use_cookie', False), + 'cookie_text': common_args.get('cookie_text', ""), + 'selected_cookie_file': common_args.get('selected_cookie_file', None), + 'app_base_dir': common_args.get('app_base_dir', None), } worker = PostProcessorWorker(**ppw_init_args) @@ -4069,10 +4125,16 @@ class DownloaderApp(QWidget): self.log_verbosity_toggle_button.setToolTip("Current View: Progress Log. 
Click to switch to Missed Character Log.") self._update_manga_filename_style_button_text() self.update_ui_for_manga_mode(False) + # Ensure scan_content_images_checkbox is reset and its state updated by thumbnail mode + if hasattr(self, 'scan_content_images_checkbox'): + self.scan_content_images_checkbox.setChecked(False) + if hasattr(self, 'download_thumbnails_checkbox'): + self._handle_thumbnail_mode_change(self.download_thumbnails_checkbox.isChecked()) def _show_feature_guide(self): # Define content for each page page1_title = "① Introduction & Main Inputs" + page1_content = """

This guide provides an overview of the Kemono Downloader's features, fields, and buttons.

diff --git a/readme.md b/readme.md index b735a07..9d92ea9 100644 --- a/readme.md +++ b/readme.md @@ -1,4 +1,4 @@ -

Kemono Downloader v4.0.1

+

Kemono Downloader v4.1.1

Kemono Downloader @@ -11,7 +11,31 @@ Built with **PyQt5**, this tool is ideal for users who want deep filtering, cust --- -## What's New in v4.0.1? +## What's New in v4.1.1? + +Version 4.1.1 introduces a smarter way to capture images that might be embedded directly within post descriptions, enhancing content discovery. + +### "Scan Content for Images" Feature + +- **Enhanced Image Discovery:** A new checkbox, "**Scan Content for Images**," has been added to the UI (grouped with "Download Thumbnails Only" and "Compress Large Images"). +- **How it Works:** + - When enabled, the downloader scans the HTML content of posts (e.g., the description area). + - It looks for images embedded via HTML `<img>` tags or as direct absolute URL links (e.g., `https://.../image.png`). + - It intelligently resolves relative image paths found in `<img>` tags (like `/data/image.jpg`) into full, downloadable URLs. + - This is particularly useful for capturing images that are part of the post's narrative but not formally listed in the API's file or attachment sections. +- **Default State:** This option is **unchecked by default**. +- **Interaction with "Download Thumbnails Only":** + - If you check "Download Thumbnails Only": + - The "Scan Content for Images" checkbox will **automatically become checked and disabled** (locked). + - In this combined mode, the downloader will **only download images found by the content scan**. API-listed thumbnails will be ignored, prioritizing images from the post's body. + - If you uncheck "Download Thumbnails Only": + - The "Scan Content for Images" checkbox will become **enabled again and revert to being unchecked**. You can then manually enable it if you wish to scan content without being in thumbnail-only mode. + +This feature ensures a more comprehensive download experience, especially for posts where images are integrated directly into the text. + +--- + +## Previous Update: What's New in v4.0.1? 
Version 4.0.1 focuses on enhancing access to content and providing even smarter organization: @@ -74,7 +98,7 @@ This field allows for dynamic filtering for the current download session and pro --- ## What's in v3.5.0? (Previous Update) -This version brings significant enhancements to manga/comic downloading, filtering capabilities, and user experience: +This version brought significant enhancements to manga/comic downloading, filtering capabilities, and user experience: ### Enhanced Manga/Comic Mode @@ -232,12 +256,17 @@ This version brings significant enhancements to manga/comic downloading, filteri --- ### Thumbnail & Compression Tools - -- **Download Thumbnails Only** - +- **Download Thumbnails Only:** + - Downloads small preview images from the API instead of full-sized files (if available). + - **Interaction with "Scan Content for Images" (New in v4.1.1):** When "Download Thumbnails Only" is active, "Scan Content for Images" is auto-enabled, and only images found by the content scan are downloaded. See "What's New in v4.1.1" for details. +- **Scan Content for Images (New in v4.1.1):** + - A UI option to scan the HTML content of posts for embedded image URLs (from `<img>` tags or direct links). + - Resolves relative paths and helps capture images not listed in the API's formal attachments. + - See the "What's New in v4.1.1?" section for a comprehensive explanation. - **Compress to WebP** (via Pillow) - Converts large images to smaller WebP versions + --- ### Performance Features