Commit

2025-12-29 16:14:44 +00:00 · 2025-05-26 09:48:00 +05:30
parent 8928cb92da
commit 7f2976a4f4
3 changed files with 199 additions and 20 deletions
--- a/downloader_utils.py
+++ b/downloader_utils.py
@@ -649,6 +649,7 @@ class PostProcessorWorker:
                 app_base_dir=None, # New parameter for app's base directory
                 manga_date_prefix=MANGA_DATE_PREFIX_DEFAULT, # New parameter for date-based prefix
                 manga_date_file_counter_ref=None, # New parameter for date-based manga naming
+                 scan_content_for_images=False, # New flag for scanning HTML content
                 manga_global_file_counter_ref=None, # New parameter for global numbering
                 ): # type: ignore
        self.post = post_data
@@ -699,6 +700,7 @@ class PostProcessorWorker:
        self.manga_date_prefix = manga_date_prefix # Store the prefix
        self.manga_global_file_counter_ref = manga_global_file_counter_ref # Store global counter
        self.use_cookie = use_cookie # Store cookie setting
+        self.scan_content_for_images = scan_content_for_images # Store new flag

        if self.compress_images and Image is None:
            self.logger("⚠️ Image compression disabled: Pillow library not found.")
@@ -1386,14 +1388,14 @@ class PostProcessorWorker:
            if original_api_name:
                all_files_from_post_api.append({
                    'url': f"https://{api_file_domain}{file_path}" if file_path.startswith('/') else f"https://{api_file_domain}/data/{file_path}",
-                    'name': original_api_name,
+                    'name': original_api_name, # This is the cleaned/API provided name
                    '_original_name_for_log': original_api_name,
-                    '_is_thumbnail': self.download_thumbnails and is_image(original_api_name)
+                    '_is_thumbnail': is_image(original_api_name) # Mark if it's an image from API
                })
            else: self.logger(f"   ⚠️ Skipping main file for post {post_id}: Missing name (Path: {file_path})")

        for idx, att_info in enumerate(post_attachments):
-            if isinstance(att_info, dict) and att_info.get('path'):
+            if isinstance(att_info, dict) and att_info.get('path'): # Ensure att_info is a dict
                att_path = att_info['path'].lstrip('/')
                original_api_att_name = att_info.get('name') or os.path.basename(att_path)
                if original_api_att_name:
@@ -1401,16 +1403,99 @@ class PostProcessorWorker:
                        'url': f"https://{api_file_domain}{att_path}" if att_path.startswith('/') else f"https://{api_file_domain}/data/{att_path}",
                        'name': original_api_att_name,
                        '_original_name_for_log': original_api_att_name,
-                        '_is_thumbnail': self.download_thumbnails and is_image(original_api_att_name)
+                        '_is_thumbnail': is_image(original_api_att_name) # Mark if it's an image from API
                    })
                else: self.logger(f"   ⚠️ Skipping attachment {idx+1} for post {post_id}: Missing name (Path: {att_path})")
            else: self.logger(f"   ⚠️ Skipping invalid attachment {idx+1} for post {post_id}: {str(att_info)[:100]}")

+        # --- New: Scan post content for additional image URLs if enabled ---
+        if self.scan_content_for_images and post_content_html and not self.extract_links_only: # This block was duplicated, ensure only one exists
+            self.logger(f"   Scanning post content for additional image URLs (Post ID: {post_id})...")
+            
+            parsed_input_url = urlparse(self.api_url_input)
+            base_url_for_relative_paths = f"{parsed_input_url.scheme}://{parsed_input_url.netloc}"
+            img_ext_pattern = "|".join(ext.lstrip('.') for ext in IMAGE_EXTENSIONS)
+            
+            # 1. Regex for direct absolute image URLs in text
+            direct_url_pattern_str = r"""(?i)\b(https?://[^\s"'<>\[\]\{\}\|\^\\^~\[\]`]+\.(?:""" + img_ext_pattern + r"""))\b"""
+            # 2. Regex for <img> tags (captures src content)
+            img_tag_src_pattern_str = r"""<img\s+[^>]*?src\s*=\s*["']([^"']+)["']"""
+
+            found_image_sources = set()
+
+            for direct_url_match in re.finditer(direct_url_pattern_str, post_content_html):
+                found_image_sources.add(direct_url_match.group(1))
+
+            for img_tag_match in re.finditer(img_tag_src_pattern_str, post_content_html, re.IGNORECASE):
+                src_attr = img_tag_match.group(1).strip()
+                src_attr = html.unescape(src_attr)
+                if not src_attr: continue
+
+                resolved_src_url = ""
+                if src_attr.startswith(('http://', 'https://')):
+                    resolved_src_url = src_attr
+                elif src_attr.startswith('//'):
+                    resolved_src_url = f"{parsed_input_url.scheme}:{src_attr}"
+                elif src_attr.startswith('/'):
+                    resolved_src_url = f"{base_url_for_relative_paths}{src_attr}"
+                
+                if resolved_src_url:
+                    parsed_resolved_url = urlparse(resolved_src_url)
+                    if any(parsed_resolved_url.path.lower().endswith(ext) for ext in IMAGE_EXTENSIONS):
+                        found_image_sources.add(resolved_src_url)
+
+            if found_image_sources:
+                self.logger(f"      Found {len(found_image_sources)} potential image URLs/sources in content.")
+                existing_urls_in_api_list = {f_info['url'] for f_info in all_files_from_post_api}
+
+                for found_url in found_image_sources: # Iterate over the unique, resolved URLs
+                    if self.check_cancel(): break
+                    if found_url in existing_urls_in_api_list:
+                        self.logger(f"         Skipping URL from content (already in API list or previously added from content): {found_url[:70]}...")
+                        continue
+                    try:
+                        parsed_found_url = urlparse(found_url)
+                        url_filename = os.path.basename(parsed_found_url.path)
+                        if not url_filename or not is_image(url_filename):
+                            self.logger(f"         Skipping URL from content (no filename part or not an image extension): {found_url[:70]}...")
+                            continue
+
+                        self.logger(f"      Adding image from content: {url_filename} (URL: {found_url[:70]}...)")
+                        all_files_from_post_api.append({
+                            'url': found_url,
+                            'name': url_filename,
+                            '_original_name_for_log': url_filename,
+                            '_is_thumbnail': False, # Images from content are not API thumbnails
+                            '_from_content_scan': True 
+                        })
+                        existing_urls_in_api_list.add(found_url) 
+                    except Exception as e_url_parse:
+                        self.logger(f"         Error processing URL from content '{found_url[:70]}...': {e_url_parse}")
+            else:
+                self.logger(f"      No additional image URLs found in post content scan for post {post_id}.")
+        # --- End of new content scanning logic ---
+
+        # --- Final filtering based on download_thumbnails and scan_content_for_images flags ---
        if self.download_thumbnails:
-            all_files_from_post_api = [finfo for finfo in all_files_from_post_api if finfo['_is_thumbnail']]
-            if not all_files_from_post_api:
-                 self.logger(f"   -> No image thumbnails found for post {post_id} in thumbnail-only mode.")
-                 return 0, 0, [], []
+            if self.scan_content_for_images:
+                # Both "Download Thumbnails Only" AND "Scan Content for Images" are checked.
+                # Prioritize images from content scan.
+                self.logger(f"   Mode: 'Download Thumbnails Only' + 'Scan Content for Images' active. Prioritizing images from content scan for post {post_id}.")
+                all_files_from_post_api = [finfo for finfo in all_files_from_post_api if finfo.get('_from_content_scan')]
+                if not all_files_from_post_api:
+                    self.logger(f"   -> No images found via content scan for post {post_id} in this combined mode.")
+                    return 0, 0, [], [] # No files to download for this post
+            else:
+                # Only "Download Thumbnails Only" is checked. Filter for API thumbnails.
+                self.logger(f"   Mode: 'Download Thumbnails Only' active. Filtering for API thumbnails for post {post_id}.")
+                all_files_from_post_api = [finfo for finfo in all_files_from_post_api if finfo.get('_is_thumbnail')]
+                if not all_files_from_post_api:
+                    self.logger(f"   -> No API image thumbnails found for post {post_id} in thumbnail-only mode.")
+                    return 0, 0, [], [] # No files to download for this post
+        # If self.download_thumbnails is False, all_files_from_post_api remains as is.
+        # It will contain all API files (images marked with _is_thumbnail: True, others False)
+        # and potentially content-scanned images (marked with _from_content_scan: True).
+
        if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED:
            def natural_sort_key_for_files(file_api_info):
                name = file_api_info.get('_original_name_for_log', '').lower()
@@ -1623,6 +1708,7 @@ class DownloadThread(QThread):
                 manga_date_file_counter_ref=None, # New parameter
                 manga_global_file_counter_ref=None, # New parameter for global numbering
                 use_cookie=False, # Added: Expected by main.py
+                 scan_content_for_images=False, # Added new flag
                 cookie_text="",   # Added: Expected by main.py
                 ):
        super().__init__()
@@ -1673,6 +1759,7 @@ class DownloadThread(QThread):
        self.cookie_text = cookie_text # Store cookie text
        self.use_cookie = use_cookie # Store cookie setting
        self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store for passing to worker by DownloadThread
+        self.scan_content_for_images = scan_content_for_images # Store new flag
        self.manga_global_file_counter_ref = manga_global_file_counter_ref # Store for global numbering
        if self.compress_images and Image is None:
            self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
@@ -1806,6 +1893,7 @@ class DownloadThread(QThread):
                         manga_global_file_counter_ref=self.manga_global_file_counter_ref, # Pass the ref
                         use_cookie=self.use_cookie, # Pass cookie setting to worker
                         manga_date_file_counter_ref=current_manga_date_file_counter_ref, # Pass the calculated or passed-in ref
+                         scan_content_for_images=self.scan_content_for_images, # Pass new flag
                         )
                    try:
                        dl_count, skip_count, kept_originals_this_post, retryable_failures = post_processing_worker.process()
--- a/main.py
+++ b/main.py
@@ -57,6 +57,7 @@ try:
        FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER,
        STYLE_DATE_BASED, # Import new manga style
        STYLE_POST_TITLE_GLOBAL_NUMBERING # Import new manga style
+        # IMAGE_EXTENSIONS will be used from downloader_utils directly
    )
    print("Successfully imported names from downloader_utils.")
 except ImportError as e:
@@ -124,6 +125,7 @@ ALLOW_MULTIPART_DOWNLOAD_KEY = "allowMultipartDownloadV1"
 USE_COOKIE_KEY = "useCookieV1" # New setting key
 COOKIE_TEXT_KEY = "cookieTextV1" # New setting key for cookie text
 CHAR_FILTER_SCOPE_KEY = "charFilterScopeV1"
+SCAN_CONTENT_IMAGES_KEY = "scanContentForImagesV1" # New setting key

 CONFIRM_ADD_ALL_ACCEPTED = 1
 CONFIRM_ADD_ALL_SKIP_ADDING = 2
@@ -815,6 +817,12 @@ class DownloaderApp(QWidget):
        self.prompt_mutex = QMutex()
        self._add_character_response = None

+        # Store original tooltips for dynamic updates. Label changed, tooltip content remains valid.
+        self._original_scan_content_tooltip = ("If checked, the downloader will scan the HTML content of posts for image URLs (from <img> tags or direct links).\n"
+            "This includes resolving relative paths from <img> tags to full URLs.\n"
+            "Relative paths in <img> tags (e.g., /data/image.jpg) will be resolved to full URLs.\n"
+            "Useful for cases where images are in the post description but not in the API's file/attachment list.")
+
        self.downloaded_files = set()
        self.downloaded_files_lock = threading.Lock()
        self.downloaded_file_hashes = set()
@@ -857,6 +865,7 @@ class DownloaderApp(QWidget):
        self.char_filter_scope = self.settings.value(CHAR_FILTER_SCOPE_KEY, CHAR_SCOPE_FILES, type=str) # Default to Files
        self.allow_multipart_download_setting = False 
        self.use_cookie_setting = False # Always default to False on launch
+        self.scan_content_images_setting = self.settings.value(SCAN_CONTENT_IMAGES_KEY, False, type=bool) # Load new setting        
        self.cookie_text_setting = ""   # Always default to empty on launch

        print(f"ℹ️ Known.txt will be loaded/saved at: {self.config_file}")
@@ -878,6 +887,7 @@ class DownloaderApp(QWidget):
        self.log_signal.emit(f"ℹ️ Multi-part download defaults to: {'Enabled' if self.allow_multipart_download_setting else 'Disabled'} on launch")
        self.log_signal.emit(f"ℹ️ Cookie text defaults to: Empty on launch")
        self.log_signal.emit(f"ℹ️ 'Use Cookie' setting defaults to: Disabled on launch")
+        self.log_signal.emit(f"ℹ️ Scan post content for images defaults to: {'Enabled' if self.scan_content_images_setting else 'Disabled'}")

    def _get_tooltip_for_character_input(self):
        return (
@@ -902,6 +912,8 @@ class DownloaderApp(QWidget):
            self.cookie_browse_button.clicked.connect(self._browse_cookie_file)
        if hasattr(self, 'cookie_text_input'): # Connect text changed for manual clear detection
            self.cookie_text_input.textChanged.connect(self._handle_cookie_text_manual_change)
+        if hasattr(self, 'download_thumbnails_checkbox'): # Connect the new handler
+            self.download_thumbnails_checkbox.toggled.connect(self._handle_thumbnail_mode_change)        
        self.gui_update_timer.timeout.connect(self._process_worker_queue)
        self.gui_update_timer.start(100) # Check queue every 100ms
        self.log_signal.connect(self.handle_main_log)
@@ -1123,6 +1135,7 @@ class DownloaderApp(QWidget):
        self.settings.setValue(CHAR_FILTER_SCOPE_KEY, self.char_filter_scope)
        self.settings.setValue(ALLOW_MULTIPART_DOWNLOAD_KEY, self.allow_multipart_download_setting)
        self.settings.setValue(COOKIE_TEXT_KEY, self.cookie_text_input.text() if hasattr(self, 'cookie_text_input') else "")
+        self.settings.setValue(SCAN_CONTENT_IMAGES_KEY, self.scan_content_images_checkbox.isChecked() if hasattr(self, 'scan_content_images_checkbox') else False)        
        self.settings.setValue(USE_COOKIE_KEY, self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False)
        self.settings.sync()

@@ -1371,12 +1384,23 @@ class DownloaderApp(QWidget):
        row1_layout.addWidget(self.skip_rar_checkbox)
        self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only")
        self.download_thumbnails_checkbox.setChecked(False)
-        self.download_thumbnails_checkbox.setToolTip("Thumbnail download functionality is currently limited without the API.")
+        self.download_thumbnails_checkbox.setToolTip(
+            "Downloads small preview images from the API instead of full-sized files (if available).\n"
+            "If 'Scan Post Content for Image URLs' is also checked, this mode will *only* download images found by the content scan (ignoring API thumbnails)."
+        )
        row1_layout.addWidget(self.download_thumbnails_checkbox)
+
+        self.scan_content_images_checkbox = QCheckBox("Scan Content for Images") # Shortened Label
+        self.scan_content_images_checkbox.setToolTip(
+            self._original_scan_content_tooltip) # Use stored original tooltip
+        self.scan_content_images_checkbox.setChecked(self.scan_content_images_setting) # Set from loaded setting
+        row1_layout.addWidget(self.scan_content_images_checkbox) # Added to row1_layout
+
        self.compress_images_checkbox = QCheckBox("Compress Large Images (to WebP)")
        self.compress_images_checkbox.setChecked(False)
        self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).")
        row1_layout.addWidget(self.compress_images_checkbox)
+
        row1_layout.addStretch(1)
        checkboxes_group_layout.addLayout(row1_layout)

@@ -1464,6 +1488,7 @@ class DownloaderApp(QWidget):
        self.manga_mode_checkbox = QCheckBox("Manga/Comic Mode")
        self.manga_mode_checkbox.setToolTip("Downloads posts from oldest to newest and renames files based on post title (for creator feeds only).")
        self.manga_mode_checkbox.setChecked(False)
+        
        advanced_row2_layout.addWidget(self.manga_mode_checkbox) # Keep manga mode checkbox here

        advanced_row2_layout.addStretch(1)
@@ -1684,6 +1709,8 @@ class DownloaderApp(QWidget):
        self._update_skip_scope_button_text()
        self._update_char_filter_scope_button_text()
        self._update_multithreading_for_date_mode() # Ensure correct initial state
+        if hasattr(self, 'download_thumbnails_checkbox'): # Set initial state for scan_content checkbox based on thumbnail checkbox
+            self._handle_thumbnail_mode_change(self.download_thumbnails_checkbox.isChecked())
        
    def _browse_cookie_file(self):
        """Opens a file dialog to select a cookie file."""
@@ -2838,6 +2865,7 @@ class DownloaderApp(QWidget):
        raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else ""
        allow_multipart = self.allow_multipart_download_setting # Use the internal setting
        remove_from_filename_words_list = [word.strip() for word in raw_remove_filename_words.split(',') if word.strip()]
+        scan_content_for_images = self.scan_content_images_checkbox.isChecked() if hasattr(self, 'scan_content_images_checkbox') else False        
        use_cookie_from_checkbox = self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False
        app_base_dir_for_cookies = os.path.dirname(self.config_file) # Directory of Known.txt
        cookie_text_from_input = self.cookie_text_input.text().strip() if hasattr(self, 'cookie_text_input') and use_cookie_from_checkbox else ""
@@ -3141,6 +3169,7 @@ class DownloaderApp(QWidget):
                f"   Compress Images: {'Enabled' if compress_images else 'Disabled'}",
                f"   Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}" # Removed duplicate file handling log
            ])
+            log_messages.append(f"   Scan Post Content for Images: {'Enabled' if scan_content_for_images else 'Disabled'}")        
        else:
            log_messages.append(f"   Mode: Extracting Links Only")

@@ -3210,6 +3239,7 @@ class DownloaderApp(QWidget):
            'manga_date_prefix': manga_date_prefix_text, # NEW ARGUMENT            
            'dynamic_character_filter_holder': self.dynamic_character_filter_holder, # Pass the holder
            'pause_event': self.pause_event, # Explicitly add pause_event here
+            'scan_content_for_images': scan_content_for_images, # Pass new flag            
            'manga_filename_style': self.manga_filename_style,
            'num_file_threads_for_worker': effective_num_file_threads_per_worker,
            'manga_date_file_counter_ref': manga_date_file_counter_ref_for_thread,
@@ -3240,7 +3270,7 @@ class DownloaderApp(QWidget):
                    'start_page', 'end_page', 'target_post_id_from_initial_url',
                    'manga_date_file_counter_ref', 
                    'manga_global_file_counter_ref', 'manga_date_prefix', # Pass new counter and prefix for single thread mode
-                    'manga_mode_active', 'unwanted_keywords', 'manga_filename_style',
+                    'manga_mode_active', 'unwanted_keywords', 'manga_filename_style', 'scan_content_for_images', # Added scan_content_for_images
                    'allow_multipart_download', 'use_cookie', 'cookie_text', 'app_base_dir', 'selected_cookie_file' # Added selected_cookie_file
                ]
                args_template['skip_current_file_flag'] = None
@@ -3435,7 +3465,7 @@ class DownloaderApp(QWidget):
            'downloaded_files_lock', 'downloaded_file_hashes_lock', 'remove_from_filename_words_list', 'dynamic_character_filter_holder', # Added holder
            'skip_words_list', 'skip_words_scope', 'char_filter_scope',
            'show_external_links', 'extract_links_only', 'allow_multipart_download', 'use_cookie', 'cookie_text', 'app_base_dir', 'selected_cookie_file', # Added selected_cookie_file
-            'num_file_threads', 'skip_current_file_flag', 'manga_date_file_counter_ref',
+            'num_file_threads', 'skip_current_file_flag', 'manga_date_file_counter_ref', 'scan_content_for_images', # Added scan_content_for_images
            'manga_mode_active', 'manga_filename_style', 'manga_date_prefix', # ADD manga_date_prefix
            'manga_global_file_counter_ref' # Add new counter here
        ]
@@ -3573,7 +3603,8 @@ class DownloaderApp(QWidget):
            self.manga_rename_toggle_button, # Visibility handled by update_ui_for_manga_mode
            self.cookie_browse_button, # Add cookie browse button
            self.multipart_toggle_button,
-            self.cookie_text_input, # Add cookie text input
+            self.cookie_text_input, # Add cookie text input,
+            self.scan_content_images_checkbox, # Add scan content checkbox
            self.use_cookie_checkbox, # Add cookie checkbox here
            self.external_links_checkbox
        ]
@@ -3588,7 +3619,7 @@ class DownloaderApp(QWidget):
            self.skip_words_input, self.skip_scope_toggle_button, self.remove_from_filename_input,
            self.radio_all, self.radio_images, self.radio_videos, self.radio_only_archives, self.radio_only_links,
            self.skip_zip_checkbox, self.skip_rar_checkbox, self.download_thumbnails_checkbox, self.compress_images_checkbox,
-            self.use_subfolders_checkbox, self.use_subfolder_per_post_checkbox,
+            self.use_subfolders_checkbox, self.use_subfolder_per_post_checkbox, self.scan_content_images_checkbox, # Added scan_content_images_checkbox
            self.use_multithreading_checkbox, self.thread_count_input, self.thread_count_label,
            self.external_links_checkbox, self.manga_mode_checkbox, self.manga_rename_toggle_button, self.use_cookie_checkbox, self.cookie_text_input, self.cookie_browse_button,
            self.multipart_toggle_button, self.radio_only_audio, # Added radio_only_audio
@@ -3679,6 +3710,7 @@ class DownloaderApp(QWidget):
        self.skip_zip_checkbox.setChecked(True); self.skip_rar_checkbox.setChecked(True); self.download_thumbnails_checkbox.setChecked(False);
        self.compress_images_checkbox.setChecked(False); self.use_subfolders_checkbox.setChecked(True);
        self.use_subfolder_per_post_checkbox.setChecked(False); self.use_multithreading_checkbox.setChecked(True);
+        if hasattr(self, 'scan_content_images_checkbox'): self.scan_content_images_checkbox.setChecked(False) # Reset new checkbox
        self.external_links_checkbox.setChecked(False)
        if self.manga_mode_checkbox: self.manga_mode_checkbox.setChecked(False)
        if hasattr(self, 'use_cookie_checkbox'): self.use_cookie_checkbox.setChecked(self.use_cookie_setting) # Reset to loaded or False
@@ -3693,7 +3725,7 @@ class DownloaderApp(QWidget):

        if hasattr(self, 'manga_date_prefix_input'): self.manga_date_prefix_input.clear() # Clear prefix input

-        self.char_filter_scope = CHAR_SCOPE_TITLE # Default
+        self.char_filter_scope = CHAR_SCOPE_FILES # Default to Files on soft reset
        self._update_char_filter_scope_button_text()

        self.manga_filename_style = STYLE_POST_TITLE # Reset to app default
@@ -3827,6 +3859,24 @@ class DownloaderApp(QWidget):

        self.set_ui_enabled(True) # Full UI reset if not retrying

+    def _handle_thumbnail_mode_change(self, thumbnails_checked):
+        """Keep 'Scan Content for Images' in sync with 'Download Thumbnails Only'.
+
+        While thumbnail-only mode is checked, the scan checkbox is force-checked
+        and disabled. When thumbnail-only mode is unchecked again, the scan
+        checkbox is re-enabled, reverted to unchecked, and gets its original
+        tooltip back.
+
+        The revert-to-unchecked step only runs when this handler had previously
+        locked the checkbox. The handler is also invoked with the current
+        thumbnail state outside of a real toggle (e.g. once after the UI is
+        built); in that case nothing was locked, so the checkbox keeps whatever
+        value it already has (such as the one loaded from saved settings).
+
+        Args:
+            thumbnails_checked: True when 'Download Thumbnails Only' is checked.
+        """
+        if not hasattr(self, 'scan_content_images_checkbox'):
+            return
+
+        scan_checkbox = self.scan_content_images_checkbox
+
+        if thumbnails_checked:
+            # Remember that the lock came from here so the release path below
+            # knows it must undo the forced "checked" state.
+            self._scan_content_locked_by_thumbnail_mode = True
+            scan_checkbox.setChecked(True)
+            scan_checkbox.setEnabled(False)
+            scan_checkbox.setToolTip(
+                "Automatically enabled and locked because 'Download Thumbnails Only' is active.\n"
+                "In this mode, only images found by content scanning will be downloaded."
+            )
+            return
+
+        # The flag is created lazily by this handler, so default to False when
+        # it has never been set.
+        was_locked_here = getattr(self, '_scan_content_locked_by_thumbnail_mode', False)
+        self._scan_content_locked_by_thumbnail_mode = False
+
+        scan_checkbox.setEnabled(True)
+        if was_locked_here:
+            # Revert to unchecked when thumbnail mode is turned off. User can
+            # manually re-check if desired.
+            scan_checkbox.setChecked(False)
+        scan_checkbox.setToolTip(self._original_scan_content_tooltip)
+
    def _start_failed_files_retry_session(self):
        self.log_signal.emit(f"🔄 Starting retry session for {len(self.retryable_failed_files_info)} file(s)...")
        self.set_ui_enabled(False) # Disable UI, but cancel button will be enabled
@@ -3900,6 +3950,12 @@ class DownloaderApp(QWidget):
            'api_url_input': job_details.get('api_url_input', ''), # Original post's API URL
            'manga_mode_active': job_details.get('manga_mode_active_for_file', False),
            'manga_filename_style': job_details.get('manga_filename_style_for_file', STYLE_POST_TITLE),
+            # Ensure scan_content_for_images is passed if it's part of common_args or needed
+            'scan_content_for_images': common_args.get('scan_content_for_images', False),
+            'use_cookie': common_args.get('use_cookie', False),
+            'cookie_text': common_args.get('cookie_text', ""),
+            'selected_cookie_file': common_args.get('selected_cookie_file', None),
+            'app_base_dir': common_args.get('app_base_dir', None),
        }
        worker = PostProcessorWorker(**ppw_init_args)
        
@@ -4069,10 +4125,16 @@ class DownloaderApp(QWidget):
            self.log_verbosity_toggle_button.setToolTip("Current View: Progress Log. Click to switch to Missed Character Log.")
        self._update_manga_filename_style_button_text()
        self.update_ui_for_manga_mode(False)
+        # Ensure scan_content_images_checkbox is reset and its state updated by thumbnail mode
+        if hasattr(self, 'scan_content_images_checkbox'):
+            self.scan_content_images_checkbox.setChecked(False) 
+        if hasattr(self, 'download_thumbnails_checkbox'):
+            self._handle_thumbnail_mode_change(self.download_thumbnails_checkbox.isChecked())

    def _show_feature_guide(self):
        # Define content for each page
        page1_title = "① Introduction & Main Inputs"
+
        page1_content = """<html><head/><body>
        <p>This guide provides an overview of the Kemono Downloader's features, fields, and buttons.</p>

--- a/readme.md
+++ b/readme.md
@@ -1,4 +1,4 @@
-<h1 align="center">Kemono Downloader v4.0.1</h1>
+<h1 align="center">Kemono Downloader v4.1.1</h1>

 <div align="center">
  <img src="https://github.com/Yuvi9587/Kemono-Downloader/blob/main/Read.png" alt="Kemono Downloader"/>
@@ -11,7 +11,31 @@ Built with **PyQt5**, this tool is ideal for users who want deep filtering, cust

 ---

-##  What's New in v4.0.1?
+##  What's New in v4.1.1?
+
+Version 4.1.1 introduces a smarter way to capture images that might be embedded directly within post descriptions, enhancing content discovery.
+
+###  "Scan Content for Images" Feature
+
+- **Enhanced Image Discovery:** A new checkbox, "**Scan Content for Images**," has been added to the UI (grouped with "Download Thumbnails Only" and "Compress Large Images").
+- **How it Works:**
+    - When enabled, the downloader scans the HTML content of posts (e.g., the description area).
+    - It looks for images embedded via HTML `<img>` tags or as direct absolute URL links (e.g., `https://.../image.png`).
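+    - It resolves root-relative (like `/data/image.jpg`) and protocol-relative (like `//host/image.jpg`) paths found in `<img>` tags into full, downloadable URLs. Other relative paths (like `images/pic.jpg`) are not resolved and are skipped.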
+    - This is particularly useful for capturing images that are part of the post's narrative but not formally listed in the API's file or attachment sections.
+- **Default State:** This option is **unchecked by default**.
+- **Interaction with "Download Thumbnails Only":**
+    - If you check "Download Thumbnails Only":
+        - The "Scan Content for Images" checkbox will **automatically become checked and disabled** (locked).
+        - In this combined mode, the downloader will **only download images found by the content scan**. API-listed thumbnails will be ignored, prioritizing images from the post's body.
+    - If you uncheck "Download Thumbnails Only":
+        - The "Scan Content for Images" checkbox will become **enabled again and revert to being unchecked**. You can then manually enable it if you wish to scan content without being in thumbnail-only mode.
+
+This feature ensures a more comprehensive download experience, especially for posts where images are integrated directly into the text.
+
+---
+
+##  Previous Update: What's New in v4.0.1?

 Version 4.0.1 focuses on enhancing access to content and providing even smarter organization:

@@ -74,7 +98,7 @@ This field allows for dynamic filtering for the current download session and pro

 ---
 ##  What's in v3.5.0? (Previous Update)
-This version brings significant enhancements to manga/comic downloading, filtering capabilities, and user experience:
+This version brought significant enhancements to manga/comic downloading, filtering capabilities, and user experience:

 ###  Enhanced Manga/Comic Mode

@@ -232,12 +256,17 @@ This version brings significant enhancements to manga/comic downloading, filteri
 ---

 ### Thumbnail & Compression Tools
-
- **Download Thumbnails Only**
-
+- **Download Thumbnails Only:**
+  - Downloads small preview images from the API instead of full-sized files (if available).
+  - **Interaction with "Scan Content for Images" (New in v4.1.1):** When "Download Thumbnails Only" is active, "Scan Content for Images" is auto-enabled, and only images found by the content scan are downloaded. See "What's New in v4.1.1" for details.
+- **Scan Content for Images (New in v4.1.1):**
+  - A UI option to scan the HTML content of posts for embedded image URLs (from `<img>` tags or direct links).
+  - Resolves root-relative and protocol-relative `<img>` paths and helps capture images not listed in the API's file or attachment lists.
+  - See the "What's New in v4.1.1?" section for a comprehensive explanation.
 - **Compress to WebP** (via Pillow)
  - Converts large images to smaller WebP versions

+
 ---

 ###  Performance Features