8 Commits

Author    SHA1        Message                Date
Yuvi9587  cfd869e05a  Update main_window.py  2025-07-14 09:04:34 -07:00
Yuvi9587  b191776f65  Commit                 2025-07-14 08:19:58 -07:00
Yuvi9587  f41f354737  Update main_window.py  2025-07-13 21:46:34 -07:00
Yuvi9587  6b57ee099d  Commit                 2025-07-13 21:45:30 -07:00
Yuvi9587  21ecb60cb5  commit                 2025-07-13 20:21:17 -07:00
Yuvi9587  ee00019f2e  Update workers.py      2025-07-13 18:42:56 -07:00
Yuvi9587  d49c739fe4  Commit                 2025-07-13 10:36:52 -07:00
Yuvi9587  dbdf82a079  Commit                 2025-07-13 10:22:06 -07:00
19 changed files with 8228 additions and 2526 deletions


@@ -0,0 +1,97 @@
Fonts are (c) Bitstream (see below). DejaVu changes are in public domain.
Glyphs imported from Arev fonts are (c) Tavmjong Bah (see below)
Bitstream Vera Fonts Copyright
------------------------------
Copyright (c) 2003 by Bitstream, Inc. All Rights Reserved. Bitstream Vera is
a trademark of Bitstream, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of the fonts accompanying this license ("Fonts") and associated
documentation files (the "Font Software"), to reproduce and distribute the
Font Software, including without limitation the rights to use, copy, merge,
publish, distribute, and/or sell copies of the Font Software, and to permit
persons to whom the Font Software is furnished to do so, subject to the
following conditions:
The above copyright and trademark notices and this permission notice shall
be included in all copies of one or more of the Font Software typefaces.
The Font Software may be modified, altered, or added to, and in particular
the designs of glyphs or characters in the Fonts may be modified and
additional glyphs or characters may be added to the Fonts, only if the fonts
are renamed to names not containing either the words "Bitstream" or the word
"Vera".
This License becomes null and void to the extent applicable to Fonts or Font
Software that has been modified and is distributed under the "Bitstream
Vera" names.
The Font Software may be sold as part of a larger software package but no
copy of one or more of the Font Software typefaces may be sold by itself.
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT,
TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL BITSTREAM OR THE GNOME
FOUNDATION BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING
ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE
FONT SOFTWARE.
Except as contained in this notice, the names of Gnome, the Gnome
Foundation, and Bitstream Inc., shall not be used in advertising or
otherwise to promote the sale, use or other dealings in this Font Software
without prior written authorization from the Gnome Foundation or Bitstream
Inc., respectively. For further information, contact: fonts at gnome dot
org.
Arev Fonts Copyright
------------------------------
Copyright (c) 2006 by Tavmjong Bah. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the fonts accompanying this license ("Fonts") and
associated documentation files (the "Font Software"), to reproduce
and distribute the modifications to the Bitstream Vera Font Software,
including without limitation the rights to use, copy, merge, publish,
distribute, and/or sell copies of the Font Software, and to permit
persons to whom the Font Software is furnished to do so, subject to
the following conditions:
The above copyright and trademark notices and this permission notice
shall be included in all copies of one or more of the Font Software
typefaces.
The Font Software may be modified, altered, or added to, and in
particular the designs of glyphs or characters in the Fonts may be
modified and additional glyphs or characters may be added to the
Fonts, only if the fonts are renamed to names not containing either
the words "Tavmjong Bah" or the word "Arev".
This License becomes null and void to the extent applicable to Fonts
or Font Software that has been modified and is distributed under the
"Tavmjong Bah Arev" names.
The Font Software may be sold as part of a larger software package but
no copy of one or more of the Font Software typefaces may be sold by
itself.
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL
TAVMJONG BAH BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.
Except as contained in this notice, the name of Tavmjong Bah shall not
be used in advertising or otherwise to promote the sale, use or other
dealings in this Font Software without prior written authorization
from Tavmjong Bah. For further information, contact: tavmjong @ free
. fr.

9 binary files changed (contents not shown).

main_window_old.py (new file, 5529 lines): diff suppressed because it is too large.


@@ -1,12 +1,10 @@
# --- Standard Library Imports ---
import time
import traceback
from urllib.parse import urlparse
# --- Third-Party Library Imports ---
import json # Ensure json is imported
import requests
# --- Local Application Imports ---
# (Keep the rest of your imports)
from ..utils.network_utils import extract_post_info, prepare_cookies_for_request
from ..config.constants import (
STYLE_DATE_POST_TITLE
@@ -15,36 +13,24 @@ from ..config.constants import (
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
"""
Fetches a single page of posts from the API with retry logic.
Args:
api_url_base (str): The base URL for the user's posts.
headers (dict): The request headers.
offset (int): The offset for pagination.
logger (callable): Function to log messages.
cancellation_event (threading.Event): Event to signal cancellation.
pause_event (threading.Event): Event to signal pause.
cookies_dict (dict): A dictionary of cookies to include in the request.
Returns:
list: A list of post data dictionaries from the API.
Raises:
RuntimeError: If the fetch fails after all retries or encounters a non-retryable error.
Fetches a single page of posts from the API with robust retry logic.
NEW: Requests only essential fields to keep the response size small and reliable.
"""
if cancellation_event and cancellation_event.is_set():
logger(" Fetch cancelled before request.")
raise RuntimeError("Fetch operation cancelled by user.")
if pause_event and pause_event.is_set():
logger(" Post fetching paused...")
while pause_event.is_set():
if cancellation_event and cancellation_event.is_set():
logger(" Post fetching cancelled while paused.")
raise RuntimeError("Fetch operation cancelled by user.")
raise RuntimeError("Fetch operation cancelled by user while paused.")
time.sleep(0.5)
logger(" Post fetching resumed.")
paginated_url = f'{api_url_base}?o={offset}'
# --- MODIFICATION: Added `fields` to the URL to request only metadata ---
# This prevents the large 'content' field from being included in the list, avoiding timeouts.
fields_to_request = "id,user,service,title,shared_file,added,published,edited,file,attachments,tags"
paginated_url = f'{api_url_base}?o={offset}&fields={fields_to_request}'
max_retries = 3
retry_delay = 5
@@ -52,22 +38,18 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
if cancellation_event and cancellation_event.is_set():
raise RuntimeError("Fetch operation cancelled by user during retry loop.")
log_message = f" Fetching: {paginated_url} (Page approx. {offset // 50 + 1})"
log_message = f" Fetching post list: {api_url_base}?o={offset} (Page approx. {offset // 50 + 1})"
if attempt > 0:
log_message += f" (Attempt {attempt + 1}/{max_retries})"
logger(log_message)
try:
response = requests.get(paginated_url, headers=headers, timeout=(15, 90), cookies=cookies_dict)
# We can now remove the streaming logic as the response will be small and fast.
response = requests.get(paginated_url, headers=headers, timeout=(15, 60), cookies=cookies_dict)
response.raise_for_status()
if 'application/json' not in response.headers.get('Content-Type', '').lower():
logger(f"⚠️ Unexpected content type from API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}")
return []
return response.json()
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
except requests.exceptions.RequestException as e:
logger(f" ⚠️ Retryable network error on page fetch (Attempt {attempt + 1}): {e}")
if attempt < max_retries - 1:
delay = retry_delay * (2 ** attempt)
@@ -76,18 +58,46 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
continue
else:
logger(f" ❌ Failed to fetch page after {max_retries} attempts.")
raise RuntimeError(f"Timeout or connection error fetching offset {offset}")
except requests.exceptions.RequestException as e:
err_msg = f"Error fetching offset {offset}: {e}"
if e.response is not None:
err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
raise RuntimeError(err_msg)
except ValueError as e: # JSON decode error
raise RuntimeError(f"Error decoding JSON from offset {offset}: {e}. Response: {response.text[:200]}")
raise RuntimeError(f"Network error fetching offset {offset}")
except json.JSONDecodeError as e:
logger(f" ❌ Failed to decode JSON on page fetch (Attempt {attempt + 1}): {e}")
if attempt < max_retries - 1:
delay = retry_delay * (2 ** attempt)
logger(f" Retrying in {delay} seconds...")
time.sleep(delay)
continue
else:
raise RuntimeError(f"JSONDecodeError fetching offset {offset}")
raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.")
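The hunk above replaces the heavyweight list response with a metadata-only query and retries failures with exponential backoff. A minimal sketch of just the URL construction and the backoff schedule (the base URL is a placeholder; the field list is copied from the diff):

```python
# Sketch of the metadata-only pagination URL and the retry backoff used in
# fetch_posts_paginated above. The base URL here is a placeholder.
FIELDS = "id,user,service,title,shared_file,added,published,edited,file,attachments,tags"

def build_page_url(api_url_base: str, offset: int) -> str:
    # Requesting only these fields keeps the large 'content' blob out of
    # the list response, which is what avoids the timeouts.
    return f"{api_url_base}?o={offset}&fields={FIELDS}"

def backoff_delays(base_delay: int = 5, max_retries: int = 3) -> list:
    # Exponential backoff: 5s, 10s, 20s with the defaults from the diff.
    return [base_delay * (2 ** attempt) for attempt in range(max_retries)]
```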
def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None):
"""
--- NEW FUNCTION ---
Fetches the full data, including the 'content' field, for a single post.
"""
post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}"
logger(f" Fetching full content for post ID {post_id}...")
try:
# Use streaming here as a precaution for single posts that are still very large.
with requests.get(post_api_url, headers=headers, timeout=(15, 300), cookies=cookies_dict, stream=True) as response:
response.raise_for_status()
response_body = b""
for chunk in response.iter_content(chunk_size=8192):
response_body += chunk
full_post_data = json.loads(response_body)
# The API sometimes wraps the post in a list, handle that.
if isinstance(full_post_data, list) and full_post_data:
return full_post_data[0]
return full_post_data
except Exception as e:
logger(f" ❌ Failed to fetch full content for post {post_id}: {e}")
return None
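`fetch_single_post_data` streams the response and then unwraps the occasional list-shaped payload. The unwrapping rule in isolation (a sketch, not the project's actual helper):

```python
def unwrap_post(payload):
    # The single-post endpoint sometimes returns [post] instead of post;
    # normalize both shapes to a single dict (or None when the list is empty).
    if isinstance(payload, list):
        return payload[0] if payload else None
    return payload
```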
def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
"""Fetches all comments for a specific post."""
if cancellation_event and cancellation_event.is_set():


@@ -20,6 +20,26 @@ try:
from PIL import Image
except ImportError:
Image = None
#
try:
from fpdf import FPDF
# Add a simple class to handle the header/footer for stories
class PDF(FPDF):
def header(self):
pass # No header
def footer(self):
self.set_y(-15)
self.set_font('Arial', 'I', 8)
self.cell(0, 10, 'Page %s' % self.page_no(), 0, 0, 'C')
except ImportError:
FPDF = None
try:
from docx import Document
except ImportError:
Document = None
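The guarded imports above let the worker run even when its optional exporters are not installed. The same pattern in miniature (library names as in the diff; which formats come back depends on the environment):

```python
# Optional-dependency guard: import once at module load, record availability,
# and check the sentinel before use instead of importing at call time.
try:
    from fpdf import FPDF  # provided by the fpdf2 package
except ImportError:
    FPDF = None

try:
    from docx import Document  # provided by python-docx
except ImportError:
    Document = None

def available_export_formats() -> set:
    # Plain text always works; PDF and DOCX only if their libraries loaded.
    formats = {"txt"}
    if FPDF is not None:
        formats.add("pdf")
    if Document is not None:
        formats.add("docx")
    return formats
```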
# --- PyQt5 Imports ---
from PyQt5 .QtCore import Qt ,QThread ,pyqtSignal ,QMutex ,QMutexLocker ,QObject ,QTimer ,QSettings ,QStandardPaths ,QCoreApplication ,QUrl ,QSize ,QProcess
# --- Local Application Imports ---
@@ -48,6 +68,7 @@ class PostProcessorSignals (QObject ):
file_progress_signal =pyqtSignal (str ,object )
file_successfully_downloaded_signal =pyqtSignal (dict )
missed_character_post_signal =pyqtSignal (str ,str )
worker_finished_signal = pyqtSignal(tuple)
class PostProcessorWorker:
def __init__ (self ,post_data ,download_root ,known_names ,
@@ -81,6 +102,11 @@ class PostProcessorWorker:
keep_in_post_duplicates=False,
session_file_path=None,
session_lock=None,
processed_ids_to_skip=None,
text_only_scope=None,
text_export_format='txt',
single_pdf_mode=False,
project_root_dir=None,
):
self .post =post_data
self .download_root =download_root
@@ -134,6 +160,11 @@ class PostProcessorWorker:
self.keep_in_post_duplicates = keep_in_post_duplicates
self.session_file_path = session_file_path
self.session_lock = session_lock
self.processed_ids_to_skip = processed_ids_to_skip
self.text_only_scope = text_only_scope
self.text_export_format = text_export_format
self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE
self.project_root_dir = project_root_dir
if self .compress_images and Image is None :
self .logger ("⚠️ Image compression disabled: Pillow library not found.")
@@ -341,9 +372,9 @@ class PostProcessorWorker:
filename_to_save_in_main_path =cleaned_original_api_filename
was_original_name_kept_flag =False
if self .remove_from_filename_words_list and filename_to_save_in_main_path :
# Store the name before this specific modification, so we can revert if it gets destroyed.
name_before_word_removal = filename_to_save_in_main_path
base_name_for_removal ,ext_for_removal =os .path .splitext (filename_to_save_in_main_path )
modified_base_name =base_name_for_removal
@@ -354,12 +385,13 @@ class PostProcessorWorker:
modified_base_name =re .sub (r'[_.\s-]+',' ',modified_base_name )
modified_base_name =re .sub (r'\s+',' ',modified_base_name )
modified_base_name =modified_base_name .strip ()
if modified_base_name and modified_base_name !=ext_for_removal .lstrip ('.'):
filename_to_save_in_main_path =modified_base_name +ext_for_removal
else :
filename_to_save_in_main_path =base_name_for_removal +ext_for_removal
# If the name was stripped to nothing, revert to the name from before this block.
self.logger(f" ⚠️ Filename was empty after removing words. Reverting to '{name_before_word_removal}'.")
filename_to_save_in_main_path = name_before_word_removal
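The revert logic above guards against word removal stripping a filename down to nothing. A standalone sketch of that guard (the function name and word list are illustrative, not from the codebase):

```python
import os
import re

def remove_words_from_filename(filename: str, words: list) -> str:
    # Strip unwanted words from the stem, collapse separators, and
    # revert to the original name if nothing usable survives.
    stem, ext = os.path.splitext(filename)
    cleaned = stem
    for word in words:
        cleaned = re.sub(re.escape(word), ' ', cleaned, flags=re.IGNORECASE)
    cleaned = re.sub(r'[_.\s-]+', ' ', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
    if cleaned and cleaned != ext.lstrip('.'):
        return cleaned + ext
    return filename  # reverted: removal would have emptied the name
```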
if not self .download_thumbnails :
@@ -557,6 +589,8 @@ class PostProcessorWorker:
final_total_for_progress =total_size_bytes if download_successful_flag and total_size_bytes >0 else downloaded_size_bytes
self ._emit_signal ('file_progress',api_original_filename ,(downloaded_size_bytes ,final_total_for_progress ))
# --- Start of Replacement Block ---
# Rescue download if an IncompleteRead error occurred but the file is complete
if (not download_successful_flag and
isinstance(last_exception_for_retry_later, http.client.IncompleteRead) and
@@ -614,7 +648,6 @@ class PostProcessorWorker:
is_img_for_compress_check = is_image(api_original_filename)
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
# ... (This block for image compression remains the same)
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024 * 1024):.2f} MB)...")
if self._check_pause(f"Image compression for '{api_original_filename}'"): return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
img_content_for_pillow = None
@@ -699,7 +732,10 @@ class PostProcessorWorker:
if os.path.exists(final_save_path):
try: os.remove(final_save_path)
except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
# --- FIX: Report as a permanent failure so it appears in the error dialog ---
permanent_failure_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, }
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details
finally:
if data_to_write_io and hasattr(data_to_write_io, 'close'):
data_to_write_io.close()
@@ -745,7 +781,9 @@ class PostProcessorWorker:
if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
try: os.remove(downloaded_part_file_path)
except OSError: pass
return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None
# --- FIX: Report as a permanent failure so it appears in the error dialog ---
permanent_failure_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, }
return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details
data_to_write_io =None
filename_after_compression =filename_after_styling_and_word_removal
@@ -849,15 +887,34 @@ class PostProcessorWorker:
data_to_write_io .close ()
def process(self):
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None
if self .check_cancel ():return 0 ,0 ,[],[],[],None
current_character_filters =self ._get_current_character_filters ()
# --- FIX START: This entire method is now wrapped in a try...finally block ---
# to ensure it always reports completion back to the main window.
# Initialize result values to safe defaults for failure cases.
total_downloaded_this_post = 0
total_skipped_this_post = 0
kept_original_filenames_for_log = []
retryable_failures_this_post = []
permanent_failures_this_post = []
total_downloaded_this_post =0
total_skipped_this_post =0
history_data_for_this_post = None
temp_filepath_for_return = None
try:
post_id_for_skip_check = self.post.get('id')
if self.processed_ids_to_skip and post_id_for_skip_check in self.processed_ids_to_skip:
self.logger(f" -> Skipping Post {post_id_for_skip_check} (already processed in previous session).")
# We must emit 'worker_finished' so the main UI can count this as a completed (skipped) task.
num_potential_files_in_post = len(self.post.get('attachments', [])) + (1 if self.post.get('file') else 0)
total_skipped_this_post = num_potential_files_in_post
# The rest of the result tuple can be empty defaults
result_tuple = (0, total_skipped_this_post, [], [], [], None, None)
self._emit_signal('worker_finished', result_tuple)
return result_tuple
# ALL OF THE ORIGINAL LOGIC OF THE `process` METHOD GOES HERE
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None
if self .check_cancel ():return 0 ,0 ,[],[],[],None, None
current_character_filters =self ._get_current_character_filters ()
parsed_api_url =urlparse (self .api_url_input )
referer_url =f"https://{parsed_api_url .netloc }/"
@@ -986,23 +1043,23 @@ class PostProcessorWorker:
if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match :
self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.")
self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match :
self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'")
if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'):
self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ):
if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
post_title_lower =post_title .lower ()
for skip_word in self .skip_words_list :
if skip_word .lower ()in post_title_lower :
self .logger (f" -> Skip Post (Keyword in Title '{skip_word }'): '{post_title [:50 ]}...'. Scope: {self .skip_words_scope }")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match :
self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.")
self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if not isinstance (post_attachments ,list ):
self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). Skipping attachments.")
post_attachments =[]
@@ -1032,48 +1089,35 @@ class PostProcessorWorker:
base_folder_names_for_post_content =[cleaned_primary_folder_name ]
self .logger (f" Base folder name(s) for post content ({log_reason_for_folder }): {', '.join (base_folder_names_for_post_content )}")
elif not current_character_filters :
derived_folders_from_title_via_known_txt =match_folders_from_title (
post_title ,
self .known_names ,
effective_unwanted_keywords_for_folder_naming
)
valid_derived_folders_from_title_known_txt =[
name for name in derived_folders_from_title_via_known_txt
if name and name .strip ()and name .lower ()!="untitled_folder"
]
if valid_derived_folders_from_title_known_txt :
base_folder_names_for_post_content .extend (valid_derived_folders_from_title_known_txt )
self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Post Title): {', '.join (base_folder_names_for_post_content )}")
else :
candidate_name_from_title_basic_clean =extract_folder_name_from_title (
post_title ,
FOLDER_NAME_STOP_WORDS
)
title_is_only_creator_ignored_words =False
if candidate_name_from_title_basic_clean and candidate_name_from_title_basic_clean .lower ()!="untitled_folder"and self .creator_download_folder_ignore_words :
candidate_title_words ={word .lower ()for word in candidate_name_from_title_basic_clean .split ()}
if candidate_title_words and candidate_title_words .issubset (self .creator_download_folder_ignore_words ):
title_is_only_creator_ignored_words =True
self .logger (f" Title-derived name '{candidate_name_from_title_basic_clean }' consists only of creator-specific ignore words.")
if title_is_only_creator_ignored_words :
self .logger (f" Attempting Known.txt match on filenames as title was poor ('{candidate_name_from_title_basic_clean }').")
filenames_to_check =[
f_info ['_original_name_for_log']for f_info in all_files_from_post_api_for_char_check
if f_info .get ('_original_name_for_log')
]
derived_folders_from_filenames_known_txt =set ()
if filenames_to_check :
for fname in filenames_to_check :
@@ -1085,7 +1129,6 @@ class PostProcessorWorker:
for m in matches :
if m and m .strip ()and m .lower ()!="untitled_folder":
derived_folders_from_filenames_known_txt .add (m )
if derived_folders_from_filenames_known_txt :
base_folder_names_for_post_content .extend (list (derived_folders_from_filenames_known_txt ))
self .logger (f" Base folder name(s) for post content (Derived from Known.txt & Filenames): {', '.join (base_folder_names_for_post_content )}")
@@ -1101,7 +1144,6 @@ class PostProcessorWorker:
)
base_folder_names_for_post_content .append (extracted_name_from_title_full_ignore )
self .logger (f" Base folder name(s) for post content (Generic title parsing - title not solely creator-ignored words): {', '.join (base_folder_names_for_post_content )}")
base_folder_names_for_post_content =[
name for name in base_folder_names_for_post_content if name and name .strip ()
]
@@ -1109,49 +1151,36 @@ class PostProcessorWorker:
final_fallback_name =clean_folder_name (post_title if post_title and post_title .strip ()else "Generic Post Content")
base_folder_names_for_post_content =[final_fallback_name ]
self .logger (f" Ultimate fallback folder name: {final_fallback_name }")
if base_folder_names_for_post_content :
determined_post_save_path_for_history =os .path .join (determined_post_save_path_for_history ,base_folder_names_for_post_content [0 ])
if not self .extract_links_only and self .use_post_subfolders :
cleaned_post_title_for_sub =clean_folder_name (post_title )
post_id_for_fallback =self .post .get ('id','unknown_id')
if not cleaned_post_title_for_sub or cleaned_post_title_for_sub =="untitled_folder":
self .logger (f" ⚠️ Post title '{post_title }' resulted in a generic subfolder name. Using 'post_{post_id_for_fallback }' as base.")
original_cleaned_post_title_for_sub =f"post_{post_id_for_fallback }"
else :
original_cleaned_post_title_for_sub =cleaned_post_title_for_sub
if self.use_date_prefix_for_subfolder:
# Prioritize 'published' date, fall back to 'added' date
published_date_str = self.post.get('published') or self.post.get('added')
if published_date_str:
try:
# Extract just the date part (YYYY-MM-DD)
date_prefix = published_date_str.split('T')[0]
# Prepend the date to the folder name
original_cleaned_post_title_for_sub = f"{date_prefix} {original_cleaned_post_title_for_sub}"
self.logger(f" Applying date prefix to subfolder: '{original_cleaned_post_title_for_sub}'")
except Exception as e:
self.logger(f" ⚠️ Could not parse date '{published_date_str}' for prefix. Using original name. Error: {e}")
else:
self.logger(" ⚠️ 'Date Prefix' is checked, but post has no 'published' or 'added' date. Omitting prefix.")
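The date-prefix step above reduces to splitting the ISO timestamp at `'T'` and prepending the date part. As a standalone sketch (field names match the diff):

```python
def date_prefixed_subfolder(name: str, published=None, added=None) -> str:
    # Prefer the 'published' date, fall back to 'added'; when neither
    # exists the name is returned unchanged (prefix omitted).
    date_str = published or added
    if not date_str:
        return name
    return f"{date_str.split('T')[0]} {name}"
```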
base_path_for_post_subfolder =determined_post_save_path_for_history
suffix_counter =0
final_post_subfolder_name =""
while True :
if suffix_counter ==0 :
name_candidate =original_cleaned_post_title_for_sub
else :
name_candidate =f"{original_cleaned_post_title_for_sub }_{suffix_counter }"
potential_post_subfolder_path =os .path .join (base_path_for_post_subfolder ,name_candidate )
try :
os .makedirs (potential_post_subfolder_path ,exist_ok =False )
final_post_subfolder_name =name_candidate
@@ -1169,9 +1198,139 @@ class PostProcessorWorker:
self .logger (f" ❌ Error creating directory '{potential_post_subfolder_path }': {e_mkdir }. Files for this post might be saved in parent or fail.")
final_post_subfolder_name =original_cleaned_post_title_for_sub
break
determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name )
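The suffix loop above relies on `os.makedirs(..., exist_ok=False)` to claim a folder name atomically. A compact sketch of the same collision handling (helper name is illustrative):

```python
import os

def make_unique_post_dir(base_path: str, name: str) -> str:
    # exist_ok=False turns a name collision into FileExistsError, so the
    # directory creation doubles as an atomic "is this name taken?" check.
    counter = 0
    while True:
        candidate = name if counter == 0 else f"{name}_{counter}"
        path = os.path.join(base_path, candidate)
        try:
            os.makedirs(path, exist_ok=False)
            return path
        except FileExistsError:
            counter += 1
```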
if self.filter_mode == 'text_only' and not self.extract_links_only:
self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})")
post_title_lower = post_title.lower()
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
for skip_word in self.skip_words_list:
if skip_word.lower() in post_title_lower:
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.")
return 0, num_potential_files_in_post, [], [], [], None, None
if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope:
self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.")
return 0, num_potential_files_in_post, [], [], [], None, None
raw_text_content = ""
final_post_data = post_data
if self.text_only_scope == 'content' and 'content' not in final_post_data:
self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
parsed_url = urlparse(self.api_url_input)
api_domain = parsed_url.netloc
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
from .api_client import fetch_single_post_data
full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
if full_data:
final_post_data = full_data
if self.text_only_scope == 'content':
raw_text_content = final_post_data.get('content', '')
elif self.text_only_scope == 'comments':
try:
parsed_url = urlparse(self.api_url_input)
api_domain = parsed_url.netloc
comments_data = fetch_post_comments(api_domain, self.service, self.user_id, post_id, headers, self.logger, self.cancellation_event, self.pause_event)
if comments_data:
comment_texts = []
for comment in comments_data:
user = comment.get('user', {}).get('name', 'Unknown User')
timestamp = comment.get('updated', 'No Date')
body = strip_html_tags(comment.get('content', ''))
comment_texts.append(f"--- Comment by {user} on {timestamp} ---\n{body}\n")
raw_text_content = "\n".join(comment_texts)
except Exception as e:
self.logger(f" ❌ Error fetching comments for text-only mode: {e}")
if not raw_text_content or not raw_text_content.strip():
self.logger(" -> Skip Saving Text: No content/comments found or fetched.")
return 0, num_potential_files_in_post, [], [], [], None, None
paragraph_pattern = re.compile(r'<p.*?>(.*?)</p>', re.IGNORECASE | re.DOTALL)
html_paragraphs = paragraph_pattern.findall(raw_text_content)
cleaned_text = ""
if not html_paragraphs:
self.logger(" ⚠️ No <p> tags found. Falling back to basic HTML cleaning for the whole block.")
text_with_br = re.sub(r'<br\s*/?>', '\n', raw_text_content, flags=re.IGNORECASE)
cleaned_text = re.sub(r'<.*?>', '', text_with_br)
else:
cleaned_paragraphs_list = []
for p_content in html_paragraphs:
p_with_br = re.sub(r'<br\s*/?>', '\n', p_content, flags=re.IGNORECASE)
p_cleaned = re.sub(r'<.*?>', '', p_with_br)
p_final = html.unescape(p_cleaned).strip()
if p_final:
cleaned_paragraphs_list.append(p_final)
cleaned_text = '\n\n'.join(cleaned_paragraphs_list)
cleaned_text = cleaned_text.replace('…', '...')
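The extraction above can be summarized as: prefer `<p>` blocks, fall back to stripping tags from the whole text. A self-contained sketch using the same regexes as the diff:

```python
import html
import re

def clean_post_html(raw: str) -> str:
    # Prefer <p> blocks; if none are found, fall back to converting <br>
    # to newlines and stripping every remaining tag.
    paragraphs = re.findall(r'<p.*?>(.*?)</p>', raw, re.IGNORECASE | re.DOTALL)
    if not paragraphs:
        text = re.sub(r'<br\s*/?>', '\n', raw, flags=re.IGNORECASE)
        return re.sub(r'<.*?>', '', text)
    cleaned = []
    for p in paragraphs:
        p = re.sub(r'<br\s*/?>', '\n', p, flags=re.IGNORECASE)
        p = re.sub(r'<.*?>', '', p)
        p = html.unescape(p).strip()
        if p:
            cleaned.append(p)
    return '\n\n'.join(cleaned)
```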
if self.single_pdf_mode:
if not cleaned_text:
return 0, 0, [], [], [], None, None
content_data = {
'title': post_title,
'content': cleaned_text,
'published': self.post.get('published') or self.post.get('added')
}
temp_dir = os.path.join(self.app_base_dir, "appdata")
os.makedirs(temp_dir, exist_ok=True)
temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json"
temp_filepath = os.path.join(temp_dir, temp_filename)
try:
with open(temp_filepath, 'w', encoding='utf-8') as f:
json.dump(content_data, f, indent=2)
self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.")
self._emit_signal('worker_finished', (0, 0, [], [], [], None, temp_filepath))
return (0, 0, [], [], [], None, temp_filepath)
except Exception as e:
self.logger(f" ❌ Failed to write temporary file for single PDF: {e}")
self._emit_signal('worker_finished', (0, 0, [], [], [], None, None))
return (0, 0, [], [], [], None, None)
else:
file_extension = self.text_export_format
txt_filename = clean_filename(post_title) + f".{file_extension}"
final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)
try:
os.makedirs(determined_post_save_path_for_history, exist_ok=True)
base, ext = os.path.splitext(final_save_path)
counter = 1
while os.path.exists(final_save_path):
final_save_path = f"{base}_{counter}{ext}"
counter += 1
if file_extension == 'pdf':
if FPDF:
self.logger(f" Converting to PDF...")
pdf = PDF()
font_path = ""
if self.project_root_dir:
font_path = os.path.join(self.project_root_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf')
try:
if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}")
pdf.add_font('DejaVu', '', font_path, uni=True)
pdf.set_font('DejaVu', '', 12)
except Exception as font_error:
self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.")
pdf.set_font('Arial', '', 12)
pdf.add_page()
pdf.multi_cell(0, 5, cleaned_text)
pdf.output(final_save_path)
else:
self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
elif file_extension == 'docx':
if Document:
self.logger(f" Converting to DOCX...")
document = Document()
document.add_paragraph(cleaned_text)
document.save(final_save_path)
else:
self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.")
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
else:
with open(final_save_path, 'w', encoding='utf-8') as f:
f.write(cleaned_text)
self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None
except Exception as e:
self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}")
return 0, num_potential_files_in_post, [], [], [], None, None
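The rename-on-collision loop used when saving the text file generalizes to a small helper. A standalone sketch (the helper name is mine, not from the source):

```python
import os
import tempfile

def unique_path(path: str) -> str:
    """Append _1, _2, ... before the extension until the path is free."""
    base, ext = os.path.splitext(path)
    counter = 1
    while os.path.exists(path):
        path = f"{base}_{counter}{ext}"
        counter += 1
    return path

with tempfile.TemporaryDirectory() as d:
    target = os.path.join(d, "post.txt")
    open(target, "w").close()                     # first file occupies the name
    print(os.path.basename(unique_path(target)))  # post_1.txt
```

Like the original, this is check-then-write rather than atomic, which is fine for a single-writer download folder but would race if several workers saved into the same directory simultaneously.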
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
if self._check_pause(f"Folder keyword skip check for post {post_id}"): return 0, num_potential_files_in_post, [], [], [], None, None
for folder_name_to_check in base_folder_names_for_post_content:
@@ -1179,7 +1338,7 @@ class PostProcessorWorker:
if any(skip_word.lower() in folder_name_to_check.lower() for skip_word in self.skip_words_list):
matched_skip = next((sw for sw in self.skip_words_list if sw.lower() in folder_name_to_check.lower()), "unknown_skip_word")
self.logger(f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check}' contains '{matched_skip}'.")
return 0, num_potential_files_in_post, [], [], [], None
return 0, num_potential_files_in_post, [], [], [], None, None
if (self.show_external_links or self.extract_links_only) and post_content_html:
if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], [], [], None, None
try:
@@ -1205,7 +1364,6 @@ class PostProcessorWorker:
potential_key_from_fragment = parsed_mega_url.fragment.split('!')[-1]
if mega_key_pattern.fullmatch(potential_key_from_fragment):
decryption_key_found = potential_key_from_fragment
if not decryption_key_found and link_text:
key_match_in_text = mega_key_pattern.search(link_text)
if key_match_in_text:
@@ -1326,14 +1484,10 @@ class PostProcessorWorker:
return 0, 0, [], [], [], None, None
files_to_download_info_list = []
processed_original_filenames_in_this_post = set()
if self.keep_in_post_duplicates:
# If we keep duplicates, just add every file to the list to be processed.
# The downstream hash check and rename-on-collision logic will handle them.
files_to_download_info_list.extend(all_files_from_post_api)
self.logger(f" 'Keep Duplicates' is on. All {len(all_files_from_post_api)} files from post will be processed.")
else:
# This is the original logic that skips duplicates by name within a post.
for file_info in all_files_from_post_api:
current_api_original_filename = file_info.get('_original_name_for_log')
if current_api_original_filename in processed_original_filenames_in_this_post:
@@ -1343,12 +1497,9 @@ class PostProcessorWorker:
files_to_download_info_list.append(file_info)
if current_api_original_filename:
processed_original_filenames_in_this_post.add(current_api_original_filename)
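The name-based in-post dedupe amounts to an order-preserving "seen set" filter. A standalone sketch (the field name `_original_name_for_log` comes from the code above; the helper name is mine):

```python
def dedupe_by_name(files):
    """Keep the first occurrence of each original filename, preserving order."""
    seen = set()
    unique = []
    for info in files:
        name = info.get('_original_name_for_log')
        if name in seen:
            continue  # duplicate original name within the same post: skip it
        unique.append(info)
        if name:
            seen.add(name)  # only truthy names are remembered, as in the original
    return unique

posts = [{'_original_name_for_log': 'a.png'},
         {'_original_name_for_log': 'b.png'},
         {'_original_name_for_log': 'a.png'}]
print([f['_original_name_for_log'] for f in dedupe_by_name(posts)])  # ['a.png', 'b.png']
```

Because unnamed entries are never added to the seen set, files with a missing original name all pass through, mirroring the guarded `add()` in the worker.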
if not files_to_download_info_list:
self.logger(f" All files for post {post_id} were duplicate original names or skipped earlier.")
return 0, total_skipped_this_post, [], [], [], None, None
self.logger(f" Identified {len(files_to_download_info_list)} unique original file(s) for potential download from post {post_id}.")
with ThreadPoolExecutor(max_workers=self.num_file_threads, thread_name_prefix=f'P{post_id}File_') as file_pool:
futures_list = []
@@ -1410,10 +1561,7 @@ class PostProcessorWorker:
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
total_skipped_this_post += 1
continue
target_base_folders_for_this_file_iteration = []
if current_character_filters:
char_title_subfolder_name = None
if self.target_post_id_from_initial_url and self.custom_folder_name:
@@ -1434,24 +1582,17 @@ class PostProcessorWorker:
target_base_folders_for_this_file_iteration.extend(base_folder_names_for_post_content)
else:
target_base_folders_for_this_file_iteration.append(clean_folder_name(post_title))
if not target_base_folders_for_this_file_iteration:
target_base_folders_for_this_file_iteration.append(clean_folder_name(post_title if post_title else "Uncategorized_Post_Content"))
for target_base_folder_name_for_instance in target_base_folders_for_this_file_iteration:
current_path_for_file_instance = self.override_output_dir if self.override_output_dir else self.download_root
if self.use_subfolders and target_base_folder_name_for_instance:
current_path_for_file_instance = os.path.join(current_path_for_file_instance, target_base_folder_name_for_instance)
if self.use_post_subfolders:
current_path_for_file_instance = os.path.join(current_path_for_file_instance, final_post_subfolder_name)
manga_date_counter_to_pass = self.manga_date_file_counter_ref if self.manga_mode_active and self.manga_filename_style == STYLE_DATE_BASED else None
manga_global_counter_to_pass = self.manga_global_file_counter_ref if self.manga_mode_active and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING else None
folder_context_for_file = target_base_folder_name_for_instance if self.use_subfolders and target_base_folder_name_for_instance else clean_folder_name(post_title)
futures_list.append(file_pool.submit(
self._download_single_file,
file_info=file_info_to_dl,
@@ -1461,7 +1602,6 @@ class PostProcessorWorker:
manga_global_file_counter_ref=manga_global_counter_to_pass, folder_context_name_for_history=folder_context_for_file,
file_index_in_post=file_idx, num_files_in_this_post=len(files_to_download_info_list)
))
for future in as_completed(futures_list):
if self.check_cancel():
for f_to_cancel in futures_list:
@@ -1485,42 +1625,30 @@ class PostProcessorWorker:
self.logger(f"❌ File download task for post {post_id} resulted in error: {exc_f}")
total_skipped_this_post += 1
self._emit_signal('file_progress', "", None)
# After a post's files are all processed, update the session file to mark this post as done.
if self.session_file_path and self.session_lock:
try:
with self.session_lock:
if os.path.exists(self.session_file_path):  # only update an existing session file
# Read the current session state
with open(self.session_file_path, 'r', encoding='utf-8') as f:
session_data = json.load(f)
if 'download_state' not in session_data:
session_data['download_state'] = {}
# Add processed ID
if not isinstance(session_data['download_state'].get('processed_post_ids'), list):
session_data['download_state']['processed_post_ids'] = []
session_data['download_state']['processed_post_ids'].append(self.post.get('id'))
# Add any permanent failures from this worker to the session file
if permanent_failures_this_post:
if not isinstance(session_data['download_state'].get('permanently_failed_files'), list):
session_data['download_state']['permanently_failed_files'] = []
# To avoid duplicates if the same post is somehow re-processed
existing_failed_urls = {f.get('file_info', {}).get('url') for f in session_data['download_state']['permanently_failed_files']}
for failure in permanent_failures_this_post:
if failure.get('file_info', {}).get('url') not in existing_failed_urls:
session_data['download_state']['permanently_failed_files'].append(failure)
# Write to temp file and then atomically replace
temp_file_path = self.session_file_path + ".tmp"
with open(temp_file_path, 'w', encoding='utf-8') as f_tmp:
json.dump(session_data, f_tmp, indent=2)
os.replace(temp_file_path, self.session_file_path)
except Exception as e:
self.logger(f"⚠️ Could not update session file for post {post_id}: {e}")
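The write-to-temp-then-replace pattern above is what makes the session update crash-safe: `os.replace` swaps files atomically on the same volume, so a reader never observes a half-written JSON file. A minimal sketch of the same idea (helper and file names are illustrative):

```python
import json
import os
import tempfile

def atomic_update_json(path: str, mutate) -> None:
    """Load JSON, apply mutate(data) in place, and swap the file atomically."""
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    mutate(data)
    tmp = path + ".tmp"
    with open(tmp, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
    os.replace(tmp, path)  # atomic for same-volume paths on POSIX and Windows

with tempfile.TemporaryDirectory() as d:
    session = os.path.join(d, "session.json")
    with open(session, 'w', encoding='utf-8') as f:
        json.dump({"download_state": {"processed_post_ids": []}}, f)
    atomic_update_json(session, lambda s: s["download_state"]["processed_post_ids"].append("123"))
    with open(session, encoding='utf-8') as f:
        print(json.load(f)["download_state"]["processed_post_ids"])  # ['123']
```

The worker additionally holds `session_lock` around the read-modify-write, which is what prevents two workers from interleaving updates; `os.replace` alone only guarantees that each individual swap is all-or-nothing.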
if not self.extract_links_only and (total_downloaded_this_post > 0 or not (
(current_character_filters and (
(self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match) or
@@ -1533,7 +1661,6 @@ class PostProcessorWorker:
top_file_name_for_history = post_main_file_info['name']
elif post_attachments and post_attachments[0].get('name'):
top_file_name_for_history = post_attachments[0]['name']
history_data_for_this_post = {
'post_title': post_title, 'post_id': post_id,
'top_file_name': top_file_name_for_history,
@@ -1544,9 +1671,7 @@ class PostProcessorWorker:
}
if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled.")
else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}")
if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
path_to_check_for_emptiness = determined_post_save_path_for_history
try:
if os.path.isdir(path_to_check_for_emptiness) and not os.listdir(path_to_check_for_emptiness):
@@ -1555,7 +1680,26 @@ class PostProcessorWorker:
except OSError as e_rmdir:
self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
return total_downloaded_this_post, total_skipped_this_post, kept_original_filenames_for_log, retryable_failures_this_post, permanent_failures_this_post, history_data_for_this_post, temp_filepath_for_return
except Exception as e:
post_id = self.post.get('id', 'N/A')
# Log the unexpected crash of the worker
self.logger(f"❌ CRITICAL WORKER FAILURE on Post ID {post_id}: {e}\n{traceback.format_exc(limit=4)}")
# Ensure the number of skipped files reflects the total potential files in the post,
# as none of them were processed successfully.
num_potential_files_in_post = len(self.post.get('attachments', [])) + (1 if self.post.get('file') else 0)
total_skipped_this_post = num_potential_files_in_post
total_downloaded_this_post = 0
finally:
# This 'finally' block ensures that the worker ALWAYS reports back,
# preventing the main UI from getting stuck.
result_tuple = (total_downloaded_this_post, total_skipped_this_post,
kept_original_filenames_for_log, retryable_failures_this_post,
permanent_failures_this_post, history_data_for_this_post,
temp_filepath_for_return)
self._emit_signal('worker_finished', result_tuple)
return result_tuple
class DownloadThread (QThread ):
progress_signal =pyqtSignal (str )
@@ -1605,6 +1749,11 @@ class DownloadThread (QThread ):
cookie_text ="",
session_file_path=None,
session_lock=None,
processed_ids_to_skip=None,
text_only_scope=None,
text_export_format='txt',
single_pdf_mode=False,
project_root_dir=None,
):
super().__init__()
self.api_url_input = api_url_input
@@ -1659,7 +1808,13 @@ class DownloadThread (QThread ):
self .manga_global_file_counter_ref =manga_global_file_counter_ref
self.session_file_path = session_file_path
self.session_lock = session_lock
self.processed_ids_to_skip = processed_ids_to_skip
self.history_candidates_buffer =deque (maxlen =8 )
self.text_only_scope = text_only_scope
self.text_export_format = text_export_format
self.single_pdf_mode = single_pdf_mode
self.project_root_dir = project_root_dir
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
self.compress_images = False
@@ -1682,7 +1837,13 @@ class DownloadThread (QThread ):
self.logger("⏭️ Skip requested for current file (single-thread mode).")
self.skip_current_file_flag.set()
else: self.logger(" Skip file: No download active or skip flag not available for current context.")
def run(self):
"""
Main execution method for the single-threaded download process.
Unpacks the worker's 7-value result tuple and forwards the
'single_pdf_mode' setting to each PostProcessorWorker.
"""
grand_total_downloaded_files = 0
grand_total_skipped_files = 0
grand_list_of_kept_original_filenames = []
@@ -1697,12 +1858,12 @@ class DownloadThread (QThread ):
elif self.service and self.user_id:
creator_based_folder_name = clean_folder_name(str(self.user_id))
series_scan_dir = os.path.join(series_scan_dir, creator_based_folder_name)
highest_num = 0
if os.path.isdir(series_scan_dir):
self.logger(f" [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir}'...")
for dirpath, _, filenames_in_dir in os.walk(series_scan_dir):
for filename_to_check in filenames_in_dir:
prefix_to_check = clean_filename(self.manga_date_prefix.strip()) if self.manga_date_prefix and self.manga_date_prefix.strip() else ""
name_part_to_match = filename_to_check
if prefix_to_check and name_part_to_match.startswith(prefix_to_check):
@@ -1710,14 +1871,18 @@ class DownloadThread (QThread ):
base_name_no_ext = os.path.splitext(name_part_to_match)[0]
match = re.match(r"(\d+)", base_name_no_ext)
if match :highest_num =max (highest_num ,int (match .group (1 )))
if match:
highest_num = max(highest_num, int(match.group(1)))
self.manga_date_file_counter_ref = [highest_num + 1, threading.Lock()]
self.logger(f" [Thread] Manga Date Mode: Initialized date-based counter at {self.manga_date_file_counter_ref[0]}.")
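The counter initialization above boils down to "find the largest leading number across existing filenames, then start at that plus one". A sketch of the scan in isolation (the directory layout and helper name are invented; the real code also strips an optional cleaned manga prefix before matching):

```python
import os
import re
import tempfile

def next_file_number(root: str, prefix: str = "") -> int:
    """Scan root recursively for names like '017 finale.png' and return max+1."""
    highest = 0
    for _, _, names in os.walk(root):
        for name in names:
            # Strip the optional filename prefix before looking for leading digits.
            stem = name[len(prefix):] if prefix and name.startswith(prefix) else name
            m = re.match(r"(\d+)", os.path.splitext(stem)[0])
            if m:
                highest = max(highest, int(m.group(1)))
    return highest + 1

with tempfile.TemporaryDirectory() as d:
    for fname in ("001 intro.jpg", "017 finale.png", "notes.txt"):
        open(os.path.join(d, fname), "w").close()
    print(next_file_number(d))  # 18
```

An empty or missing directory yields 1, matching the thread's behavior of starting the date-based counter at `highest_num + 1`.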
pass
if self.manga_mode_active and self.manga_filename_style == STYLE_POST_TITLE_GLOBAL_NUMBERING and not self.extract_links_only and self.manga_global_file_counter_ref is None:
self.manga_global_file_counter_ref = [1, threading.Lock()]
self.logger(f" [Thread] Manga Title+GlobalNum Mode: Initialized global counter at {self.manga_global_file_counter_ref[0]}.")
pass
worker_signals_obj = PostProcessorSignals()
try:
worker_signals_obj.progress_signal.connect(self.progress_signal)
@@ -1726,7 +1891,12 @@ class DownloadThread (QThread ):
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal)
worker_signals_obj.worker_finished_signal.connect(lambda result: None)
self.logger(" Starting post fetch (single-threaded download process)...")
self.logger(" Fetching ALL available post information first. This may take a moment...")
all_posts_data = []
post_generator = download_from_api(
self.api_url_input,
logger=self.logger,
@@ -1741,12 +1911,21 @@ class DownloadThread (QThread ):
app_base_dir=self.app_base_dir,
manga_filename_style_for_sort_check=self.manga_filename_style if self.manga_mode_active else None
)
for posts_batch_data in post_generator:
if self._check_pause_self("Post batch processing"): was_process_cancelled = True; break
if self.isInterruptionRequested(): was_process_cancelled = True; break
for individual_post_data in posts_batch_data:
if self._check_pause_self(f"Individual post processing for {individual_post_data.get('id', 'N/A')}"): was_process_cancelled = True; break
if self.isInterruptionRequested(): was_process_cancelled = True; break
if self.isInterruptionRequested():
was_process_cancelled = True
break
all_posts_data.extend(posts_batch_data)
if not was_process_cancelled:
self.logger(f"✅ Fetching complete. Found {len(all_posts_data)} total posts. Starting download process...")
for individual_post_data in all_posts_data:
if self.isInterruptionRequested():
was_process_cancelled = True
break
post_processing_worker = PostProcessorWorker(
post_data=individual_post_data,
download_root=self.output_dir,
@@ -1793,11 +1972,20 @@ class DownloadThread (QThread ):
creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
session_file_path=self.session_file_path,
session_lock=self.session_lock,
processed_ids_to_skip=self.processed_ids_to_skip,
text_only_scope=self.text_only_scope,
text_export_format=self.text_export_format,
single_pdf_mode=self.single_pdf_mode,
project_root_dir=self.project_root_dir
)
try:
dl_count ,skip_count ,kept_originals_this_post ,retryable_failures ,permanent_failures ,history_data =post_processing_worker .process ()
(dl_count, skip_count, kept_originals_this_post,
retryable_failures, permanent_failures,
history_data, temp_filepath) = post_processing_worker.process()
grand_total_downloaded_files += dl_count
grand_total_skipped_files += skip_count
if kept_originals_this_post:
grand_list_of_kept_original_filenames.extend(kept_originals_this_post)
if retryable_failures:
@@ -1807,24 +1995,28 @@ class DownloadThread (QThread ):
self.post_processed_for_history_signal.emit(history_data)
if permanent_failures:
self.permanent_file_failed_signal.emit(permanent_failures)
if self.single_pdf_mode and temp_filepath:
self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")
except Exception as proc_err:
post_id_for_err = individual_post_data.get('id', 'N/A')
self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}")
traceback.print_exc()
num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0)
grand_total_skipped_files += num_potential_files_est
if self.skip_current_file_flag and self.skip_current_file_flag.is_set():
self.skip_current_file_flag.clear()
self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
self.msleep(10)
if was_process_cancelled: break
if not was_process_cancelled and not self.isInterruptionRequested():
self.logger("✅ All posts processed or end of content reached by DownloadThread.")
except Exception as main_thread_err:
self.logger(f"\n❌ Critical error within DownloadThread run loop: {main_thread_err}")
traceback.print_exc()
if not self.isInterruptionRequested(): was_process_cancelled = False
finally:
try:
if worker_signals_obj:
@@ -1834,14 +2026,10 @@ class DownloadThread (QThread ):
worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal)
worker_signals_obj.file_successfully_downloaded_signal.disconnect(self.file_successfully_downloaded_signal)
except (TypeError, RuntimeError) as e:
self.logger(f" Note during DownloadThread signal disconnection: {e}")
self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)
def receive_add_character_result(self, result):
with QMutexLocker(self.prompt_mutex):
self._add_character_response = result
self.logger(f" (DownloadThread) Received character prompt response: {'Yes (added/confirmed)' if result else 'No (declined/failed)'}")
class InterruptedError(Exception):
"""Custom exception for handling cancellations gracefully."""


@@ -144,7 +144,7 @@ class EmptyPopupDialog (QDialog ):
self.setMinimumSize(int(400 * scale_factor), int(300 * scale_factor))
self.parent_app = parent_app_ref
self.current_scope_mode = self.SCOPE_CHARACTERS
self.current_scope_mode = self.SCOPE_CREATORS
self.app_base_dir = app_base_dir
app_icon = get_app_icon_object()


@@ -126,6 +126,21 @@ class FavoriteArtistsDialog (QDialog ):
self.artist_list_widget.setVisible(show)
def _fetch_favorite_artists(self):
if self.cookies_config['use_cookie']:
# Check if we can load cookies for at least one of the services.
kemono_cookies = prepare_cookies_for_request(True, self.cookies_config['cookie_text'], self.cookies_config['selected_cookie_file'], self.cookies_config['app_base_dir'], self._logger, target_domain="kemono.su")
coomer_cookies = prepare_cookies_for_request(True, self.cookies_config['cookie_text'], self.cookies_config['selected_cookie_file'], self.cookies_config['app_base_dir'], self._logger, target_domain="coomer.su")
if not kemono_cookies and not coomer_cookies:
# If cookies are enabled but none could be loaded, show help and stop.
self.status_label.setText(self._tr("fav_artists_cookies_required_status", "Error: Cookies enabled but could not be loaded for any source."))
self._logger("Error: Cookies enabled but no valid cookies were loaded. Showing help dialog.")
cookie_help_dialog = CookieHelpDialog(self.parent_app, self)
cookie_help_dialog.exec_()
self.download_button.setEnabled(False)
return # Stop further execution
kemono_fav_url = "https://kemono.su/api/v1/account/favorites?type=artist"
coomer_fav_url = "https://coomer.su/api/v1/account/favorites?type=artist"


@@ -0,0 +1,83 @@
from PyQt5.QtWidgets import (
QDialog, QVBoxLayout, QRadioButton, QDialogButtonBox, QButtonGroup, QLabel, QComboBox, QHBoxLayout, QCheckBox
)
from PyQt5.QtCore import Qt
class MoreOptionsDialog(QDialog):
"""
A dialog for selecting a scope, export format, and single PDF option.
"""
SCOPE_CONTENT = "content"
SCOPE_COMMENTS = "comments"
def __init__(self, parent=None, current_scope=None, current_format=None, single_pdf_checked=False):
super().__init__(parent)
self.setWindowTitle("More Options")
self.setMinimumWidth(350)
layout = QVBoxLayout(self)
self.description_label = QLabel("Please choose the scope for the action:")
layout.addWidget(self.description_label)
self.radio_button_group = QButtonGroup(self)
self.radio_content = QRadioButton("Description/Content")
self.radio_comments = QRadioButton("Comments")
self.radio_button_group.addButton(self.radio_content)
self.radio_button_group.addButton(self.radio_comments)
layout.addWidget(self.radio_content)
layout.addWidget(self.radio_comments)
if current_scope == self.SCOPE_COMMENTS:
self.radio_comments.setChecked(True)
else:
self.radio_content.setChecked(True)
export_layout = QHBoxLayout()
export_label = QLabel("Export as:")
self.format_combo = QComboBox()
self.format_combo.addItems(["PDF", "DOCX", "TXT"])
if current_format and current_format.upper() in ["PDF", "DOCX", "TXT"]:
self.format_combo.setCurrentText(current_format.upper())
else:
self.format_combo.setCurrentText("PDF")
export_layout.addWidget(export_label)
export_layout.addWidget(self.format_combo)
export_layout.addStretch()
layout.addLayout(export_layout)
# Single PDF checkbox
self.single_pdf_checkbox = QCheckBox("Single PDF")
self.single_pdf_checkbox.setToolTip("If checked, all text from matching posts will be compiled into one single PDF file.")
self.single_pdf_checkbox.setChecked(single_pdf_checked)
layout.addWidget(self.single_pdf_checkbox)
self.format_combo.currentTextChanged.connect(self.update_single_pdf_checkbox_state)
self.update_single_pdf_checkbox_state(self.format_combo.currentText())
self.button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
self.button_box.accepted.connect(self.accept)
self.button_box.rejected.connect(self.reject)
layout.addWidget(self.button_box)
self.setLayout(layout)
def update_single_pdf_checkbox_state(self, text):
"""Enable the Single PDF checkbox only if the format is PDF."""
is_pdf = (text.upper() == "PDF")
self.single_pdf_checkbox.setEnabled(is_pdf)
if not is_pdf:
self.single_pdf_checkbox.setChecked(False)
def get_selected_scope(self):
if self.radio_comments.isChecked():
return self.SCOPE_COMMENTS
return self.SCOPE_CONTENT
def get_selected_format(self):
return self.format_combo.currentText().lower()
def get_single_pdf_state(self):
"""Returns the state of the Single PDF checkbox."""
return self.single_pdf_checkbox.isChecked() and self.single_pdf_checkbox.isEnabled()


@@ -0,0 +1,77 @@
# SinglePDF.py
import os
try:
from fpdf import FPDF
FPDF_AVAILABLE = True
except ImportError:
FPDF_AVAILABLE = False
class PDF(FPDF):
"""Custom PDF class to handle headers and footers."""
def header(self):
# No header
pass
def footer(self):
# Position at 1.5 cm from bottom
self.set_y(-15)
self.set_font('DejaVu', '', 8)
# Page number
self.cell(0, 10, 'Page ' + str(self.page_no()), 0, 0, 'C')
def create_single_pdf_from_content(posts_data, output_filename, font_path, logger=print):
"""
Creates a single PDF from a list of post titles and content.
Args:
posts_data (list): A list of dictionaries, where each dict has 'title' and 'content' keys.
output_filename (str): The full path for the output PDF file.
font_path (str): Path to the DejaVuSans.ttf font file.
logger (function, optional): A function to log progress and errors. Defaults to print.
"""
if not FPDF_AVAILABLE:
logger("❌ PDF Creation failed: 'fpdf2' library is not installed. Please run: pip install fpdf2")
return False
if not posts_data:
logger(" No text content was collected to create a PDF.")
return False
pdf = PDF()
try:
if not os.path.exists(font_path):
raise RuntimeError("Font file not found.")
pdf.add_font('DejaVu', '', font_path, uni=True)
pdf.add_font('DejaVu', 'B', font_path, uni=True) # Add Bold variant
except Exception as font_error:
logger(f" ⚠️ Could not load DejaVu font: {font_error}")
logger(" PDF may not support all characters. Falling back to default Arial font.")
pdf.set_font('Arial', '', 12)
pdf.set_font('Arial', 'B', 16)
logger(f" Starting PDF creation with content from {len(posts_data)} posts...")
for post in posts_data:
pdf.add_page()
# Post Title
pdf.set_font('DejaVu', 'B', 16)
# Pass align='L' explicitly; fpdf2's multi_cell takes alignment as a keyword argument.
pdf.multi_cell(w=0, h=10, text=post.get('title', 'Untitled Post'), align='L')
pdf.ln(5) # Add a little space after the title
# Post Content
pdf.set_font('DejaVu', '', 12)
pdf.multi_cell(w=0, h=7, text=post.get('content', 'No Content'))
try:
pdf.output(output_filename)
logger(f"✅ Successfully created single PDF: '{os.path.basename(output_filename)}'")
return True
except Exception as e:
logger(f"❌ A critical error occurred while saving the final PDF: {e}")
return False

src/ui/flow_layout.py Normal file

@@ -0,0 +1,93 @@
# src/ui/flow_layout.py
from PyQt5.QtWidgets import QLayout, QSizePolicy, QStyle
from PyQt5.QtCore import QPoint, QRect, QSize, Qt
class FlowLayout(QLayout):
"""A custom layout that arranges widgets in a flow, wrapping as necessary."""
def __init__(self, parent=None, margin=0, spacing=-1):
super(FlowLayout, self).__init__(parent)
if parent is not None:
self.setContentsMargins(margin, margin, margin, margin)
self.setSpacing(spacing)
self.itemList = []
def __del__(self):
item = self.takeAt(0)
while item:
item = self.takeAt(0)
def addItem(self, item):
self.itemList.append(item)
def count(self):
return len(self.itemList)
def itemAt(self, index):
if 0 <= index < len(self.itemList):
return self.itemList[index]
return None
def takeAt(self, index):
if 0 <= index < len(self.itemList):
return self.itemList.pop(index)
return None
def expandingDirections(self):
return Qt.Orientations(Qt.Orientation(0))
def hasHeightForWidth(self):
return True
def heightForWidth(self, width):
return self._do_layout(QRect(0, 0, width, 0), True)
def setGeometry(self, rect):
super(FlowLayout, self).setGeometry(rect)
self._do_layout(rect, False)
def sizeHint(self):
return self.minimumSize()
def minimumSize(self):
size = QSize()
for item in self.itemList:
size = size.expandedTo(item.minimumSize())
margin, _, _, _ = self.getContentsMargins()
size += QSize(2 * margin, 2 * margin)
return size
def _do_layout(self, rect, test_only):
x = rect.x()
y = rect.y()
line_height = 0
space_x = self.spacing()
space_y = self.spacing()
for item in self.itemList:
wid = item.widget()
next_x = x + item.sizeHint().width() + space_x
if next_x - space_x > rect.right() and line_height > 0:
x = rect.x()
y = y + line_height + space_y
next_x = x + item.sizeHint().width() + space_x
line_height = 0
if not test_only:
item.setGeometry(QRect(QPoint(x, y), item.sizeHint()))
x = next_x
line_height = max(line_height, item.sizeHint().height())
return y + line_height - rect.y()

File diff suppressed because it is too large