Update readme.md

Commit
commit
2025-12-29 16:14:44 +00:00 · 2025-08-11 09:31:53 -07:00 · 2025-08-10 09:16:31 -07:00 · 2025-08-07 21:42:04 -07:00 · 2025-08-06 06:56:49 -07:00
10 changed files with 997 additions and 257 deletions
--- a/readme.md
+++ b/readme.md
@@ -99,7 +99,7 @@ Built with PyQt5, this tool is designed for users who want deep filtering capabi
 ### Install Dependencies

 ```bash
-pip install PyQt5 requests Pillow mega.py fpdf2 python-docx
+pip install PyQt5 requests Pillow mega.py fpdf python-docx
 ```

 ### Running the Application
--- a/src/config/constants.py
+++ b/src/config/constants.py
@@ -60,6 +60,7 @@ DOWNLOAD_LOCATION_KEY = "downloadLocationV1"
 RESOLUTION_KEY = "window_resolution"
 UI_SCALE_KEY = "ui_scale_factor"
 SAVE_CREATOR_JSON_KEY = "saveCreatorJsonProfile"
+FETCH_FIRST_KEY = "fetchAllPostsFirst" 

 # --- UI Constants and Identifiers ---
 HTML_PREFIX = "<!HTML!>"
@@ -97,7 +98,7 @@ FOLDER_NAME_STOP_WORDS = {
    "for", "he", "her", "his", "i", "im", "in", "is", "it", "its",
    "me", "my", "net", "not", "of", "on", "or", "org", "our",
    "s", "she", "so", "the", "their", "they", "this",
-    "to", "ve", "was", "we", "were", "with", "www", "you", "your",
+    "to", "ve", "was", "we", "were", "with", "www", "you", "your", "nsfw", "sfw",
 # add more according to need     
 }

@@ -111,7 +112,9 @@ CREATOR_DOWNLOAD_DEFAULT_FOLDER_IGNORE_WORDS = {
    "may", "jun", "june", "jul", "july", "aug", "august", "sep", "september",
    "oct", "october", "nov", "november", "dec", "december",
    "mon", "monday", "tue", "tuesday", "wed", "wednesday", "thu", "thursday",
-    "fri", "friday", "sat", "saturday", "sun", "sunday"
+    "fri", "friday", "sat", "saturday", "sun", "sunday", "Pack", "tier", "spoiler",
+    
+
    # add more according to need 
 }

--- a/src/core/api_client.py
+++ b/src/core/api_client.py
@@ -1,7 +1,7 @@
 import time
 import traceback
 from urllib.parse import urlparse
-import json # Ensure json is imported
+import json
 import requests
 from ..utils.network_utils import extract_post_info, prepare_cookies_for_request
 from ..config.constants import (
@@ -120,7 +120,8 @@ def download_from_api(
    selected_cookie_file=None,
    app_base_dir=None,
    manga_filename_style_for_sort_check=None,
-    processed_post_ids=None
+    processed_post_ids=None,
+    fetch_all_first=False  
 ):
    headers = {
        'User-Agent': 'Mozilla/5.0',
@@ -183,6 +184,7 @@ def download_from_api(
        logger("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).")

    is_manga_mode_fetch_all_and_sort_oldest_first = manga_mode and (manga_filename_style_for_sort_check != STYLE_DATE_POST_TITLE) and not target_post_id
+    should_fetch_all = fetch_all_first or is_manga_mode_fetch_all_and_sort_oldest_first  
    api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}"
    page_size = 50
    if is_manga_mode_fetch_all_and_sort_oldest_first:
--- a/src/core/discord_client.py
+++ b/src/core/discord_client.py
@@ -0,0 +1,80 @@
+import time
+import requests
+import json
+from urllib.parse import urlparse
+
+def fetch_server_channels(server_id, logger, cookies=None, cancellation_event=None, pause_event=None):
+    """
+    Fetches the list of channels for a given Discord server ID from the Kemono API.
+
+    Each domain in the fallback list is queried in turn; the first response that
+    decodes to a JSON list is returned.
+
+    Args:
+        server_id: Server ID interpolated into the channel lookup URL.
+        logger: Callable that accepts a single message string.
+        cookies: Optional cookies forwarded to requests.get.
+        cancellation_event: Optional event-like object exposing is_set(); once it
+            is set, no further request is issued.
+        pause_event: Optional event-like object exposing is_set(); while it is
+            set, the next request is held back (polled every 0.5 seconds).
+
+    Returns:
+        The decoded list of channels, or None if the fetch was cancelled or no
+        domain returned a list.
+    """
+    def _cancelled():
+        return bool(cancellation_event and cancellation_event.is_set())
+
+    domains_to_try = ["kemono.cr", "kemono.su"]
+    for domain in domains_to_try:
+        # Wait out a pause, but keep polling so a cancel ends the wait promptly.
+        while pause_event and pause_event.is_set() and not _cancelled():
+            time.sleep(0.5)
+
+        # Checked after the pause wait: a cancel raised while paused must not
+        # fall through to the HTTP request below.
+        if _cancelled():
+            logger("   Channel fetching cancelled by user.")
+            return None
+
+        lookup_url = f"https://{domain}/api/v1/discord/channel/lookup/{server_id}"
+        logger(f"   Attempting to fetch channel list from: {lookup_url}")
+        try:
+            response = requests.get(lookup_url, cookies=cookies, timeout=15)
+            response.raise_for_status()
+            channels = response.json()
+        except (requests.exceptions.RequestException, json.JSONDecodeError) as e:
+            # Not fatal: report why this domain failed, then try the next one.
+            logger(f"   ⚠️ Could not fetch channel list from {domain}: {e}")
+            continue
+
+        if isinstance(channels, list):
+            logger(f"   ✅ Found {len(channels)} channels for server {server_id}.")
+            return channels
+        logger(f"   ⚠️ Unexpected response format from {domain} (expected a list of channels).")
+
+    logger(f"   ❌ Failed to fetch channel list for server {server_id} from all available domains.")
+    return None
+
+def fetch_channel_messages(channel_id, logger, cancellation_event, pause_event, cookies=None):
+    """
+    Fetches all messages from a Discord channel by looping through API pages (pagination).
+
+    This is a generator: each successfully fetched page is yielded as one batch.
+    Paging stops on an empty page, a page shorter than the page size, a request
+    or JSON decoding error, an unexpected payload type, or cancellation.
+
+    Args:
+        channel_id: Channel ID interpolated into the API URL.
+        logger: Callable that accepts a single message string.
+        cancellation_event: Event-like object exposing is_set() (may be None);
+            once set, no further page is requested.
+        pause_event: Event-like object exposing is_set() (may be None); while
+            set, fetching waits (polled every 0.5 seconds).
+        cookies: Optional cookies forwarded to requests.get.
+
+    Yields:
+        The decoded JSON list for each page, in the order the API returns pages.
+    """
+    def _cancelled():
+        return bool(cancellation_event and cancellation_event.is_set())
+
+    offset = 0
+    # Assumed API page size: the offset advances by this amount and a shorter
+    # batch is treated as the final page.
+    page_size = 150
+    api_base_url = f"https://kemono.cr/api/v1/discord/channel/{channel_id}"
+
+    while not _cancelled():
+        if pause_event and pause_event.is_set():
+            logger("   Message fetching paused...")
+            while pause_event.is_set() and not _cancelled():
+                time.sleep(0.5)
+            # A pause that ended through cancellation is not a resume.
+            if _cancelled():
+                break
+            logger("   Message fetching resumed.")
+
+        paginated_url = f"{api_base_url}?o={offset}"
+        logger(f"   Fetching messages from API: page starting at offset {offset}")
+
+        # Only the network call and decoding are guarded, so errors raised by
+        # the consumer of this generator are not misreported as fetch errors.
+        try:
+            response = requests.get(paginated_url, cookies=cookies, timeout=20)
+            response.raise_for_status()
+            messages_batch = response.json()
+        except (requests.exceptions.RequestException, json.JSONDecodeError) as e:
+            logger(f"   ❌ Error fetching messages at offset {offset}: {e}")
+            break
+
+        if not messages_batch:
+            logger(f"   ✅ Reached end of messages for channel {channel_id}.")
+            break
+
+        # Guard against non-list payloads (e.g. a JSON error object) being
+        # handed to the caller as if they were a batch of messages.
+        if not isinstance(messages_batch, list):
+            logger(f"   ❌ Unexpected response format at offset {offset} (expected a list of messages).")
+            break
+
+        logger(f"   Fetched {len(messages_batch)} messages...")
+        yield messages_batch
+
+        if len(messages_batch) < page_size:
+            logger(f"   ✅ Last page of messages received for channel {channel_id}.")
+            break
+
+        offset += page_size
+        # Short delay between page requests.
+        time.sleep(0.5)
--- a/src/core/workers.py
+++ b/src/core/workers.py
@@ -826,37 +826,60 @@ class PostProcessorWorker:
                return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, details_for_failure

    def process(self):
+        # --- START: REFACTORED PROCESS METHOD ---

+        # 1. DATA MAPPING: Map Discord Message or Creator Post fields to a consistent set of variables.
+        if self.service == 'discord':
+            # For Discord, self.post is a MESSAGE object from the API.
+            post_title = self.post.get('content', '') or f"Message {self.post.get('id', 'N/A')}"
+            post_id = self.post.get('id', 'unknown_id')
+            post_main_file_info = {}  # Discord messages don't have a single main file
+            post_attachments = self.post.get('attachments', [])
+            post_content_html = self.post.get('content', '')
+            post_data = self.post  # Keep a reference to the original message object
+            log_prefix = "Message"
+        else:
+            # Existing logic for standard creator posts
+            post_title = self.post.get('title', '') or 'untitled_post'
+            post_id = self.post.get('id', 'unknown_id')
+            post_main_file_info = self.post.get('file')
+            post_attachments = self.post.get('attachments', [])
+            post_content_html = self.post.get('content', '')
+            post_data = self.post  # Reference to the post object
+            log_prefix = "Post"
+
+        # 2. SHARED PROCESSING LOGIC: The rest of the function now uses the consistent variables from above.
        result_tuple = (0, 0, [], [], [], None, None)
+        total_downloaded_this_post = 0
+        total_skipped_this_post = 0
+        determined_post_save_path_for_history = self.override_output_dir if self.override_output_dir else self.download_root
+        
        try:
-            if self._check_pause(f"Post processing for ID {self.post.get('id', 'N/A')}"):
-                result_tuple = (0, 0, [], [], [], None, None)
-                return result_tuple  
+            if self._check_pause(f"{log_prefix} processing for ID {post_id}"):
+                return (0, 0, [], [], [], None, None)
            if self.check_cancel():
-                result_tuple = (0, 0, [], [], [], None, None)
-                return result_tuple
+                return (0, 0, [], [], [], None, None)

            current_character_filters = self._get_current_character_filters()
            kept_original_filenames_for_log = []
            retryable_failures_this_post = []
            permanent_failures_this_post = []
-            total_downloaded_this_post = 0
-            total_skipped_this_post = 0
+            
            history_data_for_this_post = None

            parsed_api_url = urlparse(self.api_url_input)
-            post_data = self.post
-            post_id = post_data.get('id', 'unknown_id')
+            
+            # CONTEXT-AWARE URL for Referer Header
+            if self.service == 'discord':
+                server_id = self.user_id 
+                channel_id = self.post.get('channel', 'unknown_channel')
+                post_page_url = f"https://{parsed_api_url.netloc}/discord/server/{server_id}/{channel_id}"
+            else:
+                post_page_url = f"https://{parsed_api_url.netloc}/{self.service}/user/{self.user_id}/post/{post_id}"

-            post_page_url = f"https://{parsed_api_url.netloc}/{self.service}/user/{self.user_id}/post/{post_id}"
            headers = {'User-Agent': 'Mozilla/5.0', 'Referer': post_page_url, 'Accept': '*/*'}
            link_pattern = re.compile(r"""<a\s+.*?href=["'](https?://[^"']+)["'][^>]*>(.*?)</a>""", re.IGNORECASE | re.DOTALL)
-            post_data = self.post
-            post_title = post_data.get('title', '') or 'untitled_post'
-            post_id = post_data.get('id', 'unknown_id')
-            post_main_file_info = post_data.get('file')
-            post_attachments = post_data.get('attachments', [])
-
+            
            effective_unwanted_keywords_for_folder_naming = self.unwanted_keywords.copy()
            is_full_creator_download_no_char_filter = not self.target_post_id_from_initial_url and not current_character_filters
           
@@ -874,9 +897,9 @@ class PostProcessorWorker:
                self.logger(f"   Applying creator download specific folder ignore words ({len(self.creator_download_folder_ignore_words)} words).")
                effective_unwanted_keywords_for_folder_naming.update(self.creator_download_folder_ignore_words)

-            post_content_html = post_data.get('content', '')
            if not self.extract_links_only:
-                self.logger(f"\n--- Processing Post {post_id} ('{post_title[:50]}...') (Thread: {threading.current_thread().name}) ---")
+                self.logger(f"\n--- Processing {log_prefix} {post_id} ('{post_title[:50]}...') (Thread: {threading.current_thread().name}) ---")
+            
            num_potential_files_in_post = len(post_attachments or []) + (1 if post_main_file_info and post_main_file_info.get('path') else 0)

            post_is_candidate_by_title_char_match = False
@@ -920,7 +943,7 @@ class PostProcessorWorker:
                    if original_api_att_name:
                        all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_att_name})

-            if current_character_filters and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
+            if current_character_filters and self.char_filter_scope == CHAR_SCOPE_COMMENTS and self.service != 'discord':
                self.logger(f"   [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id}'.")
                if self._check_pause(f"File check (comments scope) for post {post_id}"):
                    result_tuple = (0, num_potential_files_in_post, [], [], [], None, None)
@@ -943,7 +966,7 @@ class PostProcessorWorker:
                    if post_is_candidate_by_file_char_match_in_comment_scope: break
                self.logger(f"   [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope}")

-            if current_character_filters and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
+            if current_character_filters and self.char_filter_scope == CHAR_SCOPE_COMMENTS and self.service != 'discord':
                if not post_is_candidate_by_file_char_match_in_comment_scope:
                    if self._check_pause(f"Comment check for post {post_id}"):
                        result_tuple = (0, num_potential_files_in_post, [], [], [], None, None)
@@ -1007,10 +1030,10 @@ class PostProcessorWorker:
                    return result_tuple

            if not self.extract_links_only and self.manga_mode_active and current_character_filters and (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and not post_is_candidate_by_title_char_match:
-                self.logger(f"   -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title[:50]}' doesn't match filters.")
-                self._emit_signal('missed_character_post', post_title, "Manga Mode: No title match for character filter (Title/Both scope)")
-                result_tuple = (0, num_potential_files_in_post, [], [], [], None, None)
-                return result_tuple
+                 self.logger(f"   -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title[:50]}' doesn't match filters.")
+                 self._emit_signal('missed_character_post', post_title, "Manga Mode: No title match for character filter (Title/Both scope)")
+                 result_tuple = (0, num_potential_files_in_post, [], [], [], None, None)
+                 return result_tuple

            if not isinstance(post_attachments, list):
                self.logger(f"⚠️ Corrupt attachment data for post {post_id} (expected list, got {type(post_attachments)}). Skipping attachments.")
@@ -1143,29 +1166,50 @@ class PostProcessorWorker:
                suffix_counter = 0
                final_post_subfolder_name = ""

-                while True:
+                suffix_counter = 0
+                folder_creation_successful = False
+                final_post_subfolder_name = ""
+                post_id_for_folder = str(self.post.get('id', 'unknown_id'))
+
+                while not folder_creation_successful:
                    if suffix_counter == 0:
                        name_candidate = original_cleaned_post_title_for_sub
                    else:
                        name_candidate = f"{original_cleaned_post_title_for_sub}_{suffix_counter}"
+                    
                    potential_post_subfolder_path = os.path.join(base_path_for_post_subfolder, name_candidate)
-                    try:
-                        os.makedirs(potential_post_subfolder_path, exist_ok=False)
-                        final_post_subfolder_name = name_candidate
-                        if suffix_counter > 0:
-                            self.logger(f"   Post subfolder name conflict: Using '{final_post_subfolder_name}' instead of '{original_cleaned_post_title_for_sub}' to avoid mixing posts.")
-                        break
-                    except FileExistsError:
-                        suffix_counter += 1
-                        if suffix_counter > 100:
-                            self.logger(f"   ⚠️ Exceeded 100 attempts to find unique subfolder name for '{original_cleaned_post_title_for_sub}'. Using UUID.")
-                            final_post_subfolder_name = f"{original_cleaned_post_title_for_sub}_{uuid.uuid4().hex[:8]}"
-                            os.makedirs(os.path.join(base_path_for_post_subfolder, final_post_subfolder_name), exist_ok=True)
+                    id_file_path = os.path.join(potential_post_subfolder_path, f".postid_{post_id_for_folder}")
+
+                    if not os.path.isdir(potential_post_subfolder_path):
+                        # Folder does not exist, create it and its ID file
+                        try:
+                            os.makedirs(potential_post_subfolder_path)
+                            with open(id_file_path, 'w') as f:
+                                f.write(post_id_for_folder)
+                            
+                            final_post_subfolder_name = name_candidate
+                            folder_creation_successful = True
+                            if suffix_counter > 0:
+                                self.logger(f"   Post subfolder name conflict: Using '{final_post_subfolder_name}' to avoid mixing posts.")
+                        except OSError as e_mkdir:
+                            self.logger(f"   ❌ Error creating directory '{potential_post_subfolder_path}': {e_mkdir}.")
+                            final_post_subfolder_name = original_cleaned_post_title_for_sub
                            break
-                    except OSError as e_mkdir:
-                        self.logger(f"   ❌ Error creating directory '{potential_post_subfolder_path}': {e_mkdir}. Files for this post might be saved in parent or fail.")
-                        final_post_subfolder_name = original_cleaned_post_title_for_sub
-                        break
+                    else:
+                        # Folder exists, check if it's for this post or a different one
+                        if os.path.exists(id_file_path):
+                            # ID file matches! This is a restore scenario. Reuse the folder.
+                            self.logger(f"   ℹ️ Re-using existing post subfolder: '{name_candidate}'")
+                            final_post_subfolder_name = name_candidate
+                            folder_creation_successful = True
+                        else:
+                            # Folder exists but ID file does not match (or is missing). This is a normal name collision.
+                            suffix_counter += 1
+                            if suffix_counter > 100: # Safety break
+                                self.logger(f"   ⚠️ Exceeded 100 attempts to find unique subfolder for '{original_cleaned_post_title_for_sub}'.")
+                                final_post_subfolder_name = f"{original_cleaned_post_title_for_sub}_{uuid.uuid4().hex[:8]}"
+                                os.makedirs(os.path.join(base_path_for_post_subfolder, final_post_subfolder_name), exist_ok=True)
+                                break
                determined_post_save_path_for_history = os.path.join(base_path_for_post_subfolder, final_post_subfolder_name)

            if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
@@ -1807,14 +1851,23 @@ class PostProcessorWorker:
                            permanent_failures_this_post, history_data_for_this_post,
                            None)

+        except Exception as main_thread_err:
+            self.logger(f"\n❌ Critical error within Worker process for {log_prefix} {post_id}: {main_thread_err}")
+            self.logger(traceback.format_exc())
+            # Ensure we still return a valid tuple to prevent the app from stalling
+            result_tuple = (0, 1, [], [], [{'error': str(main_thread_err)}], None, None)
        finally:
+            # This block ALWAYS executes, ensuring that every task signals its completion.
+            # This is critical for the main thread to know when all work is done.
            if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
                path_to_check_for_emptiness = determined_post_save_path_for_history
                try:
+                    # Check if the path is a directory and if it's empty
                    if os.path.isdir(path_to_check_for_emptiness) and not os.listdir(path_to_check_for_emptiness):
                        self.logger(f"   🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness}'")
                        os.rmdir(path_to_check_for_emptiness)
                except OSError as e_rmdir:
+                    # Log if removal fails for any reason (e.g., permissions)
                    self.logger(f"   ⚠️ Could not remove potentially empty subfolder '{path_to_check_for_emptiness}': {e_rmdir}")

            self._emit_signal('worker_finished', result_tuple)
@@ -1881,7 +1934,8 @@ class DownloadThread(QThread):
                 single_pdf_mode=False,
                 project_root_dir=None,
                 processed_post_ids=None,
-                 start_offset=0):  
+                 start_offset=0,
+                 fetch_first=False): 
        super().__init__()
        self.api_url_input = api_url_input
        self.output_dir = output_dir
@@ -1947,6 +2001,7 @@ class DownloadThread(QThread):
        self.project_root_dir = project_root_dir
        self.processed_post_ids_set = set(processed_post_ids) if processed_post_ids is not None else set() 
        self.start_offset = start_offset 
+        self.fetch_first = fetch_first

        if self.compress_images and Image is None:
            self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
@@ -1993,7 +2048,8 @@ class DownloadThread(QThread):
                selected_cookie_file=self.selected_cookie_file,
                app_base_dir=self.app_base_dir,
                manga_filename_style_for_sort_check=self.manga_filename_style if self.manga_mode_active else None,
-                processed_post_ids=self.processed_post_ids_set
+                processed_post_ids=self.processed_post_ids_set,
+                fetch_all_first=self.fetch_first 
            )

            for posts_batch_data in post_generator:
--- a/src/ui/dialogs/FutureSettingsDialog.py
+++ b/src/ui/dialogs/FutureSettingsDialog.py
@@ -16,7 +16,8 @@ from ..main_window import get_app_icon_object
 from ...config.constants import (
    THEME_KEY, LANGUAGE_KEY, DOWNLOAD_LOCATION_KEY,
    RESOLUTION_KEY, UI_SCALE_KEY, SAVE_CREATOR_JSON_KEY,
-    COOKIE_TEXT_KEY, USE_COOKIE_KEY
+    COOKIE_TEXT_KEY, USE_COOKIE_KEY,
+    FETCH_FIRST_KEY ### ADDED ###
 )


@@ -36,7 +37,7 @@ class FutureSettingsDialog(QDialog):

        screen_height = QApplication.primaryScreen().availableGeometry().height() if QApplication.primaryScreen() else 800
        scale_factor = screen_height / 800.0
-        base_min_w, base_min_h = 420, 360 # Adjusted height for new layout
+        base_min_w, base_min_h = 420, 390
        scaled_min_w = int(base_min_w * scale_factor)
        scaled_min_h = int(base_min_h * scale_factor)
        self.setMinimumSize(scaled_min_w, scaled_min_h)
@@ -49,7 +50,6 @@ class FutureSettingsDialog(QDialog):
        """Initializes all UI components and layouts for the dialog."""
        main_layout = QVBoxLayout(self)

-        # --- Group 1: Interface Settings ---
        self.interface_group_box = QGroupBox()
        interface_layout = QGridLayout(self.interface_group_box)

@@ -76,36 +76,32 @@ class FutureSettingsDialog(QDialog):

        main_layout.addWidget(self.interface_group_box)

-        # --- Group 2: Download & Window Settings ---
        self.download_window_group_box = QGroupBox()
        download_window_layout = QGridLayout(self.download_window_group_box)
-
-        # Window Size (Resolution)
        self.window_size_label = QLabel()
        self.resolution_combo_box = QComboBox()
        self.resolution_combo_box.currentIndexChanged.connect(self._display_setting_changed)
        download_window_layout.addWidget(self.window_size_label, 0, 0)
        download_window_layout.addWidget(self.resolution_combo_box, 0, 1)

-        # Default Path
        self.default_path_label = QLabel()
        self.save_path_button = QPushButton()
-        # --- START: MODIFIED LOGIC ---
        self.save_path_button.clicked.connect(self._save_cookie_and_path)
-        # --- END: MODIFIED LOGIC ---
        download_window_layout.addWidget(self.default_path_label, 1, 0)
        download_window_layout.addWidget(self.save_path_button, 1, 1)

-        # Save Creator.json Checkbox
        self.save_creator_json_checkbox = QCheckBox()
        self.save_creator_json_checkbox.stateChanged.connect(self._creator_json_setting_changed) 
        download_window_layout.addWidget(self.save_creator_json_checkbox, 2, 0, 1, 2)
+        
+        self.fetch_first_checkbox = QCheckBox()
+        self.fetch_first_checkbox.stateChanged.connect(self._fetch_first_setting_changed)
+        download_window_layout.addWidget(self.fetch_first_checkbox, 3, 0, 1, 2)

        main_layout.addWidget(self.download_window_group_box)

        main_layout.addStretch(1)

-        # --- OK Button ---
        self.ok_button = QPushButton()
        self.ok_button.clicked.connect(self.accept)
        main_layout.addWidget(self.ok_button, 0, Qt.AlignRight | Qt.AlignBottom)
@@ -113,17 +109,27 @@ class FutureSettingsDialog(QDialog):
    def _load_checkbox_states(self):
        """Loads the initial state for all checkboxes from settings."""
        self.save_creator_json_checkbox.blockSignals(True)
-        # Default to True so the feature is on by default for users
        should_save = self.parent_app.settings.value(SAVE_CREATOR_JSON_KEY, True, type=bool)
        self.save_creator_json_checkbox.setChecked(should_save)
        self.save_creator_json_checkbox.blockSignals(False)

+        self.fetch_first_checkbox.blockSignals(True)
+        should_fetch_first = self.parent_app.settings.value(FETCH_FIRST_KEY, False, type=bool)
+        self.fetch_first_checkbox.setChecked(should_fetch_first)
+        self.fetch_first_checkbox.blockSignals(False)
+
    def _creator_json_setting_changed(self, state):
        """Saves the state of the 'Save Creator.json' checkbox."""
        is_checked = state == Qt.Checked
        self.parent_app.settings.setValue(SAVE_CREATOR_JSON_KEY, is_checked)
        self.parent_app.settings.sync()

+    def _fetch_first_setting_changed(self, state):
+        """Saves the state of the 'Fetch First' checkbox."""
+        # `state` is the value delivered by fetch_first_checkbox.stateChanged;
+        # anything other than Qt.Checked is stored as False.
+        is_checked = state == Qt.Checked
+        # Written under FETCH_FIRST_KEY and synced immediately, mirroring the
+        # handler for the 'Save Creator.json' checkbox.
+        self.parent_app.settings.setValue(FETCH_FIRST_KEY, is_checked)
+        self.parent_app.settings.sync()
+
    def _tr(self, key, default_text=""):
        if callable(get_translation) and self.parent_app:
            return get_translation(self.parent_app.current_selected_language, key, default_text)
@@ -132,33 +138,31 @@ class FutureSettingsDialog(QDialog):
    def _retranslate_ui(self):
        self.setWindowTitle(self._tr("settings_dialog_title", "Settings"))
        
-        # Group Box Titles
        self.interface_group_box.setTitle(self._tr("interface_group_title", "Interface Settings"))
        self.download_window_group_box.setTitle(self._tr("download_window_group_title", "Download & Window Settings"))

-        # Interface Group Labels
        self.theme_label.setText(self._tr("theme_label", "Theme:"))
        self.ui_scale_label.setText(self._tr("ui_scale_label", "UI Scale:"))
        self.language_label.setText(self._tr("language_label", "Language:"))
        
-        # Download & Window Group Labels
        self.window_size_label.setText(self._tr("window_size_label", "Window Size:"))
        self.default_path_label.setText(self._tr("default_path_label", "Default Path:"))
        self.save_creator_json_checkbox.setText(self._tr("save_creator_json_label", "Save Creator.json file"))
        
-        # --- START: MODIFIED LOGIC ---
-        # Buttons and Controls
+        self.fetch_first_checkbox.setText(self._tr("fetch_first_label", "Fetch First (Download after all pages are found)"))
+        self.fetch_first_checkbox.setToolTip(self._tr("fetch_first_tooltip", "If checked, the downloader will find all posts from a creator first before starting any downloads.\nThis can be slower to start but provides a more accurate progress bar."))
+        
        self._update_theme_toggle_button_text()
        self.save_path_button.setText(self._tr("settings_save_cookie_path_button", "Save Cookie + Download Path"))
        self.save_path_button.setToolTip(self._tr("settings_save_cookie_path_tooltip", "Save the current 'Download Location' and Cookie settings for future sessions."))
        self.ok_button.setText(self._tr("ok_button", "OK"))
-        # --- END: MODIFIED LOGIC ---

-        # Populate dropdowns
        self._populate_display_combo_boxes()
        self._populate_language_combo_box()
        self._load_checkbox_states()

+    # --- (The rest of the file remains unchanged) ---
+
    def _apply_theme(self):
        if self.parent_app and self.parent_app.current_theme == "dark":
            scale = getattr(self.parent_app, 'scale_factor', 1)
@@ -285,14 +289,12 @@ class FutureSettingsDialog(QDialog):
        path_saved = False
        cookie_saved = False
        
-        # --- Save Download Path Logic ---
        if hasattr(self.parent_app, 'dir_input') and self.parent_app.dir_input:
            current_path = self.parent_app.dir_input.text().strip()
            if current_path and os.path.isdir(current_path):
                self.parent_app.settings.setValue(DOWNLOAD_LOCATION_KEY, current_path)
                path_saved = True
        
-        # --- Save Cookie Logic ---
        if hasattr(self.parent_app, 'use_cookie_checkbox'):
            use_cookie = self.parent_app.use_cookie_checkbox.isChecked()
            cookie_content = self.parent_app.cookie_text_input.text().strip()
@@ -301,7 +303,7 @@ class FutureSettingsDialog(QDialog):
                self.parent_app.settings.setValue(USE_COOKIE_KEY, True)
                self.parent_app.settings.setValue(COOKIE_TEXT_KEY, cookie_content)
                cookie_saved = True
-            else: # Also save the 'off' state
+            else: 
                self.parent_app.settings.setValue(USE_COOKIE_KEY, False)
                self.parent_app.settings.setValue(COOKIE_TEXT_KEY, "")

@@ -319,4 +321,4 @@ class FutureSettingsDialog(QDialog):
                                self._tr("settings_save_nothing_message", "The download location is not a valid directory and no cookie was active."))
            return

-        QMessageBox.information(self, self._tr("settings_save_success_title", "Settings Saved"), message)
+        QMessageBox.information(self, self._tr("settings_save_success_title", "Settings Saved"), message)
--- a/src/ui/dialogs/discord_pdf_generator.py
+++ b/src/ui/dialogs/discord_pdf_generator.py
@@ -0,0 +1,146 @@
+import os
+import re
+import datetime
+try:
+    from fpdf import FPDF
+    FPDF_AVAILABLE = True
+
+    class PDF(FPDF):
+        """
+        FPDF subclass that stamps a running header on Discord chat-log pages.
+
+        Every page except the first shows "<server> - #<channel>" on the left
+        and the page number on the right.
+        """
+        def __init__(self, server_name, channel_name, *args, **kwargs):
+            """Store the names shown in the header; other arguments go to FPDF."""
+            super().__init__(*args, **kwargs)
+            self.server_name = server_name
+            self.channel_name = channel_name
+            # Font family used by header(); can be switched to 'Arial' if the DejaVu font fails to load.
+            self.default_font_family = 'DejaVu'
+
+        def header(self):
+            """Draw the running header; skipped on page 1, which is the title page."""
+            if self.page_no() == 1:
+                return # No header on the title page
+            self.set_font(self.default_font_family, '', 8)
+            # Use the `align` keyword so the deprecated positional `ln` flag is not passed;
+            # the default still leaves the cursor on the same line for the second cell.
+            self.cell(0, 10, f'{self.server_name} - #{self.channel_name}', align='L')
+            self.cell(0, 10, 'Page ' + str(self.page_no()), align='R')
+            self.ln(10)
+
+        def footer(self):
+            """Intentionally empty: the header already carries the page number."""
+            pass
+
+except ImportError:
+    # fpdf is optional: keep placeholders so this module still imports and
+    # callers can check FPDF_AVAILABLE before trying to build a PDF.
+    FPDF_AVAILABLE = False
+    FPDF = None
+    PDF = None
+
+def create_pdf_from_discord_messages(messages_data, server_name, channel_name, output_filename, font_path, logger=print):
+    """
+    Creates a single PDF from a list of Discord message objects, formatted as a chat log.
+
+    The PDF starts with a title page, then lists every message oldest-first with
+    its author, timestamp, text, and clickable links for attachments and embeds.
+
+    Args:
+        messages_data (list[dict]): Messages to render; sorted in place by 'published'.
+        server_name (str): Shown on the title page and in page headers.
+        channel_name (str): Shown on the title page and in page headers.
+        output_filename (str): Path the finished PDF is written to.
+        font_path (str): Path to DejaVuSans.ttf; DejaVuSans-Bold.ttf must sit beside it.
+        logger (callable): Receives progress and error messages.
+
+    Returns:
+        bool: True if the PDF was saved; False if the PDF library is missing,
+              there are no messages, or saving failed.
+    """
+    if not FPDF_AVAILABLE:
+        logger("❌ PDF Creation failed: 'fpdf2' library is not installed.")
+        return False
+
+    if not messages_data:
+        logger("   No messages were found or fetched to create a PDF.")
+        return False
+
+    logger("   Sorting messages by date (oldest first)...")
+    # 'published' may be present but null; coerce it so the sort never compares None with str.
+    messages_data.sort(key=lambda m: m.get('published') or '')
+
+    pdf = PDF(server_name, channel_name)
+    default_font_family = 'DejaVu'
+
+    try:
+        bold_font_path = font_path.replace("DejaVuSans.ttf", "DejaVuSans-Bold.ttf")
+        if not os.path.exists(font_path) or not os.path.exists(bold_font_path):
+            raise RuntimeError("Font files not found")
+
+        pdf.add_font('DejaVu', '', font_path, uni=True)
+        pdf.add_font('DejaVu', 'B', bold_font_path, uni=True)
+    except Exception as font_error:
+        logger(f"   ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.")
+        default_font_family = 'Arial'
+        pdf.default_font_family = 'Arial'
+
+    def safe(value):
+        """Return value as text that the active font is able to encode."""
+        value = str(value)
+        if default_font_family == 'Arial':
+            # The built-in fallback font only covers Latin-1; replace anything else
+            # with '?' so one emoji cannot abort the whole export.
+            return value.encode('latin-1', 'replace').decode('latin-1')
+        return value
+
+    # The page header prints these names too, so they need the same treatment.
+    pdf.server_name, pdf.channel_name = safe(server_name), safe(channel_name)
+
+    # --- Title Page ---
+    pdf.add_page()
+    pdf.set_font(default_font_family, 'B', 24)
+    pdf.cell(w=0, h=20, text="Discord Chat Log", align='C', new_x="LMARGIN", new_y="NEXT")
+    pdf.ln(10)
+    pdf.set_font(default_font_family, '', 16)
+    pdf.cell(w=0, h=10, text=safe(f"Server: {server_name}"), align='C', new_x="LMARGIN", new_y="NEXT")
+    pdf.cell(w=0, h=10, text=safe(f"Channel: #{channel_name}"), align='C', new_x="LMARGIN", new_y="NEXT")
+    pdf.ln(5)
+    pdf.set_font(default_font_family, '', 10)
+    pdf.cell(w=0, h=10, text=f"Generated on: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", align='C', new_x="LMARGIN", new_y="NEXT")
+    pdf.cell(w=0, h=10, text=f"Total Messages: {len(messages_data)}", align='C', new_x="LMARGIN", new_y="NEXT")
+
+    pdf.add_page()
+
+    logger(f"   Starting PDF creation with {len(messages_data)} messages...")
+
+    for i, message in enumerate(messages_data):
+        # Any of these fields may be missing or null; fall back to empty values.
+        author_info = message.get('author') or {}
+        author = author_info.get('global_name') or author_info.get('username') or 'Unknown User'
+        timestamp_str = message.get('published') or ''
+        content = message.get('content') or ''
+        attachments = message.get('attachments') or []
+        embeds = message.get('embeds') or []
+
+        try:
+            # Rewrite a trailing 'Z' as an explicit UTC offset before parsing.
+            if timestamp_str.endswith('Z'):
+                timestamp_str = timestamp_str[:-1] + '+00:00'
+            dt_obj = datetime.datetime.fromisoformat(timestamp_str)
+            formatted_timestamp = dt_obj.strftime('%Y-%m-%d %H:%M:%S')
+        except (ValueError, TypeError, AttributeError):
+            # Unparseable (or non-string) timestamps are shown as-is.
+            formatted_timestamp = str(timestamp_str)
+
+        # Draw a separator line
+        if i > 0:
+            pdf.ln(2)
+            pdf.set_draw_color(200, 200, 200) # Light grey line
+            pdf.cell(0, 0, '', border='T')
+            pdf.ln(2)
+
+        # Message Header
+        pdf.set_font(default_font_family, 'B', 11)
+        pdf.write(5, safe(f"{author} "))
+        pdf.set_font(default_font_family, '', 9)
+        pdf.set_text_color(128, 128, 128)
+        pdf.write(5, safe(f"({formatted_timestamp})"))
+        pdf.set_text_color(0, 0, 0)
+        pdf.ln(6)
+
+        # Message Content
+        if content:
+            pdf.set_font(default_font_family, '', 10)
+            pdf.multi_cell(w=0, h=5, text=safe(content))
+
+        # Attachments and embeds are listed as clickable links
+        if attachments or embeds:
+            pdf.ln(1)
+            pdf.set_font(default_font_family, '', 9)
+            pdf.set_text_color(22, 119, 219) # A nice blue for links
+
+            for att in attachments:
+                file_name = att.get('name') or 'untitled'
+                file_path = att.get('path') or ''
+                # Only link when a path exists; a bare ".../data" URL would point nowhere.
+                full_url = f"https://kemono.cr/data{file_path}" if file_path else ''
+                pdf.write(5, text=safe(f"[Attachment: {file_name}]"), link=full_url)
+                pdf.ln() # New line after each attachment
+
+            for embed in embeds:
+                # The embed URL is already a full URL
+                embed_url = embed.get('url') or ''
+                # Keep the 'no url' label for display, but never use it as a link target.
+                pdf.write(5, text=safe(f"[Embed: {embed_url or 'no url'}]"), link=embed_url)
+                pdf.ln() # New line after each embed
+
+            pdf.set_text_color(0, 0, 0) # Reset color to black
+
+    try:
+        pdf.output(output_filename)
+        logger(f"✅ Successfully created Discord chat log PDF: '{os.path.basename(output_filename)}'")
+        return True
+    except Exception as e:
+        logger(f"❌ A critical error occurred while saving the final PDF: {e}")
+        return False
--- a/src/ui/main_window.py
+++ b/src/ui/main_window.py
--- a/src/utils/network_utils.py
+++ b/src/utils/network_utils.py
@@ -141,12 +141,15 @@ def prepare_cookies_for_request(use_cookie_flag, cookie_text_input, selected_coo
 def extract_post_info(url_string):
    """
    Parses a URL string to extract the service, user ID, and post ID.
+    UPDATED to support Discord server/channel URLs.

    Args:
        url_string (str): The URL to parse.

    Returns:
-        tuple: A tuple containing (service, user_id, post_id). Any can be None.
+        tuple: A tuple containing (service, id1, id2). 
+               For posts: (service, user_id, post_id).
+               For Discord: ('discord', server_id, channel_id).
    """
    if not isinstance(url_string, str) or not url_string.strip():
        return None, None, None
@@ -155,7 +158,15 @@ def extract_post_info(url_string):
        parsed_url = urlparse(url_string.strip())
        path_parts = [part for part in parsed_url.path.strip('/').split('/') if part]
        
-        # Standard format: /<service>/user/<user_id>/post/<post_id>
+        # Check for new Discord URL format first
+        # e.g., /discord/server/891670433978531850/1252332668805189723
+        if len(path_parts) >= 3 and path_parts[0].lower() == 'discord' and path_parts[1].lower() == 'server':
+            service = 'discord'
+            server_id = path_parts[2]
+            channel_id = path_parts[3] if len(path_parts) >= 4 else None
+            return service, server_id, channel_id
+
+        # Standard creator/post format: /<service>/user/<user_id>/post/<post_id>
        if len(path_parts) >= 3 and path_parts[1].lower() == 'user':
            service = path_parts[0]
            user_id = path_parts[2]
@@ -174,7 +185,6 @@ def extract_post_info(url_string):

    return None, None, None

-
 def get_link_platform(url):
    """
    Identifies the platform of a given URL based on its domain.
--- a/src/utils/resolution.py
+++ b/src/utils/resolution.py
@@ -391,6 +391,10 @@ def setup_ui(main_app):
    main_app.link_search_button.setVisible(False)
    main_app.link_search_button.setFixedWidth(int(30 * scale))
    log_title_layout.addWidget(main_app.link_search_button)
+    main_app.discord_scope_toggle_button = QPushButton("Scope: Files")
+    main_app.discord_scope_toggle_button.setVisible(False) # Hidden by default
+    main_app.discord_scope_toggle_button.setFixedWidth(int(140 * scale))
+    log_title_layout.addWidget(main_app.discord_scope_toggle_button)
    main_app.manga_rename_toggle_button = QPushButton()
    main_app.manga_rename_toggle_button.setVisible(False)
    main_app.manga_rename_toggle_button.setFixedWidth(int(140 * scale))
Author	SHA1	Message	Date
Yuvi9587	56a83195b2	Update readme.md	2025-08-11 09:31:53 -07:00
Yuvi9587	26fa3b9bc1	Commit	2025-08-10 09:16:31 -07:00
Yuvi9587	f7c4d892a8	commit	2025-08-07 21:42:04 -07:00
Yuvi9587	661b97aa16	Commit	2025-08-06 06:56:49 -07:00