Commit

2025-12-29 16:14:44 +00:00 · 2025-05-27 20:34:38 +05:30 · 2025-05-26 20:37:37 +05:30 · 2025-05-26 09:33:45 +05:30 · 2025-05-26 08:43:13 +05:30 · 2025-05-26 13:55:54 +05:30
11 changed files with 7427 additions and 1411 deletions
--- a/Kemono.png
+++ b/Kemono.png
--- a/Known.txt
+++ b/Known.txt
@@ -1 +0,0 @@
-Hinata
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 [Yuvi9587]
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/Read.png
+++ b/Read.png
--- a/assets/discord.png
+++ b/assets/discord.png
--- a/assets/github.png
+++ b/assets/github.png
--- a/assets/instagram.png
+++ b/assets/instagram.png
--- a/downloader_utils.py
+++ b/downloader_utils.py
--- a/main.py
+++ b/main.py
--- a/multipart_downloader.py
+++ b/multipart_downloader.py
@@ -0,0 +1,239 @@
+import os
+import time
+import requests
+import hashlib
+import http.client
+import traceback
+import threading
+import queue # Import the missing 'queue' module
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+# Base delay in seconds before retrying a chunk; it is doubled for each further retry.
+CHUNK_DOWNLOAD_RETRY_DELAY = 2
+# Number of retries allowed per chunk after the first attempt; kept low for quicker fallback if a chunk is problematic.
+MAX_CHUNK_DOWNLOAD_RETRIES = 1
+# Size in bytes passed to iter_content() while streaming a chunk (256KB).
+DOWNLOAD_CHUNK_SIZE_ITER = 1024 * 256
+
+
+def _chunk_interrupted(tag, logger_func, cancellation_event, skip_event, pause_event, messages):
+    """Check a chunk's control events, blocking for as long as the download is paused.
+
+    Args:
+        tag: Log prefix identifying the chunk.
+        logger_func: Callable that receives each log line.
+        cancellation_event: Event-like object (or None); when set the chunk stops.
+        skip_event: Event-like object (or None); when set the chunk stops.
+        pause_event: Event-like object (or None); while set the chunk waits.
+        messages: Five log suffixes in this order: cancelled, skipped, paused,
+            cancelled-while-paused, resumed.
+
+    Returns:
+        True if the chunk must stop now, False if it may continue.
+    """
+    cancelled_msg, skipped_msg, paused_msg, cancelled_in_pause_msg, resumed_msg = messages
+    if cancellation_event and cancellation_event.is_set():
+        logger_func(f"{tag} {cancelled_msg}")
+        return True
+    if skip_event and skip_event.is_set():
+        logger_func(f"{tag} {skipped_msg}")
+        return True
+    if pause_event and pause_event.is_set():
+        logger_func(f"{tag} {paused_msg}")
+        while pause_event.is_set():
+            if cancellation_event and cancellation_event.is_set():
+                logger_func(f"{tag} {cancelled_in_pause_msg}")
+                return True
+            time.sleep(0.2) # Shorter sleep for responsive resume
+        logger_func(f"{tag} {resumed_msg}")
+    return False
+
+
+def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, headers,
+                               part_num, total_parts, progress_data, cancellation_event, skip_event, pause_event, global_emit_time_ref, cookies_for_chunk,
+                               logger_func, emitter=None, api_original_filename=None):
+    """Download one byte range of a file into its slot of the pre-allocated temp file.
+
+    Args:
+        chunk_url: URL of the file being downloaded.
+        temp_file_path: Existing temp file; the range is written starting at ``start_byte``.
+        start_byte: First byte offset of the range.
+        end_byte: Last byte offset of the range (inclusive); -1 means no Range header is sent
+            (used together with ``start_byte == 0`` for 0-byte files).
+        headers: Base request headers; copied, never mutated.
+        part_num: Zero-based index of this chunk in ``progress_data['chunks_status']``.
+        total_parts: Number of parts, used for log output.
+        progress_data: Shared dict with 'lock', 'total_downloaded_so_far' and 'chunks_status'.
+        cancellation_event, skip_event, pause_event: Event-like objects or None.
+        global_emit_time_ref: One-element list holding the time of the last progress emit,
+            shared by all chunks so emits are throttled per file.
+        cookies_for_chunk: Cookies passed to ``requests.get``.
+        logger_func: Callable that receives each log line.
+        emitter: Optional ``queue.Queue`` or object exposing ``file_progress_signal``.
+        api_original_filename: File name reported with progress updates.
+
+    Returns:
+        Tuple ``(bytes_downloaded, success)``.
+    """
+    tag = f"   [Chunk {part_num + 1}/{total_parts}]"
+    err_tag = f"   ❌ [Chunk {part_num + 1}/{total_parts}]"
+
+    if _chunk_interrupted(tag, logger_func, cancellation_event, skip_event, pause_event,
+                          ("Download cancelled before start.", "Skip event triggered before start.",
+                           "Download paused before start...", "Download cancelled while paused.",
+                           "Download resumed.")):
+        return 0, False  # bytes_downloaded, success
+
+    chunk_headers = headers.copy()
+    range_requested = end_byte != -1 # (0, -1) marks a 0-byte file: no Range header is sent
+    if range_requested:
+        chunk_headers['Range'] = f"bytes={start_byte}-{end_byte}"
+
+    bytes_this_chunk = 0
+    last_speed_calc_time = time.time()
+    bytes_at_last_speed_calc = 0
+
+    try:
+        for attempt in range(MAX_CHUNK_DOWNLOAD_RETRIES + 1):
+            if _chunk_interrupted(tag, logger_func, cancellation_event, skip_event, pause_event,
+                                  ("Cancelled during retry loop.", "Skip event during retry loop.",
+                                   "Paused during retry loop...", "Cancelled while paused in retry loop.",
+                                   "Resumed from retry loop pause.")):
+                return bytes_this_chunk, False
+
+            try:
+                if attempt > 0:
+                    logger_func(f"{tag} Retrying download (Attempt {attempt}/{MAX_CHUNK_DOWNLOAD_RETRIES})...")
+                    time.sleep(CHUNK_DOWNLOAD_RETRY_DELAY * (2 ** (attempt - 1)))
+                    # A retry re-requests the whole range from start_byte, so the bytes of the
+                    # failed attempt must be taken back out of the counters; otherwise the
+                    # caller's "bytes == total_size" check fails after a successful retry.
+                    with progress_data['lock']:
+                        progress_data['total_downloaded_so_far'] -= bytes_this_chunk
+                        progress_data['chunks_status'][part_num]['downloaded'] = 0
+                    bytes_this_chunk = 0
+                    bytes_at_last_speed_calc = 0
+                    last_speed_calc_time = time.time()
+
+                logger_func(f"   🚀 [Chunk {part_num + 1}/{total_parts}] Starting download: bytes {start_byte}-{end_byte if end_byte != -1 else 'EOF'}")
+                # The context manager releases the streamed connection on every exit path.
+                with requests.get(chunk_url, headers=chunk_headers, timeout=(10, 120), stream=True, cookies=cookies_for_chunk) as response:
+                    response.raise_for_status()
+                    if start_byte == 0 and end_byte == -1 and int(response.headers.get('Content-Length', 0)) == 0:
+                        logger_func(f"{tag} Confirmed 0-byte file.")
+                        return 0, True
+                    if range_requested and start_byte > 0 and response.status_code != 206:
+                        # Anything but 206 carries the full body; writing it at an offset
+                        # would corrupt the file, so give up on this chunk.
+                        logger_func(f"{err_tag} Server ignored the Range header (HTTP {response.status_code}); cannot download this part.")
+                        return bytes_this_chunk, False
+
+                    with open(temp_file_path, 'r+b') as f:  # Open in read-write binary
+                        f.seek(start_byte)
+                        for data_segment in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE_ITER):
+                            if _chunk_interrupted(tag, logger_func, cancellation_event, skip_event, pause_event,
+                                                  ("Cancelled during data iteration.", "Skip event during data iteration.",
+                                                   "Paused during data iteration...", "Cancelled while paused in data iteration.",
+                                                   "Resumed from data iteration pause.")):
+                                return bytes_this_chunk, False
+                            if not data_segment:
+                                continue
+                            f.write(data_segment)
+                            bytes_this_chunk += len(data_segment)
+
+                            with progress_data['lock']:
+                                chunk_status = progress_data['chunks_status'][part_num]
+                                progress_data['total_downloaded_so_far'] += len(data_segment)
+                                chunk_status['downloaded'] = bytes_this_chunk
+                                chunk_status['active'] = True
+
+                                current_time = time.time()
+                                time_delta_speed = current_time - last_speed_calc_time
+                                if time_delta_speed > 0.5: # Calculate speed every 0.5 seconds
+                                    bytes_delta = bytes_this_chunk - bytes_at_last_speed_calc
+                                    chunk_status['speed_bps'] = (bytes_delta * 8) / time_delta_speed
+                                    last_speed_calc_time = current_time
+                                    bytes_at_last_speed_calc = bytes_this_chunk
+                                if emitter and (current_time - global_emit_time_ref[0] > 0.25): # Max ~4Hz for the whole file
+                                    global_emit_time_ref[0] = current_time # Update shared last emit time
+                                    status_list_copy = [dict(s) for s in progress_data['chunks_status']] # Make a deep enough copy
+                                    if isinstance(emitter, queue.Queue):
+                                        emitter.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)})
+                                    elif hasattr(emitter, 'file_progress_signal'): # PostProcessorSignals-like
+                                        emitter.file_progress_signal.emit(api_original_filename, status_list_copy)
+                return bytes_this_chunk, True
+
+            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
+                logger_func(f"{err_tag} Retryable error: {e}")
+                if attempt == MAX_CHUNK_DOWNLOAD_RETRIES:
+                    logger_func(f"{err_tag} Failed after {MAX_CHUNK_DOWNLOAD_RETRIES} retries.")
+                    return bytes_this_chunk, False
+            except requests.exceptions.RequestException as e: # Includes 4xx/5xx errors after raise_for_status
+                logger_func(f"{err_tag} Non-retryable error: {e}")
+                return bytes_this_chunk, False
+            except Exception as e:
+                logger_func(f"{err_tag} Unexpected error: {e}\n{traceback.format_exc(limit=1)}")
+                return bytes_this_chunk, False
+    finally:
+        # Runs on every exit path once the chunk has started, so a finished or failed
+        # chunk is never left flagged as active in the shared status list.
+        with progress_data['lock']:
+            progress_data['chunks_status'][part_num]['active'] = False
+            progress_data['chunks_status'][part_num]['speed_bps'] = 0
+    return bytes_this_chunk, False # Should be unreachable
+
+
+def _remove_part_file(temp_file_path, logger_func):
+    """Delete the temporary .part file if it exists, logging instead of raising on failure."""
+    if os.path.exists(temp_file_path):
+        try: os.remove(temp_file_path)
+        except OSError as e: logger_func(f"    Failed to remove temp part file '{temp_file_path}': {e}")
+
+
+def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, api_original_filename,
+                           emitter_for_multipart, cookies_for_chunk_session, # Added cookies_for_chunk_session
+                           cancellation_event, skip_event, logger_func, pause_event):
+    """
+    Downloads a file in multiple parts concurrently.
+
+    Args:
+        file_url: URL of the file to download.
+        save_path: Final destination path; data is written to ``save_path + ".part"``.
+        total_size: Expected file size in bytes (0 for an empty file).
+        num_parts: Number of byte ranges / worker threads; must be at least 1.
+        headers: Base request headers passed to every chunk request.
+        api_original_filename: File name used in logs and progress updates.
+        emitter_for_multipart: Optional ``queue.Queue`` or object exposing ``file_progress_signal``.
+        cookies_for_chunk_session: Cookies passed to every chunk request.
+        cancellation_event, skip_event, pause_event: Event-like objects or None.
+        logger_func: Callable that receives each log line.
+
+    Returns: (download_successful_flag, downloaded_bytes, calculated_file_hash, temp_file_handle_or_None)
+    The temp_file_handle will be an open read-binary file handle to the .part file if successful, otherwise None.
+    It is the responsibility of the caller to close this handle and rename/delete the .part file.
+    On failure the .part file is removed before returning.
+    """
+    if num_parts < 1:
+        # Guard before touching the disk: a zero part count would otherwise divide by zero
+        # after the .part file had already been created.
+        logger_func(f"   ⚠️ Invalid part count ({num_parts}) for multipart download of '{api_original_filename}'. Aborting multipart.")
+        return False, 0, None, None
+
+    logger_func(f"⬇️ Initializing Multi-part Download ({num_parts} parts) for: '{api_original_filename}' (Size: {total_size / (1024*1024):.2f} MB)")
+    temp_file_path = save_path + ".part"
+
+    try:
+        with open(temp_file_path, 'wb') as f_temp:
+            if total_size > 0:
+                f_temp.truncate(total_size) # Pre-allocate space
+    except IOError as e:
+        logger_func(f"   ❌ Error creating/truncating temp file '{temp_file_path}': {e}")
+        return False, 0, None, None
+
+    chunk_size_calc = total_size // num_parts
+    chunks_ranges = []
+    for i in range(num_parts):
+        start = i * chunk_size_calc
+        end = start + chunk_size_calc - 1 if i < num_parts - 1 else total_size - 1
+        if start <= end: # Valid range
+            chunks_ranges.append((start, end))
+        elif total_size == 0 and i == 0: # Special case for 0-byte file
+            chunks_ranges.append((0, -1)) # Indicates 0-byte file, download 0 bytes from offset 0
+
+    # The (0, -1) marker for a 0-byte file also evaluates to a size of 0 here.
+    chunk_actual_sizes = [end - start + 1 for start, end in chunks_ranges]
+
+    if not chunks_ranges and total_size > 0:
+        logger_func(f"   ⚠️ No valid chunk ranges for multipart download of '{api_original_filename}'. Aborting multipart.")
+        _remove_part_file(temp_file_path, logger_func)
+        return False, 0, None, None
+
+    progress_data = {
+        'total_file_size': total_size, # Overall file size for reference
+        'total_downloaded_so_far': 0,  # New key for overall progress
+        'chunks_status': [ # Status for each chunk
+            {'id': i, 'downloaded': 0, 'total': chunk_actual_sizes[i] if i < len(chunk_actual_sizes) else 0, 'active': False, 'speed_bps': 0.0}
+            for i in range(num_parts)
+        ],
+        'lock': threading.Lock(),
+        'last_global_emit_time': [time.time()] # Shared mutable for global throttling timestamp
+    }
+
+    chunk_futures = []
+    all_chunks_successful = True
+    total_bytes_from_chunks = 0 # Still useful to verify total downloaded against file size
+
+    # str() keeps the thread name valid even if no file name was supplied.
+    with ThreadPoolExecutor(max_workers=num_parts, thread_name_prefix=f"MPChunk_{str(api_original_filename)[:10]}_") as chunk_pool:
+        for i, (start, end) in enumerate(chunks_ranges):
+            if cancellation_event and cancellation_event.is_set(): all_chunks_successful = False; break
+            chunk_futures.append(chunk_pool.submit(
+                _download_individual_chunk, chunk_url=file_url, temp_file_path=temp_file_path,
+                start_byte=start, end_byte=end, headers=headers, part_num=i, total_parts=num_parts,
+                progress_data=progress_data, cancellation_event=cancellation_event, skip_event=skip_event, global_emit_time_ref=progress_data['last_global_emit_time'],
+                pause_event=pause_event, cookies_for_chunk=cookies_for_chunk_session, logger_func=logger_func, emitter=emitter_for_multipart,
+                api_original_filename=api_original_filename
+            ))
+
+        for future in as_completed(chunk_futures):
+            if cancellation_event and cancellation_event.is_set(): all_chunks_successful = False; break
+            try:
+                bytes_downloaded_this_chunk, success_this_chunk = future.result()
+            except Exception as e:
+                # A crashed worker counts as a failed chunk so the cleanup below still runs.
+                logger_func(f"   ❌ Chunk worker for '{api_original_filename}' raised an unexpected error: {e}")
+                all_chunks_successful = False
+                continue
+            total_bytes_from_chunks += bytes_downloaded_this_chunk
+            if not success_this_chunk:
+                all_chunks_successful = False
+
+    if cancellation_event and cancellation_event.is_set():
+        logger_func(f"   Multi-part download for '{api_original_filename}' cancelled by main event.")
+        all_chunks_successful = False
+    if emitter_for_multipart:
+        # Send one final snapshot so the receiver sees the end state of every chunk.
+        with progress_data['lock']:
+            status_list_copy = [dict(s) for s in progress_data['chunks_status']]
+            if isinstance(emitter_for_multipart, queue.Queue):
+                emitter_for_multipart.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)})
+            elif hasattr(emitter_for_multipart, 'file_progress_signal'): # PostProcessorSignals-like
+                emitter_for_multipart.file_progress_signal.emit(api_original_filename, status_list_copy)
+
+    if all_chunks_successful and (total_bytes_from_chunks == total_size or total_size == 0):
+        logger_func(f"   ✅ Multi-part download successful for '{api_original_filename}'. Total bytes: {total_bytes_from_chunks}")
+        try:
+            md5_hasher = hashlib.md5()
+            with open(temp_file_path, 'rb') as f_hash:
+                for buf in iter(lambda: f_hash.read(4096*10), b''): # Read in larger buffers for hashing
+                    md5_hasher.update(buf)
+            calculated_hash = md5_hasher.hexdigest()
+            return True, total_bytes_from_chunks, calculated_hash, open(temp_file_path, 'rb')
+        except OSError as e:
+            # Fall through to the cleanup below instead of leaving the .part file behind.
+            logger_func(f"   ❌ Could not read back temp file '{temp_file_path}' after multi-part download: {e}. Cleaning up.")
+    else:
+        logger_func(f"   ❌ Multi-part download failed for '{api_original_filename}'. Success: {all_chunks_successful}, Bytes: {total_bytes_from_chunks}/{total_size}. Cleaning up.")
+    _remove_part_file(temp_file_path, logger_func)
+    return False, total_bytes_from_chunks, None, None
--- a/readme.md
+++ b/readme.md
@@ -1,108 +1,384 @@
-# Kemono Downloader
+<h1 align="center">Kemono Downloader v4.1.1</h1>

-A simple, multi-platform GUI application built with PyQt5 to download content from Kemono.su or Coomer.party creator pages or specific posts, with options for filtering and organizing downloads.
+<div align="center">
+  <img src="https://github.com/Yuvi9587/Kemono-Downloader/blob/main/Read.png" alt="Kemono Downloader"/>
+</div>

-## Features
+---

-* **GUI Interface:** Easy-to-use graphical interface.
-* **URL Support:** Download from a creator's main page (paginated) or a specific post URL from Kemono or Coomer sites.
-* **Download Location:** Select your desired output directory.
-* **Subfolder Organization:**
-    * Organize downloads into folders based on character/artist names found in post titles (using your "Known Names" list).
-    * Option to create a custom folder for single post downloads.
-    * Automatic folder naming based on post title if no known names are matched.
-* **Known Names List:** Manage a persistent list of known names (artists, characters, series) for improved folder organization and filtering.
-* **Content Filtering:**
-    * **Character/Name Filter:** Only download posts where the specified known name is found in the title.
-    * **File Type Filter:** Download All Files, Images/GIFs Only, or Videos Only.
-    * **Skip Words Filter:** Specify a list of comma-separated words to skip posts or files if these words appear in their titles or filenames.
-* **Archive Skipping:** Options to skip `.zip` and `.rar` files (enabled by default).
-* **Image Compression:** Optionally compress large images (larger than 1.5MB) to WebP format to save space (requires Pillow library).
-* **Thumbnail Downloading:** Option to download thumbnails. (Note: The previous local API method for enhanced thumbnail fetching has been removed. Thumbnail availability might depend on the source.)
-* **Duplicate Prevention:**
-    * Avoids re-downloading files with the same content hash.
-    * Checks for existing filenames in the target directory.
-* **Multithreading:** Utilizes multithreading for faster downloads from full creator pages (single posts are processed in a single thread).
-* **Progress Log:** View detailed download progress, status messages, and errors.
-* **Dark Theme:** Built-in dark theme for comfortable use.
-* **Download Management:**
-    * Ability to cancel an ongoing download process.
-    * Option to skip the specific file currently being downloaded (in single-thread mode).
-* **Persistent Configuration:** Saves the "Known Names" list to a local file.
+A powerful, feature-rich GUI application for downloading content from **[Kemono.su](https://kemono.su)** and **[Coomer.party](https://coomer.party)**.  
+Built with **PyQt5**, this tool is ideal for users who want deep filtering, customizable folder structures, efficient downloads, and intelligent automation — all within a modern, user-friendly graphical interface.

-## Prerequisites
+---

-* Python 3.6 or higher
-* `pip` package installer
+##  What's New in v4.1.1? - Enhanced Image Discovery & Audio Filtering
+
+Version 4.1.1 brings significant enhancements, including smarter image capture from post content and a dedicated filter mode for audio files.
+
+###  "Scan Content for Images" Feature
+
+- **Enhanced Image Discovery:** A new checkbox, "**Scan Content for Images**," has been added to the UI (grouped with "Download Thumbnails Only" and "Compress Large Images").
+- **How it Works:**
+    - When enabled, the downloader meticulously scans the HTML content of each post's description or body.
+    - It searches for images in two main ways:
+        - **Directly linked absolute URLs** (e.g., `https://externalsite.com/image.png`) that end with a common image extension (jpg, png, gif, etc.).
+        - **Images embedded using HTML `<img>` tags.** The downloader extracts the `src` attribute from these tags and can resolve various path types:
+            - Absolute URLs (e.g., `http://...` or `https://...`)
+            - Protocol-relative URLs (e.g., `//cdn.example.com/image.jpg`)
+            - Root-relative paths (e.g., `/data/user_content/image.gif`), which are resolved against the site's base URL (like `https://kemono.su/data/user_content/image.gif`).
+    - This is particularly useful for capturing images that are part of the post's narrative but not formally listed in the API's file or attachment sections.
+- **Default State:** This option is **unchecked by default**.
+- **Key Interaction with "Download Thumbnails Only":** This new feature works closely with the existing "Download Thumbnails Only" option:
+    - If you enable "Download Thumbnails Only":
+        - The "Scan Content for Images" checkbox will **automatically become checked and disabled** (locked).
+        - The downloader then **exclusively downloads images discovered through the content scan**. Any API-listed thumbnails are bypassed, giving priority to images embedded directly in the post.
+    - If you disable "Download Thumbnails Only":
+        - The "Scan Content for Images" checkbox will become **enabled again and revert to being unchecked**. You can then manually enable it if you wish to scan content without being in thumbnail-only mode.
+
+This feature ensures a more comprehensive download experience, especially for posts where images are integrated directly into the text.
+
+### New "🎧 Only Audio" Filter Mode
+
+Alongside image discovery, v4.1.1 also introduces/enhances a dedicated filter mode for audio enthusiasts:
+
+-   **Focused Audio Downloads:** The "🎧 Only Audio" option in the "Filter Files" radio button group allows you to download exclusively common audio file types. This includes formats like MP3, WAV, FLAC, M4A, OGG, and more.
+-   **Streamlined UI:** When "🎧 Only Audio" mode is active:
+    -   Irrelevant UI options such as the "Skip Scope" button (for word-based post/file skipping) and the "Multi-part Download" toggle are hidden to simplify the interface.
+    -   The "Show External Links in Log" checkbox is automatically disabled, as link extraction is not the focus of this mode.
+-   **Archive Handling:** Unlike the "📦 Only Archives" mode (which disables archive skipping), the "Skip .zip" and "Skip .rar" checkboxes remain enabled and configurable when "🎧 Only Audio" is selected. This gives you the flexibility to also exclude any archives encountered while in audio-only mode if desired.
+-   **Purpose:** This mode is perfect for users who primarily want to collect audio tracks, podcasts, or sound effects from posts without downloading other media types.
+
+
+---
+
+##  Previous Update: What's New in v4.0.1?
+
+Version 4.0.1 focuses on enhancing access to content and providing even smarter organization:
+
+###  Cookie Management
+
+- **Access Content:** Seamlessly download from Kemono/Coomer as if you were logged in by using your browser's cookies.
+- **Flexible Input:**
+  - Directly paste your cookie string (e.g., `name1=value1; name2=value2`).
+  - Browse and load cookies from a `cookies.txt` file (Netscape format).
+  - Automatic fallback to a `cookies.txt` file in the application directory if "Use Cookie" is enabled and no other source is specified.
+- **Easy Activation:** A simple "Use Cookie" checkbox in the UI controls this feature.
+- *Important Note: Cookie settings (text, file path, and enabled state) are configured per session and are not saved when the application is closed. You will need to re-apply them on each launch if needed.*
+
+---
+
+###  Advanced `Known.txt` and Character Filtering
+
+The `Known.txt` system has been revamped for improved performance and stability. The previous method of handling known names could become resource-intensive with large lists, potentially leading to application slowdowns or crashes. This new, streamlined system offers more direct control and robust organization.
+The `Known.txt` file and the "Filter by Character(s)" input field work together to provide powerful and flexible content organization. The `Known.txt` file itself has a straightforward syntax, while the UI input allows for more complex session-specific grouping and alias definitions that can then be added to `Known.txt`.
+
+**1. `Known.txt` File Syntax (Located in App Directory):**
+
+`Known.txt` stores your persistent list of characters, series, or keywords for folder organization. Each line is an entry:
+
+- **Simple Entries:**
+  - A line like `My Awesome Series` or `Nami`.
+  - **Behavior:** Content matching this term will be saved into a folder named "My Awesome Series" or "Nami" respectively (if "Separate Folders" is enabled).
+
+**2. "Filter by Character(s)" UI Input Field:**
+
+This field allows for dynamic filtering for the current download session and provides options for how new entries are added to `Known.txt`.
+
+- **Standard Names:**
+  - Input: `Nami, Robin`
+  - Session Behavior: Filters for "Nami" OR "Robin". If "Separate Folders" is on, creates folders "Nami" and "Robin".
+  - `Known.txt` Addition: If "Nami" is new and selected for addition in the confirmation dialog, it's added as `Nami` on a new line in `Known.txt`.
+
+- **Grouped Aliases for a Single Character (using `(...)~` syntax):**
+  - Input: `(Boa, Hancock)~`
+  - Meaning: "Boa" and "Hancock" are different names/aliases for the *same character*. The names are listed within parentheses separated by commas (e.g., `name1, alias1, alias2`), and the entire group is followed by a `~` symbol. This is useful when a creator uses different names for the same character.
+  - Session Behavior: Filters for "Boa" OR "Hancock". If "Separate Folders" is on, creates a single folder named "Boa Hancock".
+  - `Known.txt` Addition: If this group is new and selected for addition, it's added to `Known.txt` as a grouped alias entry, typically `(Boa Hancock)`. The first name in the `Known.txt` entry (e.g., "Boa Hancock") becomes the primary folder name.
+
+- **Combined Folder for Distinct Characters (using `(...)` syntax):**
+  - Input: `(Vivi, Uta)`
+  - Meaning: "Vivi" and "Uta" are *distinct characters*, but for this download session, their content should be grouped into a single folder. The names are listed within parentheses separated by commas. This is useful for grouping art of less frequent characters without creating many small individual folders.
+  - Session Behavior: Filters for "Vivi" OR "Uta". If "Separate Folders" is on, creates a single folder named "Vivi Uta".
+  - `Known.txt` Addition: If this "combined group" is new and selected for addition, "Vivi" and "Uta" are added to `Known.txt` as *separate, individual simple entries* on new lines:
+    ```
+    Vivi
+    Uta
+    ```
+    The combined folder "Vivi Uta" is a session-only convenience; `Known.txt` stores them as distinct entities for future individual use.
+
+**3. Interaction with `Known.txt`:**
+
+- **Adding New Names from Filters:** When you use the "Filter by Character(s)" input, if any names or groups are new (not already in `Known.txt`), a dialog will appear after you start the download. This dialog allows you to select which of these new names/groups should be added to `Known.txt`, formatted according to the rules described above.
+- **Intelligent Fallback:** If "Separate Folders by Name/Title" is active, and content doesn't match the "Filter by Character(s)" UI input, the downloader consults your `Known.txt` file for folder naming.
+- **Direct Management:** You can add simple entries directly to `Known.txt` using the list and "Add" button in the UI's `Known.txt` management section. For creating or modifying complex grouped alias entries directly in the file, or for bulk edits, click the "Open Known.txt" button. The application reloads `Known.txt` on startup or before a download process begins.
+- **Using Known Names to Populate Filters (via "Add to Filter" Button):**
+  - Next to the "Add" button in the `Known.txt` management section, a "⤵️ Add to Filter" button provides a quick way to use your existing known names.
+  - Clicking this opens a popup window displaying all entries from your `Known.txt` file, each with a checkbox.
+  - The popup includes:
+    - A search bar to quickly filter the list of names.
+    - "Select All" and "Deselect All" buttons for convenience.
+  - After selecting the desired names, click "Add Selected".
+  - The chosen names will be inserted into the "Filter by Character(s)" input field.
+  - **Important Formatting:** If a selected entry from `Known.txt` is a group (e.g., originally `(Boa Hancock)` in `Known.txt`, which implies aliases "Boa" and "Hancock"), it will be added to the filter field as `(Boa, Hancock)~`. Simple names are added as-is.
+
+
+---
+##  What's in v3.5.0? (Previous Update)
+This version brought significant enhancements to manga/comic downloading, filtering capabilities, and user experience:
+
+###  Enhanced Manga/Comic Mode
+
+- **Optional Filename Prefix:**
+  - When using the "Date Based" or "Original File Name" manga styles, an optional prefix can be specified in the UI.
+  - This prefix will be prepended to each filename generated by these styles.
+  - **Example (Date Based):** If prefix is `MySeries_`, files become `MySeries_001.jpg`, `MySeries_002.png`, etc.
+  - **Example (Original File Name):** If prefix is `Comic_Vol1_`, an original file `page_01.jpg` becomes `Comic_Vol1_page_01.jpg`.
+  - This input field appears automatically when either of these two manga naming styles is selected.
+
+- **New "Date Based" Filename Style:**
+
+  - Perfect for truly sequential content! Files are named numerically (e.g., `001.jpg`, `002.jpg`, `003.ext`...) across an *entire creator's feed*, strictly following post publication order.
+
+  - **Smart Numbering:** Automatically resumes from the highest existing number found in the series folder (and subfolders, if "Subfolder per Post" is enabled).
+
+  - **Guaranteed Order:** Disables multi-threading for post processing to ensure sequential accuracy.
+
+  - Works alongside the existing "Post Title" and "Original File Name" styles.
+- **New "Title+G.Num (Post Title + Global Numbering)" Filename Style:**
+  - Ideal for series where you want each file to be prefixed by its post title but still maintain a global sequential number across all posts from a single download session.
+  - **Naming Convention:** Files are named using the cleaned post title as a prefix, followed by an underscore and a globally incrementing number (e.g., `Post Title_001.ext`, `Post Title_002.ext`).
+  - **Example:**
+    - Post "Chapter 1: The Adventure Begins" (contains 2 files: `imageA.jpg`, `imageB.png`) -> `Chapter 1 The Adventure Begins_001.jpg`, `Chapter 1 The Adventure Begins_002.png`
+    - Next Post "Chapter 2: New Friends" (contains 1 file: `cover.jpg`) -> `Chapter 2 New Friends_003.jpg`
+  - **Sequential Integrity:** Multithreading for post processing is automatically disabled when this style is selected to ensure the global numbering is strictly sequential.
+
+---
+
+###  "Remove Words from Filename" Feature
+
+- Specify comma-separated words or phrases (case-insensitive) that will be automatically removed from filenames.
+
+- Example: `patreon, [HD], _final` transforms `AwesomeArt_patreon.jpg` and `Hinata_Hd.jpg` into `AwesomeArt.jpg` and `Hinata.jpg`.
+
+---
+
+###  New "Only Archives" File Filter Mode
+
+- Exclusively downloads `.zip` and `.rar` files.
+
+- Automatically disables conflicting options like "Skip .zip/.rar" and external link logging.
+
+---
+
+###  Improved Character Filter Scope - "Comments (Beta)"
+
+- **File-First Check:** Prioritizes matching filenames before checking post comments for character names.
+
+- **Comment Fallback:** Only checks comments if no filename match is found, reducing unnecessary API calls.
+
+---
+
+###  Refined "Missed Character Log"
+
+- Displays a capitalized, alphabetized list of key terms from skipped post titles.
+
+- Makes it easier to spot patterns or characters that might be unintentionally excluded.
+
+---
+
+###  Enhanced Multi-part Download Progress
+
+- Granular visibility into active chunk downloads and combined speed for large files.
+
+---
+
+###  Updated Onboarding Tour
+
+- Improved guide for new users, covering v4.0.0 features and existing core functions.
+
+---
+
+###  Robust Configuration Path
+
+- Settings and `Known.txt` are now stored in the same folder as the app.
+
+---
+
+##  Core Features
+
+---
+
+### User Interface & Workflow
+
+- **Clean PyQt5 GUI** — Simple, modern, and dark-themed.
+
+- **Persistent Settings** — Saves preferences between sessions.
+
+- **Download Modes:**
+  - Single Post URL
+  - Entire Creator Feed
+
+- **Flexible Options:**
+  - Specify Page Range (disabled in Manga Mode)
+  - Custom Folder Name for single posts
+
+---
+
+###  Smart Filtering
+
+- **Character Name Filtering:**
+  - Use `Tifa, Aerith` or group `(Boa, Hancock)` → folder `Boa Hancock`
+  - Flexible input for current session and for adding to `Known.txt`.
+  - Examples:
+    - `Nami` (simple character)
+    - `(Boa Hancock)~` (aliases for one character, session folder "Boa Hancock", adds `(Boa Hancock)` to `Known.txt`)
+    - `(Vivi, Uta)` (distinct characters, session folder "Vivi Uta", adds `Vivi` and `Uta` separately to `Known.txt`)
+  - A "⤵️ Add to Filter" button (near the `Known.txt` management UI) allows you to quickly populate this field by selecting from your existing `Known.txt` entries via a popup with search and checkbox selection.
+  - See "Advanced `Known.txt` and Character Filtering" for full details.
+  - **Filter Scopes:**
+    - `Files`
+    - `Title`
+    - `Both (Title then Files)`
+    - `Comments (Beta - Files first)`
+
+- **Skip with Words:**
+  - Exclude with `WIP, sketch, preview`
+
+  - **Skip Scopes:**
+    - `Files`
+    - `Posts`
+    - `Both (Posts then Files)`
+
+- **File Type Filters:**
+  - `All`, `Images/GIFs`, `Videos`, `📦 Only Archives`, `🔗 Only Links`
+
+- **Filename Cleanup:**
+  - Remove illegal and unwanted characters or phrases
+
+---
+
+### Manga/Comic Mode (Creator Feeds Only)
+
+- **Chronological Processing** — Oldest posts first
+
+- **Filename Style Options:**
+  - `Name: Post Title (Default)`
+  - `Name: Original File`
+  - `Name: Date Based (New)`
+  - `Name: Title+G.Num (Post Title + Global Numbering)`
+
+- **Best With:** Character filters set to manga/series title
+
+---
+
+### Folder Structure & Naming
+
+- **Subfolders:**
+  - Auto-created based on character name, post title, or `Known.txt`
+
+  - "Subfolder per Post" option for further nesting
+
+- **Smart Naming:** Cleans invalid characters and structures logically
+
+---
+
+### Thumbnail & Compression Tools
+- **Download Thumbnails Only:**
+  - Downloads small preview images from the API instead of full-sized files (if available).
+  - **Interaction with "Scan Content for Images" (New in v4.1.1):** When "Download Thumbnails Only" is active, "Scan Content for Images" is auto-enabled, and only images found by the content scan are downloaded. See "What's New in v4.1.1" for details.
+- **Scan Content for Images (New in v4.1.1):**
+  - A UI option to scan the HTML content of posts for embedded image URLs (from `<img>` tags or direct links).
+  - Resolves relative paths and helps capture images not listed in the API's formal attachments.
+  - See the "What's New in v4.1.1?" section for a comprehensive explanation.
+- **Compress to WebP** (via Pillow)
+  - Converts large images to smaller WebP versions
+
+
+---
+
+###  Performance Features
+
+- **Multithreading:**
+  - For both post processing and file downloading
+
+- **Multi-part Downloads:**
+  - Toggleable in GUI
+  - Splits large files into chunks
+  - Granular chunk-level progress display
+
+---
+
+### Logging & Progress
+
+- **Real-time Logs:** Activity, errors, skipped posts
+
+- **Missed Character Log:** Shows skipped keywords in easy-to-read list
+
+- **External Links Log:** Shows links (unless disabled in some modes)
+
+- **Export Links:** Save the collected links to a `.txt` file (Only Links mode)
+
+---
+
+###  Config System
+
+- **`Known.txt` for Smart Folder Naming (Located in App Directory):**
+  - A user-editable file that stores a list of preferred names, series titles, or keywords.
+  - It's primarily used as an intelligent fallback for folder creation when "Separate Folders by Name/Title" is enabled.
+  - **Syntax:**
+    - Simple entries: `My Favorite Series` (creates folder "My Favorite Series", matches "My Favorite Series").
+    - Grouped entries: `(Desired Folder Name, alias1, alias2)` (creates folder "Desired Folder Name"; matches "Desired Folder Name", "alias1", or "alias2").
+
+- **Settings Stored in App Directory**
+
+- **Editable Within GUI**
+
+---

 ## Installation

-1.  Clone or download this repository/script to your local machine.
-2.  Navigate to the script's directory in your terminal or command prompt.
-3.  Install the required Python libraries:
-    ```bash
-    pip install PyQt5 requests Pillow
-    ```
-    *(Pillow is required for image compression and potentially for basic image handling.)*
+---

-## How to Run
+### Requirements

-1.  Make sure you have followed the installation steps.
-2.  Open your terminal or command prompt and navigate to the script's directory.
-3.  Run the script using Python:
-    ```bash
-    python main.py
-    ```
+- Python 3.6 or higher  
+- pip

-## How to Use
+---

-1.  **URL Input:** Enter the URL of the Kemono/Coomer creator page (e.g., `https://kemono.su/patreon/user/12345`) or a specific post (e.g., `https://kemono.su/patreon/user/12345/post/67890`) into the "Kemono Creator/Post URL" field.
-2.  **Download Location:** Use the "Browse" button to select the root directory where you want to save the downloaded content.
-3.  **Custom Folder Name (Single Post Only):** If downloading a single post and "Separate Folders" is enabled, you can specify a custom folder name for that post's content.
-4.  **Filter by Show/Character Name (Optional):** If "Separate Folders" is enabled, enter a name from your "Known Names" list. Only posts with titles matching this name will be downloaded into a folder named accordingly. If empty, the script will try to match any known name or derive a folder name from the post title.
-5.  **Skip Posts/Files with Words:** Enter comma-separated words (e.g., `WIP, sketch, preview`). Posts or files containing these words in their title/filename will be skipped.
-6.  **File Type Filter:**
-    * **All:** Downloads all files.
-    * **Images/GIFs:** Downloads common image formats and GIFs.
-    * **Videos:** Downloads common video formats.
-7.  **Options (Checkboxes):**
-    * **Separate Folders by Name/Title:** Enables creation of subfolders based on known names or post titles. Controls visibility of "Filter by Show/Character Name" and "Custom Folder Name". (Default: On)
-    * **Download Thumbnails Only:** Attempts to download only thumbnails for posts. (Default: Off)
-    * **Skip .zip / Skip .rar:** Prevents downloading of these archive types. (Default: On)
-    * **Compress Large Images (to WebP):** Compresses images larger than 1.5MB. (Default: Off)
-    * **Use Multithreading:** Enables faster downloads for full creator pages. (Default: On)
-8.  **Known Names List:**
-    * The list on the left ("Known Shows/Characters") displays names used for folder organization and filtering. This list is saved in `Known.txt`.
-    * Use the input field below the list and the "➕ Add" button to add new names.
-    * Select names and click "🗑️ Delete Selected" to remove them.
-    * A search bar above the list allows you to filter the displayed names.
-9.  **Start Download:** Click "⬇️ Start Download" to begin.
-10. **Cancel / Skip:**
-    * **❌ Cancel:** Stops the entire download process.
-    * **⏭️ Skip Current File:** (Only in single-thread mode during file download) Skips the currently downloading file and moves to the next.
-11. **Progress Log:** The area on the right shows detailed logs of the download process, including fetched posts, saved files, skips, and errors.
+### Install Dependencies

-## Building an Executable (Optional)
+```bash
+pip install PyQt5 requests Pillow

-You can create a standalone `.exe` file for Windows using `PyInstaller`.
+```

-1.  Install PyInstaller: `pip install pyinstaller`
-2.  Obtain an icon file (`.ico`). Place it in the same directory as `main.py`.
-3.  Open your terminal in the script's directory and run:
-    ```bash
-    pyinstaller --name "YourAppName" --onefile --windowed --icon="your_icon.ico" main.py
-    ```
-    Replace `"YourAppName"` with your desired application name and `"your_icon.ico"` with the actual name of your icon file.
-4.  The executable will be found in the `./dist` folder.
+***

-## Configuration
+## Build a Standalone Executable (Optional)

-The application saves your list of known names (characters, artists, series, etc.) to a file named `Known.txt` in the same directory as the script (`main.py`). Each name is stored on a new line. You can manually edit this file if needed.
+1. Install PyInstaller:
+```bash
+pip install pyinstaller
+```

-## Dark Theme
+2. Run:
+```bash
+pyinstaller --name "Kemono Downloader" --onefile --windowed --icon="Kemono.ico" main.py
+```

-The application uses a built-in dark theme for the user interface.
+3. Output will be in the `dist/` folder.

-## Contributing
+***

-Contributions are welcome! If you find a bug or have a feature request, please open an issue on the GitHub repository (if applicable). If you want to contribute code, please fork the repository and create a pull request.
+## Config Files
+- `settings.json` — Stores your UI preferences and settings.
+- `Known.txt` — Stores character names, series titles, or keywords for organizing downloaded content into specific folders.
+  - Supports simple entries (e.g., `My Series`) and grouped entries for aliases (e.g., `(Folder Name, alias1, alias2)` where "Folder Name" is the name of the created folder, and all terms are used for matching).
+
+***
+
+## Feedback & Support
+
+Issues? Suggestions?  
+Open an issue on the [GitHub repository](https://github.com/Yuvi9587/kemono-downloader) or join our community.
Author	SHA1	Message	Date
Yuvi9587	f774773b63	Commit	2025-05-27 20:34:38 +05:30
Yuvi9587	8036cb9835	Commit	2025-05-26 20:37:37 +05:30
Yuvi9587	13fc33d2c0	Commit	2025-05-26 09:33:45 +05:30
Yuvi9587	8663ef54a3	Commit	2025-05-26 08:43:13 +05:30
Yuvi9587	0316813792	Delete dist directory	2025-05-26 13:55:54 +05:30
Yuvi9587	d201a5396c	Delete build/Kemono Downloader directory	2025-05-26 13:55:25 +05:30
Yuvi9587	86f9396b6c	Commit	2025-05-26 13:52:34 +05:30
Yuvi9587	0fb4bb3cb0	Commit	2025-05-26 13:52:07 +05:30
Yuvi9587	1528d7ce25	Update Read.png	2025-05-26 09:54:26 +05:30
Yuvi9587	4e7eeb7989	Commit	2025-05-26 09:52:06 +05:30
Yuvi9587	7f2976a4f4	Commit	2025-05-26 09:48:00 +05:30
Yuvi9587	8928cb92da	readme.md	2025-05-26 01:39:39 +05:30
Yuvi9587	a181b76124	Update main.py	2025-05-25 17:18:11 +05:30
Yuvi9587	8f085a8f63	Commit	2025-05-25 21:52:04 +05:30
Yuvi9587	93a997351b	Update readme.md	2025-05-25 21:22:47 +05:30
Yuvi9587	b3af6c1c15	Commit	2025-05-25 21:21:00 +05:30
Yuvi9587	4a65263f7d	Commit	2025-05-25 19:49:17 +05:30
Yuvi9587	1091b5b9b4	Commit	2025-05-25 19:48:08 +05:30
Yuvi9587	f6b3ff2f5c	Update main.py	2025-05-25 11:36:35 +05:30
Yuvi9587	b399bdf5cf	readme.md	2025-05-25 16:54:35 +05:30
Yuvi9587	9ace161bc8	Update downloader_utils.py	2025-05-25 11:22:04 +05:30
Yuvi9587	66e52cfd78	Commit	2025-05-25 12:27:15 +05:30
Yuvi9587	e665fd3cde	Commit	2025-05-25 11:38:38 +05:30
Yuvi9587	fc94f4c691	Commit	2025-05-24 22:55:23 +05:30
Yuvi9587	78e2012f04	Commit	2025-05-24 13:30:06 +05:30
Yuvi9587	3fe9dbacc6	Commit	2025-05-24 13:15:08 +05:30
Yuvi9587	004dea06e0	Commit	2025-05-24 16:22:47 +05:30
Yuvi9587	8994a69c34	Add files via upload	2025-05-24 10:36:15 +05:30
Yuvi9587	f4a692673e	main.py	2025-05-24 10:35:46 +05:30
Yuvi9587	4cb5f14ef6	Delete Known.txt	2025-05-23 21:01:05 +05:30
Yuvi9587	a596c4f350	Update main.py	2025-05-23 20:59:35 +05:30
Yuvi9587	e091c60d29	Commit	2025-05-23 20:23:36 +05:30
Yuvi9587	d2ea026a41	Commit	2025-05-23 19:11:52 +05:30
Yuvi9587	bb3d5c20f5	Commit	2025-05-23 18:24:42 +05:30
Yuvi9587	a13eae8f16	Commit	2025-05-23 18:19:30 +05:30
Yuvi9587	7e5dc71720	Commit	2025-05-23 18:06:47 +05:30
Yuvi9587	d7960bbb85	Commit	2025-05-23 17:22:54 +05:30
Yuvi9587	c4d5ba3040	Commit	2025-05-22 07:40:10 +05:30
Yuvi9587	fd84de7bce	Commit	2025-05-22 07:03:05 +05:30
Yuvi9587	a6383b20a4	Commit	2025-05-21 17:20:16 +05:30
Yuvi9587	651f9d9f8d	Update main.py	2025-05-18 16:17:40 +05:30
Yuvi9587	decef6730f	Commit	2025-05-18 16:12:19 +05:30
Yuvi9587	32a12e8a09	Commit	2025-05-17 11:41:43 +05:30
Yuvi9587	62007d2d45	Update readme.md	2025-05-16 16:08:48 +05:30
Yuvi9587	f1e592cf99	Update readme.md	2025-05-16 12:50:32 +05:30
Yuvi9587	bf111d109a	Update main.py	2025-05-16 11:37:43 +05:30
Yuvi9587	00f8ff63d6	Commit	2025-05-16 11:23:37 +05:30
Yuvi9587	aee0ff999d	Commit	2025-05-15 08:45:32 +05:30
Yuvi9587	b5e9080285	Commit	2025-05-14 16:26:18 +05:30
Yuvi9587	25d33f1531	readme.md	2025-05-13 21:38:55 +05:30
Yuvi9587	ff0ccb2631	Commit	2025-05-13 07:31:09 +05:30
Yuvi9587	da507b2b3a	Commit	2025-05-12 18:37:11 +05:30
Yuvi9587	9165903e96	Update main.py	2025-05-12 10:54:57 +05:30
Yuvi9587	f85de58fcb	Commit	2025-05-12 10:54:31 +05:30
Yuvi9587	ccfb8496a2	Commit	2025-05-11 15:55:21 +05:30
Yuvi9587	e0d3e1b5af	commit	2025-05-10 23:59:00 +05:30
Yuvi9587	50ee50cd5c	readme.md	2025-05-10 12:16:45 +05:30
Yuvi9587	8982026d79	Commit	2025-05-10 11:11:35 +05:30
Yuvi9587	aec44f1782	Commit	2025-05-10 11:07:27 +05:30
Yuvi9587	866a5a90de	Commit	2025-05-09 19:03:01 +05:30
Yuvi9587	929051d46c	Commit	2025-05-08 22:13:12 +05:30
Yuvi9587	eada5057b7	Add MIT License	2025-05-08 22:10:35 +05:30
Yuvi9587	fe0b369446	Update readme.md	2025-05-08 20:24:22 +05:30
Yuvi9587	c0c2db709b	Commit	2025-05-08 19:49:50 +05:30