mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f774773b63 | ||
|
|
8036cb9835 | ||
|
|
13fc33d2c0 | ||
|
|
8663ef54a3 |
@@ -305,19 +305,43 @@ def prepare_cookies_for_request(use_cookie_flag, cookie_text_input, selected_coo
|
|||||||
if not use_cookie_flag:
|
if not use_cookie_flag:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if cookie_text_input:
|
# Attempt 1: Selected cookie file
|
||||||
logger_func(" 🍪 Using cookies from UI text input.")
|
if selected_cookie_file_path:
|
||||||
return parse_cookie_string(cookie_text_input)
|
|
||||||
elif selected_cookie_file_path:
|
|
||||||
logger_func(f" 🍪 Attempting to load cookies from selected file: '{os.path.basename(selected_cookie_file_path)}'...")
|
logger_func(f" 🍪 Attempting to load cookies from selected file: '{os.path.basename(selected_cookie_file_path)}'...")
|
||||||
return load_cookies_from_netscape_file(selected_cookie_file_path, logger_func)
|
cookies = load_cookies_from_netscape_file(selected_cookie_file_path, logger_func)
|
||||||
elif app_base_dir:
|
if cookies:
|
||||||
cookies_filepath = os.path.join(app_base_dir, "cookies.txt")
|
return cookies
|
||||||
logger_func(f" 🍪 No UI text or specific file selected. Attempting to load default '{os.path.basename(cookies_filepath)}' from app directory...")
|
else:
|
||||||
return load_cookies_from_netscape_file(cookies_filepath, logger_func)
|
logger_func(f" ⚠️ Failed to load cookies from selected file: '{os.path.basename(selected_cookie_file_path)}'. Trying other methods.")
|
||||||
else:
|
# Fall through if selected file is invalid or not found
|
||||||
logger_func(" 🍪 Cookie usage enabled, but no text input, specific file, or app base directory provided for cookies.txt.")
|
|
||||||
return None
|
# Attempt 2: Default cookies.txt in app directory
|
||||||
|
# This is tried if no specific file was selected OR if the selected file was provided but failed to load.
|
||||||
|
if app_base_dir: # Only proceed if app_base_dir is available
|
||||||
|
# Avoid re-logging "not found" or "failed" if a selected_cookie_file_path was already attempted and failed.
|
||||||
|
# Only log the attempt for default if no selected_cookie_file_path was given.
|
||||||
|
default_cookies_path = os.path.join(app_base_dir, "cookies.txt")
|
||||||
|
if os.path.exists(default_cookies_path): # Only attempt if it exists
|
||||||
|
if not selected_cookie_file_path: # Log attempt only if we didn't just try a selected file
|
||||||
|
logger_func(f" 🍪 No specific file selected. Attempting to load default '{os.path.basename(default_cookies_path)}' from app directory...")
|
||||||
|
cookies = load_cookies_from_netscape_file(default_cookies_path, logger_func)
|
||||||
|
if cookies:
|
||||||
|
return cookies
|
||||||
|
elif not selected_cookie_file_path: # Log failure only if we tried default as primary file method
|
||||||
|
logger_func(f" ⚠️ Failed to load cookies from default file: '{os.path.basename(default_cookies_path)}'. Trying text input.")
|
||||||
|
# Fall through if default file is invalid or not found
|
||||||
|
|
||||||
|
# Attempt 3: Cookies from UI text input
|
||||||
|
if cookie_text_input:
|
||||||
|
logger_func(" 🍪 Using cookies from UI text input (as file methods failed or were not applicable).")
|
||||||
|
cookies = parse_cookie_string(cookie_text_input)
|
||||||
|
if cookies:
|
||||||
|
return cookies
|
||||||
|
else:
|
||||||
|
logger_func(" ⚠️ UI cookie text input was provided but was empty or invalid.")
|
||||||
|
|
||||||
|
logger_func(" 🍪 Cookie usage enabled, but no valid cookies found from any source (selected file, default file, or text input).")
|
||||||
|
return None
|
||||||
|
|
||||||
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
|
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
|
||||||
if cancellation_event and cancellation_event.is_set(): # type: ignore
|
if cancellation_event and cancellation_event.is_set(): # type: ignore
|
||||||
@@ -645,6 +669,7 @@ class PostProcessorWorker:
|
|||||||
allow_multipart_download=True,
|
allow_multipart_download=True,
|
||||||
cookie_text="", # Added missing parameter
|
cookie_text="", # Added missing parameter
|
||||||
use_cookie=False, # Added missing parameter
|
use_cookie=False, # Added missing parameter
|
||||||
|
override_output_dir=None, # New parameter
|
||||||
selected_cookie_file=None, # Added missing parameter
|
selected_cookie_file=None, # Added missing parameter
|
||||||
app_base_dir=None, # New parameter for app's base directory
|
app_base_dir=None, # New parameter for app's base directory
|
||||||
manga_date_prefix=MANGA_DATE_PREFIX_DEFAULT, # New parameter for date-based prefix
|
manga_date_prefix=MANGA_DATE_PREFIX_DEFAULT, # New parameter for date-based prefix
|
||||||
@@ -652,7 +677,7 @@ class PostProcessorWorker:
|
|||||||
scan_content_for_images=False, # New flag for scanning HTML content
|
scan_content_for_images=False, # New flag for scanning HTML content
|
||||||
manga_global_file_counter_ref=None, # New parameter for global numbering
|
manga_global_file_counter_ref=None, # New parameter for global numbering
|
||||||
): # type: ignore
|
): # type: ignore
|
||||||
self.post = post_data
|
self.post = post_data # type: ignore
|
||||||
self.download_root = download_root
|
self.download_root = download_root
|
||||||
self.known_names = known_names
|
self.known_names = known_names
|
||||||
self.filter_character_list_objects_initial = filter_character_list if filter_character_list else [] # Store initial
|
self.filter_character_list_objects_initial = filter_character_list if filter_character_list else [] # Store initial
|
||||||
@@ -700,9 +725,11 @@ class PostProcessorWorker:
|
|||||||
self.manga_date_prefix = manga_date_prefix # Store the prefix
|
self.manga_date_prefix = manga_date_prefix # Store the prefix
|
||||||
self.manga_global_file_counter_ref = manga_global_file_counter_ref # Store global counter
|
self.manga_global_file_counter_ref = manga_global_file_counter_ref # Store global counter
|
||||||
self.use_cookie = use_cookie # Store cookie setting
|
self.use_cookie = use_cookie # Store cookie setting
|
||||||
|
self.override_output_dir = override_output_dir # Store the override directory
|
||||||
self.scan_content_for_images = scan_content_for_images # Store new flag
|
self.scan_content_for_images = scan_content_for_images # Store new flag
|
||||||
|
|
||||||
if self.compress_images and Image is None:
|
if self.compress_images and Image is None:
|
||||||
|
# type: ignore
|
||||||
self.logger("⚠️ Image compression disabled: Pillow library not found.")
|
self.logger("⚠️ Image compression disabled: Pillow library not found.")
|
||||||
self.compress_images = False
|
self.compress_images = False
|
||||||
|
|
||||||
@@ -723,9 +750,9 @@ class PostProcessorWorker:
|
|||||||
return self.cancellation_event.is_set()
|
return self.cancellation_event.is_set()
|
||||||
|
|
||||||
def _check_pause(self, context_message="Operation"):
|
def _check_pause(self, context_message="Operation"):
|
||||||
if self.pause_event and self.pause_event.is_set():
|
if self.pause_event and self.pause_event.is_set(): # type: ignore
|
||||||
self.logger(f" {context_message} paused...")
|
self.logger(f" {context_message} paused...")
|
||||||
while self.pause_event.is_set(): # Loop while pause_event is set
|
while self.pause_event.is_set(): # type: ignore # Loop while pause_event is set
|
||||||
if self.check_cancel():
|
if self.check_cancel():
|
||||||
self.logger(f" {context_message} cancelled while paused.")
|
self.logger(f" {context_message} cancelled while paused.")
|
||||||
return True # Indicates cancellation occurred
|
return True # Indicates cancellation occurred
|
||||||
@@ -1341,7 +1368,7 @@ class PostProcessorWorker:
|
|||||||
self.logger(f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check}' contains '{matched_skip}'.")
|
self.logger(f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check}' contains '{matched_skip}'.")
|
||||||
return 0, num_potential_files_in_post, [], []
|
return 0, num_potential_files_in_post, [], []
|
||||||
|
|
||||||
if (self.show_external_links or self.extract_links_only) and post_content_html:
|
if (self.show_external_links or self.extract_links_only) and post_content_html: # type: ignore
|
||||||
if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], []
|
if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], []
|
||||||
try:
|
try:
|
||||||
unique_links_data = {}
|
unique_links_data = {}
|
||||||
@@ -1597,7 +1624,7 @@ class PostProcessorWorker:
|
|||||||
total_skipped_this_post += 1
|
total_skipped_this_post += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
current_path_for_file = self.download_root
|
current_path_for_file = self.override_output_dir if self.override_output_dir else self.download_root # Use override if provided
|
||||||
|
|
||||||
if self.use_subfolders:
|
if self.use_subfolders:
|
||||||
char_title_subfolder_name = None
|
char_title_subfolder_name = None
|
||||||
@@ -1704,6 +1731,7 @@ class DownloadThread(QThread):
|
|||||||
manga_date_prefix=MANGA_DATE_PREFIX_DEFAULT, # New parameter
|
manga_date_prefix=MANGA_DATE_PREFIX_DEFAULT, # New parameter
|
||||||
allow_multipart_download=True,
|
allow_multipart_download=True,
|
||||||
selected_cookie_file=None, # New parameter for selected cookie file
|
selected_cookie_file=None, # New parameter for selected cookie file
|
||||||
|
override_output_dir=None, # New parameter
|
||||||
app_base_dir=None, # New parameter
|
app_base_dir=None, # New parameter
|
||||||
manga_date_file_counter_ref=None, # New parameter
|
manga_date_file_counter_ref=None, # New parameter
|
||||||
manga_global_file_counter_ref=None, # New parameter for global numbering
|
manga_global_file_counter_ref=None, # New parameter for global numbering
|
||||||
@@ -1714,7 +1742,7 @@ class DownloadThread(QThread):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
self.api_url_input = api_url_input
|
self.api_url_input = api_url_input
|
||||||
self.output_dir = output_dir
|
self.output_dir = output_dir
|
||||||
self.known_names = list(known_names_copy)
|
self.known_names = list(known_names_copy) # type: ignore
|
||||||
self.cancellation_event = cancellation_event
|
self.cancellation_event = cancellation_event
|
||||||
self.pause_event = pause_event # Store pause_event
|
self.pause_event = pause_event # Store pause_event
|
||||||
self.skip_current_file_flag = skip_current_file_flag
|
self.skip_current_file_flag = skip_current_file_flag
|
||||||
@@ -1758,6 +1786,7 @@ class DownloadThread(QThread):
|
|||||||
self.app_base_dir = app_base_dir # Store app base dir
|
self.app_base_dir = app_base_dir # Store app base dir
|
||||||
self.cookie_text = cookie_text # Store cookie text
|
self.cookie_text = cookie_text # Store cookie text
|
||||||
self.use_cookie = use_cookie # Store cookie setting
|
self.use_cookie = use_cookie # Store cookie setting
|
||||||
|
self.override_output_dir = override_output_dir # Store override dir
|
||||||
self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store for passing to worker by DownloadThread
|
self.manga_date_file_counter_ref = manga_date_file_counter_ref # Store for passing to worker by DownloadThread
|
||||||
self.scan_content_for_images = scan_content_for_images # Store new flag
|
self.scan_content_for_images = scan_content_for_images # Store new flag
|
||||||
self.manga_global_file_counter_ref = manga_global_file_counter_ref # Store for global numbering
|
self.manga_global_file_counter_ref = manga_global_file_counter_ref # Store for global numbering
|
||||||
@@ -1890,6 +1919,7 @@ class DownloadThread(QThread):
|
|||||||
selected_cookie_file=self.selected_cookie_file, # Pass selected cookie file
|
selected_cookie_file=self.selected_cookie_file, # Pass selected cookie file
|
||||||
app_base_dir=self.app_base_dir, # Pass app_base_dir
|
app_base_dir=self.app_base_dir, # Pass app_base_dir
|
||||||
cookie_text=self.cookie_text, # Pass cookie text
|
cookie_text=self.cookie_text, # Pass cookie text
|
||||||
|
override_output_dir=self.override_output_dir, # Pass override dir
|
||||||
manga_global_file_counter_ref=self.manga_global_file_counter_ref, # Pass the ref
|
manga_global_file_counter_ref=self.manga_global_file_counter_ref, # Pass the ref
|
||||||
use_cookie=self.use_cookie, # Pass cookie setting to worker
|
use_cookie=self.use_cookie, # Pass cookie setting to worker
|
||||||
manga_date_file_counter_ref=current_manga_date_file_counter_ref, # Pass the calculated or passed-in ref
|
manga_date_file_counter_ref=current_manga_date_file_counter_ref, # Pass the calculated or passed-in ref
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import hashlib
|
|||||||
import http.client
|
import http.client
|
||||||
import traceback
|
import traceback
|
||||||
import threading
|
import threading
|
||||||
|
import queue # Import the missing 'queue' module
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
||||||
CHUNK_DOWNLOAD_RETRY_DELAY = 2 # Slightly reduced for faster retries if needed
|
CHUNK_DOWNLOAD_RETRY_DELAY = 2 # Slightly reduced for faster retries if needed
|
||||||
|
|||||||
34
readme.md
34
readme.md
@@ -11,28 +11,44 @@ Built with **PyQt5**, this tool is ideal for users who want deep filtering, cust
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## What's New in v4.1.1?
|
## What's New in v4.1.1? - Enhanced Image Discovery & Audio Filtering
|
||||||
|
|
||||||
Version 4.1.1 introduces a smarter way to capture images that might be embedded directly within post descriptions, enhancing content discovery.
|
Version 4.1.1 brings significant enhancements, including smarter image capture from post content and a dedicated filter mode for audio files.
|
||||||
|
|
||||||
### "Scan Content for Images" Feature
|
### "Scan Content for Images" Feature
|
||||||
|
|
||||||
- **Enhanced Image Discovery:** A new checkbox, "**Scan Content for Images**," has been added to the UI (grouped with "Download Thumbnails Only" and "Compress Large Images").
|
- **Enhanced Image Discovery:** A new checkbox, "**Scan Content for Images**," has been added to the UI (grouped with "Download Thumbnails Only" and "Compress Large Images").
|
||||||
- **How it Works:**
|
- **How it Works:**
|
||||||
- When enabled, the downloader scans the HTML content of posts (e.g., the description area).
|
- When enabled, the downloader meticulously scans the HTML content of each post's description or body.
|
||||||
- It looks for images embedded via HTML `<img>` tags or as direct absolute URL links (e.g., `https://.../image.png`).
|
- It searches for images in two main ways:
|
||||||
- It intelligently resolves relative image paths found in `<img>` tags (like `/data/image.jpg`) into full, downloadable URLs.
|
- **Directly linked absolute URLs** (e.g., `https://externalsite.com/image.png`) that end with a common image extension (jpg, png, gif, etc.).
|
||||||
|
- **Images embedded using HTML `<img>` tags.** The downloader extracts the `src` attribute from these tags and can resolve various path types:
|
||||||
|
- Absolute URLs (e.g., `http://...` or `https://...`)
|
||||||
|
- Protocol-relative URLs (e.g., `//cdn.example.com/image.jpg`)
|
||||||
|
- Root-relative paths (e.g., `/data/user_content/image.gif`), which are resolved against the site's base URL (e.g., `https://kemono.su`) to produce a full URL such as `https://kemono.su/data/user_content/image.gif`.
|
||||||
- This is particularly useful for capturing images that are part of the post's narrative but not formally listed in the API's file or attachment sections.
|
- This is particularly useful for capturing images that are part of the post's narrative but not formally listed in the API's file or attachment sections.
|
||||||
- **Default State:** This option is **unchecked by default**.
|
- **Default State:** This option is **unchecked by default**.
|
||||||
- **Interaction with "Download Thumbnails Only":**
|
- **Key Interaction with "Download Thumbnails Only":** This new feature works closely with the existing "Download Thumbnails Only" option:
|
||||||
- If you check "Download Thumbnails Only":
|
- If you enable "Download Thumbnails Only":
|
||||||
- The "Scan Content for Images" checkbox will **automatically become checked and disabled** (locked).
|
- The "Scan Content for Images" checkbox will **automatically become checked and disabled** (locked).
|
||||||
- In this combined mode, the downloader will **only download images found by the content scan**. API-listed thumbnails will be ignored, prioritizing images from the post's body.
|
- The downloader then **exclusively downloads images discovered through the content scan**. Any API-listed thumbnails are bypassed, giving priority to images embedded directly in the post.
|
||||||
- If you uncheck "Download Thumbnails Only":
|
- If you disable "Download Thumbnails Only":
|
||||||
- The "Scan Content for Images" checkbox will become **enabled again and revert to being unchecked**. You can then manually enable it if you wish to scan content without being in thumbnail-only mode.
|
- The "Scan Content for Images" checkbox will become **enabled again and revert to being unchecked**. You can then manually enable it if you wish to scan content without being in thumbnail-only mode.
|
||||||
|
|
||||||
This feature ensures a more comprehensive download experience, especially for posts where images are integrated directly into the text.
|
This feature ensures a more comprehensive download experience, especially for posts where images are integrated directly into the text.
|
||||||
|
|
||||||
|
### New "🎧 Only Audio" Filter Mode
|
||||||
|
|
||||||
|
Alongside image discovery, v4.1.1 also introduces a dedicated filter mode for audio enthusiasts:
|
||||||
|
|
||||||
|
- **Focused Audio Downloads:** The "🎧 Only Audio" option in the "Filter Files" radio button group allows you to download exclusively common audio file types. This includes formats like MP3, WAV, FLAC, M4A, OGG, and more.
|
||||||
|
- **Streamlined UI:** When "🎧 Only Audio" mode is active:
|
||||||
|
- Irrelevant UI options such as the "Skip Scope" button (for word-based post/file skipping) and the "Multi-part Download" toggle are hidden to simplify the interface.
|
||||||
|
- The "Show External Links in Log" checkbox is automatically disabled, as link extraction is not the focus of this mode.
|
||||||
|
- **Archive Handling:** Unlike the "📦 Only Archives" mode (which disables archive skipping), the "Skip .zip" and "Skip .rar" checkboxes remain enabled and configurable when "🎧 Only Audio" is selected. This gives you the flexibility to also exclude any archives encountered while in audio-only mode if desired.
|
||||||
|
- **Purpose:** This mode is perfect for users who primarily want to collect audio tracks, podcasts, or sound effects from posts without downloading other media types.
|
||||||
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Previous Update: What's New in v4.0.1?
|
## Previous Update: What's New in v4.0.1?
|
||||||
|
|||||||
Reference in New Issue
Block a user