mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
Compare commits
2 Commits
9cd48bb63a
...
v6.5.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b0a6c264e1 | ||
|
|
d9364f4f91 |
@@ -12,7 +12,6 @@ from ..config.constants import (
|
||||
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
|
||||
"""
|
||||
Fetches a single page of posts from the API with robust retry logic.
|
||||
NEW: Requests only essential fields to keep the response size small and reliable.
|
||||
"""
|
||||
if cancellation_event and cancellation_event.is_set():
|
||||
raise RuntimeError("Fetch operation cancelled by user.")
|
||||
@@ -33,7 +32,7 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
|
||||
if cancellation_event and cancellation_event.is_set():
|
||||
raise RuntimeError("Fetch operation cancelled by user during retry loop.")
|
||||
|
||||
log_message = f" Fetching post list: {api_url_base}?o={offset} (Page approx. {offset // 50 + 1})"
|
||||
log_message = f" Fetching post list: {paginated_url} (Page approx. {offset // 50 + 1})"
|
||||
if attempt > 0:
|
||||
log_message += f" (Attempt {attempt + 1}/{max_retries})"
|
||||
logger(log_message)
|
||||
@@ -41,9 +40,23 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
|
||||
try:
|
||||
response = requests.get(paginated_url, headers=headers, timeout=(15, 60), cookies=cookies_dict)
|
||||
response.raise_for_status()
|
||||
response.encoding = 'utf-8'
|
||||
return response.json()
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
# Handle 403 error on the FIRST page as a rate limit/block
|
||||
if e.response is not None and e.response.status_code == 403 and offset == 0:
|
||||
logger(" ❌ Access Denied (403 Forbidden) on the first page.")
|
||||
logger(" This is likely a rate limit or a Cloudflare block.")
|
||||
logger(" 💡 SOLUTION: Wait a while, use a VPN, or provide a valid session cookie.")
|
||||
return [] # Stop the process gracefully
|
||||
|
||||
# Handle 400 error as the end of pages
|
||||
if e.response is not None and e.response.status_code == 400:
|
||||
logger(f" ✅ Reached end of posts (API returned 400 Bad Request for offset {offset}).")
|
||||
return []
|
||||
|
||||
# Handle all other network errors with a retry
|
||||
logger(f" ⚠️ Retryable network error on page fetch (Attempt {attempt + 1}): {e}")
|
||||
if attempt < max_retries - 1:
|
||||
delay = retry_delay * (2 ** attempt)
|
||||
@@ -65,7 +78,6 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
|
||||
|
||||
raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.")
|
||||
|
||||
|
||||
def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None):
|
||||
"""
|
||||
--- NEW FUNCTION ---
|
||||
@@ -81,8 +93,11 @@ def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logge
|
||||
response_body += chunk
|
||||
|
||||
full_post_data = json.loads(response_body)
|
||||
|
||||
if isinstance(full_post_data, list) and full_post_data:
|
||||
return full_post_data[0]
|
||||
if isinstance(full_post_data, dict) and 'post' in full_post_data:
|
||||
return full_post_data['post']
|
||||
return full_post_data
|
||||
|
||||
except Exception as e:
|
||||
@@ -101,6 +116,7 @@ def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger,
|
||||
try:
|
||||
response = requests.get(comments_api_url, headers=headers, timeout=(10, 30), cookies=cookies_dict)
|
||||
response.raise_for_status()
|
||||
response.encoding = 'utf-8'
|
||||
return response.json()
|
||||
except requests.exceptions.RequestException as e:
|
||||
raise RuntimeError(f"Error fetching comments for post {post_id}: {e}")
|
||||
@@ -123,10 +139,16 @@ def download_from_api(
|
||||
processed_post_ids=None,
|
||||
fetch_all_first=False
|
||||
):
|
||||
# FIX: Define api_domain FIRST, before it is used in the headers
|
||||
parsed_input_url_for_domain = urlparse(api_url_input)
|
||||
api_domain = parsed_input_url_for_domain.netloc
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0',
|
||||
'Accept': 'application/json'
|
||||
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
||||
'Referer': f'https://{api_domain}/',
|
||||
'Accept': 'text/css'
|
||||
}
|
||||
|
||||
if processed_post_ids is None:
|
||||
processed_post_ids = set()
|
||||
else:
|
||||
@@ -138,15 +160,11 @@ def download_from_api(
|
||||
logger(" Download_from_api cancelled at start.")
|
||||
return
|
||||
|
||||
parsed_input_url_for_domain = urlparse(api_url_input)
|
||||
api_domain = parsed_input_url_for_domain.netloc
|
||||
# The code that defined api_domain was moved from here to the top of the function
|
||||
|
||||
# --- START: MODIFIED LOGIC ---
|
||||
# This list is updated to include the new .cr and .st mirrors for validation.
|
||||
if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'kemono.cr', 'coomer.su', 'coomer.party', 'coomer.st']):
|
||||
logger(f"⚠️ Unrecognized domain '{api_domain}' from input URL. Defaulting to kemono.su for API calls.")
|
||||
api_domain = "kemono.su"
|
||||
# --- END: MODIFIED LOGIC ---
|
||||
|
||||
cookies_for_api = None
|
||||
if use_cookie and app_base_dir:
|
||||
@@ -160,6 +178,7 @@ def download_from_api(
|
||||
try:
|
||||
direct_response = requests.get(direct_post_api_url, headers=headers, timeout=(10, 30), cookies=cookies_for_api)
|
||||
direct_response.raise_for_status()
|
||||
direct_response.encoding = 'utf-8'
|
||||
direct_post_data = direct_response.json()
|
||||
if isinstance(direct_post_data, list) and direct_post_data:
|
||||
direct_post_data = direct_post_data[0]
|
||||
@@ -356,3 +375,4 @@ def download_from_api(
|
||||
time.sleep(0.6)
|
||||
if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()):
|
||||
logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).")
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ try:
|
||||
except ImportError:
|
||||
Document = None
|
||||
from PyQt5 .QtCore import Qt ,QThread ,pyqtSignal ,QMutex ,QMutexLocker ,QObject ,QTimer ,QSettings ,QStandardPaths ,QCoreApplication ,QUrl ,QSize ,QProcess
|
||||
from .api_client import download_from_api, fetch_post_comments
|
||||
from .api_client import download_from_api, fetch_post_comments, fetch_single_post_data
|
||||
from ..services.multipart_downloader import download_file_in_parts, MULTIPART_DOWNLOADER_AVAILABLE
|
||||
from ..services.drive_downloader import (
|
||||
download_mega_file, download_gdrive_file, download_dropbox_file
|
||||
@@ -270,7 +270,7 @@ class PostProcessorWorker:
|
||||
return 0, 1, "", False, FILE_DOWNLOAD_STATUS_SKIPPED, None
|
||||
|
||||
file_download_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
|
||||
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
||||
'Referer': post_page_url
|
||||
}
|
||||
|
||||
@@ -876,6 +876,37 @@ class PostProcessorWorker:
|
||||
post_data = self.post # Reference to the post object
|
||||
log_prefix = "Post"
|
||||
|
||||
# --- FIX: FETCH FULL POST DATA IF CONTENT IS MISSING BUT NEEDED ---
|
||||
content_is_needed = (
|
||||
self.show_external_links or
|
||||
self.extract_links_only or
|
||||
self.scan_content_for_images or
|
||||
(self.filter_mode == 'text_only' and self.text_only_scope == 'content')
|
||||
)
|
||||
|
||||
if content_is_needed and self.post.get('content') is None and self.service != 'discord':
|
||||
self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
|
||||
parsed_url = urlparse(self.api_url_input)
|
||||
api_domain = parsed_url.netloc
|
||||
headers = {'User-Agent': 'Mozilla/5.0'}
|
||||
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
|
||||
|
||||
full_post_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
|
||||
|
||||
if full_post_data:
|
||||
self.logger(" ✅ Full post data fetched successfully.")
|
||||
# Update the worker's post object with the complete data
|
||||
self.post = full_post_data
|
||||
# Re-initialize local variables from the new, complete post data
|
||||
post_title = self.post.get('title', '') or 'untitled_post'
|
||||
post_main_file_info = self.post.get('file')
|
||||
post_attachments = self.post.get('attachments', [])
|
||||
post_content_html = self.post.get('content', '')
|
||||
post_data = self.post
|
||||
else:
|
||||
self.logger(f" ⚠️ Failed to fetch full content for post {post_id}. Content-dependent features may not work for this post.")
|
||||
# --- END FIX ---
|
||||
|
||||
# 2. SHARED PROCESSING LOGIC: The rest of the function now uses the consistent variables from above.
|
||||
result_tuple = (0, 0, [], [], [], None, None)
|
||||
total_downloaded_this_post = 0
|
||||
@@ -1286,7 +1317,6 @@ class PostProcessorWorker:
|
||||
parsed_url = urlparse(self.api_url_input)
|
||||
api_domain = parsed_url.netloc
|
||||
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
|
||||
from .api_client import fetch_single_post_data
|
||||
full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
|
||||
if full_data:
|
||||
final_post_data = full_data
|
||||
|
||||
@@ -281,7 +281,7 @@ class DownloaderApp (QWidget ):
|
||||
self.download_location_label_widget = None
|
||||
self.remove_from_filename_label_widget = None
|
||||
self.skip_words_label_widget = None
|
||||
self.setWindowTitle("Kemono Downloader v6.4.1")
|
||||
self.setWindowTitle("Kemono Downloader v6.4.3")
|
||||
setup_ui(self)
|
||||
self._connect_signals()
|
||||
self.log_signal.emit("ℹ️ Local API server functionality has been removed.")
|
||||
@@ -688,8 +688,12 @@ class DownloaderApp (QWidget ):
|
||||
return
|
||||
|
||||
self.fetched_posts_for_download = fetched_posts
|
||||
self.is_ready_to_download_fetched = True # <-- ADD THIS LINE
|
||||
self.is_ready_to_download_fetched = True
|
||||
self.log_signal.emit(f"✅ Fetch complete. Found {len(self.fetched_posts_for_download)} posts.")
|
||||
self.log_signal.emit("=" * 40)
|
||||
self.log_signal.emit("✅ Stage 1 complete. All post data has been fetched.")
|
||||
self.log_signal.emit(" 💡 You can now disconnect your VPN (if used) before starting the download.")
|
||||
self.log_signal.emit(" Press the 'Start Download' button to begin Stage 2: Downloading files.")
|
||||
self.progress_label.setText(f"Found {len(self.fetched_posts_for_download)} posts. Ready to download.")
|
||||
|
||||
self._update_button_states_and_connections()
|
||||
@@ -700,7 +704,9 @@ class DownloaderApp (QWidget ):
|
||||
Initiates the download of the posts that were previously fetched.
|
||||
"""
|
||||
self.is_ready_to_download_fetched = False # Reset the state flag
|
||||
self.log_signal.emit(f"🚀 Starting download of {len(self.fetched_posts_for_download)} fetched posts...")
|
||||
self.log_signal.emit("=" * 40)
|
||||
self.log_signal.emit(f"🚀 Starting Stage 2: Downloading files for {len(self.fetched_posts_for_download)} fetched posts.")
|
||||
self.log_signal.emit(" 💡 If you disconnected your VPN, downloads will now use your regular connection.")
|
||||
|
||||
# Manually set the UI to a "downloading" state for reliability
|
||||
self.set_ui_enabled(False)
|
||||
@@ -3954,7 +3960,9 @@ class DownloaderApp (QWidget ):
|
||||
self.last_start_download_args = args_template.copy()
|
||||
|
||||
if fetch_first_enabled and not post_id_from_url:
|
||||
self.log_signal.emit("🚀 Starting Stage 1: Fetching all pages...")
|
||||
self.log_signal.emit("=" * 40)
|
||||
self.log_signal.emit("🚀 'Fetch First' mode is active. Starting Stage 1: Fetching all post data.")
|
||||
self.log_signal.emit(" 💡 If you are using a VPN for this stage, ensure it is connected now.")
|
||||
self.is_fetching_only = True
|
||||
self.set_ui_enabled(False)
|
||||
self._update_button_states_and_connections()
|
||||
|
||||
Reference in New Issue
Block a user