Files
Kemono-Downloader/src/core/api_client.py
Yuvi9587 6de9967e0b Commit
2025-07-27 07:18:08 -07:00

357 lines
18 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import time
import traceback
from urllib.parse import urlparse
import json # Ensure json is imported
import requests
from ..utils.network_utils import extract_post_info, prepare_cookies_for_request
from ..config.constants import (
STYLE_DATE_POST_TITLE
)
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
"""
Fetches a single page of posts from the API with robust retry logic.
NEW: Requests only essential fields to keep the response size small and reliable.
"""
if cancellation_event and cancellation_event.is_set():
raise RuntimeError("Fetch operation cancelled by user.")
if pause_event and pause_event.is_set():
logger(" Post fetching paused...")
while pause_event.is_set():
if cancellation_event and cancellation_event.is_set():
raise RuntimeError("Fetch operation cancelled by user while paused.")
time.sleep(0.5)
logger(" Post fetching resumed.")
fields_to_request = "id,user,service,title,shared_file,added,published,edited,file,attachments,tags"
paginated_url = f'{api_url_base}?o={offset}&fields={fields_to_request}'
max_retries = 3
retry_delay = 5
for attempt in range(max_retries):
if cancellation_event and cancellation_event.is_set():
raise RuntimeError("Fetch operation cancelled by user during retry loop.")
log_message = f" Fetching post list: {api_url_base}?o={offset} (Page approx. {offset // 50 + 1})"
if attempt > 0:
log_message += f" (Attempt {attempt + 1}/{max_retries})"
logger(log_message)
try:
response = requests.get(paginated_url, headers=headers, timeout=(15, 60), cookies=cookies_dict)
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
logger(f" ⚠️ Retryable network error on page fetch (Attempt {attempt + 1}): {e}")
if attempt < max_retries - 1:
delay = retry_delay * (2 ** attempt)
logger(f" Retrying in {delay} seconds...")
time.sleep(delay)
continue
else:
logger(f" ❌ Failed to fetch page after {max_retries} attempts.")
raise RuntimeError(f"Network error fetching offset {offset}")
except json.JSONDecodeError as e:
logger(f" ❌ Failed to decode JSON on page fetch (Attempt {attempt + 1}): {e}")
if attempt < max_retries - 1:
delay = retry_delay * (2 ** attempt)
logger(f" Retrying in {delay} seconds...")
time.sleep(delay)
continue
else:
raise RuntimeError(f"JSONDecodeError fetching offset {offset}")
raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.")
def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None):
"""
--- NEW FUNCTION ---
Fetches the full data, including the 'content' field, for a single post.
"""
post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}"
logger(f" Fetching full content for post ID {post_id}...")
try:
with requests.get(post_api_url, headers=headers, timeout=(15, 300), cookies=cookies_dict, stream=True) as response:
response.raise_for_status()
response_body = b""
for chunk in response.iter_content(chunk_size=8192):
response_body += chunk
full_post_data = json.loads(response_body)
if isinstance(full_post_data, list) and full_post_data:
return full_post_data[0]
return full_post_data
except Exception as e:
logger(f" ❌ Failed to fetch full content for post {post_id}: {e}")
return None
def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
"""Fetches all comments for a specific post."""
if cancellation_event and cancellation_event.is_set():
raise RuntimeError("Comment fetch operation cancelled by user.")
comments_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}/comments"
logger(f" Fetching comments: {comments_api_url}")
try:
response = requests.get(comments_api_url, headers=headers, timeout=(10, 30), cookies=cookies_dict)
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
raise RuntimeError(f"Error fetching comments for post {post_id}: {e}")
except ValueError as e:
raise RuntimeError(f"Error decoding JSON from comments API for post {post_id}: {e}")
def download_from_api(
api_url_input,
logger=print,
start_page=None,
end_page=None,
manga_mode=False,
cancellation_event=None,
pause_event=None,
use_cookie=False,
cookie_text="",
selected_cookie_file=None,
app_base_dir=None,
manga_filename_style_for_sort_check=None,
processed_post_ids=None
):
headers = {
'User-Agent': 'Mozilla/5.0',
'Accept': 'application/json'
}
if processed_post_ids is None:
processed_post_ids = set()
else:
processed_post_ids = set(processed_post_ids)
service, user_id, target_post_id = extract_post_info(api_url_input)
if cancellation_event and cancellation_event.is_set():
logger(" Download_from_api cancelled at start.")
return
parsed_input_url_for_domain = urlparse(api_url_input)
api_domain = parsed_input_url_for_domain.netloc
# --- START: MODIFIED LOGIC ---
# This list is updated to include the new .cr and .st mirrors for validation.
if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'kemono.cr', 'coomer.su', 'coomer.party', 'coomer.st']):
logger(f"⚠️ Unrecognized domain '{api_domain}' from input URL. Defaulting to kemono.su for API calls.")
api_domain = "kemono.su"
# --- END: MODIFIED LOGIC ---
cookies_for_api = None
if use_cookie and app_base_dir:
cookies_for_api = prepare_cookies_for_request(use_cookie, cookie_text, selected_cookie_file, app_base_dir, logger, target_domain=api_domain)
if target_post_id:
if target_post_id in processed_post_ids:
logger(f" Skipping already processed target post ID: {target_post_id}")
return
direct_post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{target_post_id}"
logger(f" Attempting direct fetch for target post: {direct_post_api_url}")
try:
direct_response = requests.get(direct_post_api_url, headers=headers, timeout=(10, 30), cookies=cookies_for_api)
direct_response.raise_for_status()
direct_post_data = direct_response.json()
if isinstance(direct_post_data, list) and direct_post_data:
direct_post_data = direct_post_data[0]
if isinstance(direct_post_data, dict) and 'post' in direct_post_data and isinstance(direct_post_data['post'], dict):
direct_post_data = direct_post_data['post']
if isinstance(direct_post_data, dict) and direct_post_data.get('id') == target_post_id:
logger(f" ✅ Direct fetch successful for post {target_post_id}.")
yield [direct_post_data]
return
else:
response_type = type(direct_post_data).__name__
response_snippet = str(direct_post_data)[:200]
logger(f" ⚠️ Direct fetch for post {target_post_id} returned unexpected data (Type: {response_type}, Snippet: '{response_snippet}'). Falling back to pagination.")
except requests.exceptions.RequestException as e:
logger(f" ⚠️ Direct fetch failed for post {target_post_id}: {e}. Falling back to pagination.")
except Exception as e:
logger(f" ⚠️ Unexpected error during direct fetch for post {target_post_id}: {e}. Falling back to pagination.")
if not service or not user_id:
logger(f"❌ Invalid URL or could not extract service/user: {api_url_input}")
return
if target_post_id and (start_page or end_page):
logger("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).")
is_manga_mode_fetch_all_and_sort_oldest_first = manga_mode and (manga_filename_style_for_sort_check != STYLE_DATE_POST_TITLE) and not target_post_id
api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}"
page_size = 50
if is_manga_mode_fetch_all_and_sort_oldest_first:
logger(f" Manga Mode (Style: {manga_filename_style_for_sort_check if manga_filename_style_for_sort_check else 'Default'} - Oldest First Sort Active): Fetching all posts to sort by date...")
all_posts_for_manga_mode = []
current_offset_manga = 0
if start_page and start_page > 1:
current_offset_manga = (start_page - 1) * page_size
logger(f" Manga Mode: Starting fetch from page {start_page} (offset {current_offset_manga}).")
elif start_page:
logger(f" Manga Mode: Starting fetch from page 1 (offset 0).")
if end_page:
logger(f" Manga Mode: Will fetch up to page {end_page}.")
while True:
if pause_event and pause_event.is_set():
logger(" Manga mode post fetching paused...")
while pause_event.is_set():
if cancellation_event and cancellation_event.is_set():
logger(" Manga mode post fetching cancelled while paused.")
break
time.sleep(0.5)
if not (cancellation_event and cancellation_event.is_set()): logger(" Manga mode post fetching resumed.")
if cancellation_event and cancellation_event.is_set():
logger(" Manga mode post fetching cancelled.")
break
current_page_num_manga = (current_offset_manga // page_size) + 1
if end_page and current_page_num_manga > end_page:
logger(f" Manga Mode: Reached specified end page ({end_page}). Stopping post fetch.")
break
try:
posts_batch_manga = fetch_posts_paginated(api_base_url, headers, current_offset_manga, logger, cancellation_event, pause_event, cookies_dict=cookies_for_api)
if not isinstance(posts_batch_manga, list):
logger(f"❌ API Error (Manga Mode): Expected list of posts, got {type(posts_batch_manga)}.")
break
if not posts_batch_manga:
logger("✅ Reached end of posts (Manga Mode fetch all).")
if start_page and not end_page and current_page_num_manga < start_page:
logger(f" Manga Mode: No posts found on or after specified start page {start_page}.")
elif end_page and current_page_num_manga <= end_page and not all_posts_for_manga_mode:
logger(f" Manga Mode: No posts found within the specified page range ({start_page or 1}-{end_page}).")
break
all_posts_for_manga_mode.extend(posts_batch_manga)
logger(f"MANGA_FETCH_PROGRESS:{len(all_posts_for_manga_mode)}:{current_page_num_manga}")
current_offset_manga += page_size
time.sleep(0.6)
except RuntimeError as e:
if "cancelled by user" in str(e).lower():
logger(f" Manga mode pagination stopped due to cancellation: {e}")
else:
logger(f"{e}\n Aborting manga mode pagination.")
break
except Exception as e:
logger(f"❌ Unexpected error during manga mode fetch: {e}")
traceback.print_exc()
break
if cancellation_event and cancellation_event.is_set(): return
if all_posts_for_manga_mode:
logger(f"MANGA_FETCH_COMPLETE:{len(all_posts_for_manga_mode)}")
if all_posts_for_manga_mode:
if processed_post_ids:
original_count = len(all_posts_for_manga_mode)
all_posts_for_manga_mode = [post for post in all_posts_for_manga_mode if post.get('id') not in processed_post_ids]
skipped_count = original_count - len(all_posts_for_manga_mode)
if skipped_count > 0:
logger(f" Manga Mode: Skipped {skipped_count} already processed post(s) before sorting.")
logger(f" Manga Mode: Fetched {len(all_posts_for_manga_mode)} total posts. Sorting by publication date (oldest first)...")
def sort_key_tuple(post):
published_date_str = post.get('published')
added_date_str = post.get('added')
post_id_str = post.get('id', "0")
primary_sort_val = "0000-00-00T00:00:00"
if published_date_str:
primary_sort_val = published_date_str
elif added_date_str:
logger(f" ⚠️ Post ID {post_id_str} missing 'published' date, using 'added' date '{added_date_str}' for primary sorting.")
primary_sort_val = added_date_str
else:
logger(f" ⚠️ Post ID {post_id_str} missing both 'published' and 'added' dates. Placing at start of sort (using default earliest date).")
secondary_sort_val = 0
try:
secondary_sort_val = int(post_id_str)
except ValueError:
logger(f" ⚠️ Post ID '{post_id_str}' is not a valid integer for secondary sorting, using 0.")
return (primary_sort_val, secondary_sort_val)
all_posts_for_manga_mode.sort(key=sort_key_tuple)
for i in range(0, len(all_posts_for_manga_mode), page_size):
if cancellation_event and cancellation_event.is_set():
logger(" Manga mode post yielding cancelled.")
break
yield all_posts_for_manga_mode[i:i + page_size]
return
if manga_mode and not target_post_id and (manga_filename_style_for_sort_check == STYLE_DATE_POST_TITLE):
logger(f" Manga Mode (Style: {STYLE_DATE_POST_TITLE}): Processing posts in default API order (newest first).")
current_page_num = 1
current_offset = 0
processed_target_post_flag = False
if start_page and start_page > 1 and not target_post_id:
current_offset = (start_page - 1) * page_size
current_page_num = start_page
logger(f" Starting from page {current_page_num} (calculated offset {current_offset}).")
while True:
if pause_event and pause_event.is_set():
logger(" Post fetching loop paused...")
while pause_event.is_set():
if cancellation_event and cancellation_event.is_set():
logger(" Post fetching loop cancelled while paused.")
break
time.sleep(0.5)
if not (cancellation_event and cancellation_event.is_set()): logger(" Post fetching loop resumed.")
if cancellation_event and cancellation_event.is_set():
logger(" Post fetching loop cancelled.")
break
if target_post_id and processed_target_post_flag:
break
if not target_post_id and end_page and current_page_num > end_page:
logger(f"✅ Reached specified end page ({end_page}) for creator feed. Stopping.")
break
try:
posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event, pause_event, cookies_dict=cookies_for_api)
if not isinstance(posts_batch, list):
logger(f"❌ API Error: Expected list of posts, got {type(posts_batch)} at page {current_page_num} (offset {current_offset}).")
break
except RuntimeError as e:
if "cancelled by user" in str(e).lower():
logger(f" Pagination stopped due to cancellation: {e}")
else:
logger(f"{e}\n Aborting pagination at page {current_page_num} (offset {current_offset}).")
break
except Exception as e:
logger(f"❌ Unexpected error fetching page {current_page_num} (offset {current_offset}): {e}")
traceback.print_exc()
break
if processed_post_ids:
original_count = len(posts_batch)
posts_batch = [post for post in posts_batch if post.get('id') not in processed_post_ids]
skipped_count = original_count - len(posts_batch)
if skipped_count > 0:
logger(f" Skipped {skipped_count} already processed post(s) from page {current_page_num}.")
if not posts_batch:
if target_post_id and not processed_target_post_flag:
logger(f"❌ Target post {target_post_id} not found after checking all available pages (API returned no more posts at offset {current_offset}).")
elif not target_post_id:
if current_page_num == (start_page or 1):
logger(f"😕 No posts found on the first page checked (page {current_page_num}, offset {current_offset}).")
else:
logger(f"✅ Reached end of posts (no more content from API at offset {current_offset}).")
break
if target_post_id and not processed_target_post_flag:
matching_post = next((p for p in posts_batch if str(p.get('id')) == str(target_post_id)), None)
if matching_post:
logger(f"🎯 Found target post {target_post_id} on page {current_page_num} (offset {current_offset}).")
yield [matching_post]
processed_target_post_flag = True
elif not target_post_id:
yield posts_batch
if processed_target_post_flag:
break
current_offset += page_size
current_page_num += 1
time.sleep(0.6)
if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()):
logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).")