Kemono-Downloader/src/core/api_client.py

import time
import traceback
from urllib.parse import urlparse
import json # Ensure json is imported
import requests
from ..utils.network_utils import extract_post_info, prepare_cookies_for_request
from ..config.constants import (
    STYLE_DATE_POST_TITLE
)


def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
    """
    Fetches a single page of posts from the API with robust retry logic.
    NEW: Requests only essential fields to keep the response size small and reliable.
    """
    if cancellation_event and cancellation_event.is_set():
        raise RuntimeError("Fetch operation cancelled by user.")
    if pause_event and pause_event.is_set():
        logger("   Post fetching paused...")
        while pause_event.is_set():
            if cancellation_event and cancellation_event.is_set():
                raise RuntimeError("Fetch operation cancelled by user while paused.")
            time.sleep(0.5)
        logger("   Post fetching resumed.")
    fields_to_request = "id,user,service,title,shared_file,added,published,edited,file,attachments,tags"
    paginated_url = f'{api_url_base}?o={offset}&fields={fields_to_request}'

    max_retries = 3
    retry_delay = 5

    for attempt in range(max_retries):
        if cancellation_event and cancellation_event.is_set():
            raise RuntimeError("Fetch operation cancelled by user during retry loop.")

        log_message = f"   Fetching post list: {api_url_base}?o={offset} (Page approx. {offset // 50 + 1})"
        if attempt > 0:
            log_message += f" (Attempt {attempt + 1}/{max_retries})"
        logger(log_message)

        try:
            response = requests.get(paginated_url, headers=headers, timeout=(15, 60), cookies=cookies_dict)
            response.raise_for_status()
            return response.json()

        except requests.exceptions.RequestException as e:
            logger(f"   ⚠️ Retryable network error on page fetch (Attempt {attempt + 1}): {e}")
            if attempt < max_retries - 1:
                delay = retry_delay * (2 ** attempt)
                logger(f"      Retrying in {delay} seconds...")
                time.sleep(delay)
                continue
            else:
                logger(f"   ❌ Failed to fetch page after {max_retries} attempts.")
                raise RuntimeError(f"Network error fetching offset {offset}")
        except json.JSONDecodeError as e:
            logger(f"   ❌ Failed to decode JSON on page fetch (Attempt {attempt + 1}): {e}")
            if attempt < max_retries - 1:
                delay = retry_delay * (2 ** attempt)
                logger(f"      Retrying in {delay} seconds...")
                time.sleep(delay)
                continue
            else:
                raise RuntimeError(f"JSONDecodeError fetching offset {offset}")

    raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.")


def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None):
    """
    --- NEW FUNCTION ---
    Fetches the full data, including the 'content' field, for a single post.
    """
    post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}"
    logger(f"      Fetching full content for post ID {post_id}...")
    try:
        with requests.get(post_api_url, headers=headers, timeout=(15, 300), cookies=cookies_dict, stream=True) as response:
            response.raise_for_status()
            response_body = b""
            for chunk in response.iter_content(chunk_size=8192):
                response_body += chunk

            full_post_data = json.loads(response_body)
            if isinstance(full_post_data, list) and full_post_data:
                return full_post_data[0]
            return full_post_data

    except Exception as e:
        logger(f"      ❌ Failed to fetch full content for post {post_id}: {e}")
        return None


def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
    """Fetches all comments for a specific post."""
    if cancellation_event and cancellation_event.is_set():
        raise RuntimeError("Comment fetch operation cancelled by user.")

    comments_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}/comments"
    logger(f"   Fetching comments: {comments_api_url}")

    try:
        response = requests.get(comments_api_url, headers=headers, timeout=(10, 30), cookies=cookies_dict)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"Error fetching comments for post {post_id}: {e}")
    except ValueError as e:
        raise RuntimeError(f"Error decoding JSON from comments API for post {post_id}: {e}")

def download_from_api(
    api_url_input,
    logger=print,
    start_page=None,
    end_page=None,
    manga_mode=False,
    cancellation_event=None,
    pause_event=None,
    use_cookie=False,
    cookie_text="",
    selected_cookie_file=None,
    app_base_dir=None,
    manga_filename_style_for_sort_check=None,
    processed_post_ids=None
):
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept': 'application/json'
    }
    if processed_post_ids is None:
        processed_post_ids = set()
    else:
        processed_post_ids = set(processed_post_ids)

    service, user_id, target_post_id = extract_post_info(api_url_input)

    if cancellation_event and cancellation_event.is_set():
        logger("   Download_from_api cancelled at start.")
        return

    parsed_input_url_for_domain = urlparse(api_url_input)
    api_domain = parsed_input_url_for_domain.netloc

    # --- START: MODIFIED LOGIC ---
    # This list is updated to include the new .cr and .st mirrors for validation.
    if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'kemono.cr', 'coomer.su', 'coomer.party', 'coomer.st']):
        logger(f"⚠️ Unrecognized domain '{api_domain}' from input URL. Defaulting to kemono.su for API calls.")
        api_domain = "kemono.su"
    # --- END: MODIFIED LOGIC ---

    cookies_for_api = None
    if use_cookie and app_base_dir:
        cookies_for_api = prepare_cookies_for_request(use_cookie, cookie_text, selected_cookie_file, app_base_dir, logger, target_domain=api_domain)
    if target_post_id:
        if target_post_id in processed_post_ids:
            logger(f"   Skipping already processed target post ID: {target_post_id}")
            return
        direct_post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{target_post_id}"
        logger(f"   Attempting direct fetch for target post: {direct_post_api_url}")
        try:
            direct_response = requests.get(direct_post_api_url, headers=headers, timeout=(10, 30), cookies=cookies_for_api)
            direct_response.raise_for_status()
            direct_post_data = direct_response.json()
            if isinstance(direct_post_data, list) and direct_post_data:
                direct_post_data = direct_post_data[0]
            if isinstance(direct_post_data, dict) and 'post' in direct_post_data and isinstance(direct_post_data['post'], dict):
                direct_post_data = direct_post_data['post']
            if isinstance(direct_post_data, dict) and direct_post_data.get('id') == target_post_id:
                logger(f"   ✅ Direct fetch successful for post {target_post_id}.")
                yield [direct_post_data]
                return
            else:
                response_type = type(direct_post_data).__name__
                response_snippet = str(direct_post_data)[:200]
                logger(f"   ⚠️ Direct fetch for post {target_post_id} returned unexpected data (Type: {response_type}, Snippet: '{response_snippet}'). Falling back to pagination.")
        except requests.exceptions.RequestException as e:
            logger(f"   ⚠️ Direct fetch failed for post {target_post_id}: {e}. Falling back to pagination.")
        except Exception as e:
            logger(f"   ⚠️ Unexpected error during direct fetch for post {target_post_id}: {e}. Falling back to pagination.")
    if not service or not user_id:
        logger(f"❌ Invalid URL or could not extract service/user: {api_url_input}")
        return
    if target_post_id and (start_page or end_page):
        logger("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).")

    is_manga_mode_fetch_all_and_sort_oldest_first = manga_mode and (manga_filename_style_for_sort_check != STYLE_DATE_POST_TITLE) and not target_post_id
    api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}"
    page_size = 50
    if is_manga_mode_fetch_all_and_sort_oldest_first:
        logger(f"   Manga Mode (Style: {manga_filename_style_for_sort_check if manga_filename_style_for_sort_check else 'Default'} - Oldest First Sort Active): Fetching all posts to sort by date...")
        all_posts_for_manga_mode = []
        current_offset_manga = 0
        if start_page and start_page > 1:
            current_offset_manga = (start_page - 1) * page_size
            logger(f"   Manga Mode: Starting fetch from page {start_page} (offset {current_offset_manga}).")
        elif start_page:
            logger(f"   Manga Mode: Starting fetch from page 1 (offset 0).")
        if end_page:
            logger(f"   Manga Mode: Will fetch up to page {end_page}.")
        while True:
            if pause_event and pause_event.is_set():
                logger("   Manga mode post fetching paused...")
                while pause_event.is_set():
                    if cancellation_event and cancellation_event.is_set():
                        logger("   Manga mode post fetching cancelled while paused.")
                        break
                    time.sleep(0.5)
                if not (cancellation_event and cancellation_event.is_set()): logger("   Manga mode post fetching resumed.")
            if cancellation_event and cancellation_event.is_set():
                logger("   Manga mode post fetching cancelled.")
                break
            current_page_num_manga = (current_offset_manga // page_size) + 1
            if end_page and current_page_num_manga > end_page:
                logger(f"   Manga Mode: Reached specified end page ({end_page}). Stopping post fetch.")
                break
            try:
                posts_batch_manga = fetch_posts_paginated(api_base_url, headers, current_offset_manga, logger, cancellation_event, pause_event, cookies_dict=cookies_for_api)
                if not isinstance(posts_batch_manga, list):
                    logger(f"❌ API Error (Manga Mode): Expected list of posts, got {type(posts_batch_manga)}.")
                    break
                if not posts_batch_manga:
                    logger("✅ Reached end of posts (Manga Mode fetch all).")
                    if start_page and not end_page and current_page_num_manga < start_page:
                        logger(f"   Manga Mode: No posts found on or after specified start page {start_page}.")
                    elif end_page and current_page_num_manga <= end_page and not all_posts_for_manga_mode:
                        logger(f"   Manga Mode: No posts found within the specified page range ({start_page or 1}-{end_page}).")
                    break
                all_posts_for_manga_mode.extend(posts_batch_manga)

                logger(f"MANGA_FETCH_PROGRESS:{len(all_posts_for_manga_mode)}:{current_page_num_manga}")

                current_offset_manga += page_size
                time.sleep(0.6)
            except RuntimeError as e:
                if "cancelled by user" in str(e).lower():
                    logger(f"ℹ️ Manga mode pagination stopped due to cancellation: {e}")
                else:
                    logger(f"❌ {e}\n   Aborting manga mode pagination.")
                break
            except Exception as e:
                logger(f"❌ Unexpected error during manga mode fetch: {e}")
                traceback.print_exc()
                break

        if cancellation_event and cancellation_event.is_set(): return

        if all_posts_for_manga_mode:
            logger(f"MANGA_FETCH_COMPLETE:{len(all_posts_for_manga_mode)}")

        if all_posts_for_manga_mode:
            if processed_post_ids:
                original_count = len(all_posts_for_manga_mode)
                all_posts_for_manga_mode = [post for post in all_posts_for_manga_mode if post.get('id') not in processed_post_ids]
                skipped_count = original_count - len(all_posts_for_manga_mode)
                if skipped_count > 0:
                    logger(f"   Manga Mode: Skipped {skipped_count} already processed post(s) before sorting.")

            logger(f"   Manga Mode: Fetched {len(all_posts_for_manga_mode)} total posts. Sorting by publication date (oldest first)...")
            def sort_key_tuple(post):
                published_date_str = post.get('published')
                added_date_str = post.get('added')
                post_id_str = post.get('id', "0")
                primary_sort_val = "0000-00-00T00:00:00"
                if published_date_str:
                    primary_sort_val = published_date_str
                elif added_date_str:
                    logger(f"    ⚠️ Post ID {post_id_str} missing 'published' date, using 'added' date '{added_date_str}' for primary sorting.")
                    primary_sort_val = added_date_str
                else:
                    logger(f"    ⚠️ Post ID {post_id_str} missing both 'published' and 'added' dates. Placing at start of sort (using default earliest date).")
                secondary_sort_val = 0
                try:
                    secondary_sort_val = int(post_id_str)
                except ValueError:
                    logger(f"    ⚠️ Post ID '{post_id_str}' is not a valid integer for secondary sorting, using 0.")
                return (primary_sort_val, secondary_sort_val)
            all_posts_for_manga_mode.sort(key=sort_key_tuple)
            for i in range(0, len(all_posts_for_manga_mode), page_size):
                if cancellation_event and cancellation_event.is_set():
                    logger("   Manga mode post yielding cancelled.")
                    break
                yield all_posts_for_manga_mode[i:i + page_size]
        return

    if manga_mode and not target_post_id and (manga_filename_style_for_sort_check == STYLE_DATE_POST_TITLE):
        logger(f"   Manga Mode (Style: {STYLE_DATE_POST_TITLE}): Processing posts in default API order (newest first).")

    current_page_num = 1
    current_offset = 0
    processed_target_post_flag = False
    if start_page and start_page > 1 and not target_post_id:
        current_offset = (start_page - 1) * page_size
        current_page_num = start_page
        logger(f"   Starting from page {current_page_num} (calculated offset {current_offset}).")
    while True:
        if pause_event and pause_event.is_set():
            logger("   Post fetching loop paused...")
            while pause_event.is_set():
                if cancellation_event and cancellation_event.is_set():
                    logger("   Post fetching loop cancelled while paused.")
                    break
                time.sleep(0.5)
            if not (cancellation_event and cancellation_event.is_set()): logger("   Post fetching loop resumed.")
        if cancellation_event and cancellation_event.is_set():
            logger("   Post fetching loop cancelled.")
            break
        if target_post_id and processed_target_post_flag:
            break
        if not target_post_id and end_page and current_page_num > end_page:
            logger(f"✅ Reached specified end page ({end_page}) for creator feed. Stopping.")
            break
        try:
            posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event, pause_event, cookies_dict=cookies_for_api)
            if not isinstance(posts_batch, list):
                logger(f"❌ API Error: Expected list of posts, got {type(posts_batch)} at page {current_page_num} (offset {current_offset}).")
                break
        except RuntimeError as e:
            if "cancelled by user" in str(e).lower():
                logger(f"ℹ️ Pagination stopped due to cancellation: {e}")
            else:
                logger(f"❌ {e}\n   Aborting pagination at page {current_page_num} (offset {current_offset}).")
            break
        except Exception as e:
            logger(f"❌ Unexpected error fetching page {current_page_num} (offset {current_offset}): {e}")
            traceback.print_exc()
            break
        if processed_post_ids:
            original_count = len(posts_batch)
            posts_batch = [post for post in posts_batch if post.get('id') not in processed_post_ids]
            skipped_count = original_count - len(posts_batch)
            if skipped_count > 0:
                logger(f"   Skipped {skipped_count} already processed post(s) from page {current_page_num}.")

        if not posts_batch:
            if target_post_id and not processed_target_post_flag:
                logger(f"❌ Target post {target_post_id} not found after checking all available pages (API returned no more posts at offset {current_offset}).")
            elif not target_post_id:
                if current_page_num == (start_page or 1):
                    logger(f"😕 No posts found on the first page checked (page {current_page_num}, offset {current_offset}).")
                else:
                    logger(f"✅ Reached end of posts (no more content from API at offset {current_offset}).")
            break
        if target_post_id and not processed_target_post_flag:
            matching_post = next((p for p in posts_batch if str(p.get('id')) == str(target_post_id)), None)
            if matching_post:
                logger(f"🎯 Found target post {target_post_id} on page {current_page_num} (offset {current_offset}).")
                yield [matching_post]
                processed_target_post_flag = True
        elif not target_post_id:
            yield posts_batch
        if processed_target_post_flag:
            break
        current_offset += page_size
        current_page_num += 1
        time.sleep(0.6)
    if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()):
        logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).")