import time import traceback from urllib.parse import urlparse import json # Ensure json is imported import requests # (Keep the rest of your imports) from ..utils.network_utils import extract_post_info, prepare_cookies_for_request from ..config.constants import ( STYLE_DATE_POST_TITLE ) def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None): """ Fetches a single page of posts from the API with robust retry logic. NEW: Requests only essential fields to keep the response size small and reliable. """ if cancellation_event and cancellation_event.is_set(): raise RuntimeError("Fetch operation cancelled by user.") if pause_event and pause_event.is_set(): logger(" Post fetching paused...") while pause_event.is_set(): if cancellation_event and cancellation_event.is_set(): raise RuntimeError("Fetch operation cancelled by user while paused.") time.sleep(0.5) logger(" Post fetching resumed.") # --- MODIFICATION: Added `fields` to the URL to request only metadata --- # This prevents the large 'content' field from being included in the list, avoiding timeouts. fields_to_request = "id,user,service,title,shared_file,added,published,edited,file,attachments,tags" paginated_url = f'{api_url_base}?o={offset}&fields={fields_to_request}' max_retries = 3 retry_delay = 5 for attempt in range(max_retries): if cancellation_event and cancellation_event.is_set(): raise RuntimeError("Fetch operation cancelled by user during retry loop.") log_message = f" Fetching post list: {api_url_base}?o={offset} (Page approx. {offset // 50 + 1})" if attempt > 0: log_message += f" (Attempt {attempt + 1}/{max_retries})" logger(log_message) try: # We can now remove the streaming logic as the response will be small and fast. response = requests.get(paginated_url, headers=headers, timeout=(15, 60), cookies=cookies_dict) response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: logger(f" ⚠️ Retryable network error on page fetch (Attempt {attempt + 1}): {e}") if attempt < max_retries - 1: delay = retry_delay * (2 ** attempt) logger(f" Retrying in {delay} seconds...") time.sleep(delay) continue else: logger(f" ❌ Failed to fetch page after {max_retries} attempts.") raise RuntimeError(f"Network error fetching offset {offset}") except json.JSONDecodeError as e: logger(f" ❌ Failed to decode JSON on page fetch (Attempt {attempt + 1}): {e}") if attempt < max_retries - 1: delay = retry_delay * (2 ** attempt) logger(f" Retrying in {delay} seconds...") time.sleep(delay) continue else: raise RuntimeError(f"JSONDecodeError fetching offset {offset}") raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.") def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None): """ --- NEW FUNCTION --- Fetches the full data, including the 'content' field, for a single post. """ post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}" logger(f" Fetching full content for post ID {post_id}...") try: # Use streaming here as a precaution for single posts that are still very large. with requests.get(post_api_url, headers=headers, timeout=(15, 300), cookies=cookies_dict, stream=True) as response: response.raise_for_status() response_body = b"" for chunk in response.iter_content(chunk_size=8192): response_body += chunk full_post_data = json.loads(response_body) # The API sometimes wraps the post in a list, handle that. if isinstance(full_post_data, list) and full_post_data: return full_post_data[0] return full_post_data except Exception as e: logger(f" ❌ Failed to fetch full content for post {post_id}: {e}") return None def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None, pause_event=None, cookies_dict=None): """Fetches all comments for a specific post.""" if cancellation_event and cancellation_event.is_set(): raise RuntimeError("Comment fetch operation cancelled by user.") comments_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}/comments" logger(f" Fetching comments: {comments_api_url}") try: response = requests.get(comments_api_url, headers=headers, timeout=(10, 30), cookies=cookies_dict) response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: raise RuntimeError(f"Error fetching comments for post {post_id}: {e}") except ValueError as e: raise RuntimeError(f"Error decoding JSON from comments API for post {post_id}: {e}") def download_from_api ( api_url_input , logger =print , start_page =None , end_page =None , manga_mode =False , cancellation_event =None , pause_event =None , use_cookie =False , cookie_text ="", selected_cookie_file =None , app_base_dir =None , manga_filename_style_for_sort_check =None ): headers ={ 'User-Agent':'Mozilla/5.0', 'Accept':'application/json' } service ,user_id ,target_post_id =extract_post_info (api_url_input ) if cancellation_event and cancellation_event .is_set (): logger (" Download_from_api cancelled at start.") return parsed_input_url_for_domain =urlparse (api_url_input ) api_domain =parsed_input_url_for_domain .netloc if not any (d in api_domain .lower ()for d in ['kemono.su','kemono.party','coomer.su','coomer.party']): logger (f"⚠️ Unrecognized domain '{api_domain }' from input URL. Defaulting to kemono.su for API calls.") api_domain ="kemono.su" cookies_for_api =None if use_cookie and app_base_dir : cookies_for_api =prepare_cookies_for_request (use_cookie ,cookie_text ,selected_cookie_file ,app_base_dir ,logger ,target_domain =api_domain ) if target_post_id : direct_post_api_url =f"https://{api_domain }/api/v1/{service }/user/{user_id }/post/{target_post_id }" logger (f" Attempting direct fetch for target post: {direct_post_api_url }") try : direct_response =requests .get (direct_post_api_url ,headers =headers ,timeout =(10 ,30 ),cookies =cookies_for_api ) direct_response .raise_for_status () direct_post_data =direct_response .json () if isinstance (direct_post_data ,list )and direct_post_data : direct_post_data =direct_post_data [0 ] if isinstance (direct_post_data ,dict )and 'post'in direct_post_data and isinstance (direct_post_data ['post'],dict ): direct_post_data =direct_post_data ['post'] if isinstance (direct_post_data ,dict )and direct_post_data .get ('id')==target_post_id : logger (f" ✅ Direct fetch successful for post {target_post_id }.") yield [direct_post_data ] return else : response_type =type (direct_post_data ).__name__ response_snippet =str (direct_post_data )[:200 ] logger (f" ⚠️ Direct fetch for post {target_post_id } returned unexpected data (Type: {response_type }, Snippet: '{response_snippet }'). Falling back to pagination.") except requests .exceptions .RequestException as e : logger (f" ⚠️ Direct fetch failed for post {target_post_id }: {e }. Falling back to pagination.") except Exception as e : logger (f" ⚠️ Unexpected error during direct fetch for post {target_post_id }: {e }. Falling back to pagination.") if not service or not user_id : logger (f"❌ Invalid URL or could not extract service/user: {api_url_input }") return if target_post_id and (start_page or end_page ): logger ("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).") is_manga_mode_fetch_all_and_sort_oldest_first =manga_mode and (manga_filename_style_for_sort_check !=STYLE_DATE_POST_TITLE )and not target_post_id api_base_url =f"https://{api_domain }/api/v1/{service }/user/{user_id }" page_size =50 if is_manga_mode_fetch_all_and_sort_oldest_first : logger (f" Manga Mode (Style: {manga_filename_style_for_sort_check if manga_filename_style_for_sort_check else 'Default'} - Oldest First Sort Active): Fetching all posts to sort by date...") all_posts_for_manga_mode =[] current_offset_manga =0 if start_page and start_page >1 : current_offset_manga =(start_page -1 )*page_size logger (f" Manga Mode: Starting fetch from page {start_page } (offset {current_offset_manga }).") elif start_page : logger (f" Manga Mode: Starting fetch from page 1 (offset 0).") if end_page : logger (f" Manga Mode: Will fetch up to page {end_page }.") while True : if pause_event and pause_event .is_set (): logger (" Manga mode post fetching paused...") while pause_event .is_set (): if cancellation_event and cancellation_event .is_set (): logger (" Manga mode post fetching cancelled while paused.") break time .sleep (0.5 ) if not (cancellation_event and cancellation_event .is_set ()):logger (" Manga mode post fetching resumed.") if cancellation_event and cancellation_event .is_set (): logger (" Manga mode post fetching cancelled.") break current_page_num_manga =(current_offset_manga //page_size )+1 if end_page and current_page_num_manga >end_page : logger (f" Manga Mode: Reached specified end page ({end_page }). Stopping post fetch.") break try : posts_batch_manga =fetch_posts_paginated (api_base_url ,headers ,current_offset_manga ,logger ,cancellation_event ,pause_event ,cookies_dict =cookies_for_api ) if not isinstance (posts_batch_manga ,list ): logger (f"❌ API Error (Manga Mode): Expected list of posts, got {type (posts_batch_manga )}.") break if not posts_batch_manga : logger ("✅ Reached end of posts (Manga Mode fetch all).") if start_page and not end_page and current_page_num_manga 1 and not target_post_id : current_offset =(start_page -1 )*page_size current_page_num =start_page logger (f" Starting from page {current_page_num } (calculated offset {current_offset }).") while True : if pause_event and pause_event .is_set (): logger (" Post fetching loop paused...") while pause_event .is_set (): if cancellation_event and cancellation_event .is_set (): logger (" Post fetching loop cancelled while paused.") break time .sleep (0.5 ) if not (cancellation_event and cancellation_event .is_set ()):logger (" Post fetching loop resumed.") if cancellation_event and cancellation_event .is_set (): logger (" Post fetching loop cancelled.") break if target_post_id and processed_target_post_flag : break if not target_post_id and end_page and current_page_num >end_page : logger (f"✅ Reached specified end page ({end_page }) for creator feed. Stopping.") break try : posts_batch =fetch_posts_paginated (api_base_url ,headers ,current_offset ,logger ,cancellation_event ,pause_event ,cookies_dict =cookies_for_api ) if not isinstance (posts_batch ,list ): logger (f"❌ API Error: Expected list of posts, got {type (posts_batch )} at page {current_page_num } (offset {current_offset }).") break except RuntimeError as e : if "cancelled by user"in str (e ).lower (): logger (f"ℹ️ Pagination stopped due to cancellation: {e }") else : logger (f"❌ {e }\n Aborting pagination at page {current_page_num } (offset {current_offset }).") break except Exception as e : logger (f"❌ Unexpected error fetching page {current_page_num } (offset {current_offset }): {e }") traceback .print_exc () break if not posts_batch : if target_post_id and not processed_target_post_flag : logger (f"❌ Target post {target_post_id } not found after checking all available pages (API returned no more posts at offset {current_offset }).") elif not target_post_id : if current_page_num ==(start_page or 1 ): logger (f"😕 No posts found on the first page checked (page {current_page_num }, offset {current_offset }).") else : logger (f"✅ Reached end of posts (no more content from API at offset {current_offset }).") break if target_post_id and not processed_target_post_flag : matching_post =next ((p for p in posts_batch if str (p .get ('id'))==str (target_post_id )),None ) if matching_post : logger (f"🎯 Found target post {target_post_id } on page {current_page_num } (offset {current_offset }).") yield [matching_post ] processed_target_post_flag =True elif not target_post_id : yield posts_batch if processed_target_post_flag : break current_offset +=page_size current_page_num +=1 time .sleep (0.6 ) if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event .is_set ()): logger (f"❌ Target post {target_post_id } could not be found after checking all relevant pages (final check after loop).")