diff --git a/src/core/api_client.py b/src/core/api_client.py
index 55e4d4a..dd77236 100644
--- a/src/core/api_client.py
+++ b/src/core/api_client.py
@@ -12,7 +12,6 @@ from ..config.constants import (
 def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
     """
     Fetches a single page of posts from the API with robust retry logic.
-    NEW: Requests only essential fields to keep the response size small and reliable.
     """
     if cancellation_event and cancellation_event.is_set():
         raise RuntimeError("Fetch operation cancelled by user.")
@@ -33,7 +32,7 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
         if cancellation_event and cancellation_event.is_set():
             raise RuntimeError("Fetch operation cancelled by user during retry loop.")

-        log_message = f"   Fetching post list: {api_url_base}?o={offset} (Page approx. {offset // 50 + 1})"
+        log_message = f"   Fetching post list: {paginated_url} (Page approx. {offset // 50 + 1})"
         if attempt > 0:
             log_message += f" (Attempt {attempt + 1}/{max_retries})"
         logger(log_message)
@@ -45,10 +44,19 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
             return response.json()
         except requests.exceptions.RequestException as e:
+            # Handle a 403 error on the FIRST page as a rate limit/block
+            if e.response is not None and e.response.status_code == 403 and offset == 0:
+                logger("   ❌ Access Denied (403 Forbidden) on the first page.")
+                logger("      This is likely a rate limit or a Cloudflare block.")
+                logger("      💡 SOLUTION: Wait a while, use a VPN, or provide a valid session cookie.")
+                return []  # Stop the process gracefully
+
+            # Handle a 400 error as the end of pages
             if e.response is not None and e.response.status_code == 400:
                 logger(f"   ✅ Reached end of posts (API returned 400 Bad Request for offset {offset}).")
-                return []
+                return []
+            # Handle all other network errors with a retry
             logger(f"   ⚠️ Retryable network error on page fetch (Attempt {attempt + 1}): {e}")
             if attempt < max_retries - 1:
                 delay = retry_delay * (2 ** attempt)
@@ -70,7 +78,6 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
     raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.")

-
 def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None):
     """
     --- NEW FUNCTION ---
@@ -132,10 +139,16 @@ def download_from_api(
     processed_post_ids=None,
     fetch_all_first=False
 ):
+    # FIX: Define api_domain FIRST, before it is used in the headers
+    parsed_input_url_for_domain = urlparse(api_url_input)
+    api_domain = parsed_input_url_for_domain.netloc
+
     headers = {
-        'User-Agent': 'Mozilla/5.0',
-        'Accept': 'application/json'
+        'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
+        'Referer': f'https://{api_domain}/',
+        'Accept': 'text/css'
     }
+
     if processed_post_ids is None:
         processed_post_ids = set()
     else:
@@ -147,8 +160,7 @@ def download_from_api(
         logger("   Download_from_api cancelled at start.")
         return

-    parsed_input_url_for_domain = urlparse(api_url_input)
-    api_domain = parsed_input_url_for_domain.netloc
+    # api_domain is now defined at the top of this function (moved from here)

     if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'kemono.cr', 'coomer.su', 'coomer.party', 'coomer.st']):
         logger(f"⚠️ Unrecognized domain '{api_domain}' from input URL. Defaulting to kemono.su for API calls.")
@@ -363,3 +375,4 @@ def download_from_api(
             time.sleep(0.6)
     if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()):
         logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).")
+
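Note for reviewers: the error-handling policy this hunk adds to fetch_posts_paginated() can be hard to follow inside a diff, so here is a minimal, self-contained sketch of the same pattern — a 403 on the first page is treated as a rate limit or Cloudflare block and stops gracefully, a 400 is treated as the end of pagination, and any other request error is retried with exponential backoff. The names fetch_page, MAX_RETRIES, and RETRY_DELAY are illustrative stand-ins, not identifiers from this repo.

import time
import requests

MAX_RETRIES = 3   # illustrative; the real values come from ..config.constants
RETRY_DELAY = 5   # base delay in seconds, doubled on each failed attempt

def fetch_page(paginated_url, headers, offset, logger=print):
    for attempt in range(MAX_RETRIES):
        try:
            response = requests.get(paginated_url, headers=headers, timeout=(15, 60))
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            status = e.response.status_code if e.response is not None else None
            if status == 403 and offset == 0:
                # A first-page 403 is almost certainly a block, so stop instead of retrying.
                logger("Access denied on the first page; likely a rate limit or Cloudflare block.")
                return []
            if status == 400:
                # The API answers 400 for an offset past the last page.
                logger("Reached the end of posts.")
                return []
            if attempt < MAX_RETRIES - 1:
                delay = RETRY_DELAY * (2 ** attempt)  # exponential backoff: 5s, 10s, ...
                logger(f"Retryable error ({e}); waiting {delay}s before attempt {attempt + 2}.")
                time.sleep(delay)
    raise RuntimeError(f"Failed to fetch {paginated_url} after {MAX_RETRIES} attempts.")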
diff --git a/src/core/workers.py b/src/core/workers.py
index 8ae510b..90960d6 100644
--- a/src/core/workers.py
+++ b/src/core/workers.py
@@ -270,7 +270,7 @@ class PostProcessorWorker:
             return 0, 1, "", False, FILE_DOWNLOAD_STATUS_SKIPPED, None

         file_download_headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
+            'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
             'Referer': post_page_url
         }
diff --git a/src/ui/main_window.py b/src/ui/main_window.py
index 43a4f3d..2165d38 100644
--- a/src/ui/main_window.py
+++ b/src/ui/main_window.py
@@ -281,7 +281,7 @@ class DownloaderApp (QWidget ):
         self.download_location_label_widget = None
         self.remove_from_filename_label_widget = None
         self.skip_words_label_widget = None
-        self.setWindowTitle("Kemono Downloader v6.4.2")
+        self.setWindowTitle("Kemono Downloader v6.4.3")
         setup_ui(self)
         self._connect_signals()
         self.log_signal.emit("ℹ️ Local API server functionality has been removed.")
@@ -688,8 +688,12 @@ class DownloaderApp (QWidget ):
             return

         self.fetched_posts_for_download = fetched_posts
-        self.is_ready_to_download_fetched = True # <-- ADD THIS LINE
+        self.is_ready_to_download_fetched = True
         self.log_signal.emit(f"✅ Fetch complete. Found {len(self.fetched_posts_for_download)} posts.")
+        self.log_signal.emit("=" * 40)
+        self.log_signal.emit("✅ Stage 1 complete. All post data has been fetched.")
+        self.log_signal.emit("   💡 You can now disconnect your VPN (if used) before starting the download.")
+        self.log_signal.emit("   Press the 'Start Download' button to begin Stage 2: Downloading files.")
         self.progress_label.setText(f"Found {len(self.fetched_posts_for_download)} posts. Ready to download.")
         self._update_button_states_and_connections()
@@ -700,7 +704,9 @@ class DownloaderApp (QWidget ):
        Initiates the download of the posts that were previously fetched.
        """
        self.is_ready_to_download_fetched = False # Reset the state flag
-       self.log_signal.emit(f"🚀 Starting download of {len(self.fetched_posts_for_download)} fetched posts...")
+       self.log_signal.emit("=" * 40)
+       self.log_signal.emit(f"🚀 Starting Stage 2: Downloading files for {len(self.fetched_posts_for_download)} fetched posts.")
+       self.log_signal.emit("   💡 If you disconnected your VPN, downloads will now use your regular connection.")

        # Manually set the UI to a "downloading" state for reliability
        self.set_ui_enabled(False)
@@ -3954,7 +3960,9 @@ class DownloaderApp (QWidget ):
         self.last_start_download_args = args_template.copy()

         if fetch_first_enabled and not post_id_from_url:
-            self.log_signal.emit("🚀 Starting Stage 1: Fetching all pages...")
+            self.log_signal.emit("=" * 40)
+            self.log_signal.emit("🚀 'Fetch First' mode is active. Starting Stage 1: Fetching all post data.")
+            self.log_signal.emit("   💡 If you are using a VPN for this stage, ensure it is connected now.")
             self.is_fetching_only = True
             self.set_ui_enabled(False)
             self._update_button_states_and_connections()
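Note for reviewers: taken together, the UI changes implement a two-stage "Fetch First" flow — Stage 1 gathers all post metadata (possibly over a VPN, since the API traffic is what gets rate-limited), then the user may switch networks before Stage 2 downloads the files. A minimal sketch of that flow follows; fetch_all_posts and download_file are hypothetical stand-ins for the app's worker code, and input() is a CLI stand-in for the GUI's 'Start Download' button.

def run_two_stage(creator_url, fetch_all_posts, download_file, logger=print):
    # Stage 1: metadata only -- this is the traffic the 403 handling above protects.
    logger("Stage 1: fetching all post data...")
    posts = fetch_all_posts(creator_url)
    logger(f"Stage 1 complete: {len(posts)} posts fetched.")

    # The user may now change networks (e.g. disconnect a VPN) before any file I/O.
    input("Press Enter to start Stage 2 (downloading files)... ")

    # Stage 2: file downloads over whatever connection is active now.
    for post in posts:
        for url in post.get("file_urls", []):  # hypothetical post structure
            download_file(url)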