diff --git a/src/core/api_client.py b/src/core/api_client.py index 850c893..4263c67 100644 --- a/src/core/api_client.py +++ b/src/core/api_client.py @@ -185,7 +185,7 @@ def download_from_api( is_manga_mode_fetch_all_and_sort_oldest_first = manga_mode and (manga_filename_style_for_sort_check != STYLE_DATE_POST_TITLE) and not target_post_id should_fetch_all = fetch_all_first or is_manga_mode_fetch_all_and_sort_oldest_first - api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}" + api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/posts" page_size = 50 if is_manga_mode_fetch_all_and_sort_oldest_first: logger(f" Manga Mode (Style: {manga_filename_style_for_sort_check if manga_filename_style_for_sort_check else 'Default'} - Oldest First Sort Active): Fetching all posts to sort by date...") diff --git a/src/core/workers.py b/src/core/workers.py index 584498e..6881aec 100644 --- a/src/core/workers.py +++ b/src/core/workers.py @@ -124,7 +124,8 @@ class PostProcessorWorker: processed_post_ids=None, multipart_scope='both', multipart_parts_count=4, - multipart_min_size_mb=100 + multipart_min_size_mb=100, + skip_file_size_mb=None ): self.post = post_data self.download_root = download_root @@ -189,6 +190,7 @@ class PostProcessorWorker: self.multipart_scope = multipart_scope self.multipart_parts_count = multipart_parts_count self.multipart_min_size_mb = multipart_min_size_mb + self.skip_file_size_mb = skip_file_size_mb if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found.") self.compress_images = False @@ -276,7 +278,25 @@ class PostProcessorWorker: cookies_to_use_for_file = None if self.use_cookie: cookies_to_use_for_file = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger) - + + if self.skip_file_size_mb is not None: + api_original_filename_for_size_check = file_info.get('_original_name_for_log', 
file_info.get('name')) + try: + # Use a stream=True HEAD request to get headers without downloading the body + with requests.head(file_url, headers=file_download_headers, timeout=15, cookies=cookies_to_use_for_file, allow_redirects=True) as head_response: + head_response.raise_for_status() + content_length = head_response.headers.get('Content-Length') + if content_length: + file_size_bytes = int(content_length) + file_size_mb = file_size_bytes / (1024 * 1024) + if file_size_mb < self.skip_file_size_mb: + self.logger(f" -> Skip File (Size): '{api_original_filename_for_size_check}' is {file_size_mb:.2f} MB, which is smaller than the {self.skip_file_size_mb} MB limit.") + return 0, 1, api_original_filename_for_size_check, False, FILE_DOWNLOAD_STATUS_SKIPPED, None + else: + self.logger(f" ⚠️ Could not determine file size for '{api_original_filename_for_size_check}' to check against size limit. Proceeding with download.") + except requests.RequestException as e: + self.logger(f" ⚠️ Could not fetch file headers to check size for '{api_original_filename_for_size_check}': {e}. Proceeding with download.") + api_original_filename = file_info.get('_original_name_for_log', file_info.get('name')) filename_to_save_in_main_path = "" if forced_filename_override: @@ -488,19 +508,18 @@ class PostProcessorWorker: except requests.RequestException as e: self.logger(f" ⚠️ Could not verify size of existing file '{filename_to_save_in_main_path}': {e}. 
Proceeding with download.") + max_retries = 3 retry_delay = 5 downloaded_size_bytes = 0 calculated_file_hash = None downloaded_part_file_path = None - total_size_bytes = 0 download_successful_flag = False last_exception_for_retry_later = None is_permanent_error = False data_to_write_io = None - response_for_this_attempt = None for attempt_num_single_stream in range(max_retries + 1): - response_for_this_attempt = None + response = None if self._check_pause(f"File download attempt for '{api_original_filename}'"): break if self.check_cancel() or (skip_event and skip_event.is_set()): break try: @@ -519,12 +538,24 @@ class PostProcessorWorker: new_url = self._find_valid_subdomain(current_url_to_try) if new_url != current_url_to_try: self.logger(f" Retrying with new URL: {new_url}") - file_url = new_url # Update the main file_url for subsequent retries + file_url = new_url + response.close() # Close the old response response = requests.get(new_url, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file) - response.raise_for_status() + + # --- REVISED AND MOVED SIZE CHECK LOGIC --- total_size_bytes = int(response.headers.get('Content-Length', 0)) + + if self.skip_file_size_mb is not None: + if total_size_bytes > 0: + file_size_mb = total_size_bytes / (1024 * 1024) + if file_size_mb < self.skip_file_size_mb: + self.logger(f" -> Skip File (Size): '{api_original_filename}' is {file_size_mb:.2f} MB, which is smaller than the {self.skip_file_size_mb} MB limit.") + return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None + # If Content-Length is missing, we can't check, so we no longer log a warning here and just proceed. 
+ # --- END OF REVISED LOGIC --- + num_parts_for_file = min(self.multipart_parts_count, MAX_PARTS_FOR_MULTIPART_DOWNLOAD) file_is_eligible_by_scope = False @@ -548,9 +579,7 @@ class PostProcessorWorker: if self._check_pause(f"Multipart decision for '{api_original_filename}'"): break if attempt_multipart: - if response_for_this_attempt: - response_for_this_attempt.close() - response_for_this_attempt = None + response.close() # Close the initial connection before starting multipart mp_save_path_for_unique_part_stem_arg = os.path.join(target_folder_path, f"{unique_part_file_stem_on_disk}{temp_file_ext_for_unique_part}") mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts( file_url, mp_save_path_for_unique_part_stem_arg, total_size_bytes, num_parts_for_file, file_download_headers, api_original_filename, @@ -576,7 +605,6 @@ class PostProcessorWorker: current_attempt_downloaded_bytes = 0 md5_hasher = hashlib.md5() last_progress_time = time.time() - single_stream_exception = None try: with open(current_single_stream_part_path, 'wb') as f_part: for chunk in response.iter_content(chunk_size=1 * 1024 * 1024): @@ -643,8 +671,8 @@ class PostProcessorWorker: is_permanent_error = True break finally: - if response_for_this_attempt: - response_for_this_attempt.close() + if response: + response.close() self._emit_signal('file_download_status', False) final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes @@ -1935,7 +1963,9 @@ class DownloadThread(QThread): project_root_dir=None, processed_post_ids=None, start_offset=0, - fetch_first=False): + fetch_first=False, + skip_file_size_mb=None + ): super().__init__() self.api_url_input = api_url_input self.output_dir = output_dir @@ -2002,6 +2032,7 @@ class DownloadThread(QThread): self.processed_post_ids_set = set(processed_post_ids) if processed_post_ids is not None else set() self.start_offset = start_offset self.fetch_first = fetch_first + 
self.skip_file_size_mb = skip_file_size_mb if self.compress_images and Image is None: self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") @@ -2122,6 +2153,7 @@ class DownloadThread(QThread): 'single_pdf_mode': self.single_pdf_mode, 'multipart_parts_count': self.multipart_parts_count, 'multipart_min_size_mb': self.multipart_min_size_mb, + 'skip_file_size_mb': self.skip_file_size_mb, 'project_root_dir': self.project_root_dir, } diff --git a/src/ui/main_window.py b/src/ui/main_window.py index fb50fce..421cb03 100644 --- a/src/ui/main_window.py +++ b/src/ui/main_window.py @@ -281,7 +281,7 @@ class DownloaderApp (QWidget ): self.download_location_label_widget = None self.remove_from_filename_label_widget = None self.skip_words_label_widget = None - self.setWindowTitle("Kemono Downloader v6.3.1") + self.setWindowTitle("Kemono Downloader v6.4.1") setup_ui(self) self._connect_signals() self.log_signal.emit("ℹ️ Local API server functionality has been removed.") @@ -3354,7 +3354,8 @@ class DownloaderApp (QWidget ): 'pause_event': self.pause_event, 'cancellation_event': self.cancellation_event, 'downloaded_files': self.downloaded_files, 'downloaded_file_hashes': self.downloaded_file_hashes, 'downloaded_files_lock': self.downloaded_files_lock, 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, - 'skip_words_list': [word.strip().lower() for word in self.skip_words_input.text().strip().split(',') if word.strip()], + 'skip_words_list': [part.strip().lower() for part in self.skip_words_input.text().strip().split(',') if not part.strip().startswith('[')], + 'skip_file_size_mb': next((int(re.search(r'\[(\d+)\]', part).group(1)) for part in self.skip_words_input.text().strip().split(',') if re.fullmatch(r'\[\d+\]', part.strip())), None), 'skip_words_scope': self.get_skip_words_scope(), 'char_filter_scope': self.get_char_filter_scope(), 'remove_from_filename_words_list': [word.strip() for word in 
self.remove_from_filename_input.text().strip().split(',') if word.strip()], 'scan_content_for_images': self.scan_content_images_checkbox.isChecked(), @@ -3523,8 +3524,19 @@ class DownloaderApp (QWidget ): self.thread_count_input.selectAll() return False - raw_skip_words = self.skip_words_input.text().strip() - skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()] + raw_skip_words_text = self.skip_words_input.text().strip() + skip_words_parts = [part.strip() for part in raw_skip_words_text.split(',') if part.strip()] + skip_words_list = [] + skip_file_size_mb = None + size_pattern = re.compile(r'\[(\d+)\]') + + for part in skip_words_parts: + match = size_pattern.fullmatch(part) + if match: + skip_file_size_mb = int(match.group(1)) + self.log_signal.emit(f"ℹ️ File size skip rule found: Will skip files smaller than {skip_file_size_mb} MB.") + else: + skip_words_list.append(part.lower()) raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else "" allow_multipart = self.allow_multipart_download_setting @@ -3891,6 +3903,7 @@ class DownloaderApp (QWidget ): 'downloaded_file_hashes': self.downloaded_file_hashes, 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, 'skip_words_list': skip_words_list, + 'skip_file_size_mb': skip_file_size_mb, 'skip_words_scope': current_skip_words_scope, 'remove_from_filename_words_list': remove_from_filename_words_list, 'char_filter_scope': current_char_filter_scope, @@ -5484,7 +5497,8 @@ class DownloaderApp (QWidget ): 'downloaded_files_lock': self.downloaded_files_lock, 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, 'dynamic_character_filter_holder': self.dynamic_character_filter_holder, - 'skip_words_list': [word.strip().lower() for word in self.skip_words_input.text().strip().split(',') if word.strip()], + 'skip_words_list': [part.strip().lower() for part in 
self.skip_words_input.text().strip().split(',') if part.strip() and not part.strip().startswith('[')], + 'skip_file_size_mb': next((int(re.search(r'\[(\d+)\]', part).group(1)) for part in self.skip_words_input.text().strip().split(',') if re.fullmatch(r'\[\d+\]', part.strip())), None), 'skip_words_scope': self.get_skip_words_scope(), 'show_external_links': self.external_links_checkbox.isChecked(), 'extract_links_only': self.radio_only_links.isChecked(),