1 Commits

Author SHA1 Message Date
Yuvi9587
56a83195b2 Update readme.md 2025-08-11 09:31:53 -07:00
4 changed files with 39 additions and 143 deletions

View File

@@ -99,7 +99,7 @@ Built with PyQt5, this tool is designed for users who want deep filtering capabi
### Install Dependencies ### Install Dependencies
```bash ```bash
pip install PyQt5 requests Pillow mega.py fpdf2 python-docx pip install PyQt5 requests Pillow mega.py fpdf python-docx
``` ```
### Running the Application ### Running the Application

View File

@@ -12,6 +12,7 @@ from ..config.constants import (
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None): def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
""" """
Fetches a single page of posts from the API with robust retry logic. Fetches a single page of posts from the API with robust retry logic.
NEW: Requests only essential fields to keep the response size small and reliable.
""" """
if cancellation_event and cancellation_event.is_set(): if cancellation_event and cancellation_event.is_set():
raise RuntimeError("Fetch operation cancelled by user.") raise RuntimeError("Fetch operation cancelled by user.")
@@ -32,7 +33,7 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
if cancellation_event and cancellation_event.is_set(): if cancellation_event and cancellation_event.is_set():
raise RuntimeError("Fetch operation cancelled by user during retry loop.") raise RuntimeError("Fetch operation cancelled by user during retry loop.")
log_message = f" Fetching post list: {paginated_url} (Page approx. {offset // 50 + 1})" log_message = f" Fetching post list: {api_url_base}?o={offset} (Page approx. {offset // 50 + 1})"
if attempt > 0: if attempt > 0:
log_message += f" (Attempt {attempt + 1}/{max_retries})" log_message += f" (Attempt {attempt + 1}/{max_retries})"
logger(log_message) logger(log_message)
@@ -40,23 +41,9 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
try: try:
response = requests.get(paginated_url, headers=headers, timeout=(15, 60), cookies=cookies_dict) response = requests.get(paginated_url, headers=headers, timeout=(15, 60), cookies=cookies_dict)
response.raise_for_status() response.raise_for_status()
response.encoding = 'utf-8'
return response.json() return response.json()
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
# Handle 403 error on the FIRST page as a rate limit/block
if e.response is not None and e.response.status_code == 403 and offset == 0:
logger(" ❌ Access Denied (403 Forbidden) on the first page.")
logger(" This is likely a rate limit or a Cloudflare block.")
logger(" 💡 SOLUTION: Wait a while, use a VPN, or provide a valid session cookie.")
return [] # Stop the process gracefully
# Handle 400 error as the end of pages
if e.response is not None and e.response.status_code == 400:
logger(f" ✅ Reached end of posts (API returned 400 Bad Request for offset {offset}).")
return []
# Handle all other network errors with a retry
logger(f" ⚠️ Retryable network error on page fetch (Attempt {attempt + 1}): {e}") logger(f" ⚠️ Retryable network error on page fetch (Attempt {attempt + 1}): {e}")
if attempt < max_retries - 1: if attempt < max_retries - 1:
delay = retry_delay * (2 ** attempt) delay = retry_delay * (2 ** attempt)
@@ -78,6 +65,7 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.") raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.")
def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None): def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None):
""" """
--- NEW FUNCTION --- --- NEW FUNCTION ---
@@ -93,11 +81,8 @@ def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logge
response_body += chunk response_body += chunk
full_post_data = json.loads(response_body) full_post_data = json.loads(response_body)
if isinstance(full_post_data, list) and full_post_data: if isinstance(full_post_data, list) and full_post_data:
return full_post_data[0] return full_post_data[0]
if isinstance(full_post_data, dict) and 'post' in full_post_data:
return full_post_data['post']
return full_post_data return full_post_data
except Exception as e: except Exception as e:
@@ -116,7 +101,6 @@ def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger,
try: try:
response = requests.get(comments_api_url, headers=headers, timeout=(10, 30), cookies=cookies_dict) response = requests.get(comments_api_url, headers=headers, timeout=(10, 30), cookies=cookies_dict)
response.raise_for_status() response.raise_for_status()
response.encoding = 'utf-8'
return response.json() return response.json()
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
raise RuntimeError(f"Error fetching comments for post {post_id}: {e}") raise RuntimeError(f"Error fetching comments for post {post_id}: {e}")
@@ -139,16 +123,10 @@ def download_from_api(
processed_post_ids=None, processed_post_ids=None,
fetch_all_first=False fetch_all_first=False
): ):
# FIX: Define api_domain FIRST, before it is used in the headers
parsed_input_url_for_domain = urlparse(api_url_input)
api_domain = parsed_input_url_for_domain.netloc
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'User-Agent': 'Mozilla/5.0',
'Referer': f'https://{api_domain}/', 'Accept': 'application/json'
'Accept': 'text/css'
} }
if processed_post_ids is None: if processed_post_ids is None:
processed_post_ids = set() processed_post_ids = set()
else: else:
@@ -160,11 +138,15 @@ def download_from_api(
logger(" Download_from_api cancelled at start.") logger(" Download_from_api cancelled at start.")
return return
# The code that defined api_domain was moved from here to the top of the function parsed_input_url_for_domain = urlparse(api_url_input)
api_domain = parsed_input_url_for_domain.netloc
# --- START: MODIFIED LOGIC ---
# This list is updated to include the new .cr and .st mirrors for validation.
if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'kemono.cr', 'coomer.su', 'coomer.party', 'coomer.st']): if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'kemono.cr', 'coomer.su', 'coomer.party', 'coomer.st']):
logger(f"⚠️ Unrecognized domain '{api_domain}' from input URL. Defaulting to kemono.su for API calls.") logger(f"⚠️ Unrecognized domain '{api_domain}' from input URL. Defaulting to kemono.su for API calls.")
api_domain = "kemono.su" api_domain = "kemono.su"
# --- END: MODIFIED LOGIC ---
cookies_for_api = None cookies_for_api = None
if use_cookie and app_base_dir: if use_cookie and app_base_dir:
@@ -178,7 +160,6 @@ def download_from_api(
try: try:
direct_response = requests.get(direct_post_api_url, headers=headers, timeout=(10, 30), cookies=cookies_for_api) direct_response = requests.get(direct_post_api_url, headers=headers, timeout=(10, 30), cookies=cookies_for_api)
direct_response.raise_for_status() direct_response.raise_for_status()
direct_response.encoding = 'utf-8'
direct_post_data = direct_response.json() direct_post_data = direct_response.json()
if isinstance(direct_post_data, list) and direct_post_data: if isinstance(direct_post_data, list) and direct_post_data:
direct_post_data = direct_post_data[0] direct_post_data = direct_post_data[0]
@@ -204,7 +185,7 @@ def download_from_api(
is_manga_mode_fetch_all_and_sort_oldest_first = manga_mode and (manga_filename_style_for_sort_check != STYLE_DATE_POST_TITLE) and not target_post_id is_manga_mode_fetch_all_and_sort_oldest_first = manga_mode and (manga_filename_style_for_sort_check != STYLE_DATE_POST_TITLE) and not target_post_id
should_fetch_all = fetch_all_first or is_manga_mode_fetch_all_and_sort_oldest_first should_fetch_all = fetch_all_first or is_manga_mode_fetch_all_and_sort_oldest_first
api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/posts" api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}"
page_size = 50 page_size = 50
if is_manga_mode_fetch_all_and_sort_oldest_first: if is_manga_mode_fetch_all_and_sort_oldest_first:
logger(f" Manga Mode (Style: {manga_filename_style_for_sort_check if manga_filename_style_for_sort_check else 'Default'} - Oldest First Sort Active): Fetching all posts to sort by date...") logger(f" Manga Mode (Style: {manga_filename_style_for_sort_check if manga_filename_style_for_sort_check else 'Default'} - Oldest First Sort Active): Fetching all posts to sort by date...")
@@ -375,4 +356,3 @@ def download_from_api(
time.sleep(0.6) time.sleep(0.6)
if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()): if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()):
logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).") logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).")

View File

@@ -37,7 +37,7 @@ try:
except ImportError: except ImportError:
Document = None Document = None
from PyQt5 .QtCore import Qt ,QThread ,pyqtSignal ,QMutex ,QMutexLocker ,QObject ,QTimer ,QSettings ,QStandardPaths ,QCoreApplication ,QUrl ,QSize ,QProcess from PyQt5 .QtCore import Qt ,QThread ,pyqtSignal ,QMutex ,QMutexLocker ,QObject ,QTimer ,QSettings ,QStandardPaths ,QCoreApplication ,QUrl ,QSize ,QProcess
from .api_client import download_from_api, fetch_post_comments, fetch_single_post_data from .api_client import download_from_api, fetch_post_comments
from ..services.multipart_downloader import download_file_in_parts, MULTIPART_DOWNLOADER_AVAILABLE from ..services.multipart_downloader import download_file_in_parts, MULTIPART_DOWNLOADER_AVAILABLE
from ..services.drive_downloader import ( from ..services.drive_downloader import (
download_mega_file, download_gdrive_file, download_dropbox_file download_mega_file, download_gdrive_file, download_dropbox_file
@@ -124,8 +124,7 @@ class PostProcessorWorker:
processed_post_ids=None, processed_post_ids=None,
multipart_scope='both', multipart_scope='both',
multipart_parts_count=4, multipart_parts_count=4,
multipart_min_size_mb=100, multipart_min_size_mb=100
skip_file_size_mb=None
): ):
self.post = post_data self.post = post_data
self.download_root = download_root self.download_root = download_root
@@ -190,7 +189,6 @@ class PostProcessorWorker:
self.multipart_scope = multipart_scope self.multipart_scope = multipart_scope
self.multipart_parts_count = multipart_parts_count self.multipart_parts_count = multipart_parts_count
self.multipart_min_size_mb = multipart_min_size_mb self.multipart_min_size_mb = multipart_min_size_mb
self.skip_file_size_mb = skip_file_size_mb
if self.compress_images and Image is None: if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found.") self.logger("⚠️ Image compression disabled: Pillow library not found.")
self.compress_images = False self.compress_images = False
@@ -270,7 +268,7 @@ class PostProcessorWorker:
return 0, 1, "", False, FILE_DOWNLOAD_STATUS_SKIPPED, None return 0, 1, "", False, FILE_DOWNLOAD_STATUS_SKIPPED, None
file_download_headers = { file_download_headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
'Referer': post_page_url 'Referer': post_page_url
} }
@@ -279,24 +277,6 @@ class PostProcessorWorker:
if self.use_cookie: if self.use_cookie:
cookies_to_use_for_file = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger) cookies_to_use_for_file = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger)
if self.skip_file_size_mb is not None:
api_original_filename_for_size_check = file_info.get('_original_name_for_log', file_info.get('name'))
try:
# Use a stream=True HEAD request to get headers without downloading the body
with requests.head(file_url, headers=file_download_headers, timeout=15, cookies=cookies_to_use_for_file, allow_redirects=True) as head_response:
head_response.raise_for_status()
content_length = head_response.headers.get('Content-Length')
if content_length:
file_size_bytes = int(content_length)
file_size_mb = file_size_bytes / (1024 * 1024)
if file_size_mb < self.skip_file_size_mb:
self.logger(f" -> Skip File (Size): '{api_original_filename_for_size_check}' is {file_size_mb:.2f} MB, which is smaller than the {self.skip_file_size_mb} MB limit.")
return 0, 1, api_original_filename_for_size_check, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
else:
self.logger(f" ⚠️ Could not determine file size for '{api_original_filename_for_size_check}' to check against size limit. Proceeding with download.")
except requests.RequestException as e:
self.logger(f" ⚠️ Could not fetch file headers to check size for '{api_original_filename_for_size_check}': {e}. Proceeding with download.")
api_original_filename = file_info.get('_original_name_for_log', file_info.get('name')) api_original_filename = file_info.get('_original_name_for_log', file_info.get('name'))
filename_to_save_in_main_path = "" filename_to_save_in_main_path = ""
if forced_filename_override: if forced_filename_override:
@@ -508,18 +488,19 @@ class PostProcessorWorker:
except requests.RequestException as e: except requests.RequestException as e:
self.logger(f" ⚠️ Could not verify size of existing file '{filename_to_save_in_main_path}': {e}. Proceeding with download.") self.logger(f" ⚠️ Could not verify size of existing file '{filename_to_save_in_main_path}': {e}. Proceeding with download.")
max_retries = 3
retry_delay = 5 retry_delay = 5
downloaded_size_bytes = 0 downloaded_size_bytes = 0
calculated_file_hash = None calculated_file_hash = None
downloaded_part_file_path = None downloaded_part_file_path = None
total_size_bytes = 0
download_successful_flag = False download_successful_flag = False
last_exception_for_retry_later = None last_exception_for_retry_later = None
is_permanent_error = False is_permanent_error = False
data_to_write_io = None data_to_write_io = None
response_for_this_attempt = None
for attempt_num_single_stream in range(max_retries + 1): for attempt_num_single_stream in range(max_retries + 1):
response = None response_for_this_attempt = None
if self._check_pause(f"File download attempt for '{api_original_filename}'"): break if self._check_pause(f"File download attempt for '{api_original_filename}'"): break
if self.check_cancel() or (skip_event and skip_event.is_set()): break if self.check_cancel() or (skip_event and skip_event.is_set()): break
try: try:
@@ -538,24 +519,12 @@ class PostProcessorWorker:
new_url = self._find_valid_subdomain(current_url_to_try) new_url = self._find_valid_subdomain(current_url_to_try)
if new_url != current_url_to_try: if new_url != current_url_to_try:
self.logger(f" Retrying with new URL: {new_url}") self.logger(f" Retrying with new URL: {new_url}")
file_url = new_url file_url = new_url # Update the main file_url for subsequent retries
response.close() # Close the old response
response = requests.get(new_url, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file) response = requests.get(new_url, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file)
response.raise_for_status() response.raise_for_status()
# --- REVISED AND MOVED SIZE CHECK LOGIC ---
total_size_bytes = int(response.headers.get('Content-Length', 0)) total_size_bytes = int(response.headers.get('Content-Length', 0))
if self.skip_file_size_mb is not None:
if total_size_bytes > 0:
file_size_mb = total_size_bytes / (1024 * 1024)
if file_size_mb < self.skip_file_size_mb:
self.logger(f" -> Skip File (Size): '{api_original_filename}' is {file_size_mb:.2f} MB, which is smaller than the {self.skip_file_size_mb} MB limit.")
return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
# If Content-Length is missing, we can't check, so we no longer log a warning here and just proceed.
# --- END OF REVISED LOGIC ---
num_parts_for_file = min(self.multipart_parts_count, MAX_PARTS_FOR_MULTIPART_DOWNLOAD) num_parts_for_file = min(self.multipart_parts_count, MAX_PARTS_FOR_MULTIPART_DOWNLOAD)
file_is_eligible_by_scope = False file_is_eligible_by_scope = False
@@ -579,7 +548,9 @@ class PostProcessorWorker:
if self._check_pause(f"Multipart decision for '{api_original_filename}'"): break if self._check_pause(f"Multipart decision for '{api_original_filename}'"): break
if attempt_multipart: if attempt_multipart:
response.close() # Close the initial connection before starting multipart if response_for_this_attempt:
response_for_this_attempt.close()
response_for_this_attempt = None
mp_save_path_for_unique_part_stem_arg = os.path.join(target_folder_path, f"{unique_part_file_stem_on_disk}{temp_file_ext_for_unique_part}") mp_save_path_for_unique_part_stem_arg = os.path.join(target_folder_path, f"{unique_part_file_stem_on_disk}{temp_file_ext_for_unique_part}")
mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts( mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts(
file_url, mp_save_path_for_unique_part_stem_arg, total_size_bytes, num_parts_for_file, file_download_headers, api_original_filename, file_url, mp_save_path_for_unique_part_stem_arg, total_size_bytes, num_parts_for_file, file_download_headers, api_original_filename,
@@ -605,6 +576,7 @@ class PostProcessorWorker:
current_attempt_downloaded_bytes = 0 current_attempt_downloaded_bytes = 0
md5_hasher = hashlib.md5() md5_hasher = hashlib.md5()
last_progress_time = time.time() last_progress_time = time.time()
single_stream_exception = None
try: try:
with open(current_single_stream_part_path, 'wb') as f_part: with open(current_single_stream_part_path, 'wb') as f_part:
for chunk in response.iter_content(chunk_size=1 * 1024 * 1024): for chunk in response.iter_content(chunk_size=1 * 1024 * 1024):
@@ -671,8 +643,8 @@ class PostProcessorWorker:
is_permanent_error = True is_permanent_error = True
break break
finally: finally:
if response: if response_for_this_attempt:
response.close() response_for_this_attempt.close()
self._emit_signal('file_download_status', False) self._emit_signal('file_download_status', False)
final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes
@@ -876,37 +848,6 @@ class PostProcessorWorker:
post_data = self.post # Reference to the post object post_data = self.post # Reference to the post object
log_prefix = "Post" log_prefix = "Post"
# --- FIX: FETCH FULL POST DATA IF CONTENT IS MISSING BUT NEEDED ---
content_is_needed = (
self.show_external_links or
self.extract_links_only or
self.scan_content_for_images or
(self.filter_mode == 'text_only' and self.text_only_scope == 'content')
)
if content_is_needed and self.post.get('content') is None and self.service != 'discord':
self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
parsed_url = urlparse(self.api_url_input)
api_domain = parsed_url.netloc
headers = {'User-Agent': 'Mozilla/5.0'}
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
full_post_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
if full_post_data:
self.logger(" ✅ Full post data fetched successfully.")
# Update the worker's post object with the complete data
self.post = full_post_data
# Re-initialize local variables from the new, complete post data
post_title = self.post.get('title', '') or 'untitled_post'
post_main_file_info = self.post.get('file')
post_attachments = self.post.get('attachments', [])
post_content_html = self.post.get('content', '')
post_data = self.post
else:
self.logger(f" ⚠️ Failed to fetch full content for post {post_id}. Content-dependent features may not work for this post.")
# --- END FIX ---
# 2. SHARED PROCESSING LOGIC: The rest of the function now uses the consistent variables from above. # 2. SHARED PROCESSING LOGIC: The rest of the function now uses the consistent variables from above.
result_tuple = (0, 0, [], [], [], None, None) result_tuple = (0, 0, [], [], [], None, None)
total_downloaded_this_post = 0 total_downloaded_this_post = 0
@@ -1317,6 +1258,7 @@ class PostProcessorWorker:
parsed_url = urlparse(self.api_url_input) parsed_url = urlparse(self.api_url_input)
api_domain = parsed_url.netloc api_domain = parsed_url.netloc
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain) cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
from .api_client import fetch_single_post_data
full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies) full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
if full_data: if full_data:
final_post_data = full_data final_post_data = full_data
@@ -1993,9 +1935,7 @@ class DownloadThread(QThread):
project_root_dir=None, project_root_dir=None,
processed_post_ids=None, processed_post_ids=None,
start_offset=0, start_offset=0,
fetch_first=False, fetch_first=False):
skip_file_size_mb=None
):
super().__init__() super().__init__()
self.api_url_input = api_url_input self.api_url_input = api_url_input
self.output_dir = output_dir self.output_dir = output_dir
@@ -2062,7 +2002,6 @@ class DownloadThread(QThread):
self.processed_post_ids_set = set(processed_post_ids) if processed_post_ids is not None else set() self.processed_post_ids_set = set(processed_post_ids) if processed_post_ids is not None else set()
self.start_offset = start_offset self.start_offset = start_offset
self.fetch_first = fetch_first self.fetch_first = fetch_first
self.skip_file_size_mb = skip_file_size_mb
if self.compress_images and Image is None: if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).") self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
@@ -2183,7 +2122,6 @@ class DownloadThread(QThread):
'single_pdf_mode': self.single_pdf_mode, 'single_pdf_mode': self.single_pdf_mode,
'multipart_parts_count': self.multipart_parts_count, 'multipart_parts_count': self.multipart_parts_count,
'multipart_min_size_mb': self.multipart_min_size_mb, 'multipart_min_size_mb': self.multipart_min_size_mb,
'skip_file_size_mb': self.skip_file_size_mb,
'project_root_dir': self.project_root_dir, 'project_root_dir': self.project_root_dir,
} }

View File

@@ -281,7 +281,7 @@ class DownloaderApp (QWidget ):
self.download_location_label_widget = None self.download_location_label_widget = None
self.remove_from_filename_label_widget = None self.remove_from_filename_label_widget = None
self.skip_words_label_widget = None self.skip_words_label_widget = None
self.setWindowTitle("Kemono Downloader v6.4.3") self.setWindowTitle("Kemono Downloader v6.3.1")
setup_ui(self) setup_ui(self)
self._connect_signals() self._connect_signals()
self.log_signal.emit(" Local API server functionality has been removed.") self.log_signal.emit(" Local API server functionality has been removed.")
@@ -688,12 +688,8 @@ class DownloaderApp (QWidget ):
return return
self.fetched_posts_for_download = fetched_posts self.fetched_posts_for_download = fetched_posts
self.is_ready_to_download_fetched = True self.is_ready_to_download_fetched = True # <-- ADD THIS LINE
self.log_signal.emit(f"✅ Fetch complete. Found {len(self.fetched_posts_for_download)} posts.") self.log_signal.emit(f"✅ Fetch complete. Found {len(self.fetched_posts_for_download)} posts.")
self.log_signal.emit("=" * 40)
self.log_signal.emit("✅ Stage 1 complete. All post data has been fetched.")
self.log_signal.emit(" 💡 You can now disconnect your VPN (if used) before starting the download.")
self.log_signal.emit(" Press the 'Start Download' button to begin Stage 2: Downloading files.")
self.progress_label.setText(f"Found {len(self.fetched_posts_for_download)} posts. Ready to download.") self.progress_label.setText(f"Found {len(self.fetched_posts_for_download)} posts. Ready to download.")
self._update_button_states_and_connections() self._update_button_states_and_connections()
@@ -704,9 +700,7 @@ class DownloaderApp (QWidget ):
Initiates the download of the posts that were previously fetched. Initiates the download of the posts that were previously fetched.
""" """
self.is_ready_to_download_fetched = False # Reset the state flag self.is_ready_to_download_fetched = False # Reset the state flag
self.log_signal.emit("=" * 40) self.log_signal.emit(f"🚀 Starting download of {len(self.fetched_posts_for_download)} fetched posts...")
self.log_signal.emit(f"🚀 Starting Stage 2: Downloading files for {len(self.fetched_posts_for_download)} fetched posts.")
self.log_signal.emit(" 💡 If you disconnected your VPN, downloads will now use your regular connection.")
# Manually set the UI to a "downloading" state for reliability # Manually set the UI to a "downloading" state for reliability
self.set_ui_enabled(False) self.set_ui_enabled(False)
@@ -3360,8 +3354,7 @@ class DownloaderApp (QWidget ):
'pause_event': self.pause_event, 'cancellation_event': self.cancellation_event, 'pause_event': self.pause_event, 'cancellation_event': self.cancellation_event,
'downloaded_files': self.downloaded_files, 'downloaded_file_hashes': self.downloaded_file_hashes, 'downloaded_files': self.downloaded_files, 'downloaded_file_hashes': self.downloaded_file_hashes,
'downloaded_files_lock': self.downloaded_files_lock, 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, 'downloaded_files_lock': self.downloaded_files_lock, 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock,
'skip_words_list': [part.strip().lower() for part in self.skip_words_input.text().strip().split(',') if part.strip() and not part.strip().startswith('[')], 'skip_words_list': [word.strip().lower() for word in self.skip_words_input.text().strip().split(',') if word.strip()],
'skip_file_size_mb': next((int(re.search(r'\[(\d+)\]', part).group(1)) for part in self.skip_words_input.text().strip().split(',') if re.fullmatch(r'\[\d+\]', part.strip())), None),
'skip_words_scope': self.get_skip_words_scope(), 'char_filter_scope': self.get_char_filter_scope(), 'skip_words_scope': self.get_skip_words_scope(), 'char_filter_scope': self.get_char_filter_scope(),
'remove_from_filename_words_list': [word.strip() for word in self.remove_from_filename_input.text().strip().split(',') if word.strip()], 'remove_from_filename_words_list': [word.strip() for word in self.remove_from_filename_input.text().strip().split(',') if word.strip()],
'scan_content_for_images': self.scan_content_images_checkbox.isChecked(), 'scan_content_for_images': self.scan_content_images_checkbox.isChecked(),
@@ -3530,19 +3523,8 @@ class DownloaderApp (QWidget ):
self.thread_count_input.selectAll() self.thread_count_input.selectAll()
return False return False
raw_skip_words_text = self.skip_words_input.text().strip() raw_skip_words = self.skip_words_input.text().strip()
skip_words_parts = [part.strip() for part in raw_skip_words_text.split(',') if part.strip()] skip_words_list = [word.strip().lower() for word in raw_skip_words.split(',') if word.strip()]
skip_words_list = []
skip_file_size_mb = None
size_pattern = re.compile(r'\[(\d+)\]')
for part in skip_words_parts:
match = size_pattern.fullmatch(part)
if match:
skip_file_size_mb = int(match.group(1))
self.log_signal.emit(f" File size skip rule found: Will skip files smaller than {skip_file_size_mb} MB.")
else:
skip_words_list.append(part.lower())
raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else "" raw_remove_filename_words = self.remove_from_filename_input.text().strip() if hasattr(self, 'remove_from_filename_input') else ""
allow_multipart = self.allow_multipart_download_setting allow_multipart = self.allow_multipart_download_setting
@@ -3909,7 +3891,6 @@ class DownloaderApp (QWidget ):
'downloaded_file_hashes': self.downloaded_file_hashes, 'downloaded_file_hashes': self.downloaded_file_hashes,
'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock,
'skip_words_list': skip_words_list, 'skip_words_list': skip_words_list,
'skip_file_size_mb': skip_file_size_mb,
'skip_words_scope': current_skip_words_scope, 'skip_words_scope': current_skip_words_scope,
'remove_from_filename_words_list': remove_from_filename_words_list, 'remove_from_filename_words_list': remove_from_filename_words_list,
'char_filter_scope': current_char_filter_scope, 'char_filter_scope': current_char_filter_scope,
@@ -3960,9 +3941,7 @@ class DownloaderApp (QWidget ):
self.last_start_download_args = args_template.copy() self.last_start_download_args = args_template.copy()
if fetch_first_enabled and not post_id_from_url: if fetch_first_enabled and not post_id_from_url:
self.log_signal.emit("=" * 40) self.log_signal.emit("🚀 Starting Stage 1: Fetching all pages...")
self.log_signal.emit("🚀 'Fetch First' mode is active. Starting Stage 1: Fetching all post data.")
self.log_signal.emit(" 💡 If you are using a VPN for this stage, ensure it is connected now.")
self.is_fetching_only = True self.is_fetching_only = True
self.set_ui_enabled(False) self.set_ui_enabled(False)
self._update_button_states_and_connections() self._update_button_states_and_connections()
@@ -5505,8 +5484,7 @@ class DownloaderApp (QWidget ):
'downloaded_files_lock': self.downloaded_files_lock, 'downloaded_files_lock': self.downloaded_files_lock,
'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock,
'dynamic_character_filter_holder': self.dynamic_character_filter_holder, 'dynamic_character_filter_holder': self.dynamic_character_filter_holder,
'skip_words_list': [part.strip().lower() for part in self.skip_words_input.text().strip().split(',') if part.strip() and not part.strip().startswith('[')], 'skip_words_list': [word.strip().lower() for word in self.skip_words_input.text().strip().split(',') if word.strip()],
'skip_file_size_mb': next((int(re.search(r'\[(\d+)\]', part).group(1)) for part in self.skip_words_input.text().strip().split(',') if re.fullmatch(r'\[\d+\]', part.strip())), None),
'skip_words_scope': self.get_skip_words_scope(), 'skip_words_scope': self.get_skip_words_scope(),
'show_external_links': self.external_links_checkbox.isChecked(), 'show_external_links': self.external_links_checkbox.isChecked(),
'extract_links_only': self.radio_only_links.isChecked(), 'extract_links_only': self.radio_only_links.isChecked(),