From 5f7b52685276e3cbf01f6e57edbc68176838af69 Mon Sep 17 00:00:00 2001 From: Yuvi9587 <114073886+Yuvi9587@users.noreply.github.com> Date: Sun, 17 Aug 2025 05:51:25 -0700 Subject: [PATCH] Commit --- src/core/api_client.py | 36 +++--- src/core/discord_client.py | 104 ++++++++------- src/core/nhentai_client.py | 45 +++++++ src/core/workers.py | 64 ++++++--- src/services/drive_downloader.py | 92 +++++++------ src/ui/main_window.py | 214 ++++++++++++++++++++++++++++--- 6 files changed, 413 insertions(+), 142 deletions(-) create mode 100644 src/core/nhentai_client.py diff --git a/src/core/api_client.py b/src/core/api_client.py index dd77236..138ad9d 100644 --- a/src/core/api_client.py +++ b/src/core/api_client.py @@ -3,6 +3,7 @@ import traceback from urllib.parse import urlparse import json import requests +import cloudscraper from ..utils.network_utils import extract_post_info, prepare_cookies_for_request from ..config.constants import ( STYLE_DATE_POST_TITLE @@ -80,26 +81,26 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None): """ - --- NEW FUNCTION --- - Fetches the full data, including the 'content' field, for a single post. + --- MODIFIED FUNCTION --- + Fetches the full data, including the 'content' field, for a single post using cloudscraper. 
""" post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}" logger(f" Fetching full content for post ID {post_id}...") - try: - with requests.get(post_api_url, headers=headers, timeout=(15, 300), cookies=cookies_dict, stream=True) as response: - response.raise_for_status() - response_body = b"" - for chunk in response.iter_content(chunk_size=8192): - response_body += chunk - - full_post_data = json.loads(response_body) - if isinstance(full_post_data, list) and full_post_data: - return full_post_data[0] - if isinstance(full_post_data, dict) and 'post' in full_post_data: - return full_post_data['post'] - return full_post_data - + scraper = cloudscraper.create_scraper() + + try: + response = scraper.get(post_api_url, headers=headers, timeout=(15, 300), cookies=cookies_dict) + response.raise_for_status() + + full_post_data = response.json() + + if isinstance(full_post_data, list) and full_post_data: + return full_post_data[0] + if isinstance(full_post_data, dict) and 'post' in full_post_data: + return full_post_data['post'] + return full_post_data + except Exception as e: logger(f" ❌ Failed to fetch full content for post {post_id}: {e}") return None @@ -138,8 +139,7 @@ def download_from_api( manga_filename_style_for_sort_check=None, processed_post_ids=None, fetch_all_first=False -): - # FIX: Define api_domain FIRST, before it is used in the headers + ): parsed_input_url_for_domain = urlparse(api_url_input) api_domain = parsed_input_url_for_domain.netloc diff --git a/src/core/discord_client.py b/src/core/discord_client.py index 15e5310..0153153 100644 --- a/src/core/discord_client.py +++ b/src/core/discord_client.py @@ -1,63 +1,70 @@ import time -import requests +import cloudscraper import json -from urllib.parse import urlparse -def fetch_server_channels(server_id, logger, cookies=None, cancellation_event=None, pause_event=None): +def fetch_server_channels(server_id, logger=print, cookies_dict=None): """ - Fetches the list of channels for 
a given Discord server ID from the Kemono API. - UPDATED to be pausable and cancellable. + Fetches all channels for a given Discord server ID from the API. + Uses cloudscraper to bypass Cloudflare. """ - domains_to_try = ["kemono.cr", "kemono.su"] - for domain in domains_to_try: - if cancellation_event and cancellation_event.is_set(): - logger(" Channel fetching cancelled by user.") - return None - while pause_event and pause_event.is_set(): - if cancellation_event and cancellation_event.is_set(): break - time.sleep(0.5) + api_url = f"https://kemono.cr/api/v1/discord/server/{server_id}" + logger(f" Fetching channels for server: {api_url}") - lookup_url = f"https://{domain}/api/v1/discord/channel/lookup/{server_id}" - logger(f" Attempting to fetch channel list from: {lookup_url}") - try: - response = requests.get(lookup_url, cookies=cookies, timeout=15) - response.raise_for_status() - channels = response.json() - if isinstance(channels, list): - logger(f" ✅ Found {len(channels)} channels for server {server_id}.") - return channels - except (requests.exceptions.RequestException, json.JSONDecodeError): - # This is a silent failure, we'll just try the next domain - pass - - logger(f" ❌ Failed to fetch channel list for server {server_id} from all available domains.") - return None + scraper = cloudscraper.create_scraper() + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Referer': f'https://kemono.cr/discord/server/{server_id}', + 'Accept': 'text/css' + } -def fetch_channel_messages(channel_id, logger, cancellation_event, pause_event, cookies=None): + try: + response = scraper.get(api_url, headers=headers, cookies=cookies_dict, timeout=30) + response.raise_for_status() + channels = response.json() + if isinstance(channels, list): + logger(f" ✅ Found {len(channels)} channels for server {server_id}.") + return channels + return None + except Exception as e: + logger(f" ❌ 
Error fetching server channels for {server_id}: {e}") + return None + +def fetch_channel_messages(channel_id, logger=print, cancellation_event=None, pause_event=None, cookies_dict=None): """ - Fetches all messages from a Discord channel by looping through API pages (pagination). - Uses a page size of 150 and handles the specific offset logic. + A generator that fetches all messages for a specific Discord channel, handling pagination. + Uses cloudscraper and proper headers to bypass server protection. """ - offset = 0 - page_size = 150 # Corrected page size based on your findings - api_base_url = f"https://kemono.cr/api/v1/discord/channel/{channel_id}" + scraper = cloudscraper.create_scraper() + base_url = f"https://kemono.cr/api/v1/discord/channel/{channel_id}" + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Referer': f'https://kemono.cr/discord/channel/{channel_id}', + 'Accept': 'text/css' + } - while not (cancellation_event and cancellation_event.is_set()): - if pause_event and pause_event.is_set(): - logger(" Message fetching paused...") - while pause_event.is_set(): - if cancellation_event and cancellation_event.is_set(): break - time.sleep(0.5) - logger(" Message fetching resumed.") + offset = 0 + # --- FIX: Corrected the page size for Discord API pagination --- + page_size = 150 + # --- END FIX --- + while True: if cancellation_event and cancellation_event.is_set(): + logger(" Discord message fetching cancelled.") break - - paginated_url = f"{api_base_url}?o={offset}" + if pause_event and pause_event.is_set(): + logger(" Discord message fetching paused...") + while pause_event.is_set(): + if cancellation_event and cancellation_event.is_set(): + break + time.sleep(0.5) + if not (cancellation_event and cancellation_event.is_set()): + logger(" Discord message fetching resumed.") + + paginated_url = f"{base_url}?o={offset}" logger(f" Fetching messages from API: 
page starting at offset {offset}") try: - response = requests.get(paginated_url, cookies=cookies, timeout=20) + response = scraper.get(paginated_url, headers=headers, cookies=cookies_dict, timeout=30) response.raise_for_status() messages_batch = response.json() @@ -73,8 +80,11 @@ def fetch_channel_messages(channel_id, logger, cancellation_event, pause_event, break offset += page_size - time.sleep(0.5) + time.sleep(0.5) # Be respectful to the API - except (requests.exceptions.RequestException, json.JSONDecodeError) as e: + except (cloudscraper.exceptions.CloudflareException, json.JSONDecodeError) as e: logger(f" ❌ Error fetching messages at offset {offset}: {e}") - break \ No newline at end of file + break + except Exception as e: + logger(f" ❌ An unexpected error occurred while fetching messages: {e}") + break diff --git a/src/core/nhentai_client.py b/src/core/nhentai_client.py new file mode 100644 index 0000000..ee7fd23 --- /dev/null +++ b/src/core/nhentai_client.py @@ -0,0 +1,45 @@ +import requests +import cloudscraper +import json + +def fetch_nhentai_gallery(gallery_id, logger=print): + """ + Fetches the metadata for a single nhentai gallery using cloudscraper to bypass Cloudflare. + + Args: + gallery_id (str or int): The ID of the nhentai gallery. + logger (function): A function to log progress and error messages. + + Returns: + dict: A dictionary containing the gallery's metadata if successful, otherwise None. 
+ """ + api_url = f"https://nhentai.net/api/gallery/{gallery_id}" + + # Create a cloudscraper instance + scraper = cloudscraper.create_scraper() + + logger(f" Fetching nhentai gallery metadata from: {api_url}") + + try: + # Use the scraper to make the GET request + response = scraper.get(api_url, timeout=20) + + if response.status_code == 404: + logger(f" ❌ Gallery not found (404): ID {gallery_id}") + return None + + response.raise_for_status() + + gallery_data = response.json() + + if "id" in gallery_data and "media_id" in gallery_data and "images" in gallery_data: + logger(f" ✅ Successfully fetched metadata for '{gallery_data['title']['english']}'") + gallery_data['pages'] = gallery_data.pop('images')['pages'] + return gallery_data + else: + logger(" ❌ API response is missing essential keys (id, media_id, or images).") + return None + + except Exception as e: + logger(f" ❌ An error occurred while fetching gallery {gallery_id}: {e}") + return None \ No newline at end of file diff --git a/src/core/workers.py b/src/core/workers.py index 90960d6..f7c9b82 100644 --- a/src/core/workers.py +++ b/src/core/workers.py @@ -15,6 +15,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError, from io import BytesIO from urllib .parse import urlparse import requests +import cloudscraper + try: from PIL import Image except ImportError: @@ -58,18 +60,13 @@ def robust_clean_name(name): """A more robust function to remove illegal characters for filenames and folders.""" if not name: return "" - # Removes illegal characters for Windows, macOS, and Linux: < > : " / \ | ? * - # Also removes control characters (ASCII 0-31) which are invisible but invalid. - illegal_chars_pattern = r'[\x00-\x1f<>:"/\\|?*]' + illegal_chars_pattern = r'[\x00-\x1f<>:"/\\|?*\']' cleaned_name = re.sub(illegal_chars_pattern, '', name) - - # Remove leading/trailing spaces or periods, which can cause issues. 
+ cleaned_name = cleaned_name.strip(' .') - - # If the name is empty after cleaning (e.g., it was only illegal chars), - # provide a safe fallback name. + if not cleaned_name: - return "untitled_folder" # Or "untitled_file" depending on context + return "untitled_folder" return cleaned_name class PostProcessorSignals (QObject ): @@ -271,7 +268,9 @@ class PostProcessorWorker: file_download_headers = { 'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', - 'Referer': post_page_url + 'Referer': post_page_url, + 'Accept': 'text/css' + } file_url = file_info.get('url') @@ -429,8 +428,26 @@ class PostProcessorWorker: self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save_in_main_path}'.") was_original_name_kept_flag = False else: - filename_to_save_in_main_path = cleaned_original_api_filename - was_original_name_kept_flag = True + is_url_like = 'http' in api_original_filename.lower() + is_too_long = len(cleaned_original_api_filename) > 100 + + if is_url_like or is_too_long: + self.logger(f" ⚠️ Original filename is a URL or too long. 
Generating a shorter name.") + name_hash = hashlib.md5(api_original_filename.encode()).hexdigest()[:12] + _, ext = os.path.splitext(cleaned_original_api_filename) + if not ext: + try: + path = urlparse(api_original_filename).path + ext = os.path.splitext(path)[1] or ".file" + except Exception: + ext = ".file" + + cleaned_post_title = robust_clean_name(post_title.strip() if post_title else "post")[:40] + filename_to_save_in_main_path = f"{cleaned_post_title}_{name_hash}{ext}" + was_original_name_kept_flag = False + else: + filename_to_save_in_main_path = cleaned_original_api_filename + was_original_name_kept_flag = True if self.remove_from_filename_words_list and filename_to_save_in_main_path: base_name_for_removal, ext_for_removal = os.path.splitext(filename_to_save_in_main_path) @@ -854,9 +871,7 @@ class PostProcessorWorker: return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, details_for_failure def process(self): - # --- START: REFACTORED PROCESS METHOD --- - # 1. DATA MAPPING: Map Discord Message or Creator Post fields to a consistent set of variables. if self.service == 'discord': # For Discord, self.post is a MESSAGE object from the API. 
post_title = self.post.get('content', '') or f"Message {self.post.get('id', 'N/A')}" @@ -885,19 +900,26 @@ class PostProcessorWorker: ) if content_is_needed and self.post.get('content') is None and self.service != 'discord': + self.logger(f" Post {post_id} is missing 'content' field, fetching full data...") parsed_url = urlparse(self.api_url_input) api_domain = parsed_url.netloc - headers = {'User-Agent': 'Mozilla/5.0'} + creator_page_url = f"https://{api_domain}/{self.service}/user/{self.user_id}" + headers = { + 'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', + 'Referer': creator_page_url, + 'Accept': 'text/css' + + } + + cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain) full_post_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies) if full_post_data: self.logger(" ✅ Full post data fetched successfully.") - # Update the worker's post object with the complete data self.post = full_post_data - # Re-initialize local variables from the new, complete post data post_title = self.post.get('title', '') or 'untitled_post' post_main_file_info = self.post.get('file') post_attachments = self.post.get('attachments', []) @@ -905,9 +927,7 @@ class PostProcessorWorker: post_data = self.post else: self.logger(f" ⚠️ Failed to fetch full content for post {post_id}. Content-dependent features may not work for this post.") - # --- END FIX --- - # 2. SHARED PROCESSING LOGIC: The rest of the function now uses the consistent variables from above. 
result_tuple = (0, 0, [], [], [], None, None) total_downloaded_this_post = 0 total_skipped_this_post = 0 @@ -936,7 +956,11 @@ class PostProcessorWorker: else: post_page_url = f"https://{parsed_api_url.netloc}/{self.service}/user/{self.user_id}/post/{post_id}" - headers = {'User-Agent': 'Mozilla/5.0', 'Referer': post_page_url, 'Accept': '*/*'} + headers = { + 'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', + 'Referer': post_page_url, + 'Accept': 'text/css' + } link_pattern = re.compile(r"""]*>(.*?)""", re.IGNORECASE | re.DOTALL) effective_unwanted_keywords_for_folder_naming = self.unwanted_keywords.copy() diff --git a/src/services/drive_downloader.py b/src/services/drive_downloader.py index a8e8ef8..1f4ae90 100644 --- a/src/services/drive_downloader.py +++ b/src/services/drive_downloader.py @@ -5,9 +5,12 @@ import traceback import json import base64 import time +import zipfile from urllib.parse import urlparse, urlunparse, parse_qs, urlencode +# --- Third-party Library Imports --- import requests +import cloudscraper try: from Crypto.Cipher import AES @@ -26,12 +29,12 @@ MEGA_API_URL = "https://g.api.mega.co.nz" def _get_filename_from_headers(headers): """ Extracts a filename from the Content-Disposition header. 
- (This is from your original file and is kept for Dropbox downloads) """ cd = headers.get('content-disposition') if not cd: return None + # Handles both filename="file.zip" and filename*=UTF-8''file%20name.zip fname_match = re.findall('filename="?([^"]+)"?', cd) if fname_match: sanitized_name = re.sub(r'[<>:"/\\|?*]', '_', fname_match[0].strip()) @@ -39,28 +42,23 @@ def _get_filename_from_headers(headers): return None -# --- NEW: Helper functions for Mega decryption --- +# --- Helper functions for Mega decryption --- def urlb64_to_b64(s): - """Converts a URL-safe base64 string to a standard base64 string.""" s = s.replace('-', '+').replace('_', '/') s += '=' * (-len(s) % 4) return s def b64_to_bytes(s): - """Decodes a URL-safe base64 string to bytes.""" return base64.b64decode(urlb64_to_b64(s)) def bytes_to_hex(b): - """Converts bytes to a hex string.""" return b.hex() def hex_to_bytes(h): - """Converts a hex string to bytes.""" return bytes.fromhex(h) def hrk2hk(hex_raw_key): - """Derives the final AES key from the raw key components for Mega.""" key_part1 = int(hex_raw_key[0:16], 16) key_part2 = int(hex_raw_key[16:32], 16) key_part3 = int(hex_raw_key[32:48], 16) @@ -72,23 +70,20 @@ def hrk2hk(hex_raw_key): return f'{final_key_part1:016x}{final_key_part2:016x}' def decrypt_at(at_b64, key_bytes): - """Decrypts the 'at' attribute to get file metadata.""" at_bytes = b64_to_bytes(at_b64) iv = b'\0' * 16 cipher = AES.new(key_bytes, AES.MODE_CBC, iv) decrypted_at = cipher.decrypt(at_bytes) return decrypted_at.decode('utf-8').strip('\0').replace('MEGA', '') -# --- NEW: Core Logic for Mega Downloads --- +# --- Core Logic for Mega Downloads --- def get_mega_file_info(file_id, file_key, session, logger_func): - """Fetches file metadata and the temporary download URL from the Mega API.""" try: hex_raw_key = bytes_to_hex(b64_to_bytes(file_key)) hex_key = hrk2hk(hex_raw_key) key_bytes = hex_to_bytes(hex_key) - # Request file attributes payload = [{"a": "g", "p": file_id}] 
response = session.post(f"{MEGA_API_URL}/cs", json=payload, timeout=20) response.raise_for_status() @@ -100,13 +95,10 @@ def get_mega_file_info(file_id, file_key, session, logger_func): file_size = res_json[0]['s'] at_b64 = res_json[0]['at'] - - # Decrypt attributes to get the file name at_dec_json_str = decrypt_at(at_b64, key_bytes) at_dec_json = json.loads(at_dec_json_str) file_name = at_dec_json['n'] - # Request the temporary download URL payload = [{"a": "g", "g": 1, "p": file_id}] response = session.post(f"{MEGA_API_URL}/cs", json=payload, timeout=20) response.raise_for_status() @@ -124,19 +116,16 @@ def get_mega_file_info(file_id, file_key, session, logger_func): return None def download_and_decrypt_mega_file(info, download_path, logger_func): - """Downloads the file and decrypts it chunk by chunk, reporting progress.""" file_name = info['file_name'] file_size = info['file_size'] dl_url = info['dl_url'] hex_raw_key = info['hex_raw_key'] - final_path = os.path.join(download_path, file_name) if os.path.exists(final_path) and os.path.getsize(final_path) == file_size: logger_func(f" [Mega] ℹ️ File '{file_name}' already exists with the correct size. 
Skipping.") return - # Prepare for decryption key = hex_to_bytes(hrk2hk(hex_raw_key)) iv_hex = hex_raw_key[32:48] + '0000000000000000' iv_bytes = hex_to_bytes(iv_hex) @@ -150,13 +139,11 @@ def download_and_decrypt_mega_file(info, download_path, logger_func): with open(final_path, 'wb') as f: for chunk in r.iter_content(chunk_size=8192): - if not chunk: - continue + if not chunk: continue decrypted_chunk = cipher.decrypt(chunk) f.write(decrypted_chunk) downloaded_bytes += len(chunk) - # Log progress every second current_time = time.time() if current_time - last_log_time > 1: progress_percent = (downloaded_bytes / file_size) * 100 if file_size > 0 else 0 @@ -164,28 +151,16 @@ def download_and_decrypt_mega_file(info, download_path, logger_func): last_log_time = current_time logger_func(f" [Mega] ✅ Successfully downloaded '{file_name}' to '{download_path}'") - except requests.RequestException as e: - logger_func(f" [Mega] ❌ Download failed for '{file_name}': {e}") - except IOError as e: - logger_func(f" [Mega] ❌ Could not write to file '{final_path}': {e}") except Exception as e: logger_func(f" [Mega] ❌ An unexpected error occurred during download/decryption: {e}") - -# --- REPLACEMENT Main Service Downloader Function for Mega --- - def download_mega_file(mega_url, download_path, logger_func=print): - """ - Downloads a file from a Mega.nz URL using direct requests and decryption. - This replaces the old mega.py implementation. - """ if not PYCRYPTODOME_AVAILABLE: logger_func("❌ Mega download failed: 'pycryptodome' library is not installed. 
Please run: pip install pycryptodome") return logger_func(f" [Mega] Initializing download for: {mega_url}") - # Regex to capture file ID and key from both old and new URL formats match = re.search(r'mega(?:\.co)?\.nz/(?:file/|#!)?([a-zA-Z0-9]+)(?:#|!)([a-zA-Z0-9_.-]+)', mega_url) if not match: logger_func(f" [Mega] ❌ Error: Invalid Mega URL format.") @@ -199,18 +174,14 @@ def download_mega_file(mega_url, download_path, logger_func=print): file_info = get_mega_file_info(file_id, file_key, session, logger_func) if not file_info: - logger_func(f" [Mega] ❌ Failed to get file info. The link may be invalid or expired. Aborting.") + logger_func(f" [Mega] ❌ Failed to get file info. Aborting.") return logger_func(f" [Mega] File found: '{file_info['file_name']}' (Size: {file_info['file_size'] / 1024 / 1024:.2f} MB)") download_and_decrypt_mega_file(file_info, download_path, logger_func) - -# --- ORIGINAL Functions for Google Drive and Dropbox (Unchanged) --- - def download_gdrive_file(url, download_path, logger_func=print): - """Downloads a file from a Google Drive link.""" if not GDRIVE_AVAILABLE: logger_func("❌ Google Drive download failed: 'gdown' library is not installed.") return @@ -227,12 +198,15 @@ def download_gdrive_file(url, download_path, logger_func=print): except Exception as e: logger_func(f" [G-Drive] ❌ An unexpected error occurred: {e}") +# --- MODIFIED DROPBOX DOWNLOADER --- def download_dropbox_file(dropbox_link, download_path=".", logger_func=print): """ - Downloads a file from a public Dropbox link by modifying the URL for direct download. + Downloads a file or a folder (as a zip) from a public Dropbox link. + Uses cloudscraper to handle potential browser checks and auto-extracts zip files. 
""" logger_func(f" [Dropbox] Attempting to download: {dropbox_link}") + # Modify URL to force download (works for both files and folders) parsed_url = urlparse(dropbox_link) query_params = parse_qs(parsed_url.query) query_params['dl'] = ['1'] @@ -241,26 +215,60 @@ def download_dropbox_file(dropbox_link, download_path=".", logger_func=print): logger_func(f" [Dropbox] Using direct download URL: {direct_download_url}") + scraper = cloudscraper.create_scraper() + try: if not os.path.exists(download_path): os.makedirs(download_path, exist_ok=True) logger_func(f" [Dropbox] Created download directory: {download_path}") - with requests.get(direct_download_url, stream=True, allow_redirects=True, timeout=(10, 300)) as r: + with scraper.get(direct_download_url, stream=True, allow_redirects=True, timeout=(20, 600)) as r: r.raise_for_status() - filename = _get_filename_from_headers(r.headers) or os.path.basename(parsed_url.path) or "dropbox_file" + filename = _get_filename_from_headers(r.headers) or os.path.basename(parsed_url.path) or "dropbox_download" + # If it's a folder, Dropbox will name it FolderName.zip + if not os.path.splitext(filename)[1]: + filename += ".zip" + full_save_path = os.path.join(download_path, filename) logger_func(f" [Dropbox] Starting download of '{filename}'...") + total_size = int(r.headers.get('content-length', 0)) + downloaded_bytes = 0 + last_log_time = time.time() + with open(full_save_path, 'wb') as f: for chunk in r.iter_content(chunk_size=8192): f.write(chunk) + downloaded_bytes += len(chunk) + current_time = time.time() + if total_size > 0 and current_time - last_log_time > 1: + progress = (downloaded_bytes / total_size) * 100 + logger_func(f" -> Downloading '{filename}'... 
{downloaded_bytes/1024/1024:.2f}MB / {total_size/1024/1024:.2f}MB ({progress:.1f}%)") + last_log_time = current_time - logger_func(f" [Dropbox] ✅ Dropbox file downloaded successfully: {full_save_path}") + logger_func(f" [Dropbox] ✅ Download complete: {full_save_path}") + + # --- NEW: Auto-extraction logic --- + if zipfile.is_zipfile(full_save_path): + logger_func(f" [Dropbox] ዚ Detected zip file. Attempting to extract...") + extract_folder_name = os.path.splitext(filename)[0] + extract_path = os.path.join(download_path, extract_folder_name) + os.makedirs(extract_path, exist_ok=True) + + with zipfile.ZipFile(full_save_path, 'r') as zip_ref: + zip_ref.extractall(extract_path) + + logger_func(f" [Dropbox] ✅ Successfully extracted to folder: '{extract_path}'") + + # Optional: remove the zip file after extraction + try: + os.remove(full_save_path) + logger_func(f" [Dropbox] 🗑️ Removed original zip file.") + except OSError as e: + logger_func(f" [Dropbox] ⚠️ Could not remove original zip file: {e}") except Exception as e: logger_func(f" [Dropbox] ❌ An error occurred during Dropbox download: {e}") traceback.print_exc(limit=2) - raise diff --git a/src/ui/main_window.py b/src/ui/main_window.py index 2165d38..42347d0 100644 --- a/src/ui/main_window.py +++ b/src/ui/main_window.py @@ -10,6 +10,7 @@ import re import subprocess import datetime import requests +import cloudscraper import unicodedata from collections import deque, defaultdict import threading @@ -36,6 +37,7 @@ from ..core.workers import PostProcessorSignals from ..core.api_client import download_from_api from ..core.discord_client import fetch_server_channels, fetch_channel_messages from ..core.manager import DownloadManager +from ..core.nhentai_client import fetch_nhentai_gallery from .assets import get_app_icon_object from ..config.constants import * from ..utils.file_utils import KNOWN_NAMES, clean_folder_name @@ -281,7 +283,7 @@ class DownloaderApp (QWidget ): self.download_location_label_widget = None 
self.remove_from_filename_label_widget = None self.skip_words_label_widget = None - self.setWindowTitle("Kemono Downloader v6.4.3") + self.setWindowTitle("Kemono Downloader v6.5.0") setup_ui(self) self._connect_signals() self.log_signal.emit("ℹ️ Local API server functionality has been removed.") @@ -2215,12 +2217,21 @@ class DownloaderApp (QWidget ): if not button or not checked: return is_only_links = (button == self.radio_only_links) - if hasattr(self, 'use_multithreading_checkbox'): + + if hasattr(self, 'use_multithreading_checkbox') and hasattr(self, 'thread_count_input'): if is_only_links: - self.use_multithreading_checkbox.setChecked(False) - self.use_multithreading_checkbox.setEnabled(False) + # When "Only Links" is selected, enable multithreading, set threads to 20, and lock the input. + self.use_multithreading_checkbox.setChecked(True) + self.thread_count_input.setText("20") + self.thread_count_input.setEnabled(False) + self.thread_count_label.setEnabled(False) + self.update_multithreading_label("20") else: - self.use_multithreading_checkbox.setEnabled(True) + # When another mode is selected, re-enable the input for user control. 
+ is_multithreading_checked = self.use_multithreading_checkbox.isChecked() + self.thread_count_input.setEnabled(is_multithreading_checked) + self.thread_count_label.setEnabled(is_multithreading_checked) + if button != self.radio_more and checked: self.radio_more.setText("More") self.more_filter_scope = None @@ -3207,6 +3218,51 @@ class DownloaderApp (QWidget ): api_url = direct_api_url if direct_api_url else self.link_input.text().strip() + # --- NEW: NHENTAI BATCH DOWNLOAD LOGIC --- + if 'nhentai.net' in api_url and not re.search(r'/g/(\d+)', api_url): + self.log_signal.emit("=" * 40) + self.log_signal.emit("🚀 nhentai batch download mode detected.") + + nhentai_txt_path = os.path.join(self.app_base_dir, "appdata", "nhentai.txt") + self.log_signal.emit(f" Looking for batch file at: {nhentai_txt_path}") + + if not os.path.exists(nhentai_txt_path): + QMessageBox.warning(self, "File Not Found", f"To use batch mode, create a file named 'nhentai.txt' in your 'appdata' folder.\n\nPlace one nhentai URL on each line.") + self.log_signal.emit(f" ❌ 'nhentai.txt' not found. 
Aborting batch download.") + return False + + urls_to_download = [] + try: + with open(nhentai_txt_path, 'r', encoding='utf-8') as f: + for line in f: + # Find all URLs in the line (handles comma separation or just spaces) + found_urls = re.findall(r'https?://nhentai\.net/g/\d+/?', line) + if found_urls: + urls_to_download.extend(found_urls) + except Exception as e: + QMessageBox.critical(self, "File Error", f"Could not read 'nhentai.txt':\n{e}") + self.log_signal.emit(f" ❌ Error reading 'nhentai.txt': {e}") + return False + + if not urls_to_download: + QMessageBox.information(self, "Empty File", "No valid nhentai gallery URLs were found in 'nhentai.txt'.") + self.log_signal.emit(" 'nhentai.txt' was found but contained no valid URLs.") + return False + + self.log_signal.emit(f" Found {len(urls_to_download)} URLs to process.") + self.favorite_download_queue.clear() + for url in urls_to_download: + self.favorite_download_queue.append({ + 'url': url, + 'name': f"nhentai gallery from batch", + 'type': 'post' + }) + + if not self.is_processing_favorites_queue: + self._process_next_favorite_download() + return True + # --- END NEW LOGIC --- + main_ui_download_dir = self.dir_input.text().strip() extract_links_only = (self.radio_only_links and self.radio_only_links.isChecked()) effective_output_dir_for_run = "" @@ -3272,6 +3328,35 @@ class DownloaderApp (QWidget ): self.cancellation_message_logged_this_session = False + # --- MODIFIED NHENTAI HANDLING --- + nhentai_match = re.search(r'nhentai\.net/g/(\d+)', api_url) + if nhentai_match: + gallery_id = nhentai_match.group(1) + self.log_signal.emit("=" * 40) + self.log_signal.emit(f"🚀 Detected nhentai gallery ID: {gallery_id}") + + output_dir = self.dir_input.text().strip() + if not output_dir or not os.path.isdir(output_dir): + QMessageBox.critical(self, "Input Error", "A valid Download Location is required.") + return False + + gallery_data = fetch_nhentai_gallery(gallery_id, self.log_signal.emit) + + if not gallery_data: + 
QMessageBox.critical(self, "Error", f"Could not retrieve gallery data for ID {gallery_id}. It may not exist or the API is unavailable.") + return False + + self.set_ui_enabled(False) + self.download_thread = NhentaiDownloadThread(gallery_data, output_dir, self) + self.download_thread.progress_signal.connect(self.handle_main_log) + self.download_thread.finished_signal.connect( + lambda dl, skip, cancelled: self.download_finished(dl, skip, cancelled, []) + ) + self.download_thread.start() + self._update_button_states_and_connections() + return True + # --- END MODIFIED HANDLING --- + service, id1, id2 = extract_post_info(api_url) if not service or not id1: @@ -3282,7 +3367,6 @@ class DownloaderApp (QWidget ): server_id, channel_id = id1, id2 def discord_processing_task(): - # --- FIX: Wrap the entire task in a try...finally block --- try: def queue_logger(message): self.worker_to_gui_queue.put({'type': 'progress', 'payload': (message,)}) @@ -3295,7 +3379,6 @@ class DownloaderApp (QWidget ): self.selected_cookie_filepath, self.app_base_dir, queue_logger ) - # --- SCOPE: MESSAGES (PDF CREATION) --- if self.discord_download_scope == 'messages': queue_logger("=" * 40) queue_logger(f"🚀 Starting Discord PDF export for: {api_url}") @@ -3307,7 +3390,7 @@ class DownloaderApp (QWidget ): return default_filename = f"discord_{server_id}_{channel_id or 'server'}.pdf" - output_filepath = os.path.join(output_dir, default_filename) # We'll save with a default name + output_filepath = os.path.join(output_dir, default_filename) all_messages, channels_to_process = [], [] server_name_for_pdf = server_id @@ -3346,7 +3429,6 @@ class DownloaderApp (QWidget ): self.finished_signal.emit(0, len(all_messages), self.cancellation_event.is_set(), []) return - # --- SCOPE: FILES (DOWNLOAD) --- elif self.discord_download_scope == 'files': worker_args = { 'download_root': effective_output_dir_for_run, 'known_names': list(KNOWN_NAMES), @@ -3406,10 +3488,8 @@ class DownloaderApp (QWidget ): 
self.finished_signal.emit(total_dl, total_skip, self.cancellation_event.is_set(), []) finally: - # This ensures the flag is reset, allowing the UI to finalize correctly self.is_fetcher_thread_running = False - # --- FIX: Set the fetcher running flag to prevent premature finalization --- self.is_fetcher_thread_running = True self.set_ui_enabled(False) @@ -4741,6 +4821,10 @@ class DownloaderApp (QWidget ): self.log_signal.emit(" Cancelling active External Link download thread...") self.external_link_download_thread.cancel() + if isinstance(self.download_thread, NhentaiDownloadThread): + self.log_signal.emit(" Signaling nhentai download thread to cancel.") + self.download_thread.cancel() + def _get_domain_for_service(self, service_name: str) -> str: """Determines the base domain for a given service.""" if not isinstance(service_name, str): @@ -4836,6 +4920,7 @@ class DownloaderApp (QWidget ): if self.download_thread: if isinstance(self.download_thread, QThread): try: + # Disconnect signals to prevent any lingering connections if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.disconnect(self.handle_main_log) if hasattr(self.download_thread, 'add_character_prompt_signal'): self.download_thread.add_character_prompt_signal.disconnect(self.add_character_prompt_signal) if hasattr(self.download_thread, 'finished_signal'): self.download_thread.finished_signal.disconnect(self.download_finished) @@ -4849,9 +4934,8 @@ class DownloaderApp (QWidget ): except (TypeError, RuntimeError) as e: self.log_signal.emit(f"ℹ️ Note during single-thread signal disconnection: {e}") - if not self.download_thread.isRunning(): - self.download_thread.deleteLater() - self.download_thread = None + self.download_thread.deleteLater() + self.download_thread = None else: self.download_thread = None @@ -5911,4 +5995,104 @@ class ExternalLinkDownloadThread (QThread ): self .finished_signal .emit () def cancel (self ): - self .is_cancelled =True \ No newline at end of 
file
+        self .is_cancelled =True
+
+class NhentaiDownloadThread(QThread):
+    """Background thread that downloads every page of a single nhentai gallery.
+
+    The thread reads ``id``, ``media_id``, ``title`` and ``pages`` from the
+    gallery dictionary it is given, creates one folder per gallery inside
+    ``output_dir`` and saves the pages as ``001.ext``, ``002.ext``, ...
+
+    Signals:
+        progress_signal(str): human-readable log lines.
+        finished_signal(int, int, bool): downloaded count, skipped/failed
+            count and whether the run was cancelled. It is emitted exactly
+            once per run, even if the download aborts with an error.
+    """
+    progress_signal = pyqtSignal(str)
+    finished_signal = pyqtSignal(int, int, bool)
+
+    # Mirrors are tried in this order until one of them serves the page.
+    IMAGE_SERVERS = [
+        "https://i.nhentai.net", "https://i2.nhentai.net", "https://i3.nhentai.net",
+        "https://i5.nhentai.net", "https://i7.nhentai.net"
+    ]
+
+    # Maps the one-letter page type code ('t') to a file extension.
+    EXTENSION_MAP = {'j': 'jpg', 'p': 'png', 'g': 'gif', 'w': 'webp' }
+
+    def __init__(self, gallery_data, output_dir, parent=None):
+        """Store the gallery description and the destination directory.
+
+        Args:
+            gallery_data: dict describing the gallery (see class docstring).
+            output_dir: existing directory that will contain the gallery folder.
+            parent: optional Qt parent object.
+        """
+        super().__init__(parent)
+        self.gallery_data = gallery_data
+        self.output_dir = output_dir
+        self.is_cancelled = False
+
+    def run(self):
+        """Download all pages, then emit ``finished_signal`` with the totals."""
+        download_count = 0
+        skip_count = 0
+        try:
+            gallery_id = self.gallery_data.get("id")
+            media_id = self.gallery_data.get("media_id")
+            # 'pages' / 'title' may be present but None; treat that as missing.
+            pages_info = self.gallery_data.get("pages") or []
+            title_info = self.gallery_data.get("title") or {}
+            title = title_info.get("english") or f"gallery_{gallery_id}"
+
+            folder_name = clean_folder_name(title)
+            gallery_path = os.path.join(self.output_dir, folder_name)
+
+            try:
+                os.makedirs(gallery_path, exist_ok=True)
+            except OSError as e:
+                self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
+                # Nothing can be saved, so every page counts as skipped.
+                skip_count = len(pages_info)
+                return
+
+            self.progress_signal.emit(f"⬇️ Downloading '{title}' to folder '{folder_name}'...")
+
+            # One scraper (and one header set) is reused for the whole gallery.
+            scraper = cloudscraper.create_scraper()
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
+                'Referer': f'https://nhentai.net/g/{gallery_id}/'
+            }
+            total_pages = len(pages_info)
+
+            for page_num, page_data in enumerate(pages_info, start=1):
+                if self.is_cancelled:
+                    break
+
+                extension = self.EXTENSION_MAP.get(page_data.get('t', 'j'), 'jpg')
+                relative_path = f"/galleries/{media_id}/{page_num}.{extension}"
+                local_filename = f"{page_num:03d}.{extension}"
+                filepath = os.path.join(gallery_path, local_filename)
+
+                if os.path.exists(filepath):
+                    self.progress_signal.emit(f" -> Skip (Exists): {local_filename}")
+                    skip_count += 1
+                    continue
+
+                page_label = f"{page_num}/{total_pages}"
+                if self._download_page(scraper, relative_path, filepath, headers, page_label):
+                    download_count += 1
+                elif self.is_cancelled:
+                    # A cancelled page is neither a failure nor a skip.
+                    break
+                else:
+                    self.progress_signal.emit(f" ❌ Failed to download {local_filename} from all servers.")
+                    skip_count += 1
+
+                # Small pause between pages to stay polite to the image servers.
+                time.sleep(0.5)
+        except Exception as e:
+            # Top-level boundary of the thread: report instead of dying silently.
+            self.progress_signal.emit(f"❌ Unexpected error during nhentai download: {e}")
+        finally:
+            # Always notify the UI, otherwise it would stay disabled forever.
+            self.finished_signal.emit(download_count, skip_count, self.is_cancelled)
+
+    def _download_page(self, scraper, relative_path, filepath, headers, page_label):
+        """Try each image server in turn; return True once the page is saved.
+
+        The data is streamed into ``<filepath>.part`` and only renamed to
+        ``filepath`` after a complete transfer, so an interrupted download can
+        never be mistaken for an existing page on the next run.
+        """
+        temp_path = filepath + ".part"
+        for server in self.IMAGE_SERVERS:
+            if self.is_cancelled:
+                return False
+
+            full_url = f"{server}{relative_path}"
+            try:
+                self.progress_signal.emit(f" Downloading page {page_label} from {server} ...")
+
+                # The context manager releases the streamed connection in all cases.
+                with scraper.get(full_url, headers=headers, timeout=60, stream=True) as response:
+                    if response.status_code != 200:
+                        self.progress_signal.emit(f" -> {server} returned status {response.status_code}. Trying next server...")
+                        continue
+
+                    with open(temp_path, 'wb') as f:
+                        for chunk in response.iter_content(chunk_size=8192):
+                            if self.is_cancelled:
+                                break
+                            f.write(chunk)
+
+                if self.is_cancelled:
+                    self._discard_partial(temp_path)
+                    return False
+
+                os.replace(temp_path, filepath)
+                return True
+
+            except Exception as e:
+                self._discard_partial(temp_path)
+                self.progress_signal.emit(f" -> {server} failed to connect or timed out: {e}. Trying next server...")
+
+        return False
+
+    @staticmethod
+    def _discard_partial(path):
+        """Remove a leftover partial file; a missing file is not an error."""
+        try:
+            os.remove(path)
+        except OSError:
+            # Best-effort cleanup: the file may never have been created.
+            pass
+
+    def cancel(self):
+        """Ask the thread to stop; checked between pages, servers and chunks."""
+        self.is_cancelled = True