This commit is contained in:
Yuvi9587
2025-08-17 05:51:25 -07:00
parent b0a6c264e1
commit 5f7b526852
6 changed files with 413 additions and 142 deletions

View File

@@ -3,6 +3,7 @@ import traceback
from urllib.parse import urlparse from urllib.parse import urlparse
import json import json
import requests import requests
import cloudscraper
from ..utils.network_utils import extract_post_info, prepare_cookies_for_request from ..utils.network_utils import extract_post_info, prepare_cookies_for_request
from ..config.constants import ( from ..config.constants import (
STYLE_DATE_POST_TITLE STYLE_DATE_POST_TITLE
@@ -80,26 +81,26 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None):
    """
    Fetches the full data, including the 'content' field, for a single post using cloudscraper.

    Args:
        api_domain (str): Host name used to build the API URL.
        service (str): Service segment of the API path.
        user_id (str): Creator/user segment of the API path.
        post_id (str): ID of the post to fetch.
        headers (dict): HTTP headers sent with the request.
        logger (function): Callable that receives progress and error messages.
        cookies_dict (dict, optional): Cookies sent with the request.

    Returns:
        The first element when the API answers with a non-empty list, the
        value under 'post' when it answers with a dict containing that key,
        otherwise the decoded JSON unchanged. None if any step fails.
    """
    post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}"
    logger(f" Fetching full content for post ID {post_id}...")

    scraper = None
    try:
        # Created inside the try block so that a failure while building the
        # scraper is reported through the logger like any other failure.
        scraper = cloudscraper.create_scraper()
        response = scraper.get(post_api_url, headers=headers, timeout=(15, 300), cookies=cookies_dict)
        response.raise_for_status()

        full_post_data = response.json()
        if isinstance(full_post_data, list) and full_post_data:
            return full_post_data[0]
        if isinstance(full_post_data, dict) and 'post' in full_post_data:
            return full_post_data['post']
        return full_post_data
    except Exception as e:
        logger(f" ❌ Failed to fetch full content for post {post_id}: {e}")
        return None
    finally:
        # Release the pooled connections held by the one-shot session.
        if scraper is not None:
            scraper.close()
@@ -138,8 +139,7 @@ def download_from_api(
manga_filename_style_for_sort_check=None, manga_filename_style_for_sort_check=None,
processed_post_ids=None, processed_post_ids=None,
fetch_all_first=False fetch_all_first=False
): ):
# FIX: Define api_domain FIRST, before it is used in the headers
parsed_input_url_for_domain = urlparse(api_url_input) parsed_input_url_for_domain = urlparse(api_url_input)
api_domain = parsed_input_url_for_domain.netloc api_domain = parsed_input_url_for_domain.netloc

View File

@@ -1,63 +1,70 @@
import time import time
import requests import cloudscraper
import json import json
from urllib.parse import urlparse
def fetch_server_channels(server_id, logger=print, cookies_dict=None):
    """
    Fetches all channels for a given Discord server ID from the API.
    Uses cloudscraper to bypass Cloudflare.

    Args:
        server_id (str): Discord server ID inserted into the API URL.
        logger (function): Callable that receives progress and error messages.
        cookies_dict (dict, optional): Cookies sent with the request.

    Returns:
        list: The decoded channel list when the API answers with a JSON list,
        otherwise None (unexpected payload, HTTP error, or any other failure).
    """
    api_url = f"https://kemono.cr/api/v1/discord/server/{server_id}"
    logger(f" Fetching channels for server: {api_url}")

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Referer': f'https://kemono.cr/discord/server/{server_id}',
        # NOTE(review): 'text/css' is kept exactly as the commit sends it on
        # every API request; presumably the server requires it — confirm.
        'Accept': 'text/css'
    }

    scraper = None
    try:
        scraper = cloudscraper.create_scraper()
        response = scraper.get(api_url, headers=headers, cookies=cookies_dict, timeout=30)
        response.raise_for_status()
        channels = response.json()
        if isinstance(channels, list):
            logger(f" ✅ Found {len(channels)} channels for server {server_id}.")
            return channels
        # Previously this path returned None without any trace in the log.
        logger(f" ❌ Unexpected response while fetching channels for server {server_id}: expected a list, got {type(channels).__name__}.")
        return None
    except Exception as e:
        logger(f" ❌ Error fetching server channels for {server_id}: {e}")
        return None
    finally:
        # Release the pooled connections held by the one-shot session.
        if scraper is not None:
            scraper.close()
def fetch_channel_messages(channel_id, logger=print, cancellation_event=None, pause_event=None, cookies_dict=None):
""" """
Fetches all messages from a Discord channel by looping through API pages (pagination). A generator that fetches all messages for a specific Discord channel, handling pagination.
Uses a page size of 150 and handles the specific offset logic. Uses cloudscraper and proper headers to bypass server protection.
""" """
offset = 0 scraper = cloudscraper.create_scraper()
page_size = 150 # Corrected page size based on your findings base_url = f"https://kemono.cr/api/v1/discord/channel/{channel_id}"
api_base_url = f"https://kemono.cr/api/v1/discord/channel/{channel_id}" headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Referer': f'https://kemono.cr/discord/channel/{channel_id}',
'Accept': 'text/css'
}
while not (cancellation_event and cancellation_event.is_set()): offset = 0
if pause_event and pause_event.is_set(): # --- FIX: Corrected the page size for Discord API pagination ---
logger(" Message fetching paused...") page_size = 150
while pause_event.is_set(): # --- END FIX ---
if cancellation_event and cancellation_event.is_set(): break
time.sleep(0.5)
logger(" Message fetching resumed.")
while True:
if cancellation_event and cancellation_event.is_set(): if cancellation_event and cancellation_event.is_set():
logger(" Discord message fetching cancelled.")
break break
if pause_event and pause_event.is_set():
paginated_url = f"{api_base_url}?o={offset}" logger(" Discord message fetching paused...")
while pause_event.is_set():
if cancellation_event and cancellation_event.is_set():
break
time.sleep(0.5)
if not (cancellation_event and cancellation_event.is_set()):
logger(" Discord message fetching resumed.")
paginated_url = f"{base_url}?o={offset}"
logger(f" Fetching messages from API: page starting at offset {offset}") logger(f" Fetching messages from API: page starting at offset {offset}")
try: try:
response = requests.get(paginated_url, cookies=cookies, timeout=20) response = scraper.get(paginated_url, headers=headers, cookies=cookies_dict, timeout=30)
response.raise_for_status() response.raise_for_status()
messages_batch = response.json() messages_batch = response.json()
@@ -73,8 +80,11 @@ def fetch_channel_messages(channel_id, logger, cancellation_event, pause_event,
break break
offset += page_size offset += page_size
time.sleep(0.5) time.sleep(0.5) # Be respectful to the API
except (requests.exceptions.RequestException, json.JSONDecodeError) as e: except (cloudscraper.exceptions.CloudflareException, json.JSONDecodeError) as e:
logger(f" ❌ Error fetching messages at offset {offset}: {e}") logger(f" ❌ Error fetching messages at offset {offset}: {e}")
break break
except Exception as e:
logger(f" ❌ An unexpected error occurred while fetching messages: {e}")
break

View File

@@ -0,0 +1,45 @@
import requests
import cloudscraper
import json
def fetch_nhentai_gallery(gallery_id, logger=print):
    """
    Fetches the metadata for a single nhentai gallery using cloudscraper to bypass Cloudflare.

    Args:
        gallery_id (str or int): The ID of the nhentai gallery.
        logger (function): A function to log progress and error messages.

    Returns:
        dict: The gallery metadata with the page list moved from
        ``images.pages`` to a top-level ``pages`` key if successful,
        otherwise None.
    """
    api_url = f"https://nhentai.net/api/gallery/{gallery_id}"
    logger(f" Fetching nhentai gallery metadata from: {api_url}")

    scraper = None
    try:
        scraper = cloudscraper.create_scraper()
        response = scraper.get(api_url, timeout=20)

        if response.status_code == 404:
            logger(f" ❌ Gallery not found (404): ID {gallery_id}")
            return None

        response.raise_for_status()
        gallery_data = response.json()

        required_keys = ("id", "media_id", "images")
        if not isinstance(gallery_data, dict) or any(key not in gallery_data for key in required_keys):
            logger(" ❌ API response is missing essential keys (id, media_id, or images).")
            return None

        # The title is only used for the log line, so a missing or empty
        # 'english' entry must not discard otherwise valid metadata.
        title_info = gallery_data.get('title')
        if not isinstance(title_info, dict):
            title_info = {}
        display_title = (
            title_info.get('english')
            or title_info.get('pretty')
            or title_info.get('japanese')
            or f"Gallery {gallery_id}"
        )
        logger(f" ✅ Successfully fetched metadata for '{display_title}'")

        # Flatten the page list: callers read 'pages' from the top level.
        gallery_data['pages'] = gallery_data.pop('images')['pages']
        return gallery_data
    except Exception as e:
        logger(f" ❌ An error occurred while fetching gallery {gallery_id}: {e}")
        return None
    finally:
        # Release the pooled connections held by the one-shot session.
        if scraper is not None:
            scraper.close()

View File

@@ -15,6 +15,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError,
from io import BytesIO from io import BytesIO
from urllib .parse import urlparse from urllib .parse import urlparse
import requests import requests
import cloudscraper
try: try:
from PIL import Image from PIL import Image
except ImportError: except ImportError:
def robust_clean_name(name):
    """A more robust function to remove illegal characters for filenames and folders.

    Args:
        name (str): Raw file or folder name; may be empty or None.

    Returns:
        str: "" when ``name`` is falsy, "untitled_folder" when nothing is
        left after cleaning, otherwise the cleaned name.
    """
    if not name:
        return ""

    # Drops control characters (ASCII 0-31) and < > : " / \ | ? * '
    illegal_chars_pattern = r'[\x00-\x1f<>:"/\\|?*\']'
    cleaned_name = re.sub(illegal_chars_pattern, '', name)

    # Leading/trailing spaces or periods can cause issues in file names.
    cleaned_name = cleaned_name.strip(' .')

    # The input consisted only of removed characters: use a safe fallback.
    if not cleaned_name:
        return "untitled_folder"
    return cleaned_name
class PostProcessorSignals (QObject ): class PostProcessorSignals (QObject ):
@@ -271,7 +268,9 @@ class PostProcessorWorker:
file_download_headers = { file_download_headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'Referer': post_page_url 'Referer': post_page_url,
'Accept': 'text/css'
} }
file_url = file_info.get('url') file_url = file_info.get('url')
@@ -429,8 +428,26 @@ class PostProcessorWorker:
self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save_in_main_path}'.") self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save_in_main_path}'.")
was_original_name_kept_flag = False was_original_name_kept_flag = False
else: else:
filename_to_save_in_main_path = cleaned_original_api_filename is_url_like = 'http' in api_original_filename.lower()
was_original_name_kept_flag = True is_too_long = len(cleaned_original_api_filename) > 100
if is_url_like or is_too_long:
self.logger(f" ⚠️ Original filename is a URL or too long. Generating a shorter name.")
name_hash = hashlib.md5(api_original_filename.encode()).hexdigest()[:12]
_, ext = os.path.splitext(cleaned_original_api_filename)
if not ext:
try:
path = urlparse(api_original_filename).path
ext = os.path.splitext(path)[1] or ".file"
except Exception:
ext = ".file"
cleaned_post_title = robust_clean_name(post_title.strip() if post_title else "post")[:40]
filename_to_save_in_main_path = f"{cleaned_post_title}_{name_hash}{ext}"
was_original_name_kept_flag = False
else:
filename_to_save_in_main_path = cleaned_original_api_filename
was_original_name_kept_flag = True
if self.remove_from_filename_words_list and filename_to_save_in_main_path: if self.remove_from_filename_words_list and filename_to_save_in_main_path:
base_name_for_removal, ext_for_removal = os.path.splitext(filename_to_save_in_main_path) base_name_for_removal, ext_for_removal = os.path.splitext(filename_to_save_in_main_path)
@@ -854,9 +871,7 @@ class PostProcessorWorker:
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, details_for_failure return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, details_for_failure
def process(self): def process(self):
# --- START: REFACTORED PROCESS METHOD ---
# 1. DATA MAPPING: Map Discord Message or Creator Post fields to a consistent set of variables.
if self.service == 'discord': if self.service == 'discord':
# For Discord, self.post is a MESSAGE object from the API. # For Discord, self.post is a MESSAGE object from the API.
post_title = self.post.get('content', '') or f"Message {self.post.get('id', 'N/A')}" post_title = self.post.get('content', '') or f"Message {self.post.get('id', 'N/A')}"
@@ -885,19 +900,26 @@ class PostProcessorWorker:
) )
if content_is_needed and self.post.get('content') is None and self.service != 'discord': if content_is_needed and self.post.get('content') is None and self.service != 'discord':
self.logger(f" Post {post_id} is missing 'content' field, fetching full data...") self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
parsed_url = urlparse(self.api_url_input) parsed_url = urlparse(self.api_url_input)
api_domain = parsed_url.netloc api_domain = parsed_url.netloc
headers = {'User-Agent': 'Mozilla/5.0'} creator_page_url = f"https://{api_domain}/{self.service}/user/{self.user_id}"
headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'Referer': creator_page_url,
'Accept': 'text/css'
}
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain) cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
full_post_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies) full_post_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
if full_post_data: if full_post_data:
self.logger(" ✅ Full post data fetched successfully.") self.logger(" ✅ Full post data fetched successfully.")
# Update the worker's post object with the complete data
self.post = full_post_data self.post = full_post_data
# Re-initialize local variables from the new, complete post data
post_title = self.post.get('title', '') or 'untitled_post' post_title = self.post.get('title', '') or 'untitled_post'
post_main_file_info = self.post.get('file') post_main_file_info = self.post.get('file')
post_attachments = self.post.get('attachments', []) post_attachments = self.post.get('attachments', [])
@@ -905,9 +927,7 @@ class PostProcessorWorker:
post_data = self.post post_data = self.post
else: else:
self.logger(f" ⚠️ Failed to fetch full content for post {post_id}. Content-dependent features may not work for this post.") self.logger(f" ⚠️ Failed to fetch full content for post {post_id}. Content-dependent features may not work for this post.")
# --- END FIX ---
# 2. SHARED PROCESSING LOGIC: The rest of the function now uses the consistent variables from above.
result_tuple = (0, 0, [], [], [], None, None) result_tuple = (0, 0, [], [], [], None, None)
total_downloaded_this_post = 0 total_downloaded_this_post = 0
total_skipped_this_post = 0 total_skipped_this_post = 0
@@ -936,7 +956,11 @@ class PostProcessorWorker:
else: else:
post_page_url = f"https://{parsed_api_url.netloc}/{self.service}/user/{self.user_id}/post/{post_id}" post_page_url = f"https://{parsed_api_url.netloc}/{self.service}/user/{self.user_id}/post/{post_id}"
headers = {'User-Agent': 'Mozilla/5.0', 'Referer': post_page_url, 'Accept': '*/*'} headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'Referer': post_page_url,
'Accept': 'text/css'
}
link_pattern = re.compile(r"""<a\s+.*?href=["'](https?://[^"']+)["'][^>]*>(.*?)</a>""", re.IGNORECASE | re.DOTALL) link_pattern = re.compile(r"""<a\s+.*?href=["'](https?://[^"']+)["'][^>]*>(.*?)</a>""", re.IGNORECASE | re.DOTALL)
effective_unwanted_keywords_for_folder_naming = self.unwanted_keywords.copy() effective_unwanted_keywords_for_folder_naming = self.unwanted_keywords.copy()

View File

@@ -5,9 +5,12 @@ import traceback
import json import json
import base64 import base64
import time import time
import zipfile
from urllib.parse import urlparse, urlunparse, parse_qs, urlencode from urllib.parse import urlparse, urlunparse, parse_qs, urlencode
# --- Third-party Library Imports ---
import requests import requests
import cloudscraper
try: try:
from Crypto.Cipher import AES from Crypto.Cipher import AES
@@ -26,12 +29,12 @@ MEGA_API_URL = "https://g.api.mega.co.nz"
def _get_filename_from_headers(headers): def _get_filename_from_headers(headers):
""" """
Extracts a filename from the Content-Disposition header. Extracts a filename from the Content-Disposition header.
(This is from your original file and is kept for Dropbox downloads)
""" """
cd = headers.get('content-disposition') cd = headers.get('content-disposition')
if not cd: if not cd:
return None return None
# Handles both filename="file.zip" and filename*=UTF-8''file%20name.zip
fname_match = re.findall('filename="?([^"]+)"?', cd) fname_match = re.findall('filename="?([^"]+)"?', cd)
if fname_match: if fname_match:
sanitized_name = re.sub(r'[<>:"/\\|?*]', '_', fname_match[0].strip()) sanitized_name = re.sub(r'[<>:"/\\|?*]', '_', fname_match[0].strip())
@@ -39,28 +42,23 @@ def _get_filename_from_headers(headers):
return None return None
# --- NEW: Helper functions for Mega decryption --- # --- Helper functions for Mega decryption ---
def urlb64_to_b64(s):
    """Converts a URL-safe base64 string to a standard base64 string.

    '-' becomes '+', '_' becomes '/', and '=' padding is appended until the
    length is a multiple of four.
    """
    standard = s.translate(str.maketrans('-_', '+/'))
    return standard + '=' * (-len(standard) % 4)
def b64_to_bytes(s):
    """Decodes a URL-safe base64 string (padding optional) to bytes."""
    # Padding is added first because the decoder rejects unpadded input.
    padded = s + '=' * (-len(s) % 4)
    return base64.urlsafe_b64decode(padded)
def bytes_to_hex(b):
    """Converts bytes to a lowercase hex string."""
    return b.hex()
def hex_to_bytes(h):
    """Converts a hex string to bytes."""
    return bytes.fromhex(h)
def hrk2hk(hex_raw_key): def hrk2hk(hex_raw_key):
"""Derives the final AES key from the raw key components for Mega."""
key_part1 = int(hex_raw_key[0:16], 16) key_part1 = int(hex_raw_key[0:16], 16)
key_part2 = int(hex_raw_key[16:32], 16) key_part2 = int(hex_raw_key[16:32], 16)
key_part3 = int(hex_raw_key[32:48], 16) key_part3 = int(hex_raw_key[32:48], 16)
@@ -72,23 +70,20 @@ def hrk2hk(hex_raw_key):
return f'{final_key_part1:016x}{final_key_part2:016x}' return f'{final_key_part1:016x}{final_key_part2:016x}'
def decrypt_at(at_b64, key_bytes):
    """Decrypts the 'at' attribute to get file metadata.

    Args:
        at_b64 (str): URL-safe base64 encoded, AES-CBC encrypted attribute blob.
        key_bytes (bytes): AES key used for decryption.

    Returns:
        str: The decrypted text with NUL padding and the leading 'MEGA'
        marker removed.

    Raises:
        UnicodeDecodeError: If the decrypted bytes are not valid UTF-8.
    """
    at_bytes = b64_to_bytes(at_b64)
    iv = b'\0' * 16  # the attribute blob is decrypted with an all-zero IV
    cipher = AES.new(key_bytes, AES.MODE_CBC, iv)
    decrypted_at = cipher.decrypt(at_bytes).decode('utf-8').strip('\0')

    # Remove only the leading marker. A global replace would also delete
    # "MEGA" from inside the payload, e.g. from a file name containing it.
    marker = 'MEGA'
    if decrypted_at.startswith(marker):
        decrypted_at = decrypted_at[len(marker):]
    return decrypted_at
# --- NEW: Core Logic for Mega Downloads --- # --- Core Logic for Mega Downloads ---
def get_mega_file_info(file_id, file_key, session, logger_func): def get_mega_file_info(file_id, file_key, session, logger_func):
"""Fetches file metadata and the temporary download URL from the Mega API."""
try: try:
hex_raw_key = bytes_to_hex(b64_to_bytes(file_key)) hex_raw_key = bytes_to_hex(b64_to_bytes(file_key))
hex_key = hrk2hk(hex_raw_key) hex_key = hrk2hk(hex_raw_key)
key_bytes = hex_to_bytes(hex_key) key_bytes = hex_to_bytes(hex_key)
# Request file attributes
payload = [{"a": "g", "p": file_id}] payload = [{"a": "g", "p": file_id}]
response = session.post(f"{MEGA_API_URL}/cs", json=payload, timeout=20) response = session.post(f"{MEGA_API_URL}/cs", json=payload, timeout=20)
response.raise_for_status() response.raise_for_status()
@@ -100,13 +95,10 @@ def get_mega_file_info(file_id, file_key, session, logger_func):
file_size = res_json[0]['s'] file_size = res_json[0]['s']
at_b64 = res_json[0]['at'] at_b64 = res_json[0]['at']
# Decrypt attributes to get the file name
at_dec_json_str = decrypt_at(at_b64, key_bytes) at_dec_json_str = decrypt_at(at_b64, key_bytes)
at_dec_json = json.loads(at_dec_json_str) at_dec_json = json.loads(at_dec_json_str)
file_name = at_dec_json['n'] file_name = at_dec_json['n']
# Request the temporary download URL
payload = [{"a": "g", "g": 1, "p": file_id}] payload = [{"a": "g", "g": 1, "p": file_id}]
response = session.post(f"{MEGA_API_URL}/cs", json=payload, timeout=20) response = session.post(f"{MEGA_API_URL}/cs", json=payload, timeout=20)
response.raise_for_status() response.raise_for_status()
@@ -124,19 +116,16 @@ def get_mega_file_info(file_id, file_key, session, logger_func):
return None return None
def download_and_decrypt_mega_file(info, download_path, logger_func): def download_and_decrypt_mega_file(info, download_path, logger_func):
"""Downloads the file and decrypts it chunk by chunk, reporting progress."""
file_name = info['file_name'] file_name = info['file_name']
file_size = info['file_size'] file_size = info['file_size']
dl_url = info['dl_url'] dl_url = info['dl_url']
hex_raw_key = info['hex_raw_key'] hex_raw_key = info['hex_raw_key']
final_path = os.path.join(download_path, file_name) final_path = os.path.join(download_path, file_name)
if os.path.exists(final_path) and os.path.getsize(final_path) == file_size: if os.path.exists(final_path) and os.path.getsize(final_path) == file_size:
logger_func(f" [Mega] File '{file_name}' already exists with the correct size. Skipping.") logger_func(f" [Mega] File '{file_name}' already exists with the correct size. Skipping.")
return return
# Prepare for decryption
key = hex_to_bytes(hrk2hk(hex_raw_key)) key = hex_to_bytes(hrk2hk(hex_raw_key))
iv_hex = hex_raw_key[32:48] + '0000000000000000' iv_hex = hex_raw_key[32:48] + '0000000000000000'
iv_bytes = hex_to_bytes(iv_hex) iv_bytes = hex_to_bytes(iv_hex)
@@ -150,13 +139,11 @@ def download_and_decrypt_mega_file(info, download_path, logger_func):
with open(final_path, 'wb') as f: with open(final_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192): for chunk in r.iter_content(chunk_size=8192):
if not chunk: if not chunk: continue
continue
decrypted_chunk = cipher.decrypt(chunk) decrypted_chunk = cipher.decrypt(chunk)
f.write(decrypted_chunk) f.write(decrypted_chunk)
downloaded_bytes += len(chunk) downloaded_bytes += len(chunk)
# Log progress every second
current_time = time.time() current_time = time.time()
if current_time - last_log_time > 1: if current_time - last_log_time > 1:
progress_percent = (downloaded_bytes / file_size) * 100 if file_size > 0 else 0 progress_percent = (downloaded_bytes / file_size) * 100 if file_size > 0 else 0
@@ -164,28 +151,16 @@ def download_and_decrypt_mega_file(info, download_path, logger_func):
last_log_time = current_time last_log_time = current_time
logger_func(f" [Mega] ✅ Successfully downloaded '{file_name}' to '{download_path}'") logger_func(f" [Mega] ✅ Successfully downloaded '{file_name}' to '{download_path}'")
except requests.RequestException as e:
logger_func(f" [Mega] ❌ Download failed for '{file_name}': {e}")
except IOError as e:
logger_func(f" [Mega] ❌ Could not write to file '{final_path}': {e}")
except Exception as e: except Exception as e:
logger_func(f" [Mega] ❌ An unexpected error occurred during download/decryption: {e}") logger_func(f" [Mega] ❌ An unexpected error occurred during download/decryption: {e}")
# --- REPLACEMENT Main Service Downloader Function for Mega ---
def download_mega_file(mega_url, download_path, logger_func=print): def download_mega_file(mega_url, download_path, logger_func=print):
"""
Downloads a file from a Mega.nz URL using direct requests and decryption.
This replaces the old mega.py implementation.
"""
if not PYCRYPTODOME_AVAILABLE: if not PYCRYPTODOME_AVAILABLE:
logger_func("❌ Mega download failed: 'pycryptodome' library is not installed. Please run: pip install pycryptodome") logger_func("❌ Mega download failed: 'pycryptodome' library is not installed. Please run: pip install pycryptodome")
return return
logger_func(f" [Mega] Initializing download for: {mega_url}") logger_func(f" [Mega] Initializing download for: {mega_url}")
# Regex to capture file ID and key from both old and new URL formats
match = re.search(r'mega(?:\.co)?\.nz/(?:file/|#!)?([a-zA-Z0-9]+)(?:#|!)([a-zA-Z0-9_.-]+)', mega_url) match = re.search(r'mega(?:\.co)?\.nz/(?:file/|#!)?([a-zA-Z0-9]+)(?:#|!)([a-zA-Z0-9_.-]+)', mega_url)
if not match: if not match:
logger_func(f" [Mega] ❌ Error: Invalid Mega URL format.") logger_func(f" [Mega] ❌ Error: Invalid Mega URL format.")
@@ -199,18 +174,14 @@ def download_mega_file(mega_url, download_path, logger_func=print):
file_info = get_mega_file_info(file_id, file_key, session, logger_func) file_info = get_mega_file_info(file_id, file_key, session, logger_func)
if not file_info: if not file_info:
logger_func(f" [Mega] ❌ Failed to get file info. The link may be invalid or expired. Aborting.") logger_func(f" [Mega] ❌ Failed to get file info. Aborting.")
return return
logger_func(f" [Mega] File found: '{file_info['file_name']}' (Size: {file_info['file_size'] / 1024 / 1024:.2f} MB)") logger_func(f" [Mega] File found: '{file_info['file_name']}' (Size: {file_info['file_size'] / 1024 / 1024:.2f} MB)")
download_and_decrypt_mega_file(file_info, download_path, logger_func) download_and_decrypt_mega_file(file_info, download_path, logger_func)
# --- ORIGINAL Functions for Google Drive and Dropbox (Unchanged) ---
def download_gdrive_file(url, download_path, logger_func=print): def download_gdrive_file(url, download_path, logger_func=print):
"""Downloads a file from a Google Drive link."""
if not GDRIVE_AVAILABLE: if not GDRIVE_AVAILABLE:
logger_func("❌ Google Drive download failed: 'gdown' library is not installed.") logger_func("❌ Google Drive download failed: 'gdown' library is not installed.")
return return
@@ -227,12 +198,15 @@ def download_gdrive_file(url, download_path, logger_func=print):
except Exception as e: except Exception as e:
logger_func(f" [G-Drive] ❌ An unexpected error occurred: {e}") logger_func(f" [G-Drive] ❌ An unexpected error occurred: {e}")
# --- MODIFIED DROPBOX DOWNLOADER ---
def download_dropbox_file(dropbox_link, download_path=".", logger_func=print): def download_dropbox_file(dropbox_link, download_path=".", logger_func=print):
""" """
Downloads a file from a public Dropbox link by modifying the URL for direct download. Downloads a file or a folder (as a zip) from a public Dropbox link.
Uses cloudscraper to handle potential browser checks and auto-extracts zip files.
""" """
logger_func(f" [Dropbox] Attempting to download: {dropbox_link}") logger_func(f" [Dropbox] Attempting to download: {dropbox_link}")
# Modify URL to force download (works for both files and folders)
parsed_url = urlparse(dropbox_link) parsed_url = urlparse(dropbox_link)
query_params = parse_qs(parsed_url.query) query_params = parse_qs(parsed_url.query)
query_params['dl'] = ['1'] query_params['dl'] = ['1']
@@ -241,26 +215,60 @@ def download_dropbox_file(dropbox_link, download_path=".", logger_func=print):
logger_func(f" [Dropbox] Using direct download URL: {direct_download_url}") logger_func(f" [Dropbox] Using direct download URL: {direct_download_url}")
scraper = cloudscraper.create_scraper()
try: try:
if not os.path.exists(download_path): if not os.path.exists(download_path):
os.makedirs(download_path, exist_ok=True) os.makedirs(download_path, exist_ok=True)
logger_func(f" [Dropbox] Created download directory: {download_path}") logger_func(f" [Dropbox] Created download directory: {download_path}")
with requests.get(direct_download_url, stream=True, allow_redirects=True, timeout=(10, 300)) as r: with scraper.get(direct_download_url, stream=True, allow_redirects=True, timeout=(20, 600)) as r:
r.raise_for_status() r.raise_for_status()
filename = _get_filename_from_headers(r.headers) or os.path.basename(parsed_url.path) or "dropbox_file" filename = _get_filename_from_headers(r.headers) or os.path.basename(parsed_url.path) or "dropbox_download"
# If it's a folder, Dropbox will name it FolderName.zip
if not os.path.splitext(filename)[1]:
filename += ".zip"
full_save_path = os.path.join(download_path, filename) full_save_path = os.path.join(download_path, filename)
logger_func(f" [Dropbox] Starting download of '{filename}'...") logger_func(f" [Dropbox] Starting download of '{filename}'...")
total_size = int(r.headers.get('content-length', 0))
downloaded_bytes = 0
last_log_time = time.time()
with open(full_save_path, 'wb') as f: with open(full_save_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192): for chunk in r.iter_content(chunk_size=8192):
f.write(chunk) f.write(chunk)
downloaded_bytes += len(chunk)
current_time = time.time()
if total_size > 0 and current_time - last_log_time > 1:
progress = (downloaded_bytes / total_size) * 100
logger_func(f" -> Downloading '{filename}'... {downloaded_bytes/1024/1024:.2f}MB / {total_size/1024/1024:.2f}MB ({progress:.1f}%)")
last_log_time = current_time
logger_func(f" [Dropbox] ✅ Dropbox file downloaded successfully: {full_save_path}") logger_func(f" [Dropbox] ✅ Download complete: {full_save_path}")
# --- NEW: Auto-extraction logic ---
if zipfile.is_zipfile(full_save_path):
logger_func(f" [Dropbox] ዚ Detected zip file. Attempting to extract...")
extract_folder_name = os.path.splitext(filename)[0]
extract_path = os.path.join(download_path, extract_folder_name)
os.makedirs(extract_path, exist_ok=True)
with zipfile.ZipFile(full_save_path, 'r') as zip_ref:
zip_ref.extractall(extract_path)
logger_func(f" [Dropbox] ✅ Successfully extracted to folder: '{extract_path}'")
# Optional: remove the zip file after extraction
try:
os.remove(full_save_path)
logger_func(f" [Dropbox] 🗑️ Removed original zip file.")
except OSError as e:
logger_func(f" [Dropbox] ⚠️ Could not remove original zip file: {e}")
except Exception as e: except Exception as e:
logger_func(f" [Dropbox] ❌ An error occurred during Dropbox download: {e}") logger_func(f" [Dropbox] ❌ An error occurred during Dropbox download: {e}")
traceback.print_exc(limit=2) traceback.print_exc(limit=2)
raise

View File

@@ -10,6 +10,7 @@ import re
import subprocess import subprocess
import datetime import datetime
import requests import requests
import cloudscraper
import unicodedata import unicodedata
from collections import deque, defaultdict from collections import deque, defaultdict
import threading import threading
@@ -36,6 +37,7 @@ from ..core.workers import PostProcessorSignals
from ..core.api_client import download_from_api from ..core.api_client import download_from_api
from ..core.discord_client import fetch_server_channels, fetch_channel_messages from ..core.discord_client import fetch_server_channels, fetch_channel_messages
from ..core.manager import DownloadManager from ..core.manager import DownloadManager
from ..core.nhentai_client import fetch_nhentai_gallery
from .assets import get_app_icon_object from .assets import get_app_icon_object
from ..config.constants import * from ..config.constants import *
from ..utils.file_utils import KNOWN_NAMES, clean_folder_name from ..utils.file_utils import KNOWN_NAMES, clean_folder_name
@@ -281,7 +283,7 @@ class DownloaderApp (QWidget ):
self.download_location_label_widget = None self.download_location_label_widget = None
self.remove_from_filename_label_widget = None self.remove_from_filename_label_widget = None
self.skip_words_label_widget = None self.skip_words_label_widget = None
self.setWindowTitle("Kemono Downloader v6.4.3") self.setWindowTitle("Kemono Downloader v6.5.0")
setup_ui(self) setup_ui(self)
self._connect_signals() self._connect_signals()
self.log_signal.emit(" Local API server functionality has been removed.") self.log_signal.emit(" Local API server functionality has been removed.")
@@ -2215,12 +2217,21 @@ class DownloaderApp (QWidget ):
if not button or not checked: if not button or not checked:
return return
is_only_links = (button == self.radio_only_links) is_only_links = (button == self.radio_only_links)
if hasattr(self, 'use_multithreading_checkbox'):
if hasattr(self, 'use_multithreading_checkbox') and hasattr(self, 'thread_count_input'):
if is_only_links: if is_only_links:
self.use_multithreading_checkbox.setChecked(False) # When "Only Links" is selected, enable multithreading, set threads to 20, and lock the input.
self.use_multithreading_checkbox.setEnabled(False) self.use_multithreading_checkbox.setChecked(True)
self.thread_count_input.setText("20")
self.thread_count_input.setEnabled(False)
self.thread_count_label.setEnabled(False)
self.update_multithreading_label("20")
else: else:
self.use_multithreading_checkbox.setEnabled(True) # When another mode is selected, re-enable the input for user control.
is_multithreading_checked = self.use_multithreading_checkbox.isChecked()
self.thread_count_input.setEnabled(is_multithreading_checked)
self.thread_count_label.setEnabled(is_multithreading_checked)
if button != self.radio_more and checked: if button != self.radio_more and checked:
self.radio_more.setText("More") self.radio_more.setText("More")
self.more_filter_scope = None self.more_filter_scope = None
@@ -3207,6 +3218,51 @@ class DownloaderApp (QWidget ):
api_url = direct_api_url if direct_api_url else self.link_input.text().strip() api_url = direct_api_url if direct_api_url else self.link_input.text().strip()
# --- NEW: NHENTAI BATCH DOWNLOAD LOGIC ---
if 'nhentai.net' in api_url and not re.search(r'/g/(\d+)', api_url):
self.log_signal.emit("=" * 40)
self.log_signal.emit("🚀 nhentai batch download mode detected.")
nhentai_txt_path = os.path.join(self.app_base_dir, "appdata", "nhentai.txt")
self.log_signal.emit(f" Looking for batch file at: {nhentai_txt_path}")
if not os.path.exists(nhentai_txt_path):
QMessageBox.warning(self, "File Not Found", f"To use batch mode, create a file named 'nhentai.txt' in your 'appdata' folder.\n\nPlace one nhentai URL on each line.")
self.log_signal.emit(f"'nhentai.txt' not found. Aborting batch download.")
return False
urls_to_download = []
try:
with open(nhentai_txt_path, 'r', encoding='utf-8') as f:
for line in f:
# Find all URLs in the line (handles comma separation or just spaces)
found_urls = re.findall(r'https?://nhentai\.net/g/\d+/?', line)
if found_urls:
urls_to_download.extend(found_urls)
except Exception as e:
QMessageBox.critical(self, "File Error", f"Could not read 'nhentai.txt':\n{e}")
self.log_signal.emit(f" ❌ Error reading 'nhentai.txt': {e}")
return False
if not urls_to_download:
QMessageBox.information(self, "Empty File", "No valid nhentai gallery URLs were found in 'nhentai.txt'.")
self.log_signal.emit(" 'nhentai.txt' was found but contained no valid URLs.")
return False
self.log_signal.emit(f" Found {len(urls_to_download)} URLs to process.")
self.favorite_download_queue.clear()
for url in urls_to_download:
self.favorite_download_queue.append({
'url': url,
'name': f"nhentai gallery from batch",
'type': 'post'
})
if not self.is_processing_favorites_queue:
self._process_next_favorite_download()
return True
# --- END NEW LOGIC ---
main_ui_download_dir = self.dir_input.text().strip() main_ui_download_dir = self.dir_input.text().strip()
extract_links_only = (self.radio_only_links and self.radio_only_links.isChecked()) extract_links_only = (self.radio_only_links and self.radio_only_links.isChecked())
effective_output_dir_for_run = "" effective_output_dir_for_run = ""
@@ -3272,6 +3328,35 @@ class DownloaderApp (QWidget ):
self.cancellation_message_logged_this_session = False self.cancellation_message_logged_this_session = False
# --- MODIFIED NHENTAI HANDLING ---
nhentai_match = re.search(r'nhentai\.net/g/(\d+)', api_url)
if nhentai_match:
gallery_id = nhentai_match.group(1)
self.log_signal.emit("=" * 40)
self.log_signal.emit(f"🚀 Detected nhentai gallery ID: {gallery_id}")
output_dir = self.dir_input.text().strip()
if not output_dir or not os.path.isdir(output_dir):
QMessageBox.critical(self, "Input Error", "A valid Download Location is required.")
return False
gallery_data = fetch_nhentai_gallery(gallery_id, self.log_signal.emit)
if not gallery_data:
QMessageBox.critical(self, "Error", f"Could not retrieve gallery data for ID {gallery_id}. It may not exist or the API is unavailable.")
return False
self.set_ui_enabled(False)
self.download_thread = NhentaiDownloadThread(gallery_data, output_dir, self)
self.download_thread.progress_signal.connect(self.handle_main_log)
self.download_thread.finished_signal.connect(
lambda dl, skip, cancelled: self.download_finished(dl, skip, cancelled, [])
)
self.download_thread.start()
self._update_button_states_and_connections()
return True
# --- END MODIFIED HANDLING ---
service, id1, id2 = extract_post_info(api_url) service, id1, id2 = extract_post_info(api_url)
if not service or not id1: if not service or not id1:
@@ -3282,7 +3367,6 @@ class DownloaderApp (QWidget ):
server_id, channel_id = id1, id2 server_id, channel_id = id1, id2
def discord_processing_task(): def discord_processing_task():
# --- FIX: Wrap the entire task in a try...finally block ---
try: try:
def queue_logger(message): def queue_logger(message):
self.worker_to_gui_queue.put({'type': 'progress', 'payload': (message,)}) self.worker_to_gui_queue.put({'type': 'progress', 'payload': (message,)})
@@ -3295,7 +3379,6 @@ class DownloaderApp (QWidget ):
self.selected_cookie_filepath, self.app_base_dir, queue_logger self.selected_cookie_filepath, self.app_base_dir, queue_logger
) )
# --- SCOPE: MESSAGES (PDF CREATION) ---
if self.discord_download_scope == 'messages': if self.discord_download_scope == 'messages':
queue_logger("=" * 40) queue_logger("=" * 40)
queue_logger(f"🚀 Starting Discord PDF export for: {api_url}") queue_logger(f"🚀 Starting Discord PDF export for: {api_url}")
@@ -3307,7 +3390,7 @@ class DownloaderApp (QWidget ):
return return
default_filename = f"discord_{server_id}_{channel_id or 'server'}.pdf" default_filename = f"discord_{server_id}_{channel_id or 'server'}.pdf"
output_filepath = os.path.join(output_dir, default_filename) # We'll save with a default name output_filepath = os.path.join(output_dir, default_filename)
all_messages, channels_to_process = [], [] all_messages, channels_to_process = [], []
server_name_for_pdf = server_id server_name_for_pdf = server_id
@@ -3346,7 +3429,6 @@ class DownloaderApp (QWidget ):
self.finished_signal.emit(0, len(all_messages), self.cancellation_event.is_set(), []) self.finished_signal.emit(0, len(all_messages), self.cancellation_event.is_set(), [])
return return
# --- SCOPE: FILES (DOWNLOAD) ---
elif self.discord_download_scope == 'files': elif self.discord_download_scope == 'files':
worker_args = { worker_args = {
'download_root': effective_output_dir_for_run, 'known_names': list(KNOWN_NAMES), 'download_root': effective_output_dir_for_run, 'known_names': list(KNOWN_NAMES),
@@ -3406,10 +3488,8 @@ class DownloaderApp (QWidget ):
self.finished_signal.emit(total_dl, total_skip, self.cancellation_event.is_set(), []) self.finished_signal.emit(total_dl, total_skip, self.cancellation_event.is_set(), [])
finally: finally:
# This ensures the flag is reset, allowing the UI to finalize correctly
self.is_fetcher_thread_running = False self.is_fetcher_thread_running = False
# --- FIX: Set the fetcher running flag to prevent premature finalization ---
self.is_fetcher_thread_running = True self.is_fetcher_thread_running = True
self.set_ui_enabled(False) self.set_ui_enabled(False)
@@ -4741,6 +4821,10 @@ class DownloaderApp (QWidget ):
self.log_signal.emit(" Cancelling active External Link download thread...") self.log_signal.emit(" Cancelling active External Link download thread...")
self.external_link_download_thread.cancel() self.external_link_download_thread.cancel()
if isinstance(self.download_thread, NhentaiDownloadThread):
self.log_signal.emit(" Signaling nhentai download thread to cancel.")
self.download_thread.cancel()
def _get_domain_for_service(self, service_name: str) -> str: def _get_domain_for_service(self, service_name: str) -> str:
"""Determines the base domain for a given service.""" """Determines the base domain for a given service."""
if not isinstance(service_name, str): if not isinstance(service_name, str):
@@ -4836,6 +4920,7 @@ class DownloaderApp (QWidget ):
if self.download_thread: if self.download_thread:
if isinstance(self.download_thread, QThread): if isinstance(self.download_thread, QThread):
try: try:
# Disconnect signals to prevent any lingering connections
if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.disconnect(self.handle_main_log) if hasattr(self.download_thread, 'progress_signal'): self.download_thread.progress_signal.disconnect(self.handle_main_log)
if hasattr(self.download_thread, 'add_character_prompt_signal'): self.download_thread.add_character_prompt_signal.disconnect(self.add_character_prompt_signal) if hasattr(self.download_thread, 'add_character_prompt_signal'): self.download_thread.add_character_prompt_signal.disconnect(self.add_character_prompt_signal)
if hasattr(self.download_thread, 'finished_signal'): self.download_thread.finished_signal.disconnect(self.download_finished) if hasattr(self.download_thread, 'finished_signal'): self.download_thread.finished_signal.disconnect(self.download_finished)
@@ -4849,9 +4934,8 @@ class DownloaderApp (QWidget ):
except (TypeError, RuntimeError) as e: except (TypeError, RuntimeError) as e:
self.log_signal.emit(f" Note during single-thread signal disconnection: {e}") self.log_signal.emit(f" Note during single-thread signal disconnection: {e}")
if not self.download_thread.isRunning(): self.download_thread.deleteLater()
self.download_thread.deleteLater() self.download_thread = None
self.download_thread = None
else: else:
self.download_thread = None self.download_thread = None
@@ -5911,4 +5995,104 @@ class ExternalLinkDownloadThread (QThread ):
self .finished_signal .emit () self .finished_signal .emit ()
def cancel (self ): def cancel (self ):
self .is_cancelled =True self .is_cancelled =True
class NhentaiDownloadThread(QThread):
    """Worker thread that downloads all page images of a single nhentai gallery.

    The gallery is described by ``gallery_data`` (a dict read with the keys
    ``id``, ``media_id``, ``title`` -> ``english`` and ``pages``). Every page is
    saved as ``NNN.<ext>`` inside a folder named after the gallery title under
    ``output_dir``. Each page is requested from the hosts in ``IMAGE_SERVERS``
    in order until one of them answers with HTTP 200.

    Signals:
        progress_signal(str): human-readable log lines.
        finished_signal(int, int, bool): downloaded count, skipped/failed
            count, and whether the run was cancelled. Emitted exactly once
            per run, including when the run ends with an unexpected error.
    """

    progress_signal = pyqtSignal(str)
    finished_signal = pyqtSignal(int, int, bool)

    # Image hosts, tried in this order for every page.
    IMAGE_SERVERS = [
        "https://i.nhentai.net", "https://i2.nhentai.net", "https://i3.nhentai.net",
        "https://i5.nhentai.net", "https://i7.nhentai.net"
    ]

    # Maps the one-letter type code found under a page's 't' key to a file extension.
    EXTENSION_MAP = {'j': 'jpg', 'p': 'png', 'g': 'gif', 'w': 'webp'}

    def __init__(self, gallery_data, output_dir, parent=None):
        """Store the gallery description and the destination directory.

        Args:
            gallery_data: dict describing the gallery (see class docstring).
            output_dir: directory under which the gallery folder is created.
            parent: optional Qt parent object.
        """
        super().__init__(parent)
        self.gallery_data = gallery_data
        self.output_dir = output_dir
        self.is_cancelled = False

    def run(self):
        """Download every page of the gallery, then emit ``finished_signal``."""
        gallery_id = self.gallery_data.get("id")
        media_id = self.gallery_data.get("media_id")
        # `or` fallbacks also cover keys that are present but None/empty.
        pages_info = self.gallery_data.get("pages") or []
        title = (self.gallery_data.get("title") or {}).get("english") or f"gallery_{gallery_id}"

        download_count = 0
        skip_count = 0
        try:
            folder_name = clean_folder_name(title)
            gallery_path = os.path.join(self.output_dir, folder_name)
            try:
                os.makedirs(gallery_path, exist_ok=True)
            except OSError as e:
                self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
                # Nothing can be saved, so report every page as skipped.
                skip_count = len(pages_info)
                return

            self.progress_signal.emit(f"⬇️ Downloading '{title}' to folder '{folder_name}'...")

            # One scraper instance and one header set are reused for the whole gallery.
            scraper = cloudscraper.create_scraper()
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
                'Referer': f'https://nhentai.net/g/{gallery_id}/'
            }
            total_pages = len(pages_info)

            for page_num, page_data in enumerate(pages_info, start=1):
                if self.is_cancelled:
                    break

                extension = self.EXTENSION_MAP.get(page_data.get('t', 'j'), 'jpg')
                relative_path = f"/galleries/{media_id}/{page_num}.{extension}"
                local_filename = f"{page_num:03d}.{extension}"
                filepath = os.path.join(gallery_path, local_filename)

                if os.path.exists(filepath):
                    self.progress_signal.emit(f" -> Skip (Exists): {local_filename}")
                    skip_count += 1
                    continue

                if self._download_page(scraper, relative_path, headers, filepath, page_num, total_pages):
                    download_count += 1
                elif self.is_cancelled:
                    # A cancelled page is neither a failure nor a skip.
                    break
                else:
                    self.progress_signal.emit(f" ❌ Failed to download {local_filename} from all servers.")
                    skip_count += 1

                # Brief pause between pages to avoid hammering the image hosts.
                time.sleep(0.5)
        except Exception as e:
            # Top-level boundary of the thread: log instead of letting the
            # exception escape, so the finished signal below is still emitted.
            self.progress_signal.emit(f"❌ Unexpected error during nhentai download: {e}")
        finally:
            self.finished_signal.emit(download_count, skip_count, self.is_cancelled)

    def _download_page(self, scraper, relative_path, headers, filepath, page_num, total_pages):
        """Try each image server in turn; return True once the page is saved."""
        for server in self.IMAGE_SERVERS:
            if self.is_cancelled:
                return False
            try:
                self.progress_signal.emit(f" Downloading page {page_num}/{total_pages} from {server} ...")
                status = self._save_image(scraper, f"{server}{relative_path}", headers, filepath)
            except Exception as e:
                self.progress_signal.emit(f" -> {server} failed to connect or timed out: {e}. Trying next server...")
                continue
            if status == 200:
                return True
            if status is None:
                # Cancelled in the middle of the transfer.
                return False
            self.progress_signal.emit(f" -> {server} returned status {status}. Trying next server...")
        return False

    def _save_image(self, scraper, url, headers, filepath):
        """Stream ``url`` into ``filepath``.

        Returns the HTTP status code, or None if the transfer was cancelled.
        The data is written to a ``.part`` file and renamed only after the
        whole body has arrived, so an interrupted transfer never leaves a
        truncated image that a later run would skip as already existing.
        """
        temp_path = f"{filepath}.part"
        # The context manager releases the streamed connection on every path.
        with scraper.get(url, headers=headers, timeout=60, stream=True) as response:
            if response.status_code != 200:
                return response.status_code
            try:
                with open(temp_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if self.is_cancelled:
                            return None
                        f.write(chunk)
                os.replace(temp_path, filepath)
            finally:
                # Still present only when the transfer did not complete.
                if os.path.exists(temp_path):
                    try:
                        os.remove(temp_path)
                    except OSError:
                        pass
            return response.status_code

    def cancel(self):
        """Request cancellation; the run stops at the next cancellation check."""
        self.is_cancelled = True