20 Commits

Author SHA1 Message Date
Yuvi9587
fae9a4bbe2 Commit 2025-12-28 09:25:36 +05:30
Yuvi9587
1ad1e53b57 Commit 2025-12-28 09:23:20 +05:30
Yuvi9587
77bd428b91 Commit 2025-12-25 21:56:04 +05:30
Yuvi9587
4bf57eb752 Socks 4 and 5 proxy support 2025-12-24 09:27:01 +05:30
Yuvi9587
de202961a0 Proxy Type Dropdown List 2025-12-24 09:26:43 +05:30
Yuvi9587
e806b6de66 Update deviantart_downloader_thread.py 2025-12-24 09:26:07 +05:30
Yuvi9587
cb8dd3b7f3 Proxy Type Key 2025-12-24 09:26:04 +05:30
Yuvi9587
5a8c151c97 Deviant Support fix 2025-12-23 22:52:50 +05:30
Yuvi9587
50ba60a461 Fixed devient download 2025-12-23 21:27:21 +05:30
Yuvi9587
23521e7060 Added "Proxy/Network" Tab 2025-12-23 21:27:08 +05:30
Yuvi9587
f9c504b936 Proxy Support 2025-12-23 21:26:49 +05:30
Yuvi9587
efa0abd0f1 Fixed devient download (Kinda) 2025-12-23 21:26:34 +05:30
Yuvi9587
7d76d00470 Proxy 2025-12-23 21:26:18 +05:30
Yuvi9587
1494d3f456 Proxy Support Keys 2025-12-23 21:26:11 +05:30
Yuvi9587
675646e763 Fixed Error Dialog 2025-12-22 09:15:26 +05:30
Yuvi9587
611e892576 "add to queue" button 2025-12-21 22:12:44 +05:30
Yuvi9587
23fd7f0714 Added a "add to queue" feature 2025-12-21 22:12:34 +05:30
Yuvi9587
cfcd800a49 Fixed unnecessary fetch in renaming mode 2025-12-21 22:12:14 +05:30
Yuvi9587
24acec2dc3 Fixed unnecessary fetch in renaming mode 2025-12-21 22:12:09 +05:30
Yuvi63771
b5b6c1bc46 Commit 2025-12-14 19:33:17 +05:30
27 changed files with 2743 additions and 615 deletions

View File

@@ -68,6 +68,15 @@ DISCORD_TOKEN_KEY = "discord/token"
POST_DOWNLOAD_ACTION_KEY = "postDownloadAction"
# --- Proxy / Network Keys ---
# String identifiers for the proxy-related configuration values.
# presumably these are keys into the application's persisted settings store,
# like the neighbouring *_KEY constants — verify against the settings code.
PROXY_ENABLED_KEY = "proxy/enabled"
PROXY_HOST_KEY = "proxy/host"
PROXY_PORT_KEY = "proxy/port"
PROXY_USERNAME_KEY = "proxy/username"
PROXY_PASSWORD_KEY = "proxy/password"
# NOTE(review): unlike the five keys above, this value is not under the
# "proxy/" prefix. Confirm whether that is intentional before renaming it;
# any value already stored under "proxy_type" depends on this exact string.
PROXY_TYPE_KEY = "proxy_type"
# --- UI Constants and Identifiers ---
HTML_PREFIX = "<!HTML!>"
LOG_DISPLAY_LINKS = "links"

View File

@@ -10,10 +10,9 @@ import queue
def run_hentai2read_download(start_url, output_dir, progress_callback, overall_progress_callback, check_pause_func):
"""
Orchestrates the download process using a producer-consumer model.
The main thread scrapes image URLs and puts them in a queue.
A pool of worker threads consumes from the queue to download images concurrently.
"""
scraper = cloudscraper.create_scraper()
all_failed_files = [] # Track all failures across chapters
try:
progress_callback(" [Hentai2Read] Scraping series page for all metadata...")
@@ -39,8 +38,7 @@ def run_hentai2read_download(start_url, output_dir, progress_callback, overall_p
final_save_path = os.path.join(output_dir, series_folder, chapter_folder)
os.makedirs(final_save_path, exist_ok=True)
# This function now scrapes and downloads simultaneously
dl_count, skip_count = _process_and_download_chapter(
dl_count, skip_count, chapter_failures = _process_and_download_chapter(
chapter_url=chapter['url'],
save_path=final_save_path,
scraper=scraper,
@@ -51,9 +49,22 @@ def run_hentai2read_download(start_url, output_dir, progress_callback, overall_p
total_downloaded_count += dl_count
total_skipped_count += skip_count
if chapter_failures:
all_failed_files.extend(chapter_failures)
overall_progress_callback(total_chapters, idx + 1)
if check_pause_func(): break
# --- FINAL SUMMARY OF FAILURES ---
if all_failed_files:
progress_callback("\n" + "="*40)
progress_callback(f"❌ SUMMARY: {len(all_failed_files)} files failed permanently after 10 retries:")
for fail_msg in all_failed_files:
progress_callback(f"{fail_msg}")
progress_callback("="*40 + "\n")
else:
progress_callback("\n✅ All chapters processed successfully with no permanent failures.")
return total_downloaded_count, total_skipped_count
except Exception as e:
@@ -63,9 +74,8 @@ def run_hentai2read_download(start_url, output_dir, progress_callback, overall_p
def _get_series_metadata(start_url, progress_callback, scraper):
"""
Scrapes the main series page to get the Artist Name, Series Title, and chapter list.
Includes a retry mechanism for the initial connection.
"""
max_retries = 4 # Total number of attempts (1 initial + 3 retries)
max_retries = 4
last_exception = None
soup = None
@@ -77,8 +87,6 @@ def _get_series_metadata(start_url, progress_callback, scraper):
response = scraper.get(start_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
# If successful, clear exception and break the loop
last_exception = None
break
@@ -86,8 +94,8 @@ def _get_series_metadata(start_url, progress_callback, scraper):
last_exception = e
progress_callback(f" [Hentai2Read] ⚠️ Connection attempt {attempt + 1} failed: {e}")
if attempt < max_retries - 1:
time.sleep(2 * (attempt + 1)) # Wait 2s, 4s, 6s
continue # Try again
time.sleep(2 * (attempt + 1))
continue
if last_exception:
progress_callback(f" [Hentai2Read] ❌ Error getting series metadata after {max_retries} attempts: {last_exception}")
@@ -96,23 +104,36 @@ def _get_series_metadata(start_url, progress_callback, scraper):
try:
series_title = "Unknown Series"
artist_name = None
metadata_list = soup.select_one("ul.list.list-simple-mini")
if metadata_list:
first_li = metadata_list.find('li', recursive=False)
if first_li and not first_li.find('a'):
series_title = first_li.get_text(strip=True)
# 1. Try fetching Title
title_tag = soup.select_one("h3.block-title a")
if title_tag:
series_title = title_tag.get_text(strip=True)
else:
meta_title = soup.select_one("meta[property='og:title']")
if meta_title:
series_title = meta_title.get("content", "Unknown Series").replace(" - Hentai2Read", "")
# 2. Try fetching Artist
metadata_list = soup.select_one("ul.list.list-simple-mini")
if metadata_list:
for b_tag in metadata_list.find_all('b'):
label = b_tag.get_text(strip=True)
if label in ("Artist", "Author"):
if "Artist" in label or "Author" in label:
a_tag = b_tag.find_next_sibling('a')
if a_tag:
artist_name = a_tag.get_text(strip=True)
if label == "Artist":
break
break
top_level_folder_name = artist_name if artist_name else series_title
if not artist_name:
artist_link = soup.find('a', href=re.compile(r'/hentai-list/artist/'))
if artist_link:
artist_name = artist_link.get_text(strip=True)
if artist_name:
top_level_folder_name = f"{artist_name} - {series_title}"
else:
top_level_folder_name = series_title
chapter_links = soup.select("div.media a.pull-left.font-w600")
if not chapter_links:
@@ -124,7 +145,7 @@ def _get_series_metadata(start_url, progress_callback, scraper):
]
chapters_to_process.reverse()
progress_callback(f" [Hentai2Read] ✅ Found Artist/Series: '{top_level_folder_name}'")
progress_callback(f" [Hentai2Read] ✅ Found Metadata: '{top_level_folder_name}'")
progress_callback(f" [Hentai2Read] ✅ Found {len(chapters_to_process)} chapters to process.")
return top_level_folder_name, chapters_to_process
@@ -136,69 +157,102 @@ def _get_series_metadata(start_url, progress_callback, scraper):
def _process_and_download_chapter(chapter_url, save_path, scraper, progress_callback, check_pause_func):
"""
Uses a producer-consumer pattern to download a chapter.
The main thread (producer) scrapes URLs one by one.
Worker threads (consumers) download the URLs as they are found.
Includes RETRY LOGIC and ACTIVE LOGGING.
"""
task_queue = queue.Queue()
num_download_threads = 8
download_stats = {'downloaded': 0, 'skipped': 0}
failed_files_list = []
def downloader_worker():
"""The function that each download thread will run."""
worker_scraper = cloudscraper.create_scraper()
while True:
try:
# Get a task from the queue
task = task_queue.get()
# The sentinel value to signal the end
if task is None:
break
filepath, img_url = task
if os.path.exists(filepath):
progress_callback(f" -> Skip: '{os.path.basename(filepath)}'")
download_stats['skipped'] += 1
else:
progress_callback(f" Downloading: '{os.path.basename(filepath)}'...")
task = task_queue.get()
if task is None:
task_queue.task_done()
break
filepath, img_url = task
filename = os.path.basename(filepath)
if os.path.exists(filepath):
# We log skips to show it's checking files
progress_callback(f" -> Skip (Exists): '{filename}'")
download_stats['skipped'] += 1
task_queue.task_done()
continue
# --- RETRY LOGIC START ---
success = False
# UNCOMMENTED: Log the start of download so you see activity
progress_callback(f" Downloading: '{filename}'...")
for attempt in range(10): # Try 10 times
try:
if attempt > 0:
progress_callback(f" ⚠️ Retrying '{filename}' (Attempt {attempt+1}/10)...")
time.sleep(2)
response = worker_scraper.get(img_url, stream=True, timeout=60, headers={'Referer': chapter_url})
response.raise_for_status()
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
download_stats['downloaded'] += 1
except Exception as e:
progress_callback(f" ❌ Download failed for task. Error: {e}")
download_stats['skipped'] += 1
finally:
task_queue.task_done()
success = True
# UNCOMMENTED: Log success
progress_callback(f" ✅ Downloaded: '{filename}'")
break
except Exception as e:
if attempt == 9:
progress_callback(f" ❌ Failed '{filename}' after 10 attempts: {e}")
if not success:
failed_files_list.append(f"{filename} (Chapter: {os.path.basename(save_path)})")
# Clean up empty file if failed
if os.path.exists(filepath):
try:
os.remove(filepath)
except OSError: pass
task_queue.task_done()
executor = ThreadPoolExecutor(max_workers=num_download_threads, thread_name_prefix='H2R_Downloader')
for _ in range(num_download_threads):
executor.submit(downloader_worker)
page_number = 1
progress_callback(" [Hentai2Read] Scanning pages...") # Initial log
while True:
if check_pause_func(): break
if page_number > 300: # Safety break
if page_number > 300:
progress_callback(" [Hentai2Read] ⚠️ Safety break: Reached 300 pages.")
break
# Log occasionally to show scanning is alive
if page_number % 10 == 0:
progress_callback(f" [Hentai2Read] Scanned {page_number} pages so far...")
page_url_to_check = f"{chapter_url}{page_number}/"
try:
page_response = None
page_last_exception = None
for page_attempt in range(3): # 3 attempts for sub-pages
for page_attempt in range(3):
try:
page_response = scraper.get(page_url_to_check, timeout=30)
page_last_exception = None
break
except Exception as e:
page_last_exception = e
time.sleep(1) # Short delay for page scraping retries
time.sleep(1)
if page_last_exception:
raise page_last_exception # Give up after 3 tries
raise page_last_exception
if page_response.history or page_response.status_code != 200:
progress_callback(f" [Hentai2Read] End of chapter detected on page {page_number}.")
@@ -209,7 +263,7 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
img_src = img_tag.get("src") if img_tag else None
if not img_tag or img_src == "https://static.hentai.direct/hentai":
progress_callback(f" [Hentai2Read] End of chapter detected (Placeholder image on page {page_number}).")
progress_callback(f" [Hentai2Read] End of chapter detected (Last page reached at {page_number}).")
break
normalized_img_src = urljoin(page_response.url, img_src)
@@ -220,15 +274,19 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
task_queue.put((filepath, normalized_img_src))
page_number += 1
time.sleep(0.1) # Small delay between scraping pages
time.sleep(0.1)
except Exception as e:
progress_callback(f" [Hentai2Read] ❌ Error while scraping page {page_number}: {e}")
break
# Signal workers to exit
for _ in range(num_download_threads):
task_queue.put(None)
# Wait for all tasks to complete
task_queue.join()
executor.shutdown(wait=True)
progress_callback(f" Found and processed {page_number - 1} images for this chapter.")
return download_stats['downloaded'], download_stats['skipped']
progress_callback(f" Chapter complete. Processed {page_number - 1} images.")
return download_stats['downloaded'], download_stats['skipped'], failed_files_list

View File

@@ -5,7 +5,8 @@ import time
import random
from urllib.parse import urlparse
def get_chapter_list(scraper, series_url, logger_func):
# 1. Update arguments to accept proxies=None
def get_chapter_list(scraper, series_url, logger_func, proxies=None):
"""
Checks if a URL is a series page and returns a list of all chapter URLs if it is.
Relies on a passed-in scraper session for connection.
@@ -16,9 +17,13 @@ def get_chapter_list(scraper, series_url, logger_func):
response = None
max_retries = 8
# 2. Define smart timeout logic
req_timeout = (30, 120) if proxies else 30
for attempt in range(max_retries):
try:
response = scraper.get(series_url, headers=headers, timeout=30)
# 3. Add proxies, verify=False, and the new timeout
response = scraper.get(series_url, headers=headers, timeout=req_timeout, proxies=proxies, verify=False)
response.raise_for_status()
logger_func(f" [AllComic] Successfully connected to series page on attempt {attempt + 1}.")
break
@@ -53,7 +58,8 @@ def get_chapter_list(scraper, series_url, logger_func):
logger_func(f" [AllComic] ❌ Error parsing chapters after successful connection: {e}")
return []
def fetch_chapter_data(scraper, chapter_url, logger_func):
# 4. Update arguments here too
def fetch_chapter_data(scraper, chapter_url, logger_func, proxies=None):
"""
Fetches the comic title, chapter title, and image URLs for a single chapter page.
Relies on a passed-in scraper session for connection.
@@ -64,9 +70,14 @@ def fetch_chapter_data(scraper, chapter_url, logger_func):
response = None
max_retries = 8
# 5. Define smart timeout logic again
req_timeout = (30, 120) if proxies else 30
for attempt in range(max_retries):
try:
response = scraper.get(chapter_url, headers=headers, timeout=30)
# 6. Add proxies, verify=False, and timeout
response = scraper.get(chapter_url, headers=headers, timeout=req_timeout, proxies=proxies, verify=False)
response.raise_for_status()
break
except requests.RequestException as e:

View File

@@ -4,13 +4,37 @@ from urllib.parse import urlparse
import json
import requests
import cloudscraper
import ssl
from requests.adapters import HTTPAdapter
from urllib3.poolmanager import PoolManager
from ..utils.network_utils import extract_post_info, prepare_cookies_for_request
from ..config.constants import (
STYLE_DATE_POST_TITLE
STYLE_DATE_POST_TITLE,
STYLE_DATE_BASED,
STYLE_POST_TITLE_GLOBAL_NUMBERING
)
# --- NEW: Custom Adapter to fix SSL errors ---
class CustomSSLAdapter(HTTPAdapter):
    """
    HTTPAdapter whose connection pool uses an SSL context with certificate
    and hostname verification turned off.

    This prevents the 'Cannot set verify_mode to CERT_NONE' error that the
    ``ssl`` module raises when verification is disabled while hostname
    checking is still enabled.

    WARNING: connections made through this adapter are not authenticated;
    they are open to man-in-the-middle interception.
    """

    def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
        """
        Build the urllib3 PoolManager with the relaxed SSL context.

        Args:
            connections: Number of connection pools to cache.
            maxsize: Maximum number of connections kept per pool.
            block: Whether the pool blocks when no connection is free.
            **pool_kwargs: Extra PoolManager options. Accepted so the
                override stays compatible with the base-class signature;
                any 'ssl_context' given here is replaced by ours.
        """
        ctx = ssl.create_default_context()
        # Order is crucial: hostname checking must be disabled FIRST,
        # otherwise assigning CERT_NONE raises ValueError.
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

        pool_kwargs["ssl_context"] = ctx
        self.poolmanager = PoolManager(
            num_pools=connections,
            maxsize=maxsize,
            block=block,
            **pool_kwargs
        )
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None, proxies=None):
"""
Fetches a single page of posts from the API with robust retry logic.
"""
@@ -23,7 +47,7 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
raise RuntimeError("Fetch operation cancelled by user while paused.")
time.sleep(0.5)
logger(" Post fetching resumed.")
fields_to_request = "id,user,service,title,shared_file,added,published,edited,file,attachments,tags"
fields_to_request = "id,user,service,title,shared_file,added,published,edited,file,attachments,tags,content"
paginated_url = f'{api_url_base}?o={offset}&fields={fields_to_request}'
max_retries = 3
@@ -38,11 +62,14 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
log_message += f" (Attempt {attempt + 1}/{max_retries})"
logger(log_message)
request_timeout = (30, 120) if proxies else (15, 60)
try:
response = requests.get(paginated_url, headers=headers, timeout=(15, 60), cookies=cookies_dict)
response.raise_for_status()
response.encoding = 'utf-8'
return response.json()
with requests.get(paginated_url, headers=headers, timeout=request_timeout, cookies=cookies_dict, proxies=proxies, verify=False) as response:
response.raise_for_status()
response.encoding = 'utf-8'
return response.json()
except requests.exceptions.RequestException as e:
# Handle 403 error on the FIRST page as a rate limit/block
@@ -79,34 +106,67 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.")
def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None):
def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None, proxies=None):
"""
--- MODIFIED FUNCTION ---
Fetches the full data, including the 'content' field, for a single post using cloudscraper.
Includes RETRY logic for 429 Rate Limit errors.
"""
post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}"
logger(f" Fetching full content for post ID {post_id}...")
scraper = cloudscraper.create_scraper()
try:
response = scraper.get(post_api_url, headers=headers, timeout=(15, 300), cookies=cookies_dict)
response.raise_for_status()
full_post_data = response.json()
if isinstance(full_post_data, list) and full_post_data:
return full_post_data[0]
if isinstance(full_post_data, dict) and 'post' in full_post_data:
return full_post_data['post']
return full_post_data
except Exception as e:
logger(f" ❌ Failed to fetch full content for post {post_id}: {e}")
return None
# Retry settings
max_retries = 4
for attempt in range(max_retries + 1):
scraper = None
try:
scraper = cloudscraper.create_scraper()
# Mount custom SSL adapter
adapter = CustomSSLAdapter()
scraper.mount("https://", adapter)
request_timeout = (30, 300) if proxies else (15, 300)
response = scraper.get(post_api_url, headers=headers, timeout=request_timeout, cookies=cookies_dict, proxies=proxies, verify=False)
def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
# --- FIX: Handle 429 Rate Limit explicitly ---
if response.status_code == 429:
wait_time = 20 + (attempt * 10) # 20s, 30s, 40s...
logger(f" ⚠️ Rate Limited (429) on post {post_id}. Waiting {wait_time} seconds before retrying...")
time.sleep(wait_time)
continue # Try loop again
# ---------------------------------------------
response.raise_for_status()
full_post_data = response.json()
if isinstance(full_post_data, list) and full_post_data:
return full_post_data[0]
if isinstance(full_post_data, dict) and 'post' in full_post_data:
return full_post_data['post']
return full_post_data
except Exception as e:
# Catch "Too Many Requests" if it wasn't caught by status_code check above
if "429" in str(e) or "Too Many Requests" in str(e):
if attempt < max_retries:
wait_time = 20 + (attempt * 10)
logger(f" ⚠️ Rate Limit Error caught: {e}. Waiting {wait_time}s...")
time.sleep(wait_time)
continue
# Only log error if this was the last attempt
if attempt == max_retries:
logger(f" ❌ Failed to fetch full content for post {post_id} after {max_retries} retries: {e}")
return None
finally:
if scraper:
scraper.close()
return None
def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None, pause_event=None, cookies_dict=None, proxies=None):
"""Fetches all comments for a specific post."""
if cancellation_event and cancellation_event.is_set():
raise RuntimeError("Comment fetch operation cancelled by user.")
@@ -115,10 +175,12 @@ def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger,
logger(f" Fetching comments: {comments_api_url}")
try:
response = requests.get(comments_api_url, headers=headers, timeout=(10, 30), cookies=cookies_dict)
response.raise_for_status()
response.encoding = 'utf-8'
return response.json()
request_timeout = (30, 60) if proxies else (10, 30)
with requests.get(comments_api_url, headers=headers, timeout=request_timeout, cookies=cookies_dict, proxies=proxies, verify=False) as response:
response.raise_for_status()
response.encoding = 'utf-8'
return response.json()
except requests.exceptions.RequestException as e:
raise RuntimeError(f"Error fetching comments for post {post_id}: {e}")
except ValueError as e:
@@ -138,7 +200,8 @@ def download_from_api(
app_base_dir=None,
manga_filename_style_for_sort_check=None,
processed_post_ids=None,
fetch_all_first=False
fetch_all_first=False,
proxies=None
):
parsed_input_url_for_domain = urlparse(api_url_input)
api_domain = parsed_input_url_for_domain.netloc
@@ -174,10 +237,13 @@ def download_from_api(
direct_post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{target_post_id}"
logger(f" Attempting direct fetch for target post: {direct_post_api_url}")
try:
direct_response = requests.get(direct_post_api_url, headers=headers, timeout=(10, 30), cookies=cookies_for_api)
direct_response.raise_for_status()
direct_response.encoding = 'utf-8'
direct_post_data = direct_response.json()
request_timeout = (30, 60) if proxies else (10, 30)
with requests.get(direct_post_api_url, headers=headers, timeout=request_timeout, cookies=cookies_for_api, proxies=proxies, verify=False) as direct_response:
direct_response.raise_for_status()
direct_response.encoding = 'utf-8'
direct_post_data = direct_response.json()
if isinstance(direct_post_data, list) and direct_post_data:
direct_post_data = direct_post_data[0]
if isinstance(direct_post_data, dict) and 'post' in direct_post_data and isinstance(direct_post_data['post'], dict):
@@ -200,12 +266,23 @@ def download_from_api(
if target_post_id and (start_page or end_page):
logger("⚠️ Page range (start/end page) is ignored when a specific post URL is provided (searching all pages for the post).")
is_manga_mode_fetch_all_and_sort_oldest_first = manga_mode and (manga_filename_style_for_sort_check != STYLE_DATE_POST_TITLE) and not target_post_id
# --- FIXED LOGIC HERE ---
# Define which styles require fetching ALL posts first (Sequential Mode)
styles_requiring_fetch_all = [STYLE_DATE_BASED, STYLE_POST_TITLE_GLOBAL_NUMBERING]
# Only enable "fetch all and sort" if the current style is explicitly in the list above
is_manga_mode_fetch_all_and_sort_oldest_first = (
manga_mode and
(manga_filename_style_for_sort_check in styles_requiring_fetch_all) and
not target_post_id
)
should_fetch_all = fetch_all_first or is_manga_mode_fetch_all_and_sort_oldest_first
api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/posts"
page_size = 50
if is_manga_mode_fetch_all_and_sort_oldest_first:
logger(f" Manga Mode (Style: {manga_filename_style_for_sort_check if manga_filename_style_for_sort_check else 'Default'} - Oldest First Sort Active): Fetching all posts to sort by date...")
logger(f" Manga Mode (Style: {manga_filename_style_for_sort_check} - Oldest First Sort Active): Fetching all posts to sort by date...")
all_posts_for_manga_mode = []
current_offset_manga = 0
if start_page and start_page > 1:
@@ -232,7 +309,7 @@ def download_from_api(
logger(f" Manga Mode: Reached specified end page ({end_page}). Stopping post fetch.")
break
try:
posts_batch_manga = fetch_posts_paginated(api_base_url, headers, current_offset_manga, logger, cancellation_event, pause_event, cookies_dict=cookies_for_api)
posts_batch_manga = fetch_posts_paginated(api_base_url, headers, current_offset_manga, logger, cancellation_event, pause_event, cookies_dict=cookies_for_api, proxies=proxies)
if not isinstance(posts_batch_manga, list):
logger(f"❌ API Error (Manga Mode): Expected list of posts, got {type(posts_batch_manga)}.")
break
@@ -300,8 +377,9 @@ def download_from_api(
yield all_posts_for_manga_mode[i:i + page_size]
return
if manga_mode and not target_post_id and (manga_filename_style_for_sort_check == STYLE_DATE_POST_TITLE):
logger(f" Manga Mode (Style: {STYLE_DATE_POST_TITLE}): Processing posts in default API order (newest first).")
# Log specific message for styles that are in Manga Mode but NOT sorting (Streaming)
if manga_mode and not target_post_id and (manga_filename_style_for_sort_check not in styles_requiring_fetch_all):
logger(f" Renaming Mode (Style: {manga_filename_style_for_sort_check}): Processing posts in default API order (Streaming).")
current_page_num = 1
current_offset = 0
@@ -311,7 +389,6 @@ def download_from_api(
current_page_num = start_page
logger(f" Starting from page {current_page_num} (calculated offset {current_offset}).")
# --- START OF MODIFIED BLOCK ---
while True:
if pause_event and pause_event.is_set():
logger(" Post fetching loop paused...")
@@ -334,8 +411,7 @@ def download_from_api(
break
try:
# 1. Fetch the raw batch of posts
raw_posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event, pause_event, cookies_dict=cookies_for_api)
raw_posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event, pause_event, cookies_dict=cookies_for_api, proxies=proxies)
if not isinstance(raw_posts_batch, list):
logger(f"❌ API Error: Expected list of posts, got {type(raw_posts_batch)} at page {current_page_num} (offset {current_offset}).")
break
@@ -350,7 +426,6 @@ def download_from_api(
traceback.print_exc()
break
# 2. Check if the *raw* batch from the API was empty. This is the correct "end" condition.
if not raw_posts_batch:
if target_post_id and not processed_target_post_flag:
logger(f"❌ Target post {target_post_id} not found after checking all available pages (API returned no more posts at offset {current_offset}).")
@@ -359,9 +434,8 @@ def download_from_api(
logger(f"😕 No posts found on the first page checked (page {current_page_num}, offset {current_offset}).")
else:
logger(f"✅ Reached end of posts (no more content from API at offset {current_offset}).")
break # This break is now correct.
break
# 3. Filter the batch against processed IDs
posts_batch_to_yield = raw_posts_batch
original_count = len(raw_posts_batch)
@@ -371,25 +445,17 @@ def download_from_api(
if skipped_count > 0:
logger(f" Skipped {skipped_count} already processed post(s) from page {current_page_num}.")
# 4. Process the *filtered* batch
if target_post_id and not processed_target_post_flag:
# Still searching for a specific post
matching_post = next((p for p in posts_batch_to_yield if str(p.get('id')) == str(target_post_id)), None)
if matching_post:
logger(f"🎯 Found target post {target_post_id} on page {current_page_num} (offset {current_offset}).")
yield [matching_post]
processed_target_post_flag = True
elif not target_post_id:
# Downloading a creator feed
if posts_batch_to_yield:
# We found new posts on this page, yield them
yield posts_batch_to_yield
elif original_count > 0:
# We found 0 new posts, but the page *did* have posts (they were just skipped).
# Log this and continue to the next page.
logger(f" No new posts found on page {current_page_num}. Checking next page...")
# If original_count was 0, the `if not raw_posts_batch:` check
# already caught it and broke the loop.
if processed_target_post_flag:
break
@@ -397,7 +463,6 @@ def download_from_api(
current_offset += page_size
current_page_num += 1
time.sleep(0.6)
# --- END OF MODIFIED BLOCK ---
if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()):
logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).")
logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).")

View File

@@ -0,0 +1,193 @@
import requests
import re
import os
import time
import threading
from urllib.parse import urlparse
class DeviantArtClient:
# Public Client Credentials
CLIENT_ID = "5388"
CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1"
BASE_API = "https://www.deviantart.com/api/v1/oauth2"
# 1. Accept proxies in init
def __init__(self, logger_func=print, proxies=None):
self.session = requests.Session()
# 2. Configure Session with Proxy & SSL settings immediately
if proxies:
self.session.proxies.update(proxies)
self.session.verify = False # Ignore SSL for proxies
self.proxies_enabled = True
else:
self.proxies_enabled = False
self.session.headers.update({
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
})
self.access_token = None
self.logger = logger_func
# --- DEDUPLICATION LOGIC ---
self.logged_waits = set()
self.log_lock = threading.Lock()
def authenticate(self):
"""Authenticates using client credentials flow."""
try:
url = "https://www.deviantart.com/oauth2/token"
data = {
"grant_type": "client_credentials",
"client_id": self.CLIENT_ID,
"client_secret": self.CLIENT_SECRET
}
# 3. Smart timeout (longer if proxy)
req_timeout = 30 if self.proxies_enabled else 10
resp = self.session.post(url, data=data, timeout=req_timeout)
resp.raise_for_status()
data = resp.json()
self.access_token = data.get("access_token")
return True
except Exception as e:
self.logger(f"DA Auth Error: {e}")
return False
def _api_call(self, endpoint, params=None):
if not self.access_token:
if not self.authenticate():
raise Exception("Authentication failed")
url = f"{self.BASE_API}{endpoint}"
params = params or {}
params['access_token'] = self.access_token
params['mature_content'] = 'true'
retries = 0
max_retries = 4
backoff_delay = 2
# 4. Smart timeout
req_timeout = 30 if self.proxies_enabled else 20
while True:
try:
resp = self.session.get(url, params=params, timeout=req_timeout)
# 429: Rate Limit
if resp.status_code == 429:
retry_after = resp.headers.get('Retry-After')
if retry_after:
sleep_time = int(retry_after) + 2 # Add buffer
else:
# 5. Increase default wait time for 429s
sleep_time = 15
self._log_once(sleep_time, f" [DeviantArt] ⚠️ Rate limit (429). Sleeping {sleep_time}s...")
time.sleep(sleep_time)
continue
# 401: Token Expired (Refresh and Retry)
if resp.status_code == 401:
self.logger(" [DeviantArt] Token expired. Refreshing...")
if self.authenticate():
params['access_token'] = self.access_token
continue
else:
raise Exception("Failed to refresh token")
if 400 <= resp.status_code < 500:
resp.raise_for_status()
if 500 <= resp.status_code < 600:
resp.raise_for_status()
resp.raise_for_status()
with self.log_lock:
self.logged_waits.clear()
return resp.json()
except requests.exceptions.HTTPError as e:
if e.response is not None and 400 <= e.response.status_code < 500:
raise e
pass
except requests.exceptions.RequestException as e:
if retries < max_retries:
self._log_once("conn_error", f" [DeviantArt] Connection error: {e}. Retrying...")
time.sleep(backoff_delay)
retries += 1
continue
raise e
def _log_once(self, key, message):
"""Helper to avoid spamming the same log message during loops."""
should_log = False
with self.log_lock:
if key not in self.logged_waits:
self.logged_waits.add(key)
should_log = True
if should_log:
self.logger(message)
def get_deviation_uuid(self, url):
"""Scrapes the deviation page to find the UUID."""
try:
req_timeout = 30 if self.proxies_enabled else 15
resp = self.session.get(url, timeout=req_timeout)
match = re.search(r'"deviationUuid":"([^"]+)"', resp.text)
if match:
return match.group(1)
match = re.search(r'-(\d+)$', url)
if match:
return match.group(1)
except Exception as e:
self.logger(f"Error scraping UUID: {e}")
return None
def get_deviation_content(self, uuid):
"""Fetches download info."""
try:
data = self._api_call(f"/deviation/download/{uuid}")
if 'src' in data:
return data
except:
pass
try:
meta = self._api_call(f"/deviation/{uuid}")
if 'content' in meta:
return meta['content']
except:
pass
return None
def get_gallery_folder(self, username, offset=0, limit=24):
    """Query the ``/gallery/all`` endpoint for one page of a user's gallery.

    ``username``, ``offset`` and ``limit`` are forwarded as request
    parameters; whatever ``self._api_call`` returns is passed back.
    """
    query = {"username": username, "offset": offset, "limit": limit}
    return self._api_call("/gallery/all", query)
@staticmethod
def extract_info_from_url(url):
parsed = urlparse(url)
path = parsed.path.strip('/')
parts = path.split('/')
if len(parts) >= 3 and parts[1] == 'art':
return 'post', parts[0], parts[2]
elif len(parts) >= 2 and parts[1] == 'gallery':
return 'gallery', parts[0], None
elif len(parts) == 1:
return 'gallery', parts[0], None
return None, None, None

0
src/core/hentaifox.txt Normal file
View File

View File

@@ -0,0 +1,60 @@
import requests
import re
from bs4 import BeautifulSoup # Optional, but regex is faster for this specific site
# Logic derived from NHdownloader.sh 'hentaifox' function
# Site root; the gallery and reader page URLs in this module are built from it.
BASE_URL = "https://hentaifox.com"
# Headers passed to the requests.get() calls in this module: a desktop
# Chrome User-Agent, the site itself as Referer, and an HTML-first Accept.
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
"Referer": "https://hentaifox.com/",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
}
def get_gallery_id(url_or_id):
    """Extract the numeric gallery ID from a URL or a bare ID.

    A ``/gallery/<id>`` or ``/g/<id>`` path segment is preferred, so that
    digits appearing earlier in the URL (for example in the host name) are
    not mistaken for the ID. Otherwise the first run of digits in the
    input is used.

    Returns:
        The ID as a string, or ``None`` when the input contains no digits.
    """
    text = str(url_or_id)
    path_match = re.search(r"/(?:gallery|g)/(\d+)", text)
    if path_match:
        return path_match.group(1)
    match = re.search(r"(\d+)", text)
    return match.group(1) if match else None
def get_gallery_metadata(gallery_id, timeout=30):
    """
    Fetches the main gallery page to get the Title and Total Pages.
    Equivalent to the first part of the 'hentaifox' function in .sh file.

    Args:
        gallery_id: ID used to build ``{BASE_URL}/gallery/{gallery_id}/``.
        timeout: Seconds passed to ``requests.get``. Without a timeout the
            request can block indefinitely on an unresponsive server.

    Returns:
        dict with keys ``id``, ``title`` and ``total_pages`` (int).

    Raises:
        ValueError: if no ``Pages: <n>`` marker is found in the page.
        Whatever ``response.raise_for_status()`` raises on an error status.
    """
    url = f"{BASE_URL}/gallery/{gallery_id}/"
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    page = response.text
    # Extract Title (Bash: grep -o '<title>.*</title>')
    title_match = re.search(r'<title>(.*?)</title>', page)
    if title_match:
        title = title_match.group(1).replace(" - HentaiFox", "").strip()
    else:
        title = f"Gallery {gallery_id}"
    # Extract Total Pages (Bash: grep -Eo 'Pages: [0-9]*')
    pages_match = re.search(r'Pages: (\d+)', page)
    if not pages_match:
        raise ValueError("Could not find total pages count.")
    return {
        "id": gallery_id,
        "title": title,
        "total_pages": int(pages_match.group(1)),
    }
def get_image_link_for_page(gallery_id, page_num, timeout=30):
    """
    Fetches the specific reader page to find the actual image URL.
    Equivalent to the loop in the 'hentaifox' function:
    url="https://hentaifox.com/g/${id}/${i}/"

    Args:
        gallery_id: Gallery ID used in the reader URL.
        page_num: Page number within the gallery.
        timeout: Seconds passed to ``requests.get``. Without a timeout the
            request can block indefinitely on an unresponsive server.

    Returns:
        The first ``data-src="https://..."`` URL found in the page, or
        ``None`` when the page contains none.
    """
    url = f"{BASE_URL}/g/{gallery_id}/{page_num}/"
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    # Extract image source (Bash: grep -Eo 'data-src="..."')
    # Regex looks for: data-src="https://..."
    match = re.search(r'data-src="(https://[^"]+)"', response.text)
    return match.group(1) if match else None

View File

@@ -3,7 +3,7 @@ import time
import os
import json
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed, Future
from concurrent.futures import ThreadPoolExecutor, as_completed, Future, CancelledError
from .api_client import download_from_api
from .workers import PostProcessorWorker
from ..config.constants import (
@@ -84,8 +84,18 @@ class DownloadManager:
is_single_post = bool(config.get('target_post_id_from_initial_url'))
use_multithreading = config.get('use_multithreading', True)
is_manga_sequential = config.get('manga_mode_active') and config.get('manga_filename_style') in [STYLE_DATE_BASED, STYLE_POST_TITLE_GLOBAL_NUMBERING]
# --- FIXED LOGIC: Strict check for sequential fetch modes ---
# Only "Date Based" and "Title + Global Numbering" require fetching the full list first.
# "Custom", "Date + Title", "Original Name", and "Post ID" will now use the pool (streaming).
sequential_styles = [STYLE_DATE_BASED, STYLE_POST_TITLE_GLOBAL_NUMBERING]
is_manga_sequential = (
config.get('manga_mode_active') and
config.get('manga_filename_style') in sequential_styles
)
# If it is NOT a strictly sequential manga mode, we use the pool (fetch-as-we-go)
should_use_multithreading_for_posts = use_multithreading and not is_single_post and not is_manga_sequential
if should_use_multithreading_for_posts:
@@ -97,12 +107,34 @@ class DownloadManager:
fetcher_thread.start()
else:
# Single-threaded mode does not use the manager's complex logic
self._log(" Manager is handing off to a single-threaded worker...")
self._log(" Manager is handing off to a single-threaded worker (Sequential Mode)...")
# The single-threaded worker will manage its own lifecycle and signals.
# The manager's role for this session is effectively over.
self.is_running = False # Allow another session to start if needed
self.progress_queue.put({'type': 'handoff_to_single_thread', 'payload': (config,)})
def _get_proxies_from_config(self, config):
    """Constructs the proxy dictionary from the config.

    Reads ``proxy_enabled``, ``proxy_host``, ``proxy_port`` and, optionally,
    ``proxy_username`` / ``proxy_password`` from ``config``.

    Returns:
        ``{"http": url, "https": url}`` with an ``http://`` proxy URL, or
        ``None`` when the proxy is disabled or host/port is missing.
        Credentials are included only when both username and password are
        set, and are percent-encoded so that characters such as ``@``,
        ``:`` or ``/`` cannot corrupt the URL.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    if not config.get('proxy_enabled'):
        return None

    host = config.get('proxy_host')
    port = config.get('proxy_port')
    if not host or not port:
        return None

    # Add auth if provided
    user = config.get('proxy_username')
    password = config.get('proxy_password')
    credentials = ""
    if user and password:
        credentials = f"{quote(str(user), safe='')}:{quote(str(password), safe='')}@"

    proxy_str = f"http://{credentials}{host}:{port}"
    return {
        "http": proxy_str,
        "https": proxy_str,
    }
def _fetch_and_queue_posts_for_pool(self, config, restore_data, creator_profile_data):
"""
@@ -117,6 +149,9 @@ class DownloadManager:
session_processed_ids = set(restore_data.get('processed_post_ids', [])) if restore_data else set()
profile_processed_ids = set(creator_profile_data.get('processed_post_ids', []))
processed_ids = session_processed_ids.union(profile_processed_ids)
# Helper to get proxies
proxies = self._get_proxies_from_config(config)
if restore_data and 'all_posts_data' in restore_data:
# This logic for session restore remains as it relies on a pre-fetched list
@@ -132,127 +167,113 @@ class DownloadManager:
return
for post_data in posts_to_process:
if self.cancellation_event.is_set(): break
worker = PostProcessorWorker(post_data, config, self.progress_queue)
if self.cancellation_event.is_set():
break
worker_args = self._map_config_to_worker_args(post_data, config)
# Manually inject proxies here if _map_config_to_worker_args didn't catch it (though it should)
worker_args['proxies'] = proxies
worker = PostProcessorWorker(**worker_args)
future = self.thread_pool.submit(worker.process)
future.add_done_callback(self._handle_future_result)
self.active_futures.append(future)
else:
# --- START: REFACTORED STREAMING LOGIC ---
# --- Streaming Logic ---
if proxies:
self._log(f" 🌐 Using Proxy: {config.get('proxy_host')}:{config.get('proxy_port')}")
post_generator = download_from_api(
api_url_input=config['api_url'],
logger=self._log,
start_page=config.get('start_page'),
end_page=config.get('end_page'),
manga_mode=config.get('manga_mode_active', False),
cancellation_event=self.cancellation_event,
pause_event=self.pause_event,
use_cookie=config.get('use_cookie', False),
cookie_text=config.get('cookie_text', ''),
selected_cookie_file=config.get('selected_cookie_file'),
app_base_dir=config.get('app_base_dir'),
manga_filename_style_for_sort_check=config.get('manga_filename_style'),
processed_post_ids=list(processed_ids)
cookies_dict=None, # Cookie handling handled inside client if needed
proxies=proxies # <--- NEW: Pass proxies to API client
)
self.total_posts = 0
self.processed_posts = 0
# Process posts in batches as they are yielded by the API client
for batch in post_generator:
for post_batch in post_generator:
if self.cancellation_event.is_set():
self._log(" Post fetching cancelled.")
break
# Filter out any posts that might have been processed since the start
posts_in_batch_to_process = [p for p in batch if p.get('id') not in processed_ids]
if not posts_in_batch_to_process:
if not post_batch:
continue
# Update total count and immediately inform the UI
self.total_posts += len(posts_in_batch_to_process)
self.progress_queue.put({'type': 'overall_progress', 'payload': (self.total_posts, self.processed_posts)})
new_posts_batch = [p for p in post_batch if p.get('id') not in processed_ids]
if not new_posts_batch:
continue
for post_data in posts_in_batch_to_process:
if self.cancellation_event.is_set(): break
worker = PostProcessorWorker(post_data, config, self.progress_queue)
# Update total posts dynamically as we find them
self.total_posts += len(new_posts_batch)
for post_data in new_posts_batch:
if self.cancellation_event.is_set():
break
# MAPPING CONFIG TO WORKER ARGS
worker_args = self._map_config_to_worker_args(post_data, config)
worker = PostProcessorWorker(**worker_args)
future = self.thread_pool.submit(worker.process)
future.add_done_callback(self._handle_future_result)
self.active_futures.append(future)
if self.total_posts == 0 and not self.cancellation_event.is_set():
self._log("✅ No new posts found to process.")
# Small sleep to prevent UI freeze
time.sleep(0.01)
except Exception as e:
self._log(f"❌ CRITICAL ERROR in post fetcher thread: {e}")
self._log(traceback.format_exc())
self._log(f"❌ Critical Error in Fetcher Thread: {e}")
traceback.print_exc()
finally:
if self.thread_pool:
self.thread_pool.shutdown(wait=True)
self.is_running = False
self._log("🏁 All processing tasks have completed or been cancelled.")
self.progress_queue.put({
'type': 'finished',
'payload': (self.total_downloads, self.total_skips, self.cancellation_event.is_set(), self.all_kept_original_filenames)
})
self.is_running = False # Mark as not running so we can finish
# The main window checks active futures, so we just exit this thread.
def _handle_future_result(self, future: Future):
    """Callback executed when a worker task completes.

    Does nothing once ``self.cancellation_event`` is set. Otherwise it
    bumps ``self.processed_posts``, folds the worker's result into the
    manager's totals, forwards retryable / permanent failures and history
    entries to ``self.progress_queue``, records the post ID in the creator
    profile when one is active, and finally emits an ``overall_progress``
    message.

    The worker result is unpacked as a 6-tuple:
    ``(dl_count, skip_count, kept_originals, retryable, permanent, history)``.
    Any exception (including a failed unpack) counts the post as skipped.
    """
    if self.cancellation_event.is_set():
        return

    # A lock constructed inline (``with threading.Lock():``) is private to
    # each call and excludes nobody. Keep one lock per manager instead;
    # dict.setdefault makes the lazy creation safe across callback threads.
    counters_lock = self.__dict__.setdefault('_result_counters_lock', threading.Lock())

    with counters_lock:  # Protect shared counters
        self.processed_posts += 1

    try:
        if future.cancelled():
            self._log("⚠️ A post processing task was cancelled.")
            with counters_lock:
                self.total_skips += 1
        else:
            result = future.result()
            (dl_count, skip_count, kept_originals,
             retryable, permanent, history) = result
            with counters_lock:
                self.total_downloads += dl_count
                self.total_skips += skip_count
                self.all_kept_original_filenames.extend(kept_originals)
            if retryable:
                self.progress_queue.put({'type': 'retryable_failure', 'payload': (retryable,)})
            if permanent:
                self.progress_queue.put({'type': 'permanent_failure', 'payload': (permanent,)})
            if history:
                self.progress_queue.put({'type': 'post_processed_history', 'payload': (history,)})
                post_id = history.get('post_id')
                if post_id and self.current_creator_profile_path:
                    profile_data = self._setup_creator_profile({'creator_name_for_profile': self.current_creator_name_for_profile, 'session_file_path': self.session_file_path})
                    if post_id not in profile_data.get('processed_post_ids', []):
                        profile_data.setdefault('processed_post_ids', []).append(post_id)
                        self._save_creator_profile(profile_data)
    except Exception as e:
        self._log(f"❌ Worker task resulted in an exception: {e}")
        with counters_lock:
            self.total_skips += 1  # Count errored posts as skipped

    self.progress_queue.put({'type': 'overall_progress', 'payload': (self.total_posts, self.processed_posts)})
def _map_config_to_worker_args(self, post_data, config):
    """Translate the flat ``config`` dict into ``PostProcessorWorker`` kwargs.

    Values come from three places: ``config`` lookups, the manager's own
    queue/events, and fixed placeholders (empty lists/sets and ``None``).
    The proxy dict is obtained from ``self._get_proxies_from_config``.
    """
    proxy_map = self._get_proxies_from_config(config)
    read = config.get

    # Keyword names mirror the arguments of PostProcessorWorker.__init__ in workers.py.
    return dict(
        post_data=post_data,
        download_root=read('output_dir'),
        known_names=[],  # If needed, pass KNOWN_NAMES or load them
        filter_character_list=[],  # Parsed filters if available in config
        emitter=self.progress_queue,
        unwanted_keywords=set(),  # Parse if needed
        filter_mode=read('filter_mode'),
        skip_zip=read('skip_zip'),
        use_subfolders=read('use_subfolders'),
        use_post_subfolders=read('use_post_subfolders'),
        target_post_id_from_initial_url=read('target_post_id_from_initial_url'),
        custom_folder_name=read('custom_folder_name'),
        compress_images=read('compress_images'),
        download_thumbnails=read('download_thumbnails'),
        service=read('service') or 'unknown',
        user_id=read('user_id') or 'unknown',
        pause_event=self.pause_event,
        api_url_input=read('api_url'),
        cancellation_event=self.cancellation_event,
        downloaded_files=None,
        downloaded_file_hashes=None,
        downloaded_files_lock=None,
        downloaded_file_hashes_lock=None,
        manga_mode_active=read('manga_mode_active'),
        manga_filename_style=read('manga_filename_style'),
        manga_custom_filename_format=read('custom_manga_filename_format', "{published} {title}"),
        manga_custom_date_format=read('manga_custom_date_format', "YYYY-MM-DD"),
        use_multithreading=read('use_multithreading', True),
        proxies=proxy_map,
    )
def _setup_creator_profile(self, config):
"""Prepares the path and loads data for the current creator's profile."""
self.current_creator_name_for_profile = config.get('creator_name_for_profile')
if not self.current_creator_name_for_profile:
self._log("⚠️ Cannot create creator profile: Name not provided in config.")
return {}
appdata_dir = os.path.dirname(config.get('session_file_path', '.'))
self.creator_profiles_dir = os.path.join(appdata_dir, "creator_profiles")
os.makedirs(self.creator_profiles_dir, exist_ok=True)
safe_filename = clean_folder_name(self.current_creator_name_for_profile) + ".json"
self.current_creator_profile_path = os.path.join(self.creator_profiles_dir, safe_filename)
if os.path.exists(self.current_creator_profile_path):
try:
with open(self.current_creator_profile_path, 'r', encoding='utf-8') as f:
return json.load(f)
except (json.JSONDecodeError, OSError) as e:
self._log(f"❌ Error loading creator profile '{safe_filename}': {e}. Starting fresh.")
# Extract name logic here or assume config has it
self.current_creator_name_for_profile = "Unknown"
# You should ideally extract name from URL or config here if available
return {}
def _save_creator_profile(self, data):
@@ -280,6 +301,33 @@ class DownloadManager:
self.cancellation_event.set()
if self.thread_pool:
self._log(" Signaling all worker threads to stop and shutting down pool...")
self.thread_pool.shutdown(wait=False)
self.thread_pool.shutdown(wait=False, cancel_futures=True)
def _handle_future_result(self, future):
    """Done-callback for a worker future.

    Drops ``future`` from ``self.active_futures``, adds the first two
    items of its result to the download / skip totals, increments
    ``self.processed_posts`` and publishes an ``overall_progress`` message.
    Once no futures remain and the manager is no longer running, a final
    ``worker_finished`` message is published as well.
    """
    pending = self.active_futures
    if pending:
        try:
            pending.remove(future)
        except ValueError:
            # Future was not (or no longer) tracked.
            pass

    try:
        # outcome tuple: (download_count, skip_count, kept_original_filenames, ...)
        outcome = future.result()
        if outcome:
            self.total_downloads += outcome[0]
            self.total_skips += outcome[1]
            if len(outcome) > 3 and outcome[3]:
                # filename was kept original
                pass
    except CancelledError:
        pass
    except Exception as e:
        self._log(f"❌ Worker Error: {e}")

    self.processed_posts += 1
    progress_message = {
        'type': 'overall_progress',
        'payload': (self.total_posts, self.processed_posts),
    }
    self.progress_queue.put(progress_message)

    all_done = not self.active_futures and not self.is_running
    if all_done:
        self._log("✅ All tasks completed.")
        final_message = {
            'type': 'worker_finished',
            'payload': (self.total_downloads, self.total_skips, [], []),
        }
        self.progress_queue.put(final_message)

View File

@@ -1,31 +1,35 @@
import requests
import cloudscraper
import json
def fetch_nhentai_gallery(gallery_id, logger=print):
# 1. Update arguments to accept proxies=None
def fetch_nhentai_gallery(gallery_id, logger=print, proxies=None):
"""
Fetches the metadata for a single nhentai gallery using cloudscraper to bypass Cloudflare.
Args:
gallery_id (str or int): The ID of the nhentai gallery.
logger (function): A function to log progress and error messages.
Returns:
dict: A dictionary containing the gallery's metadata if successful, otherwise None.
Fetches the metadata for a single nhentai gallery.
Switched to standard requests to support proxies with self-signed certs.
"""
api_url = f"https://nhentai.net/api/gallery/{gallery_id}"
scraper = cloudscraper.create_scraper()
# 2. Use a real User-Agent to avoid immediate blocking
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
}
logger(f" Fetching nhentai gallery metadata from: {api_url}")
# 3. Smart timeout logic
req_timeout = (30, 120) if proxies else 20
try:
# Use the scraper to make the GET request
response = scraper.get(api_url, timeout=20)
# 4. Use requests.get with proxies, verify=False, and timeout
response = requests.get(api_url, headers=headers, timeout=req_timeout, proxies=proxies, verify=False)
if response.status_code == 404:
logger(f" ❌ Gallery not found (404): ID {gallery_id}")
return None
elif response.status_code == 403:
logger(f" ❌ Access Denied (403): Cloudflare blocked the request. Try a different proxy or User-Agent.")
return None
response.raise_for_status()
@@ -36,9 +40,9 @@ def fetch_nhentai_gallery(gallery_id, logger=print):
gallery_data['pages'] = gallery_data.pop('images')['pages']
return gallery_data
else:
logger(" ❌ API response is missing essential keys (id, media_id, or images).")
logger(" ❌ API response is missing essential keys (id, media_id, images).")
return None
except Exception as e:
logger(f"An error occurred while fetching gallery {gallery_id}: {e}")
logger(f"Error fetching nhentai metadata: {e}")
return None

View File

@@ -56,12 +56,14 @@ from ..utils.text_utils import (
match_folders_from_title, match_folders_from_filename_enhanced
)
from ..config.constants import *
from ..ui.dialogs.SinglePDF import create_individual_pdf
def robust_clean_name(name):
"""A more robust function to remove illegal characters for filenames and folders."""
if not name:
return ""
illegal_chars_pattern = r'[\x00-\x1f<>:"/\\|?*\'\[\]]'
# FIX: Removed \' from the list so apostrophes are kept
illegal_chars_pattern = r'[\x00-\x1f<>:"/\\|?*]'
cleaned_name = re.sub(illegal_chars_pattern, '', name)
cleaned_name = cleaned_name.strip(' .')
@@ -132,6 +134,9 @@ class PostProcessorWorker:
sfp_threshold=None,
handle_unknown_mode=False,
creator_name_cache=None,
add_info_in_pdf=False,
proxies=None
):
self.post = post_data
self.download_root = download_root
@@ -205,6 +210,9 @@ class PostProcessorWorker:
self.sfp_threshold = sfp_threshold
self.handle_unknown_mode = handle_unknown_mode
self.creator_name_cache = creator_name_cache
self.add_info_in_pdf = add_info_in_pdf
self.proxies = proxies
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found.")
@@ -256,7 +264,7 @@ class PostProcessorWorker:
new_url = parsed_url._replace(netloc=new_domain).geturl()
try:
with requests.head(new_url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=5, allow_redirects=True) as resp:
with requests.head(new_url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=5, allow_redirects=True, proxies=self.proxies, verify=False) as resp:
if resp.status_code == 200:
return new_url
except requests.RequestException:
@@ -331,7 +339,8 @@ class PostProcessorWorker:
api_original_filename_for_size_check = file_info.get('_original_name_for_log', file_info.get('name'))
try:
# Use a stream=True HEAD request to get headers without downloading the body
with requests.head(file_url, headers=file_download_headers, timeout=15, cookies=cookies_to_use_for_file, allow_redirects=True) as head_response:
with requests.head(file_url, headers=file_download_headers, timeout=15, cookies=cookies_to_use_for_file, allow_redirects=True, proxies=self.proxies, verify=False) as head_response:
head_response.raise_for_status()
content_length = head_response.headers.get('Content-Length')
if content_length:
@@ -665,7 +674,7 @@ class PostProcessorWorker:
current_url_to_try = file_url
response = requests.get(current_url_to_try, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file)
response = requests.get(current_url_to_try, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file, proxies=self.proxies, verify=False)
if response.status_code == 403 and ('kemono.' in current_url_to_try or 'coomer.' in current_url_to_try):
self.logger(f" ⚠️ Got 403 Forbidden for '{api_original_filename}'. Attempting subdomain rotation...")
@@ -674,8 +683,7 @@ class PostProcessorWorker:
self.logger(f" Retrying with new URL: {new_url}")
file_url = new_url
response.close() # Close the old response
response = requests.get(new_url, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file)
response = requests.get(new_url, headers=file_download_headers, timeout=(30, 300), stream=True, cookies=cookies_to_use_for_file, proxies=self.proxies, verify=False)
response.raise_for_status()
# --- REVISED AND MOVED SIZE CHECK LOGIC ---
@@ -974,6 +982,92 @@ class PostProcessorWorker:
else:
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_RETRYABLE_LATER, details_for_failure
def _get_manga_style_filename_for_post(self, post_title, original_ext):
    """Generates a filename based on manga style, using post data.

    Args:
        post_title: Title of the post; ``None`` or blank is tolerated and
            replaced by a per-style placeholder ("post" / "untitled_post").
        original_ext: Extension (including the dot) appended to the name.

    Returns:
        The filename for the style in ``self.manga_filename_style``. Styles
        not handled explicitly fall back to the cleaned post title.
    """
    # A missing title previously raised AttributeError on .strip() in most
    # branches; normalise it once and reuse it everywhere.
    title_text = (post_title or "").strip()
    style = self.manga_filename_style

    def date_prefix():
        """Return the date part (before 'T') of published/added, else 'nodate'."""
        raw_date = self.post.get('published') or self.post.get('added')
        if not raw_date:
            return "nodate"
        try:
            return raw_date.split('T')[0]
        except (AttributeError, TypeError):
            # Value is not a usable string; keep the placeholder.
            return "nodate"

    if style == STYLE_POST_TITLE:
        return f"{robust_clean_name(title_text or 'post')}{original_ext}"

    if style == STYLE_CUSTOM:
        try:
            def format_date(date_str):
                if not date_str or 'NoDate' in date_str:
                    return "NoDate"
                try:
                    dt_obj = datetime.fromisoformat(date_str)
                    strftime_format = self.manga_custom_date_format.replace("YYYY", "%Y").replace("MM", "%m").replace("DD", "%d")
                    return dt_obj.strftime(strftime_format)
                except (ValueError, TypeError):
                    return date_str.split('T')[0]

            service = self.service.lower()
            user_id = str(self.user_id)
            # The cache may be None when no names were passed in; fall back
            # to the raw user id in that case.
            name_cache = self.creator_name_cache or {}
            creator_name = name_cache.get((service, user_id), user_id)

            added_date = self.post.get('added')
            published_date = self.post.get('published')
            edited_date = self.post.get('edited')

            format_values = {
                'id': str(self.post.get('id', '')),
                'user': user_id,
                'creator_name': creator_name,
                'service': self.service,
                'title': str(self.post.get('title', '')),
                'name': robust_clean_name(post_title),  # Use post title as a fallback 'name'
                'added': format_date(added_date or published_date),
                'published': format_date(published_date),
                'edited': format_date(edited_date or published_date)
            }

            custom_base_name = self.manga_custom_filename_format.format(**format_values)
            return f"{robust_clean_name(custom_base_name)}{original_ext}"
        except (KeyError, IndexError, ValueError) as e:
            self.logger(f"⚠️ Custom format error for text export: {e}. Falling back to post title.")
            return f"{robust_clean_name(title_text or 'untitled_post')}{original_ext}"

    if style == STYLE_DATE_POST_TITLE:
        cleaned_title = robust_clean_name(title_text or "post")
        return f"{date_prefix()}_{cleaned_title}{original_ext}"

    if style == STYLE_POST_ID:
        post_id = str(self.post.get('id', 'unknown_id'))
        return f"{post_id}{original_ext}"

    if style == STYLE_ORIGINAL_NAME:
        # Use post title as the name part, as there is no "original filename" for the text export.
        cleaned_title = robust_clean_name(title_text or "untitled_post")
        return f"{date_prefix()}_{cleaned_title}{original_ext}"

    # Default fallback
    return f"{robust_clean_name(title_text or 'untitled_post')}{original_ext}"
def process(self):
result_tuple = (0, 0, [], [], [], None, None)
try:
@@ -1011,8 +1105,8 @@ class PostProcessorWorker:
'Referer': creator_page_url,
'Accept': 'text/css'
}
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
full_post_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
full_post_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies, proxies=self.proxies)
if full_post_data:
self.logger(" ✅ Full post data fetched successfully.")
self.post = full_post_data
@@ -1213,13 +1307,17 @@ class PostProcessorWorker:
if not any(d in api_domain_for_comments.lower() for d in ['kemono.su', 'kemono.party', 'kemono.cr', 'coomer.su', 'coomer.party', 'coomer.st']):
self.logger(f"⚠️ Unrecognized domain '{api_domain_for_comments}' for comment API. Defaulting based on service.")
api_domain_for_comments = "kemono.cr" if "kemono" in self.service.lower() else "coomer.st"
# Fetch comments (Indented correctly now)
comments_data = fetch_post_comments(
api_domain_for_comments, self.service, self.user_id, post_id,
headers, self.logger, self.cancellation_event, self.pause_event,
cookies_dict=prepare_cookies_for_request(
self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger
)
),
proxies=self.proxies
)
if comments_data:
self.logger(f" Fetched {len(comments_data)} comments for post {post_id}.")
for comment_item_idx, comment_item in enumerate(comments_data):
@@ -1246,8 +1344,8 @@ class PostProcessorWorker:
except RuntimeError as e_fetch_comment:
self.logger(f" ⚠️ Error fetching or processing comments for post {post_id}: {e_fetch_comment}")
except Exception as e_generic_comment:
self.logger(f" ❌ Unexpected error during comment processing for post {post_id}: {e_generic_comment}\n{traceback.format_exc(limit=2)}")
self.logger(f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match}")
self.logger(f" ❌ Unexpected error during comment processing for post {post_id}: {e_generic_comment}\n{traceback.format_exc(limit=2)}")
else:
self.logger(f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id}' because a file match already made it a candidate.")
@@ -1269,6 +1367,8 @@ class PostProcessorWorker:
if self.filter_mode == 'text_only' and not self.extract_links_only:
self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})")
post_title_lower = post_title.lower()
# --- Skip Words Check ---
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
for skip_word in self.skip_words_list:
if skip_word.lower() in post_title_lower:
@@ -1287,6 +1387,7 @@ class PostProcessorWorker:
comments_data = []
final_post_data = post_data
# --- Content Fetching ---
if self.text_only_scope == 'content' and 'content' not in final_post_data:
self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
parsed_url = urlparse(self.api_url_input)
@@ -1304,6 +1405,8 @@ class PostProcessorWorker:
api_domain = parsed_url.netloc
comments_data = fetch_post_comments(api_domain, self.service, self.user_id, post_id, headers, self.logger, self.cancellation_event, self.pause_event)
if comments_data:
# For TXT/DOCX export, we format comments here.
# For PDF, we pass the raw list to the generator.
comment_texts = []
for comment in comments_data:
user = comment.get('commenter_name', 'Unknown User')
@@ -1335,23 +1438,43 @@ class PostProcessorWorker:
self._emit_signal('worker_finished', result_tuple)
return result_tuple
# --- Metadata Preparation ---
# Prepare all data needed for the info page or JSON dump
service_str = self.service
user_id_str = str(self.user_id)
post_id_str = str(post_id)
creator_key = (service_str.lower(), user_id_str)
# Resolve creator name using the cache passed from main_window
creator_name = user_id_str
if self.creator_name_cache:
creator_name = self.creator_name_cache.get(creator_key, user_id_str)
common_content_data = {
'title': post_title,
'published': self.post.get('published') or self.post.get('added'),
'service': service_str,
'user': user_id_str,
'id': post_id_str,
'tags': self.post.get('tags'),
'original_link': post_page_url,
'creator_name': creator_name
}
# --- Single PDF Mode (Save Temp JSON) ---
if self.single_pdf_mode:
content_data = {
'title': post_title,
'published': self.post.get('published') or self.post.get('added')
}
if self.text_only_scope == 'comments':
if not comments_data:
result_tuple = (0, 0, [], [], [], None, None)
self._emit_signal('worker_finished', result_tuple)
return result_tuple
content_data['comments'] = comments_data
common_content_data['comments'] = comments_data
else:
if not cleaned_text.strip():
result_tuple = (0, 0, [], [], [], None, None)
self._emit_signal('worker_finished', result_tuple)
return result_tuple
content_data['content'] = cleaned_text
common_content_data['content'] = cleaned_text
temp_dir = os.path.join(self.app_base_dir, "appdata")
os.makedirs(temp_dir, exist_ok=True)
@@ -1359,7 +1482,7 @@ class PostProcessorWorker:
temp_filepath = os.path.join(temp_dir, temp_filename)
try:
with open(temp_filepath, 'w', encoding='utf-8') as f:
json.dump(content_data, f, indent=2)
json.dump(common_content_data, f, indent=2)
self.logger(f" Saved temporary data for '{post_title}' for single PDF compilation.")
result_tuple = (0, 0, [], [], [], None, temp_filepath)
self._emit_signal('worker_finished', result_tuple)
@@ -1369,82 +1492,67 @@ class PostProcessorWorker:
result_tuple = (0, 0, [], [], [], None, None)
self._emit_signal('worker_finished', result_tuple)
return result_tuple
# --- Individual File Mode ---
else:
file_extension = self.text_export_format
txt_filename = clean_filename(post_title) + f".{file_extension}"
txt_filename = ""
if self.manga_mode_active:
txt_filename = self._get_manga_style_filename_for_post(post_title, f".{file_extension}")
self.logger(f" Applying Renaming Mode. Generated filename: '{txt_filename}'")
else:
txt_filename = clean_filename(post_title) + f".{file_extension}"
final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)
try:
os.makedirs(determined_post_save_path_for_history, exist_ok=True)
base, ext = os.path.splitext(final_save_path)
base, ext = os.path.splitext(final_save_path)
counter = 1
while os.path.exists(final_save_path):
final_save_path = f"{base}_{counter}{ext}"
counter += 1
# --- PDF Generation ---
if file_extension == 'pdf':
if FPDF:
self.logger(f" Creating formatted PDF for {'comments' if self.text_only_scope == 'comments' else 'content'}...")
pdf = PDF()
base_path = self.project_root_dir
font_path = ""
bold_font_path = ""
if base_path:
font_path = os.path.join(base_path, 'data', 'dejavu-sans', 'DejaVuSans.ttf')
bold_font_path = os.path.join(base_path, 'data', 'dejavu-sans', 'DejaVuSans-Bold.ttf')
try:
if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}")
if not os.path.exists(bold_font_path): raise RuntimeError(f"Bold font file not found: {bold_font_path}")
pdf.add_font('DejaVu', '', font_path, uni=True)
pdf.add_font('DejaVu', 'B', bold_font_path, uni=True)
default_font_family = 'DejaVu'
except Exception as font_error:
self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.")
default_font_family = 'Arial'
pdf.add_page()
pdf.set_font(default_font_family, 'B', 16)
pdf.multi_cell(0, 10, post_title)
pdf.ln(10)
if self.text_only_scope == 'comments':
if not comments_data:
self.logger(" -> Skip PDF Creation: No comments to process.")
result_tuple = (0, num_potential_files_in_post, [], [], [], None, None)
self._emit_signal('worker_finished', result_tuple)
return result_tuple
for i, comment in enumerate(comments_data):
user = comment.get('commenter_name', 'Unknown User')
timestamp = comment.get('published', 'No Date')
body = strip_html_tags(comment.get('content', ''))
pdf.set_font(default_font_family, '', 10)
pdf.write(8, "Comment by: ")
pdf.set_font(default_font_family, 'B', 10)
pdf.write(8, user)
pdf.set_font(default_font_family, '', 10)
pdf.write(8, f" on {timestamp}")
pdf.ln(10)
pdf.set_font(default_font_family, '', 11)
pdf.multi_cell(0, 7, body)
if i < len(comments_data) - 1:
pdf.ln(5)
pdf.cell(0, 0, '', border='T')
pdf.ln(5)
else:
pdf.set_font(default_font_family, '', 12)
pdf.multi_cell(0, 7, cleaned_text)
pdf.output(final_save_path)
# Font setup
font_path = ""
if self.project_root_dir:
font_path = os.path.join(self.project_root_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf')
# Add content specific fields for the generator
if self.text_only_scope == 'comments':
common_content_data['comments_list_for_pdf'] = comments_data
else:
self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
common_content_data['content_text_for_pdf'] = cleaned_text
# Call the centralized function
success = create_individual_pdf(
post_data=common_content_data,
output_filename=final_save_path,
font_path=font_path,
add_info_page=self.add_info_in_pdf, # <--- NEW PARAMETER
logger=self.logger
)
if not success:
raise Exception("PDF generation failed (check logs)")
# --- DOCX Generation ---
elif file_extension == 'docx':
if Document:
self.logger(f" Converting to DOCX...")
document = Document()
# Add simple header info if desired, or keep pure text
if self.add_info_in_pdf:
document.add_heading(post_title, 0)
document.add_paragraph(f"Date: {common_content_data['published']}")
document.add_paragraph(f"Creator: {common_content_data['creator_name']}")
document.add_paragraph(f"URL: {common_content_data['original_link']}")
document.add_page_break()
document.add_paragraph(cleaned_text)
document.save(final_save_path)
else:
@@ -1452,9 +1560,20 @@ class PostProcessorWorker:
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
else: # TXT file
# --- TXT Generation ---
else:
content_to_write = cleaned_text
# Optional: Add simple text header if "Add Info" is checked
if self.add_info_in_pdf:
header = (f"Title: {post_title}\n"
f"Date: {common_content_data['published']}\n"
f"Creator: {common_content_data['creator_name']}\n"
f"URL: {common_content_data['original_link']}\n"
f"{'-'*40}\n\n")
content_to_write = header + cleaned_text
with open(final_save_path, 'w', encoding='utf-8') as f:
f.write(cleaned_text)
f.write(content_to_write)
self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
result_tuple = (1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None)
@@ -1467,6 +1586,7 @@ class PostProcessorWorker:
self._emit_signal('worker_finished', result_tuple)
return result_tuple
if not self.extract_links_only and self.manga_mode_active and current_character_filters and (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and not post_is_candidate_by_title_char_match:
self.logger(f" -> Skip Post (Renaming Mode with Title/Both Scope - No Title Char Match): Title '{post_title[:50]}' doesn't match filters.")
self._emit_signal('missed_character_post', post_title, "Renaming Mode: No title match for character filter (Title/Both scope)")
@@ -1480,12 +1600,11 @@ class PostProcessorWorker:
should_create_post_subfolder = self.use_post_subfolders
if (not self.use_post_subfolders and self.use_subfolders and
if (not self.use_post_subfolders and
self.sfp_threshold is not None and num_potential_files_in_post >= self.sfp_threshold):
self.logger(f" Post has {num_potential_files_in_post} files (≥{self.sfp_threshold}). Activating Subfolder per Post via [sfp] command.")
should_create_post_subfolder = True
base_folder_names_for_post_content = []
determined_post_save_path_for_history = self.override_output_dir if self.override_output_dir else self.download_root
if not self.extract_links_only and self.use_subfolders:
@@ -2212,9 +2331,10 @@ class DownloadThread(QThread):
manga_custom_filename_format="{published} {title}",
manga_custom_date_format="YYYY-MM-DD" ,
sfp_threshold=None,
creator_name_cache=None
creator_name_cache=None,
proxies=None
):
super().__init__()
self.api_url_input = api_url_input
self.output_dir = output_dir
@@ -2289,6 +2409,7 @@ class DownloadThread(QThread):
self.domain_override = domain_override
self.sfp_threshold = sfp_threshold
self.creator_name_cache = creator_name_cache
self.proxies = proxies
if self.compress_images and Image is None:
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
@@ -2322,6 +2443,7 @@ class DownloadThread(QThread):
self.logger(" Starting post fetch (single-threaded download process)...")
# --- FIX: Removed duplicate proxies argument here ---
post_generator = download_from_api(
self.api_url_input,
logger=self.logger,
@@ -2336,9 +2458,11 @@ class DownloadThread(QThread):
app_base_dir=self.app_base_dir,
manga_filename_style_for_sort_check=self.manga_filename_style if self.manga_mode_active else None,
processed_post_ids=self.processed_post_ids_set,
fetch_all_first=self.fetch_first
fetch_all_first=self.fetch_first,
proxies=self.proxies
)
processed_count_for_delay = 0
for posts_batch_data in post_generator:
if self.isInterruptionRequested():
was_process_cancelled = True
@@ -2349,6 +2473,11 @@ class DownloadThread(QThread):
was_process_cancelled = True
break
processed_count_for_delay += 1
if processed_count_for_delay > 0 and processed_count_for_delay % 50 == 0:
self.logger(" ⏳ Safety Pause: Waiting 10 seconds to respect server rate limits...")
time.sleep(10)
worker_args = {
'post_data': individual_post_data,
'emitter': worker_signals_obj,
@@ -2417,7 +2546,8 @@ class DownloadThread(QThread):
'archive_only_mode': self.archive_only_mode,
'manga_custom_filename_format': self.manga_custom_filename_format,
'manga_custom_date_format': self.manga_custom_date_format,
'sfp_threshold': self.sfp_threshold
'sfp_threshold': self.sfp_threshold,
'proxies': self.proxies
}
post_processing_worker = PostProcessorWorker(**worker_args)

View File

@@ -19,12 +19,14 @@ class AllcomicDownloadThread(QThread):
finished_signal = pyqtSignal(int, int, bool)
overall_progress_signal = pyqtSignal(int, int)
def __init__(self, url, output_dir, parent=None):
# 1. Update __init__ to accept proxies
def __init__(self, url, output_dir, parent=None, proxies=None):
super().__init__(parent)
self.comic_url = url
self.output_dir = output_dir
self.is_cancelled = False
self.pause_event = parent.pause_event if hasattr(parent, 'pause_event') else threading.Event()
self.proxies = proxies # Store the proxies
def _check_pause(self):
if self.is_cancelled: return True
@@ -40,13 +42,19 @@ class AllcomicDownloadThread(QThread):
grand_total_dl = 0
grand_total_skip = 0
# Create the scraper session ONCE for the entire job
scraper = cloudscraper.create_scraper(
browser={'browser': 'firefox', 'platform': 'windows', 'desktop': True}
)
if self.proxies:
self.progress_signal.emit(f" 🌍 Network: Using Proxy {self.proxies}")
else:
self.progress_signal.emit(" 🌍 Network: Direct Connection (No Proxy)")
# Pass the scraper to the function
chapters_to_download = allcomic_get_list(scraper, self.comic_url, self.progress_signal.emit)
scraper = requests.Session()
scraper.headers.update({
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
})
# 2. Pass self.proxies to get_chapter_list
chapters_to_download = allcomic_get_list(scraper, self.comic_url, self.progress_signal.emit, proxies=self.proxies)
if not chapters_to_download:
chapters_to_download = [self.comic_url]
@@ -57,8 +65,9 @@ class AllcomicDownloadThread(QThread):
if self._check_pause(): break
self.progress_signal.emit(f"\n-- Processing Chapter {chapter_idx + 1}/{len(chapters_to_download)} --")
# Pass the scraper to the function
comic_title, chapter_title, image_urls = allcomic_fetch_data(scraper, chapter_url, self.progress_signal.emit)
# 3. Pass self.proxies to fetch_chapter_data
comic_title, chapter_title, image_urls = allcomic_fetch_data(scraper, chapter_url, self.progress_signal.emit, proxies=self.proxies)
if not image_urls:
self.progress_signal.emit(f"❌ Failed to get data for chapter. Skipping.")
@@ -80,6 +89,9 @@ class AllcomicDownloadThread(QThread):
self.overall_progress_signal.emit(total_files_in_chapter, 0)
headers = {'Referer': chapter_url}
# 4. Define smart timeout for images
img_timeout = (30, 120) if self.proxies else 60
for i, img_url in enumerate(image_urls):
if self._check_pause(): break
@@ -97,8 +109,9 @@ class AllcomicDownloadThread(QThread):
if self._check_pause(): break
try:
self.progress_signal.emit(f" Downloading ({i+1}/{total_files_in_chapter}): '{filename}' (Attempt {attempt + 1})...")
# Use the persistent scraper object
response = scraper.get(img_url, stream=True, headers=headers, timeout=60)
# 5. Use proxies, verify=False, and new timeout
response = scraper.get(img_url, stream=True, headers=headers, timeout=img_timeout, proxies=self.proxies, verify=False)
response.raise_for_status()
with open(filepath, 'wb') as f:
@@ -125,7 +138,7 @@ class AllcomicDownloadThread(QThread):
grand_total_skip += 1
self.overall_progress_signal.emit(total_files_in_chapter, i + 1)
time.sleep(0.5) # Increased delay between images for this site
time.sleep(0.5)
if self._check_pause(): break

View File

@@ -0,0 +1,212 @@
import os
import time
import requests
import re
import random # Needed for random delays
from datetime import datetime
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.deviantart_client import DeviantArtClient
from ...utils.file_utils import clean_folder_name
class DeviantArtDownloadThread(QThread):
    """Background thread that downloads a single deviation or a user's gallery.

    Signals:
        progress_signal(str): human-readable log lines.
        file_progress_signal(str, object): declared for interface parity with
            the other downloader threads; not emitted by this class.
        overall_progress_signal(int, int): declared for interface parity;
            not emitted by this class.
        finished_signal(int, int, bool, list): emitted exactly once when
            ``run`` ends, as (downloaded, skipped, was_cancelled, []).
    """

    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    overall_progress_signal = pyqtSignal(int, int)
    finished_signal = pyqtSignal(int, int, bool, list)

    def __init__(self, url, output_dir, pause_event, cancellation_event, parent=None, proxies=None):
        """Store the job parameters.

        Args:
            url: DeviantArt URL to download (post or gallery).
            output_dir: Directory that downloads are written under.
            pause_event: Event object; the thread idles while it is set.
            cancellation_event: Event object; the thread stops once it is set.
            parent: Owning application object. It is also read for the
                renaming-mode checkbox and the custom filename/date formats.
            proxies: Optional proxies mapping handed to ``DeviantArtClient``
                and to ``requests.get``.
        """
        super().__init__(parent)
        self.url = url
        self.output_dir = output_dir
        self.pause_event = pause_event
        self.cancellation_event = cancellation_event
        self.proxies = proxies
        self.parent_app = parent
        self.download_count = 0
        self.skip_count = 0

    def run(self):
        """Authenticate, dispatch on the URL type and report the final counts."""
        # Set when authentication fails so the single finished_signal emission
        # below still reports the run as aborted (the original emitted twice).
        auth_failed = False
        try:
            # Constructed inside the try so that a constructor failure still
            # reaches the finished_signal emission in ``finally``.
            self.client = DeviantArtClient(logger_func=self.progress_signal.emit, proxies=self.proxies)

            if self.proxies:
                self.progress_signal.emit(f" 🌍 Network: Using Proxy {self.proxies}")
            else:
                self.progress_signal.emit(" 🌍 Network: Direct Connection")

            self.progress_signal.emit("=" * 40)
            self.progress_signal.emit(f"🚀 Starting DeviantArt download for: {self.url}")

            if not self.client.authenticate():
                self.progress_signal.emit("❌ Failed to authenticate with DeviantArt API.")
                auth_failed = True
                return

            mode, username, _ = self.client.extract_info_from_url(self.url)

            if mode == 'post':
                self._process_single_post(self.url)
            elif mode == 'gallery':
                self._process_gallery(username)
            else:
                self.progress_signal.emit("❌ Could not parse DeviantArt URL type.")
        except Exception as e:
            self.progress_signal.emit(f"❌ Error during download: {e}")
            self.skip_count += 1
        finally:
            self.finished_signal.emit(
                self.download_count,
                self.skip_count,
                auth_failed or self.cancellation_event.is_set(),
                [],
            )

    def _check_pause_cancel(self):
        """Block while paused; return True as soon as cancellation is requested."""
        if self.cancellation_event.is_set():
            return True
        while self.pause_event.is_set():
            time.sleep(0.5)
            if self.cancellation_event.is_set():
                return True
        return False

    def _process_single_post(self, url):
        """Resolve one deviation URL and download its content file."""
        self.progress_signal.emit(f" Fetching deviation info...")
        uuid = self.client.get_deviation_uuid(url)
        if not uuid:
            self.progress_signal.emit("❌ Could not find Deviation UUID.")
            self.skip_count += 1
            return

        # Fall back to an empty mapping so a missing metadata response yields
        # the default "Untitled" name instead of an AttributeError.
        meta = self.client._api_call(f"/deviation/{uuid}") or {}
        content = self.client.get_deviation_content(uuid)
        if not content:
            self.progress_signal.emit("❌ Could not retrieve download URL.")
            self.skip_count += 1
            return

        self._download_file(content['src'], meta)

    def _process_gallery(self, username):
        """Page through a user's gallery and download every deviation."""
        self.progress_signal.emit(f" Fetching gallery for user: {username}...")
        offset = 0
        has_more = True

        base_folder = os.path.join(self.output_dir, clean_folder_name(username))
        os.makedirs(base_folder, exist_ok=True)

        while has_more:
            if self._check_pause_cancel():
                break

            data = self.client.get_gallery_folder(username, offset=offset) or {}
            results = data.get('results', [])
            has_more = data.get('has_more', False)
            offset = data.get('next_offset')

            if not results:
                break

            for deviation in results:
                if self._check_pause_cancel():
                    break
                self._process_deviation_task(deviation, base_folder)
                # Small random delay between items to avoid HTTP 429 responses
                # from hitting the API many times within a single second.
                time.sleep(random.uniform(0.5, 1.2))

            # Without a next offset there is no valid next page to request,
            # whatever 'has_more' claims.
            if offset is None:
                break

            time.sleep(1)

    def _process_deviation_task(self, deviation, base_folder):
        """Download one gallery entry; any failure is logged and counted as a skip."""
        if self._check_pause_cancel():
            return

        dev_id = deviation.get('deviationid')
        title = deviation.get('title', 'Unknown')

        try:
            content = self.client.get_deviation_content(dev_id)
            if content:
                self._download_file(content['src'], deviation, override_dir=base_folder)
            else:
                self.skip_count += 1
        except Exception as e:
            self.progress_signal.emit(f" ❌ Error processing {title}: {e}")
            self.skip_count += 1

    def _format_date(self, timestamp):
        """Format a Unix timestamp using the parent app's custom date format.

        Returns "NoDate" for a falsy timestamp and "InvalidDate" when the
        value cannot be converted or the format setting cannot be read.
        """
        if not timestamp:
            return "NoDate"
        try:
            fmt_setting = self.parent_app.manga_custom_date_format
            strftime_fmt = fmt_setting.replace("YYYY", "%Y").replace("MM", "%m").replace("DD", "%d")
            dt_obj = datetime.fromtimestamp(int(timestamp))
            return dt_obj.strftime(strftime_fmt)
        except Exception:
            return "InvalidDate"

    @staticmethod
    def _remove_quietly(path):
        """Delete *path* if present, ignoring filesystem errors."""
        try:
            os.remove(path)
        except OSError:
            pass

    def _download_file(self, file_url, metadata, override_dir=None):
        """Stream *file_url* to disk, named after the deviation metadata.

        The data is written to a ``.part`` file and renamed only after the
        transfer completes, so a cancelled or failed download never leaves a
        truncated file that a later run would mistake for a finished one.

        Args:
            file_url: Direct URL of the file to download.
            metadata: Mapping with the deviation's 'title' and, for renaming
                mode, 'author', 'published_time' and 'deviationid'.
            override_dir: Target directory; defaults to ``self.output_dir``.
        """
        if self._check_pause_cancel():
            return

        # urlparse already separates the query string, so the path basename
        # carries the real file extension.
        parsed = requests.utils.urlparse(file_url)
        _, ext = os.path.splitext(os.path.basename(parsed.path))

        title = metadata.get('title', 'Untitled')
        safe_title = clean_folder_name(title) or "Untitled"
        final_filename = f"{safe_title}{ext}"

        if self.parent_app and self.parent_app.manga_mode_checkbox.isChecked():
            try:
                creator_name = metadata.get('author', {}).get('username', 'Unknown')
                published_ts = metadata.get('published_time')

                fmt_data = {
                    "creator_name": creator_name,
                    "title": title,
                    "published": self._format_date(published_ts),
                    "added": self._format_date(published_ts),
                    "edited": self._format_date(published_ts),
                    "id": metadata.get('deviationid', ''),
                    "service": "deviantart",
                    "name": safe_title
                }

                custom_fmt = self.parent_app.custom_manga_filename_format
                new_name = custom_fmt.format(**fmt_data)
                final_filename = f"{clean_folder_name(new_name)}{ext}"
            except Exception as e:
                # Best effort: keep the title-based name, but say why the
                # custom format was not applied instead of failing silently.
                self.progress_signal.emit(f" ⚠️ Custom filename format failed ({e}); using default name.")

        save_dir = override_dir if override_dir else self.output_dir
        try:
            os.makedirs(save_dir, exist_ok=True)
        except OSError:
            # Left to the open() call below, which reports the failure.
            pass

        filepath = os.path.join(save_dir, final_filename)

        if os.path.exists(filepath):
            return

        temp_path = filepath + ".part"
        try:
            self.progress_signal.emit(f" ⬇️ Downloading: {final_filename}")

            # Proxies get a separate (connect, read) timeout with a longer read window.
            timeout_val = (30, 120) if self.proxies else 30
            cancelled = False

            # NOTE(review): verify=False disables TLS certificate validation
            # for every download, proxied or not; confirm this is intended.
            with requests.get(file_url, stream=True, timeout=timeout_val, proxies=self.proxies, verify=False) as r:
                r.raise_for_status()

                with open(temp_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if self._check_pause_cancel():
                            cancelled = True
                            break
                        if chunk:
                            f.write(chunk)

            if cancelled:
                self._remove_quietly(temp_path)
                return

            os.replace(temp_path, filepath)
            self.download_count += 1

        except Exception as e:
            self._remove_quietly(temp_path)
            self.progress_signal.emit(f" ❌ Download failed: {e}")
            self.skip_count += 1

View File

@@ -24,7 +24,8 @@ from .rule34video_downloader_thread import Rule34VideoDownloadThread
from .saint2_downloader_thread import Saint2DownloadThread
from .simp_city_downloader_thread import SimpCityDownloadThread
from .toonily_downloader_thread import ToonilyDownloadThread
from .deviantart_downloader_thread import DeviantArtDownloadThread
from .hentaifox_downloader_thread import HentaiFoxDownloadThread
def create_downloader_thread(main_app, api_url, service, id1, id2, effective_output_dir_for_run):
"""
@@ -175,6 +176,28 @@ def create_downloader_thread(main_app, api_url, service, id1, id2, effective_out
# id1 contains the full URL or album ID from extract_post_info
return BunkrDownloadThread(id1, effective_output_dir_for_run, main_app)
# Handler for DeviantArt
if service == 'deviantart':
main_app.log_signal.emit(f" DeviantArt URL detected. Starting dedicated downloader.")
return DeviantArtDownloadThread(
url=api_url,
output_dir=effective_output_dir_for_run,
pause_event=main_app.pause_event,
cancellation_event=main_app.cancellation_event,
parent=main_app
)
# Handler for HentaiFox (New)
if 'hentaifox.com' in api_url or service == 'hentaifox':
main_app.log_signal.emit("🦊 HentaiFox URL detected.")
return HentaiFoxDownloadThread(
url_or_id=api_url,
output_dir=effective_output_dir_for_run,
parent=main_app
)
# ----------------------
# --- Fallback ---
# If no specific handler matched based on service name or URL pattern, return None.
# This signals main_window.py to use the generic BackendDownloadThread/PostProcessorWorker

View File

@@ -0,0 +1,136 @@
import os
import time
import requests
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.hentaifox_client import get_gallery_metadata, get_image_link_for_page, get_gallery_id
from ...utils.file_utils import clean_folder_name
class HentaiFoxDownloadThread(QThread):
    """Background thread that downloads every page of one HentaiFox gallery.

    Signals:
        progress_signal(str): log messages.
        file_progress_signal(str, object): filename and a
            (downloaded_bytes, total_bytes) tuple.
        finished_signal(int, int, bool, list): (downloaded_count,
            skipped_count, was_cancelled, kept_files_list).
    """

    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool, list)

    def __init__(self, url_or_id, output_dir, parent=None):
        """Resolve the gallery id from *url_or_id* and reset the counters."""
        super().__init__(parent)
        self.gallery_id = get_gallery_id(url_or_id)
        self.output_dir = output_dir
        self.is_running = True
        self.downloaded_count = 0
        self.skipped_count = 0

    def run(self):
        """Fetch gallery metadata, download each page, then emit finished_signal."""
        try:
            self.progress_signal.emit(f"🔍 [HentaiFox] Fetching metadata for ID: {self.gallery_id}...")

            # 1. Get info
            try:
                data = get_gallery_metadata(self.gallery_id)
            except Exception as e:
                self.progress_signal.emit(f"❌ [HentaiFox] Failed to fetch metadata: {e}")
                self.finished_signal.emit(0, 0, False, [])
                return

            title = clean_folder_name(data['title'])
            total_pages = data['total_pages']

            # 2. Set up the folder
            save_folder = os.path.join(self.output_dir, f"[{self.gallery_id}] {title}")
            os.makedirs(save_folder, exist_ok=True)

            self.progress_signal.emit(f"📂 Saving to: {save_folder}")
            self.progress_signal.emit(f"📄 Found {total_pages} pages. Starting download...")

            # 3. Iterate and download
            for i in range(1, total_pages + 1):
                if not self.is_running:
                    self.progress_signal.emit("🛑 Download cancelled by user.")
                    break

                try:
                    img_url = get_image_link_for_page(self.gallery_id, i)

                    if img_url:
                        # Take the extension from the URL path only, so a query
                        # string can never end up in the file name.
                        url_path = requests.utils.urlparse(img_url).path
                        ext = os.path.splitext(url_path)[1].lstrip('.') or img_url.split('.')[-1]
                        filename = f"{i:03d}.{ext}"
                        filepath = os.path.join(save_folder, filename)

                        if os.path.exists(filepath):
                            self.progress_signal.emit(f"⚠️ [{i}/{total_pages}] Skipped (Exists): {filename}")
                            self.skipped_count += 1
                        else:
                            self.progress_signal.emit(f"⬇️ [{i}/{total_pages}] Downloading: {filename}")

                            success = self.download_image_with_progress(img_url, filepath, filename)

                            if success:
                                self.progress_signal.emit(f"✅ [{i}/{total_pages}] Finished: {filename}")
                                self.downloaded_count += 1
                            elif self.is_running:
                                # A False result after stop() is a cancel, not
                                # a failed page, so it is not counted here.
                                self.progress_signal.emit(f"❌ [{i}/{total_pages}] Failed: {filename}")
                                self.skipped_count += 1
                    else:
                        self.progress_signal.emit(f"❌ [{i}/{total_pages}] Error: No image link found.")
                        self.skipped_count += 1

                except Exception as e:
                    self.progress_signal.emit(f"❌ [{i}/{total_pages}] Exception: {e}")
                    self.skipped_count += 1

                time.sleep(0.5)

            # 4. Final summary
            summary = (
                f"\n🏁 [HentaiFox] Task Complete!\n"
                f" - Total: {total_pages}\n"
                f" - Downloaded: {self.downloaded_count}\n"
                f" - Skipped: {self.skipped_count}\n"
            )
            self.progress_signal.emit(summary)

        except Exception as e:
            self.progress_signal.emit(f"❌ Critical Error: {str(e)}")

        self.finished_signal.emit(self.downloaded_count, self.skipped_count, not self.is_running, [])

    def download_image_with_progress(self, url, path, filename):
        """Download *url* to *path* while emitting byte-level progress signals.

        Data is streamed into ``path + ".part"`` and renamed only when the
        transfer completes, so a cancelled or failed download never leaves a
        truncated file that a later run would report as "Skipped (Exists)".

        Args:
            url: Image URL to fetch.
            path: Final destination file path.
            filename: Name reported through ``file_progress_signal``.

        Returns:
            True if the file was fully written; False on a non-200 status,
            cancellation or any error.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Referer": "https://hentaifox.com/"
        }

        temp_path = path + ".part"
        try:
            # stream=True lets the size be read before the body is downloaded;
            # the context manager releases the connection on every exit path.
            with requests.get(url, headers=headers, stream=True, timeout=20) as r:
                if r.status_code != 200:
                    return False

                total_size = int(r.headers.get('content-length', 0))
                downloaded_size = 0
                chunk_size = 1024  # 1KB chunks

                with open(temp_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size):
                        if not self.is_running:
                            return False

                        if chunk:
                            f.write(chunk)
                            downloaded_size += len(chunk)
                            self.file_progress_signal.emit(filename, (downloaded_size, total_size))

            os.replace(temp_path, path)
            return True
        except Exception as e:
            # Report through the log signal instead of stdout.
            self.progress_signal.emit(f"Download Error: {e}")
            return False
        finally:
            # Anything still at the temporary path is an incomplete download.
            if os.path.exists(temp_path):
                try:
                    os.remove(temp_path)
                except OSError:
                    pass

    def stop(self):
        """Request cancellation; checked between pages and between chunks."""
        self.is_running = False

View File

@@ -1,6 +1,6 @@
import os
import time
import cloudscraper
import requests
from PyQt5.QtCore import QThread, pyqtSignal
from ...utils.file_utils import clean_folder_name
@@ -17,68 +17,78 @@ class NhentaiDownloadThread(QThread):
EXTENSION_MAP = {'j': 'jpg', 'p': 'png', 'g': 'gif', 'w': 'webp' }
# 1. Update init to initialize self.proxies
def __init__(self, gallery_data, output_dir, parent=None):
super().__init__(parent)
self.gallery_data = gallery_data
self.output_dir = output_dir
self.is_cancelled = False
self.proxies = None # Placeholder, will be injected by main_window
def run(self):
# 2. Log Proxy Usage
if self.proxies:
self.progress_signal.emit(f" 🌍 Network: Using Proxy {self.proxies}")
else:
self.progress_signal.emit(" 🌍 Network: Direct Connection (No Proxy)")
title = self.gallery_data.get("title", {}).get("english", f"gallery_{self.gallery_data.get('id')}")
gallery_id = self.gallery_data.get("id")
media_id = self.gallery_data.get("media_id")
pages_info = self.gallery_data.get("pages", [])
folder_name = clean_folder_name(title)
gallery_path = os.path.join(self.output_dir, folder_name)
save_path = os.path.join(self.output_dir, folder_name)
try:
os.makedirs(gallery_path, exist_ok=True)
except OSError as e:
self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
os.makedirs(save_path, exist_ok=True)
self.progress_signal.emit(f" Saving to: {folder_name}")
except Exception as e:
self.progress_signal.emit(f" ❌ Error creating directory: {e}")
self.finished_signal.emit(0, len(pages_info), False)
return
self.progress_signal.emit(f"⬇️ Downloading '{title}' to folder '{folder_name}'...")
scraper = cloudscraper.create_scraper()
download_count = 0
skip_count = 0
total_pages = len(pages_info)
# 3. Use requests.Session instead of cloudscraper
scraper = requests.Session()
# 4. Smart timeout logic
img_timeout = (30, 120) if self.proxies else 60
for i, page_data in enumerate(pages_info):
if self.is_cancelled:
break
page_num = i + 1
if self.is_cancelled: break
ext_char = page_data.get('t', 'j')
extension = self.EXTENSION_MAP.get(ext_char, 'jpg')
relative_path = f"/galleries/{media_id}/{page_num}.{extension}"
local_filename = f"{page_num:03d}.{extension}"
filepath = os.path.join(gallery_path, local_filename)
file_ext = self.EXTENSION_MAP.get(page_data.get('t'), 'jpg')
local_filename = f"{i+1:03d}.{file_ext}"
filepath = os.path.join(save_path, local_filename)
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip (Exists): {local_filename}")
self.progress_signal.emit(f" Skipping {local_filename} (already exists).")
skip_count += 1
continue
download_successful = False
# Try servers until one works
for server in self.IMAGE_SERVERS:
if self.is_cancelled:
break
if self.is_cancelled: break
# Construct URL: server/galleries/media_id/page_num.ext
full_url = f"{server}/galleries/{media_id}/{i+1}.{file_ext}"
full_url = f"{server}{relative_path}"
try:
self.progress_signal.emit(f" Downloading page {page_num}/{len(pages_info)} from {server} ...")
self.progress_signal.emit(f" Downloading page {i+1}/{total_pages}...")
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Referer': f'https://nhentai.net/g/{gallery_id}/'
}
response = scraper.get(full_url, headers=headers, timeout=60, stream=True)
# 5. Add proxies, verify=False, and timeout
response = scraper.get(full_url, headers=headers, timeout=img_timeout, stream=True, proxies=self.proxies, verify=False)
if response.status_code == 200:
with open(filepath, 'wb') as f:
@@ -86,12 +96,14 @@ class NhentaiDownloadThread(QThread):
f.write(chunk)
download_count += 1
download_successful = True
break
break # Stop trying servers
else:
self.progress_signal.emit(f" -> {server} returned status {response.status_code}. Trying next server...")
# self.progress_signal.emit(f" -> {server} returned status {response.status_code}...")
pass
except Exception as e:
self.progress_signal.emit(f" -> {server} failed to connect or timed out: {e}. Trying next server...")
# self.progress_signal.emit(f" -> {server} failed: {e}")
pass
if not download_successful:
self.progress_signal.emit(f" ❌ Failed to download {local_filename} from all servers.")

View File

@@ -254,6 +254,7 @@ class SimpCityDownloadThread(QThread):
self.should_dl_pixeldrain = self.parent_app.simpcity_dl_pixeldrain_cb.isChecked()
self.should_dl_saint2 = self.parent_app.simpcity_dl_saint2_cb.isChecked()
self.should_dl_mega = self.parent_app.simpcity_dl_mega_cb.isChecked()
self.should_dl_images = self.parent_app.simpcity_dl_images_cb.isChecked()
self.should_dl_bunkr = self.parent_app.simpcity_dl_bunkr_cb.isChecked()
self.should_dl_gofile = self.parent_app.simpcity_dl_gofile_cb.isChecked()
@@ -288,8 +289,10 @@ class SimpCityDownloadThread(QThread):
enriched_jobs = self._get_enriched_jobs(jobs)
if enriched_jobs:
for job in enriched_jobs:
if job['type'] == 'image': self.image_queue.put(job)
else: self.service_queue.put(job)
if job['type'] == 'image':
if self.should_dl_images: self.image_queue.put(job)
else: self.service_queue.put(job)
else:
base_url = re.sub(r'(/page-\d+)|(/post-\d+)', '', self.start_url).split('#')[0].strip('/')
page_counter = 1; end_of_thread = False; MAX_RETRIES = 3
@@ -347,11 +350,14 @@ class SimpCityDownloadThread(QThread):
# This can happen if all new_jobs were e.g. pixeldrain and it's disabled
self.progress_signal.emit(f" -> Page {page_counter} content was filtered out. Reached end of thread.")
end_of_thread = True
else:
for job in enriched_jobs:
self.processed_job_urls.add(job.get('url'))
if job['type'] == 'image': self.image_queue.put(job)
if job['type'] == 'image':
if self.should_dl_images: self.image_queue.put(job)
else: self.service_queue.put(job)
page_fetch_successful = True; break
except requests.exceptions.HTTPError as e:
if e.response.status_code in [403, 404]:

View File

@@ -7,7 +7,6 @@ from PyQt5.QtCore import Qt
class CustomFilenameDialog(QDialog):
"""A dialog for creating a custom filename format string."""
# --- REPLACE THE 'AVAILABLE_KEYS' LIST WITH THIS DICTIONARY ---
DISPLAY_KEY_MAP = {
"PostID": "id",
"CreatorName": "creator_name",
@@ -19,7 +18,10 @@ class CustomFilenameDialog(QDialog):
"name": "name"
}
def __init__(self, current_format, current_date_format, parent=None):
# STRICT LIST: Only these three will be clickable for DeviantArt
DA_ALLOWED_KEYS = ["creator_name", "title", "published"]
def __init__(self, current_format, current_date_format, parent=None, is_deviantart=False):
super().__init__(parent)
self.setWindowTitle("Custom Filename Format")
self.setMinimumWidth(500)
@@ -31,9 +33,11 @@ class CustomFilenameDialog(QDialog):
layout = QVBoxLayout(self)
# --- Description ---
description_label = QLabel(
"Create a filename format using placeholders. The date/time values for 'added', 'published', and 'edited' will be automatically shortened to your specified format."
)
desc_text = "Create a filename format using placeholders. The date/time values will be automatically formatted."
if is_deviantart:
desc_text += "\n\n(DeviantArt Mode: Only Creator Name, Title, and Upload Date are available. Other buttons are disabled.)"
description_label = QLabel(desc_text)
description_label.setWordWrap(True)
layout.addWidget(description_label)
@@ -42,15 +46,20 @@ class CustomFilenameDialog(QDialog):
layout.addWidget(format_label)
self.format_input = QLineEdit(self)
self.format_input.setText(self.current_format)
self.format_input.setPlaceholderText("e.g., {published} {title} {id}")
if is_deviantart:
self.format_input.setPlaceholderText("e.g., {published} {title} {creator_name}")
else:
self.format_input.setPlaceholderText("e.g., {published} {title} {id}")
layout.addWidget(self.format_input)
# --- Date Format Input ---
date_format_label = QLabel("Date Format (for {added}, {published}, {edited}):")
date_format_label = QLabel("Date Format (for {published}):")
layout.addWidget(date_format_label)
self.date_format_input = QLineEdit(self)
self.date_format_input.setText(self.current_date_format)
self.date_format_input.setPlaceholderText("e.g., YYYY-MM-DD or DD-MM-YYYY")
self.date_format_input.setPlaceholderText("e.g., YYYY-MM-DD")
layout.addWidget(self.date_format_input)
# --- Available Keys Display ---
@@ -62,7 +71,20 @@ class CustomFilenameDialog(QDialog):
for display_key, internal_key in self.DISPLAY_KEY_MAP.items():
key_button = QPushButton(f"{{{display_key}}}")
# Use a lambda to pass the correct internal key when the button is clicked
# --- DeviantArt Logic ---
if is_deviantart:
if internal_key in self.DA_ALLOWED_KEYS:
# Active buttons: Bold text, enabled
key_button.setStyleSheet("font-weight: bold; color: black;")
key_button.setEnabled(True)
else:
# Inactive buttons: Disabled (Cannot be clicked)
key_button.setEnabled(False)
key_button.setToolTip("Not available for DeviantArt")
# ------------------------
# Use a lambda to pass the correct internal key when clicked
key_button.clicked.connect(lambda checked, key=internal_key: self.add_key_to_input(key))
keys_layout.addWidget(key_button)
keys_layout.addStretch()
@@ -81,9 +103,7 @@ class CustomFilenameDialog(QDialog):
self.format_input.setFocus()
def get_format_string(self):
"""Returns the final format string from the input field."""
return self.format_input.text().strip()
def get_date_format_string(self):
"""Returns the date format string from its input field."""
return self.date_format_input.text().strip()
return self.date_format_input.text().strip()

View File

@@ -156,6 +156,9 @@ class EmptyPopupDialog (QDialog ):
# --- MODIFIED: Store a list of profiles now ---
self.update_profiles_list = None
# --- NEW: Flag to indicate if settings should load to UI ---
self.load_settings_into_ui_requested = False
# --- DEPRECATED (kept for compatibility if needed, but new logic won't use them) ---
self.update_profile_data = None
self.update_creator_name = None
@@ -341,6 +344,9 @@ class EmptyPopupDialog (QDialog ):
if dialog.exec_() == QDialog.Accepted:
# --- MODIFIED: Get a list of profiles now ---
selected_profiles = dialog.get_selected_profiles()
# --- NEW: Get the checkbox state ---
self.load_settings_into_ui_requested = dialog.should_load_into_ui()
if selected_profiles:
try:
# --- MODIFIED: Store the list ---
@@ -1052,4 +1058,4 @@ class EmptyPopupDialog (QDialog ):
else :
if unique_key in self .globally_selected_creators :
del self .globally_selected_creators [unique_key ]
self .fetch_posts_button .setEnabled (bool (self .globally_selected_creators ))
self .fetch_posts_button .setEnabled (bool (self .globally_selected_creators ))

View File

@@ -5,10 +5,11 @@ import sys
# --- PyQt5 Imports ---
from PyQt5.QtCore import Qt, QStandardPaths, QTimer
from PyQt5.QtGui import QIntValidator # <--- NEW: Added for Port validation
from PyQt5.QtWidgets import (
QApplication, QDialog, QHBoxLayout, QLabel, QPushButton, QVBoxLayout,
QGroupBox, QComboBox, QMessageBox, QGridLayout, QCheckBox, QLineEdit,
QTabWidget, QWidget, QFileDialog # Added QFileDialog
QTabWidget, QWidget, QFileDialog
)
# --- Local Application Imports ---
from ...i18n.translator import get_translation
@@ -21,7 +22,9 @@ from ...config.constants import (
RESOLUTION_KEY, UI_SCALE_KEY, SAVE_CREATOR_JSON_KEY,
DATE_PREFIX_FORMAT_KEY,
COOKIE_TEXT_KEY, USE_COOKIE_KEY,
FETCH_FIRST_KEY, DISCORD_TOKEN_KEY, POST_DOWNLOAD_ACTION_KEY
FETCH_FIRST_KEY, DISCORD_TOKEN_KEY, POST_DOWNLOAD_ACTION_KEY,
PROXY_ENABLED_KEY, PROXY_HOST_KEY, PROXY_PORT_KEY,
PROXY_USERNAME_KEY, PROXY_PASSWORD_KEY
)
from ...services.updater import UpdateChecker, UpdateDownloader
@@ -118,16 +121,15 @@ class FutureSettingsDialog(QDialog):
super().__init__(parent)
self.parent_app = parent_app_ref
self.setModal(True)
self.update_downloader_thread = None # To keep a reference
self.update_downloader_thread = None
app_icon = get_app_icon_object()
if app_icon and not app_icon.isNull():
self.setWindowIcon(app_icon)
screen_height = QApplication.primaryScreen().availableGeometry().height() if QApplication.primaryScreen() else 800
# Use a more balanced aspect ratio
scale_factor = screen_height / 1000.0
base_min_w, base_min_h = 480, 420 # Wider, less tall
base_min_w, base_min_h = 550, 450 # <--- TWEAK: Slightly increased width for better layout
scaled_min_w = int(base_min_w * scale_factor)
scaled_min_h = int(base_min_h * scale_factor)
self.setMinimumSize(scaled_min_w, scaled_min_h)
@@ -135,6 +137,9 @@ class FutureSettingsDialog(QDialog):
self._init_ui()
self._retranslate_ui()
self._apply_theme()
# <--- NEW: Load proxy settings on init
self._load_proxy_settings()
def _init_ui(self):
"""Initializes all UI components and layouts for the dialog."""
@@ -147,14 +152,16 @@ class FutureSettingsDialog(QDialog):
# --- Create Tabs ---
self.display_tab = QWidget()
self.downloads_tab = QWidget()
self.network_tab = QWidget() # <--- NEW: Network Tab
self.updates_tab = QWidget()
# Add tabs to the widget
self.tab_widget.addTab(self.display_tab, "Display")
self.tab_widget.addTab(self.downloads_tab, "Downloads")
self.tab_widget.addTab(self.network_tab, "Proxy/Network") # <--- NEW
self.tab_widget.addTab(self.updates_tab, "Updates")
# --- Populate Display Tab ---
# [Display Tab Code (Unchanged) ...]
display_tab_layout = QVBoxLayout(self.display_tab)
self.display_group_box = QGroupBox()
display_layout = QGridLayout(self.display_group_box)
@@ -184,9 +191,9 @@ class FutureSettingsDialog(QDialog):
display_layout.addWidget(self.resolution_combo_box, 3, 1)
display_tab_layout.addWidget(self.display_group_box)
display_tab_layout.addStretch(1) # Push content to the top
display_tab_layout.addStretch(1)
# --- Populate Downloads Tab ---
# [Downloads Tab Code (Unchanged) ...]
downloads_tab_layout = QVBoxLayout(self.downloads_tab)
self.download_settings_group_box = QGroupBox()
download_settings_layout = QGridLayout(self.download_settings_group_box)
@@ -217,7 +224,6 @@ class FutureSettingsDialog(QDialog):
self.fetch_first_checkbox.stateChanged.connect(self._fetch_first_setting_changed)
download_settings_layout.addWidget(self.fetch_first_checkbox, 4, 0, 1, 2)
# --- START: Add new Load/Save buttons ---
settings_file_layout = QHBoxLayout()
self.load_settings_button = QPushButton()
self.save_settings_button = QPushButton()
@@ -225,18 +231,72 @@ class FutureSettingsDialog(QDialog):
settings_file_layout.addWidget(self.save_settings_button)
settings_file_layout.addStretch(1)
# Add this new layout to the grid
download_settings_layout.addLayout(settings_file_layout, 5, 0, 1, 2) # Row 5, span 2 cols
download_settings_layout.addLayout(settings_file_layout, 5, 0, 1, 2)
# Connect signals
self.load_settings_button.clicked.connect(self._handle_load_settings)
self.save_settings_button.clicked.connect(self._handle_save_settings)
# --- END: Add new Load/Save buttons ---
downloads_tab_layout.addWidget(self.download_settings_group_box)
downloads_tab_layout.addStretch(1) # Push content to the top
downloads_tab_layout.addStretch(1)
# --- Populate Updates Tab ---
# --- START: Network Tab (NEW) ---
network_tab_layout = QVBoxLayout(self.network_tab)
self.proxy_group_box = QGroupBox()
proxy_layout = QGridLayout(self.proxy_group_box)
# Enable Checkbox
self.proxy_enabled_checkbox = QCheckBox()
self.proxy_enabled_checkbox.stateChanged.connect(self._proxy_setting_changed)
proxy_layout.addWidget(self.proxy_enabled_checkbox, 0, 0, 1, 2)
# Proxy Type Dropdown
self.proxy_type_label = QLabel("Proxy Type:")
self.proxy_type_combo = QComboBox()
self.proxy_type_combo.addItems(["HTTP", "SOCKS4", "SOCKS5"])
self.proxy_type_combo.currentIndexChanged.connect(self._proxy_setting_changed)
proxy_layout.addWidget(self.proxy_type_label, 1, 0)
proxy_layout.addWidget(self.proxy_type_combo, 1, 1)
# Host / IP
self.proxy_host_label = QLabel()
self.proxy_host_input = QLineEdit()
self.proxy_host_input.setPlaceholderText("127.0.0.1")
self.proxy_host_input.editingFinished.connect(self._proxy_setting_changed)
proxy_layout.addWidget(self.proxy_host_label, 2, 0) # Changed row to 2
proxy_layout.addWidget(self.proxy_host_input, 2, 1)
# Port
self.proxy_port_label = QLabel()
self.proxy_port_input = QLineEdit()
self.proxy_port_input.setPlaceholderText("8080")
self.proxy_port_input.setValidator(QIntValidator(1, 65535, self)) # Only numbers
self.proxy_port_input.editingFinished.connect(self._proxy_setting_changed)
proxy_layout.addWidget(self.proxy_port_label, 3, 0)
proxy_layout.addWidget(self.proxy_port_input, 3, 1)
# Username
self.proxy_user_label = QLabel()
self.proxy_user_input = QLineEdit()
self.proxy_user_input.setPlaceholderText("(Optional)")
self.proxy_user_input.editingFinished.connect(self._proxy_setting_changed)
proxy_layout.addWidget(self.proxy_user_label, 4, 0)
proxy_layout.addWidget(self.proxy_user_input, 4, 1)
# Password
self.proxy_pass_label = QLabel()
self.proxy_pass_input = QLineEdit()
self.proxy_pass_input.setPlaceholderText("(Optional)")
self.proxy_pass_input.setEchoMode(QLineEdit.Password) # Mask input
self.proxy_pass_input.editingFinished.connect(self._proxy_setting_changed)
proxy_layout.addWidget(self.proxy_pass_label, 5, 0)
proxy_layout.addWidget(self.proxy_pass_input, 5, 1)
network_tab_layout.addWidget(self.proxy_group_box)
network_tab_layout.addStretch(1)
# --- END: Network Tab (NEW) ---
# [Updates Tab Code (Unchanged) ...]
updates_tab_layout = QVBoxLayout(self.updates_tab)
self.update_group_box = QGroupBox()
update_layout = QGridLayout(self.update_group_box)
@@ -249,7 +309,7 @@ class FutureSettingsDialog(QDialog):
update_layout.addWidget(self.check_update_button, 1, 0, 1, 2)
updates_tab_layout.addWidget(self.update_group_box)
updates_tab_layout.addStretch(1) # Push content to the top
updates_tab_layout.addStretch(1)
# --- OK Button (outside tabs) ---
button_layout = QHBoxLayout()
@@ -266,16 +326,17 @@ class FutureSettingsDialog(QDialog):
# --- Tab Titles ---
self.tab_widget.setTabText(0, self._tr("settings_tab_display", "Display"))
self.tab_widget.setTabText(1, self._tr("settings_tab_downloads", "Downloads"))
self.tab_widget.setTabText(2, self._tr("settings_tab_updates", "Updates"))
self.tab_widget.setTabText(2, self._tr("settings_tab_network", "Proxy/Network")) # <--- NEW
self.tab_widget.setTabText(3, self._tr("settings_tab_updates", "Updates"))
# --- Display Tab ---
# [Display Tab (Unchanged) ...]
self.display_group_box.setTitle(self._tr("display_settings_group_title", "Display Settings"))
self.theme_label.setText(self._tr("theme_label", "Theme:"))
self.ui_scale_label.setText(self._tr("ui_scale_label", "UI Scale:"))
self.language_label.setText(self._tr("language_label", "Language:"))
self.window_size_label.setText(self._tr("window_size_label", "Window Size:"))
# --- Downloads Tab ---
# [Downloads Tab (Unchanged) ...]
self.download_settings_group_box.setTitle(self._tr("download_settings_group_title", "Download Settings"))
self.default_path_label.setText(self._tr("default_path_label", "Default Path:"))
self.date_prefix_format_label.setText(self._tr("date_prefix_format_label", "Post Subfolder Format:"))
@@ -294,32 +355,112 @@ class FutureSettingsDialog(QDialog):
self.fetch_first_checkbox.setToolTip(self._tr("fetch_first_tooltip", "If checked, the downloader will find all posts from a creator first before starting any downloads.\nThis can be slower to start but provides a more accurate progress bar."))
self.save_path_button.setText(self._tr("settings_save_all_button", "Save Path + Cookie + Token"))
self.save_path_button.setToolTip(self._tr("settings_save_all_tooltip", "Save the current 'Download Location', Cookie, and Discord Token settings for future sessions."))
# --- START: Add new button text ---
self.load_settings_button.setText(self._tr("load_settings_button", "Load Settings..."))
self.load_settings_button.setToolTip(self._tr("load_settings_tooltip", "Load all download settings from a .json file."))
self.save_settings_button.setText(self._tr("save_settings_button", "Save Settings..."))
self.save_settings_button.setToolTip(self._tr("save_settings_tooltip", "Save all current download settings to a .json file."))
# --- END: Add new button text ---
# --- Updates Tab ---
# --- START: Network Tab (NEW) ---
self.proxy_group_box.setTitle(self._tr("proxy_settings_group_title", "Proxy Configuration"))
self.proxy_enabled_checkbox.setText(self._tr("proxy_enabled_label", "Enable Proxy"))
self.proxy_host_label.setText(self._tr("proxy_host_label", "Host / IP:"))
self.proxy_port_label.setText(self._tr("proxy_port_label", "Port:"))
self.proxy_user_label.setText(self._tr("proxy_user_label", "Username (Optional):"))
self.proxy_pass_label.setText(self._tr("proxy_pass_label", "Password (Optional):"))
# --- END: Network Tab (NEW) ---
# [Updates Tab (Unchanged) ...]
self.update_group_box.setTitle(self._tr("update_group_title", "Application Updates"))
current_version = self.parent_app.windowTitle().split(' v')[-1]
self.version_label.setText(self._tr("current_version_label", f"Current Version: v{current_version}"))
self.update_status_label.setText(self._tr("update_status_ready", "Ready to check."))
self.check_update_button.setText(self._tr("check_for_updates_button", "Check for Updates"))
# --- General ---
self._update_theme_toggle_button_text()
self.ok_button.setText(self._tr("ok_button", "OK"))
# --- Load Data ---
self._populate_display_combo_boxes()
self._populate_language_combo_box()
self._populate_post_download_action_combo()
self._load_date_prefix_format()
self._load_checkbox_states()
# --- START: New Proxy Logic ---
def _load_proxy_settings(self):
    """Fill the proxy widgets with the values stored in QSettings.

    Signals on every proxy widget are blocked while the values are applied,
    so that populating the form does not trigger the auto-save slot.
    """
    store = self.parent_app.settings
    proxy_widgets = (
        self.proxy_enabled_checkbox,
        self.proxy_type_combo,
        self.proxy_host_input,
        self.proxy_port_input,
        self.proxy_user_input,
        self.proxy_pass_input,
    )

    for widget in proxy_widgets:
        widget.blockSignals(True)

    # Read everything first, then push the values into the form.
    is_enabled = store.value(PROXY_ENABLED_KEY, False, type=bool)
    stored_type = store.value("proxy_type", "HTTP", type=str)
    stored_host = store.value(PROXY_HOST_KEY, "", type=str)
    stored_port = store.value(PROXY_PORT_KEY, "", type=str)
    stored_user = store.value(PROXY_USERNAME_KEY, "", type=str)
    stored_password = store.value(PROXY_PASSWORD_KEY, "", type=str)

    self.proxy_enabled_checkbox.setChecked(is_enabled)

    # An unknown stored type falls back to the first dropdown entry.
    type_index = self.proxy_type_combo.findText(stored_type)
    self.proxy_type_combo.setCurrentIndex(type_index if type_index >= 0 else 0)

    self.proxy_host_input.setText(stored_host)
    self.proxy_port_input.setText(stored_port)
    self.proxy_user_input.setText(stored_user)
    self.proxy_pass_input.setText(stored_password)

    self._update_proxy_fields_state(is_enabled)

    for widget in proxy_widgets:
        widget.blockSignals(False)
def _proxy_setting_changed(self):
    """Write the current state of the proxy form to QSettings and flush it.

    Text fields are stored with surrounding whitespace stripped. Afterwards
    the input fields are enabled or disabled to match the checkbox.
    """
    is_enabled = self.proxy_enabled_checkbox.isChecked()
    # (settings key, value) pairs, written in this order.
    pending_values = (
        (PROXY_ENABLED_KEY, is_enabled),
        ("proxy_type", self.proxy_type_combo.currentText()),
        (PROXY_HOST_KEY, self.proxy_host_input.text().strip()),
        (PROXY_PORT_KEY, self.proxy_port_input.text().strip()),
        (PROXY_USERNAME_KEY, self.proxy_user_input.text().strip()),
        (PROXY_PASSWORD_KEY, self.proxy_pass_input.text().strip()),
    )

    store = self.parent_app.settings
    for settings_key, new_value in pending_values:
        store.setValue(settings_key, new_value)
    store.sync()

    self._update_proxy_fields_state(is_enabled)

    # Optional: Notify main app that network settings changed if needed
    # self.parent_app.reload_proxy_settings()
def _update_proxy_fields_state(self, enabled):
"""Enables or disables input fields based on the checkbox."""
self.proxy_type_combo.setEnabled(enabled)
self.proxy_host_input.setEnabled(enabled)
self.proxy_port_input.setEnabled(enabled)
self.proxy_user_input.setEnabled(enabled)
self.proxy_pass_input.setEnabled(enabled)
# --- END: New Proxy Logic ---
def _check_for_updates(self):
self.check_update_button.setEnabled(False)
self.update_status_label.setText(self._tr("update_status_checking", "Checking..."))

View File

@@ -73,7 +73,6 @@ class HelpGuideDialog(QDialog):
<li>fap-nation.org/</li>
<li>Discord</li>
<li>allporncomic.com</li>
<li>allporncomic.com</li>
<li>hentai2read.com</li>
<li>mangadex.org</li>
<li>Simpcity</li>
@@ -279,6 +278,46 @@ class HelpGuideDialog(QDialog):
</ul>
"""),
("Add to Queue",
"""
<p>This feature allows you to queue up multiple distinct downloads with different settings and run them all sequentially.</p>
<h3 style='color: #E0E0E0;'>Step 1: Prepare the Download</h3>
<p>Before clicking add, configure the download exactly how you want it processed for this specific link:</p>
<ul>
<li><b>Select Directory:</b> Choose where you want the files to go.</li>
<li><b>Configure Options:</b> Check/uncheck boxes (e.g., "Separate Folders", "Use Cookie", "Manga Mode").</li>
<li><b>Paste URL:</b> Enter the link for the creator or post you want to download.</li>
</ul>
<h3 style='color: #E0E0E0;'>Step 2: Add to Queue</h3>
<ol>
<li>Click the <b>Add to Queue</b> button (located near the Start Download).</li>
<li><b>Confirmation:</b> You will see a popup message and the log will print <code>✅ Job added to queue</code>.</li>
<li>The URL box will clear, allowing you to immediately paste the next link.</li>
</ol>
<h3 style='color: #E0E0E0;'>Step 3: Repeat & Start</h3>
<p>You can repeat steps 1 and 2 as many times as you like. You can even change settings (like the download folder) between adds; the queue remembers the specific settings for each individual link.</p>
<p>To start processing the queue:</p>
<ol>
<li>In the Link Input box, type exactly: <code>start queue</code></li>
<li>The main "Start Download" button will change to <b>"🚀 Execute Queue"</b>.</li>
<li>Click that button to begin.</li>
</ol>
<h3 style='color: #E0E0E0;'>Processing Behavior</h3>
<p>Once started, the app will lock the UI, load the first job, download it until finished, and automatically move to the next until the queue is empty.</p>
<h3 style='color: #E0E0E0;'>Special Case: Creator Selection Popup</h3>
<p>If you use the <b>Creator Selection</b> popup (the 🎨 button):</p>
<ul>
<li>Select multiple creators in that popup and click <b>"Queue Selected"</b>.</li>
<li>The app internally adds them to a temporary list.</li>
<li>When you click the main <b>"Add to Queue"</b> button on the main window, it will detect that list and automatically bulk-create job files for all the creators you selected.</li>
</ul>
"""),
("Special Commands",
"""
<p>You can add special commands to the <b>"Filter by Character(s)"</b> input field to change download behavior for a single task. Commands are keywords wrapped in square brackets <code>[]</code>.</p>
@@ -450,7 +489,16 @@ class HelpGuideDialog(QDialog):
("Utility & Advanced Options",
"""
<p>These features provide advanced control over your downloads, sessions, and application settings.</p>
<h3 style='color: #E0E0E0;'>🛡️ Proxy Support </h3>
<p>You can now configure a proxy to bypass region blocks or ISP restrictions (e.g., for AllComic or Nhentai).</p>
<p>Go to <b>Settings ⚙️ > Proxy Tab</b> to set it up:</p>
<ul>
<li><b>Protocols:</b> Full support for <b>HTTP</b>, <b>SOCKS4</b>, and <b>SOCKS5</b>.</li>
<li><b>Authentication:</b> Supports username and password for private proxies.</li>
<li><b>Global Effect:</b> Once enabled, all app connections (including API fetches and file downloads) will route through this proxy.</li>
</ul>
<h3 style='color: #E0E0E0;'>Use Cookie</h3>
<p>This is essential for downloading from sites that require a login (like <b>SimpCity</b> or accessing your <b>favorites</b> on Kemono/Coomer). You can either:</p>
<ul>
@@ -484,6 +532,7 @@ class HelpGuideDialog(QDialog):
<li>Toggle <b>"Fetch First"</b> (to find all posts from a creator before starting any downloads).</li>
</ul>
</li>
<li><b>Proxy Tab:</b> Configure HTTP/SOCKS proxies and authentication.</li>
<li><b>Updates Tab:</b> Check for and install new application updates.</li>
</ul>
@@ -605,7 +654,8 @@ class HelpGuideDialog(QDialog):
main_layout.addLayout(content_layout, 1)
self.nav_list = QListWidget()
self.nav_list.setFixedWidth(int(220 * scale))
# Increased width to prevent scrollbar overlap
self.nav_list.setFixedWidth(int(280 * scale))
# Styles are now set in the __init__ method
content_layout.addWidget(self.nav_list)

View File

@@ -11,17 +11,16 @@ class MoreOptionsDialog(QDialog):
SCOPE_CONTENT = "content"
SCOPE_COMMENTS = "comments"
def __init__(self, parent=None, current_scope=None, current_format=None, single_pdf_checked=False):
def __init__(self, parent=None, current_scope=None, current_format=None, single_pdf_checked=False, add_info_checked=False):
super().__init__(parent)
self.parent_app = parent
self.setWindowTitle("More Options")
self.setMinimumWidth(350)
# ... (Layout and other widgets remain the same) ...
layout = QVBoxLayout(self)
self.description_label = QLabel("Please choose the scope for the action:")
layout.addWidget(self.description_label)
self.radio_button_group = QButtonGroup(self)
self.radio_content = QRadioButton("Description/Content")
self.radio_comments = QRadioButton("Comments")
@@ -50,14 +49,20 @@ class MoreOptionsDialog(QDialog):
export_layout.addStretch()
layout.addLayout(export_layout)
# --- UPDATED: Single PDF Checkbox ---
# --- Single PDF Checkbox ---
self.single_pdf_checkbox = QCheckBox("Single PDF")
self.single_pdf_checkbox.setToolTip("If checked, all text from matching posts will be compiled into one single PDF file.")
self.single_pdf_checkbox.setChecked(single_pdf_checked)
layout.addWidget(self.single_pdf_checkbox)
self.format_combo.currentTextChanged.connect(self.update_single_pdf_checkbox_state)
self.update_single_pdf_checkbox_state(self.format_combo.currentText())
# --- NEW: Add Info Checkbox ---
self.add_info_checkbox = QCheckBox("Add info in PDF")
self.add_info_checkbox.setToolTip("If checked, adds a first page with post details (Title, Date, Link, Creator, Tags, etc.).")
self.add_info_checkbox.setChecked(add_info_checked)
layout.addWidget(self.add_info_checkbox)
self.format_combo.currentTextChanged.connect(self.update_checkbox_states)
self.update_checkbox_states(self.format_combo.currentText())
self.button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
self.button_box.accepted.connect(self.accept)
@@ -65,12 +70,18 @@ class MoreOptionsDialog(QDialog):
layout.addWidget(self.button_box)
self.setLayout(layout)
self._apply_theme()
def update_single_pdf_checkbox_state(self, text):
"""Enable the Single PDF checkbox only if the format is PDF."""
def update_checkbox_states(self, text):
    """Make the PDF-only checkboxes available only for the PDF format.

    ``text`` is the selected export format, compared case-insensitively
    against "PDF". For any other format both checkboxes are disabled and
    also unchecked, so the dialog does not suggest the option still applies.
    """
    pdf_selected = text.upper() == "PDF"
    pdf_only_boxes = (self.single_pdf_checkbox, self.add_info_checkbox)

    for box in pdf_only_boxes:
        box.setEnabled(pdf_selected)

    if pdf_selected:
        return

    for box in pdf_only_boxes:
        box.setChecked(False)
def get_selected_scope(self):
if self.radio_comments.isChecked():
@@ -84,13 +95,14 @@ class MoreOptionsDialog(QDialog):
"""Returns the state of the Single PDF checkbox."""
return self.single_pdf_checkbox.isChecked() and self.single_pdf_checkbox.isEnabled()
def get_add_info_state(self):
    """Return True only when the Add Info checkbox is both checked and enabled."""
    checkbox = self.add_info_checkbox
    if not checkbox.isChecked():
        return False
    return checkbox.isEnabled()
def _apply_theme(self):
"""Applies the current theme from the parent application."""
if self.parent_app and self.parent_app.current_theme == "dark":
# Get the scale factor from the parent app
if self.parent_app and hasattr(self.parent_app, 'current_theme') and self.parent_app.current_theme == "dark":
scale = getattr(self.parent_app, 'scale_factor', 1)
# Call the imported function with the correct scale
self.setStyleSheet(get_dark_theme(scale))
else:
# Explicitly set a blank stylesheet for light mode
self.setStyleSheet("")
self.setStyleSheet("")

View File

@@ -1,27 +1,29 @@
import os
import re
import sys
try:
from fpdf import FPDF
FPDF_AVAILABLE = True
# --- FIX: Move the class definition inside the try block ---
class PDF(FPDF):
"""Custom PDF class to handle headers and footers."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.font_family_main = 'Arial'
def header(self):
pass
def footer(self):
self.set_y(-15)
if self.font_family:
self.set_font(self.font_family, '', 8)
else:
self.set_font('Arial', '', 8)
self.set_font(self.font_family_main, '', 8)
self.cell(0, 10, 'Page ' + str(self.page_no()), 0, 0, 'C')
except ImportError:
except Exception as e:
print(f"\n❌ DEBUG INFO: Import failed. The specific error is: {e}")
print(f"❌ DEBUG INFO: Python running this script is located at: {sys.executable}\n")
FPDF_AVAILABLE = False
# If the import fails, FPDF and PDF will not be defined,
# but the program won't crash here.
FPDF = None
PDF = None
@@ -31,12 +33,169 @@ def strip_html_tags(text):
clean = re.compile('<.*?>')
return re.sub(clean, '', text)
def create_single_pdf_from_content(posts_data, output_filename, font_path, logger=print):
def _setup_pdf_fonts(pdf, font_path, logger=print):
"""Helper to setup fonts for the PDF instance."""
bold_font_path = ""
default_font = 'Arial'
if font_path:
bold_font_path = font_path.replace("DejaVuSans.ttf", "DejaVuSans-Bold.ttf")
try:
if font_path and os.path.exists(font_path):
pdf.add_font('DejaVu', '', font_path, uni=True)
default_font = 'DejaVu'
if os.path.exists(bold_font_path):
pdf.add_font('DejaVu', 'B', bold_font_path, uni=True)
else:
pdf.add_font('DejaVu', 'B', font_path, uni=True)
except Exception as font_error:
logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.")
default_font = 'Arial'
pdf.font_family_main = default_font
return default_font
def add_metadata_page(pdf, post, font_family):
"""Start a new page on ``pdf`` and write the post's metadata onto it.

Writes the post title centred in bold 16pt, then one "Label: value" row each
for upload date, creator, service, original link and tags, and finishes with
a horizontal rule (a zero-height cell with a top border).

``post`` is read with ``.get`` for the keys 'title', 'published', 'added',
'creator_name', 'user', 'service', 'original_link', 'id' and 'tags'.
``font_family`` is the font family name passed to every ``set_font`` call.

NOTE(review): indentation is not visible in this view, so the nesting of the
statements below is inferred from their order; confirm against the real file.
"""
pdf.add_page()
pdf.set_font(font_family, 'B', 16)
# Falls back to 'Untitled Post' only when the 'title' key is absent.
pdf.multi_cell(w=0, h=10, txt=post.get('title', 'Untitled Post'), align='C')
pdf.ln(10)
pdf.set_font(font_family, '', 11)
# Local helper: writes one "Label: value" row; rows with a falsy value are skipped.
def add_info_row(label, value, link_url=None):
if not value: return
# Label in bold.
pdf.set_font(font_family, 'B', 11)
pdf.write(8, f"{label}: ")
# Value: rendered as a link when a URL is supplied, plain text otherwise.
if link_url:
# Link styling: blue text plus the underline style.
pdf.set_text_color(0, 0, 255)
# 'U' requests underline in the style string. Whether the underline is
# rendered for the DejaVu font may depend on the fpdf2 version (per the
# original author's note); the blue colour marks the link either way.
pdf.set_font(font_family, 'U', 11)
# The URL goes into the cell's 'link' parameter to make the text clickable.
pdf.multi_cell(w=0, h=8, txt=str(value), link=link_url)
# Restore black text and the regular style for the rows that follow.
pdf.set_text_color(0, 0, 0)
pdf.set_font(font_family, '', 11)
else:
pdf.set_font(font_family, '', 11)
pdf.multi_cell(w=0, h=8, txt=str(value))
# NOTE(review): presumably runs after both branches to space the rows; confirm.
pdf.ln(2)
# 'published' is preferred over 'added'; 'Unknown' when neither is set.
date_str = post.get('published') or post.get('added') or 'Unknown'
add_info_row("Date Uploaded", date_str)
# 'creator_name' is preferred; the 'user' field is the fallback.
creator = post.get('creator_name') or post.get('user') or 'Unknown'
add_info_row("Creator", creator)
add_info_row("Service", post.get('service', 'Unknown'))
link = post.get('original_link')
# Without an explicit link, build one from service/user/id on the hard-coded kemono.su host.
if not link and post.get('service') and post.get('user') and post.get('id'):
link = f"https://kemono.su/{post['service']}/user/{post['user']}/post/{post['id']}"
# The link is used both as the visible text and as the URL target.
add_info_row("Original Link", link, link_url=link)
tags = post.get('tags')
if tags:
# A list of tags is comma-joined; any other value is stringified as-is.
tags_str = ", ".join(tags) if isinstance(tags, list) else str(tags)
add_info_row("Tags", tags_str)
# Separator: vertical gap, zero-height cell with a top border, vertical gap.
pdf.ln(10)
pdf.cell(0, 0, border='T')
pdf.ln(10)
def create_individual_pdf(post_data, output_filename, font_path, add_info_page=False, add_comments=False, logger=print):
"""
Create a PDF for a single post and save it to ``output_filename``.

The post text is read from ``post_data['content_text_for_pdf']`` and the
comments from ``post_data['comments_list_for_pdf']``. With ``add_info_page``
the document opens with the metadata section written by ``add_metadata_page``;
otherwise it opens with the post title. Comments are written when
``add_comments`` is set, or when the post has no content text.

Returns True when the file was written, False when fpdf is unavailable or
saving failed; failures are reported through ``logger``.

NOTE(review): indentation is not visible in this view, so the nesting of the
statements below is inferred from their order; confirm against the real file.
"""
if not FPDF_AVAILABLE:
# NOTE(review): two consecutive failure messages are logged here; this looks like
# an old and a new wording side by side. Confirm which one is intended.
logger("❌ PDF Creation failed: 'fpdf2' library is not installed. Please run: pip install fpdf2")
logger("❌ PDF Creation failed: 'fpdf2' library not installed.")
return False
pdf = PDF()
font_family = _setup_pdf_fonts(pdf, font_path, logger)
if add_info_page:
# add_metadata_page starts the page itself; no extra add_page() follows,
# so the content continues directly below the metadata separator.
add_metadata_page(pdf, post_data, font_family)
else:
pdf.add_page()
# The title header is written here only when no info page was added,
# because the info page already carries the title at the top.
if not add_info_page:
pdf.set_font(font_family, 'B', 16)
pdf.multi_cell(w=0, h=10, txt=post_data.get('title', 'Untitled Post'), align='L')
pdf.ln(5)
content_text = post_data.get('content_text_for_pdf')
comments_list = post_data.get('comments_list_for_pdf')
# 1. Post content, when present.
if content_text:
pdf.set_font(font_family, '', 12)
pdf.multi_cell(w=0, h=7, txt=content_text)
pdf.ln(10)
# 2. Comments: written when explicitly requested, or when there is no content text.
if comments_list and (add_comments or not content_text):
# NOTE(review): presumably the page break and the "Comments" heading below all belong
# to this inner condition (comments appended after content); confirm.
if add_comments and content_text:
pdf.add_page()
pdf.set_font(font_family, 'B', 14)
pdf.cell(0, 10, "Comments", ln=True)
pdf.ln(5)
for i, comment in enumerate(comments_list):
user = comment.get('commenter_name', 'Unknown User')
timestamp = comment.get('published', 'No Date')
# Comment bodies have their HTML tags stripped before being written.
body = strip_html_tags(comment.get('content', ''))
# Header line: "Comment by: <user> on <timestamp>", with the user name in bold.
pdf.set_font(font_family, '', 10)
pdf.write(8, "Comment by: ")
pdf.set_font(font_family, 'B', 10)
pdf.write(8, str(user))
pdf.set_font(font_family, '', 10)
pdf.write(8, f" on {timestamp}")
pdf.ln(10)
pdf.set_font(font_family, '', 11)
pdf.multi_cell(w=0, h=7, txt=body)
# A thin rule separates comments; none is drawn after the last one.
if i < len(comments_list) - 1:
pdf.ln(3)
pdf.cell(w=0, h=0, border='T')
pdf.ln(3)
# Any error raised while saving is logged and reported as a False return value.
try:
pdf.output(output_filename)
return True
except Exception as e:
logger(f"❌ Error saving PDF '{os.path.basename(output_filename)}': {e}")
return False
def create_single_pdf_from_content(posts_data, output_filename, font_path, add_info_page=False, logger=print):
"""
Creates a single, continuous PDF from multiple posts.
"""
if not FPDF_AVAILABLE:
logger("❌ PDF Creation failed: 'fpdf2' library is not installed.")
return False
if not posts_data:
@@ -44,34 +203,21 @@ def create_single_pdf_from_content(posts_data, output_filename, font_path, logge
return False
pdf = PDF()
default_font_family = 'DejaVu'
font_family = _setup_pdf_fonts(pdf, font_path, logger)
bold_font_path = ""
if font_path:
bold_font_path = font_path.replace("DejaVuSans.ttf", "DejaVuSans-Bold.ttf")
try:
if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}")
if not os.path.exists(bold_font_path): raise RuntimeError(f"Bold font file not found: {bold_font_path}")
pdf.add_font('DejaVu', '', font_path, uni=True)
pdf.add_font('DejaVu', 'B', bold_font_path, uni=True)
except Exception as font_error:
logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.")
default_font_family = 'Arial'
pdf.add_page()
logger(f" Starting continuous PDF creation with content from {len(posts_data)} posts...")
for i, post in enumerate(posts_data):
if i > 0:
# This ensures every post after the first gets its own page.
if add_info_page:
add_metadata_page(pdf, post, font_family)
# REMOVED: pdf.add_page() <-- This ensures content starts right below the line
else:
pdf.add_page()
pdf.set_font(default_font_family, 'B', 16)
pdf.multi_cell(w=0, h=10, txt=post.get('title', 'Untitled Post'), align='L')
pdf.ln(5)
if not add_info_page:
pdf.set_font(font_family, 'B', 16)
pdf.multi_cell(w=0, h=10, txt=post.get('title', 'Untitled Post'), align='L')
pdf.ln(5)
if 'comments' in post and post['comments']:
comments_list = post['comments']
@@ -80,17 +226,17 @@ def create_single_pdf_from_content(posts_data, output_filename, font_path, logge
timestamp = comment.get('published', 'No Date')
body = strip_html_tags(comment.get('content', ''))
pdf.set_font(default_font_family, '', 10)
pdf.set_font(font_family, '', 10)
pdf.write(8, "Comment by: ")
if user is not None:
pdf.set_font(default_font_family, 'B', 10)
pdf.set_font(font_family, 'B', 10)
pdf.write(8, str(user))
pdf.set_font(default_font_family, '', 10)
pdf.set_font(font_family, '', 10)
pdf.write(8, f" on {timestamp}")
pdf.ln(10)
pdf.set_font(default_font_family, '', 11)
pdf.set_font(font_family, '', 11)
pdf.multi_cell(w=0, h=7, txt=body)
if comment_index < len(comments_list) - 1:
@@ -98,13 +244,16 @@ def create_single_pdf_from_content(posts_data, output_filename, font_path, logge
pdf.cell(w=0, h=0, border='T')
pdf.ln(3)
elif 'content' in post:
pdf.set_font(default_font_family, '', 12)
pdf.set_font(font_family, '', 12)
pdf.multi_cell(w=0, h=7, txt=post.get('content', 'No Content'))
try:
output_dir = os.path.dirname(output_filename)
if output_dir and not os.path.exists(output_dir):
os.makedirs(output_dir, exist_ok=True)
pdf.output(output_filename)
logger(f"✅ Successfully created single PDF: '{os.path.basename(output_filename)}'")
return True
except Exception as e:
logger(f"❌ A critical error occurred while saving the final PDF: {e}")
return False
return False

View File

@@ -7,7 +7,7 @@ import sys
from PyQt5.QtCore import Qt, pyqtSignal
from PyQt5.QtWidgets import (
QDialog, QVBoxLayout, QHBoxLayout, QListWidget, QListWidgetItem,
QPushButton, QMessageBox, QAbstractItemView, QLabel
QPushButton, QMessageBox, QAbstractItemView, QLabel, QCheckBox
)
# --- Local Application Imports ---
@@ -26,6 +26,11 @@ class UpdateCheckDialog(QDialog):
self.parent_app = parent_app_ref
self.user_data_path = user_data_path
self.selected_profiles_list = [] # Will store a list of {'name': ..., 'data': ...}
self._default_checkbox_tooltip = (
"If checked, the settings fields will be unlocked and editable.\n"
"If unchecked, settings will still load, but in 'Read-Only' mode."
)
self._init_ui()
self._load_profiles()
@@ -56,9 +61,21 @@ class UpdateCheckDialog(QDialog):
self.list_widget = QListWidget()
# No selection mode, we only care about checkboxes
self.list_widget.setSelectionMode(QAbstractItemView.NoSelection)
# Connect signal to handle checkbox state changes
self.list_widget.itemChanged.connect(self._handle_item_changed)
layout.addWidget(self.list_widget)
# --- All Buttons in One Horizontal Layout ---
# Renamed text to reflect new behavior
self.edit_settings_checkbox = QCheckBox("Enable Editing (Unlock Settings)")
self.edit_settings_checkbox.setToolTip(self._default_checkbox_tooltip)
# Checked by default as requested
self.edit_settings_checkbox.setChecked(True)
layout.addWidget(self.edit_settings_checkbox)
# -------------------------------------
button_layout = QHBoxLayout()
button_layout.setSpacing(6) # small even spacing between all buttons
@@ -97,6 +114,8 @@ class UpdateCheckDialog(QDialog):
self.deselect_all_button.setText(self._tr("deselect_all_button_text", "Deselect All"))
self.check_button.setText(self._tr("update_check_dialog_check_button", "Check Selected"))
self.close_button.setText(self._tr("update_check_dialog_close_button", "Close"))
# Updated translation key and default text
self.edit_settings_checkbox.setText(self._tr("update_check_enable_editing_checkbox", "Enable Editing (Unlock Settings)"))
def _load_profiles(self):
"""Loads all .json files from the creator_profiles directory as checkable items."""
@@ -119,7 +138,6 @@ class UpdateCheckDialog(QDialog):
with open(filepath, 'r', encoding='utf-8') as f:
data = json.load(f)
# Basic validation to ensure it's a valid profile
if 'creator_url' in data and 'processed_post_ids' in data:
creator_name = os.path.splitext(filename)[0]
profiles_found.append({'name': creator_name, 'data': data})
@@ -133,7 +151,6 @@ class UpdateCheckDialog(QDialog):
for profile_info in profiles_found:
item = QListWidgetItem(profile_info['name'])
item.setData(Qt.UserRole, profile_info)
# --- Make item checkable ---
item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
item.setCheckState(Qt.Unchecked)
self.list_widget.addItem(item)
@@ -144,16 +161,42 @@ class UpdateCheckDialog(QDialog):
self.check_button.setEnabled(False)
self.select_all_button.setEnabled(False)
self.deselect_all_button.setEnabled(False)
self.edit_settings_checkbox.setEnabled(False)
def _toggle_all_checkboxes(self):
    """Apply a single check state to every checkable row in the list.

    Serves both the Select All and Deselect All buttons: when the signal
    sender is ``select_all_button`` every row is checked, for any other
    sender every row is unchecked.
    """
    target_state = Qt.Unchecked
    if self.sender() == self.select_all_button:
        target_state = Qt.Checked

    widget = self.list_widget
    # Block the list's signals during the bulk update so per-row change
    # notifications are not emitted for every single row.
    widget.blockSignals(True)
    for row in range(widget.count()):
        entry = widget.item(row)
        if entry.flags() & Qt.ItemIsUserCheckable:
            entry.setCheckState(target_state)
    widget.blockSignals(False)

    # Signals were blocked above, so re-evaluate the dependent state once
    # by hand now that all rows carry their final check state.
    self._handle_item_changed(None)
def _handle_item_changed(self, item):
    """Sync the 'Enable Editing' checkbox with the number of checked rows.

    When more than one row is checked the checkbox is unchecked, disabled
    and given a warning tooltip. Otherwise it is enabled and its default
    tooltip is restored.

    Args:
        item: Accepted so the method matches the signal signature; the
            body does not read it (callers may pass ``None``).
    """
    rows = self.list_widget
    checked_total = sum(
        1
        for idx in range(rows.count())
        if rows.item(idx).checkState() == Qt.Checked
    )
    checkbox = self.edit_settings_checkbox

    # Zero or one checked row: editing may be offered again.
    if checked_total <= 1:
        checkbox.setEnabled(True)
        checkbox.setToolTip(self._default_checkbox_tooltip)
        return

    # Several rows checked: force the option off and explain why.
    checkbox.setChecked(False)
    checkbox.setEnabled(False)
    warning_text = self._tr(
        "update_check_multi_selection_warning",
        "Editing settings is disabled when multiple profiles are selected.",
    )
    checkbox.setToolTip(warning_text)
def on_check_selected(self):
"""Handles the 'Check Selected' button click."""
@@ -176,4 +219,21 @@ class UpdateCheckDialog(QDialog):
def get_selected_profiles(self):
    """Return the list of profile data selected by the user.

    Returns:
        The ``selected_profiles_list`` attribute itself (not a copy).
    """
    # The duplicated second ``return`` was unreachable and has been removed.
    return self.selected_profiles_list
def should_load_into_ui(self):
    """Tell whether the selected profile's settings should be loaded into the UI.

    The answer depends only on the selection size: it is True when exactly
    one profile is selected. The 'Enable Editing' checkbox is deliberately
    not consulted, so settings are loaded even when the user is not
    allowed to edit them.

    Returns:
        bool: True for a single selected profile, False otherwise.
    """
    selection = self.selected_profiles_list
    return len(selection) == 1
def should_enable_editing(self):
    """Tell whether the user is allowed to edit the loaded settings.

    Driven by the 'Enable Editing' checkbox: the result is truthy only
    when that checkbox is both enabled and checked.

    Returns:
        bool: True if editing should be unlocked, False otherwise.
    """
    checkbox = self.edit_settings_checkbox
    return checkbox.isEnabled() and checkbox.isChecked()

File diff suppressed because it is too large Load Diff

View File

@@ -137,6 +137,12 @@ def extract_post_info(url_string):
stripped_url = url_string.strip()
# --- DeviantArt Check ---
if 'deviantart.com' in stripped_url.lower() or 'fav.me' in stripped_url.lower():
# This MUST return 'deviantart' as the first element
return 'deviantart', 'placeholder_user', 'placeholder_id' # ----------------------
# --- Rule34Video Check ---
rule34video_match = re.search(r'rule34video\.com/video/(\d+)', stripped_url)
if rule34video_match:

View File

@@ -307,14 +307,18 @@ def setup_ui(main_app):
simpcity_settings_label = QLabel("⚙️ SimpCity Download Options:")
simpcity_settings_layout.addWidget(simpcity_settings_label)
# Checkbox row
# Checkbox row
simpcity_checkboxes_layout = QHBoxLayout()
main_app.simpcity_dl_images_cb = QCheckBox("Download Images")
main_app.simpcity_dl_images_cb.setChecked(True) # Checked by default
main_app.simpcity_dl_pixeldrain_cb = QCheckBox("Download Pixeldrain")
main_app.simpcity_dl_saint2_cb = QCheckBox("Download Saint2.su")
main_app.simpcity_dl_mega_cb = QCheckBox("Download Mega")
main_app.simpcity_dl_bunkr_cb = QCheckBox("Download Bunkr")
main_app.simpcity_dl_gofile_cb = QCheckBox("Download Gofile")
simpcity_checkboxes_layout.addWidget(main_app.simpcity_dl_images_cb)
simpcity_checkboxes_layout.addWidget(main_app.simpcity_dl_pixeldrain_cb)
simpcity_checkboxes_layout.addWidget(main_app.simpcity_dl_saint2_cb)
simpcity_checkboxes_layout.addWidget(main_app.simpcity_dl_mega_cb)
@@ -324,7 +328,6 @@ def setup_ui(main_app):
simpcity_settings_layout.addLayout(simpcity_checkboxes_layout)
# --- START NEW CODE ---
# Create the second, dedicated set of cookie controls for SimpCity
simpcity_cookie_layout = QHBoxLayout()
simpcity_cookie_layout.setContentsMargins(0, 5, 0, 0) # Add some top margin
simpcity_cookie_label = QLabel("Cookie:")
@@ -344,7 +347,6 @@ def setup_ui(main_app):
left_layout.addLayout(checkboxes_group_layout)
# --- Action Buttons & Remaining UI ---
# ... (The rest of the setup_ui function remains unchanged)
main_app.standard_action_buttons_widget = QWidget()
btn_layout = QHBoxLayout(main_app.standard_action_buttons_widget)
btn_layout.setContentsMargins(0, 10, 0, 0)
@@ -354,6 +356,11 @@ def setup_ui(main_app):
font.setBold(True)
main_app.download_btn.setFont(font)
main_app.download_btn.clicked.connect(main_app.start_download)
main_app.add_queue_btn = QPushButton(" Add to Queue")
main_app.add_queue_btn.setToolTip("Save current settings as a job for later execution.")
main_app.add_queue_btn.clicked.connect(main_app.add_current_settings_to_queue)
main_app.pause_btn = QPushButton("⏸️ Pause Download")
main_app.pause_btn.setEnabled(False)
main_app.pause_btn.clicked.connect(main_app._handle_pause_resume_action)
@@ -364,6 +371,7 @@ def setup_ui(main_app):
main_app.error_btn.setToolTip("View files skipped due to errors and optionally retry them.")
main_app.error_btn.setEnabled(True)
btn_layout.addWidget(main_app.download_btn)
btn_layout.addWidget(main_app.add_queue_btn)
btn_layout.addWidget(main_app.pause_btn)
btn_layout.addWidget(main_app.cancel_btn)
btn_layout.addWidget(main_app.error_btn)

View File

@@ -20,7 +20,6 @@
│ ├── DejaVuSansCondensed-BoldOblique.ttf
│ ├── DejaVuSansCondensed-Oblique.ttf
│ └── DejaVuSansCondensed.ttf
├── directory_tree.txt
├── main.py
├── src/
│ ├── __init__.py