Yuvi63771 2025-10-18 16:03:34 +05:30
parent 5d4e08f794
commit 9fe0c37127
25 changed files with 2502 additions and 2414 deletions

View File

@@ -1,36 +1,36 @@
import requests
import re
from bs4 import BeautifulSoup
import cloudscraper
import time
import random
from urllib.parse import urlparse
def get_chapter_list(series_url, logger_func):
def get_chapter_list(scraper, series_url, logger_func):
"""
Checks if a URL is a series page and returns a list of all chapter URLs if it is.
Includes a retry mechanism for robust connection.
Relies on a passed-in scraper session for connection.
"""
logger_func(f" [AllComic] Checking for chapter list at: {series_url}")
scraper = cloudscraper.create_scraper()
headers = {'Referer': 'https://allporncomic.com/'}
response = None
max_retries = 8
for attempt in range(max_retries):
try:
response = scraper.get(series_url, timeout=30)
response = scraper.get(series_url, headers=headers, timeout=30)
response.raise_for_status()
logger_func(f" [AllComic] Successfully connected to series page on attempt {attempt + 1}.")
break # Success, exit the loop
break
except requests.RequestException as e:
logger_func(f" [AllComic] ⚠️ Series page check attempt {attempt + 1}/{max_retries} failed: {e}")
if attempt < max_retries - 1:
wait_time = 2 * (attempt + 1)
logger_func(f" Retrying in {wait_time} seconds...")
wait_time = (2 ** attempt) + random.uniform(0, 2)
logger_func(f" Retrying in {wait_time:.1f} seconds...")
time.sleep(wait_time)
else:
logger_func(f" [AllComic] ❌ All attempts to check series page failed.")
return [] # Return empty on final failure
return []
if not response:
return []
@@ -44,7 +44,7 @@ def get_chapter_list(series_url, logger_func):
return []
chapter_urls = [link['href'] for link in chapter_links]
chapter_urls.reverse() # Reverse for oldest-to-newest reading order
chapter_urls.reverse()
logger_func(f" [AllComic] ✅ Found {len(chapter_urls)} chapters.")
return chapter_urls
@@ -53,15 +53,13 @@ def get_chapter_list(series_url, logger_func):
logger_func(f" [AllComic] ❌ Error parsing chapters after successful connection: {e}")
return []
def fetch_chapter_data(chapter_url, logger_func):
def fetch_chapter_data(scraper, chapter_url, logger_func):
"""
Fetches the comic title, chapter title, and image URLs for a single chapter page.
Relies on a passed-in scraper session for connection.
"""
logger_func(f" [AllComic] Fetching page: {chapter_url}")
scraper = cloudscraper.create_scraper(
browser={'browser': 'firefox', 'platform': 'windows', 'desktop': True}
)
headers = {'Referer': 'https://allporncomic.com/'}
response = None
@@ -72,16 +70,23 @@ def fetch_chapter_data(chapter_url, logger_func):
response.raise_for_status()
break
except requests.RequestException as e:
logger_func(f" [AllComic] ⚠️ Chapter page connection attempt {attempt + 1}/{max_retries} failed: {e}")
if attempt < max_retries - 1:
time.sleep(2 * (attempt + 1))
wait_time = (2 ** attempt) + random.uniform(0, 2)
logger_func(f" Retrying in {wait_time:.1f} seconds...")
time.sleep(wait_time)
else:
logger_func(f" [AllComic] ❌ All connection attempts failed for chapter: {chapter_url}")
return None, None, None
if not response:
return None, None, None
try:
soup = BeautifulSoup(response.text, 'html.parser')
comic_title = "Unknown Comic"
title_element = soup.find('h1', class_='post-title')
comic_title = None
if title_element:
comic_title = title_element.text.strip()
else:
@@ -91,7 +96,7 @@ def fetch_chapter_data(chapter_url, logger_func):
comic_slug = path_parts[-2]
comic_title = comic_slug.replace('-', ' ').title()
except Exception:
comic_title = "Unknown Comic"
pass
chapter_slug = chapter_url.strip('/').split('/')[-1]
chapter_title = chapter_slug.replace('-', ' ').title()
@@ -105,8 +110,8 @@ def fetch_chapter_data(chapter_url, logger_func):
if img_url:
list_of_image_urls.append(img_url)
if not comic_title or comic_title == "Unknown Comic" or not list_of_image_urls:
logger_func(f" [AllComic] ❌ Could not find a valid title or images on the page. Title found: '{comic_title}'")
if not list_of_image_urls:
logger_func(f" [AllComic] ❌ Could not find any images on the page.")
return None, None, None
return comic_title, chapter_title, list_of_image_urls
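Both functions in this file now follow the same retry pattern: up to eight attempts on the passed-in scraper session, with exponential backoff plus jitter between attempts. A minimal sketch of that pattern as a standalone helper (the helper name and defaults are illustrative, not part of this commit):

import random
import time

import requests

def get_with_retries(scraper, url, headers=None, max_retries=8, timeout=30, logger_func=print):
    """Fetch a URL with exponential backoff plus jitter; return the response or None."""
    for attempt in range(max_retries):
        try:
            response = scraper.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
            return response
        except requests.RequestException as e:
            logger_func(f"Attempt {attempt + 1}/{max_retries} failed: {e}")
            if attempt < max_retries - 1:
                wait_time = (2 ** attempt) + random.uniform(0, 2)  # ~1s, 2s, 4s, ... plus up to 2s of jitter
                logger_func(f"Retrying in {wait_time:.1f} seconds...")
                time.sleep(wait_time)
    return None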

View File

@@ -1,4 +1,3 @@
# src/core/booru_client.py
import os
import re

View File

@@ -164,17 +164,34 @@ class BunkrAlbumExtractor(Extractor):
def _extract_file(self, webpage_url):
page = self.request(webpage_url).text
data_id = extr(page, 'data-file-id="', '"')
referer = self.root_dl + "/file/" + data_id
headers = {"Referer": referer, "Origin": self.root_dl}
# This referer is for the API call only
api_referer = self.root_dl + "/file/" + data_id
headers = {"Referer": api_referer, "Origin": self.root_dl}
data = self.request_json(self.endpoint, method="POST", headers=headers, json={"id": data_id})
# Get the raw file URL (no domain replacement)
file_url = decrypt_xor(data["url"], f"SECRET_KEY_{data['timestamp'] // 3600}".encode()) if data.get("encrypted") else data["url"]
file_name = extr(page, "<h1", "<").rpartition(">")[2]
# --- NEW FIX ---
# The download thread uses a new `requests` call, so we must
# explicitly pass BOTH the User-Agent and the correct Referer.
# 1. Get the User-Agent from this extractor's session
user_agent = self.session.headers.get("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0")
# 2. Use the original album URL as the Referer
download_referer = self.url
return {
"url": file_url,
"name": unescape(file_name),
"_http_headers": {"Referer": referer}
"_http_headers": {
"Referer": download_referer,
"User-Agent": user_agent
}
}
class BunkrMediaExtractor(BunkrAlbumExtractor):
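For context, decrypt_xor is imported elsewhere in this module and is not part of this diff; the call above assumes it base64-decodes the API payload and XORs it with the hour-scoped key. A minimal sketch of that assumed behavior:

import binascii

def decrypt_xor(encrypted, key):
    # Assumed behavior: base64-decode the payload, then XOR with the repeating key bytes.
    data = binascii.a2b_base64(encrypted)
    return bytes(b ^ key[i % len(key)] for i, b in enumerate(data)).decode()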

View File

@@ -0,0 +1,107 @@
import cloudscraper
from bs4 import BeautifulSoup
import re
import html
def fetch_rule34video_data(video_url, logger_func):
"""
Scrapes a rule34video.com page by specifically finding the 'Download' div,
then selecting the best available quality link.
Args:
video_url (str): The full URL to the rule34video.com page.
logger_func (callable): Function to use for logging progress.
Returns:
tuple: (video_title, final_video_url) or (None, None) on failure.
"""
logger_func(f" [Rule34Video] Fetching page: {video_url}")
scraper = cloudscraper.create_scraper()
try:
main_page_response = scraper.get(video_url, timeout=20)
main_page_response.raise_for_status()
soup = BeautifulSoup(main_page_response.text, 'html.parser')
page_title_tag = soup.find('title')
video_title = page_title_tag.text.strip() if page_title_tag else "rule34video_file"
# --- START OF FINAL FIX ---
# 1. Find the SPECIFIC "Download" label first. This is the key.
download_label = soup.find('div', class_='label', string='Download')
if not download_label:
logger_func(" [Rule34Video] ❌ Could not find the 'Download' label. Unable to locate the correct links div.")
return None, None
# 2. The correct container is the parent of this label.
download_div = download_label.parent
# 3. Now, find the links ONLY within this correct container.
link_tags = download_div.find_all('a', class_='tag_item')
if not link_tags:
logger_func(" [Rule34Video] ❌ Found the 'Download' div, but no download links were inside it.")
return None, None
# --- END OF FINAL FIX ---
links_by_quality = {}
quality_pattern = re.compile(r'(\d+p|4k)')
for tag in link_tags:
href = tag.get('href')
if not href:
continue
quality = None
text_match = quality_pattern.search(tag.text)
if text_match:
quality = text_match.group(1)
else:
href_match = quality_pattern.search(href)
if href_match:
quality = href_match.group(1)
if quality:
links_by_quality[quality] = href
if not links_by_quality:
logger_func(" [Rule34Video] ⚠️ Could not parse specific qualities. Using first available link as a fallback.")
final_video_url = link_tags[0].get('href')
if not final_video_url:
logger_func(" [Rule34Video] ❌ Fallback failed: First link tag had no href attribute.")
return None, None
final_video_url = html.unescape(final_video_url)
logger_func(f" [Rule34Video] ✅ Selected first available link as fallback: {final_video_url}")
return video_title, final_video_url
logger_func(f" [Rule34Video] Found available qualities: {list(links_by_quality.keys())}")
final_video_url = None
if '1080p' in links_by_quality:
final_video_url = links_by_quality['1080p']
logger_func(" [Rule34Video] ✅ Selected preferred 1080p link.")
elif '720p' in links_by_quality:
final_video_url = links_by_quality['720p']
logger_func(" [Rule34Video] ✅ 1080p not found. Selected fallback 720p link.")
else:
fallback_order = ['480p', '360p']
for quality in fallback_order:
if quality in links_by_quality:
final_video_url = links_by_quality[quality]
logger_func(f" [Rule34Video] ⚠️ 1080p/720p not found. Selected best available fallback: {quality}")
break
if not final_video_url:
logger_func(" [Rule34Video] ❌ Could not find a suitable download link.")
return None, None
final_video_url = html.unescape(final_video_url)
logger_func(f" [Rule34Video] ✅ Selected direct download URL: {final_video_url}")
return video_title, final_video_url
except Exception as e:
logger_func(f" [Rule34Video] ❌ An error occurred: {e}")
return None, None

View File

@@ -17,8 +17,10 @@ def fetch_single_simpcity_page(url, logger_func, cookies=None, post_id=None):
try:
response = scraper.get(url, timeout=30, headers=headers, cookies=cookies)
final_url = response.url # Capture the final URL after any redirects
if response.status_code == 404:
return None, []
return None, [], final_url
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
@@ -91,9 +93,9 @@ def fetch_single_simpcity_page(url, logger_func, cookies=None, post_id=None):
# De-duplicate jobs by URL: a dict keyed on job['url'] keeps one job per URL found in multiple ways
unique_jobs = list({job['url']: job for job in jobs_on_page}.values())
logger_func(f" [SimpCity] Scraper found jobs: {[job['type'] for job in unique_jobs]}")
return album_title, unique_jobs
return album_title, unique_jobs, final_url
return album_title, []
return album_title, [], final_url
except Exception as e:
logger_func(f" [SimpCity] ❌ Error fetching page {url}: {e}")

View File

@@ -0,0 +1,137 @@
import os
import threading
import time
from urllib.parse import urlparse
import cloudscraper
import requests
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.allcomic_client import (fetch_chapter_data as allcomic_fetch_data,
get_chapter_list as allcomic_get_list)
from ...utils.file_utils import clean_folder_name
class AllcomicDownloadThread(QThread):
"""A dedicated QThread for handling allcomic.com downloads."""
progress_signal = pyqtSignal(str)
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool)
overall_progress_signal = pyqtSignal(int, int)
def __init__(self, url, output_dir, parent=None):
super().__init__(parent)
self.comic_url = url
self.output_dir = output_dir
self.is_cancelled = False
self.pause_event = parent.pause_event if hasattr(parent, 'pause_event') else threading.Event()
def _check_pause(self):
if self.is_cancelled: return True
if self.pause_event and self.pause_event.is_set():
self.progress_signal.emit(" Download paused...")
while self.pause_event.is_set():
if self.is_cancelled: return True
time.sleep(0.5)
self.progress_signal.emit(" Download resumed.")
return self.is_cancelled
def run(self):
grand_total_dl = 0
grand_total_skip = 0
# Create the scraper session ONCE for the entire job
scraper = cloudscraper.create_scraper(
browser={'browser': 'firefox', 'platform': 'windows', 'desktop': True}
)
# Pass the scraper to the function
chapters_to_download = allcomic_get_list(scraper, self.comic_url, self.progress_signal.emit)
if not chapters_to_download:
chapters_to_download = [self.comic_url]
self.progress_signal.emit(f"--- Starting download of {len(chapters_to_download)} chapter(s) ---")
for chapter_idx, chapter_url in enumerate(chapters_to_download):
if self._check_pause(): break
self.progress_signal.emit(f"\n-- Processing Chapter {chapter_idx + 1}/{len(chapters_to_download)} --")
# Pass the scraper to the function
comic_title, chapter_title, image_urls = allcomic_fetch_data(scraper, chapter_url, self.progress_signal.emit)
if not image_urls:
self.progress_signal.emit(f"❌ Failed to get data for chapter. Skipping.")
continue
series_folder_name = clean_folder_name(comic_title)
chapter_folder_name = clean_folder_name(chapter_title)
final_save_path = os.path.join(self.output_dir, series_folder_name, chapter_folder_name)
try:
os.makedirs(final_save_path, exist_ok=True)
self.progress_signal.emit(f" Saving to folder: '{os.path.join(series_folder_name, chapter_folder_name)}'")
except OSError as e:
self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
grand_total_skip += len(image_urls)
continue
total_files_in_chapter = len(image_urls)
self.overall_progress_signal.emit(total_files_in_chapter, 0)
headers = {'Referer': chapter_url}
for i, img_url in enumerate(image_urls):
if self._check_pause(): break
file_extension = os.path.splitext(urlparse(img_url).path)[1] or '.jpg'
filename = f"{i+1:03d}{file_extension}"
filepath = os.path.join(final_save_path, filename)
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip ({i+1}/{total_files_in_chapter}): '{filename}' already exists.")
grand_total_skip += 1
else:
download_successful = False
max_retries = 8
for attempt in range(max_retries):
if self._check_pause(): break
try:
self.progress_signal.emit(f" Downloading ({i+1}/{total_files_in_chapter}): '{filename}' (Attempt {attempt + 1})...")
# Use the persistent scraper object
response = scraper.get(img_url, stream=True, headers=headers, timeout=60)
response.raise_for_status()
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if self._check_pause(): break
f.write(chunk)
if self._check_pause():
if os.path.exists(filepath): os.remove(filepath)
break
download_successful = True
grand_total_dl += 1
break
except requests.RequestException as e:
self.progress_signal.emit(f" ⚠️ Attempt {attempt + 1} failed for '{filename}': {e}")
if attempt < max_retries - 1:
wait_time = 2 * (attempt + 1)
self.progress_signal.emit(f" Retrying in {wait_time} seconds...")
time.sleep(wait_time)
else:
self.progress_signal.emit(f" ❌ All attempts failed for '{filename}'. Skipping.")
grand_total_skip += 1
self.overall_progress_signal.emit(total_files_in_chapter, i + 1)
time.sleep(0.5) # Increased delay between images for this site
if self._check_pause(): break
self.file_progress_signal.emit("", None)
self.finished_signal.emit(grand_total_dl, grand_total_skip, self.is_cancelled)
def cancel(self):
self.is_cancelled = True
self.progress_signal.emit(" Cancellation signal received by AllComic thread.")

View File

@@ -0,0 +1,133 @@
import os
import threading
import time
import datetime
import requests
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.booru_client import fetch_booru_data, BooruClientException
from ...utils.file_utils import clean_folder_name
# Approximate the current Firefox major version (assumes roughly one major
# release every 28 days, counted from the date encoded by ordinal 735506).
_ff_ver = (datetime.date.today().toordinal() - 735506) // 28
USERAGENT_FIREFOX = (f"Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
f"rv:{_ff_ver}.0) Gecko/20100101 Firefox/{_ff_ver}.0")
class BooruDownloadThread(QThread):
"""A dedicated QThread for handling Danbooru and Gelbooru downloads."""
progress_signal = pyqtSignal(str)
overall_progress_signal = pyqtSignal(int, int)
finished_signal = pyqtSignal(int, int, bool) # dl_count, skip_count, cancelled
def __init__(self, url, output_dir, api_key, user_id, parent=None):
super().__init__(parent)
self.booru_url = url
self.output_dir = output_dir
self.api_key = api_key
self.user_id = user_id
self.is_cancelled = False
self.pause_event = parent.pause_event if hasattr(parent, 'pause_event') else threading.Event()
def run(self):
download_count = 0
skip_count = 0
processed_count = 0
cumulative_total = 0
def logger(msg):
self.progress_signal.emit(str(msg))
try:
self.progress_signal.emit("=" * 40)
self.progress_signal.emit(f"🚀 Starting Booru Download for: {self.booru_url}")
item_generator = fetch_booru_data(self.booru_url, self.api_key, self.user_id, logger)
download_path = self.output_dir # Default path
path_initialized = False
session = requests.Session()
session.headers["User-Agent"] = USERAGENT_FIREFOX
for item in item_generator:
if self.is_cancelled:
break
if isinstance(item, tuple) and item[0] == 'PAGE_UPDATE':
newly_found = item[1]
cumulative_total += newly_found
self.progress_signal.emit(f" Found {newly_found} more posts. Total so far: {cumulative_total}")
self.overall_progress_signal.emit(cumulative_total, processed_count)
continue
post_data = item
processed_count += 1
if not path_initialized:
base_folder_name = post_data.get('search_tags', 'booru_download')
download_path = os.path.join(self.output_dir, clean_folder_name(base_folder_name))
os.makedirs(download_path, exist_ok=True)
path_initialized = True
if self.pause_event.is_set():
self.progress_signal.emit(" Download paused...")
while self.pause_event.is_set():
if self.is_cancelled: break
time.sleep(0.5)
if self.is_cancelled: break
self.progress_signal.emit(" Download resumed.")
file_url = post_data.get('file_url')
if not file_url:
skip_count += 1
self.progress_signal.emit(f" -> Skip ({processed_count}/{cumulative_total}): Post ID {post_data.get('id')} has no file URL.")
continue
cat = post_data.get('category', 'booru')
post_id = post_data.get('id', 'unknown')
md5 = post_data.get('md5', '')
fname = post_data.get('filename', f"file_{post_id}")
ext = post_data.get('extension', 'jpg')
final_filename = f"{cat}_{post_id}_{md5 or fname}.{ext}"
filepath = os.path.join(download_path, final_filename)
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip ({processed_count}/{cumulative_total}): '{final_filename}' already exists.")
skip_count += 1
else:
try:
self.progress_signal.emit(f" Downloading ({processed_count}/{cumulative_total}): '{final_filename}'...")
response = session.get(file_url, stream=True, timeout=60)
response.raise_for_status()
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if self.is_cancelled: break
f.write(chunk)
if not self.is_cancelled:
download_count += 1
else:
if os.path.exists(filepath): os.remove(filepath)
skip_count += 1
except Exception as e:
self.progress_signal.emit(f" ❌ Failed to download '{final_filename}': {e}")
skip_count += 1
self.overall_progress_signal.emit(cumulative_total, processed_count)
time.sleep(0.2)
if not path_initialized:
self.progress_signal.emit("No posts found for the given URL/tags.")
except BooruClientException as e:
self.progress_signal.emit(f"❌ A Booru client error occurred: {e}")
except Exception as e:
self.progress_signal.emit(f"❌ An unexpected error occurred in Booru thread: {e}")
finally:
self.finished_signal.emit(download_count, skip_count, self.is_cancelled)
def cancel(self):
self.is_cancelled = True
self.progress_signal.emit(" Cancellation signal received by Booru thread.")

View File

@@ -0,0 +1,195 @@
import os
import re
import time
import requests
import threading
from concurrent.futures import ThreadPoolExecutor
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.bunkr_client import fetch_bunkr_data
# Define image extensions
IMG_EXTS = ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.avif')
BUNKR_IMG_THREADS = 6 # Hardcoded thread count for images
class BunkrDownloadThread(QThread):
"""A dedicated QThread for handling Bunkr downloads."""
progress_signal = pyqtSignal(str)
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool, list)
def __init__(self, url, output_dir, parent=None):
super().__init__(parent)
self.bunkr_url = url
self.output_dir = output_dir
self.is_cancelled = False
# --- NEW: Threading members ---
self.lock = threading.Lock()
self.download_count = 0
self.skip_count = 0
self.file_index = 0 # Use a shared index for logging
class ThreadLogger:
def __init__(self, signal_emitter):
self.signal_emitter = signal_emitter
def info(self, msg, *args, **kwargs):
self.signal_emitter.emit(str(msg))
def error(self, msg, *args, **kwargs):
self.signal_emitter.emit(f"❌ ERROR: {msg}")
def warning(self, msg, *args, **kwargs):
self.signal_emitter.emit(f"⚠️ WARNING: {msg}")
def debug(self, msg, *args, **kwargs):
pass
self.logger = ThreadLogger(self.progress_signal)
def _download_file(self, file_data, total_files, album_path, is_image_task=False):
"""
A thread-safe method to download a single file.
Called from this QThread's run() for videos/other files,
and from ThreadPoolExecutor worker threads for images.
"""
# Stop if a cancellation signal was received before starting
if self.is_cancelled:
return
# --- Thread-safe index for logging ---
with self.lock:
self.file_index += 1
current_file_num = self.file_index
try:
filename = file_data.get('name', 'untitled_file')
file_url = file_data.get('url')
headers = file_data.get('_http_headers')
filename = re.sub(r'[<>:"/\\|?*]', '_', filename).strip()
filepath = os.path.join(album_path, filename)
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip ({current_file_num}/{total_files}): '{filename}' already exists.")
with self.lock:
self.skip_count += 1
return
self.progress_signal.emit(f" Downloading ({current_file_num}/{total_files}): '{filename}'...")
response = requests.get(file_url, stream=True, headers=headers, timeout=60)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
downloaded_size = 0
last_update_time = time.time()
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if self.is_cancelled:
break
if chunk:
f.write(chunk)
downloaded_size += len(chunk)
# For videos/other files, send frequent progress
# For images, don't send progress to avoid UI flicker
if not is_image_task:
current_time = time.time()
if total_size > 0 and (current_time - last_update_time) > 0.5:
self.file_progress_signal.emit(filename, (downloaded_size, total_size))
last_update_time = current_time
if self.is_cancelled:
self.progress_signal.emit(f" Download cancelled for '{filename}'.")
if os.path.exists(filepath): os.remove(filepath)
return
if total_size > 0:
self.file_progress_signal.emit(filename, (total_size, total_size))
with self.lock:
self.download_count += 1
except requests.exceptions.RequestException as e:
self.progress_signal.emit(f" ❌ Failed to download '{filename}'. Error: {e}")
if os.path.exists(filepath): os.remove(filepath)
with self.lock:
self.skip_count += 1
except Exception as e:
self.progress_signal.emit(f" ❌ An unexpected error occurred with '{filename}': {e}")
if os.path.exists(filepath): os.remove(filepath)
with self.lock:
self.skip_count += 1
def run(self):
self.progress_signal.emit("=" * 40)
self.progress_signal.emit(f"🚀 Starting Bunkr Download for: {self.bunkr_url}")
album_name, files_to_download = fetch_bunkr_data(self.bunkr_url, self.logger)
if not files_to_download:
self.progress_signal.emit("❌ Failed to extract file information from Bunkr. Aborting.")
self.finished_signal.emit(0, 0, self.is_cancelled, [])
return
album_path = os.path.join(self.output_dir, album_name)
try:
os.makedirs(album_path, exist_ok=True)
self.progress_signal.emit(f" Saving to folder: '{album_path}'")
except OSError as e:
self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
self.finished_signal.emit(0, len(files_to_download), self.is_cancelled, [])
return
total_files = len(files_to_download)
# --- NEW: Separate files into images and others ---
image_files = []
other_files = []
for f in files_to_download:
name = f.get('name', '').lower()
if name.endswith(IMG_EXTS):
image_files.append(f)
else:
other_files.append(f)
self.progress_signal.emit(f" Found {len(image_files)} images and {len(other_files)} other files.")
# --- 1. Process videos and other files sequentially (one by one) ---
if other_files:
self.progress_signal.emit(f" Downloading {len(other_files)} videos/other files sequentially...")
for file_data in other_files:
if self.is_cancelled:
break
# Call the new download helper method
self._download_file(file_data, total_files, album_path, is_image_task=False)
# --- 2. Process images concurrently using a fixed 6-thread pool ---
if image_files and not self.is_cancelled:
self.progress_signal.emit(f" Downloading {len(image_files)} images concurrently ({BUNKR_IMG_THREADS} threads)...")
with ThreadPoolExecutor(max_workers=BUNKR_IMG_THREADS, thread_name_prefix='BunkrImg') as executor:
# Submit all image download tasks
futures = {executor.submit(self._download_file, file_data, total_files, album_path, is_image_task=True): file_data for file_data in image_files}
try:
# Wait for tasks to complete, but check for cancellation
for future in futures:
if self.is_cancelled:
future.cancel() # Cancels tasks that have not started yet; already-running tasks finish on their own
else:
future.result() # Wait for the task to finish (or raise exception)
except Exception as e:
self.progress_signal.emit(f" ❌ A thread pool error occurred: {e}")
if self.is_cancelled:
self.progress_signal.emit(" Download cancelled by user.")
# Update skip count to reflect all non-downloaded files
self.skip_count = total_files - self.download_count
self.file_progress_signal.emit("", None) # Clear file progress
self.finished_signal.emit(self.download_count, self.skip_count, self.is_cancelled, [])
def cancel(self):
self.is_cancelled = True
self.progress_signal.emit(" Cancellation signal received by Bunkr thread.")

View File

@@ -0,0 +1,189 @@
import os
import time
import datetime
import requests
from PyQt5.QtCore import QThread, pyqtSignal
# Assuming discord_pdf_generator is in the dialogs folder, sibling to the classes folder
from ..dialogs.discord_pdf_generator import create_pdf_from_discord_messages
# This constant is needed for the thread to function independently
_ff_ver = (datetime.date.today().toordinal() - 735506) // 28
USERAGENT_FIREFOX = (f"Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
f"rv:{_ff_ver}.0) Gecko/20100101 Firefox/{_ff_ver}.0")
class DiscordDownloadThread(QThread):
"""A dedicated QThread for handling all official Discord downloads."""
progress_signal = pyqtSignal(str)
progress_label_signal = pyqtSignal(str)
finished_signal = pyqtSignal(int, int, bool, list)
def __init__(self, mode, session, token, output_dir, server_id, channel_id, url, app_base_dir, limit=None, parent=None):
super().__init__(parent)
self.mode = mode
self.session = session
self.token = token
self.output_dir = output_dir
self.server_id = server_id
self.channel_id = channel_id
self.api_url = url
self.message_limit = limit
self.app_base_dir = app_base_dir # Path to app's base directory
self.is_cancelled = False
self.is_paused = False
def run(self):
if self.mode == 'pdf':
self._run_pdf_creation()
else:
self._run_file_download()
def cancel(self):
self.progress_signal.emit(" Cancellation signal received by Discord thread.")
self.is_cancelled = True
def pause(self):
self.progress_signal.emit(" Pausing Discord download...")
self.is_paused = True
def resume(self):
self.progress_signal.emit(" Resuming Discord download...")
self.is_paused = False
def _check_events(self):
if self.is_cancelled:
return True
while self.is_paused:
time.sleep(0.5)
if self.is_cancelled:
return True
return False
def _fetch_all_messages(self):
all_messages = []
last_message_id = None
headers = {'Authorization': self.token, 'User-Agent': USERAGENT_FIREFOX}
while True:
if self._check_events(): break
endpoint = f"/channels/{self.channel_id}/messages?limit=100"
if last_message_id:
endpoint += f"&before={last_message_id}"
try:
resp = self.session.get(f"https://discord.com/api/v10{endpoint}", headers=headers, timeout=30)
resp.raise_for_status()
message_batch = resp.json()
except Exception as e:
self.progress_signal.emit(f" ❌ Error fetching message batch: {e}")
break
if not message_batch:
break
all_messages.extend(message_batch)
if self.message_limit and len(all_messages) >= self.message_limit:
self.progress_signal.emit(f" Reached message limit of {self.message_limit}. Halting fetch.")
all_messages = all_messages[:self.message_limit]
break
last_message_id = message_batch[-1]['id']
self.progress_label_signal.emit(f"Fetched {len(all_messages)} messages...")
time.sleep(1) # API Rate Limiting
return all_messages
def _run_pdf_creation(self):
self.progress_signal.emit("=" * 40)
self.progress_signal.emit(f"🚀 Starting Discord PDF export for: {self.api_url}")
self.progress_label_signal.emit("Fetching messages...")
all_messages = self._fetch_all_messages()
if self.is_cancelled:
self.finished_signal.emit(0, 0, True, [])
return
self.progress_label_signal.emit(f"Collected {len(all_messages)} total messages. Generating PDF...")
all_messages.reverse()
font_path = os.path.join(self.app_base_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf')
output_filepath = os.path.join(self.output_dir, f"discord_{self.server_id}_{self.channel_id or 'server'}.pdf")
success = create_pdf_from_discord_messages(
all_messages, self.server_id, self.channel_id,
output_filepath, font_path, logger=self.progress_signal.emit,
cancellation_event=self, pause_event=self
)
if success:
self.progress_label_signal.emit(f"✅ PDF export complete!")
elif not self.is_cancelled:
self.progress_label_signal.emit(f"❌ PDF export failed. Check log for details.")
self.finished_signal.emit(0, len(all_messages), self.is_cancelled, [])
def _run_file_download(self):
download_count = 0
skip_count = 0
try:
self.progress_signal.emit("=" * 40)
self.progress_signal.emit(f"🚀 Starting Discord download for channel: {self.channel_id}")
self.progress_label_signal.emit("Fetching messages...")
all_messages = self._fetch_all_messages()
if self.is_cancelled:
self.finished_signal.emit(0, 0, True, [])
return
self.progress_label_signal.emit(f"Collected {len(all_messages)} messages. Starting downloads...")
total_attachments = sum(len(m.get('attachments', [])) for m in all_messages)
for message in reversed(all_messages):
if self._check_events(): break
for attachment in message.get('attachments', []):
if self._check_events(): break
file_url = attachment['url']
original_filename = attachment['filename']
filepath = os.path.join(self.output_dir, original_filename)
filename_to_use = original_filename
counter = 1
base_name, extension = os.path.splitext(original_filename)
while os.path.exists(filepath):
filename_to_use = f"{base_name} ({counter}){extension}"
filepath = os.path.join(self.output_dir, filename_to_use)
counter += 1
if filename_to_use != original_filename:
self.progress_signal.emit(f" -> Duplicate name '{original_filename}'. Saving as '{filename_to_use}'.")
try:
self.progress_signal.emit(f" Downloading ({download_count+1}/{total_attachments}): '{filename_to_use}'...")
response = requests.get(file_url, stream=True, timeout=60)
response.raise_for_status()
download_cancelled_mid_file = False
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if self._check_events():
download_cancelled_mid_file = True
break
f.write(chunk)
if download_cancelled_mid_file:
self.progress_signal.emit(f" Download cancelled for '{filename_to_use}'. Deleting partial file.")
if os.path.exists(filepath):
os.remove(filepath)
continue
download_count += 1
except Exception as e:
self.progress_signal.emit(f" ❌ Failed to download '{filename_to_use}': {e}")
skip_count += 1
finally:
self.finished_signal.emit(download_count, skip_count, self.is_cancelled, [])

View File

@@ -0,0 +1,133 @@
import re
import requests
from urllib.parse import urlparse
from ...utils.network_utils import prepare_cookies_for_request
from ...utils.file_utils import clean_folder_name
from .allcomic_downloader_thread import AllcomicDownloadThread
from .booru_downloader_thread import BooruDownloadThread
from .bunkr_downloader_thread import BunkrDownloadThread
from .discord_downloader_thread import DiscordDownloadThread
from .drive_downloader_thread import DriveDownloadThread
from .erome_downloader_thread import EromeDownloadThread
from .external_link_downloader_thread import ExternalLinkDownloadThread
from .fap_nation_downloader_thread import FapNationDownloadThread
from .hentai2read_downloader_thread import Hentai2readDownloadThread
from .mangadex_downloader_thread import MangaDexDownloadThread
from .nhentai_downloader_thread import NhentaiDownloadThread
from .pixeldrain_downloader_thread import PixeldrainDownloadThread
from .saint2_downloader_thread import Saint2DownloadThread
from .simp_city_downloader_thread import SimpCityDownloadThread
from .toonily_downloader_thread import ToonilyDownloadThread
from .rule34video_downloader_thread import Rule34VideoDownloadThread
def create_downloader_thread(main_app, api_url, service, id1, id2, effective_output_dir_for_run):
"""
Factory function to create and configure the correct QThread for a given URL.
Returns a configured QThread instance or None if no special handler is found.
"""
# Handler for Booru sites (Danbooru, Gelbooru)
if service in ['danbooru', 'gelbooru']:
api_key = main_app.api_key_input.text().strip()
user_id = main_app.user_id_input.text().strip()
return BooruDownloadThread(
url=api_url, output_dir=effective_output_dir_for_run,
api_key=api_key, user_id=user_id, parent=main_app
)
# Handler for cloud storage sites (Mega, GDrive, etc.)
platform = None
if 'mega.nz' in api_url or 'mega.io' in api_url: platform = 'mega'
elif 'drive.google.com' in api_url: platform = 'gdrive'
elif 'dropbox.com' in api_url: platform = 'dropbox'
elif 'gofile.io' in api_url: platform = 'gofile'
if platform:
use_post_subfolder = main_app.use_subfolder_per_post_checkbox.isChecked()
return DriveDownloadThread(
api_url, effective_output_dir_for_run, platform, use_post_subfolder,
main_app.cancellation_event, main_app.pause_event, main_app.log_signal.emit
)
# Handler for Erome
if 'erome.com' in api_url:
return EromeDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for MangaDex
if 'mangadex.org' in api_url:
return MangaDexDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Saint2
is_saint2_url = 'saint2.su' in api_url or 'saint2.pk' in api_url
if is_saint2_url and api_url.strip().lower() != 'saint2.su': # Exclude batch mode trigger
return Saint2DownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for SimpCity
if service == 'simpcity':
cookies = prepare_cookies_for_request(
use_cookie_flag=True, cookie_text_input=main_app.cookie_text_input.text(),
selected_cookie_file_path=main_app.selected_cookie_filepath,
app_base_dir=main_app.app_base_dir, logger_func=main_app.log_signal.emit,
target_domain='simpcity.cr'
)
if not cookies:
# The main app will handle the error dialog
return "COOKIE_ERROR"
return SimpCityDownloadThread(api_url, id2, effective_output_dir_for_run, cookies, main_app)
if service == 'rule34video':
main_app.log_signal.emit(" Rule34Video.com URL detected. Starting dedicated downloader.")
# id1 contains the video_id from extract_post_info
return Rule34VideoDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for official Discord URLs
if 'discord.com' in api_url and service == 'discord':
token = main_app.remove_from_filename_input.text().strip()
limit_text = main_app.discord_message_limit_input.text().strip()
message_limit = int(limit_text) if limit_text else None
mode = 'pdf' if main_app.discord_download_scope == 'messages' else 'files'
return DiscordDownloadThread(
mode=mode, session=requests.Session(), token=token, output_dir=effective_output_dir_for_run,
server_id=id1, channel_id=id2, url=api_url, app_base_dir=main_app.app_base_dir,
limit=message_limit, parent=main_app
)
# Handler for Allcomic/Allporncomic
if 'allcomic.com' in api_url or 'allporncomic.com' in api_url:
return AllcomicDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Hentai2Read
if 'hentai2read.com' in api_url:
return Hentai2readDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Fap-Nation
if 'fap-nation.com' in api_url or 'fap-nation.org' in api_url:
use_post_subfolder = main_app.use_subfolder_per_post_checkbox.isChecked()
return FapNationDownloadThread(
api_url, effective_output_dir_for_run, use_post_subfolder,
main_app.pause_event, main_app.cancellation_event, main_app.actual_gui_signals, main_app
)
# Handler for Pixeldrain
if 'pixeldrain.com' in api_url:
return PixeldrainDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for nHentai
if service == 'nhentai':
from ...core.nhentai_client import fetch_nhentai_gallery
gallery_data = fetch_nhentai_gallery(id1, main_app.log_signal.emit)
if not gallery_data:
return "FETCH_ERROR" # Sentinel value for fetch failure
return NhentaiDownloadThread(gallery_data, effective_output_dir_for_run, main_app)
# Handler for Toonily
if 'toonily.com' in api_url:
return ToonilyDownloadThread(api_url, effective_output_dir_for_run, main_app)
# Handler for Bunkr
if service == 'bunkr':
return BunkrDownloadThread(id1, effective_output_dir_for_run, main_app)
# If no special handler matched, return None
return None
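A minimal sketch of how a caller might consume this factory, covering the None and string-sentinel returns (caller-side handling is illustrative, not part of this commit):

thread = create_downloader_thread(main_app, api_url, service, id1, id2, effective_output_dir_for_run)
if thread is None:
    ...  # no dedicated handler matched; fall back to the generic download path
elif thread == "COOKIE_ERROR":
    ...  # main app shows its cookie-error dialog (see the SimpCity branch above)
elif thread == "FETCH_ERROR":
    ...  # nHentai gallery metadata could not be fetched
else:
    thread.start()  # a configured QThread; connect its signals before starting in real code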

View File

@@ -0,0 +1,77 @@
from PyQt5.QtCore import QThread, pyqtSignal
from ...services.drive_downloader import (
download_dropbox_file,
download_gdrive_file,
download_gofile_folder,
download_mega_file as drive_download_mega_file,
)
class DriveDownloadThread(QThread):
"""A dedicated QThread for handling direct Mega, GDrive, and Dropbox links."""
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool, list)
overall_progress_signal = pyqtSignal(int, int)
def __init__(self, url, output_dir, platform, use_post_subfolder, cancellation_event, pause_event, logger_func, parent=None):
super().__init__(parent)
self.drive_url = url
self.output_dir = output_dir
self.platform = platform
self.use_post_subfolder = use_post_subfolder
self.is_cancelled = False
self.cancellation_event = cancellation_event
self.pause_event = pause_event
self.logger_func = logger_func
def run(self):
self.logger_func("=" * 40)
self.logger_func(f"🚀 Starting direct {self.platform.capitalize()} Download for: {self.drive_url}")
try:
if self.platform == 'mega':
drive_download_mega_file(
self.drive_url, self.output_dir,
logger_func=self.logger_func,
progress_callback_func=self.file_progress_signal.emit,
overall_progress_callback=self.overall_progress_signal.emit,
cancellation_event=self.cancellation_event,
pause_event=self.pause_event
)
elif self.platform == 'gdrive':
download_gdrive_file(
self.drive_url, self.output_dir,
logger_func=self.logger_func,
progress_callback_func=self.file_progress_signal.emit,
overall_progress_callback=self.overall_progress_signal.emit,
use_post_subfolder=self.use_post_subfolder,
post_title="Google Drive Download"
)
elif self.platform == 'dropbox':
download_dropbox_file(
self.drive_url, self.output_dir,
logger_func=self.logger_func,
progress_callback_func=self.file_progress_signal.emit,
use_post_subfolder=self.use_post_subfolder,
post_title="Dropbox Download"
)
elif self.platform == 'gofile':
download_gofile_folder(
self.drive_url, self.output_dir,
logger_func=self.logger_func,
progress_callback_func=self.file_progress_signal.emit,
overall_progress_callback=self.overall_progress_signal.emit
)
self.finished_signal.emit(1, 0, self.is_cancelled, [])
except Exception as e:
self.logger_func(f"❌ An unexpected error occurred in DriveDownloadThread: {e}")
self.finished_signal.emit(0, 1, self.is_cancelled, [])
def cancel(self):
self.is_cancelled = True
if self.cancellation_event:
self.cancellation_event.set()
self.logger_func(f" Cancellation signal received by {self.platform.capitalize()} thread.")

View File

@@ -0,0 +1,106 @@
import os
import time
import requests
import cloudscraper
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.erome_client import fetch_erome_data
class EromeDownloadThread(QThread):
"""A dedicated QThread for handling erome.com downloads."""
progress_signal = pyqtSignal(str)
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool) # dl_count, skip_count, cancelled
def __init__(self, url, output_dir, parent=None):
super().__init__(parent)
self.erome_url = url
self.output_dir = output_dir
self.is_cancelled = False
def run(self):
download_count = 0
skip_count = 0
self.progress_signal.emit("=" * 40)
self.progress_signal.emit(f"🚀 Starting Erome.com Download for: {self.erome_url}")
album_name, files_to_download = fetch_erome_data(self.erome_url, self.progress_signal.emit)
if not files_to_download:
self.progress_signal.emit("❌ Failed to extract file information from Erome. Aborting.")
self.finished_signal.emit(0, 0, self.is_cancelled)
return
album_path = os.path.join(self.output_dir, album_name)
try:
os.makedirs(album_path, exist_ok=True)
self.progress_signal.emit(f" Saving to folder: '{album_path}'")
except OSError as e:
self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
self.finished_signal.emit(0, len(files_to_download), self.is_cancelled)
return
total_files = len(files_to_download)
session = cloudscraper.create_scraper()
for i, file_data in enumerate(files_to_download):
if self.is_cancelled:
self.progress_signal.emit(" Download cancelled by user.")
skip_count = total_files - download_count
break
filename = file_data.get('filename', f'untitled_{i+1}.mp4')
file_url = file_data.get('url')
headers = file_data.get('headers')
filepath = os.path.join(album_path, filename)
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip ({i+1}/{total_files}): '{filename}' already exists.")
skip_count += 1
continue
self.progress_signal.emit(f" Downloading ({i+1}/{total_files}): '{filename}'...")
try:
response = session.get(file_url, stream=True, headers=headers, timeout=60)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
downloaded_size = 0
last_update_time = time.time()
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if self.is_cancelled:
break
if chunk:
f.write(chunk)
downloaded_size += len(chunk)
current_time = time.time()
if total_size > 0 and (current_time - last_update_time) > 0.5:
self.file_progress_signal.emit(filename, (downloaded_size, total_size))
last_update_time = current_time
if self.is_cancelled:
if os.path.exists(filepath): os.remove(filepath)
continue
if total_size > 0:
self.file_progress_signal.emit(filename, (total_size, total_size))
download_count += 1
except requests.exceptions.RequestException as e:
self.progress_signal.emit(f" ❌ Failed to download '{filename}'. Error: {e}")
if os.path.exists(filepath): os.remove(filepath)
skip_count += 1
except Exception as e:
self.progress_signal.emit(f" ❌ An unexpected error occurred with '{filename}': {e}")
if os.path.exists(filepath): os.remove(filepath)
skip_count += 1
self.file_progress_signal.emit("", None)
self.finished_signal.emit(download_count, skip_count, self.is_cancelled)
def cancel(self):
self.is_cancelled = True
self.progress_signal.emit(" Cancellation signal received by Erome thread.")

View File

@@ -0,0 +1,86 @@
from PyQt5.QtCore import QThread, pyqtSignal
from ...services.drive_downloader import (
download_dropbox_file,
download_gdrive_file,
download_mega_file as drive_download_mega_file,
)
class ExternalLinkDownloadThread(QThread):
"""A QThread to handle downloading multiple external links sequentially."""
progress_signal = pyqtSignal(str)
file_complete_signal = pyqtSignal(str, bool)
finished_signal = pyqtSignal()
overall_progress_signal = pyqtSignal(int, int)
file_progress_signal = pyqtSignal(str, object)
def __init__(self, tasks_to_download, download_base_path, parent_logger_func, parent=None, use_post_subfolder=False):
super().__init__(parent)
self.tasks = tasks_to_download
self.download_base_path = download_base_path
self.parent_logger_func = parent_logger_func
self.is_cancelled = False
self.use_post_subfolder = use_post_subfolder
def run(self):
total_tasks = len(self.tasks)
self.progress_signal.emit(f" Starting external link download thread for {total_tasks} link(s).")
self.overall_progress_signal.emit(total_tasks, 0)
for i, task_info in enumerate(self.tasks):
if self.is_cancelled:
self.progress_signal.emit("External link download cancelled by user.")
break
self.overall_progress_signal.emit(total_tasks, i + 1)
platform = task_info.get('platform', 'unknown').lower()
full_url = task_info['url']
post_title = task_info['title']
self.progress_signal.emit(f"Download ({i + 1}/{total_tasks}): Starting '{post_title}' ({platform.upper()}) from {full_url}")
try:
if platform == 'mega':
drive_download_mega_file(
full_url,
self.download_base_path,
logger_func=self.parent_logger_func,
progress_callback_func=self.file_progress_signal.emit,
overall_progress_callback=self.overall_progress_signal.emit
)
elif platform == 'google drive':
download_gdrive_file(
full_url,
self.download_base_path,
logger_func=self.parent_logger_func,
progress_callback_func=self.file_progress_signal.emit,
overall_progress_callback=self.overall_progress_signal.emit,
use_post_subfolder=self.use_post_subfolder,
post_title=post_title
)
elif platform == 'dropbox':
download_dropbox_file(
full_url,
self.download_base_path,
logger_func=self.parent_logger_func,
progress_callback_func=self.file_progress_signal.emit,
use_post_subfolder=self.use_post_subfolder,
post_title=post_title
)
else:
self.progress_signal.emit(f"⚠️ Unsupported platform '{platform}' for link: {full_url}")
self.file_complete_signal.emit(full_url, False)
continue
self.file_complete_signal.emit(full_url, True)
except Exception as e:
self.progress_signal.emit(f"❌ Error downloading ({platform.upper()}) link '{full_url}': {e}")
self.file_complete_signal.emit(full_url, False)
self.finished_signal.emit()
def cancel(self):
"""Sets the cancellation flag to stop the thread gracefully."""
self.progress_signal.emit(" [External Links] Cancellation signal received by thread.")
self.is_cancelled = True

View File

@@ -0,0 +1,162 @@
import os
import sys
import re
import threading
import time
from PyQt5.QtCore import QThread, pyqtSignal, QProcess
import cloudscraper
from ...core.fap_nation_client import fetch_fap_nation_data
from ...services.multipart_downloader import download_file_in_parts
class FapNationDownloadThread(QThread):
"""
A dedicated QThread for Fap-Nation that uses a hybrid approach, choosing
between yt-dlp for HLS streams and a multipart downloader for direct links.
"""
progress_signal = pyqtSignal(str)
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool)
overall_progress_signal = pyqtSignal(int, int)
def __init__(self, url, output_dir, use_post_subfolder, pause_event, cancellation_event, gui_signals, parent=None):
super().__init__(parent)
self.album_url = url
self.output_dir = output_dir
self.use_post_subfolder = use_post_subfolder
self.is_cancelled = False
self.process = None
self.current_filename = "Unknown File"
self.album_name = "fap-nation_album"
self.pause_event = pause_event
self.cancellation_event = cancellation_event
self.gui_signals = gui_signals
self._is_finished = False
self.process = QProcess(self)
self.process.readyReadStandardOutput.connect(self.handle_ytdlp_output)
def run(self):
self.progress_signal.emit("=" * 40)
self.progress_signal.emit(f"🚀 Starting Fap-Nation Download for: {self.album_url}")
self.album_name, files_to_download = fetch_fap_nation_data(self.album_url, self.progress_signal.emit)
if self.is_cancelled or not files_to_download:
self.progress_signal.emit("❌ Failed to extract file information. Aborting.")
self.finished_signal.emit(0, 1, self.is_cancelled)
return
self.overall_progress_signal.emit(1, 0)
save_path = self.output_dir
if self.use_post_subfolder:
save_path = os.path.join(self.output_dir, self.album_name)
self.progress_signal.emit(f" Subfolder per Post is ON. Saving to: '{self.album_name}'")
os.makedirs(save_path, exist_ok=True)
file_data = files_to_download[0]
self.current_filename = file_data.get('filename')
download_url = file_data.get('url')
link_type = file_data.get('type')
filepath = os.path.join(save_path, self.current_filename)
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip: '{self.current_filename}' already exists.")
self.overall_progress_signal.emit(1, 1)
self.finished_signal.emit(0, 1, self.is_cancelled)
return
if link_type == 'hls':
self.download_with_ytdlp(filepath, download_url)
elif link_type == 'direct':
self.download_with_multipart(filepath, download_url)
else:
self.progress_signal.emit(f" ❌ Unknown link type '{link_type}'. Aborting.")
self._on_ytdlp_finished(-1)
def download_with_ytdlp(self, filepath, playlist_url):
self.progress_signal.emit(f" Downloading (HLS Stream): '{self.current_filename}' using yt-dlp...")
try:
if getattr(sys, 'frozen', False):
base_path = sys._MEIPASS
ytdlp_path = os.path.join(base_path, "yt-dlp.exe")
else:
ytdlp_path = "yt-dlp.exe"
if not os.path.exists(ytdlp_path):
self.progress_signal.emit(f" ❌ ERROR: yt-dlp.exe not found at '{ytdlp_path}'.")
self._on_ytdlp_finished(-1)
return
command = [ytdlp_path, '--no-warnings', '--progress', '--output', filepath, '--merge-output-format', 'mp4', playlist_url]
self.process.start(command[0], command[1:])
self.process.waitForFinished(-1)
self._on_ytdlp_finished(self.process.exitCode())
except Exception as e:
self.progress_signal.emit(f" ❌ Failed to start yt-dlp: {e}")
self._on_ytdlp_finished(-1)
def download_with_multipart(self, filepath, direct_url):
self.progress_signal.emit(f" Downloading (Direct Link): '{self.current_filename}' using multipart downloader...")
try:
session = cloudscraper.create_scraper()
head_response = session.head(direct_url, allow_redirects=True, timeout=20)
head_response.raise_for_status()
total_size = int(head_response.headers.get('content-length', 0))
success, _, _, _ = download_file_in_parts(
file_url=direct_url, save_path=filepath, total_size=total_size, num_parts=5,
headers=session.headers, api_original_filename=self.current_filename,
emitter_for_multipart=self.gui_signals,
cookies_for_chunk_session=session.cookies,
cancellation_event=self.cancellation_event,
skip_event=None, logger_func=self.progress_signal.emit, pause_event=self.pause_event
)
self._on_ytdlp_finished(0 if success else 1)
except Exception as e:
self.progress_signal.emit(f" ❌ Multipart download failed: {e}")
self._on_ytdlp_finished(1)
def handle_ytdlp_output(self):
if not self.process:
return
output = self.process.readAllStandardOutput().data().decode('utf-8', errors='ignore')
for line in reversed(output.strip().splitlines()):
line = line.strip()
progress_match = re.search(r'\[download\]\s+([\d.]+)%\s+of\s+~?\s*([\d.]+\w+B)', line)
if progress_match:
percent, size = progress_match.groups()
self.file_progress_signal.emit("yt-dlp:", f"{percent}% of {size}")
break
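For reference, a small check of the regex above against the line shape it expects from yt-dlp's progress output (the sample line is illustrative, not captured output):

import re
pattern = re.compile(r'\[download\]\s+([\d.]+)%\s+of\s+~?\s*([\d.]+\w+B)')
sample = "[download]  42.7% of ~ 512.33MiB at 3.21MiB/s ETA 00:42"
print(pattern.search(sample).groups())  # ('42.7', '512.33MiB')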
def _on_ytdlp_finished(self, exit_code):
if self._is_finished:
return
self._is_finished = True
download_count, skip_count = 0, 0
if self.is_cancelled:
self.progress_signal.emit(f" Download of '{self.current_filename}' was cancelled.")
skip_count = 1
elif exit_code == 0:
self.progress_signal.emit(f" ✅ Download process finished successfully for '{self.current_filename}'.")
download_count = 1
else:
self.progress_signal.emit(f" ❌ Download process exited with an error (Code: {exit_code}) for '{self.current_filename}'.")
skip_count = 1
self.overall_progress_signal.emit(1, 1)
self.process = None
self.finished_signal.emit(download_count, skip_count, self.is_cancelled)
def cancel(self):
self.is_cancelled = True
self.cancellation_event.set()
if self.process and self.process.state() == QProcess.Running:
self.progress_signal.emit(" Cancellation signal received, terminating yt-dlp process.")
self.process.kill()

View File

@@ -0,0 +1,51 @@
import threading
import time
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.Hentai2read_client import run_hentai2read_download as h2r_run_download
class Hentai2readDownloadThread(QThread):
"""
A dedicated QThread that calls the self-contained Hentai2Read client to
perform scraping and downloading.
"""
progress_signal = pyqtSignal(str)
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool)
overall_progress_signal = pyqtSignal(int, int)
def __init__(self, url, output_dir, parent=None):
super().__init__(parent)
self.start_url = url
self.output_dir = output_dir
self.is_cancelled = False
self.pause_event = parent.pause_event if hasattr(parent, 'pause_event') else threading.Event()
def _check_pause(self):
"""Helper to handle pausing and cancellation events."""
if self.is_cancelled: return True
if self.pause_event and self.pause_event.is_set():
self.progress_signal.emit(" Download paused...")
while self.pause_event.is_set():
if self.is_cancelled: return True
time.sleep(0.5)
self.progress_signal.emit(" Download resumed.")
return self.is_cancelled
def run(self):
"""
Executes the main download logic by calling the dedicated client function.
"""
downloaded, skipped = h2r_run_download(
start_url=self.start_url,
output_dir=self.output_dir,
progress_callback=self.progress_signal.emit,
overall_progress_callback=self.overall_progress_signal.emit,
check_pause_func=self._check_pause
)
self.finished_signal.emit(downloaded, skipped, self.is_cancelled)
def cancel(self):
self.is_cancelled = True

View File

@@ -0,0 +1,45 @@
import threading
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.mangadex_client import fetch_mangadex_data
class MangaDexDownloadThread(QThread):
"""A wrapper QThread for running the MangaDex client function."""
progress_signal = pyqtSignal(str)
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool)
overall_progress_signal = pyqtSignal(int, int)
def __init__(self, url, output_dir, parent=None):
super().__init__(parent)
self.start_url = url
self.output_dir = output_dir
self.is_cancelled = False
self.pause_event = parent.pause_event if hasattr(parent, 'pause_event') else threading.Event()
self.cancellation_event = parent.cancellation_event if hasattr(parent, 'cancellation_event') else threading.Event()
def run(self):
downloaded = 0
skipped = 0
try:
downloaded, skipped = fetch_mangadex_data(
self.start_url,
self.output_dir,
logger_func=self.progress_signal.emit,
file_progress_callback=self.file_progress_signal,
overall_progress_callback=self.overall_progress_signal,
pause_event=self.pause_event,
cancellation_event=self.cancellation_event
)
except Exception as e:
self.progress_signal.emit(f"❌ A critical error occurred in the MangaDex thread: {e}")
skipped = 1 # Mark as skipped if there was a critical failure
finally:
self.finished_signal.emit(downloaded, skipped, self.is_cancelled)
def cancel(self):
self.is_cancelled = True
if self.cancellation_event:
self.cancellation_event.set()
self.progress_signal.emit(" Cancellation signal received by MangaDex thread.")

View File

@@ -0,0 +1,105 @@
import os
import time
import cloudscraper
from PyQt5.QtCore import QThread, pyqtSignal
from ...utils.file_utils import clean_folder_name
class NhentaiDownloadThread(QThread):
progress_signal = pyqtSignal(str)
finished_signal = pyqtSignal(int, int, bool)
IMAGE_SERVERS = [
"https://i.nhentai.net", "https://i2.nhentai.net", "https://i3.nhentai.net",
"https://i5.nhentai.net", "https://i7.nhentai.net"
]
EXTENSION_MAP = {'j': 'jpg', 'p': 'png', 'g': 'gif', 'w': 'webp' }
def __init__(self, gallery_data, output_dir, parent=None):
super().__init__(parent)
self.gallery_data = gallery_data
self.output_dir = output_dir
self.is_cancelled = False
def run(self):
title = self.gallery_data.get("title", {}).get("english", f"gallery_{self.gallery_data.get('id')}")
gallery_id = self.gallery_data.get("id")
media_id = self.gallery_data.get("media_id")
pages_info = self.gallery_data.get("pages", [])
folder_name = clean_folder_name(title)
gallery_path = os.path.join(self.output_dir, folder_name)
try:
os.makedirs(gallery_path, exist_ok=True)
except OSError as e:
self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
self.finished_signal.emit(0, len(pages_info), False)
return
self.progress_signal.emit(f"⬇️ Downloading '{title}' to folder '{folder_name}'...")
scraper = cloudscraper.create_scraper()
download_count = 0
skip_count = 0
for i, page_data in enumerate(pages_info):
if self.is_cancelled:
break
page_num = i + 1
ext_char = page_data.get('t', 'j')
extension = self.EXTENSION_MAP.get(ext_char, 'jpg')
relative_path = f"/galleries/{media_id}/{page_num}.{extension}"
local_filename = f"{page_num:03d}.{extension}"
filepath = os.path.join(gallery_path, local_filename)
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip (Exists): {local_filename}")
skip_count += 1
continue
download_successful = False
for server in self.IMAGE_SERVERS:
if self.is_cancelled:
break
full_url = f"{server}{relative_path}"
try:
self.progress_signal.emit(f" Downloading page {page_num}/{len(pages_info)} from {server} ...")
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
'Referer': f'https://nhentai.net/g/{gallery_id}/'
}
response = scraper.get(full_url, headers=headers, timeout=60, stream=True)
if response.status_code == 200:
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
download_count += 1
download_successful = True
break
else:
self.progress_signal.emit(f" -> {server} returned status {response.status_code}. Trying next server...")
except Exception as e:
self.progress_signal.emit(f" -> {server} failed to connect or timed out: {e}. Trying next server...")
if not download_successful:
self.progress_signal.emit(f" ❌ Failed to download {local_filename} from all servers.")
skip_count += 1
time.sleep(0.5)
self.finished_signal.emit(download_count, skip_count, self.is_cancelled)
def cancel(self):
self.is_cancelled = True

View File

@@ -0,0 +1,101 @@
import os
import time
import requests
import cloudscraper
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.pixeldrain_client import fetch_pixeldrain_data
from ...utils.file_utils import clean_folder_name
class PixeldrainDownloadThread(QThread):
"""A dedicated QThread for handling pixeldrain.com downloads."""
progress_signal = pyqtSignal(str)
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool) # dl_count, skip_count, cancelled
def __init__(self, url, output_dir, parent=None):
super().__init__(parent)
self.pixeldrain_url = url
self.output_dir = output_dir
self.is_cancelled = False
def run(self):
download_count = 0
skip_count = 0
self.progress_signal.emit("=" * 40)
self.progress_signal.emit(f"🚀 Starting Pixeldrain.com Download for: {self.pixeldrain_url}")
album_title_raw, files_to_download = fetch_pixeldrain_data(self.pixeldrain_url, self.progress_signal.emit)
if not files_to_download:
self.progress_signal.emit("❌ Failed to extract file information from Pixeldrain. Aborting.")
self.finished_signal.emit(0, 0, self.is_cancelled)
return
album_folder_name = clean_folder_name(album_title_raw)
album_path = os.path.join(self.output_dir, album_folder_name)
try:
os.makedirs(album_path, exist_ok=True)
self.progress_signal.emit(f" Saving to folder: '{album_path}'")
except OSError as e:
self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
self.finished_signal.emit(0, len(files_to_download), self.is_cancelled)
return
total_files = len(files_to_download)
session = cloudscraper.create_scraper()
for i, file_data in enumerate(files_to_download):
if self.is_cancelled:
self.progress_signal.emit(" Download cancelled by user.")
skip_count = total_files - download_count
break
# Fall back to an index-based name if the client did not supply a filename.
filename = file_data.get('filename') or f"file_{i+1}"
file_url = file_data.get('url')
filepath = os.path.join(album_path, filename)
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip ({i+1}/{total_files}): '{filename}' already exists.")
skip_count += 1
continue
self.progress_signal.emit(f" Downloading ({i+1}/{total_files}): '{filename}'...")
try:
response = session.get(file_url, stream=True, timeout=90)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
downloaded_size = 0
last_update_time = time.time()
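# Progress updates below are throttled to roughly one every 0.5 s to avoid flooding the UI.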
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if self.is_cancelled:
break
if chunk:
f.write(chunk)
downloaded_size += len(chunk)
current_time = time.time()
if total_size > 0 and (current_time - last_update_time) > 0.5:
self.file_progress_signal.emit(filename, (downloaded_size, total_size))
last_update_time = current_time
if self.is_cancelled:
if os.path.exists(filepath): os.remove(filepath)
continue
download_count += 1
except requests.exceptions.RequestException as e:
self.progress_signal.emit(f" ❌ Failed to download '{filename}'. Error: {e}")
if os.path.exists(filepath): os.remove(filepath)
skip_count += 1
self.file_progress_signal.emit("", None)
self.finished_signal.emit(download_count, skip_count, self.is_cancelled)
def cancel(self):
self.is_cancelled = True
self.progress_signal.emit(" Cancellation signal received by Pixeldrain thread.")

View File

@@ -0,0 +1,87 @@
import os
import time
import requests
from PyQt5.QtCore import QThread, pyqtSignal
import cloudscraper
from ...core.rule34video_client import fetch_rule34video_data
from ...utils.file_utils import clean_folder_name
class Rule34VideoDownloadThread(QThread):
"""A dedicated QThread for handling rule34video.com downloads."""
progress_signal = pyqtSignal(str)
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool) # dl_count, skip_count, cancelled
def __init__(self, url, output_dir, parent=None):
super().__init__(parent)
self.video_url = url
self.output_dir = output_dir
self.is_cancelled = False
def run(self):
download_count = 0
skip_count = 0
video_title, final_video_url = fetch_rule34video_data(self.video_url, self.progress_signal.emit)
if not final_video_url:
self.progress_signal.emit("❌ Failed to get video data. Aborting.")
self.finished_signal.emit(0, 1, self.is_cancelled)
return
# Create a safe filename from the title, defaulting if needed
safe_title = clean_folder_name(video_title if video_title else "rule34video_file")
filename = f"{safe_title}.mp4"
filepath = os.path.join(self.output_dir, filename)
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip: '{filename}' already exists.")
self.finished_signal.emit(0, 1, self.is_cancelled)
return
self.progress_signal.emit(f" Downloading: '{filename}'...")
try:
scraper = cloudscraper.create_scraper()
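# cloudscraper handles Cloudflare's anti-bot challenge pages that a plain requests session would fail on.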
# The CDN link might not require special headers, but a referer is good practice
headers = {'Referer': 'https://rule34video.com/'}
response = scraper.get(final_video_url, stream=True, headers=headers, timeout=90)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
downloaded_size = 0
last_update_time = time.time()
with open(filepath, 'wb') as f:
# Use a larger chunk size for video files
for chunk in response.iter_content(chunk_size=8192 * 4):
if self.is_cancelled:
break
if chunk:
f.write(chunk)
downloaded_size += len(chunk)
current_time = time.time()
if total_size > 0 and (current_time - last_update_time) > 0.5:
self.file_progress_signal.emit(filename, (downloaded_size, total_size))
last_update_time = current_time
if self.is_cancelled:
if os.path.exists(filepath):
os.remove(filepath)
skip_count = 1
self.progress_signal.emit(f" Download cancelled for '{filename}'.")
else:
download_count = 1
except Exception as e:
self.progress_signal.emit(f" ❌ Failed to download '{filename}': {e}")
if os.path.exists(filepath):
os.remove(filepath)
skip_count = 1
self.file_progress_signal.emit("", None)
self.finished_signal.emit(download_count, skip_count, self.is_cancelled)
def cancel(self):
self.is_cancelled = True
self.progress_signal.emit(" Cancellation signal received by Rule34Video thread.")

View File

@@ -0,0 +1,105 @@
import os
import time
import requests
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.saint2_client import fetch_saint2_data
class Saint2DownloadThread(QThread):
"""A dedicated QThread for handling saint2.su downloads."""
progress_signal = pyqtSignal(str)
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool) # dl_count, skip_count, cancelled
def __init__(self, url, output_dir, parent=None):
super().__init__(parent)
self.saint2_url = url
self.output_dir = output_dir
self.is_cancelled = False
def run(self):
download_count = 0
skip_count = 0
self.progress_signal.emit("=" * 40)
self.progress_signal.emit(f"🚀 Starting Saint2.su Download for: {self.saint2_url}")
album_name, files_to_download = fetch_saint2_data(self.saint2_url, self.progress_signal.emit)
if not files_to_download:
self.progress_signal.emit("❌ Failed to extract file information from Saint2. Aborting.")
self.finished_signal.emit(0, 0, self.is_cancelled)
return
album_path = os.path.join(self.output_dir, album_name)
try:
os.makedirs(album_path, exist_ok=True)
self.progress_signal.emit(f" Saving to folder: '{album_path}'")
except OSError as e:
self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
self.finished_signal.emit(0, len(files_to_download), self.is_cancelled)
return
total_files = len(files_to_download)
session = requests.Session()
for i, file_data in enumerate(files_to_download):
if self.is_cancelled:
self.progress_signal.emit(" Download cancelled by user.")
skip_count = total_files - download_count
break
filename = file_data.get('filename', f'untitled_{i+1}.mp4')
file_url = file_data.get('url')
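# Any per-file headers (e.g. a Referer) prepared by the saint2 client are forwarded unchanged with the request.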
headers = file_data.get('headers')
filepath = os.path.join(album_path, filename)
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip ({i+1}/{total_files}): '{filename}' already exists.")
skip_count += 1
continue
self.progress_signal.emit(f" Downloading ({i+1}/{total_files}): '{filename}'...")
try:
response = session.get(file_url, stream=True, headers=headers, timeout=60)
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
downloaded_size = 0
last_update_time = time.time()
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if self.is_cancelled:
break
if chunk:
f.write(chunk)
downloaded_size += len(chunk)
current_time = time.time()
if total_size > 0 and (current_time - last_update_time) > 0.5:
self.file_progress_signal.emit(filename, (downloaded_size, total_size))
last_update_time = current_time
if self.is_cancelled:
if os.path.exists(filepath): os.remove(filepath)
continue
if total_size > 0:
self.file_progress_signal.emit(filename, (total_size, total_size))
download_count += 1
except requests.exceptions.RequestException as e:
self.progress_signal.emit(f" ❌ Failed to download '{filename}'. Error: {e}")
if os.path.exists(filepath): os.remove(filepath)
skip_count += 1
except Exception as e:
self.progress_signal.emit(f" ❌ An unexpected error occurred with '{filename}': {e}")
if os.path.exists(filepath): os.remove(filepath)
skip_count += 1
self.file_progress_signal.emit("", None)
self.finished_signal.emit(download_count, skip_count, self.is_cancelled)
def cancel(self):
self.is_cancelled = True
self.progress_signal.emit(" Cancellation signal received by Saint2 thread.")

View File

@@ -0,0 +1,347 @@
import os
import queue
import re
import threading
import time
from collections import Counter
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urlparse
import cloudscraper
import requests
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.bunkr_client import fetch_bunkr_data
from ...core.pixeldrain_client import fetch_pixeldrain_data
from ...core.saint2_client import fetch_saint2_data
from ...core.simpcity_client import fetch_single_simpcity_page
from ...services.drive_downloader import (
download_mega_file as drive_download_mega_file,
download_gofile_folder
)
from ...utils.file_utils import clean_folder_name
class SimpCityDownloadThread(QThread):
progress_signal = pyqtSignal(str)
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool, list)
overall_progress_signal = pyqtSignal(int, int)
def __init__(self, url, post_id, output_dir, cookies, parent=None):
super().__init__(parent)
self.start_url = url
self.post_id = post_id
self.output_dir = output_dir
self.cookies = cookies
self.is_cancelled = False
self.parent_app = parent
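# Two queues drive the download: image jobs are fanned out to a thread pool via image_queue,
# while hoster albums (Bunkr, Pixeldrain, Saint2, Mega, Gofile) go through service_queue and
# are handled sequentially by a single worker to avoid hammering those hosts.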
self.image_queue = queue.Queue()
self.service_queue = queue.Queue()
self.counter_lock = threading.Lock()
self.total_dl_count = 0
self.total_skip_count = 0
self.total_jobs_found = 0
self.total_jobs_processed = 0
self.processed_job_urls = set()
def cancel(self):
self.is_cancelled = True
class _ServiceLoggerAdapter:
"""Wraps the progress signal to provide .info(), .error(), .warning() methods for other clients."""
def __init__(self, signal_emitter, prefix=""):
self.emit = signal_emitter
self.prefix = prefix
def __call__(self, msg, *args, **kwargs):
# Make the logger callable, defaulting to the info method.
self.info(msg, *args, **kwargs)
# %-formatting is only applied when args are supplied, so messages containing a literal '%' do not raise.
def info(self, msg, *args, **kwargs): self.emit(f"{self.prefix}{str(msg) % args if args else msg}")
def error(self, msg, *args, **kwargs): self.emit(f"{self.prefix}❌ ERROR: {str(msg) % args if args else msg}")
def warning(self, msg, *args, **kwargs): self.emit(f"{self.prefix}⚠️ WARNING: {str(msg) % args if args else msg}")
def _log_interceptor(self, message):
"""Filters out verbose log messages from the simpcity_client."""
if "[SimpCity] Scraper found" in message or "[SimpCity] Scraping page" in message:
pass
else:
self.progress_signal.emit(message)
def _get_enriched_jobs(self, jobs_to_check):
"""Performs a pre-flight check on jobs to get an accurate total file count and summary."""
if not jobs_to_check:
return []
enriched_jobs = []
bunkr_logger = self._ServiceLoggerAdapter(self.progress_signal.emit, prefix=" ")
pixeldrain_logger = self._ServiceLoggerAdapter(self.progress_signal.emit, prefix=" ")
saint2_logger = self._ServiceLoggerAdapter(self.progress_signal.emit, prefix=" ")
for job in jobs_to_check:
job_type = job.get('type')
job_url = job.get('url')
if job_type in ['image', 'saint2_direct']:
enriched_jobs.append(job)
elif (job_type == 'bunkr' and self.should_dl_bunkr) or \
(job_type == 'pixeldrain' and self.should_dl_pixeldrain) or \
(job_type == 'saint2' and self.should_dl_saint2):
self.progress_signal.emit(f" -> Checking {job_type} album for file count...")
fetch_map = {
'bunkr': (fetch_bunkr_data, bunkr_logger),
'pixeldrain': (fetch_pixeldrain_data, pixeldrain_logger),
'saint2': (fetch_saint2_data, saint2_logger)
}
fetch_func, logger_adapter = fetch_map[job_type]
album_name, files = fetch_func(job_url, logger_adapter)
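# Cache the fetched file list on the job so the service worker can download it later without re-fetching the album page.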
if files:
job['prefetched_files'] = files
job['prefetched_album_name'] = album_name
enriched_jobs.append(job)
if enriched_jobs:
summary_counts = Counter()
current_page_file_count = 0
for job in enriched_jobs:
if job.get('prefetched_files'):
file_count = len(job['prefetched_files'])
summary_counts[job['type']] += file_count
current_page_file_count += file_count
else:
summary_counts[job['type']] += 1
current_page_file_count += 1
summary_parts = [f"{job_type} ({count})" for job_type, count in summary_counts.items()]
self.progress_signal.emit(f" [SimpCity] Content Found: {' | '.join(summary_parts)}")
with self.counter_lock: self.total_jobs_found += current_page_file_count
self.overall_progress_signal.emit(self.total_jobs_found, self.total_jobs_processed)
return enriched_jobs
def _download_single_image(self, job, album_path, session):
"""Downloads one image file; this is run by the image thread pool."""
filename = job['filename']
filepath = os.path.join(album_path, filename)
try:
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip (Image): '{filename}'")
with self.counter_lock: self.total_skip_count += 1
return
self.progress_signal.emit(f" -> Downloading (Image): '{filename}'...")
response = session.get(job['url'], stream=True, timeout=90, headers={'Referer': self.start_url})
response.raise_for_status()
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if self.is_cancelled: break
f.write(chunk)
if not self.is_cancelled:
with self.counter_lock: self.total_dl_count += 1
except Exception as e:
self.progress_signal.emit(f" -> ❌ Image download failed for '{filename}': {e}")
with self.counter_lock: self.total_skip_count += 1
finally:
if not self.is_cancelled:
with self.counter_lock: self.total_jobs_processed += 1
self.overall_progress_signal.emit(self.total_jobs_found, self.total_jobs_processed)
def _image_worker(self, album_path):
"""Target function for the image thread pool that pulls jobs from the queue."""
session = cloudscraper.create_scraper()
while True:
if self.is_cancelled: break
try:
job = self.image_queue.get(timeout=1)
if job is None: break
self._download_single_image(job, album_path, session)
self.image_queue.task_done()
except queue.Empty:
continue
def _service_worker(self, album_path):
"""Target function for the single service thread, ensuring sequential downloads."""
while True:
if self.is_cancelled: break
try:
job = self.service_queue.get(timeout=1)
if job is None: break
job_type = job['type']
job_url = job['url']
if job_type in ['pixeldrain', 'saint2', 'bunkr']:
if (job_type == 'pixeldrain' and self.should_dl_pixeldrain) or \
(job_type == 'saint2' and self.should_dl_saint2) or \
(job_type == 'bunkr' and self.should_dl_bunkr):
self.progress_signal.emit(f"\n--- Processing Service ({job_type.capitalize()}): {job_url} ---")
self._download_album(job.get('prefetched_files', []), job_url, album_path)
elif job_type == 'mega' and self.should_dl_mega:
self.progress_signal.emit(f"\n--- Processing Service (Mega): {job_url} ---")
drive_download_mega_file(job_url, album_path, self.progress_signal.emit, self.file_progress_signal.emit)
elif job_type == 'gofile' and self.should_dl_gofile:
self.progress_signal.emit(f"\n--- Processing Service (Gofile): {job_url} ---")
download_gofile_folder(job_url, album_path, self.progress_signal.emit, self.file_progress_signal.emit)
elif job_type == 'saint2_direct' and self.should_dl_saint2:
self.progress_signal.emit(f"\n--- Processing Service (Saint2 Direct): {job_url} ---")
try:
filename = os.path.basename(urlparse(job_url).path)
filepath = os.path.join(album_path, filename)
if os.path.exists(filepath):
with self.counter_lock: self.total_skip_count += 1
else:
response = cloudscraper.create_scraper().get(job_url, stream=True, timeout=120, headers={'Referer': self.start_url})
response.raise_for_status()
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if self.is_cancelled: break
f.write(chunk)
if not self.is_cancelled:
with self.counter_lock: self.total_dl_count += 1
except Exception as e:
self.progress_signal.emit(f" -> ❌ Saint2 direct download failed for '{job_url}': {e}")
with self.counter_lock: self.total_skip_count += 1
finally:
if not self.is_cancelled:
with self.counter_lock: self.total_jobs_processed += 1
self.overall_progress_signal.emit(self.total_jobs_found, self.total_jobs_processed)
self.service_queue.task_done()
except queue.Empty:
continue
def _download_album(self, files_to_process, source_url, album_path):
"""Helper to download all files from a pre-fetched album list."""
if not files_to_process: return
session = cloudscraper.create_scraper()
for file_data in files_to_process:
if self.is_cancelled: return
filename = file_data.get('filename') or file_data.get('name')
filepath = os.path.join(album_path, filename)
try:
if os.path.exists(filepath):
with self.counter_lock: self.total_skip_count += 1
else:
self.progress_signal.emit(f" -> Downloading: '{filename}'...")
headers = file_data.get('headers', {'Referer': source_url})
response = session.get(file_data.get('url'), stream=True, timeout=90, headers=headers)
response.raise_for_status()
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if self.is_cancelled: break
f.write(chunk)
if not self.is_cancelled:
with self.counter_lock: self.total_dl_count += 1
except Exception as e:
self.progress_signal.emit(f" -> ❌ Failed to download '{filename}': {e}")
with self.counter_lock: self.total_skip_count += 1
finally:
if not self.is_cancelled:
with self.counter_lock: self.total_jobs_processed += 1
self.overall_progress_signal.emit(self.total_jobs_found, self.total_jobs_processed)
def run(self):
"""Main entry point for the thread, orchestrates the entire download."""
self.progress_signal.emit("=" * 40)
self.progress_signal.emit(f"🚀 Starting SimpCity Download for: {self.start_url}")
self.should_dl_pixeldrain = self.parent_app.simpcity_dl_pixeldrain_cb.isChecked()
self.should_dl_saint2 = self.parent_app.simpcity_dl_saint2_cb.isChecked()
self.should_dl_mega = self.parent_app.simpcity_dl_mega_cb.isChecked()
self.should_dl_bunkr = self.parent_app.simpcity_dl_bunkr_cb.isChecked()
self.should_dl_gofile = self.parent_app.simpcity_dl_gofile_cb.isChecked()
is_single_post_mode = self.post_id or '/post-' in self.start_url
album_path = ""
try:
if is_single_post_mode:
self.progress_signal.emit(" Mode: Single Post detected.")
album_title, _, _ = fetch_single_simpcity_page(self.start_url, self._log_interceptor, cookies=self.cookies, post_id=self.post_id)
album_path = os.path.join(self.output_dir, clean_folder_name(album_title or "simpcity_post"))
else:
self.progress_signal.emit(" Mode: Full Thread detected.")
first_page_url = re.sub(r'(/page-\d+)|(/post-\d+)', '', self.start_url).split('#')[0].strip('/')
album_title, _, _ = fetch_single_simpcity_page(first_page_url, self._log_interceptor, cookies=self.cookies)
album_path = os.path.join(self.output_dir, clean_folder_name(album_title or "simpcity_album"))
os.makedirs(album_path, exist_ok=True)
self.progress_signal.emit(f" Saving all content to folder: '{os.path.basename(album_path)}'")
except Exception as e:
self.progress_signal.emit(f"❌ Could not process the initial page. Aborting. Error: {e}")
self.finished_signal.emit(0, 0, self.is_cancelled, []); return
service_thread = threading.Thread(target=self._service_worker, args=(album_path,), daemon=True)
service_thread.start()
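# The single service thread above keeps hoster downloads sequential, while a pool of image workers drains the image queue in parallel.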
num_image_threads = 15
image_executor = ThreadPoolExecutor(max_workers=num_image_threads, thread_name_prefix='SimpCityImage')
for _ in range(num_image_threads): image_executor.submit(self._image_worker, album_path)
try:
if is_single_post_mode:
_, jobs, _ = fetch_single_simpcity_page(self.start_url, self._log_interceptor, cookies=self.cookies, post_id=self.post_id)
enriched_jobs = self._get_enriched_jobs(jobs)
if enriched_jobs:
for job in enriched_jobs:
if job['type'] == 'image': self.image_queue.put(job)
else: self.service_queue.put(job)
else:
base_url = re.sub(r'(/page-\d+)|(/post-\d+)', '', self.start_url).split('#')[0].strip('/')
page_counter = 1; end_of_thread = False; MAX_RETRIES = 3
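# Walk /page-1, /page-2, ... until a redirect to an earlier page, a missing title, or a page with no new jobs signals the end of the thread.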
while not end_of_thread:
if self.is_cancelled: break
page_url = f"{base_url}/page-{page_counter}"; retries = 0; page_fetch_successful = False
while retries < MAX_RETRIES:
if self.is_cancelled: end_of_thread = True; break
self.progress_signal.emit(f"\n--- Analyzing page {page_counter} (Attempt {retries + 1}/{MAX_RETRIES}) ---")
try:
page_title, jobs_on_page, final_url = fetch_single_simpcity_page(page_url, self._log_interceptor, cookies=self.cookies)
if final_url != page_url:
self.progress_signal.emit(f" -> Redirect detected from {page_url} to {final_url}")
try:
req_page_match = re.search(r'/page-(\d+)', page_url)
final_page_match = re.search(r'/page-(\d+)', final_url)
if req_page_match and final_page_match and int(final_page_match.group(1)) < int(req_page_match.group(1)):
self.progress_signal.emit(" -> Redirected to an earlier page. Reached end of thread.")
end_of_thread = True
except (ValueError, TypeError):
pass
if end_of_thread:
page_fetch_successful = True; break
if page_counter > 1 and not page_title:
self.progress_signal.emit(f" -> Page {page_counter} is invalid or has no title. Reached end of thread.")
end_of_thread = True
elif not jobs_on_page:
end_of_thread = True
else:
new_jobs = [job for job in jobs_on_page if job.get('url') not in self.processed_job_urls]
if not new_jobs and page_counter > 1:
end_of_thread = True
else:
enriched_jobs = self._get_enriched_jobs(new_jobs)
for job in enriched_jobs:
self.processed_job_urls.add(job.get('url'))
if job['type'] == 'image': self.image_queue.put(job)
else: self.service_queue.put(job)
page_fetch_successful = True; break
except requests.exceptions.HTTPError as e:
if e.response.status_code in [403, 404]: end_of_thread = True; break
elif e.response.status_code == 429: time.sleep(5 * (retries + 2)); retries += 1
else: end_of_thread = True; break
except Exception as e:
self.progress_signal.emit(f" Stopping crawl due to error on page {page_counter}: {e}"); end_of_thread = True; break
if not page_fetch_successful and not end_of_thread: end_of_thread = True
if not end_of_thread: page_counter += 1
except Exception as e:
self.progress_signal.emit(f"❌ A critical error occurred during the main fetch phase: {e}")
self.progress_signal.emit("\n--- All pages analyzed. Waiting for background downloads to complete... ---")
for _ in range(num_image_threads): self.image_queue.put(None)
self.service_queue.put(None)
image_executor.shutdown(wait=True)
service_thread.join()
self.finished_signal.emit(self.total_dl_count, self.total_skip_count, self.is_cancelled, [])

View File

@@ -0,0 +1,128 @@
import os
import threading
import time
from urllib.parse import urlparse
import cloudscraper
from PyQt5.QtCore import QThread, pyqtSignal
from ...core.toonily_client import (
fetch_chapter_data as toonily_fetch_data,
get_chapter_list as toonily_get_list
)
from ...utils.file_utils import clean_folder_name
class ToonilyDownloadThread(QThread):
"""A dedicated QThread for handling toonily.com series or single chapters."""
progress_signal = pyqtSignal(str)
file_progress_signal = pyqtSignal(str, object)
finished_signal = pyqtSignal(int, int, bool)
overall_progress_signal = pyqtSignal(int, int) # Signal for chapter progress
def __init__(self, url, output_dir, parent=None):
super().__init__(parent)
self.start_url = url
self.output_dir = output_dir
self.is_cancelled = False
# Get access to the pause event from the main app
self.pause_event = parent.pause_event if hasattr(parent, 'pause_event') else threading.Event()
def _check_pause(self):
# Helper function to check for pause/cancel events
if self.is_cancelled: return True
if self.pause_event and self.pause_event.is_set():
self.progress_signal.emit(" Download paused...")
while self.pause_event.is_set():
if self.is_cancelled: return True
time.sleep(0.5)
self.progress_signal.emit(" Download resumed.")
return self.is_cancelled
def run(self):
grand_total_dl = 0
grand_total_skip = 0
# Check if the URL is a series or a chapter
if '/chapter-' in self.start_url:
# It's a single chapter URL
chapters_to_download = [self.start_url]
self.progress_signal.emit(" Single Toonily chapter URL detected.")
else:
# It's a series URL, so get all chapters
chapters_to_download = toonily_get_list(self.start_url, self.progress_signal.emit)
if not chapters_to_download:
self.progress_signal.emit("❌ No chapters found to download.")
self.finished_signal.emit(0, 0, self.is_cancelled)
return
self.progress_signal.emit(f"--- Starting download of {len(chapters_to_download)} chapter(s) ---")
self.overall_progress_signal.emit(len(chapters_to_download), 0)
scraper = cloudscraper.create_scraper()
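# A single cloudscraper session is reused for every chapter and image request in this run.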
for chapter_idx, chapter_url in enumerate(chapters_to_download):
if self._check_pause(): break
self.progress_signal.emit(f"\n-- Processing Chapter {chapter_idx + 1}/{len(chapters_to_download)} --")
series_title, chapter_title, image_urls = toonily_fetch_data(chapter_url, self.progress_signal.emit, scraper)
if not image_urls:
self.progress_signal.emit(f"❌ Failed to get data for chapter. Skipping.")
continue
# Create folders like: /Downloads/Series Name/Chapter 01/
series_folder_name = clean_folder_name(series_title)
# Make a safe folder name from the full chapter title
chapter_folder_name = clean_folder_name(chapter_title)
final_save_path = os.path.join(self.output_dir, series_folder_name, chapter_folder_name)
try:
os.makedirs(final_save_path, exist_ok=True)
self.progress_signal.emit(f" Saving to folder: '{os.path.join(series_folder_name, chapter_folder_name)}'")
except OSError as e:
self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
grand_total_skip += len(image_urls)
continue
for i, img_url in enumerate(image_urls):
if self._check_pause(): break
# Derive the filename before entering the try block so the error handler below can always reference it.
file_extension = os.path.splitext(urlparse(img_url).path)[1] or '.jpg'
filename = f"{i+1:03d}{file_extension}"
try:
filepath = os.path.join(final_save_path, filename)
if os.path.exists(filepath):
self.progress_signal.emit(f" -> Skip ({i+1}/{len(image_urls)}): '{filename}' already exists.")
grand_total_skip += 1
else:
self.progress_signal.emit(f" Downloading ({i+1}/{len(image_urls)}): '{filename}'...")
response = scraper.get(img_url, stream=True, timeout=60, headers={'Referer': chapter_url})
response.raise_for_status()
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if self._check_pause(): break
f.write(chunk)
if self._check_pause():
if os.path.exists(filepath): os.remove(filepath)
break
grand_total_dl += 1
time.sleep(0.2)
except Exception as e:
self.progress_signal.emit(f" ❌ Failed to download '{filename}': {e}")
grand_total_skip += 1
self.overall_progress_signal.emit(len(chapters_to_download), chapter_idx + 1)
time.sleep(1) # Wait a second between chapters
self.file_progress_signal.emit("", None)
self.finished_signal.emit(grand_total_dl, grand_total_skip, self.is_cancelled)
def cancel(self):
self.is_cancelled = True
self.progress_signal.emit(" Cancellation signal received by Toonily thread.")

View File

@@ -153,7 +153,7 @@ class SupportDialog(QDialog):
community_layout.addWidget(self._create_card_button(
get_asset_path("github.png"), "GitHub", "Report issues",
"https://github.com/Yuvi63771/Kemono-Downloader", "#2E2E2E",
"https://github.com/Yuvi9587/Kemono-Downloader", "#2E2E2E",
min_height=100, icon_size=36
))
community_layout.addWidget(self._create_card_button(

File diff suppressed because it is too large

View File

@@ -137,6 +137,12 @@ def extract_post_info(url_string):
stripped_url = url_string.strip()
# --- Rule34Video Check ---
rule34video_match = re.search(r'rule34video\.com/video/(\d+)', stripped_url)
if rule34video_match:
video_id = rule34video_match.group(1)
return 'rule34video', video_id, None
# --- Danbooru Check ---
danbooru_match = re.search(r'danbooru\.donmai\.us|safebooru\.donmai\.us', stripped_url)
if danbooru_match: