mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
be03f914ef | ||
|
|
ec9900b90f | ||
|
|
55ebfdb980 | ||
|
|
4a93b721e2 | ||
|
|
257111d462 | ||
|
|
9563ce82db | ||
|
|
169ded3fd8 | ||
|
|
7e8e8a59e2 | ||
|
|
0acd433920 | ||
|
|
cef4211d7b | ||
|
|
9fe0c37127 |
@@ -127,7 +127,7 @@
|
||||
<p>Feel free to fork this repo and submit pull requests for bug fixes, new features, or UI improvements!</p>
|
||||
<h2>License</h2>
|
||||
<p>This project is licensed under the MIT License.</p>
|
||||
<h2>Included Third-Party Tools</h2>
|
||||
### Included Third-Party Tools
|
||||
|
||||
This project includes a pre-compiled binary of `yt-dlp` for handling certain video downloads. `yt-dlp` is in the public domain. For more information or to get the latest version, please visit the official [yt-dlp GitHub repository](https://github.com/yt-dlp/yt-dlp).
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
# src/core/Hentai2read_client.py
|
||||
|
||||
import re
|
||||
import os
|
||||
import time
|
||||
import time
|
||||
import cloudscraper
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin
|
||||
@@ -65,12 +63,37 @@ def run_hentai2read_download(start_url, output_dir, progress_callback, overall_p
|
||||
def _get_series_metadata(start_url, progress_callback, scraper):
|
||||
"""
|
||||
Scrapes the main series page to get the Artist Name, Series Title, and chapter list.
|
||||
Includes a retry mechanism for the initial connection.
|
||||
"""
|
||||
try:
|
||||
response = scraper.get(start_url, timeout=30)
|
||||
response.raise_for_status()
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
max_retries = 4 # Total number of attempts (1 initial + 3 retries)
|
||||
last_exception = None
|
||||
soup = None
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
if attempt > 0:
|
||||
progress_callback(f" [Hentai2Read] ⚠️ Retrying connection (Attempt {attempt + 1}/{max_retries})...")
|
||||
|
||||
response = scraper.get(start_url, timeout=30)
|
||||
response.raise_for_status()
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
# If successful, clear exception and break the loop
|
||||
last_exception = None
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
last_exception = e
|
||||
progress_callback(f" [Hentai2Read] ⚠️ Connection attempt {attempt + 1} failed: {e}")
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(2 * (attempt + 1)) # Wait 2s, 4s, 6s
|
||||
continue # Try again
|
||||
|
||||
if last_exception:
|
||||
progress_callback(f" [Hentai2Read] ❌ Error getting series metadata after {max_retries} attempts: {last_exception}")
|
||||
return "Unknown Series", []
|
||||
|
||||
try:
|
||||
series_title = "Unknown Series"
|
||||
artist_name = None
|
||||
metadata_list = soup.select_one("ul.list.list-simple-mini")
|
||||
@@ -107,10 +130,9 @@ def _get_series_metadata(start_url, progress_callback, scraper):
|
||||
return top_level_folder_name, chapters_to_process
|
||||
|
||||
except Exception as e:
|
||||
progress_callback(f" [Hentai2Read] ❌ Error getting series metadata: {e}")
|
||||
progress_callback(f" [Hentai2Read] ❌ Error parsing metadata after successful connection: {e}")
|
||||
return "Unknown Series", []
|
||||
|
||||
### NEW: This function contains the pipeline logic ###
|
||||
def _process_and_download_chapter(chapter_url, save_path, scraper, progress_callback, check_pause_func):
|
||||
"""
|
||||
Uses a producer-consumer pattern to download a chapter.
|
||||
@@ -120,12 +142,10 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
|
||||
task_queue = queue.Queue()
|
||||
num_download_threads = 8
|
||||
|
||||
# These will be updated by the worker threads
|
||||
download_stats = {'downloaded': 0, 'skipped': 0}
|
||||
|
||||
def downloader_worker():
|
||||
"""The function that each download thread will run."""
|
||||
# Create a unique session for each thread to avoid conflicts
|
||||
worker_scraper = cloudscraper.create_scraper()
|
||||
while True:
|
||||
try:
|
||||
@@ -153,12 +173,10 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
|
||||
finally:
|
||||
task_queue.task_done()
|
||||
|
||||
# --- Start the downloader threads ---
|
||||
executor = ThreadPoolExecutor(max_workers=num_download_threads, thread_name_prefix='H2R_Downloader')
|
||||
for _ in range(num_download_threads):
|
||||
executor.submit(downloader_worker)
|
||||
|
||||
# --- Main thread acts as the scraper (producer) ---
|
||||
page_number = 1
|
||||
while True:
|
||||
if check_pause_func(): break
|
||||
@@ -168,12 +186,25 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
|
||||
|
||||
page_url_to_check = f"{chapter_url}{page_number}/"
|
||||
try:
|
||||
response = scraper.get(page_url_to_check, timeout=30)
|
||||
if response.history or response.status_code != 200:
|
||||
page_response = None
|
||||
page_last_exception = None
|
||||
for page_attempt in range(3): # 3 attempts for sub-pages
|
||||
try:
|
||||
page_response = scraper.get(page_url_to_check, timeout=30)
|
||||
page_last_exception = None
|
||||
break
|
||||
except Exception as e:
|
||||
page_last_exception = e
|
||||
time.sleep(1) # Short delay for page scraping retries
|
||||
|
||||
if page_last_exception:
|
||||
raise page_last_exception # Give up after 3 tries
|
||||
|
||||
if page_response.history or page_response.status_code != 200:
|
||||
progress_callback(f" [Hentai2Read] End of chapter detected on page {page_number}.")
|
||||
break
|
||||
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
soup = BeautifulSoup(page_response.text, 'html.parser')
|
||||
img_tag = soup.select_one("img#arf-reader")
|
||||
img_src = img_tag.get("src") if img_tag else None
|
||||
|
||||
@@ -181,12 +212,11 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
|
||||
progress_callback(f" [Hentai2Read] End of chapter detected (Placeholder image on page {page_number}).")
|
||||
break
|
||||
|
||||
normalized_img_src = urljoin(response.url, img_src)
|
||||
normalized_img_src = urljoin(page_response.url, img_src)
|
||||
ext = os.path.splitext(normalized_img_src.split('/')[-1])[-1] or ".jpg"
|
||||
filename = f"{page_number:03d}{ext}"
|
||||
filepath = os.path.join(save_path, filename)
|
||||
|
||||
# Put the download task into the queue for a worker to pick up
|
||||
task_queue.put((filepath, normalized_img_src))
|
||||
|
||||
page_number += 1
|
||||
@@ -195,12 +225,9 @@ def _process_and_download_chapter(chapter_url, save_path, scraper, progress_call
|
||||
progress_callback(f" [Hentai2Read] ❌ Error while scraping page {page_number}: {e}")
|
||||
break
|
||||
|
||||
# --- Shutdown sequence ---
|
||||
# Tell all worker threads to exit by sending the sentinel value
|
||||
for _ in range(num_download_threads):
|
||||
task_queue.put(None)
|
||||
|
||||
# Wait for all download tasks to be completed
|
||||
executor.shutdown(wait=True)
|
||||
|
||||
progress_callback(f" Found and processed {page_number - 1} images for this chapter.")
|
||||
|
||||
@@ -1,36 +1,36 @@
|
||||
import requests
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
import cloudscraper
|
||||
import time
|
||||
import random
|
||||
from urllib.parse import urlparse
|
||||
|
||||
def get_chapter_list(series_url, logger_func):
|
||||
def get_chapter_list(scraper, series_url, logger_func):
|
||||
"""
|
||||
Checks if a URL is a series page and returns a list of all chapter URLs if it is.
|
||||
Includes a retry mechanism for robust connection.
|
||||
Relies on a passed-in scraper session for connection.
|
||||
"""
|
||||
logger_func(f" [AllComic] Checking for chapter list at: {series_url}")
|
||||
|
||||
scraper = cloudscraper.create_scraper()
|
||||
headers = {'Referer': 'https://allporncomic.com/'}
|
||||
response = None
|
||||
max_retries = 8
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
response = scraper.get(series_url, timeout=30)
|
||||
response = scraper.get(series_url, headers=headers, timeout=30)
|
||||
response.raise_for_status()
|
||||
logger_func(f" [AllComic] Successfully connected to series page on attempt {attempt + 1}.")
|
||||
break # Success, exit the loop
|
||||
break
|
||||
except requests.RequestException as e:
|
||||
logger_func(f" [AllComic] ⚠️ Series page check attempt {attempt + 1}/{max_retries} failed: {e}")
|
||||
if attempt < max_retries - 1:
|
||||
wait_time = 2 * (attempt + 1)
|
||||
logger_func(f" Retrying in {wait_time} seconds...")
|
||||
wait_time = (2 ** attempt) + random.uniform(0, 2)
|
||||
logger_func(f" Retrying in {wait_time:.1f} seconds...")
|
||||
time.sleep(wait_time)
|
||||
else:
|
||||
logger_func(f" [AllComic] ❌ All attempts to check series page failed.")
|
||||
return [] # Return empty on final failure
|
||||
return []
|
||||
|
||||
if not response:
|
||||
return []
|
||||
@@ -44,7 +44,7 @@ def get_chapter_list(series_url, logger_func):
|
||||
return []
|
||||
|
||||
chapter_urls = [link['href'] for link in chapter_links]
|
||||
chapter_urls.reverse() # Reverse for oldest-to-newest reading order
|
||||
chapter_urls.reverse()
|
||||
|
||||
logger_func(f" [AllComic] ✅ Found {len(chapter_urls)} chapters.")
|
||||
return chapter_urls
|
||||
@@ -53,15 +53,13 @@ def get_chapter_list(series_url, logger_func):
|
||||
logger_func(f" [AllComic] ❌ Error parsing chapters after successful connection: {e}")
|
||||
return []
|
||||
|
||||
def fetch_chapter_data(chapter_url, logger_func):
|
||||
def fetch_chapter_data(scraper, chapter_url, logger_func):
|
||||
"""
|
||||
Fetches the comic title, chapter title, and image URLs for a single chapter page.
|
||||
Relies on a passed-in scraper session for connection.
|
||||
"""
|
||||
logger_func(f" [AllComic] Fetching page: {chapter_url}")
|
||||
|
||||
scraper = cloudscraper.create_scraper(
|
||||
browser={'browser': 'firefox', 'platform': 'windows', 'desktop': True}
|
||||
)
|
||||
headers = {'Referer': 'https://allporncomic.com/'}
|
||||
|
||||
response = None
|
||||
@@ -72,16 +70,23 @@ def fetch_chapter_data(chapter_url, logger_func):
|
||||
response.raise_for_status()
|
||||
break
|
||||
except requests.RequestException as e:
|
||||
logger_func(f" [AllComic] ⚠️ Chapter page connection attempt {attempt + 1}/{max_retries} failed: {e}")
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(2 * (attempt + 1))
|
||||
wait_time = (2 ** attempt) + random.uniform(0, 2)
|
||||
logger_func(f" Retrying in {wait_time:.1f} seconds...")
|
||||
time.sleep(wait_time)
|
||||
else:
|
||||
logger_func(f" [AllComic] ❌ All connection attempts failed for chapter: {chapter_url}")
|
||||
return None, None, None
|
||||
|
||||
if not response:
|
||||
return None, None, None
|
||||
|
||||
try:
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
comic_title = "Unknown Comic"
|
||||
title_element = soup.find('h1', class_='post-title')
|
||||
comic_title = None
|
||||
if title_element:
|
||||
comic_title = title_element.text.strip()
|
||||
else:
|
||||
@@ -91,7 +96,7 @@ def fetch_chapter_data(chapter_url, logger_func):
|
||||
comic_slug = path_parts[-2]
|
||||
comic_title = comic_slug.replace('-', ' ').title()
|
||||
except Exception:
|
||||
comic_title = "Unknown Comic"
|
||||
pass
|
||||
|
||||
chapter_slug = chapter_url.strip('/').split('/')[-1]
|
||||
chapter_title = chapter_slug.replace('-', ' ').title()
|
||||
@@ -105,8 +110,8 @@ def fetch_chapter_data(chapter_url, logger_func):
|
||||
if img_url:
|
||||
list_of_image_urls.append(img_url)
|
||||
|
||||
if not comic_title or comic_title == "Unknown Comic" or not list_of_image_urls:
|
||||
logger_func(f" [AllComic] ❌ Could not find a valid title or images on the page. Title found: '{comic_title}'")
|
||||
if not list_of_image_urls:
|
||||
logger_func(f" [AllComic] ❌ Could not find any images on the page.")
|
||||
return None, None, None
|
||||
|
||||
return comic_title, chapter_title, list_of_image_urls
|
||||
|
||||
@@ -159,8 +159,6 @@ def download_from_api(
|
||||
if cancellation_event and cancellation_event.is_set():
|
||||
logger(" Download_from_api cancelled at start.")
|
||||
return
|
||||
|
||||
# The code that defined api_domain was moved from here to the top of the function
|
||||
|
||||
if not any(d in api_domain.lower() for d in ['kemono.su', 'kemono.party', 'kemono.cr', 'coomer.su', 'coomer.party', 'coomer.st']):
|
||||
logger(f"⚠️ Unrecognized domain '{api_domain}' from input URL. Defaulting to kemono.su for API calls.")
|
||||
@@ -312,6 +310,8 @@ def download_from_api(
|
||||
current_offset = (start_page - 1) * page_size
|
||||
current_page_num = start_page
|
||||
logger(f" Starting from page {current_page_num} (calculated offset {current_offset}).")
|
||||
|
||||
# --- START OF MODIFIED BLOCK ---
|
||||
while True:
|
||||
if pause_event and pause_event.is_set():
|
||||
logger(" Post fetching loop paused...")
|
||||
@@ -321,18 +321,23 @@ def download_from_api(
|
||||
break
|
||||
time.sleep(0.5)
|
||||
if not (cancellation_event and cancellation_event.is_set()): logger(" Post fetching loop resumed.")
|
||||
|
||||
if cancellation_event and cancellation_event.is_set():
|
||||
logger(" Post fetching loop cancelled.")
|
||||
break
|
||||
|
||||
if target_post_id and processed_target_post_flag:
|
||||
break
|
||||
|
||||
if not target_post_id and end_page and current_page_num > end_page:
|
||||
logger(f"✅ Reached specified end page ({end_page}) for creator feed. Stopping.")
|
||||
break
|
||||
|
||||
try:
|
||||
posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event, pause_event, cookies_dict=cookies_for_api)
|
||||
if not isinstance(posts_batch, list):
|
||||
logger(f"❌ API Error: Expected list of posts, got {type(posts_batch)} at page {current_page_num} (offset {current_offset}).")
|
||||
# 1. Fetch the raw batch of posts
|
||||
raw_posts_batch = fetch_posts_paginated(api_base_url, headers, current_offset, logger, cancellation_event, pause_event, cookies_dict=cookies_for_api)
|
||||
if not isinstance(raw_posts_batch, list):
|
||||
logger(f"❌ API Error: Expected list of posts, got {type(raw_posts_batch)} at page {current_page_num} (offset {current_offset}).")
|
||||
break
|
||||
except RuntimeError as e:
|
||||
if "cancelled by user" in str(e).lower():
|
||||
@@ -344,14 +349,9 @@ def download_from_api(
|
||||
logger(f"❌ Unexpected error fetching page {current_page_num} (offset {current_offset}): {e}")
|
||||
traceback.print_exc()
|
||||
break
|
||||
if processed_post_ids:
|
||||
original_count = len(posts_batch)
|
||||
posts_batch = [post for post in posts_batch if post.get('id') not in processed_post_ids]
|
||||
skipped_count = original_count - len(posts_batch)
|
||||
if skipped_count > 0:
|
||||
logger(f" Skipped {skipped_count} already processed post(s) from page {current_page_num}.")
|
||||
|
||||
if not posts_batch:
|
||||
|
||||
# 2. Check if the *raw* batch from the API was empty. This is the correct "end" condition.
|
||||
if not raw_posts_batch:
|
||||
if target_post_id and not processed_target_post_flag:
|
||||
logger(f"❌ Target post {target_post_id} not found after checking all available pages (API returned no more posts at offset {current_offset}).")
|
||||
elif not target_post_id:
|
||||
@@ -359,20 +359,45 @@ def download_from_api(
|
||||
logger(f"😕 No posts found on the first page checked (page {current_page_num}, offset {current_offset}).")
|
||||
else:
|
||||
logger(f"✅ Reached end of posts (no more content from API at offset {current_offset}).")
|
||||
break
|
||||
break # This break is now correct.
|
||||
|
||||
# 3. Filter the batch against processed IDs
|
||||
posts_batch_to_yield = raw_posts_batch
|
||||
original_count = len(raw_posts_batch)
|
||||
|
||||
if processed_post_ids:
|
||||
posts_batch_to_yield = [post for post in raw_posts_batch if post.get('id') not in processed_post_ids]
|
||||
skipped_count = original_count - len(posts_batch_to_yield)
|
||||
if skipped_count > 0:
|
||||
logger(f" Skipped {skipped_count} already processed post(s) from page {current_page_num}.")
|
||||
|
||||
# 4. Process the *filtered* batch
|
||||
if target_post_id and not processed_target_post_flag:
|
||||
matching_post = next((p for p in posts_batch if str(p.get('id')) == str(target_post_id)), None)
|
||||
# Still searching for a specific post
|
||||
matching_post = next((p for p in posts_batch_to_yield if str(p.get('id')) == str(target_post_id)), None)
|
||||
if matching_post:
|
||||
logger(f"🎯 Found target post {target_post_id} on page {current_page_num} (offset {current_offset}).")
|
||||
yield [matching_post]
|
||||
processed_target_post_flag = True
|
||||
elif not target_post_id:
|
||||
yield posts_batch
|
||||
# Downloading a creator feed
|
||||
if posts_batch_to_yield:
|
||||
# We found new posts on this page, yield them
|
||||
yield posts_batch_to_yield
|
||||
elif original_count > 0:
|
||||
# We found 0 new posts, but the page *did* have posts (they were just skipped).
|
||||
# Log this and continue to the next page.
|
||||
logger(f" No new posts found on page {current_page_num}. Checking next page...")
|
||||
# If original_count was 0, the `if not raw_posts_batch:` check
|
||||
# already caught it and broke the loop.
|
||||
|
||||
if processed_target_post_flag:
|
||||
break
|
||||
|
||||
current_offset += page_size
|
||||
current_page_num += 1
|
||||
time.sleep(0.6)
|
||||
# --- END OF MODIFIED BLOCK ---
|
||||
|
||||
if target_post_id and not processed_target_post_flag and not (cancellation_event and cancellation_event.is_set()):
|
||||
logger(f"❌ Target post {target_post_id} could not be found after checking all relevant pages (final check after loop).")
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# src/core/booru_client.py
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
@@ -164,17 +164,34 @@ class BunkrAlbumExtractor(Extractor):
|
||||
def _extract_file(self, webpage_url):
|
||||
page = self.request(webpage_url).text
|
||||
data_id = extr(page, 'data-file-id="', '"')
|
||||
referer = self.root_dl + "/file/" + data_id
|
||||
headers = {"Referer": referer, "Origin": self.root_dl}
|
||||
|
||||
# This referer is for the API call only
|
||||
api_referer = self.root_dl + "/file/" + data_id
|
||||
headers = {"Referer": api_referer, "Origin": self.root_dl}
|
||||
data = self.request_json(self.endpoint, method="POST", headers=headers, json={"id": data_id})
|
||||
|
||||
# Get the raw file URL (no domain replacement)
|
||||
file_url = decrypt_xor(data["url"], f"SECRET_KEY_{data['timestamp'] // 3600}".encode()) if data.get("encrypted") else data["url"]
|
||||
|
||||
file_name = extr(page, "<h1", "<").rpartition(">")[2]
|
||||
|
||||
# --- NEW FIX ---
|
||||
# The download thread uses a new `requests` call, so we must
|
||||
# explicitly pass BOTH the User-Agent and the correct Referer.
|
||||
|
||||
# 1. Get the User-Agent from this extractor's session
|
||||
user_agent = self.session.headers.get("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0")
|
||||
|
||||
# 2. Use the original album URL as the Referer
|
||||
download_referer = self.url
|
||||
|
||||
return {
|
||||
"url": file_url,
|
||||
"name": unescape(file_name),
|
||||
"_http_headers": {"Referer": referer}
|
||||
"_http_headers": {
|
||||
"Referer": download_referer,
|
||||
"User-Agent": user_agent
|
||||
}
|
||||
}
|
||||
|
||||
class BunkrMediaExtractor(BunkrAlbumExtractor):
|
||||
|
||||
@@ -69,15 +69,28 @@ def fetch_fap_nation_data(album_url, logger_func):
|
||||
|
||||
if direct_links_found:
|
||||
logger_func(f" [Fap-Nation] Found {len(direct_links_found)} direct media link(s). Selecting the best quality...")
|
||||
best_link = direct_links_found[0]
|
||||
for link in direct_links_found:
|
||||
if '1080p' in link.lower():
|
||||
best_link = link
|
||||
break
|
||||
best_link = None
|
||||
# Define qualities from highest to lowest
|
||||
qualities_to_check = ['1080p', '720p', '480p', '360p']
|
||||
|
||||
# Find the best quality link by iterating through preferred qualities
|
||||
for quality in qualities_to_check:
|
||||
for link in direct_links_found:
|
||||
if quality in link.lower():
|
||||
best_link = link
|
||||
logger_func(f" [Fap-Nation] Found '{quality}' link: {best_link}")
|
||||
break # Found the best link for this quality level
|
||||
if best_link:
|
||||
break # Found the highest quality available
|
||||
|
||||
# Fallback if no quality string was found in any link
|
||||
if not best_link:
|
||||
best_link = direct_links_found[0]
|
||||
logger_func(f" [Fap-Nation] ⚠️ No quality tags (1080p, 720p, etc.) found in links. Defaulting to first link: {best_link}")
|
||||
|
||||
final_url = best_link
|
||||
link_type = 'direct'
|
||||
logger_func(f" [Fap-Nation] Identified direct media link: {final_url}")
|
||||
|
||||
# If after all checks, we still have no URL, then fail.
|
||||
if not final_url:
|
||||
logger_func(" [Fap-Nation] ❌ Stage 1 Failed: Could not find any HLS stream or direct link.")
|
||||
|
||||
107
src/core/rule34video_client.py
Normal file
107
src/core/rule34video_client.py
Normal file
@@ -0,0 +1,107 @@
|
||||
import cloudscraper
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import html
|
||||
|
||||
def _select_rule34video_quality(links_by_quality):
    """
    Pick which quality to download from a mapping of quality label -> URL.

    The preferred order is 1080p, 720p, 480p, 360p. If none of those are
    present, the highest remaining resolution is used instead of giving up,
    so pages that only offer e.g. '240p', '2160p' or '4k' still yield a link.

    Args:
        links_by_quality (dict): Maps labels of the form '<digits>p' or '4k'
            (as produced by the caller's quality regex) to download URLs.

    Returns:
        tuple: (quality_label, url), or (None, None) if the mapping is empty.
    """
    for quality in ('1080p', '720p', '480p', '360p'):
        if quality in links_by_quality:
            return quality, links_by_quality[quality]

    if not links_by_quality:
        return None, None

    def _resolution(label):
        # '4k' carries no numeric height, so rank it as 2160 lines.
        return 2160 if label == '4k' else int(label[:-1])

    best_quality = max(links_by_quality, key=_resolution)
    return best_quality, links_by_quality[best_quality]


def fetch_rule34video_data(video_url, logger_func):
    """
    Scrapes a rule34video.com page by specifically finding the 'Download' div,
    then selecting the best available quality link.

    Args:
        video_url (str): The full URL to the rule34video.com page.
        logger_func (callable): Function to use for logging progress.

    Returns:
        tuple: (video_title, final_video_url) or (None, None) on failure.
        Any exception raised while fetching or parsing is logged and also
        reported as (None, None).
    """
    logger_func(f" [Rule34Video] Fetching page: {video_url}")
    scraper = cloudscraper.create_scraper()

    try:
        main_page_response = scraper.get(video_url, timeout=20)
        main_page_response.raise_for_status()

        soup = BeautifulSoup(main_page_response.text, 'html.parser')

        page_title_tag = soup.find('title')
        video_title = page_title_tag.text.strip() if page_title_tag else "rule34video_file"

        # The page has several 'tag_item' link groups; the one we want is the
        # group whose label div reads exactly "Download".
        download_label = soup.find('div', class_='label', string='Download')
        if not download_label:
            logger_func(" [Rule34Video] ❌ Could not find the 'Download' label. Unable to locate the correct links div.")
            return None, None

        # The links live in the label's parent container; search only there.
        download_div = download_label.parent
        link_tags = download_div.find_all('a', class_='tag_item')
        if not link_tags:
            logger_func(" [Rule34Video] ❌ Found the 'Download' div, but no download links were inside it.")
            return None, None

        links_by_quality = {}
        quality_pattern = re.compile(r'(\d+p|4k)')

        for tag in link_tags:
            href = tag.get('href')
            if not href:
                continue

            # Prefer the quality shown in the link text; fall back to the URL.
            quality_match = quality_pattern.search(tag.text) or quality_pattern.search(href)
            if quality_match:
                links_by_quality[quality_match.group(1)] = href

        if not links_by_quality:
            logger_func(" [Rule34Video] ⚠️ Could not parse specific qualities. Using first available link as a fallback.")
            final_video_url = link_tags[0].get('href')
            if not final_video_url:
                logger_func(" [Rule34Video] ❌ Fallback failed: First link tag had no href attribute.")
                return None, None

            final_video_url = html.unescape(final_video_url)
            logger_func(f" [Rule34Video] ✅ Selected first available link as fallback: {final_video_url}")
            return video_title, final_video_url

        logger_func(f" [Rule34Video] Found available qualities: {list(links_by_quality.keys())}")

        # links_by_quality is non-empty and holds only non-empty hrefs here,
        # so the selection always yields a usable URL.
        quality, final_video_url = _select_rule34video_quality(links_by_quality)
        if quality == '1080p':
            logger_func(" [Rule34Video] ✅ Selected preferred 1080p link.")
        elif quality == '720p':
            logger_func(" [Rule34Video] ✅ 1080p not found. Selected fallback 720p link.")
        elif quality in ('480p', '360p'):
            logger_func(f" [Rule34Video] ⚠️ 1080p/720p not found. Selected best available fallback: {quality}")
        else:
            logger_func(f" [Rule34Video] ⚠️ No standard quality (1080p/720p/480p/360p) found. Selected best available: {quality}")

        # hrefs come from HTML attributes and may contain entities like '&amp;'.
        final_video_url = html.unescape(final_video_url)
        logger_func(f" [Rule34Video] ✅ Selected direct download URL: {final_video_url}")

        return video_title, final_video_url

    except Exception as e:
        logger_func(f" [Rule34Video] ❌ An error occurred: {e}")
        return None, None
    finally:
        # The scraper is a requests-style session; release its connections.
        scraper.close()
|
||||
@@ -17,8 +17,10 @@ def fetch_single_simpcity_page(url, logger_func, cookies=None, post_id=None):
|
||||
|
||||
try:
|
||||
response = scraper.get(url, timeout=30, headers=headers, cookies=cookies)
|
||||
final_url = response.url # Capture the final URL after any redirects
|
||||
|
||||
if response.status_code == 404:
|
||||
return None, []
|
||||
return None, [], final_url
|
||||
response.raise_for_status()
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
@@ -91,9 +93,9 @@ def fetch_single_simpcity_page(url, logger_func, cookies=None, post_id=None):
|
||||
# We use a set to remove duplicate URLs that might be found in multiple ways
|
||||
unique_jobs = list({job['url']: job for job in jobs_on_page}.values())
|
||||
logger_func(f" [SimpCity] Scraper found jobs: {[job['type'] for job in unique_jobs]}")
|
||||
return album_title, unique_jobs
|
||||
return album_title, unique_jobs, final_url
|
||||
|
||||
return album_title, []
|
||||
return album_title, [], final_url
|
||||
|
||||
except Exception as e:
|
||||
logger_func(f" [SimpCity] ❌ Error fetching page {url}: {e}")
|
||||
|
||||
@@ -52,7 +52,7 @@ from ..utils.file_utils import (
|
||||
from ..utils.network_utils import prepare_cookies_for_request, get_link_platform
|
||||
from ..utils.text_utils import (
|
||||
is_title_match_for_character, is_filename_match_for_character, strip_html_tags,
|
||||
extract_folder_name_from_title, # This was the function causing the error
|
||||
extract_folder_name_from_title,
|
||||
match_folders_from_title, match_folders_from_filename_enhanced
|
||||
)
|
||||
from ..config.constants import *
|
||||
@@ -1810,6 +1810,31 @@ class PostProcessorWorker:
|
||||
|
||||
if not all_files_from_post_api:
|
||||
self.logger(f" No files found to download for post {post_id}.")
|
||||
if not self.extract_links_only and should_create_post_subfolder:
|
||||
path_to_check_for_emptiness = determined_post_save_path_for_history
|
||||
try:
|
||||
if os.path.isdir(path_to_check_for_emptiness):
|
||||
dir_contents = os.listdir(path_to_check_for_emptiness)
|
||||
# Check if the directory is empty OR only contains our ID file
|
||||
is_effectively_empty = True
|
||||
if dir_contents:
|
||||
if not all(f.startswith('.postid_') for f in dir_contents):
|
||||
is_effectively_empty = False
|
||||
|
||||
if is_effectively_empty:
|
||||
self.logger(f" 🗑️ Removing empty post-specific subfolder (post had no files): '{path_to_check_for_emptiness}'")
|
||||
if dir_contents:
|
||||
for id_file in dir_contents:
|
||||
if id_file.startswith('.postid_'):
|
||||
try:
|
||||
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
|
||||
except OSError as e_rm_id:
|
||||
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
|
||||
os.rmdir(path_to_check_for_emptiness)
|
||||
except OSError as e_rmdir:
|
||||
self.logger(f" ⚠️ Could not remove effectively empty subfolder (no files) '{path_to_check_for_emptiness}': {e_rmdir}")
|
||||
# --- END NEW CLEANUP LOGIC ---
|
||||
|
||||
history_data_for_no_files_post = {
|
||||
'post_title': post_title,
|
||||
'post_id': post_id,
|
||||
@@ -1823,7 +1848,7 @@ class PostProcessorWorker:
|
||||
result_tuple = (0, 0, [], [], [], history_data_for_no_files_post, None)
|
||||
self._emit_signal('worker_finished', result_tuple)
|
||||
return result_tuple
|
||||
|
||||
|
||||
files_to_download_info_list = []
|
||||
processed_original_filenames_in_this_post = set()
|
||||
if self.keep_in_post_duplicates:
|
||||
@@ -2052,9 +2077,27 @@ class PostProcessorWorker:
|
||||
if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
|
||||
path_to_check_for_emptiness = determined_post_save_path_for_history
|
||||
try:
|
||||
if os.path.isdir(path_to_check_for_emptiness) and not os.listdir(path_to_check_for_emptiness):
|
||||
self.logger(f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness}'")
|
||||
os.rmdir(path_to_check_for_emptiness)
|
||||
if os.path.isdir(path_to_check_for_emptiness):
|
||||
dir_contents = os.listdir(path_to_check_for_emptiness)
|
||||
# Check if the directory is empty OR only contains our ID file
|
||||
is_effectively_empty = True
|
||||
if dir_contents:
|
||||
# If there are files, check if ALL of them are .postid files
|
||||
if not all(f.startswith('.postid_') for f in dir_contents):
|
||||
is_effectively_empty = False
|
||||
|
||||
if is_effectively_empty:
|
||||
self.logger(f" 🗑️ Removing empty post-specific subfolder (no files downloaded): '{path_to_check_for_emptiness}'")
|
||||
# We must first remove the ID file(s) before removing the dir
|
||||
if dir_contents:
|
||||
for id_file in dir_contents:
|
||||
if id_file.startswith('.postid_'):
|
||||
try:
|
||||
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
|
||||
except OSError as e_rm_id:
|
||||
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
|
||||
|
||||
os.rmdir(path_to_check_for_emptiness) # Now the rmdir should work
|
||||
except OSError as e_rmdir:
|
||||
self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
|
||||
|
||||
@@ -2066,11 +2109,29 @@ class PostProcessorWorker:
|
||||
if not self.extract_links_only and self.use_post_subfolders and total_downloaded_this_post == 0:
|
||||
path_to_check_for_emptiness = determined_post_save_path_for_history
|
||||
try:
|
||||
if os.path.isdir(path_to_check_for_emptiness) and not os.listdir(path_to_check_for_emptiness):
|
||||
self.logger(f" 🗑️ Removing empty post-specific subfolder: '{path_to_check_for_emptiness}'")
|
||||
os.rmdir(path_to_check_for_emptiness)
|
||||
if os.path.isdir(path_to_check_for_emptiness):
|
||||
dir_contents = os.listdir(path_to_check_for_emptiness)
|
||||
# Check if the directory is empty OR only contains our ID file
|
||||
is_effectively_empty = True
|
||||
if dir_contents:
|
||||
# If there are files, check if ALL of them are .postid files
|
||||
if not all(f.startswith('.postid_') for f in dir_contents):
|
||||
is_effectively_empty = False
|
||||
|
||||
if is_effectively_empty:
|
||||
self.logger(f" 🗑️ Removing empty post-specific subfolder (no files downloaded): '{path_to_check_for_emptiness}'")
|
||||
# We must first remove the ID file(s) before removing the dir
|
||||
if dir_contents:
|
||||
for id_file in dir_contents:
|
||||
if id_file.startswith('.postid_'):
|
||||
try:
|
||||
os.remove(os.path.join(path_to_check_for_emptiness, id_file))
|
||||
except OSError as e_rm_id:
|
||||
self.logger(f" ⚠️ Could not remove ID file '{id_file}' during cleanup: {e_rm_id}")
|
||||
|
||||
os.rmdir(path_to_check_for_emptiness) # Now the rmdir should work
|
||||
except OSError as e_rmdir:
|
||||
self.logger(f" ⚠️ Could not remove potentially empty subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
|
||||
self.logger(f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness}': {e_rmdir}")
|
||||
|
||||
self._emit_signal('worker_finished', result_tuple)
|
||||
return result_tuple
|
||||
|
||||
137
src/ui/classes/allcomic_downloader_thread.py
Normal file
137
src/ui/classes/allcomic_downloader_thread.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import cloudscraper
|
||||
import requests
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...core.allcomic_client import (fetch_chapter_data as allcomic_fetch_data,
|
||||
get_chapter_list as allcomic_get_list)
|
||||
from ...utils.file_utils import clean_folder_name
|
||||
|
||||
|
||||
class AllcomicDownloadThread(QThread):
    """Background thread that downloads every chapter of an AllComic series.

    Signals:
        progress_signal(str): human-readable log lines.
        file_progress_signal(str, object): emitted once with ``("", None)``
            when the run ends, to clear any per-file progress display.
        finished_signal(int, int, bool): downloaded count, skipped count and
            whether the job was cancelled.
        overall_progress_signal(int, int): ``(images in chapter, images done)``.
    """
    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool)
    overall_progress_signal = pyqtSignal(int, int)

    def __init__(self, url, output_dir, parent=None):
        """Store the job parameters.

        Args:
            url: series (or single chapter) URL to download.
            output_dir: base directory; files go to ``<series>/<chapter>/``.
            parent: optional owner; when it exposes ``pause_event`` that event
                is shared so the owner can pause this thread.
        """
        super().__init__(parent)
        self.comic_url = url
        self.output_dir = output_dir
        self.is_cancelled = False
        # Fall back to a private (never set) event when the parent has none.
        self.pause_event = parent.pause_event if hasattr(parent, 'pause_event') else threading.Event()

    def _check_pause(self):
        """Block while paused; return True when the job has been cancelled."""
        if self.is_cancelled:
            return True
        if self.pause_event and self.pause_event.is_set():
            self.progress_signal.emit(" Download paused...")
            while self.pause_event.is_set():
                if self.is_cancelled:
                    return True
                time.sleep(0.5)
            self.progress_signal.emit(" Download resumed.")
        return self.is_cancelled

    def _remove_partial_file(self, filepath):
        """Best-effort removal of an incomplete download."""
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
        except OSError as e:
            self.progress_signal.emit(f" ⚠️ Could not remove partial file '{filepath}': {e}")

    def _download_image(self, scraper, img_url, filepath, filename, headers, position_label):
        """Download one image with retries.

        Returns:
            True when the file was saved, False when every attempt failed,
            None when the job was cancelled before the file completed.
        """
        max_retries = 8
        for attempt in range(max_retries):
            if self._check_pause():
                return None
            try:
                self.progress_signal.emit(f" Downloading ({position_label}): '{filename}' (Attempt {attempt + 1})...")
                # The context manager closes the streamed response even when
                # the body is not fully consumed (cancel / error).
                with scraper.get(img_url, stream=True, headers=headers, timeout=60) as response:
                    response.raise_for_status()
                    with open(filepath, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if self._check_pause():
                                break
                            f.write(chunk)
            except requests.RequestException as e:
                # A truncated file would be mistaken for a finished download
                # by the "already exists" check on the next run.
                self._remove_partial_file(filepath)
                self.progress_signal.emit(f" ⚠️ Attempt {attempt + 1} failed for '{filename}': {e}")
                if attempt < max_retries - 1:
                    wait_time = 2 * (attempt + 1)
                    self.progress_signal.emit(f" Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
                continue
            except OSError:
                # Local I/O failure (disk full, permissions): not retryable here.
                self._remove_partial_file(filepath)
                raise

            if self._check_pause():
                self._remove_partial_file(filepath)
                return None
            return True

        self.progress_signal.emit(f" ❌ All attempts failed for '{filename}'. Skipping.")
        return False

    def run(self):
        """Download all chapters, then always report the final counts."""
        grand_total_dl = 0
        grand_total_skip = 0

        try:
            # Create the scraper session ONCE for the entire job
            scraper = cloudscraper.create_scraper(
                browser={'browser': 'firefox', 'platform': 'windows', 'desktop': True}
            )

            chapters_to_download = allcomic_get_list(scraper, self.comic_url, self.progress_signal.emit)
            if not chapters_to_download:
                # No chapter list: treat the given URL as a single chapter.
                chapters_to_download = [self.comic_url]

            self.progress_signal.emit(f"--- Starting download of {len(chapters_to_download)} chapter(s) ---")

            for chapter_idx, chapter_url in enumerate(chapters_to_download):
                if self._check_pause():
                    break

                self.progress_signal.emit(f"\n-- Processing Chapter {chapter_idx + 1}/{len(chapters_to_download)} --")
                comic_title, chapter_title, image_urls = allcomic_fetch_data(scraper, chapter_url, self.progress_signal.emit)

                if not image_urls:
                    self.progress_signal.emit(f"❌ Failed to get data for chapter. Skipping.")
                    continue

                series_folder_name = clean_folder_name(comic_title)
                chapter_folder_name = clean_folder_name(chapter_title)
                final_save_path = os.path.join(self.output_dir, series_folder_name, chapter_folder_name)

                try:
                    os.makedirs(final_save_path, exist_ok=True)
                    self.progress_signal.emit(f" Saving to folder: '{os.path.join(series_folder_name, chapter_folder_name)}'")
                except OSError as e:
                    self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
                    grand_total_skip += len(image_urls)
                    continue

                total_files_in_chapter = len(image_urls)
                self.overall_progress_signal.emit(total_files_in_chapter, 0)
                headers = {'Referer': chapter_url}

                for i, img_url in enumerate(image_urls):
                    if self._check_pause():
                        break

                    file_extension = os.path.splitext(urlparse(img_url).path)[1] or '.jpg'
                    filename = f"{i+1:03d}{file_extension}"
                    filepath = os.path.join(final_save_path, filename)

                    if os.path.exists(filepath):
                        self.progress_signal.emit(f" -> Skip ({i+1}/{total_files_in_chapter}): '{filename}' already exists.")
                        grand_total_skip += 1
                    else:
                        outcome = self._download_image(
                            scraper, img_url, filepath, filename, headers,
                            f"{i+1}/{total_files_in_chapter}",
                        )
                        if outcome is True:
                            grand_total_dl += 1
                        elif outcome is False:
                            grand_total_skip += 1
                        # outcome None: cancelled, counted as neither.

                    self.overall_progress_signal.emit(total_files_in_chapter, i + 1)
                    time.sleep(0.5)  # Increased delay between images for this site

                if self._check_pause():
                    break
        except Exception as e:
            # Top-level boundary of the thread: log instead of letting the
            # exception escape run(), so the finished signal is still sent.
            self.progress_signal.emit(f"❌ An unexpected error occurred in AllComic thread: {e}")
        finally:
            self.file_progress_signal.emit("", None)
            self.finished_signal.emit(grand_total_dl, grand_total_skip, self.is_cancelled)

    def cancel(self):
        """Request cancellation; the worker stops at its next check point."""
        self.is_cancelled = True
        self.progress_signal.emit(" Cancellation signal received by AllComic thread.")
|
||||
133
src/ui/classes/booru_downloader_thread.py
Normal file
133
src/ui/classes/booru_downloader_thread.py
Normal file
@@ -0,0 +1,133 @@
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
import datetime
|
||||
import requests
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...core.booru_client import fetch_booru_data, BooruClientException
|
||||
from ...utils.file_utils import clean_folder_name
|
||||
|
||||
# Version number derived from today's date: it grows by one every 28 days,
# so the advertised Firefox version never looks stale.
_ff_ver = (datetime.date.today().toordinal() - 735506) // 28
USERAGENT_FIREFOX = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
    f"rv:{_ff_ver}.0) Gecko/20100101 Firefox/{_ff_ver}.0"
)
|
||||
|
||||
class BooruDownloadThread(QThread):
    """A dedicated QThread for handling Danbooru and Gelbooru downloads.

    Signals:
        progress_signal(str): human-readable log lines.
        overall_progress_signal(int, int): ``(posts found so far, posts processed)``.
        finished_signal(int, int, bool): dl_count, skip_count, cancelled.
    """
    progress_signal = pyqtSignal(str)
    overall_progress_signal = pyqtSignal(int, int)
    finished_signal = pyqtSignal(int, int, bool)  # dl_count, skip_count, cancelled

    def __init__(self, url, output_dir, api_key, user_id, parent=None):
        """Store the job parameters.

        Args:
            url: booru URL to download from.
            output_dir: base directory; a sub-folder is created per search.
            api_key: API key forwarded to ``fetch_booru_data``.
            user_id: user id forwarded to ``fetch_booru_data``.
            parent: optional owner; its ``pause_event`` is shared if present.
        """
        super().__init__(parent)
        self.booru_url = url
        self.output_dir = output_dir
        self.api_key = api_key
        self.user_id = user_id
        self.is_cancelled = False
        self.pause_event = parent.pause_event if hasattr(parent, 'pause_event') else threading.Event()

    def _discard_partial_file(self, filepath):
        """Best-effort removal of an incomplete download."""
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
        except OSError as e:
            self.progress_signal.emit(f" ⚠️ Could not remove partial file '{filepath}': {e}")

    def run(self):
        """Iterate the items yielded by the client and download each post's file."""
        download_count = 0
        skip_count = 0
        processed_count = 0
        cumulative_total = 0

        def logger(msg):
            self.progress_signal.emit(str(msg))

        session = requests.Session()
        session.headers["User-Agent"] = USERAGENT_FIREFOX

        try:
            self.progress_signal.emit("=" * 40)
            self.progress_signal.emit(f"🚀 Starting Booru Download for: {self.booru_url}")

            item_generator = fetch_booru_data(self.booru_url, self.api_key, self.user_id, logger)

            download_path = self.output_dir  # Default path
            path_initialized = False

            for item in item_generator:
                if self.is_cancelled:
                    break

                # ('PAGE_UPDATE', n) tuples only report how many posts a new
                # page added; everything else is handled as a post mapping.
                if isinstance(item, tuple) and item[0] == 'PAGE_UPDATE':
                    newly_found = item[1]
                    cumulative_total += newly_found
                    self.progress_signal.emit(f" Found {newly_found} more posts. Total so far: {cumulative_total}")
                    self.overall_progress_signal.emit(cumulative_total, processed_count)
                    continue

                post_data = item
                processed_count += 1

                if not path_initialized:
                    # The destination folder is named after the first post's search tags.
                    base_folder_name = post_data.get('search_tags', 'booru_download')
                    download_path = os.path.join(self.output_dir, clean_folder_name(base_folder_name))
                    os.makedirs(download_path, exist_ok=True)
                    path_initialized = True

                if self.pause_event.is_set():
                    self.progress_signal.emit(" Download paused...")
                    while self.pause_event.is_set():
                        if self.is_cancelled:
                            break
                        time.sleep(0.5)
                    if self.is_cancelled:
                        break
                    self.progress_signal.emit(" Download resumed.")

                file_url = post_data.get('file_url')
                if not file_url:
                    skip_count += 1
                    self.progress_signal.emit(f" -> Skip ({processed_count}/{cumulative_total}): Post ID {post_data.get('id')} has no file URL.")
                    continue

                cat = post_data.get('category', 'booru')
                post_id = post_data.get('id', 'unknown')
                md5 = post_data.get('md5', '')
                fname = post_data.get('filename', f"file_{post_id}")
                ext = post_data.get('extension', 'jpg')

                final_filename = f"{cat}_{post_id}_{md5 or fname}.{ext}"
                filepath = os.path.join(download_path, final_filename)

                if os.path.exists(filepath):
                    self.progress_signal.emit(f" -> Skip ({processed_count}/{cumulative_total}): '{final_filename}' already exists.")
                    skip_count += 1
                else:
                    try:
                        self.progress_signal.emit(f" Downloading ({processed_count}/{cumulative_total}): '{final_filename}'...")
                        # Context manager closes the streamed response even if
                        # the body is abandoned part-way through.
                        with session.get(file_url, stream=True, timeout=60) as response:
                            response.raise_for_status()
                            with open(filepath, 'wb') as f:
                                for chunk in response.iter_content(chunk_size=8192):
                                    if self.is_cancelled:
                                        break
                                    f.write(chunk)

                        if not self.is_cancelled:
                            download_count += 1
                        else:
                            self._discard_partial_file(filepath)
                            skip_count += 1

                    except Exception as e:
                        self.progress_signal.emit(f" ❌ Failed to download '{final_filename}': {e}")
                        # Without this, the truncated file would be skipped as
                        # "already exists" on the next run.
                        self._discard_partial_file(filepath)
                        skip_count += 1

                self.overall_progress_signal.emit(cumulative_total, processed_count)
                time.sleep(0.2)

            if not path_initialized:
                self.progress_signal.emit("No posts found for the given URL/tags.")

        except BooruClientException as e:
            self.progress_signal.emit(f"❌ A Booru client error occurred: {e}")
        except Exception as e:
            self.progress_signal.emit(f"❌ An unexpected error occurred in Booru thread: {e}")
        finally:
            session.close()
            self.finished_signal.emit(download_count, skip_count, self.is_cancelled)

    def cancel(self):
        """Request cancellation; the worker stops at its next check point."""
        self.is_cancelled = True
        self.progress_signal.emit(" Cancellation signal received by Booru thread.")
|
||||
195
src/ui/classes/bunkr_downloader_thread.py
Normal file
195
src/ui/classes/bunkr_downloader_thread.py
Normal file
@@ -0,0 +1,195 @@
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import requests
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...core.bunkr_client import fetch_bunkr_data
|
||||
|
||||
# File-name suffixes (lower-case) that mark a Bunkr entry as an image.
IMG_EXTS = (
    '.jpg', '.jpeg', '.png', '.gif',
    '.webp', '.bmp', '.avif',
)
# Fixed number of worker threads used for concurrent image downloads.
BUNKR_IMG_THREADS = 6
|
||||
|
||||
class BunkrDownloadThread(QThread):
    """A dedicated QThread for handling Bunkr downloads.

    Non-image files are fetched one at a time on this thread; images are
    fetched concurrently by a small thread pool.

    Signals:
        progress_signal(str): human-readable log lines.
        file_progress_signal(str, object): ``(filename, (done, total))`` while
            a file downloads, and ``("", None)`` once at the end.
        finished_signal(int, int, bool, list): downloaded count, skipped
            count, cancelled flag and an (always empty) list.
    """
    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool, list)

    def __init__(self, url, output_dir, parent=None):
        """Store the job parameters and set up the shared counters."""
        super().__init__(parent)
        self.bunkr_url = url
        self.output_dir = output_dir
        self.is_cancelled = False

        # Counters below are updated from several worker threads, always
        # while holding self.lock.
        self.lock = threading.Lock()
        self.download_count = 0
        self.skip_count = 0
        self.file_index = 0  # Use a shared index for logging

        class ThreadLogger:
            """Logger-shaped adapter that forwards messages to a Qt signal."""

            def __init__(self, signal_emitter):
                self.signal_emitter = signal_emitter

            def info(self, msg, *args, **kwargs):
                self.signal_emitter.emit(str(msg))

            def error(self, msg, *args, **kwargs):
                self.signal_emitter.emit(f"❌ ERROR: {msg}")

            def warning(self, msg, *args, **kwargs):
                self.signal_emitter.emit(f"⚠️ WARNING: {msg}")

            def debug(self, msg, *args, **kwargs):
                pass

        self.logger = ThreadLogger(self.progress_signal)

    def _discard_partial_file(self, filepath):
        """Best-effort removal of an incomplete download (``filepath`` may be None)."""
        if not filepath:
            return
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
        except OSError as e:
            self.progress_signal.emit(f" ⚠️ Could not remove partial file '{filepath}': {e}")

    def _download_file(self, file_data, total_files, album_path, is_image_task=False):
        """Download a single file; safe to call from several threads.

        Called by the main thread (for videos) and worker threads (for images).

        Args:
            file_data: mapping with ``name``, ``url`` and optional
                ``_http_headers`` entries.
            total_files: total number of files in the album (for log lines).
            album_path: existing directory the file is written into.
            is_image_task: when True, intermediate progress is not emitted.
        """
        # Stop if a cancellation signal was received before starting
        if self.is_cancelled:
            return

        with self.lock:
            self.file_index += 1
            current_file_num = self.file_index

        # Bound before the try block so the except handlers below can always
        # reference them, even when the failure happens on the first lines.
        filename = 'untitled_file'
        filepath = None

        try:
            filename = file_data.get('name') or 'untitled_file'
            file_url = file_data.get('url')
            headers = file_data.get('_http_headers')

            # Replace characters that are invalid in file names.
            filename = re.sub(r'[<>:"/\\|?*]', '_', filename).strip()
            filepath = os.path.join(album_path, filename)

            if os.path.exists(filepath):
                self.progress_signal.emit(f" -> Skip ({current_file_num}/{total_files}): '{filename}' already exists.")
                with self.lock:
                    self.skip_count += 1
                return

            self.progress_signal.emit(f" Downloading ({current_file_num}/{total_files}): '{filename}'...")

            # Context manager closes the streamed response even if the body
            # is abandoned part-way through (cancel / error).
            with requests.get(file_url, stream=True, headers=headers, timeout=60) as response:
                response.raise_for_status()

                total_size = int(response.headers.get('content-length', 0))
                downloaded_size = 0
                last_update_time = time.time()

                with open(filepath, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if self.is_cancelled:
                            break
                        if chunk:
                            f.write(chunk)
                            downloaded_size += len(chunk)

                            # For videos/other files, send frequent progress
                            # For images, don't send progress to avoid UI flicker
                            if not is_image_task:
                                current_time = time.time()
                                if total_size > 0 and (current_time - last_update_time) > 0.5:
                                    self.file_progress_signal.emit(filename, (downloaded_size, total_size))
                                    last_update_time = current_time

            if self.is_cancelled:
                self.progress_signal.emit(f" Download cancelled for '{filename}'.")
                self._discard_partial_file(filepath)
                return

            if total_size > 0:
                self.file_progress_signal.emit(filename, (total_size, total_size))

            with self.lock:
                self.download_count += 1

        except requests.exceptions.RequestException as e:
            self.progress_signal.emit(f" ❌ Failed to download '{filename}'. Error: {e}")
            self._discard_partial_file(filepath)
            with self.lock:
                self.skip_count += 1
        except Exception as e:
            self.progress_signal.emit(f" ❌ An unexpected error occurred with '{filename}': {e}")
            self._discard_partial_file(filepath)
            with self.lock:
                self.skip_count += 1

    def run(self):
        """Fetch the album listing, then download its files."""
        self.progress_signal.emit("=" * 40)
        self.progress_signal.emit(f"🚀 Starting Bunkr Download for: {self.bunkr_url}")

        album_name, files_to_download = fetch_bunkr_data(self.bunkr_url, self.logger)

        if not files_to_download:
            self.progress_signal.emit("❌ Failed to extract file information from Bunkr. Aborting.")
            self.finished_signal.emit(0, 0, self.is_cancelled, [])
            return

        album_path = os.path.join(self.output_dir, album_name)
        try:
            os.makedirs(album_path, exist_ok=True)
            self.progress_signal.emit(f" Saving to folder: '{album_path}'")
        except OSError as e:
            self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
            self.finished_signal.emit(0, len(files_to_download), self.is_cancelled, [])
            return

        total_files = len(files_to_download)

        # Separate files into images and others
        image_files = []
        other_files = []
        for f in files_to_download:
            name = f.get('name', '').lower()
            if name.endswith(IMG_EXTS):
                image_files.append(f)
            else:
                other_files.append(f)

        self.progress_signal.emit(f" Found {len(image_files)} images and {len(other_files)} other files.")

        # --- 1. Process videos and other files sequentially (one by one) ---
        if other_files:
            self.progress_signal.emit(f" Downloading {len(other_files)} videos/other files sequentially...")
            for file_data in other_files:
                if self.is_cancelled:
                    break
                self._download_file(file_data, total_files, album_path, is_image_task=False)

        # --- 2. Process images concurrently using a fixed-size thread pool ---
        if image_files and not self.is_cancelled:
            self.progress_signal.emit(f" Downloading {len(image_files)} images concurrently ({BUNKR_IMG_THREADS} threads)...")
            with ThreadPoolExecutor(max_workers=BUNKR_IMG_THREADS, thread_name_prefix='BunkrImg') as executor:
                futures = {
                    executor.submit(self._download_file, file_data, total_files, album_path, is_image_task=True): file_data
                    for file_data in image_files
                }

                for future in futures:
                    if self.is_cancelled:
                        # cancel() only prevents tasks that have not started;
                        # running ones stop on their own is_cancelled check.
                        future.cancel()
                        continue
                    try:
                        future.result()  # Wait for the task to finish (or raise exception)
                    except Exception as e:
                        # Handled per future so one failure does not stop us
                        # from observing / cancelling the remaining tasks.
                        self.progress_signal.emit(f" ❌ A thread pool error occurred: {e}")

        if self.is_cancelled:
            self.progress_signal.emit(" Download cancelled by user.")
            # Update skip count to reflect all non-downloaded files
            self.skip_count = total_files - self.download_count

        self.file_progress_signal.emit("", None)  # Clear file progress
        self.finished_signal.emit(self.download_count, self.skip_count, self.is_cancelled, [])

    def cancel(self):
        """Request cancellation; workers stop at their next check point."""
        self.is_cancelled = True
        self.progress_signal.emit(" Cancellation signal received by Bunkr thread.")
|
||||
189
src/ui/classes/discord_downloader_thread.py
Normal file
189
src/ui/classes/discord_downloader_thread.py
Normal file
@@ -0,0 +1,189 @@
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
import requests
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
# Assuming discord_pdf_generator is in the dialogs folder, sibling to the classes folder
|
||||
from ..dialogs.discord_pdf_generator import create_pdf_from_discord_messages
|
||||
|
||||
# Defined locally so this module does not depend on another module for its
# User-Agent. The version number is derived from today's date and grows by
# one every 28 days.
_ff_ver = (datetime.date.today().toordinal() - 735506) // 28
USERAGENT_FIREFOX = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
    f"rv:{_ff_ver}.0) Gecko/20100101 Firefox/{_ff_ver}.0"
)
|
||||
|
||||
class DiscordDownloadThread(QThread):
    """A dedicated QThread for handling all official Discord downloads.

    Depending on ``mode`` the thread either exports a channel's messages to a
    PDF (``'pdf'``) or downloads every message attachment (any other value).

    Signals:
        progress_signal(str): human-readable log lines.
        progress_label_signal(str): short status text.
        finished_signal(int, int, bool, list): downloaded count, skipped
            count, cancelled flag and an (always empty) list.
    """
    progress_signal = pyqtSignal(str)
    progress_label_signal = pyqtSignal(str)
    finished_signal = pyqtSignal(int, int, bool, list)

    def __init__(self, mode, session, token, output_dir, server_id, channel_id, url, app_base_dir, limit=None, parent=None):
        """Store the job parameters.

        Args:
            mode: ``'pdf'`` for a PDF export, anything else for attachments.
            session: HTTP session used for the message-listing requests.
            token: value sent as the ``Authorization`` header.
            output_dir: directory that receives the PDF / attachments.
            server_id: server id, used in the PDF file name.
            channel_id: channel whose messages are fetched.
            url: original URL (only used in log output).
            app_base_dir: application base directory (font lookup for PDF).
            limit: optional maximum number of messages to fetch.
            parent: optional Qt parent.
        """
        super().__init__(parent)
        self.mode = mode
        self.session = session
        self.token = token
        self.output_dir = output_dir
        self.server_id = server_id
        self.channel_id = channel_id
        self.api_url = url
        self.message_limit = limit
        self.app_base_dir = app_base_dir  # Path to app's base directory

        self.is_cancelled = False
        self.is_paused = False

    def run(self):
        """Dispatch to the PDF export or the attachment download."""
        if self.mode == 'pdf':
            self._run_pdf_creation()
        else:
            self._run_file_download()

    def cancel(self):
        """Request cancellation; the worker stops at its next check point."""
        self.progress_signal.emit(" Cancellation signal received by Discord thread.")
        self.is_cancelled = True

    def pause(self):
        """Pause the worker at its next check point."""
        self.progress_signal.emit(" Pausing Discord download...")
        self.is_paused = True

    def resume(self):
        """Resume a paused worker."""
        self.progress_signal.emit(" Resuming Discord download...")
        self.is_paused = False

    def _check_events(self):
        """Block while paused; return True when the job has been cancelled."""
        if self.is_cancelled:
            return True
        while self.is_paused:
            time.sleep(0.5)
            if self.is_cancelled:
                return True
        return False

    def _discard_partial_file(self, filepath):
        """Best-effort removal of an incomplete download."""
        try:
            if os.path.exists(filepath):
                os.remove(filepath)
        except OSError as e:
            self.progress_signal.emit(f" ⚠️ Could not remove partial file '{filepath}': {e}")

    def _fetch_all_messages(self):
        """Page backwards through the channel and return the collected messages.

        Messages are returned in the order the API delivered them; both
        callers reverse that order before processing. Fetching stops on
        cancellation, on the first request error, on an empty page, or once
        ``message_limit`` messages have been collected.
        """
        all_messages = []
        last_message_id = None
        headers = {'Authorization': self.token, 'User-Agent': USERAGENT_FIREFOX}

        while True:
            if self._check_events():
                break

            endpoint = f"/channels/{self.channel_id}/messages?limit=100"
            if last_message_id:
                endpoint += f"&before={last_message_id}"

            try:
                resp = self.session.get(f"https://discord.com/api/v10{endpoint}", headers=headers, timeout=30)
                resp.raise_for_status()
                message_batch = resp.json()
            except Exception as e:
                self.progress_signal.emit(f" ❌ Error fetching message batch: {e}")
                break

            if not message_batch:
                break

            all_messages.extend(message_batch)

            if self.message_limit and len(all_messages) >= self.message_limit:
                self.progress_signal.emit(f" Reached message limit of {self.message_limit}. Halting fetch.")
                all_messages = all_messages[:self.message_limit]
                break

            # The last message of the page is the cursor for the next request.
            last_message_id = message_batch[-1]['id']
            self.progress_label_signal.emit(f"Fetched {len(all_messages)} messages...")
            time.sleep(1)  # API Rate Limiting

        return all_messages

    def _run_pdf_creation(self):
        """Fetch the channel's messages and render them into a single PDF."""
        self.progress_signal.emit("=" * 40)
        self.progress_signal.emit(f"🚀 Starting Discord PDF export for: {self.api_url}")
        self.progress_label_signal.emit("Fetching messages...")

        all_messages = self._fetch_all_messages()

        if self.is_cancelled:
            self.finished_signal.emit(0, 0, True, [])
            return

        self.progress_label_signal.emit(f"Collected {len(all_messages)} total messages. Generating PDF...")
        all_messages.reverse()

        font_path = os.path.join(self.app_base_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf')
        output_filepath = os.path.join(self.output_dir, f"discord_{self.server_id}_{self.channel_id or 'server'}.pdf")

        # NOTE(review): the thread object itself is passed as both events;
        # confirm the generator reads is_cancelled / is_paused rather than
        # calling threading.Event methods.
        success = create_pdf_from_discord_messages(
            all_messages, self.server_id, self.channel_id,
            output_filepath, font_path, logger=self.progress_signal.emit,
            cancellation_event=self, pause_event=self
        )

        if success:
            self.progress_label_signal.emit(f"✅ PDF export complete!")
        elif not self.is_cancelled:
            self.progress_label_signal.emit(f"❌ PDF export failed. Check log for details.")

        self.finished_signal.emit(0, len(all_messages), self.is_cancelled, [])

    def _run_file_download(self):
        """Download every attachment of the channel, oldest message first.

        ``finished_signal`` is emitted exactly once, from the ``finally``
        clause, on every exit path.
        """
        download_count = 0
        skip_count = 0
        try:
            self.progress_signal.emit("=" * 40)
            self.progress_signal.emit(f"🚀 Starting Discord download for channel: {self.channel_id}")
            self.progress_label_signal.emit("Fetching messages...")
            all_messages = self._fetch_all_messages()

            if self.is_cancelled:
                # No explicit emit here: the finally clause reports
                # (0, 0, True, []) and a second emit would notify the
                # receiver twice.
                return

            self.progress_label_signal.emit(f"Collected {len(all_messages)} messages. Starting downloads...")
            total_attachments = sum(len(m.get('attachments', [])) for m in all_messages)

            for message in reversed(all_messages):
                if self._check_events():
                    break
                for attachment in message.get('attachments', []):
                    if self._check_events():
                        break

                    file_url = attachment['url']
                    original_filename = attachment['filename']
                    filepath = os.path.join(self.output_dir, original_filename)
                    filename_to_use = original_filename

                    # Never overwrite: append " (n)" until the name is free.
                    counter = 1
                    base_name, extension = os.path.splitext(original_filename)
                    while os.path.exists(filepath):
                        filename_to_use = f"{base_name} ({counter}){extension}"
                        filepath = os.path.join(self.output_dir, filename_to_use)
                        counter += 1

                    if filename_to_use != original_filename:
                        self.progress_signal.emit(f" -> Duplicate name '{original_filename}'. Saving as '{filename_to_use}'.")

                    try:
                        self.progress_signal.emit(f" Downloading ({download_count+1}/{total_attachments}): '{filename_to_use}'...")
                        download_cancelled_mid_file = False
                        # Context manager closes the streamed response even
                        # if the body is abandoned part-way through.
                        with requests.get(file_url, stream=True, timeout=60) as response:
                            response.raise_for_status()
                            with open(filepath, 'wb') as f:
                                for chunk in response.iter_content(chunk_size=8192):
                                    if self._check_events():
                                        download_cancelled_mid_file = True
                                        break
                                    f.write(chunk)

                        if download_cancelled_mid_file:
                            self.progress_signal.emit(f" Download cancelled for '{filename_to_use}'. Deleting partial file.")
                            self._discard_partial_file(filepath)
                            continue

                        download_count += 1
                    except Exception as e:
                        self.progress_signal.emit(f" ❌ Failed to download '{filename_to_use}': {e}")
                        # filepath did not exist before this attempt (see the
                        # naming loop above), so anything there is our own
                        # truncated output.
                        self._discard_partial_file(filepath)
                        skip_count += 1
        finally:
            self.finished_signal.emit(download_count, skip_count, self.is_cancelled, [])
|
||||
183
src/ui/classes/downloader_factory.py
Normal file
183
src/ui/classes/downloader_factory.py
Normal file
@@ -0,0 +1,183 @@
|
||||
import re
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Utility Imports
|
||||
from ...utils.network_utils import prepare_cookies_for_request
|
||||
from ...utils.file_utils import clean_folder_name
|
||||
|
||||
# Downloader Thread Imports (Alphabetical Order Recommended)
|
||||
from .allcomic_downloader_thread import AllcomicDownloadThread
|
||||
from .booru_downloader_thread import BooruDownloadThread
|
||||
from .bunkr_downloader_thread import BunkrDownloadThread
|
||||
from .discord_downloader_thread import DiscordDownloadThread # Official Discord
|
||||
from .drive_downloader_thread import DriveDownloadThread
|
||||
from .erome_downloader_thread import EromeDownloadThread
|
||||
from .external_link_downloader_thread import ExternalLinkDownloadThread
|
||||
from .fap_nation_downloader_thread import FapNationDownloadThread
|
||||
from .hentai2read_downloader_thread import Hentai2readDownloadThread
|
||||
from .kemono_discord_downloader_thread import KemonoDiscordDownloadThread
|
||||
from .mangadex_downloader_thread import MangaDexDownloadThread
|
||||
from .nhentai_downloader_thread import NhentaiDownloadThread
|
||||
from .pixeldrain_downloader_thread import PixeldrainDownloadThread
|
||||
from .rule34video_downloader_thread import Rule34VideoDownloadThread
|
||||
from .saint2_downloader_thread import Saint2DownloadThread
|
||||
from .simp_city_downloader_thread import SimpCityDownloadThread
|
||||
from .toonily_downloader_thread import ToonilyDownloadThread
|
||||
|
||||
|
||||
def create_downloader_thread(main_app, api_url, service, id1, id2, effective_output_dir_for_run):
    """Pick and construct the site-specific download thread for a URL.

    The checks run top to bottom and the first match wins, so their order is
    significant (e.g. Kemono-hosted Discord is tested before official Discord).

    Returns:
        A configured QThread instance; the string "COOKIE_ERROR" or
        "FETCH_ERROR" when a prerequisite could not be obtained; or None when
        no specialised handler applies (the caller then falls back to the
        generic BackendDownloadThread).
    """
    out_dir = effective_output_dir_for_run

    # Booru sites (Danbooru, Gelbooru)
    if service in ('danbooru', 'gelbooru'):
        return BooruDownloadThread(
            url=api_url,
            output_dir=out_dir,
            api_key=main_app.api_key_input.text().strip(),
            user_id=main_app.user_id_input.text().strip(),
            parent=main_app,
        )

    # Cloud storage hosts (Mega, GDrive, Dropbox, GoFile): first host whose
    # marker appears anywhere in the URL.
    cloud_hosts = (
        ('mega', ('mega.nz', 'mega.io')),
        ('gdrive', ('drive.google.com',)),
        ('dropbox', ('dropbox.com',)),
        ('gofile', ('gofile.io',)),
    )
    cloud_platform = next(
        (name for name, markers in cloud_hosts if any(marker in api_url for marker in markers)),
        None,
    )
    if cloud_platform:
        return DriveDownloadThread(
            api_url,
            out_dir,
            cloud_platform,
            main_app.use_subfolder_per_post_checkbox.isChecked(),
            main_app.cancellation_event,
            main_app.pause_event,
            main_app.log_signal.emit,
            parent=main_app,
        )

    # Erome
    if 'erome.com' in api_url:
        return EromeDownloadThread(api_url, out_dir, main_app)

    # MangaDex
    if 'mangadex.org' in api_url:
        return MangaDexDownloadThread(api_url, out_dir, main_app)

    # Saint2: a bare 'saint2.su' input is excluded here (batch-mode trigger).
    looks_like_saint2 = service == 'saint2' or any(
        domain in api_url for domain in ('saint2.su', 'saint2.pk')
    )
    if looks_like_saint2 and api_url.strip().lower() != 'saint2.su':
        return Saint2DownloadThread(api_url, out_dir, main_app)

    # SimpCity: cookies are mandatory.
    if service == 'simpcity':
        simpcity_cookies = prepare_cookies_for_request(
            use_cookie_flag=True,
            cookie_text_input=main_app.simpcity_cookie_text_input.text(),
            selected_cookie_file_path=main_app.selected_cookie_filepath,
            app_base_dir=main_app.app_base_dir,
            logger_func=main_app.log_signal.emit,
            target_domain='simpcity.cr',
        )
        if simpcity_cookies:
            return SimpCityDownloadThread(api_url, id2, out_dir, simpcity_cookies, main_app)
        main_app.log_signal.emit("❌ SimpCity requires valid cookies. Please provide them.")
        return "COOKIE_ERROR"

    # Rule34Video
    if service == 'rule34video':
        main_app.log_signal.emit("ℹ️ Rule34Video.com URL detected. Starting dedicated downloader.")
        return Rule34VideoDownloadThread(api_url, out_dir, main_app)

    if service == 'discord':
        # Kemono-hosted Discord archives must be recognised before the
        # official Discord check below.
        if any(domain in api_url for domain in ('kemono.cr', 'kemono.su', 'kemono.party')):
            main_app.log_signal.emit("ℹ️ Kemono Discord URL detected. Starting dedicated downloader.")
            kemono_cookies = prepare_cookies_for_request(
                use_cookie_flag=main_app.use_cookie_checkbox.isChecked(),
                cookie_text_input=main_app.cookie_text_input.text(),
                selected_cookie_file_path=main_app.selected_cookie_filepath,
                app_base_dir=main_app.app_base_dir,
                logger_func=main_app.log_signal.emit,
                target_domain='kemono.cr',
            )
            return KemonoDiscordDownloadThread(
                server_id=id1,
                channel_id=id2,
                output_dir=out_dir,
                cookies_dict=kemono_cookies,
                parent=main_app,
            )

        # Official Discord: the token is read from the "Remove Words" field.
        if 'discord.com' in api_url:
            main_app.log_signal.emit("ℹ️ Official Discord URL detected. Starting dedicated downloader.")
            token = main_app.remove_from_filename_input.text().strip()
            if not token:
                main_app.log_signal.emit("❌ Official Discord requires an Authorization Token in the 'Remove Words' field.")
                # NOTE(review): None also means "use the generic downloader";
                # a dedicated error sentinel may be more appropriate here.
                return None

            limit_text = main_app.discord_message_limit_input.text().strip()
            if limit_text.isdigit():
                message_limit = int(limit_text)
            else:
                message_limit = None
            scope = main_app.discord_download_scope

            return DiscordDownloadThread(
                mode=scope,
                session=requests.Session(),
                token=token,
                output_dir=out_dir,
                server_id=id1,
                channel_id=id2,
                url=api_url,
                app_base_dir=main_app.app_base_dir,
                limit=message_limit,
                parent=main_app,
            )

    # AllComic / AllPornComic
    if service == 'allcomic' or 'allcomic.com' in api_url or 'allporncomic.com' in api_url:
        return AllcomicDownloadThread(api_url, out_dir, main_app)

    # Hentai2Read
    if service == 'hentai2read' or 'hentai2read.com' in api_url:
        return Hentai2readDownloadThread(api_url, out_dir, main_app)

    # Fap-Nation
    if service == 'fap-nation' or 'fap-nation.com' in api_url or 'fap-nation.org' in api_url:
        return FapNationDownloadThread(
            api_url,
            out_dir,
            main_app.use_subfolder_per_post_checkbox.isChecked(),
            main_app.pause_event,
            main_app.cancellation_event,
            main_app.actual_gui_signals,
            main_app,
        )

    # Pixeldrain
    if service == 'pixeldrain' or 'pixeldrain.com' in api_url:
        return PixeldrainDownloadThread(api_url, out_dir, main_app)

    # nHentai: the gallery metadata is fetched up front and handed to the thread.
    if service == 'nhentai':
        from ...core.nhentai_client import fetch_nhentai_gallery
        main_app.log_signal.emit(f"ℹ️ nHentai gallery ID {id1} detected. Fetching gallery data...")
        gallery_data = fetch_nhentai_gallery(id1, main_app.log_signal.emit)
        if gallery_data:
            return NhentaiDownloadThread(gallery_data, out_dir, main_app)
        main_app.log_signal.emit(f"❌ Failed to fetch nHentai gallery data for ID {id1}.")
        return "FETCH_ERROR"

    # Toonily
    if service == 'toonily' or 'toonily.com' in api_url:
        return ToonilyDownloadThread(api_url, out_dir, main_app)

    # Bunkr: id1 is passed on as the URL argument.
    if service == 'bunkr':
        return BunkrDownloadThread(id1, out_dir, main_app)

    # Fallback: nothing matched, so the caller uses the generic downloader.
    main_app.log_signal.emit(f"ℹ️ No specialized downloader found for service '{service}' and URL '{api_url[:50]}...'. Using generic downloader.")
    return None
|
||||
77
src/ui/classes/drive_downloader_thread.py
Normal file
77
src/ui/classes/drive_downloader_thread.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...services.drive_downloader import (
|
||||
download_dropbox_file,
|
||||
download_gdrive_file,
|
||||
download_gofile_folder,
|
||||
download_mega_file as drive_download_mega_file,
|
||||
)
|
||||
|
||||
|
||||
class DriveDownloadThread(QThread):
    """A dedicated QThread for handling direct Mega, GDrive, Dropbox, and Gofile links.

    ``run`` dispatches on ``platform`` to the matching helper imported from
    ``services.drive_downloader`` and reports the outcome exactly once through
    ``finished_signal``.
    """
    # Passed to the download helpers as their per-file progress callback.
    file_progress_signal = pyqtSignal(str, object)
    # Emitted once per run as (1, 0, cancelled, []) on success or
    # (0, 1, cancelled, []) on failure.
    finished_signal = pyqtSignal(int, int, bool, list)
    # Passed to the helpers that accept an overall-progress callback.
    overall_progress_signal = pyqtSignal(int, int)

    def __init__(self, url, output_dir, platform, use_post_subfolder, cancellation_event, pause_event, logger_func, parent=None):
        """Store the download parameters; no work is started here.

        Args:
            url: The shared-drive link to download.
            output_dir: Directory handed to the download helper.
            platform: One of 'mega', 'gdrive', 'dropbox' or 'gofile'.
            use_post_subfolder: Forwarded to the GDrive/Dropbox helpers.
            cancellation_event: Event set by ``cancel``; forwarded to the Mega helper.
            pause_event: Event forwarded to the Mega helper.
            logger_func: Callable taking one log-message string.
            parent: Optional Qt parent object.
        """
        super().__init__(parent)
        self.drive_url = url
        self.output_dir = output_dir
        self.platform = platform
        self.use_post_subfolder = use_post_subfolder
        self.is_cancelled = False
        self.cancellation_event = cancellation_event
        self.pause_event = pause_event
        self.logger_func = logger_func

    def run(self):
        """Download ``drive_url`` with the helper matching ``platform``."""
        self.logger_func("=" * 40)
        self.logger_func(f"🚀 Starting direct {self.platform.capitalize()} Download for: {self.drive_url}")

        try:
            if self.platform == 'mega':
                drive_download_mega_file(
                    self.drive_url, self.output_dir,
                    logger_func=self.logger_func,
                    progress_callback_func=self.file_progress_signal.emit,
                    overall_progress_callback=self.overall_progress_signal.emit,
                    cancellation_event=self.cancellation_event,
                    pause_event=self.pause_event
                )
            elif self.platform == 'gdrive':
                download_gdrive_file(
                    self.drive_url, self.output_dir,
                    logger_func=self.logger_func,
                    progress_callback_func=self.file_progress_signal.emit,
                    overall_progress_callback=self.overall_progress_signal.emit,
                    use_post_subfolder=self.use_post_subfolder,
                    post_title="Google Drive Download"
                )
            elif self.platform == 'dropbox':
                download_dropbox_file(
                    self.drive_url, self.output_dir,
                    logger_func=self.logger_func,
                    progress_callback_func=self.file_progress_signal.emit,
                    use_post_subfolder=self.use_post_subfolder,
                    post_title="Dropbox Download"
                )
            elif self.platform == 'gofile':
                download_gofile_folder(
                    self.drive_url, self.output_dir,
                    logger_func=self.logger_func,
                    progress_callback_func=self.file_progress_signal.emit,
                    overall_progress_callback=self.overall_progress_signal.emit
                )
            else:
                # Without this branch an unknown platform fell through and was
                # reported as one successful download although nothing ran.
                self.logger_func(f"❌ Unsupported platform '{self.platform}' for direct download: {self.drive_url}")
                self.finished_signal.emit(0, 1, self.is_cancelled, [])
                return

            self.finished_signal.emit(1, 0, self.is_cancelled, [])

        except Exception as e:
            # Top-level boundary of the thread: log and report failure so the
            # listener on finished_signal is always notified.
            self.logger_func(f"❌ An unexpected error occurred in DriveDownloadThread: {e}")
            self.finished_signal.emit(0, 1, self.is_cancelled, [])

    def cancel(self):
        """Flag the run as cancelled and set the shared cancellation event, if any."""
        self.is_cancelled = True
        if self.cancellation_event:
            self.cancellation_event.set()
        self.logger_func(f" Cancellation signal received by {self.platform.capitalize()} thread.")
|
||||
106
src/ui/classes/erome_downloader_thread.py
Normal file
106
src/ui/classes/erome_downloader_thread.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import os
|
||||
import time
|
||||
import requests
|
||||
import cloudscraper
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...core.erome_client import fetch_erome_data
|
||||
|
||||
class EromeDownloadThread(QThread):
    """A dedicated QThread for handling erome.com downloads.

    The album's file list comes from ``fetch_erome_data``; each file is then
    streamed to ``<output_dir>/<album_name>/<filename>`` one after another.
    """
    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool)  # dl_count, skip_count, cancelled

    def __init__(self, url, output_dir, parent=None):
        """Store the album URL and the base output directory."""
        super().__init__(parent)
        self.erome_url = url
        self.output_dir = output_dir
        self.is_cancelled = False

    def run(self):
        """Fetch the album's file list and download every file not already on disk."""
        download_count = 0
        skip_count = 0
        self.progress_signal.emit("=" * 40)
        self.progress_signal.emit(f"🚀 Starting Erome.com Download for: {self.erome_url}")

        album_name, files_to_download = fetch_erome_data(self.erome_url, self.progress_signal.emit)

        if not files_to_download:
            self.progress_signal.emit("❌ Failed to extract file information from Erome. Aborting.")
            self.finished_signal.emit(0, 0, self.is_cancelled)
            return

        album_path = os.path.join(self.output_dir, album_name)
        try:
            os.makedirs(album_path, exist_ok=True)
            self.progress_signal.emit(f" Saving to folder: '{album_path}'")
        except OSError as e:
            self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
            self.finished_signal.emit(0, len(files_to_download), self.is_cancelled)
            return

        total_files = len(files_to_download)
        session = cloudscraper.create_scraper()

        try:
            for i, file_data in enumerate(files_to_download):
                if self.is_cancelled:
                    self.progress_signal.emit(" Download cancelled by user.")
                    # Everything not downloaded is reported as skipped.
                    skip_count = total_files - download_count
                    break

                filename = file_data.get('filename', f'untitled_{i+1}.mp4')
                file_url = file_data.get('url')
                headers = file_data.get('headers')
                filepath = os.path.join(album_path, filename)

                if os.path.exists(filepath):
                    self.progress_signal.emit(f" -> Skip ({i+1}/{total_files}): '{filename}' already exists.")
                    skip_count += 1
                    continue

                self.progress_signal.emit(f" Downloading ({i+1}/{total_files}): '{filename}'...")

                try:
                    # A streamed response holds its connection until the body
                    # is consumed or the response is closed; the context
                    # manager releases it on cancel and on errors too.
                    with session.get(file_url, stream=True, headers=headers, timeout=60) as response:
                        response.raise_for_status()

                        total_size = int(response.headers.get('content-length', 0))
                        downloaded_size = 0
                        last_update_time = time.time()

                        with open(filepath, 'wb') as f:
                            for chunk in response.iter_content(chunk_size=8192):
                                if self.is_cancelled:
                                    break
                                if chunk:
                                    f.write(chunk)
                                    downloaded_size += len(chunk)
                                    current_time = time.time()
                                    # Throttle progress updates to one per 0.5 s.
                                    if total_size > 0 and (current_time - last_update_time) > 0.5:
                                        self.file_progress_signal.emit(filename, (downloaded_size, total_size))
                                        last_update_time = current_time

                    if self.is_cancelled:
                        # Remove the partial file; the check at the top of the
                        # loop then logs the cancellation and breaks.
                        if os.path.exists(filepath):
                            os.remove(filepath)
                        continue

                    if total_size > 0:
                        self.file_progress_signal.emit(filename, (total_size, total_size))

                    download_count += 1
                except requests.exceptions.RequestException as e:
                    self.progress_signal.emit(f" ❌ Failed to download '{filename}'. Error: {e}")
                    if os.path.exists(filepath):
                        os.remove(filepath)
                    skip_count += 1
                except Exception as e:
                    self.progress_signal.emit(f" ❌ An unexpected error occurred with '{filename}': {e}")
                    if os.path.exists(filepath):
                        os.remove(filepath)
                    skip_count += 1
        finally:
            # Release the scraper session's pooled connections.
            session.close()

        self.file_progress_signal.emit("", None)
        self.finished_signal.emit(download_count, skip_count, self.is_cancelled)

    def cancel(self):
        """Request cancellation; ``run`` checks the flag between files and between chunks."""
        self.is_cancelled = True
        self.progress_signal.emit(" Cancellation signal received by Erome thread.")
|
||||
86
src/ui/classes/external_link_downloader_thread.py
Normal file
86
src/ui/classes/external_link_downloader_thread.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...services.drive_downloader import (
|
||||
download_dropbox_file,
|
||||
download_gdrive_file,
|
||||
download_mega_file as drive_download_mega_file,
|
||||
)
|
||||
|
||||
|
||||
class ExternalLinkDownloadThread(QThread):
    """Worker thread that downloads a list of external links one after another."""
    progress_signal = pyqtSignal(str)
    file_complete_signal = pyqtSignal(str, bool)
    finished_signal = pyqtSignal()
    overall_progress_signal = pyqtSignal(int, int)
    file_progress_signal = pyqtSignal(str, object)

    def __init__(self, tasks_to_download, download_base_path, parent_logger_func, parent=None, use_post_subfolder=False):
        """Remember the task list, destination folder, logger and subfolder option."""
        super().__init__(parent)
        self.is_cancelled = False
        self.tasks = tasks_to_download
        self.download_base_path = download_base_path
        self.parent_logger_func = parent_logger_func
        self.use_post_subfolder = use_post_subfolder

    def _download_for_platform(self, platform, full_url, post_title):
        """Run the helper registered for ``platform``.

        Returns True when a helper was invoked and False when the platform is
        not supported. Exceptions raised by the helper propagate to the caller.
        """
        shared_kwargs = {
            'logger_func': self.parent_logger_func,
            'progress_callback_func': self.file_progress_signal.emit,
        }
        if platform == 'mega':
            drive_download_mega_file(
                full_url,
                self.download_base_path,
                overall_progress_callback=self.overall_progress_signal.emit,
                **shared_kwargs
            )
            return True
        if platform == 'google drive':
            download_gdrive_file(
                full_url,
                self.download_base_path,
                overall_progress_callback=self.overall_progress_signal.emit,
                use_post_subfolder=self.use_post_subfolder,
                post_title=post_title,
                **shared_kwargs
            )
            return True
        if platform == 'dropbox':
            download_dropbox_file(
                full_url,
                self.download_base_path,
                use_post_subfolder=self.use_post_subfolder,
                post_title=post_title,
                **shared_kwargs
            )
            return True
        return False

    def run(self):
        """Process every task in order, emitting per-link and overall progress."""
        total_tasks = len(self.tasks)
        self.progress_signal.emit(f"ℹ️ Starting external link download thread for {total_tasks} link(s).")
        self.overall_progress_signal.emit(total_tasks, 0)

        for i, task_info in enumerate(self.tasks):
            if self.is_cancelled:
                self.progress_signal.emit("External link download cancelled by user.")
                break

            self.overall_progress_signal.emit(total_tasks, i + 1)

            platform = task_info.get('platform', 'unknown').lower()
            full_url = task_info['url']
            post_title = task_info['title']

            self.progress_signal.emit(f"Download ({i + 1}/{total_tasks}): Starting '{post_title}' ({platform.upper()}) from {full_url}")

            try:
                if self._download_for_platform(platform, full_url, post_title):
                    self.file_complete_signal.emit(full_url, True)
                else:
                    self.progress_signal.emit(f"⚠️ Unsupported platform '{platform}' for link: {full_url}")
                    self.file_complete_signal.emit(full_url, False)
            except Exception as e:
                self.progress_signal.emit(f"❌ Error downloading ({platform.upper()}) link '{full_url}': {e}")
                self.file_complete_signal.emit(full_url, False)

        self.finished_signal.emit()

    def cancel(self):
        """Log the request and raise the flag that ``run`` checks between tasks."""
        self.progress_signal.emit(" [External Links] Cancellation signal received by thread.")
        self.is_cancelled = True
|
||||
162
src/ui/classes/fap_nation_downloader_thread.py
Normal file
162
src/ui/classes/fap_nation_downloader_thread.py
Normal file
@@ -0,0 +1,162 @@
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from PyQt5.QtCore import QThread, pyqtSignal, QProcess
|
||||
import cloudscraper
|
||||
|
||||
from ...core.fap_nation_client import fetch_fap_nation_data
|
||||
from ...services.multipart_downloader import download_file_in_parts
|
||||
|
||||
class FapNationDownloadThread(QThread):
    """
    A dedicated QThread for Fap-Nation that uses a hybrid approach, choosing
    between yt-dlp for HLS streams and a multipart downloader for direct links.

    Only the first entry returned by ``fetch_fap_nation_data`` is downloaded.
    """
    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool)  # downloaded, skipped, cancelled
    overall_progress_signal = pyqtSignal(int, int)

    def __init__(self, url, output_dir, use_post_subfolder, pause_event, cancellation_event, gui_signals, parent=None):
        """Store the download parameters and prepare the yt-dlp ``QProcess``.

        Args:
            url: Album/post URL passed to ``fetch_fap_nation_data``.
            output_dir: Base directory for the downloaded file.
            use_post_subfolder: When true, save into a subfolder named after the album.
            pause_event: Event forwarded to the multipart downloader.
            cancellation_event: Event set by ``cancel`` and forwarded to the
                multipart downloader.
            gui_signals: Emitter object forwarded to the multipart downloader.
            parent: Optional Qt parent object.
        """
        super().__init__(parent)
        self.album_url = url
        self.output_dir = output_dir
        self.use_post_subfolder = use_post_subfolder
        self.is_cancelled = False
        self.current_filename = "Unknown File"
        self.album_name = "fap-nation_album"
        self.pause_event = pause_event
        self.cancellation_event = cancellation_event
        self.gui_signals = gui_signals
        self._is_finished = False  # guards against reporting the result twice

        # NOTE(review): the QProcess is created here, in the constructing
        # thread, but started from run() — confirm the thread affinity is intended.
        self.process = QProcess(self)
        self.process.readyReadStandardOutput.connect(self.handle_ytdlp_output)

    def run(self):
        """Resolve the media link and download it with the matching backend."""
        self.progress_signal.emit("=" * 40)
        self.progress_signal.emit(f"🚀 Starting Fap-Nation Download for: {self.album_url}")

        self.album_name, files_to_download = fetch_fap_nation_data(self.album_url, self.progress_signal.emit)

        if self.is_cancelled or not files_to_download:
            self.progress_signal.emit("❌ Failed to extract file information. Aborting.")
            self.finished_signal.emit(0, 1, self.is_cancelled)
            return

        self.overall_progress_signal.emit(1, 0)

        save_path = self.output_dir
        if self.use_post_subfolder:
            save_path = os.path.join(self.output_dir, self.album_name)
            self.progress_signal.emit(f" Subfolder per Post is ON. Saving to: '{self.album_name}'")
        os.makedirs(save_path, exist_ok=True)

        file_data = files_to_download[0]
        self.current_filename = file_data.get('filename')
        download_url = file_data.get('url')
        link_type = file_data.get('type')
        filepath = os.path.join(save_path, self.current_filename)

        if os.path.exists(filepath):
            self.progress_signal.emit(f" -> Skip: '{self.current_filename}' already exists.")
            self.overall_progress_signal.emit(1, 1)
            self.finished_signal.emit(0, 1, self.is_cancelled)
            return

        if link_type == 'hls':
            self.download_with_ytdlp(filepath, download_url)
        elif link_type == 'direct':
            self.download_with_multipart(filepath, download_url)
        else:
            self.progress_signal.emit(f" ❌ Unknown link type '{link_type}'. Aborting.")
            self._on_ytdlp_finished(-1)

    def download_with_ytdlp(self, filepath, playlist_url):
        """Download an HLS playlist to ``filepath`` by running yt-dlp and waiting for it."""
        self.progress_signal.emit(f" Downloading (HLS Stream): '{self.current_filename}' using yt-dlp...")
        try:
            if getattr(sys, 'frozen', False):
                # Frozen build: the bundled binary sits in the unpack directory.
                base_path = sys._MEIPASS
                ytdlp_path = os.path.join(base_path, "yt-dlp.exe")
            else:
                ytdlp_path = "yt-dlp.exe"

            if not os.path.exists(ytdlp_path):
                self.progress_signal.emit(f" ❌ ERROR: yt-dlp.exe not found at '{ytdlp_path}'.")
                self._on_ytdlp_finished(-1)
                return

            command = [ytdlp_path, '--no-warnings', '--progress', '--output', filepath, '--merge-output-format', 'mp4', playlist_url]

            process = self.process
            process.start(command[0], command[1:])

            # exitCode() is only valid after a normal exit and stays 0 when the
            # program never started, so a failed launch must be detected here
            # or it would be reported as a successful download.
            if not process.waitForStarted():
                self.progress_signal.emit(f" ❌ Failed to start yt-dlp: {process.errorString()}")
                self._on_ytdlp_finished(-1)
                return

            process.waitForFinished(-1)

            if process.exitStatus() == QProcess.NormalExit:
                exit_code = process.exitCode()
            else:
                # Crashed or killed: the exit code carries no meaning.
                exit_code = -1
            self._on_ytdlp_finished(exit_code)

        except Exception as e:
            self.progress_signal.emit(f" ❌ Failed to start yt-dlp: {e}")
            self._on_ytdlp_finished(-1)

    def download_with_multipart(self, filepath, direct_url):
        """Download a direct link to ``filepath`` using the multipart downloader."""
        self.progress_signal.emit(f" Downloading (Direct Link): '{self.current_filename}' using multipart downloader...")
        try:
            session = cloudscraper.create_scraper()
            # HEAD request to learn the size that is handed to the downloader.
            head_response = session.head(direct_url, allow_redirects=True, timeout=20)
            head_response.raise_for_status()
            total_size = int(head_response.headers.get('content-length', 0))

            success, _, _, _ = download_file_in_parts(
                file_url=direct_url, save_path=filepath, total_size=total_size, num_parts=5,
                headers=session.headers, api_original_filename=self.current_filename,
                emitter_for_multipart=self.gui_signals,
                cookies_for_chunk_session=session.cookies,
                cancellation_event=self.cancellation_event,
                skip_event=None, logger_func=self.progress_signal.emit, pause_event=self.pause_event
            )
            self._on_ytdlp_finished(0 if success else 1)
        except Exception as e:
            self.progress_signal.emit(f" ❌ Multipart download failed: {e}")
            self._on_ytdlp_finished(1)

    def handle_ytdlp_output(self):
        """Parse yt-dlp's stdout and forward the newest progress line found."""
        if not self.process:
            return

        output = self.process.readAllStandardOutput().data().decode('utf-8', errors='ignore')
        # Scan newest-first so only the latest progress figure is reported.
        for line in reversed(output.strip().splitlines()):
            line = line.strip()
            progress_match = re.search(r'\[download\]\s+([\d.]+)%\s+of\s+~?\s*([\d.]+\w+B)', line)
            if progress_match:
                percent, size = progress_match.groups()
                self.file_progress_signal.emit("yt-dlp:", f"{percent}% of {size}")
                break

    def _on_ytdlp_finished(self, exit_code):
        """Report the final result once; 0 means success, anything else failure.

        Used by both download backends and by the abort paths in ``run``.
        """
        if self._is_finished:
            return
        self._is_finished = True

        download_count, skip_count = 0, 0

        if self.is_cancelled:
            self.progress_signal.emit(f" Download of '{self.current_filename}' was cancelled.")
            skip_count = 1
        elif exit_code == 0:
            self.progress_signal.emit(f" ✅ Download process finished successfully for '{self.current_filename}'.")
            download_count = 1
        else:
            self.progress_signal.emit(f" ❌ Download process exited with an error (Code: {exit_code}) for '{self.current_filename}'.")
            skip_count = 1

        self.overall_progress_signal.emit(1, 1)
        self.process = None
        self.finished_signal.emit(download_count, skip_count, self.is_cancelled)

    def cancel(self):
        """Flag cancellation, set the shared event and kill yt-dlp if it is running."""
        self.is_cancelled = True
        self.cancellation_event.set()
        if self.process and self.process.state() == QProcess.Running:
            self.progress_signal.emit(" Cancellation signal received, terminating yt-dlp process.")
            self.process.kill()
|
||||
51
src/ui/classes/hentai2read_downloader_thread.py
Normal file
51
src/ui/classes/hentai2read_downloader_thread.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import threading
|
||||
import time
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...core.Hentai2read_client import run_hentai2read_download as h2r_run_download
|
||||
|
||||
|
||||
class Hentai2readDownloadThread(QThread):
    """
    A dedicated QThread that calls the self-contained Hentai2Read client to
    perform scraping and downloading.
    """
    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool)  # downloaded, skipped, cancelled
    overall_progress_signal = pyqtSignal(int, int)

    def __init__(self, url, output_dir, parent=None):
        """Store the start URL and output directory.

        The pause event is taken from ``parent`` when it has one; otherwise a
        private, never-set event is used so pausing is simply inactive.
        """
        super().__init__(parent)
        self.start_url = url
        self.output_dir = output_dir
        self.is_cancelled = False
        self.pause_event = parent.pause_event if hasattr(parent, 'pause_event') else threading.Event()

    def _check_pause(self):
        """Helper to handle pausing and cancellation events.

        Blocks while the pause event is set, polling every 0.5 s.
        Returns True when the download has been cancelled, False otherwise.
        """
        if self.is_cancelled:
            return True
        if self.pause_event and self.pause_event.is_set():
            self.progress_signal.emit(" Download paused...")
            while self.pause_event.is_set():
                if self.is_cancelled:
                    return True
                time.sleep(0.5)
            self.progress_signal.emit(" Download resumed.")
        return self.is_cancelled

    def run(self):
        """
        Executes the main download logic by calling the dedicated client function.

        ``finished_signal`` is emitted even when the client raises or returns
        something that is not a ``(downloaded, skipped)`` pair, so a listener
        waiting for completion is always notified.
        """
        downloaded, skipped = 0, 0
        try:
            downloaded, skipped = h2r_run_download(
                start_url=self.start_url,
                output_dir=self.output_dir,
                progress_callback=self.progress_signal.emit,
                overall_progress_callback=self.overall_progress_signal.emit,
                check_pause_func=self._check_pause
            )
        except Exception as e:
            # Top-level boundary of the thread: log the failure and fall
            # through to the completion signal with zero counts.
            self.progress_signal.emit(f"❌ An unexpected error occurred in Hentai2readDownloadThread: {e}")
            downloaded, skipped = 0, 0

        self.finished_signal.emit(downloaded, skipped, self.is_cancelled)

    def cancel(self):
        """Request cancellation; picked up by ``_check_pause``."""
        self.is_cancelled = True
|
||||
549
src/ui/classes/kemono_discord_downloader_thread.py
Normal file
549
src/ui/classes/kemono_discord_downloader_thread.py
Normal file
@@ -0,0 +1,549 @@
|
||||
# kemono_discord_downloader_thread.py
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
import threading
|
||||
import cloudscraper
|
||||
import requests
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
# --- Assuming these files are in the correct relative path ---
|
||||
# Adjust imports if your project structure is different
|
||||
try:
|
||||
from ...core.discord_client import fetch_server_channels, fetch_channel_messages
|
||||
from ...utils.file_utils import clean_filename
|
||||
except ImportError as e:
|
||||
# Basic fallback logging if signals aren't ready
|
||||
print(f"ERROR: Failed to import required modules for Kemono Discord thread: {e}")
|
||||
# Re-raise to prevent the thread from being created incorrectly
|
||||
raise
|
||||
|
||||
# Custom exception for clean cancellation/pausing
|
||||
class InterruptedError(Exception):
    """Signals that a download loop was cancelled or paused and should unwind cleanly.

    NOTE: within this module the name shadows Python's built-in
    ``InterruptedError`` (an ``OSError`` subclass); this class derives
    directly from ``Exception`` and is unrelated to the built-in.
    """
|
||||
|
||||
class KemonoDiscordDownloadThread(QThread):
|
||||
"""
|
||||
A dedicated QThread for downloading files from Kemono Discord server/channel pages,
|
||||
using the Kemono API via discord_client and multithreading for file downloads.
|
||||
Includes a single retry attempt after a 15-second delay for specific errors.
|
||||
"""
|
||||
# --- Signals ---
|
||||
progress_signal = pyqtSignal(str) # General log messages
|
||||
progress_label_signal = pyqtSignal(str) # Update main progress label (e.g., "Fetching messages...")
|
||||
file_progress_signal = pyqtSignal(str, object) # Update file progress bar (filename, (downloaded_bytes, total_bytes | None))
|
||||
permanent_file_failed_signal = pyqtSignal(list) # To report failures to main window
|
||||
finished_signal = pyqtSignal(int, int, bool, list) # (downloaded_count, skipped_count, was_cancelled, [])
|
||||
|
||||
def __init__(self, server_id, channel_id, output_dir, cookies_dict, parent):
    """
    Initializes the Kemono Discord downloader thread.

    Args:
        server_id (str): The Discord server ID from Kemono.
        channel_id (str | None): The specific Discord channel ID from Kemono, if provided.
        output_dir (str): The base directory to save downloaded files.
        cookies_dict (dict | None): Cookies to use for requests.
        parent (QWidget): The parent widget (main_app) to access events/settings.
    """
    super().__init__(parent)
    self.server_id = server_id
    self.target_channel_id = channel_id  # The specific channel from URL, if any
    self.output_dir = output_dir
    self.cookies_dict = cookies_dict
    self.parent_app = parent  # Access main app's events and settings

    # --- Shared Events & Internal State ---
    # Fall back to private events when the parent does not provide them.
    self.cancellation_event = getattr(parent, 'cancellation_event', threading.Event())
    self.pause_event = getattr(parent, 'pause_event', threading.Event())
    self._is_cancelled_internal = False  # Internal flag for quick breaking

    # --- Counters and the lock created alongside them ---
    self.download_count = 0
    self.skip_count = 0
    self.count_lock = threading.Lock()

    # --- List to Store Failure Details ---
    self.permanently_failed_details = []

    # --- Multithreading Configuration ---
    self.num_file_threads = 1  # Default
    try:
        use_mt = getattr(self.parent_app, 'use_multithreading_checkbox', None)
        thread_input = getattr(self.parent_app, 'thread_count_input', None)
        if use_mt and use_mt.isChecked() and thread_input:
            thread_count_ui = int(thread_input.text().strip())
            # Clamp the UI value to the range 1..20 file threads.
            self.num_file_threads = max(1, min(thread_count_ui, 20))
    except (ValueError, AttributeError, TypeError):
        # Logging is best-effort: a failing emit must not abort construction,
        # but only ordinary exceptions are swallowed (no bare ``except``).
        try:
            self.progress_signal.emit("⚠️ Warning: Could not read thread count setting, defaulting to 1.")
        except Exception:
            pass
        self.num_file_threads = 1  # Fallback on error getting setting

    # --- Network Client ---
    try:
        self.scraper = cloudscraper.create_scraper(browser={'browser': 'firefox', 'platform': 'windows', 'mobile': False})
    except Exception as e:
        try:
            self.progress_signal.emit(f"❌ ERROR: Failed to initialize cloudscraper: {e}")
        except Exception:
            pass
        # Set to None so later code can detect the missing client.
        self.scraper = None
|
||||
|
||||
# --- Control Methods (cancel, pause, resume - same as before) ---
|
||||
def cancel(self):
    """Request cancellation: set the internal flag and the shared cancellation event.

    The log message is best-effort; a failing emit is ignored, but only
    ordinary exceptions are swallowed (no bare ``except``).
    """
    self._is_cancelled_internal = True
    self.cancellation_event.set()
    try:
        self.progress_signal.emit(" Cancellation requested for Kemono Discord download.")
    except Exception:
        pass
|
||||
|
||||
def pause(self):
    """Set the shared pause event if it is not already set, then log the pause.

    The log message is best-effort; a failing emit is ignored, but only
    ordinary exceptions are swallowed (no bare ``except``).
    """
    if not self.pause_event.is_set():
        self.pause_event.set()
        try:
            self.progress_signal.emit(" Pausing Kemono Discord download...")
        except Exception:
            pass
|
||||
|
||||
def resume(self):
    """Clear the shared pause event if it is set, then log the resume.

    The log message is best-effort; a failing emit is ignored, but only
    ordinary exceptions are swallowed (no bare ``except``).
    """
    if self.pause_event.is_set():
        self.pause_event.clear()
        try:
            self.progress_signal.emit(" Resuming Kemono Discord download...")
        except Exception:
            pass
|
||||
|
||||
# --- Helper: Check Cancellation/Pause (same as before) ---
|
||||
def _check_events(self):
|
||||
if self._is_cancelled_internal or self.cancellation_event.is_set():
|
||||
if not self._is_cancelled_internal:
|
||||
self._is_cancelled_internal = True
|
||||
try: self.progress_signal.emit(" Cancellation detected by Kemono Discord thread check.")
|
||||
except: pass
|
||||
return True # Cancelled
|
||||
|
||||
was_paused = False
|
||||
while self.pause_event.is_set():
|
||||
if not was_paused:
|
||||
try: self.progress_signal.emit(" Kemono Discord operation paused...")
|
||||
except: pass
|
||||
was_paused = True
|
||||
if self.cancellation_event.is_set():
|
||||
self._is_cancelled_internal = True
|
||||
try: self.progress_signal.emit(" Cancellation detected while paused.")
|
||||
except: pass
|
||||
return True
|
||||
time.sleep(0.5)
|
||||
return False
|
||||
|
||||
# --- REVISED Helper: Download Single File with ONE Retry ---
|
||||
def _download_single_kemono_file(self, file_info):
|
||||
"""
|
||||
Downloads a single file, handles collisions after download,
|
||||
and automatically retries ONCE after 15s for specific network errors.
|
||||
|
||||
Returns:
|
||||
tuple: (bool_success, dict_error_details_or_None)
|
||||
"""
|
||||
# --- Constants for Retry Logic ---
|
||||
MAX_ATTEMPTS = 2 # 1 initial attempt + 1 retry
|
||||
RETRY_DELAY_SECONDS = 15
|
||||
|
||||
# --- Extract info ---
|
||||
channel_dir = file_info['channel_dir']
|
||||
original_filename = file_info['original_filename']
|
||||
file_url = file_info['file_url']
|
||||
channel_id = file_info['channel_id']
|
||||
post_title = file_info.get('post_title', f"Message in channel {channel_id}")
|
||||
original_post_id_for_log = file_info.get('message_id', 'N/A')
|
||||
base_kemono_domain = "kemono.cr"
|
||||
|
||||
if not self.scraper:
|
||||
try: self.progress_signal.emit(f" ❌ Cannot download '{original_filename}': Cloudscraper not initialized.")
|
||||
except: pass
|
||||
failure_details = { 'file_info': {'url': file_url, 'name': original_filename}, 'post_title': post_title, 'original_post_id_for_log': original_post_id_for_log, 'target_folder_path': channel_dir, 'error': 'Cloudscraper not initialized', 'service': 'discord', 'user_id': self.server_id }
|
||||
return False, failure_details
|
||||
|
||||
if self._check_events(): return False, None # Interrupted before start
|
||||
|
||||
# --- Determine filenames ---
|
||||
cleaned_original_filename = clean_filename(original_filename)
|
||||
intended_final_filename = cleaned_original_filename
|
||||
unique_suffix = uuid.uuid4().hex[:8]
|
||||
temp_filename = f"{intended_final_filename}.{unique_suffix}.part"
|
||||
temp_filepath = os.path.join(channel_dir, temp_filename)
|
||||
|
||||
# --- Download Attempt Loop ---
|
||||
download_successful = False
|
||||
last_exception = None
|
||||
should_retry = False # Flag to indicate if the first attempt failed with a retryable error
|
||||
|
||||
for attempt in range(1, MAX_ATTEMPTS + 1):
|
||||
response = None
|
||||
try:
|
||||
# --- Pre-attempt checks ---
|
||||
if self._check_events(): raise InterruptedError("Cancelled/Paused before attempt")
|
||||
if attempt == 2 and should_retry: # Only delay *before* the retry
|
||||
try: self.progress_signal.emit(f" ⏳ Retrying '{original_filename}' (Attempt {attempt}/{MAX_ATTEMPTS}) after {RETRY_DELAY_SECONDS}s...")
|
||||
except: pass
|
||||
for _ in range(RETRY_DELAY_SECONDS):
|
||||
if self._check_events(): raise InterruptedError("Cancelled/Paused during retry delay")
|
||||
time.sleep(1)
|
||||
# If it's attempt 2 but should_retry is False, it means the first error was non-retryable, so skip
|
||||
elif attempt == 2 and not should_retry:
|
||||
break # Exit loop, failure already determined
|
||||
|
||||
# --- Log attempt ---
|
||||
log_prefix = f" ⬇️ Downloading:" if attempt == 1 else f" 🔄 Retrying:"
|
||||
try: self.progress_signal.emit(f"{log_prefix} '{original_filename}' (Attempt {attempt}/{MAX_ATTEMPTS})...")
|
||||
except: pass
|
||||
if attempt == 1:
|
||||
try: self.file_progress_signal.emit(original_filename, (0, 0))
|
||||
except: pass
|
||||
|
||||
# --- Perform Download ---
|
||||
headers = { 'User-Agent': 'Mozilla/5.0 ...', 'Referer': f'https://{base_kemono_domain}/discord/channel/{channel_id}'} # Shortened for brevity
|
||||
response = self.scraper.get(file_url, headers=headers, cookies=self.cookies_dict, stream=True, timeout=(15, 120))
|
||||
response.raise_for_status()
|
||||
|
||||
total_size = int(response.headers.get('content-length', 0))
|
||||
downloaded_size = 0
|
||||
last_progress_emit_time = time.time()
|
||||
|
||||
with open(temp_filepath, 'wb') as f:
|
||||
for chunk in response.iter_content(chunk_size=1024*1024):
|
||||
if self._check_events(): raise InterruptedError("Cancelled/Paused during chunk writing")
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
downloaded_size += len(chunk)
|
||||
current_time = time.time()
|
||||
if total_size > 0 and (current_time - last_progress_emit_time > 0.5 or downloaded_size == total_size):
|
||||
try: self.file_progress_signal.emit(original_filename, (downloaded_size, total_size))
|
||||
except: pass
|
||||
last_progress_emit_time = current_time
|
||||
elif total_size == 0 and (current_time - last_progress_emit_time > 0.5):
|
||||
try: self.file_progress_signal.emit(original_filename, (downloaded_size, 0))
|
||||
except: pass
|
||||
last_progress_emit_time = current_time
|
||||
response.close()
|
||||
|
||||
# --- Verification ---
|
||||
if self._check_events(): raise InterruptedError("Cancelled/Paused after download completion")
|
||||
|
||||
if total_size > 0 and downloaded_size != total_size:
|
||||
try: self.progress_signal.emit(f" ⚠️ Size mismatch on attempt {attempt} for '{original_filename}'. Expected {total_size}, got {downloaded_size}.")
|
||||
except: pass
|
||||
last_exception = IOError(f"Size mismatch: Expected {total_size}, got {downloaded_size}")
|
||||
if os.path.exists(temp_filepath):
|
||||
try: os.remove(temp_filepath)
|
||||
except OSError: pass
|
||||
should_retry = (attempt == 1) # Only retry if it was the first attempt
|
||||
continue # Try again if attempt 1, otherwise loop finishes
|
||||
else:
|
||||
download_successful = True
|
||||
break # Success!
|
||||
|
||||
# --- Error Handling within Loop ---
|
||||
except InterruptedError as e:
|
||||
last_exception = e
|
||||
should_retry = False # Don't retry if interrupted
|
||||
break
|
||||
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError, cloudscraper.exceptions.CloudflareException) as e:
|
||||
last_exception = e
|
||||
try: self.progress_signal.emit(f" ❌ Network/Cloudflare error on attempt {attempt} for '{original_filename}': {e}")
|
||||
except: pass
|
||||
should_retry = (attempt == 1) # Retry only if first attempt
|
||||
except requests.exceptions.RequestException as e:
|
||||
status_code = getattr(e.response, 'status_code', None)
|
||||
if status_code and 500 <= status_code <= 599: # Retry on 5xx
|
||||
last_exception = e
|
||||
try: self.progress_signal.emit(f" ❌ Server error ({status_code}) on attempt {attempt} for '{original_filename}'. Will retry...")
|
||||
except: pass
|
||||
should_retry = (attempt == 1) # Retry only if first attempt
|
||||
else: # Don't retry on 4xx or other request errors
|
||||
last_exception = e
|
||||
try: self.progress_signal.emit(f" ❌ Non-retryable HTTP error for '{original_filename}': {e}")
|
||||
except: pass
|
||||
should_retry = False
|
||||
break
|
||||
except OSError as e:
|
||||
last_exception = e
|
||||
try: self.progress_signal.emit(f" ❌ OS error during download attempt {attempt} for '{original_filename}': {e}")
|
||||
except: pass
|
||||
should_retry = False
|
||||
break
|
||||
except Exception as e:
|
||||
last_exception = e
|
||||
try: self.progress_signal.emit(f" ❌ Unexpected error on attempt {attempt} for '{original_filename}': {e}")
|
||||
except: pass
|
||||
should_retry = False
|
||||
break
|
||||
finally:
|
||||
if response:
|
||||
try: response.close()
|
||||
except Exception: pass
|
||||
# --- End Download Attempt Loop ---
|
||||
|
||||
try: self.file_progress_signal.emit(original_filename, None) # Clear progress
|
||||
except: pass
|
||||
|
||||
# --- Post-Download Processing ---
|
||||
if download_successful:
|
||||
# --- Rename Logic ---
|
||||
final_filename_to_use = intended_final_filename
|
||||
final_filepath_on_disk = os.path.join(channel_dir, final_filename_to_use)
|
||||
counter = 1
|
||||
base_name, extension = os.path.splitext(intended_final_filename)
|
||||
while os.path.exists(final_filepath_on_disk):
|
||||
final_filename_to_use = f"{base_name} ({counter}){extension}"
|
||||
final_filepath_on_disk = os.path.join(channel_dir, final_filename_to_use)
|
||||
counter += 1
|
||||
if final_filename_to_use != intended_final_filename:
|
||||
try: self.progress_signal.emit(f" -> Name conflict for '{intended_final_filename}'. Renaming to '{final_filename_to_use}'.")
|
||||
except: pass
|
||||
try:
|
||||
os.rename(temp_filepath, final_filepath_on_disk)
|
||||
try: self.progress_signal.emit(f" ✅ Saved: '{final_filename_to_use}'")
|
||||
except: pass
|
||||
return True, None # SUCCESS
|
||||
except OSError as e:
|
||||
try: self.progress_signal.emit(f" ❌ OS error renaming temp file to '{final_filename_to_use}': {e}")
|
||||
except: pass
|
||||
if os.path.exists(temp_filepath):
|
||||
try: os.remove(temp_filepath)
|
||||
except OSError: pass
|
||||
# ---> RETURN FAILURE TUPLE (Rename Failed) <---
|
||||
failure_details = { 'file_info': {'url': file_url, 'name': original_filename}, 'post_title': post_title, 'original_post_id_for_log': original_post_id_for_log, 'target_folder_path': channel_dir, 'intended_filename': intended_final_filename, 'error': f"Rename failed: {e}", 'service': 'discord', 'user_id': self.server_id }
|
||||
return False, failure_details
|
||||
else:
|
||||
# Download failed or was interrupted
|
||||
if not isinstance(last_exception, InterruptedError):
|
||||
try: self.progress_signal.emit(f" ❌ FAILED to download '{original_filename}' after {MAX_ATTEMPTS} attempts. Last error: {last_exception}")
|
||||
except: pass
|
||||
if os.path.exists(temp_filepath):
|
||||
try: os.remove(temp_filepath)
|
||||
except OSError as e_rem:
|
||||
try: self.progress_signal.emit(f" (Failed to remove temp file '{temp_filename}': {e_rem})")
|
||||
except: pass
|
||||
# ---> RETURN FAILURE TUPLE (Download Failed/Interrupted) <---
|
||||
# Only generate details if it wasn't interrupted by user
|
||||
failure_details = None
|
||||
if not isinstance(last_exception, InterruptedError):
|
||||
failure_details = {
|
||||
'file_info': {'url': file_url, 'name': original_filename},
|
||||
'post_title': post_title, 'original_post_id_for_log': original_post_id_for_log,
|
||||
'target_folder_path': channel_dir, 'intended_filename': intended_final_filename,
|
||||
'error': f"Failed after {MAX_ATTEMPTS} attempts: {last_exception}",
|
||||
'service': 'discord', 'user_id': self.server_id,
|
||||
'forced_filename_override': intended_final_filename,
|
||||
'file_index_in_post': file_info.get('file_index', 0),
|
||||
'num_files_in_this_post': file_info.get('num_files', 1)
|
||||
}
|
||||
return False, failure_details # Return None details if interrupted
|
||||
|
||||
# --- Main Thread Execution ---
|
||||
def run(self):
    """Main execution logic: Fetches channels/messages and dispatches file downloads.

    For each channel (the single target channel, or every channel returned
    by ``fetch_server_channels``) the attachment list is collected first and
    then downloaded through a thread pool. ``finished_signal`` is emitted
    with the download count, the skip count, the cancelled state and an
    empty list; failed files are reported via ``permanent_file_failed_signal``.
    """
    def _log(message):
        # Best-effort logging: a failing signal must not stop the run.
        try:
            self.progress_signal.emit(message)
        except Exception:
            pass

    def _label(message):
        # Best-effort status-label update.
        try:
            self.progress_label_signal.emit(message)
        except Exception:
            pass

    self.download_count = 0
    self.skip_count = 0
    self._is_cancelled_internal = False
    self.permanently_failed_details = []  # Reset failed list

    if not self.scraper:
        _log("❌ Aborting Kemono Discord download: Cloudscraper failed to initialize.")
        self.finished_signal.emit(0, 0, False, [])
        return

    try:
        # --- Log Start ---
        _log("=" * 40)
        _log(f"🚀 Starting Kemono Discord download for server: {self.server_id}")
        _log(f" Using {self.num_file_threads} thread(s) for file downloads.")

        # --- Channel Fetching ---
        channels_to_process = []
        if self.target_channel_id:
            channels_to_process.append({'id': self.target_channel_id, 'name': self.target_channel_id})
            _log(f" Targeting specific channel: {self.target_channel_id}")
        else:
            _label("Fetching server channels via Kemono API...")
            channels_data = fetch_server_channels(self.server_id, logger=self.progress_signal.emit, cookies_dict=self.cookies_dict)
            if self._check_events():
                return
            if channels_data is None:
                _log(f" ❌ Failed to fetch channels for server {self.server_id} via Kemono API.")
                return
            channels_to_process = channels_data
            _log(f" Found {len(channels_to_process)} channels.")

        # --- Process Each Channel ---
        for channel in channels_to_process:
            if self._check_events():
                break

            channel_id = channel['id']
            # `or` also covers a name that is present but null/empty.
            channel_name = clean_filename(channel.get('name') or channel_id)
            channel_dir = os.path.join(self.output_dir, channel_name)
            try:
                os.makedirs(channel_dir, exist_ok=True)
            except OSError as e:
                _log(f" ❌ Failed to create directory for channel '{channel_name}': {e}. Skipping channel.")
                continue

            _log(f"\n--- Processing Channel: #{channel_name} ({channel_id}) ---")
            _label(f"Fetching messages for #{channel_name}...")

            # --- Collect File Download Tasks ---
            file_tasks = []
            message_generator = fetch_channel_messages(
                channel_id, logger=self.progress_signal.emit,
                cancellation_event=self.cancellation_event, pause_event=self.pause_event,
                cookies_dict=self.cookies_dict
            )

            try:
                message_index = 0
                for message_batch in message_generator:
                    if self._check_events():
                        break
                    for message in message_batch:
                        message_id = message.get('id', f'msg_{message_index}')
                        post_title_context = (message.get('content') or f"Message {message_id}")[:50] + "..."
                        # `or []` tolerates a null attachments field.
                        attachments = message.get('attachments') or []
                        file_index_in_message = 0
                        num_files_in_message = len(attachments)

                        for attachment in attachments:
                            if self._check_events():
                                raise InterruptedError
                            file_path = attachment.get('path')
                            original_filename = attachment.get('name')
                            if file_path and original_filename:
                                base_kemono_domain = "kemono.cr"
                                if not file_path.startswith('/'):
                                    file_path = '/' + file_path
                                file_url = f"https://{base_kemono_domain}/data{file_path}"
                                file_tasks.append({
                                    'channel_dir': channel_dir, 'original_filename': original_filename,
                                    'file_url': file_url, 'channel_id': channel_id,
                                    'message_id': message_id, 'post_title': post_title_context,
                                    'file_index': file_index_in_message, 'num_files': num_files_in_message
                                })
                                file_index_in_message += 1
                        message_index += 1
                        if self._check_events():
                            raise InterruptedError
                    if self._check_events():
                        raise InterruptedError
            except InterruptedError:
                _log(" Interrupted while collecting file tasks.")
                break  # Exit channel processing
            except Exception as e_msg:
                _log(f" ❌ Error fetching messages for channel {channel_name}: {e_msg}")
                continue  # Continue to next channel

            if self._check_events():
                break

            if not file_tasks:
                _log(" No downloadable file attachments found in this channel's messages.")
                continue

            _log(f" Found {len(file_tasks)} potential file attachments. Starting downloads...")
            _label(f"Downloading {len(file_tasks)} files for #{channel_name}...")

            # --- Execute Downloads Concurrently ---
            files_processed_in_channel = 0
            # str() keeps the thread-name prefix valid for non-string ids.
            with ThreadPoolExecutor(max_workers=self.num_file_threads, thread_name_prefix=f"KDC_{str(channel_id)[:4]}_") as executor:
                futures = {executor.submit(self._download_single_kemono_file, task): task for task in file_tasks}
                try:
                    for future in as_completed(futures):
                        files_processed_in_channel += 1
                        task_info = futures[future]
                        try:
                            success, details = future.result()  # Unpack result
                            with self.count_lock:
                                if success:
                                    self.download_count += 1
                                else:
                                    self.skip_count += 1
                                    if details:  # Append details if the download permanently failed
                                        self.permanently_failed_details.append(details)
                        except Exception as e_future:
                            filename = task_info.get('original_filename', 'unknown file')
                            _log(f" ❌ System error processing download future for '{filename}': {e_future}")
                            with self.count_lock:
                                self.skip_count += 1
                            # Append details on system failure
                            failure_details = { 'file_info': {'url': task_info.get('file_url'), 'name': filename}, 'post_title': task_info.get('post_title', 'N/A'), 'original_post_id_for_log': task_info.get('message_id', 'N/A'), 'target_folder_path': task_info.get('channel_dir'), 'error': f"Future execution error: {e_future}", 'service': 'discord', 'user_id': self.server_id, 'forced_filename_override': clean_filename(filename), 'file_index_in_post': task_info.get('file_index', 0), 'num_files_in_this_post': task_info.get('num_files', 1) }
                            self.permanently_failed_details.append(failure_details)

                        _label(f"#{channel_name}: {files_processed_in_channel}/{len(file_tasks)} files processed")

                        if self._check_events():
                            _log(" Cancelling remaining file downloads for this channel...")
                            # Drop queued tasks; running ones notice the
                            # cancellation through their own event checks.
                            executor.shutdown(wait=False, cancel_futures=True)
                            break  # Exit as_completed loop
                except InterruptedError:
                    _log(" Download processing loop interrupted.")
                    executor.shutdown(wait=False, cancel_futures=True)

            if self._check_events():
                break  # Check between channels
        # --- End Channel Loop ---

    except Exception as e:
        # Catch unexpected errors in the main run logic
        import traceback
        _log(f"❌ Unexpected critical error in Kemono Discord thread run loop: {e}")
        _log(traceback.format_exc())
    finally:
        # --- Final Cleanup and Signal ---
        try:
            _log("=" * 40)
            cancelled = self._is_cancelled_internal or self.cancellation_event.is_set()

            # --- EMIT FAILED FILES SIGNAL ---
            if self.permanently_failed_details:
                try:
                    self.progress_signal.emit(f" Reporting {len(self.permanently_failed_details)} permanently failed files...")
                    self.permanent_file_failed_signal.emit(list(self.permanently_failed_details))  # Emit a copy
                except Exception as e_emit_fail:
                    print(f"ERROR emitting permanent_file_failed_signal: {e_emit_fail}")

            # Log final status
            if cancelled and not self._is_cancelled_internal:
                _log(" Kemono Discord download cancelled externally.")
            elif self._is_cancelled_internal:
                _log(" Kemono Discord download finished due to cancellation.")
            else:
                _log("✅ Kemono Discord download process finished.")

            # Clear file progress
            try:
                self.file_progress_signal.emit("", None)
            except Exception:
                pass

            # Get final counts safely
            with self.count_lock:
                final_download_count = self.download_count
                final_skip_count = self.skip_count

            # Emit finished signal
            self.finished_signal.emit(final_download_count, final_skip_count, cancelled, [])
        except Exception as e_final:
            # Log final signal emission error if possible
            print(f"ERROR in KemonoDiscordDownloadThread finally block: {e_final}")
|
||||
45
src/ui/classes/mangadex_downloader_thread.py
Normal file
45
src/ui/classes/mangadex_downloader_thread.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import threading
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...core.mangadex_client import fetch_mangadex_data
|
||||
|
||||
|
||||
class MangaDexDownloadThread(QThread):
    """A wrapper QThread for running the MangaDex client function.

    The pause and cancellation events are taken from the parent when it
    provides them, so the download reacts to the parent's own controls.
    """
    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool)  # downloaded, skipped, cancelled
    overall_progress_signal = pyqtSignal(int, int)

    def __init__(self, url, output_dir, parent=None):
        """Store the start URL and output directory and resolve the control events."""
        super().__init__(parent)
        self.start_url = url
        self.output_dir = output_dir
        self.is_cancelled = False
        # Fall back to private events when the parent has none (or holds None),
        # so the attributes are always usable Event objects.
        self.pause_event = getattr(parent, 'pause_event', None) or threading.Event()
        self.cancellation_event = getattr(parent, 'cancellation_event', None) or threading.Event()

    def run(self):
        """Run ``fetch_mangadex_data`` and always emit ``finished_signal``."""
        downloaded = 0
        skipped = 0
        try:
            downloaded, skipped = fetch_mangadex_data(
                self.start_url,
                self.output_dir,
                logger_func=self.progress_signal.emit,
                file_progress_callback=self.file_progress_signal,
                overall_progress_callback=self.overall_progress_signal,
                pause_event=self.pause_event,
                cancellation_event=self.cancellation_event
            )
        except Exception as e:
            self.progress_signal.emit(f"❌ A critical error occurred in the MangaDex thread: {e}")
            skipped = 1  # Mark as skipped if there was a critical failure
        finally:
            # The event may be shared with the parent and set there directly,
            # so it counts as cancellation just like a cancel() call.
            cancelled = self.is_cancelled or self.cancellation_event.is_set()
            self.finished_signal.emit(downloaded, skipped, cancelled)

    def cancel(self):
        """Flag the thread as cancelled and set the cancellation event."""
        self.is_cancelled = True
        self.cancellation_event.set()
        self.progress_signal.emit(" Cancellation signal received by MangaDex thread.")
|
||||
105
src/ui/classes/nhentai_downloader_thread.py
Normal file
105
src/ui/classes/nhentai_downloader_thread.py
Normal file
@@ -0,0 +1,105 @@
|
||||
import os
|
||||
import time
|
||||
import cloudscraper
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...utils.file_utils import clean_folder_name
|
||||
|
||||
|
||||
class NhentaiDownloadThread(QThread):
    """QThread that downloads every page image of one nhentai gallery.

    Each page is requested from the image servers in ``IMAGE_SERVERS`` in
    order until one of them answers with HTTP 200.
    """
    progress_signal = pyqtSignal(str)
    finished_signal = pyqtSignal(int, int, bool)  # downloaded, skipped, cancelled

    IMAGE_SERVERS = [
        "https://i.nhentai.net", "https://i2.nhentai.net", "https://i3.nhentai.net",
        "https://i5.nhentai.net", "https://i7.nhentai.net"
    ]

    # Maps the page record's 't' code to a file extension.
    EXTENSION_MAP = {'j': 'jpg', 'p': 'png', 'g': 'gif', 'w': 'webp' }

    def __init__(self, gallery_data, output_dir, parent=None):
        """Store the gallery metadata dict and the base output directory."""
        super().__init__(parent)
        self.gallery_data = gallery_data
        self.output_dir = output_dir
        self.is_cancelled = False

    def _save_page(self, scraper, url, headers, filepath):
        """Stream *url* into *filepath* and return the HTTP status code.

        The body is written to a ``.part`` file and moved into place only
        after it arrived completely, so a failed transfer never leaves a
        truncated image that a later run would skip as already existing.
        Nothing is written unless the status is 200. The response is always
        closed, returning the streamed connection to the pool.
        """
        temp_path = f"{filepath}.part"
        response = scraper.get(url, headers=headers, timeout=60, stream=True)
        try:
            if response.status_code != 200:
                return response.status_code
            completed = False
            try:
                with open(temp_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                os.replace(temp_path, filepath)
                completed = True
            finally:
                if not completed and os.path.exists(temp_path):
                    try:
                        os.remove(temp_path)
                    except OSError:
                        pass
            return response.status_code
        finally:
            response.close()

    def run(self):
        """Download all pages of the gallery and emit ``finished_signal``."""
        gallery_id = self.gallery_data.get("id")
        media_id = self.gallery_data.get("media_id")
        pages_info = self.gallery_data.get("pages", [])
        # `or` also covers a title entry that is present but null/empty.
        title = (self.gallery_data.get("title") or {}).get("english") or f"gallery_{gallery_id}"

        folder_name = clean_folder_name(title)
        gallery_path = os.path.join(self.output_dir, folder_name)

        try:
            os.makedirs(gallery_path, exist_ok=True)
        except OSError as e:
            self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
            self.finished_signal.emit(0, len(pages_info), False)
            return

        self.progress_signal.emit(f"⬇️ Downloading '{title}' to folder '{folder_name}'...")

        scraper = cloudscraper.create_scraper()
        download_count = 0
        skip_count = 0
        total_pages = len(pages_info)
        # Identical for every request of this gallery, so built once.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
            'Referer': f'https://nhentai.net/g/{gallery_id}/'
        }

        for page_num, page_data in enumerate(pages_info, start=1):
            if self.is_cancelled:
                break

            extension = self.EXTENSION_MAP.get(page_data.get('t', 'j'), 'jpg')
            relative_path = f"/galleries/{media_id}/{page_num}.{extension}"
            local_filename = f"{page_num:03d}.{extension}"
            filepath = os.path.join(gallery_path, local_filename)

            if os.path.exists(filepath):
                self.progress_signal.emit(f" -> Skip (Exists): {local_filename}")
                skip_count += 1
                continue

            download_successful = False
            for server in self.IMAGE_SERVERS:
                if self.is_cancelled:
                    break

                full_url = f"{server}{relative_path}"
                try:
                    self.progress_signal.emit(f" Downloading page {page_num}/{total_pages} from {server} ...")
                    status_code = self._save_page(scraper, full_url, headers, filepath)
                    if status_code == 200:
                        download_count += 1
                        download_successful = True
                        break
                    self.progress_signal.emit(f" -> {server} returned status {status_code}. Trying next server...")
                except Exception as e:
                    self.progress_signal.emit(f" -> {server} failed to connect or timed out: {e}. Trying next server...")

            if not download_successful:
                self.progress_signal.emit(f" ❌ Failed to download {local_filename} from all servers.")
                skip_count += 1

            # Short pause between pages.
            time.sleep(0.5)

        self.finished_signal.emit(download_count, skip_count, self.is_cancelled)

    def cancel(self):
        """Ask the download loop to stop before the next server/page request."""
        self.is_cancelled = True
|
||||
101
src/ui/classes/pixeldrain_downloader_thread.py
Normal file
101
src/ui/classes/pixeldrain_downloader_thread.py
Normal file
@@ -0,0 +1,101 @@
|
||||
import os
|
||||
import time
|
||||
import requests
|
||||
import cloudscraper
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...core.pixeldrain_client import fetch_pixeldrain_data
|
||||
from ...utils.file_utils import clean_folder_name
|
||||
|
||||
|
||||
class PixeldrainDownloadThread(QThread):
    """A dedicated QThread for handling pixeldrain.com downloads."""
    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool) # dl_count, skip_count, cancelled

    def __init__(self, url, output_dir, parent=None):
        """Store the Pixeldrain URL and the base output directory."""
        super().__init__(parent)
        self.pixeldrain_url = url
        self.output_dir = output_dir
        self.is_cancelled = False

    def run(self):
        """Download every file listed for the URL into an album folder.

        ``finished_signal`` is emitted on every exit path, including
        unexpected errors, so a listener is never left waiting for it.
        """
        download_count = 0
        skip_count = 0
        try:
            self.progress_signal.emit("=" * 40)
            self.progress_signal.emit(f"🚀 Starting Pixeldrain.com Download for: {self.pixeldrain_url}")

            album_title_raw, files_to_download = fetch_pixeldrain_data(self.pixeldrain_url, self.progress_signal.emit)

            if not files_to_download:
                self.progress_signal.emit("❌ Failed to extract file information from Pixeldrain. Aborting.")
                return

            album_folder_name = clean_folder_name(album_title_raw)
            album_path = os.path.join(self.output_dir, album_folder_name)
            try:
                os.makedirs(album_path, exist_ok=True)
                self.progress_signal.emit(f" Saving to folder: '{album_path}'")
            except OSError as e:
                self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
                skip_count = len(files_to_download)
                return

            total_files = len(files_to_download)
            session = cloudscraper.create_scraper()

            for i, file_data in enumerate(files_to_download):
                if self.is_cancelled:
                    self.progress_signal.emit(" Download cancelled by user.")
                    skip_count = total_files - download_count
                    break

                file_url = file_data.get('url')
                # The name comes from the remote listing: keep only its last
                # path component so it cannot point outside the album folder.
                raw_name = file_data.get('filename') or ''
                filename = os.path.basename(str(raw_name).replace('\\', '/'))
                if not filename or not file_url:
                    self.progress_signal.emit(f" ❌ Failed to download '{raw_name}'. Error: missing filename or URL")
                    skip_count += 1
                    continue
                filepath = os.path.join(album_path, filename)

                if os.path.exists(filepath):
                    self.progress_signal.emit(f" -> Skip ({i+1}/{total_files}): '{filename}' already exists.")
                    skip_count += 1
                    continue

                self.progress_signal.emit(f" Downloading ({i+1}/{total_files}): '{filename}'...")

                try:
                    response = session.get(file_url, stream=True, timeout=90)
                    try:
                        response.raise_for_status()

                        total_size = int(response.headers.get('content-length', 0))
                        downloaded_size = 0
                        last_update_time = time.time()

                        with open(filepath, 'wb') as f:
                            for chunk in response.iter_content(chunk_size=8192):
                                if self.is_cancelled:
                                    break
                                if chunk:
                                    f.write(chunk)
                                    downloaded_size += len(chunk)
                                    current_time = time.time()
                                    if total_size > 0 and (current_time - last_update_time) > 0.5:
                                        self.file_progress_signal.emit(filename, (downloaded_size, total_size))
                                        last_update_time = current_time
                    finally:
                        # Streamed responses hold their connection until closed.
                        response.close()

                    if self.is_cancelled:
                        # Drop the partial file; the loop head does the
                        # cancellation bookkeeping on the next iteration.
                        if os.path.exists(filepath):
                            os.remove(filepath)
                        continue

                    download_count += 1
                except (requests.exceptions.RequestException, cloudscraper.exceptions.CloudflareException, OSError, ValueError) as e:
                    # OSError covers local file errors; ValueError covers a
                    # malformed content-length header.
                    self.progress_signal.emit(f" ❌ Failed to download '{filename}'. Error: {e}")
                    if os.path.exists(filepath):
                        try:
                            os.remove(filepath)
                        except OSError:
                            pass
                    skip_count += 1
        except Exception as e:
            self.progress_signal.emit(f"❌ A critical error occurred in the Pixeldrain thread: {e}")
        finally:
            self.file_progress_signal.emit("", None)
            self.finished_signal.emit(download_count, skip_count, self.is_cancelled)

    def cancel(self):
        """Flag the thread as cancelled and log that the request was received."""
        self.is_cancelled = True
        self.progress_signal.emit(" Cancellation signal received by Pixeldrain thread.")
|
||||
87
src/ui/classes/rule34video_downloader_thread.py
Normal file
87
src/ui/classes/rule34video_downloader_thread.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import os
|
||||
import time
|
||||
import requests
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
import cloudscraper
|
||||
|
||||
from ...core.rule34video_client import fetch_rule34video_data
|
||||
from ...utils.file_utils import clean_folder_name
|
||||
|
||||
class Rule34VideoDownloadThread(QThread):
    """A dedicated QThread for handling rule34video.com downloads.

    Signals:
        progress_signal(str): log lines for the UI.
        file_progress_signal(str, object): ``(filename, (downloaded, total))``
            while the file is streaming, ``("", None)`` when the thread is done.
        finished_signal(int, int, bool): dl_count, skip_count, cancelled.
    """
    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool)  # dl_count, skip_count, cancelled

    def __init__(self, url, output_dir, parent=None):
        """Remember the video page URL and the target directory."""
        super().__init__(parent)
        self.video_url = url
        self.output_dir = output_dir
        self.is_cancelled = False

    def run(self):
        """Resolve the video URL, stream it to ``<title>.mp4`` and report the outcome.

        Always ends by emitting ``finished_signal``; a missing video URL, an
        already existing file, a cancellation or a failure all count as one skip.
        """
        download_count = 0
        skip_count = 0

        video_title, final_video_url = fetch_rule34video_data(self.video_url, self.progress_signal.emit)

        if not final_video_url:
            self.progress_signal.emit("❌ Failed to get video data. Aborting.")
            self.finished_signal.emit(0, 1, self.is_cancelled)
            return

        # Create a safe filename from the title, defaulting if needed
        safe_title = clean_folder_name(video_title if video_title else "rule34video_file")
        filename = f"{safe_title}.mp4"
        filepath = os.path.join(self.output_dir, filename)

        if os.path.exists(filepath):
            self.progress_signal.emit(f" -> Skip: '{filename}' already exists.")
            self.finished_signal.emit(0, 1, self.is_cancelled)
            return

        self.progress_signal.emit(f" Downloading: '{filename}'...")
        try:
            # The CDN link might not require special headers, but a referer is good practice
            headers = {'Referer': 'https://rule34video.com/'}
            # Both the session and the streamed response are context-managed:
            # with stream=True the connection is only released once the body is
            # fully consumed or the response is closed, and a cancel breaks out
            # of the loop before either happens.
            with cloudscraper.create_scraper() as scraper, \
                    scraper.get(final_video_url, stream=True, headers=headers, timeout=90) as response:
                response.raise_for_status()

                total_size = int(response.headers.get('content-length', 0))
                downloaded_size = 0
                last_update_time = time.time()

                with open(filepath, 'wb') as f:
                    # Use a larger chunk size for video files
                    for chunk in response.iter_content(chunk_size=8192 * 4):
                        if self.is_cancelled:
                            break
                        if chunk:
                            f.write(chunk)
                            downloaded_size += len(chunk)
                            current_time = time.time()
                            # Throttle UI updates to roughly two per second.
                            if total_size > 0 and (current_time - last_update_time) > 0.5:
                                self.file_progress_signal.emit(filename, (downloaded_size, total_size))
                                last_update_time = current_time

            if self.is_cancelled:
                # Never leave a truncated video behind: the next run would
                # treat it as "already exists" and skip it.
                if os.path.exists(filepath):
                    os.remove(filepath)
                skip_count = 1
                self.progress_signal.emit(f" Download cancelled for '{filename}'.")
            else:
                download_count = 1

        except Exception as e:
            self.progress_signal.emit(f" ❌ Failed to download '{filename}': {e}")
            if os.path.exists(filepath):
                os.remove(filepath)
            skip_count = 1

        self.file_progress_signal.emit("", None)
        self.finished_signal.emit(download_count, skip_count, self.is_cancelled)

    def cancel(self):
        """Ask the running download to stop at the next chunk boundary."""
        self.is_cancelled = True
        self.progress_signal.emit(" Cancellation signal received by Rule34Video thread.")
|
||||
105
src/ui/classes/saint2_downloader_thread.py
Normal file
105
src/ui/classes/saint2_downloader_thread.py
Normal file
@@ -0,0 +1,105 @@
|
||||
import os
|
||||
import time
|
||||
import requests
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...core.saint2_client import fetch_saint2_data
|
||||
|
||||
class Saint2DownloadThread(QThread):
    """A dedicated QThread for handling saint2.su downloads.

    Signals:
        progress_signal(str): log lines for the UI.
        file_progress_signal(str, object): ``(filename, (downloaded, total))``
            while a file is streaming, ``("", None)`` when the thread is done.
        finished_signal(int, int, bool): dl_count, skip_count, cancelled.
    """
    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool)  # dl_count, skip_count, cancelled

    def __init__(self, url, output_dir, parent=None):
        """Remember the album/file URL and the target directory."""
        super().__init__(parent)
        self.saint2_url = url
        self.output_dir = output_dir
        self.is_cancelled = False

    def _stream_file(self, session, file_url, headers, filepath, filename):
        """Stream one file to *filepath*.

        Returns True when the whole body was written and False when the
        download was cancelled (the partial file is removed in that case).
        Network and I/O errors propagate to the caller.
        """
        # The response is context-managed because a cancel breaks out of
        # iter_content early, and an unconsumed streamed response keeps its
        # connection checked out until it is closed.
        with session.get(file_url, stream=True, headers=headers, timeout=60) as response:
            response.raise_for_status()

            total_size = int(response.headers.get('content-length', 0))
            downloaded_size = 0
            last_update_time = time.time()

            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if self.is_cancelled:
                        break
                    if chunk:
                        f.write(chunk)
                        downloaded_size += len(chunk)
                        current_time = time.time()
                        # Throttle UI updates to roughly two per second.
                        if total_size > 0 and (current_time - last_update_time) > 0.5:
                            self.file_progress_signal.emit(filename, (downloaded_size, total_size))
                            last_update_time = current_time

        if self.is_cancelled:
            if os.path.exists(filepath):
                os.remove(filepath)
            return False

        if total_size > 0:
            self.file_progress_signal.emit(filename, (total_size, total_size))
        return True

    def run(self):
        """Fetch the file list for the URL and download every file in order.

        Files that already exist are skipped. On cancellation every file that
        was not downloaded is reported as skipped. Always ends by emitting
        ``finished_signal``.
        """
        download_count = 0
        skip_count = 0
        self.progress_signal.emit("=" * 40)
        self.progress_signal.emit(f"🚀 Starting Saint2.su Download for: {self.saint2_url}")

        album_name, files_to_download = fetch_saint2_data(self.saint2_url, self.progress_signal.emit)

        if not files_to_download:
            self.progress_signal.emit("❌ Failed to extract file information from Saint2. Aborting.")
            self.finished_signal.emit(0, 0, self.is_cancelled)
            return

        album_path = os.path.join(self.output_dir, album_name)
        try:
            os.makedirs(album_path, exist_ok=True)
            self.progress_signal.emit(f" Saving to folder: '{album_path}'")
        except OSError as e:
            self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
            self.finished_signal.emit(0, len(files_to_download), self.is_cancelled)
            return

        total_files = len(files_to_download)

        with requests.Session() as session:
            for i, file_data in enumerate(files_to_download):
                if self.is_cancelled:
                    self.progress_signal.emit(" Download cancelled by user.")
                    skip_count = total_files - download_count
                    break

                filename = file_data.get('filename', f'untitled_{i+1}.mp4')
                file_url = file_data.get('url')
                headers = file_data.get('headers')
                filepath = os.path.join(album_path, filename)

                if os.path.exists(filepath):
                    self.progress_signal.emit(f" -> Skip ({i+1}/{total_files}): '{filename}' already exists.")
                    skip_count += 1
                    continue

                self.progress_signal.emit(f" Downloading ({i+1}/{total_files}): '{filename}'...")

                try:
                    completed = self._stream_file(session, file_url, headers, filepath, filename)
                except requests.exceptions.RequestException as e:
                    self.progress_signal.emit(f" ❌ Failed to download '{filename}'. Error: {e}")
                    if os.path.exists(filepath): os.remove(filepath)
                    skip_count += 1
                except Exception as e:
                    self.progress_signal.emit(f" ❌ An unexpected error occurred with '{filename}': {e}")
                    if os.path.exists(filepath): os.remove(filepath)
                    skip_count += 1
                else:
                    if completed:
                        download_count += 1
                    else:
                        # Handle the cancel right here rather than relying on
                        # the check at the top of the next iteration: when the
                        # cancelled file is the last one there is no next
                        # iteration and the skip count would stay stale.
                        self.progress_signal.emit(" Download cancelled by user.")
                        skip_count = total_files - download_count
                        break

        self.file_progress_signal.emit("", None)
        self.finished_signal.emit(download_count, skip_count, self.is_cancelled)

    def cancel(self):
        """Ask the running download to stop at the next chunk boundary."""
        self.is_cancelled = True
        self.progress_signal.emit(" Cancellation signal received by Saint2 thread.")
|
||||
380
src/ui/classes/simp_city_downloader_thread.py
Normal file
380
src/ui/classes/simp_city_downloader_thread.py
Normal file
@@ -0,0 +1,380 @@
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from collections import Counter
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import cloudscraper
|
||||
import requests
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...core.bunkr_client import fetch_bunkr_data
|
||||
from ...core.pixeldrain_client import fetch_pixeldrain_data
|
||||
from ...core.saint2_client import fetch_saint2_data
|
||||
from ...core.simpcity_client import fetch_single_simpcity_page
|
||||
from ...services.drive_downloader import (
|
||||
download_mega_file as drive_download_mega_file,
|
||||
download_gofile_folder
|
||||
)
|
||||
from ...utils.file_utils import clean_folder_name
|
||||
|
||||
|
||||
class SimpCityDownloadThread(QThread):
    """QThread that crawls a SimpCity thread (or single post) and downloads its content.

    Inline images are downloaded by a pool of worker threads fed from
    ``image_queue``; links to external hosts are processed one at a time by a
    single service thread fed from ``service_queue``.

    ``parent`` is kept as ``parent_app`` and must expose the
    ``simpcity_dl_*_cb`` checkboxes that ``run`` reads.

    Signals:
        progress_signal(str): log lines for the UI.
        file_progress_signal(str, object): per-file progress, forwarded to the
            Mega/Gofile downloaders.
        finished_signal(int, int, bool, list): dl_count, skip_count, cancelled
            and a list that is always empty here.
        overall_progress_signal(int, int): jobs found, jobs processed.
    """
    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool, list)
    overall_progress_signal = pyqtSignal(int, int)

    def __init__(self, url, post_id, output_dir, cookies, parent=None):
        """Store the crawl parameters and create the queues and shared counters."""
        super().__init__(parent)
        self.start_url = url
        self.post_id = post_id
        self.output_dir = output_dir
        self.cookies = cookies
        self.is_cancelled = False
        self.parent_app = parent
        self.image_queue = queue.Queue()
        self.service_queue = queue.Queue()
        # Guards the four counters below, which are updated from worker threads.
        self.counter_lock = threading.Lock()
        self.total_dl_count = 0
        self.total_skip_count = 0
        self.total_jobs_found = 0
        self.total_jobs_processed = 0
        self.processed_job_urls = set()

    def cancel(self):
        """Ask the crawl and every worker to stop at their next check."""
        self.is_cancelled = True

    class _ServiceLoggerAdapter:
        """Wraps the progress signal to provide .info(), .error(), .warning() methods for other clients."""

        def __init__(self, signal_emitter, prefix=""):
            self.emit = signal_emitter
            self.prefix = prefix

        def __call__(self, msg, *args, **kwargs):
            # Make the logger callable, defaulting to the info method.
            self.info(msg, *args, **kwargs)

        @staticmethod
        def _render(msg, args):
            """Return *msg* as text, %-formatted with *args* only when args were given.

            Formatting unconditionally would raise on any message containing a
            literal ``%`` (percent-encoded URLs are common in these logs).
            """
            text = str(msg)
            if not args:
                return text
            try:
                return text % args
            except (TypeError, ValueError):
                # A log call must never take the download down; show the raw parts.
                return " ".join([text, *map(str, args)])

        def info(self, msg, *args, **kwargs):
            self.emit(f"{self.prefix}{self._render(msg, args)}")

        def error(self, msg, *args, **kwargs):
            self.emit(f"{self.prefix}❌ ERROR: {self._render(msg, args)}")

        def warning(self, msg, *args, **kwargs):
            self.emit(f"{self.prefix}⚠️ WARNING: {self._render(msg, args)}")

    def _log_interceptor(self, message):
        """Filters out verbose log messages from the simpcity_client."""
        if "[SimpCity] Scraper found" in message or "[SimpCity] Scraping page" in message:
            return
        self.progress_signal.emit(message)

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    def _add_download(self):
        """Count one successfully downloaded file."""
        with self.counter_lock:
            self.total_dl_count += 1

    def _add_skip(self):
        """Count one skipped or failed file."""
        with self.counter_lock:
            self.total_skip_count += 1

    def _mark_job_processed(self):
        """Advance the overall progress by one job unless the run was cancelled."""
        if self.is_cancelled:
            return
        with self.counter_lock:
            self.total_jobs_processed += 1
        self.overall_progress_signal.emit(self.total_jobs_found, self.total_jobs_processed)

    def _album_service_enabled(self, job_type):
        """Return True when *job_type* is an album host the user enabled."""
        return bool(
            (job_type == 'bunkr' and self.should_dl_bunkr)
            or (job_type == 'pixeldrain' and self.should_dl_pixeldrain)
            or (job_type == 'saint2' and self.should_dl_saint2)
        )

    def _fetch_to_file(self, session, url, filepath, headers, timeout):
        """Stream *url* into *filepath*; return True only when fully written.

        A file left incomplete by a cancel or an error is deleted, because every
        download path treats an existing file as "already downloaded" on the
        next run. Errors propagate to the caller after the cleanup.
        """
        created = False
        completed = False
        try:
            # Context-managed so the connection is released even when the
            # loop is left early.
            with session.get(url, stream=True, timeout=timeout, headers=headers) as response:
                response.raise_for_status()
                with open(filepath, 'wb') as f:
                    created = True
                    for chunk in response.iter_content(chunk_size=8192):
                        if self.is_cancelled:
                            break
                        f.write(chunk)
                    else:
                        completed = True
        finally:
            # Only remove a file this call created itself.
            if created and not completed and os.path.exists(filepath):
                try:
                    os.remove(filepath)
                except OSError:
                    pass  # best-effort cleanup; the original error matters more
        return completed

    # ------------------------------------------------------------------
    # Job discovery
    # ------------------------------------------------------------------

    def _get_enriched_jobs(self, jobs_to_check):
        """Performs a pre-flight check on jobs to get an accurate total file count and summary.

        Album jobs for enabled hosts get their file list attached as
        ``prefetched_files`` / ``prefetched_album_name``; albums with no files
        and jobs for disabled hosts are dropped. Also updates
        ``total_jobs_found`` and emits the overall progress.
        """
        if not jobs_to_check:
            return []

        enriched_jobs = []
        album_logger = self._ServiceLoggerAdapter(self.progress_signal.emit, prefix=" ")
        album_fetchers = {
            'bunkr': fetch_bunkr_data,
            'pixeldrain': fetch_pixeldrain_data,
            'saint2': fetch_saint2_data,
        }

        for job in jobs_to_check:
            job_type = job.get('type')
            job_url = job.get('url')

            if job_type in ['image', 'saint2_direct']:
                enriched_jobs.append(job)
            elif self._album_service_enabled(job_type):
                self.progress_signal.emit(f" -> Checking {job_type} album for file count...")
                album_name, files = album_fetchers[job_type](job_url, album_logger)
                if files:
                    job['prefetched_files'] = files
                    job['prefetched_album_name'] = album_name
                    enriched_jobs.append(job)
            elif (job_type == 'mega' and self.should_dl_mega) or \
                    (job_type == 'gofile' and self.should_dl_gofile):
                # These are downloaded as a whole by _service_worker; without
                # this branch they never reached the service queue and the
                # Mega/Gofile handling there was unreachable.
                enriched_jobs.append(job)

        if enriched_jobs:
            summary_counts = Counter()
            current_page_file_count = 0
            for job in enriched_jobs:
                # Albums count once per file, everything else counts as one job.
                file_count = len(job['prefetched_files']) if job.get('prefetched_files') else 1
                summary_counts[job['type']] += file_count
                current_page_file_count += file_count

            summary_parts = [f"{job_type} ({count})" for job_type, count in summary_counts.items()]
            self.progress_signal.emit(f" [SimpCity] Content Found: {' | '.join(summary_parts)}")

            with self.counter_lock:
                self.total_jobs_found += current_page_file_count
            self.overall_progress_signal.emit(self.total_jobs_found, self.total_jobs_processed)

        return enriched_jobs

    # ------------------------------------------------------------------
    # Workers
    # ------------------------------------------------------------------

    def _download_single_image(self, job, album_path, session):
        """Downloads one image file; this is run by the image thread pool."""
        filename = job.get('filename')
        try:
            if not filename:
                raise ValueError("image job has no filename")
            filepath = os.path.join(album_path, filename)
            if os.path.exists(filepath):
                self.progress_signal.emit(f" -> Skip (Image): '{filename}'")
                self._add_skip()
                return
            self.progress_signal.emit(f" -> Downloading (Image): '{filename}'...")
            if self._fetch_to_file(session, job['url'], filepath,
                                   headers={'Referer': self.start_url}, timeout=180):
                self._add_download()
        except Exception as e:
            self.progress_signal.emit(f" -> ❌ Image download failed for '{filename}': {e}")
            self._add_skip()
        finally:
            self._mark_job_processed()

    def _image_worker(self, album_path):
        """Target function for the image thread pool that pulls jobs from the queue."""
        with cloudscraper.create_scraper() as session:
            while not self.is_cancelled:
                try:
                    job = self.image_queue.get(timeout=1)
                except queue.Empty:
                    continue
                if job is None:  # sentinel queued by run() once crawling is done
                    break
                try:
                    self._download_single_image(job, album_path, session)
                finally:
                    self.image_queue.task_done()

    def _service_worker(self, album_path):
        """Target function for the single service thread, ensuring sequential downloads."""
        while not self.is_cancelled:
            try:
                job = self.service_queue.get(timeout=1)
            except queue.Empty:
                continue
            if job is None:  # sentinel queued by run() once crawling is done
                break
            try:
                self._process_service_job(job, album_path)
            except Exception as e:
                # One failing job must not kill the only service thread and
                # strand everything still waiting in the queue.
                self.progress_signal.emit(f" -> ❌ Service job failed for '{job.get('url')}': {e}")
            finally:
                self.service_queue.task_done()

    def _process_service_job(self, job, album_path):
        """Dispatch one service-queue job to the downloader for its host."""
        job_type = job['type']
        job_url = job['url']

        if job_type in ['pixeldrain', 'saint2', 'bunkr']:
            if self._album_service_enabled(job_type):
                self.progress_signal.emit(f"\n--- Processing Service ({job_type.capitalize()}): {job_url} ---")
                self._download_album(job.get('prefetched_files', []), job_url, album_path)
        elif job_type == 'mega' and self.should_dl_mega:
            self.progress_signal.emit(f"\n--- Processing Service (Mega): {job_url} ---")
            try:
                drive_download_mega_file(job_url, album_path, self.progress_signal.emit, self.file_progress_signal.emit)
            finally:
                # Counted as one job in _get_enriched_jobs.
                self._mark_job_processed()
        elif job_type == 'gofile' and self.should_dl_gofile:
            self.progress_signal.emit(f"\n--- Processing Service (Gofile): {job_url} ---")
            try:
                download_gofile_folder(job_url, album_path, self.progress_signal.emit, self.file_progress_signal.emit)
            finally:
                self._mark_job_processed()
        elif job_type == 'saint2_direct' and self.should_dl_saint2:
            self.progress_signal.emit(f"\n--- Processing Service (Saint2 Direct): {job_url} ---")
            self._download_saint2_direct(job_url, album_path)

    def _download_saint2_direct(self, job_url, album_path):
        """Download a direct Saint2 file link, named after the URL's last path segment."""
        filename = job_url  # fallback for the error message if the URL cannot be parsed
        try:
            filename = os.path.basename(urlparse(job_url).path)
            filepath = os.path.join(album_path, filename)
            if os.path.exists(filepath):
                self._add_skip()
            else:
                with cloudscraper.create_scraper() as session:
                    if self._fetch_to_file(session, job_url, filepath,
                                           headers={'Referer': self.start_url}, timeout=120):
                        self._add_download()
        except Exception as e:
            self.progress_signal.emit(f" -> ❌ Download failed for '{filename}': {e}")
            self._add_skip()
        finally:
            self._mark_job_processed()

    def _download_album(self, files_to_process, source_url, album_path):
        """Helper to download all files from a pre-fetched album list."""
        if not files_to_process:
            return
        with cloudscraper.create_scraper() as session:
            for file_data in files_to_process:
                if self.is_cancelled:
                    return
                filename = file_data.get('filename') or file_data.get('name')
                try:
                    if not filename:
                        raise ValueError("album entry has no filename")
                    filepath = os.path.join(album_path, filename)
                    if os.path.exists(filepath):
                        self._add_skip()
                    else:
                        self.progress_signal.emit(f" -> Downloading: '{filename}'...")
                        headers = file_data.get('headers', {'Referer': source_url})
                        if self._fetch_to_file(session, file_data.get('url'), filepath,
                                               headers=headers, timeout=180):
                            self._add_download()
                except Exception as e:
                    self.progress_signal.emit(f" -> ❌ Download failed for '{filename}': {e}")
                    self._add_skip()
                finally:
                    self._mark_job_processed()

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------

    def run(self):
        """Main entry point for the thread, orchestrates the entire download."""
        self.progress_signal.emit("=" * 40)
        self.progress_signal.emit(f"🚀 Starting SimpCity Download for: {self.start_url}")

        self.should_dl_pixeldrain = self.parent_app.simpcity_dl_pixeldrain_cb.isChecked()
        self.should_dl_saint2 = self.parent_app.simpcity_dl_saint2_cb.isChecked()
        self.should_dl_mega = self.parent_app.simpcity_dl_mega_cb.isChecked()
        self.should_dl_bunkr = self.parent_app.simpcity_dl_bunkr_cb.isChecked()
        self.should_dl_gofile = self.parent_app.simpcity_dl_gofile_cb.isChecked()

        is_single_post_mode = self.post_id or '/post-' in self.start_url
        album_path = ""

        # Phase 1: fetch the first page only to name the output folder.
        try:
            if is_single_post_mode:
                self.progress_signal.emit(" Mode: Single Post detected.")
                album_title, _, _ = fetch_single_simpcity_page(self.start_url, self._log_interceptor, cookies=self.cookies, post_id=self.post_id)
                album_path = os.path.join(self.output_dir, clean_folder_name(album_title or "simpcity_post"))
            else:
                self.progress_signal.emit(" Mode: Full Thread detected.")
                first_page_url = re.sub(r'(/page-\d+)|(/post-\d+)', '', self.start_url).split('#')[0].strip('/')
                album_title, _, _ = fetch_single_simpcity_page(first_page_url, self._log_interceptor, cookies=self.cookies)
                album_path = os.path.join(self.output_dir, clean_folder_name(album_title or "simpcity_album"))
            os.makedirs(album_path, exist_ok=True)
            self.progress_signal.emit(f" Saving all content to folder: '{os.path.basename(album_path)}'")
        except Exception as e:
            self.progress_signal.emit(f"❌ Could not process the initial page. Aborting. Error: {e}")
            self.finished_signal.emit(0, 0, self.is_cancelled, [])
            return

        # Phase 2: start the consumers before crawling so downloads overlap with page analysis.
        service_thread = threading.Thread(target=self._service_worker, args=(album_path,), daemon=True)
        service_thread.start()
        num_image_threads = 15
        image_executor = ThreadPoolExecutor(max_workers=num_image_threads, thread_name_prefix='SimpCityImage')
        for _ in range(num_image_threads):
            image_executor.submit(self._image_worker, album_path)

        # Phase 3: crawl and enqueue jobs.
        try:
            if is_single_post_mode:
                _, jobs, _ = fetch_single_simpcity_page(self.start_url, self._log_interceptor, cookies=self.cookies, post_id=self.post_id)
                enriched_jobs = self._get_enriched_jobs(jobs)
                if enriched_jobs:
                    for job in enriched_jobs:
                        if job['type'] == 'image':
                            self.image_queue.put(job)
                        else:
                            self.service_queue.put(job)
            else:
                base_url = re.sub(r'(/page-\d+)|(/post-\d+)', '', self.start_url).split('#')[0].strip('/')
                page_counter = 1
                end_of_thread = False
                MAX_RETRIES = 3
                while not end_of_thread:
                    if self.is_cancelled:
                        break
                    page_url = f"{base_url}/page-{page_counter}"
                    retries = 0
                    page_fetch_successful = False
                    while retries < MAX_RETRIES:
                        if self.is_cancelled:
                            end_of_thread = True
                            break
                        self.progress_signal.emit(f"\n--- Analyzing page {page_counter} (Attempt {retries + 1}/{MAX_RETRIES}) ---")
                        try:
                            page_title, jobs_on_page, final_url = fetch_single_simpcity_page(page_url, self._log_interceptor, cookies=self.cookies)

                            # A redirect away from the requested page number is how
                            # running past the last page shows up here.
                            if final_url != page_url:
                                self.progress_signal.emit(f" -> Redirect detected from {page_url} to {final_url}")
                                try:
                                    req_page_match = re.search(r'/page-(\d+)', page_url)
                                    final_page_match = re.search(r'/page-(\d+)', final_url)

                                    if req_page_match:
                                        req_page_num = int(req_page_match.group(1))

                                        # Scenario 1: Redirect to an earlier page (e.g., page-11 -> page-10)
                                        if final_page_match and int(final_page_match.group(1)) < req_page_num:
                                            self.progress_signal.emit(f" -> Redirected to an earlier page ({final_page_match.group(0)}). Reached end of thread.")
                                            end_of_thread = True

                                        # Scenario 2: Redirect to base URL (e.g., page-11 -> /)
                                        # We check req_page_num > 1 because page-1 often redirects to base URL, which is normal.
                                        elif not final_page_match and req_page_num > 1:
                                            self.progress_signal.emit(" -> Redirected to base thread URL. Reached end of thread.")
                                            end_of_thread = True
                                except (ValueError, TypeError):
                                    pass  # Ignore parsing errors

                            if end_of_thread:
                                page_fetch_successful = True
                                break

                            if page_counter > 1 and not page_title:
                                self.progress_signal.emit(f" -> Page {page_counter} is invalid or has no title. Reached end of thread.")
                                end_of_thread = True
                            elif not jobs_on_page:
                                self.progress_signal.emit(f" -> Page {page_counter} has no content. Reached end of thread.")
                                end_of_thread = True
                            else:
                                new_jobs = [job for job in jobs_on_page if job.get('url') not in self.processed_job_urls]
                                if not new_jobs and page_counter > 1:
                                    self.progress_signal.emit(f" -> Page {page_counter} contains no new content. Reached end of thread.")
                                    end_of_thread = True
                                else:
                                    enriched_jobs = self._get_enriched_jobs(new_jobs)
                                    if not enriched_jobs and not new_jobs:
                                        # This can happen if all new_jobs were e.g. pixeldrain and it's disabled
                                        self.progress_signal.emit(f" -> Page {page_counter} content was filtered out. Reached end of thread.")
                                        end_of_thread = True
                                    else:
                                        for job in enriched_jobs:
                                            self.processed_job_urls.add(job.get('url'))
                                            if job['type'] == 'image':
                                                self.image_queue.put(job)
                                            else:
                                                self.service_queue.put(job)
                            page_fetch_successful = True
                            break
                        except requests.exceptions.HTTPError as e:
                            if e.response.status_code in [403, 404]:
                                self.progress_signal.emit(f" -> Page {page_counter} returned {e.response.status_code}. Reached end of thread.")
                                end_of_thread = True
                                break
                            elif e.response.status_code == 429:
                                self.progress_signal.emit(" -> Rate limited (429). Waiting...")
                                # Back off a little longer on every retry.
                                time.sleep(5 * (retries + 2))
                                retries += 1
                            else:
                                self.progress_signal.emit(f" -> HTTP Error {e.response.status_code} on page {page_counter}. Stopping crawl.")
                                end_of_thread = True
                                break
                        except Exception as e:
                            self.progress_signal.emit(f" Stopping crawl due to error on page {page_counter}: {e}")
                            end_of_thread = True
                            break
                    if not page_fetch_successful and not end_of_thread:
                        self.progress_signal.emit(f" -> Failed to fetch page {page_counter} after {MAX_RETRIES} attempts. Stopping crawl.")
                        end_of_thread = True
                    if not end_of_thread:
                        page_counter += 1
        except Exception as e:
            self.progress_signal.emit(f"❌ A critical error occurred during the main fetch phase: {e}")

        # Phase 4: one sentinel per consumer, then wait for them to drain.
        self.progress_signal.emit("\n--- All pages analyzed. Waiting for background downloads to complete... ---")
        for _ in range(num_image_threads):
            self.image_queue.put(None)
        self.service_queue.put(None)
        image_executor.shutdown(wait=True)
        service_thread.join()
        self.finished_signal.emit(self.total_dl_count, self.total_skip_count, self.is_cancelled, [])
|
||||
128
src/ui/classes/toonily_downloader_thread.py
Normal file
128
src/ui/classes/toonily_downloader_thread.py
Normal file
@@ -0,0 +1,128 @@
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import cloudscraper
|
||||
from PyQt5.QtCore import QThread, pyqtSignal
|
||||
|
||||
from ...core.toonily_client import (
|
||||
fetch_chapter_data as toonily_fetch_data,
|
||||
get_chapter_list as toonily_get_list
|
||||
)
|
||||
from ...utils.file_utils import clean_folder_name
|
||||
|
||||
|
||||
class ToonilyDownloadThread(QThread):
    """A dedicated QThread for handling toonily.com series or single chapters.

    Signals:
        progress_signal(str): log lines for the UI.
        file_progress_signal(str, object): only emitted as ``("", None)`` when
            the thread is done.
        finished_signal(int, int, bool): dl_count, skip_count, cancelled.
        overall_progress_signal(int, int): total chapters, chapters processed.
    """
    progress_signal = pyqtSignal(str)
    file_progress_signal = pyqtSignal(str, object)
    finished_signal = pyqtSignal(int, int, bool)
    overall_progress_signal = pyqtSignal(int, int)  # Signal for chapter progress

    def __init__(self, url, output_dir, parent=None):
        """Store the start URL and target directory and pick up the app's pause event."""
        super().__init__(parent)
        self.start_url = url
        self.output_dir = output_dir
        self.is_cancelled = False
        # Get access to the pause event from the main app; without one, use a
        # private event that is never set so pausing is simply a no-op.
        self.pause_event = parent.pause_event if hasattr(parent, 'pause_event') else threading.Event()

    def _check_pause(self):
        """Block while the pause event is set; return True when the run is cancelled."""
        if self.is_cancelled:
            return True
        if self.pause_event and self.pause_event.is_set():
            self.progress_signal.emit(" Download paused...")
            while self.pause_event.is_set():
                if self.is_cancelled:
                    return True
                time.sleep(0.5)
            self.progress_signal.emit(" Download resumed.")
        return self.is_cancelled

    def _download_image(self, scraper, img_url, chapter_url, filepath):
        """Stream one image to *filepath*.

        Returns True when the image was fully written and False when the run
        was cancelled mid-download. An incomplete file is removed in both the
        cancelled and the error case, because an existing file is treated as
        already downloaded on the next run. Errors propagate to the caller.
        """
        completed = False
        try:
            # Context-managed so the connection is released even when the
            # chunk loop is left early.
            with scraper.get(img_url, stream=True, timeout=60, headers={'Referer': chapter_url}) as response:
                response.raise_for_status()
                with open(filepath, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if self._check_pause():
                            break
                        f.write(chunk)
            completed = not self._check_pause()
        finally:
            if not completed and os.path.exists(filepath):
                try:
                    os.remove(filepath)
                except OSError:
                    pass  # best-effort cleanup; the original error matters more
        return completed

    def run(self):
        """Download the chapter at ``start_url`` or every chapter of the series.

        Images are saved as ``<series>/<chapter>/NNN.<ext>`` under
        ``output_dir``. Always ends by emitting ``finished_signal``.
        """
        grand_total_dl = 0
        grand_total_skip = 0

        # Check if the URL is a series or a chapter
        if '/chapter-' in self.start_url:
            # It's a single chapter URL
            chapters_to_download = [self.start_url]
            self.progress_signal.emit("ℹ️ Single Toonily chapter URL detected.")
        else:
            # It's a series URL, so get all chapters
            chapters_to_download = toonily_get_list(self.start_url, self.progress_signal.emit)

        if not chapters_to_download:
            self.progress_signal.emit("❌ No chapters found to download.")
            self.finished_signal.emit(0, 0, self.is_cancelled)
            return

        total_chapters = len(chapters_to_download)
        self.progress_signal.emit(f"--- Starting download of {len(chapters_to_download)} chapter(s) ---")
        self.overall_progress_signal.emit(total_chapters, 0)

        scraper = cloudscraper.create_scraper()

        for chapter_idx, chapter_url in enumerate(chapters_to_download):
            if self._check_pause():
                break

            self.progress_signal.emit(f"\n-- Processing Chapter {chapter_idx + 1}/{len(chapters_to_download)} --")
            series_title, chapter_title, image_urls = toonily_fetch_data(chapter_url, self.progress_signal.emit, scraper)

            if not image_urls:
                self.progress_signal.emit("❌ Failed to get data for chapter. Skipping.")
                # A failed chapter is still a processed chapter; report it so
                # the chapter progress does not stall (most visibly on the last one).
                self.overall_progress_signal.emit(total_chapters, chapter_idx + 1)
                continue

            # Create folders like: /Downloads/Series Name/Chapter 01/
            series_folder_name = clean_folder_name(series_title)
            # Make a safe folder name from the full chapter title
            chapter_folder_name = clean_folder_name(chapter_title)
            final_save_path = os.path.join(self.output_dir, series_folder_name, chapter_folder_name)

            try:
                os.makedirs(final_save_path, exist_ok=True)
                self.progress_signal.emit(f" Saving to folder: '{os.path.join(series_folder_name, chapter_folder_name)}'")
            except OSError as e:
                self.progress_signal.emit(f"❌ Critical error creating directory: {e}")
                grand_total_skip += len(image_urls)
                self.overall_progress_signal.emit(total_chapters, chapter_idx + 1)
                continue

            for i, img_url in enumerate(image_urls):
                if self._check_pause():
                    break

                # Bound before the try so the error message below can always name the file.
                filename = f"{i+1:03d}"
                try:
                    file_extension = os.path.splitext(urlparse(img_url).path)[1] or '.jpg'
                    filename = f"{i+1:03d}{file_extension}"
                    filepath = os.path.join(final_save_path, filename)

                    if os.path.exists(filepath):
                        self.progress_signal.emit(f" -> Skip ({i+1}/{len(image_urls)}): '{filename}' already exists.")
                        grand_total_skip += 1
                    else:
                        self.progress_signal.emit(f" Downloading ({i+1}/{len(image_urls)}): '{filename}'...")
                        if not self._download_image(scraper, img_url, chapter_url, filepath):
                            break  # cancelled mid-file; the partial file is already gone

                        grand_total_dl += 1
                        time.sleep(0.2)  # small delay between image requests
                except Exception as e:
                    self.progress_signal.emit(f" ❌ Failed to download '{filename}': {e}")
                    grand_total_skip += 1

            self.overall_progress_signal.emit(total_chapters, chapter_idx + 1)
            time.sleep(1)  # Wait a second between chapters

        self.file_progress_signal.emit("", None)
        self.finished_signal.emit(grand_total_dl, grand_total_skip, self.is_cancelled)

    def cancel(self):
        """Ask the running download to stop at the next pause/cancel check."""
        self.is_cancelled = True
        self.progress_signal.emit(" Cancellation signal received by Toonily thread.")
|
||||
@@ -22,6 +22,8 @@ from ..main_window import get_app_icon_object
|
||||
from ...core.api_client import download_from_api
|
||||
from ...utils.network_utils import extract_post_info, prepare_cookies_for_request
|
||||
from ...utils.resolution import get_dark_theme
|
||||
# --- IMPORT THE NEW DIALOG ---
|
||||
from .UpdateCheckDialog import UpdateCheckDialog
|
||||
|
||||
|
||||
class PostsFetcherThread (QThread ):
|
||||
@@ -138,7 +140,7 @@ class EmptyPopupDialog (QDialog ):
|
||||
SCOPE_CREATORS ="Creators"
|
||||
|
||||
|
||||
def __init__ (self ,app_base_dir ,parent_app_ref ,parent =None ):
|
||||
def __init__ (self ,user_data_path ,parent_app_ref ,parent =None ):
|
||||
super ().__init__ (parent )
|
||||
self.parent_app = parent_app_ref
|
||||
|
||||
@@ -146,13 +148,18 @@ class EmptyPopupDialog (QDialog ):
|
||||
|
||||
self.setMinimumSize(int(400 * scale_factor), int(300 * scale_factor))
|
||||
self.current_scope_mode = self.SCOPE_CREATORS
|
||||
self .app_base_dir =app_base_dir
|
||||
self.user_data_path = user_data_path
|
||||
|
||||
app_icon =get_app_icon_object ()
|
||||
if app_icon and not app_icon .isNull ():
|
||||
self .setWindowIcon (app_icon )
|
||||
|
||||
# --- MODIFIED: Store a list of profiles now ---
|
||||
self.update_profiles_list = None
|
||||
# --- DEPRECATED (kept for compatibility if needed, but new logic won't use them) ---
|
||||
self.update_profile_data = None
|
||||
self.update_creator_name = None
|
||||
|
||||
self .selected_creators_for_queue =[]
|
||||
self .globally_selected_creators ={}
|
||||
self .fetched_posts_data ={}
|
||||
@@ -321,29 +328,34 @@ class EmptyPopupDialog (QDialog ):
|
||||
pass
|
||||
|
||||
def _handle_update_check(self):
|
||||
"""Opens a dialog to select a creator profile and loads it for an update session."""
|
||||
appdata_dir = os.path.join(self.app_base_dir, "appdata")
|
||||
profiles_dir = os.path.join(appdata_dir, "creator_profiles")
|
||||
"""
|
||||
--- MODIFIED FUNCTION ---
|
||||
Opens the new UpdateCheckDialog instead of a QFileDialog.
|
||||
If a profile is selected, it sets the dialog's result properties
|
||||
and accepts the dialog, just like the old file dialog logic did.
|
||||
"""
|
||||
# --- NEW BEHAVIOR ---
|
||||
# Pass the user data path and a reference to the main app (for translations/theme)
|
||||
dialog = UpdateCheckDialog(self.user_data_path, self.parent_app, self)
|
||||
|
||||
if not os.path.isdir(profiles_dir):
|
||||
QMessageBox.warning(self, "Directory Not Found", f"The creator profiles directory does not exist yet.\n\nPath: {profiles_dir}")
|
||||
return
|
||||
|
||||
filepath, _ = QFileDialog.getOpenFileName(self, "Select Creator Profile for Update", profiles_dir, "JSON Files (*.json)")
|
||||
|
||||
if filepath:
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
if 'creator_url' not in data or 'processed_post_ids' not in data:
|
||||
raise ValueError("Invalid profile format.")
|
||||
|
||||
self.update_profile_data = data
|
||||
self.update_creator_name = os.path.basename(filepath).replace('.json', '')
|
||||
self.accept() # Close the dialog and signal success
|
||||
except Exception as e:
|
||||
QMessageBox.critical(self, "Error Loading Profile", f"Could not load or parse the selected profile file:\n\n{e}")
|
||||
if dialog.exec_() == QDialog.Accepted:
|
||||
# --- MODIFIED: Get a list of profiles now ---
|
||||
selected_profiles = dialog.get_selected_profiles()
|
||||
if selected_profiles:
|
||||
try:
|
||||
# --- MODIFIED: Store the list ---
|
||||
self.update_profiles_list = selected_profiles
|
||||
|
||||
# --- Set deprecated single-profile fields for backward compatibility (optional) ---
|
||||
# --- This helps if other parts of the main window still expect one profile ---
|
||||
self.update_profile_data = selected_profiles[0]['data']
|
||||
self.update_creator_name = selected_profiles[0]['name']
|
||||
|
||||
self.accept() # Close EmptyPopupDialog and signal success to main_window
|
||||
except Exception as e:
|
||||
QMessageBox.critical(self, "Error Loading Profile",
|
||||
f"Could not process the selected profile data:\n\n{e}")
|
||||
# --- END OF NEW BEHAVIOR ---
|
||||
|
||||
def _handle_fetch_posts_click (self ):
|
||||
selected_creators =list (self .globally_selected_creators .values ())
|
||||
@@ -981,9 +993,14 @@ class EmptyPopupDialog (QDialog ):
|
||||
def _handle_posts_close_view (self ):
|
||||
self .right_pane_widget .hide ()
|
||||
self .main_splitter .setSizes ([self .width (),0 ])
|
||||
self .posts_list_widget .itemChanged .disconnect (self ._handle_post_item_check_changed )
|
||||
|
||||
# --- MODIFIED: Added check before disconnect ---
|
||||
if hasattr (self ,'_handle_post_item_check_changed'):
|
||||
self .posts_title_list_widget .itemChanged .disconnect (self ._handle_post_item_check_changed )
|
||||
try:
|
||||
self .posts_title_list_widget .itemChanged .disconnect (self ._handle_post_item_check_changed )
|
||||
except TypeError:
|
||||
pass # Already disconnected
|
||||
|
||||
self .posts_search_input .setVisible (False )
|
||||
self .posts_search_input .clear ()
|
||||
self .globally_selected_post_ids .clear ()
|
||||
|
||||
@@ -153,7 +153,7 @@ class SupportDialog(QDialog):
|
||||
|
||||
community_layout.addWidget(self._create_card_button(
|
||||
get_asset_path("github.png"), "GitHub", "Report issues",
|
||||
"https://github.com/Yuvi63771/Kemono-Downloader", "#2E2E2E",
|
||||
"https://github.com/Yuvi9587/Kemono-Downloader", "#2E2E2E",
|
||||
min_height=100, icon_size=36
|
||||
))
|
||||
community_layout.addWidget(self._create_card_button(
|
||||
|
||||
179
src/ui/dialogs/UpdateCheckDialog.py
Normal file
179
src/ui/dialogs/UpdateCheckDialog.py
Normal file
@@ -0,0 +1,179 @@
|
||||
# --- Standard Library Imports ---
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# --- PyQt5 Imports ---
|
||||
from PyQt5.QtCore import Qt, pyqtSignal
|
||||
from PyQt5.QtWidgets import (
|
||||
QDialog, QVBoxLayout, QHBoxLayout, QListWidget, QListWidgetItem,
|
||||
QPushButton, QMessageBox, QAbstractItemView, QLabel
|
||||
)
|
||||
|
||||
# --- Local Application Imports ---
|
||||
from ...i18n.translator import get_translation
|
||||
from ..main_window import get_app_icon_object
|
||||
from ...utils.resolution import get_dark_theme
|
||||
|
||||
class UpdateCheckDialog(QDialog):
    """
    A dialog that lists all creator .json profiles with checkboxes
    and allows the user to select multiple to check for updates.

    Profiles are read from ``<user_data_path>/creator_profiles``. A file is
    listed only if it parses as a JSON object containing both the
    ``creator_url`` and ``processed_post_ids`` keys.

    Args:
        user_data_path (str): Directory that contains the ``creator_profiles`` folder.
        parent_app_ref: Reference to the main application object; used for the
            current language and theme. May be None.
        parent (QWidget, optional): Parent widget of the dialog.
    """

    def __init__(self, user_data_path, parent_app_ref, parent=None):
        super().__init__(parent)
        self.parent_app = parent_app_ref
        self.user_data_path = user_data_path
        self.selected_profiles_list = []  # Will store a list of {'name': ..., 'data': ...}

        self._init_ui()
        self._load_profiles()
        self._retranslate_ui()

        # Apply theme from parent. getattr() keeps the dialog usable even if the
        # parent object does not expose a theme attribute.
        if self.parent_app and getattr(self.parent_app, 'current_theme', None) == "dark":
            scale = getattr(self.parent_app, 'scale_factor', 1)
            self.setStyleSheet(get_dark_theme(scale))
        else:
            self.setStyleSheet("")

    def _init_ui(self):
        """Initializes the UI components."""
        self.setWindowTitle("Check for Updates")
        self.setMinimumSize(400, 450)

        app_icon = get_app_icon_object()
        if app_icon and not app_icon.isNull():
            self.setWindowIcon(app_icon)

        layout = QVBoxLayout(self)

        self.info_label = QLabel("Select creator profiles to check for updates:")
        layout.addWidget(self.info_label)

        # --- List Widget with Checkboxes ---
        self.list_widget = QListWidget()
        # No selection mode, we only care about checkboxes
        self.list_widget.setSelectionMode(QAbstractItemView.NoSelection)
        layout.addWidget(self.list_widget)

        # --- All Buttons in One Horizontal Layout ---
        button_layout = QHBoxLayout()
        button_layout.setSpacing(6)  # small even spacing between all buttons

        self.select_all_button = QPushButton("Select All")
        self.select_all_button.clicked.connect(self._toggle_all_checkboxes)

        self.deselect_all_button = QPushButton("Deselect All")
        self.deselect_all_button.clicked.connect(self._toggle_all_checkboxes)

        self.close_button = QPushButton("Close")
        self.close_button.clicked.connect(self.reject)

        self.check_button = QPushButton("Check Selected")
        self.check_button.clicked.connect(self.on_check_selected)
        self.check_button.setDefault(True)

        # Add buttons without a stretch (so no large gap)
        button_layout.addWidget(self.select_all_button)
        button_layout.addWidget(self.deselect_all_button)
        button_layout.addWidget(self.close_button)
        button_layout.addWidget(self.check_button)

        layout.addLayout(button_layout)

    def _tr(self, key, default_text=""):
        """Helper to get translation based on current app language."""
        if callable(get_translation) and self.parent_app:
            return get_translation(self.parent_app.current_selected_language, key, default_text)
        return default_text

    def _retranslate_ui(self):
        """Translates the UI elements."""
        self.setWindowTitle(self._tr("update_check_dialog_title", "Check for Updates"))
        self.info_label.setText(self._tr("update_check_dialog_info_multiple", "Select creator profiles to check for updates:"))
        self.select_all_button.setText(self._tr("select_all_button_text", "Select All"))
        self.deselect_all_button.setText(self._tr("deselect_all_button_text", "Deselect All"))
        self.check_button.setText(self._tr("update_check_dialog_check_button", "Check Selected"))
        self.close_button.setText(self._tr("update_check_dialog_close_button", "Close"))

    def _show_empty_state(self):
        """Shows a placeholder row and disables every control that needs profiles."""
        placeholder = QListWidgetItem(self._tr("update_check_no_profiles", "No creator profiles found."))
        # Strip all flags so the placeholder can never be checked or picked up
        # by _toggle_all_checkboxes / on_check_selected.
        placeholder.setFlags(Qt.NoItemFlags)
        self.list_widget.addItem(placeholder)
        self.list_widget.setEnabled(False)
        self.check_button.setEnabled(False)
        self.select_all_button.setEnabled(False)
        self.deselect_all_button.setEnabled(False)

    def _load_profiles(self):
        """Loads all .json files from the creator_profiles directory as checkable items."""
        appdata_dir = self.user_data_path
        profiles_dir = os.path.join(appdata_dir, "creator_profiles")

        if not os.path.isdir(profiles_dir):
            QMessageBox.warning(self,
                                self._tr("update_check_dir_not_found_title", "Directory Not Found"),
                                self._tr("update_check_dir_not_found_msg",
                                         "The creator profiles directory does not exist yet.\n\nPath: {path}")
                                .format(path=profiles_dir))
            # Without profiles there is nothing to select; do not leave the
            # action buttons enabled over an empty list.
            self._show_empty_state()
            return

        try:
            filenames = os.listdir(profiles_dir)
        except OSError as e:
            # e.g. permission denied: treat as "no profiles" instead of crashing
            # the dialog constructor.
            print(f"Failed to list profiles directory {profiles_dir}: {e}")
            self._show_empty_state()
            return

        profiles_found = []
        for filename in filenames:
            creator_name, extension = os.path.splitext(filename)
            # Case-insensitive so ".JSON" files are picked up as well.
            if extension.lower() != ".json":
                continue

            filepath = os.path.join(profiles_dir, filename)
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except Exception as e:
                # Best effort: one unreadable or corrupt file must not hide the others.
                print(f"Failed to load profile {filename}: {e}")
                continue

            # Basic validation to ensure it's a valid profile. The isinstance check
            # rejects JSON lists/strings that merely contain the key names.
            if isinstance(data, dict) and 'creator_url' in data and 'processed_post_ids' in data:
                profiles_found.append({'name': creator_name, 'data': data})
            else:
                print(f"Skipping invalid profile: {filename}")

        if not profiles_found:
            self._show_empty_state()
            return

        profiles_found.sort(key=lambda x: x['name'].lower())

        for profile_info in profiles_found:
            item = QListWidgetItem(profile_info['name'])
            item.setData(Qt.UserRole, profile_info)
            # --- Make item checkable ---
            item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
            item.setCheckState(Qt.Unchecked)
            self.list_widget.addItem(item)

    def _toggle_all_checkboxes(self):
        """Handles Select All and Deselect All button clicks."""
        # Both buttons share this slot; the sender decides the target state.
        sender = self.sender()
        check_state = Qt.Checked if sender == self.select_all_button else Qt.Unchecked

        for i in range(self.list_widget.count()):
            item = self.list_widget.item(i)
            if item.flags() & Qt.ItemIsUserCheckable:
                item.setCheckState(check_state)

    def on_check_selected(self):
        """Handles the 'Check Selected' button click.

        Collects the profile dict of every checked row. If none is checked a
        warning is shown and the dialog stays open; otherwise it is accepted.
        """
        self.selected_profiles_list = []

        for i in range(self.list_widget.count()):
            item = self.list_widget.item(i)
            if item.checkState() == Qt.Checked:
                profile_info = item.data(Qt.UserRole)
                if profile_info:
                    self.selected_profiles_list.append(profile_info)

        if not self.selected_profiles_list:
            QMessageBox.warning(self,
                                self._tr("update_check_no_selection_title", "No Selection"),
                                self._tr("update_check_no_selection_msg", "Please select at least one creator to check."))
            return

        self.accept()

    def get_selected_profiles(self):
        """Returns the list of profile data selected by the user."""
        return self.selected_profiles_list
|
||||
File diff suppressed because it is too large
Load Diff
@@ -137,6 +137,12 @@ def extract_post_info(url_string):
|
||||
|
||||
stripped_url = url_string.strip()
|
||||
|
||||
# --- Rule34Video Check ---
|
||||
rule34video_match = re.search(r'rule34video\.com/video/(\d+)', stripped_url)
|
||||
if rule34video_match:
|
||||
video_id = rule34video_match.group(1)
|
||||
return 'rule34video', video_id, None
|
||||
|
||||
# --- Danbooru Check ---
|
||||
danbooru_match = re.search(r'danbooru\.donmai\.us|safebooru\.donmai\.us', stripped_url)
|
||||
if danbooru_match:
|
||||
|
||||
@@ -26,6 +26,16 @@ KNOWN_TXT_MATCH_CLEANUP_PATTERNS = [
|
||||
r'\bPreview\b',
|
||||
]
|
||||
|
||||
# --- START NEW CODE ---
|
||||
# Regular expression to detect CJK characters.
# Ranges, in order: CJK Symbols and Punctuation (U+3000-U+303F), Hiragana
# (U+3040-U+309F), Katakana (U+30A0-U+30FF), Halfwidth and Fullwidth Forms
# (U+FF00-U+FFEF), CJK Unified Ideographs (U+4E00-U+9FFF) and Hangul
# Syllables (U+AC00-U+D7AF).
cjk_pattern = re.compile(r'[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uffef\u4e00-\u9fff\uac00-\ud7af]')


def contains_cjk(text):
    """Checks if the text contains any CJK characters.

    Args:
        text: The string to inspect. None and other non-string values are
            treated as containing no CJK characters.

    Returns:
        bool: True if at least one character falls in the ranges covered by
        ``cjk_pattern``, otherwise False.
    """
    # Guard against None / non-str input (e.g. a missing alias) instead of
    # letting re raise TypeError.
    if not isinstance(text, str):
        return False
    return bool(cjk_pattern.search(text))
|
||||
# --- END NEW CODE ---
|
||||
|
||||
# --- Text Matching and Manipulation Utilities ---
|
||||
|
||||
def is_title_match_for_character(post_title, character_name_filter):
|
||||
@@ -42,7 +52,7 @@ def is_title_match_for_character(post_title, character_name_filter):
|
||||
"""
|
||||
if not post_title or not character_name_filter:
|
||||
return False
|
||||
|
||||
|
||||
# Use word boundaries (\b) to match whole words only
|
||||
pattern = r"(?i)\b" + re.escape(str(character_name_filter).strip()) + r"\b"
|
||||
return bool(re.search(pattern, post_title))
|
||||
@@ -62,7 +72,7 @@ def is_filename_match_for_character(filename, character_name_filter):
|
||||
"""
|
||||
if not filename or not character_name_filter:
|
||||
return False
|
||||
|
||||
|
||||
return str(character_name_filter).strip().lower() in filename.lower()
|
||||
|
||||
|
||||
@@ -101,16 +111,16 @@ def extract_folder_name_from_title(title, unwanted_keywords):
|
||||
"""
|
||||
if not title:
|
||||
return 'Uncategorized'
|
||||
|
||||
|
||||
title_lower = title.lower()
|
||||
# Find all whole words in the title
|
||||
tokens = re.findall(r'\b[\w\-]+\b', title_lower)
|
||||
|
||||
|
||||
for token in tokens:
|
||||
clean_token = clean_folder_name(token)
|
||||
if clean_token and clean_token.lower() not in unwanted_keywords:
|
||||
return clean_token
|
||||
|
||||
|
||||
# Fallback to cleaning the full title if no single significant word is found
|
||||
cleaned_full_title = clean_folder_name(title)
|
||||
return cleaned_full_title if cleaned_full_title else 'Uncategorized'
|
||||
@@ -120,6 +130,7 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
|
||||
"""
|
||||
Matches folder names from a title based on a list of known name objects.
|
||||
Each name object is a dict: {'name': 'PrimaryName', 'aliases': ['alias1', ...]}
|
||||
MODIFIED: Uses substring matching for CJK aliases, word boundary for others.
|
||||
|
||||
Args:
|
||||
title (str): The post title to check.
|
||||
@@ -137,10 +148,11 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
|
||||
for pat_str in KNOWN_TXT_MATCH_CLEANUP_PATTERNS:
|
||||
cleaned_title = re.sub(pat_str, ' ', cleaned_title, flags=re.IGNORECASE)
|
||||
cleaned_title = re.sub(r'\s+', ' ', cleaned_title).strip()
|
||||
# Store both original case cleaned title and lower case for different matching
|
||||
title_lower = cleaned_title.lower()
|
||||
|
||||
matched_cleaned_names = set()
|
||||
|
||||
|
||||
# Sort by name length descending to match longer names first (e.g., "Cloud Strife" before "Cloud")
|
||||
sorted_name_objects = sorted(names_to_match, key=lambda x: len(x.get("name", "")), reverse=True)
|
||||
|
||||
@@ -149,19 +161,43 @@ def match_folders_from_title(title, names_to_match, unwanted_keywords):
|
||||
aliases = name_obj.get("aliases", [])
|
||||
if not primary_folder_name or not aliases:
|
||||
continue
|
||||
|
||||
|
||||
# <<< START MODIFICATION >>>
|
||||
cleaned_primary_name = clean_folder_name(primary_folder_name)
|
||||
if not cleaned_primary_name or cleaned_primary_name.lower() in unwanted_keywords:
|
||||
continue # Skip this entry entirely if its primary name is unwanted or empty
|
||||
|
||||
match_found_for_this_object = False
|
||||
for alias in aliases:
|
||||
if not alias: continue
|
||||
alias_lower = alias.lower()
|
||||
if not alias_lower: continue
|
||||
|
||||
# Use word boundaries for accurate matching
|
||||
pattern = r'\b' + re.escape(alias_lower) + r'\b'
|
||||
if re.search(pattern, title_lower):
|
||||
cleaned_primary_name = clean_folder_name(primary_folder_name)
|
||||
if cleaned_primary_name.lower() not in unwanted_keywords:
|
||||
|
||||
# Check if the alias contains CJK characters
|
||||
if contains_cjk(alias):
|
||||
# Use simple substring matching for CJK
|
||||
if alias_lower in title_lower:
|
||||
matched_cleaned_names.add(cleaned_primary_name)
|
||||
break # Move to the next name object once a match is found for this one
|
||||
|
||||
match_found_for_this_object = True
|
||||
break # Move to the next name object
|
||||
else:
|
||||
# Use original word boundary matching for non-CJK
|
||||
try:
|
||||
# Compile pattern for efficiency if used repeatedly, though here it changes each loop
|
||||
pattern = r'\b' + re.escape(alias_lower) + r'\b'
|
||||
if re.search(pattern, title_lower):
|
||||
matched_cleaned_names.add(cleaned_primary_name)
|
||||
match_found_for_this_object = True
|
||||
break # Move to the next name object
|
||||
except re.error as e:
|
||||
# Log error if the alias creates an invalid regex (unlikely with escape)
|
||||
print(f"Regex error for alias '{alias}': {e}") # Or use proper logging
|
||||
continue
|
||||
|
||||
# This outer break logic remains the same (though slightly redundant with inner breaks)
|
||||
if match_found_for_this_object:
|
||||
pass # Already added and broke inner loop
|
||||
# <<< END MODIFICATION >>>
|
||||
|
||||
return sorted(list(matched_cleaned_names))
|
||||
|
||||
|
||||
@@ -169,6 +205,8 @@ def match_folders_from_filename_enhanced(filename, names_to_match, unwanted_keyw
|
||||
"""
|
||||
Matches folder names from a filename, prioritizing longer and more specific aliases.
|
||||
It returns immediately after finding the first (longest) match.
|
||||
MODIFIED: Prioritizes boundary-aware matches for Latin characters,
|
||||
falls back to substring search for CJK compatibility.
|
||||
|
||||
Args:
|
||||
filename (str): The filename to check.
|
||||
@@ -188,23 +226,49 @@ def match_folders_from_filename_enhanced(filename, names_to_match, unwanted_keyw
|
||||
for name_obj in names_to_match:
|
||||
primary_name = name_obj.get("name")
|
||||
if not primary_name: continue
|
||||
|
||||
|
||||
cleaned_primary_name = clean_folder_name(primary_name)
|
||||
if not cleaned_primary_name or cleaned_primary_name.lower() in unwanted_keywords:
|
||||
continue
|
||||
|
||||
for alias in name_obj.get("aliases", []):
|
||||
if alias.lower():
|
||||
alias_map_to_primary.append((alias.lower(), cleaned_primary_name))
|
||||
|
||||
if alias: # Check if alias is not None and not an empty string
|
||||
alias_lower_val = alias.lower()
|
||||
if alias_lower_val: # Check again after lowercasing
|
||||
alias_map_to_primary.append((alias_lower_val, cleaned_primary_name))
|
||||
|
||||
# Sort by alias length, descending, to match longer aliases first
|
||||
alias_map_to_primary.sort(key=lambda x: len(x[0]), reverse=True)
|
||||
|
||||
# <<< MODIFICATION: Return the FIRST match found, which will be the longest >>>
|
||||
# Return the FIRST match found, which will be the longest
|
||||
for alias_lower, primary_name_for_alias in alias_map_to_primary:
|
||||
if alias_lower in filename_lower:
|
||||
# Found the longest possible alias that is a substring. Return immediately.
|
||||
return [primary_name_for_alias]
|
||||
try:
|
||||
# 1. Attempt boundary-aware match first (good for English/Latin)
|
||||
# Matches alias if it's at the start/end or surrounded by common separators
|
||||
# Instead of \b we anchor on the start/end of the string or a common separator (whitespace, _, +, -)
|
||||
pattern = r'(?:^|[\s_+-])' + re.escape(alias_lower) + r'(?:[\s_+-]|$)'
|
||||
|
||||
if re.search(pattern, filename_lower):
|
||||
# Found a precise, boundary-aware match. This is the best case.
|
||||
return [primary_name_for_alias]
|
||||
|
||||
# 2. Fallback: Simple substring check (for CJK or other cases)
|
||||
# This executes ONLY if the boundary match above failed.
|
||||
# We check if the alias contains CJK OR if the filename does.
|
||||
# This avoids applying the simple 'in' check for Latin-only aliases in Latin-only filenames.
|
||||
elif (contains_cjk(alias_lower) or contains_cjk(filename_lower)) and alias_lower in filename_lower:
|
||||
# This is the fallback for CJK compatibility.
|
||||
return [primary_name_for_alias]
|
||||
|
||||
# If alias is "ul" and filename is "sin+título":
|
||||
# 1. re.search(r'(?:^|[\s_+-])ul(?:[\s_+-]|$)', "sin+título") -> Fails (good)
|
||||
# 2. contains_cjk("ul") -> False
|
||||
# 3. contains_cjk("sin+título") -> False
|
||||
# 4. No match is found for "ul". (correct)
|
||||
|
||||
except re.error as e:
|
||||
print(f"Regex error matching alias '{alias_lower}' in filename '{filename_lower}': {e}")
|
||||
continue # Skip this alias if regex fails
|
||||
|
||||
# If the loop finishes without any matches, return an empty list.
|
||||
return []
|
||||
return []
|
||||
111
structure.txt
Normal file
111
structure.txt
Normal file
@@ -0,0 +1,111 @@
|
||||
├── assets/
|
||||
│ ├── Kemono.ico
|
||||
│ ├── Kemono.png
|
||||
│ ├── Ko-fi.png
|
||||
│ ├── buymeacoffee.png
|
||||
│ ├── discord.png
|
||||
│ ├── github.png
|
||||
│ ├── instagram.png
|
||||
│ └── patreon.png
|
||||
├── data/
|
||||
│ ├── creators.json
|
||||
│ └── dejavu-sans/
|
||||
│ ├── DejaVu Fonts License.txt
|
||||
│ ├── DejaVuSans-Bold.ttf
|
||||
│ ├── DejaVuSans-BoldOblique.ttf
|
||||
│ ├── DejaVuSans-ExtraLight.ttf
|
||||
│ ├── DejaVuSans-Oblique.ttf
|
||||
│ ├── DejaVuSans.ttf
|
||||
│ ├── DejaVuSansCondensed-Bold.ttf
|
||||
│ ├── DejaVuSansCondensed-BoldOblique.ttf
|
||||
│ ├── DejaVuSansCondensed-Oblique.ttf
|
||||
│ └── DejaVuSansCondensed.ttf
|
||||
├── directory_tree.txt
|
||||
├── main.py
|
||||
├── src/
|
||||
│ ├── __init__.py
|
||||
│ ├── config/
|
||||
│ │ ├── __init__.py
|
||||
│ │ └── constants.py
|
||||
│ ├── core/
|
||||
│ │ ├── Hentai2read_client.py
|
||||
│ │ ├── __init__.py
|
||||
│ │ ├── allcomic_client.py
|
||||
│ │ ├── api_client.py
|
||||
│ │ ├── booru_client.py
|
||||
│ │ ├── bunkr_client.py
|
||||
│ │ ├── discord_client.py
|
||||
│ │ ├── erome_client.py
|
||||
│ │ ├── fap_nation_client.py
|
||||
│ │ ├── manager.py
|
||||
│ │ ├── mangadex_client.py
|
||||
│ │ ├── nhentai_client.py
|
||||
│ │ ├── pixeldrain_client.py
|
||||
│ │ ├── rule34video_client.py
|
||||
│ │ ├── saint2_client.py
|
||||
│ │ ├── simpcity_client.py
|
||||
│ │ ├── toonily_client.py
|
||||
│ │ └── workers.py
|
||||
│ ├── i18n/
|
||||
│ │ ├── __init__.py
|
||||
│ │ └── translator.py
|
||||
│ ├── services/
|
||||
│ │ ├── __init__.py
|
||||
│ │ ├── drive_downloader.py
|
||||
│ │ ├── multipart_downloader.py
|
||||
│ │ └── updater.py
|
||||
│ ├── ui/
|
||||
│ │ ├── __init__.py
|
||||
│ │ ├── assets.py
|
||||
│ │ ├── classes/
|
||||
│ │ │ ├── allcomic_downloader_thread.py
|
||||
│ │ │ ├── booru_downloader_thread.py
|
||||
│ │ │ ├── bunkr_downloader_thread.py
|
||||
│ │ │ ├── discord_downloader_thread.py
|
||||
│ │ │ ├── downloader_factory.py
|
||||
│ │ │ ├── drive_downloader_thread.py
|
||||
│ │ │ ├── erome_downloader_thread.py
|
||||
│ │ │ ├── external_link_downloader_thread.py
|
||||
│ │ │ ├── fap_nation_downloader_thread.py
|
||||
│ │ │ ├── hentai2read_downloader_thread.py
|
||||
│ │ │ ├── kemono_discord_downloader_thread.py
|
||||
│ │ │ ├── mangadex_downloader_thread.py
|
||||
│ │ │ ├── nhentai_downloader_thread.py
|
||||
│ │ │ ├── pixeldrain_downloader_thread.py
|
||||
│ │ │ ├── rule34video_downloader_thread.py
|
||||
│ │ │ ├── saint2_downloader_thread.py
|
||||
│ │ │ ├── simp_city_downloader_thread.py
|
||||
│ │ │ └── toonily_downloader_thread.py
|
||||
│ │ ├── dialogs/
|
||||
│ │ │ ├── ConfirmAddAllDialog.py
|
||||
│ │ │ ├── CookieHelpDialog.py
|
||||
│ │ │ ├── CustomFilenameDialog.py
|
||||
│ │ │ ├── DownloadExtractedLinksDialog.py
|
||||
│ │ │ ├── DownloadHistoryDialog.py
|
||||
│ │ │ ├── EmptyPopupDialog.py
|
||||
│ │ │ ├── ErrorFilesDialog.py
|
||||
│ │ │ ├── ExportLinksDialog.py
|
||||
│ │ │ ├── ExportOptionsDialog.py
|
||||
│ │ │ ├── FavoriteArtistsDialog.py
|
||||
│ │ │ ├── FavoritePostsDialog.py
|
||||
│ │ │ ├── FutureSettingsDialog.py
|
||||
│ │ │ ├── HelpGuideDialog.py
|
||||
│ │ │ ├── KeepDuplicatesDialog.py
|
||||
│ │ │ ├── KnownNamesFilterDialog.py
|
||||
│ │ │ ├── MoreOptionsDialog.py
|
||||
│ │ │ ├── MultipartScopeDialog.py
|
||||
│ │ │ ├── SinglePDF.py
|
||||
│ │ │ ├── SupportDialog.py
|
||||
│ │ │ ├── TourDialog.py
|
||||
│ │ │ ├── __init__.py
|
||||
│ │ │ └── discord_pdf_generator.py
|
||||
│ │ └── main_window.py
|
||||
│ └── utils/
|
||||
│ ├── __init__.py
|
||||
│ ├── command.py
|
||||
│ ├── file_utils.py
|
||||
│ ├── network_utils.py
|
||||
│ ├── resolution.py
|
||||
│ └── text_utils.py
|
||||
├── structure.txt
|
||||
└── yt-dlp.exe
|
||||
Reference in New Issue
Block a user