Kemono-Downloader/src/core/mangadex_client.py
2025-10-08 17:02:46 +05:30

189 lines
8.5 KiB
Python

# src/core/mangadex_client.py
import os
import re
import time
import cloudscraper
from collections import defaultdict
from ..utils.file_utils import clean_folder_name
def fetch_mangadex_data(start_url, output_dir, logger_func, file_progress_callback, overall_progress_callback, pause_event, cancellation_event):
"""
Fetches and downloads all content from a MangaDex series or chapter URL.
Returns a tuple of (downloaded_count, skipped_count).
"""
grand_total_dl = 0
grand_total_skip = 0
api = _MangadexAPI(logger_func)
def _check_pause():
if cancellation_event and cancellation_event.is_set(): return True
if pause_event and pause_event.is_set():
logger_func(" Download paused...")
while pause_event.is_set():
if cancellation_event and cancellation_event.is_set(): return True
time.sleep(0.5)
logger_func(" Download resumed.")
return cancellation_event.is_set()
series_match = re.search(r"mangadex\.org/(?:title|manga)/([0-9a-f-]+)", start_url)
chapter_match = re.search(r"mangadex\.org/chapter/([0-9a-f-]+)", start_url)
chapters_to_process = []
if series_match:
series_id = series_match.group(1)
logger_func(f" Series detected. Fetching chapter list for ID: {series_id}")
chapters_to_process = api.get_manga_chapters(series_id, cancellation_event, pause_event)
elif chapter_match:
chapter_id = chapter_match.group(1)
logger_func(f" Single chapter detected. Fetching info for ID: {chapter_id}")
chapter_info = api.get_chapter_info(chapter_id)
if chapter_info:
chapters_to_process = [chapter_info]
if not chapters_to_process:
logger_func("❌ No chapters found or failed to fetch chapter info.")
return 0, 0
logger_func(f"✅ Found {len(chapters_to_process)} chapter(s) to download.")
if overall_progress_callback:
overall_progress_callback.emit(len(chapters_to_process), 0)
for chap_idx, chapter_json in enumerate(chapters_to_process):
if _check_pause(): break
try:
metadata = api.transform_chapter_data(chapter_json)
logger_func("-" * 40)
logger_func(f"Processing Chapter {chap_idx + 1}/{len(chapters_to_process)}: Vol. {metadata['volume']} Ch. {metadata['chapter']}{metadata['chapter_minor']} - {metadata['title']}")
server_info = api.get_at_home_server(chapter_json["id"])
if not server_info:
logger_func(" ❌ Could not get image server for this chapter. Skipping.")
continue
base_url = f"{server_info['baseUrl']}/data/{server_info['chapter']['hash']}/"
image_files = server_info['chapter']['data']
series_folder = clean_folder_name(metadata['manga'])
chapter_folder_title = metadata['title'] or ''
chapter_folder = clean_folder_name(f"Vol {metadata['volume']:02d} Chap {metadata['chapter']:03d}{metadata['chapter_minor']} - {chapter_folder_title}".strip().strip('-').strip())
final_save_path = os.path.join(output_dir, series_folder, chapter_folder)
os.makedirs(final_save_path, exist_ok=True)
for img_idx, filename in enumerate(image_files):
if _check_pause(): break
full_img_url = base_url + filename
img_path = os.path.join(final_save_path, f"{img_idx + 1:03d}{os.path.splitext(filename)[1]}")
if os.path.exists(img_path):
logger_func(f" -> Skip ({img_idx+1}/{len(image_files)}): '{os.path.basename(img_path)}' already exists.")
grand_total_skip += 1
continue
logger_func(f" Downloading ({img_idx+1}/{len(image_files)}): '{os.path.basename(img_path)}'...")
try:
response = api.session.get(full_img_url, stream=True, timeout=60, headers={'Referer': 'https://mangadex.org/'})
response.raise_for_status()
total_size = int(response.headers.get('content-length', 0))
if file_progress_callback:
file_progress_callback.emit(os.path.basename(img_path), (0, total_size))
with open(img_path, 'wb') as f:
downloaded_bytes = 0
for chunk in response.iter_content(chunk_size=8192):
if _check_pause(): break
f.write(chunk)
downloaded_bytes += len(chunk)
if file_progress_callback:
file_progress_callback.emit(os.path.basename(img_path), (downloaded_bytes, total_size))
if _check_pause():
if os.path.exists(img_path): os.remove(img_path)
break
grand_total_dl += 1
except Exception as e:
logger_func(f" ❌ Failed to download page {img_idx+1}: {e}")
grand_total_skip += 1
if overall_progress_callback:
overall_progress_callback.emit(len(chapters_to_process), chap_idx + 1)
time.sleep(1)
except Exception as e:
logger_func(f" ❌ An unexpected error occurred while processing chapter {chapter_json.get('id')}: {e}")
return grand_total_dl, grand_total_skip
class _MangadexAPI:
def __init__(self, logger_func):
self.logger_func = logger_func
self.session = cloudscraper.create_scraper()
self.root = "https://api.mangadex.org"
def _call(self, endpoint, params=None, cancellation_event=None):
if cancellation_event and cancellation_event.is_set(): return None
try:
response = self.session.get(f"{self.root}{endpoint}", params=params, timeout=30)
if response.status_code == 429:
retry_after = int(response.headers.get("X-RateLimit-Retry-After", 5))
self.logger_func(f" ⚠️ Rate limited. Waiting for {retry_after} seconds...")
time.sleep(retry_after)
return self._call(endpoint, params, cancellation_event)
response.raise_for_status()
return response.json()
except Exception as e:
self.logger_func(f" ❌ API call to '{endpoint}' failed: {e}")
return None
def get_manga_chapters(self, series_id, cancellation_event, pause_event):
all_chapters = []
offset = 0
limit = 500
base_params = {
"limit": limit, "order[volume]": "asc", "order[chapter]": "asc",
"translatedLanguage[]": ["en"], "includes[]": ["scanlation_group", "user", "manga"]
}
while True:
if cancellation_event.is_set(): break
while pause_event.is_set(): time.sleep(0.5)
params = {**base_params, "offset": offset}
data = self._call(f"/manga/{series_id}/feed", params, cancellation_event)
if not data or data.get("result") != "ok": break
results = data.get("data", [])
all_chapters.extend(results)
if (offset + limit) >= data.get("total", 0): break
offset += limit
return all_chapters
def get_chapter_info(self, chapter_id):
params = {"includes[]": ["scanlation_group", "user", "manga"]}
data = self._call(f"/chapter/{chapter_id}", params)
return data.get("data") if data and data.get("result") == "ok" else None
def get_at_home_server(self, chapter_id):
return self._call(f"/at-home/server/{chapter_id}")
def transform_chapter_data(self, chapter):
relationships = {item["type"]: item for item in chapter.get("relationships", [])}
manga = relationships.get("manga", {})
c_attrs = chapter.get("attributes", {})
m_attrs = manga.get("attributes", {})
chapter_num_str = c_attrs.get("chapter", "0") or "0"
chnum, sep, minor = chapter_num_str.partition(".")
return {
"manga": (m_attrs.get("title", {}).get("en") or next(iter(m_attrs.get("title", {}).values()), "Unknown Series")),
"title": c_attrs.get("title", ""),
"volume": int(float(c_attrs.get("volume", 0) or 0)),
"chapter": int(float(chnum or 0)),
"chapter_minor": sep + minor if minor else ""
}