Kemono-Downloader/src/core/toonily_client.py

import cloudscraper
from bs4 import BeautifulSoup
import time

def get_chapter_list(series_url, logger_func):
    logger_func(f"   [Toonily] Scraping series page for chapter list: {series_url}")
    scraper = cloudscraper.create_scraper()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
        'Referer': 'https://toonily.com/'
    }

    try:
        response = scraper.get(series_url, timeout=30, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        chapter_links = soup.select('li.wp-manga-chapter > a')

        if not chapter_links:
            logger_func("   [Toonily] ❌ Could not find any chapter links on the page.")
            return []

        urls = [link['href'] for link in chapter_links]
        urls.reverse()
        logger_func(f"   [Toonily] Found {len(urls)} chapters.")
        return urls

    except Exception as e:
        logger_func(f"   [Toonily] ❌ Error getting chapter list: {e}")
        return []


def fetch_chapter_data(chapter_url, logger_func, scraper_session):
    """
    Scrapes a single Toonily.com chapter page for its title and image URLs.
    """
    main_series_url = chapter_url.rsplit('/', 2)[0] + '/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': main_series_url
    }

    try:
        response = scraper_session.get(chapter_url, timeout=30, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        title_element = soup.select_one('h1#chapter-heading')
        image_container = soup.select_one('div.reading-content')

        if not title_element or not image_container:
            logger_func("   [Toonily] ❌ Page structure invalid. Could not find title or image container.")
            return None, None, []

        full_chapter_title = title_element.text.strip()

        if " - Chapter" in full_chapter_title:
            series_title = full_chapter_title.split(" - Chapter")[0].strip()
        else:
            series_title = full_chapter_title.strip()

        chapter_title = full_chapter_title # The full string is best for the chapter folder name

        image_elements = image_container.select('img')
        image_urls = [img.get('data-src', img.get('src')).strip() for img in image_elements if img.get('data-src') or img.get('src')]

        return series_title, chapter_title, image_urls

    except Exception as e:
        logger_func(f"   [Toonily] ❌ An error occurred scraping chapter '{chapter_url}': {e}")
        return None, None, []