Files
Kemono-Downloader/src/core/allcomic_client.py
Yuvi9587 77bd428b91 Commit
2025-12-25 21:56:04 +05:30

132 lines
5.1 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import requests
import re
from bs4 import BeautifulSoup
import time
import random
from urllib.parse import urlparse
# 1. Update arguments to accept proxies=None
def get_chapter_list(scraper, series_url, logger_func, proxies=None):
"""
Checks if a URL is a series page and returns a list of all chapter URLs if it is.
Relies on a passed-in scraper session for connection.
"""
logger_func(f" [AllComic] Checking for chapter list at: {series_url}")
headers = {'Referer': 'https://allporncomic.com/'}
response = None
max_retries = 8
# 2. Define smart timeout logic
req_timeout = (30, 120) if proxies else 30
for attempt in range(max_retries):
try:
# 3. Add proxies, verify=False, and the new timeout
response = scraper.get(series_url, headers=headers, timeout=req_timeout, proxies=proxies, verify=False)
response.raise_for_status()
logger_func(f" [AllComic] Successfully connected to series page on attempt {attempt + 1}.")
break
except requests.RequestException as e:
logger_func(f" [AllComic] ⚠️ Series page check attempt {attempt + 1}/{max_retries} failed: {e}")
if attempt < max_retries - 1:
wait_time = (2 ** attempt) + random.uniform(0, 2)
logger_func(f" Retrying in {wait_time:.1f} seconds...")
time.sleep(wait_time)
else:
logger_func(f" [AllComic] ❌ All attempts to check series page failed.")
return []
if not response:
return []
try:
soup = BeautifulSoup(response.text, 'html.parser')
chapter_links = soup.select('li.wp-manga-chapter a')
if not chapter_links:
logger_func(" [AllComic] No chapter list found. Assuming this is a single chapter page.")
return []
chapter_urls = [link['href'] for link in chapter_links]
chapter_urls.reverse()
logger_func(f" [AllComic] ✅ Found {len(chapter_urls)} chapters.")
return chapter_urls
except Exception as e:
logger_func(f" [AllComic] ❌ Error parsing chapters after successful connection: {e}")
return []
# 4. Update arguments here too
def fetch_chapter_data(scraper, chapter_url, logger_func, proxies=None):
"""
Fetches the comic title, chapter title, and image URLs for a single chapter page.
Relies on a passed-in scraper session for connection.
"""
logger_func(f" [AllComic] Fetching page: {chapter_url}")
headers = {'Referer': 'https://allporncomic.com/'}
response = None
max_retries = 8
# 5. Define smart timeout logic again
req_timeout = (30, 120) if proxies else 30
for attempt in range(max_retries):
try:
# 6. Add proxies, verify=False, and timeout
response = scraper.get(chapter_url, headers=headers, timeout=req_timeout, proxies=proxies, verify=False)
response.raise_for_status()
break
except requests.RequestException as e:
logger_func(f" [AllComic] ⚠️ Chapter page connection attempt {attempt + 1}/{max_retries} failed: {e}")
if attempt < max_retries - 1:
wait_time = (2 ** attempt) + random.uniform(0, 2)
logger_func(f" Retrying in {wait_time:.1f} seconds...")
time.sleep(wait_time)
else:
logger_func(f" [AllComic] ❌ All connection attempts failed for chapter: {chapter_url}")
return None, None, None
if not response:
return None, None, None
try:
soup = BeautifulSoup(response.text, 'html.parser')
comic_title = "Unknown Comic"
title_element = soup.find('h1', class_='post-title')
if title_element:
comic_title = title_element.text.strip()
else:
try:
path_parts = urlparse(chapter_url).path.strip('/').split('/')
if len(path_parts) >= 3 and path_parts[-3] == 'porncomic':
comic_slug = path_parts[-2]
comic_title = comic_slug.replace('-', ' ').title()
except Exception:
pass
chapter_slug = chapter_url.strip('/').split('/')[-1]
chapter_title = chapter_slug.replace('-', ' ').title()
reading_container = soup.find('div', class_='reading-content')
list_of_image_urls = []
if reading_container:
image_elements = reading_container.find_all('img', class_='wp-manga-chapter-img')
for img in image_elements:
img_url = (img.get('data-src') or img.get('src', '')).strip()
if img_url:
list_of_image_urls.append(img_url)
if not list_of_image_urls:
logger_func(f" [AllComic] ❌ Could not find any images on the page.")
return None, None, None
return comic_title, chapter_title, list_of_image_urls
except Exception as e:
logger_func(f" [AllComic] ❌ An unexpected error occurred while parsing the page: {e}")
return None, None, None