import requests
import re
from bs4 import BeautifulSoup
import time
import random
from urllib.parse import urlparse
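
# The `scraper` argument used throughout is assumed to be a requests.Session-
# compatible object (for example, a cloudscraper session) whose .get() accepts
# `headers` and `timeout` keyword arguments and returns a requests.Response.
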
def get_chapter_list(scraper, series_url, logger_func):
    """
    Checks if a URL is a series page and returns a list of all chapter URLs if it is.
    Relies on a passed-in scraper session for the connection.
    """
    logger_func(f" [AllComic] Checking for chapter list at: {series_url}")

    headers = {'Referer': 'https://allporncomic.com/'}
    response = None
    max_retries = 8

    # Retry the request with exponential backoff plus random jitter.
    for attempt in range(max_retries):
        try:
            response = scraper.get(series_url, headers=headers, timeout=30)
            response.raise_for_status()
            logger_func(f" [AllComic] Successfully connected to series page on attempt {attempt + 1}.")
            break
        except requests.RequestException as e:
            logger_func(f" [AllComic] ⚠️ Series page check attempt {attempt + 1}/{max_retries} failed: {e}")
            if attempt < max_retries - 1:
                wait_time = (2 ** attempt) + random.uniform(0, 2)
                logger_func(f" Retrying in {wait_time:.1f} seconds...")
                time.sleep(wait_time)
            else:
                logger_func(" [AllComic] ❌ All attempts to check series page failed.")
                return []

    if not response:
        return []

    try:
        soup = BeautifulSoup(response.text, 'html.parser')
        # Each chapter is an <a> inside <li class="wp-manga-chapter">.
        chapter_links = soup.select('li.wp-manga-chapter a')

        if not chapter_links:
            logger_func(" [AllComic] ℹ️ No chapter list found. Assuming this is a single chapter page.")
            return []

        # Reverse so the returned list runs in ascending (reading) order.
        chapter_urls = [link['href'] for link in chapter_links]
        chapter_urls.reverse()

        logger_func(f" [AllComic] ✅ Found {len(chapter_urls)} chapters.")
        return chapter_urls

    except Exception as e:
        logger_func(f" [AllComic] ❌ Error parsing chapters after successful connection: {e}")
        return []

def fetch_chapter_data(scraper, chapter_url, logger_func):
    """
    Fetches the comic title, chapter title, and image URLs for a single chapter page.
    Relies on a passed-in scraper session for the connection.
    """
    logger_func(f" [AllComic] Fetching page: {chapter_url}")

    headers = {'Referer': 'https://allporncomic.com/'}
    response = None
    max_retries = 8

    # Retry the request with exponential backoff plus random jitter.
    for attempt in range(max_retries):
        try:
            response = scraper.get(chapter_url, headers=headers, timeout=30)
            response.raise_for_status()
            break
        except requests.RequestException as e:
            logger_func(f" [AllComic] ⚠️ Chapter page connection attempt {attempt + 1}/{max_retries} failed: {e}")
            if attempt < max_retries - 1:
                wait_time = (2 ** attempt) + random.uniform(0, 2)
                logger_func(f" Retrying in {wait_time:.1f} seconds...")
                time.sleep(wait_time)
            else:
                logger_func(f" [AllComic] ❌ All connection attempts failed for chapter: {chapter_url}")
                return None, None, None

    if not response:
        return None, None, None

    try:
        soup = BeautifulSoup(response.text, 'html.parser')

        # Prefer the page's <h1 class="post-title">; otherwise derive the comic
        # title from the URL path (.../porncomic/<comic-slug>/<chapter-slug>/).
        comic_title = "Unknown Comic"
        title_element = soup.find('h1', class_='post-title')
        if title_element:
            comic_title = title_element.text.strip()
        else:
            try:
                path_parts = urlparse(chapter_url).path.strip('/').split('/')
                if len(path_parts) >= 3 and path_parts[-3] == 'porncomic':
                    comic_slug = path_parts[-2]
                    comic_title = comic_slug.replace('-', ' ').title()
            except Exception:
                pass

        # The chapter title is derived from the last segment of the URL.
        chapter_slug = chapter_url.strip('/').split('/')[-1]
        chapter_title = chapter_slug.replace('-', ' ').title()

        # Page images live inside <div class="reading-content"> as
        # <img class="wp-manga-chapter-img">, preferring the lazy-load data-src.
        reading_container = soup.find('div', class_='reading-content')
        list_of_image_urls = []
        if reading_container:
            image_elements = reading_container.find_all('img', class_='wp-manga-chapter-img')
            for img in image_elements:
                img_url = (img.get('data-src') or img.get('src', '')).strip()
                if img_url:
                    list_of_image_urls.append(img_url)

        if not list_of_image_urls:
            logger_func(" [AllComic] ❌ Could not find any images on the page.")
            return None, None, None

        return comic_title, chapter_title, list_of_image_urls

    except Exception as e:
        logger_func(f" [AllComic] ❌ An unexpected error occurred while parsing the page: {e}")
        return None, None, None
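

# Minimal usage sketch: drives both helpers together. It assumes the shared
# session is built with cloudscraper and uses a hypothetical series URL;
# neither assumption comes from this module.
if __name__ == "__main__":
    import cloudscraper  # assumed dependency; any requests.Session-like object works

    scraper = cloudscraper.create_scraper()
    series_url = "https://allporncomic.com/porncomic/example-series/"  # hypothetical URL

    # An empty list from get_chapter_list means the URL is treated as a single
    # chapter page, so fall back to fetching just that one URL.
    chapter_urls = get_chapter_list(scraper, series_url, print) or [series_url]

    for url in chapter_urls:
        comic_title, chapter_title, image_urls = fetch_chapter_data(scraper, url, print)
        if image_urls:
            print(f"{comic_title} - {chapter_title}: {len(image_urls)} image(s)")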