Files
Kemono-Downloader/src/core/fap_nation_client.py
Yuvi63771 cef4211d7b Commit
2025-10-20 13:37:27 +05:30

138 lines
7.0 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
import os
import cloudscraper
from urllib.parse import urlparse, urljoin
from ..utils.file_utils import clean_folder_name
def _find_hls_url(scraper, album_url, html_content, logger_func):
    """Locate an HLS playlist URL for the album page, or return ``None``.

    The embedded ``mediadelivery.net`` iframe is tried first; if it yields
    nothing (or cannot be fetched) the main page HTML is searched for any
    quoted absolute ``.m3u8`` URL.
    """
    iframe_match = re.search(r'<iframe[^>]+src="([^"]+mediadelivery\.net[^"]+)"', html_content, re.IGNORECASE)
    if iframe_match:
        # The src may be protocol-relative ("//iframe.mediadelivery.net/..."),
        # which the HTTP client cannot request; resolve it against the page.
        iframe_url = urljoin(album_url, iframe_match.group(1))
        logger_func(f" [Fap-Nation] Found video iframe. Visiting: {iframe_url}")
        try:
            iframe_response = scraper.get(iframe_url, timeout=30)
            iframe_response.raise_for_status()
            playlist_match = re.search(r'<source[^>]+src="([^"]+\.m3u8)"', iframe_response.text, re.IGNORECASE)
            if playlist_match:
                # A relative <source src> is relative to the iframe document.
                hls_url = urljoin(iframe_url, playlist_match.group(1))
                logger_func(f" [Fap-Nation] Found embedded HLS stream in iframe: {hls_url}")
                return hls_url
        except Exception as e:
            # Best effort: an unreachable iframe must not abort the scrape.
            logger_func(f" [Fap-Nation] ⚠️ Error fetching or parsing iframe content: {e}")

    logger_func(" [Fap-Nation] No stream found in iframe. Checking main page content as a last resort...")
    js_var_match = re.search(r'"(https?://[^"]+\.m3u8)"', html_content, re.IGNORECASE)
    if js_var_match:
        hls_url = js_var_match.group(1)
        logger_func(f" [Fap-Nation] Found HLS stream on main page: {hls_url}")
        return hls_url
    return None


def _pick_best_direct_link(links, logger_func):
    """Return the preferred link from the non-empty list *links*.

    The first link whose URL contains the highest-ranked quality tag wins;
    when no link carries a known tag, the first link is returned.
    """
    # Ordered from most to least preferred.
    for quality in ('1080p', '720p', '480p', '360p'):
        for link in links:
            if quality in link.lower():
                logger_func(f" [Fap-Nation] Found '{quality}' link: {link}")
                return link
    fallback_link = links[0]
    logger_func(f" [Fap-Nation] ⚠️ No quality tags (1080p, 720p, etc.) found in links. Defaulting to first link: {fallback_link}")
    return fallback_link


def _select_best_hls_variant(playlist_text, playlist_url):
    """Return ``(height, absolute_url)`` of the largest variant, or ``None``.

    Parses the playlist line by line so that the final variant is found even
    when the text does not end with a newline. Variants whose
    ``#EXT-X-STREAM-INF`` tag has no ``RESOLUTION`` attribute are ignored.
    ``height`` is returned as the digit string found in the playlist.
    """
    best = None  # (pixel_count, height, uri)
    pending = None  # (width, height) of a STREAM-INF tag awaiting its URI line
    for raw_line in playlist_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('#EXT-X-STREAM-INF:'):
            resolution = re.search(r'RESOLUTION=(\d+)x(\d+)', line)
            # Reset on a tag without RESOLUTION so its URI is not credited
            # to an earlier tag.
            pending = resolution.groups() if resolution else None
            continue
        if line.startswith('#'):
            continue  # other tags or comments
        if pending is not None:
            width, height = pending
            pixel_count = int(width) * int(height)
            # Strictly greater keeps the first variant on ties.
            if best is None or pixel_count > best[0]:
                best = (pixel_count, height, line)
            pending = None
    if best is None:
        return None
    return best[1], urljoin(playlist_url, best[2])


def fetch_fap_nation_data(album_url, logger_func):
    """
    Scrapes a fap-nation page by prioritizing HLS streams first, then falling
    back to direct download links. Selects the highest quality available.

    Args:
        album_url: URL of the page to scrape.
        logger_func: Callable taking one string; receives progress messages.

    Returns:
        A tuple ``(album_title, files_to_download)``. On success
        ``files_to_download`` holds a single dict with the keys ``'url'``,
        ``'filename'`` and ``'type'`` (``'hls'`` or ``'direct'``). On any
        failure, including network errors, ``(None, [])`` is returned and the
        error is reported through ``logger_func`` instead of being raised.
    """
    logger_func(f" [Fap-Nation] Fetching album data from: {album_url}")
    scraper = cloudscraper.create_scraper()
    try:
        response = scraper.get(album_url, timeout=45)
        response.raise_for_status()
        html_content = response.text

        title_match = re.search(r'<h1[^>]*itemprop="name"[^>]*>(.*?)</h1>', html_content, re.IGNORECASE)
        album_slug = clean_folder_name(os.path.basename(urlparse(album_url).path.strip('/')))
        album_title = clean_folder_name(title_match.group(1).strip()) if title_match else album_slug

        # The player config (HTML-escaped JSON) may carry the video's filename.
        filename_from_video_tag = None
        video_tag_title_match = re.search(r'data-plyr-config=.*?&quot;title&quot;:.*?&quot;([^&]+?\.mp4)&quot;', html_content, re.IGNORECASE)
        if video_tag_title_match:
            filename_from_video_tag = clean_folder_name(video_tag_title_match.group(1))
            logger_func(f" [Fap-Nation] Found high-quality filename in video tag: {filename_from_video_tag}")

        # 1. Prioritize finding an HLS stream.
        logger_func(" [Fap-Nation] Priority 1: Searching for HLS stream...")
        final_url = _find_hls_url(scraper, album_url, html_content, logger_func)
        link_type = 'hls' if final_url else None

        # 2. Fallback: If no HLS stream was found, search for direct links.
        if not final_url:
            logger_func(" [Fap-Nation] No HLS stream found. Priority 2 (Fallback): Searching for direct download links...")
            direct_link_pattern = r'<a\s+[^>]*href="([^"]+\.(?:mp4|webm|mkv|mov))"[^>]*>'
            # Resolve relative hrefs so the result is always a fetchable URL.
            direct_links_found = [
                urljoin(album_url, link)
                for link in re.findall(direct_link_pattern, html_content, re.IGNORECASE)
            ]
            if direct_links_found:
                logger_func(f" [Fap-Nation] Found {len(direct_links_found)} direct media link(s). Selecting the best quality...")
                final_url = _pick_best_direct_link(direct_links_found, logger_func)
                link_type = 'direct'
                logger_func(f" [Fap-Nation] Identified direct media link: {final_url}")

        # If after all checks, we still have no URL, then fail.
        if not final_url:
            logger_func(" [Fap-Nation] ❌ Stage 1 Failed: Could not find any HLS stream or direct link.")
            return None, []

        # For HLS, the URL found so far may be a master playlist; switch to
        # its highest-resolution variant when one is listed.
        if link_type == 'hls':
            logger_func(" [Fap-Nation] HLS stream found. Checking for higher quality variants...")
            try:
                master_playlist_response = scraper.get(final_url, timeout=20)
                master_playlist_response.raise_for_status()
                best_variant = _select_best_hls_variant(master_playlist_response.text, final_url)
                if best_variant:
                    height, final_url = best_variant
                    logger_func(f" [Fap-Nation] ✅ Best quality found: {height}p. Updating URL to: {final_url}")
                else:
                    logger_func(" [Fap-Nation] No alternate quality streams found in playlist. Using original.")
            except Exception as e:
                # Best effort: keep the playlist URL we already have.
                logger_func(f" [Fap-Nation] ⚠️ Could not parse HLS master playlist for quality selection: {e}. Using original URL.")

        # The output name always carries an .mp4 extension, whatever the source.
        if filename_from_video_tag:
            base_name, _ = os.path.splitext(filename_from_video_tag)
            new_filename = f"{base_name}.mp4"
        else:
            new_filename = f"{album_slug}.mp4"

        files_to_download = [{'url': final_url, 'filename': new_filename, 'type': link_type}]
        logger_func(f" [Fap-Nation] ✅ Ready to download '{new_filename}' ({link_type} method).")
        return album_title, files_to_download
    except Exception as e:
        # Top-level boundary: callers rely on (None, []) rather than an exception.
        logger_func(f" [Fap-Nation] ❌ Error fetching Fap-Nation data: {e}")
        return None, []