mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
138 lines
7.0 KiB
Python
138 lines
7.0 KiB
Python
import re
|
||
import os
|
||
import cloudscraper
|
||
from urllib.parse import urlparse, urljoin
|
||
from ..utils.file_utils import clean_folder_name
|
||
|
||
# Quality tags looked for in direct-link URLs, ordered from most to least preferred.
_FAP_NATION_QUALITY_TAGS = ('2160p', '1440p', '1080p', '720p', '480p', '360p')


def _fap_nation_resolve_url(base_url, raw_url):
    """Turn a URL copied out of an HTML attribute into an absolute, unescaped URL."""
    # Local import: keeps this block self-contained without touching file-level imports.
    from html import unescape

    # Attribute values are HTML-escaped ("&amp;") and may be relative or
    # protocol-relative ("//host/path"); urljoin leaves absolute URLs untouched.
    return urljoin(base_url, unescape(raw_url.strip()))


def _fap_nation_pick_direct_link(links, logger_func):
    """Return the link carrying the most preferred quality tag, else the first link."""
    for quality in _FAP_NATION_QUALITY_TAGS:
        for link in links:
            if quality in link.lower():
                logger_func(f" [Fap-Nation] Found '{quality}' link: {link}")
                return link

    fallback_link = links[0]
    logger_func(f" [Fap-Nation] ⚠️ No quality tags (1080p, 720p, etc.) found in links. Defaulting to first link: {fallback_link}")
    return fallback_link


def _fap_nation_best_hls_variant(playlist_text):
    """Return ``(height, uri)`` of the highest-resolution variant, or None if there is none."""
    # The URI is the first non-blank line after the #EXT-X-STREAM-INF tag.
    # It is matched as a run of non-whitespace so that the final variant is
    # still found when the playlist has no trailing newline, and a following
    # '#' tag line is never mistaken for a URI.
    variants = re.findall(
        r'#EXT-X-STREAM-INF:[^\n]*?RESOLUTION=(\d+)x(\d+)[^\n]*\n\s*([^\s#]\S*)',
        playlist_text,
    )
    if not variants:
        return None
    _width, height, uri = max(variants, key=lambda v: int(v[0]) * int(v[1]))
    return height, uri


def fetch_fap_nation_data(album_url, logger_func):
    """
    Scrapes a fap-nation page by prioritizing HLS streams first, then falling
    back to direct download links. Selects the highest quality available.

    Args:
        album_url: URL of the page to scrape.
        logger_func: Callable accepting a single message string; used for all
            progress and error reporting.

    Returns:
        A tuple ``(album_title, files_to_download)``. On success
        ``files_to_download`` is a one-element list holding a dict with the
        keys ``'url'``, ``'filename'`` and ``'type'`` (``'hls'`` or
        ``'direct'``). On any failure the function logs the reason and
        returns ``(None, [])``; request/parsing exceptions are not propagated.
    """
    from html import unescape  # local import: see _fap_nation_resolve_url

    logger_func(f" [Fap-Nation] Fetching album data from: {album_url}")
    scraper = cloudscraper.create_scraper()

    try:
        response = scraper.get(album_url, timeout=45)
        response.raise_for_status()
        html_content = response.text

        # --- Album title (falls back to the URL slug) ---
        album_slug = clean_folder_name(os.path.basename(urlparse(album_url).path.strip('/')))
        album_title = album_slug
        # DOTALL so a heading that spans several lines is still recognised.
        title_match = re.search(
            r'<h1[^>]*itemprop="name"[^>]*>(.*?)</h1>',
            html_content,
            re.IGNORECASE | re.DOTALL,
        )
        if title_match:
            # Drop nested markup, decode entities and collapse whitespace so
            # none of it ends up in the folder name.
            title_text = unescape(re.sub(r'<[^>]+>', '', title_match.group(1)))
            title_text = ' '.join(title_text.split())
            album_title = clean_folder_name(title_text) or album_slug

        # --- Preferred filename advertised by the video player config ---
        filename_from_video_tag = None
        video_tag_title_match = re.search(r'data-plyr-config=.*?"title":.*?"([^&]+?\.mp4)"', html_content, re.IGNORECASE)
        if video_tag_title_match:
            filename_from_video_tag = clean_folder_name(video_tag_title_match.group(1))
            logger_func(f" [Fap-Nation] Found high-quality filename in video tag: {filename_from_video_tag}")

        final_url = None
        link_type = None

        # 1. Prioritize finding an HLS stream.
        logger_func(" [Fap-Nation] Priority 1: Searching for HLS stream...")
        iframe_match = re.search(r'<iframe[^>]+src="([^"]+mediadelivery\.net[^"]+)"', html_content, re.IGNORECASE)

        if iframe_match:
            iframe_url = _fap_nation_resolve_url(album_url, iframe_match.group(1))
            logger_func(f" [Fap-Nation] Found video iframe. Visiting: {iframe_url}")
            try:
                iframe_response = scraper.get(iframe_url, timeout=30)
                iframe_response.raise_for_status()
                playlist_match = re.search(r'<source[^>]+src="([^"]+\.m3u8)"', iframe_response.text, re.IGNORECASE)
                if playlist_match:
                    # A relative playlist path is relative to the iframe page.
                    final_url = _fap_nation_resolve_url(iframe_url, playlist_match.group(1))
                    link_type = 'hls'
                    logger_func(f" [Fap-Nation] Found embedded HLS stream in iframe: {final_url}")
            except Exception as e:
                # Best effort: an unusable iframe must not abort the other strategies.
                logger_func(f" [Fap-Nation] ⚠️ Error fetching or parsing iframe content: {e}")

        if not final_url:
            logger_func(" [Fap-Nation] No stream found in iframe. Checking main page content as a last resort...")
            js_var_match = re.search(r'"(https?://[^"]+\.m3u8)"', html_content, re.IGNORECASE)
            if js_var_match:
                final_url = js_var_match.group(1)
                link_type = 'hls'
                logger_func(f" [Fap-Nation] Found HLS stream on main page: {final_url}")

        # 2. Fallback: If no HLS stream was found, search for direct links.
        if not final_url:
            logger_func(" [Fap-Nation] No HLS stream found. Priority 2 (Fallback): Searching for direct download links...")
            direct_link_pattern = r'<a\s+[^>]*href="([^"]+\.(?:mp4|webm|mkv|mov))"[^>]*>'
            direct_links_found = re.findall(direct_link_pattern, html_content, re.IGNORECASE)

            if direct_links_found:
                logger_func(f" [Fap-Nation] Found {len(direct_links_found)} direct media link(s). Selecting the best quality...")
                best_link = _fap_nation_pick_direct_link(direct_links_found, logger_func)
                final_url = _fap_nation_resolve_url(album_url, best_link)
                link_type = 'direct'
                logger_func(f" [Fap-Nation] Identified direct media link: {final_url}")

        # If after all checks, we still have no URL, then fail.
        if not final_url:
            logger_func(" [Fap-Nation] ❌ Stage 1 Failed: Could not find any HLS stream or direct link.")
            return None, []

        # --- HLS Quality Selection Logic ---
        if link_type == 'hls':
            logger_func(" [Fap-Nation] HLS stream found. Checking for higher quality variants...")
            try:
                master_playlist_response = scraper.get(final_url, timeout=20)
                master_playlist_response.raise_for_status()
                best_variant = _fap_nation_best_hls_variant(master_playlist_response.text)

                if best_variant:
                    height, relative_path = best_variant
                    new_final_url = urljoin(final_url, relative_path)
                    logger_func(f" [Fap-Nation] ✅ Best quality found: {height}p. Updating URL to: {new_final_url}")
                    final_url = new_final_url
                else:
                    logger_func(" [Fap-Nation] ℹ️ No alternate quality streams found in playlist. Using original.")
            except Exception as e:
                # Best effort: keep the playlist URL we already have.
                logger_func(f" [Fap-Nation] ⚠️ Could not parse HLS master playlist for quality selection: {e}. Using original URL.")

        # final_url and link_type are always set together, so both are valid here.
        # NOTE(review): the output name is always given an .mp4 extension, even for
        # direct .webm/.mkv/.mov links — confirm whether the downloader relies on that.
        if filename_from_video_tag:
            base_name, _ = os.path.splitext(filename_from_video_tag)
            new_filename = f"{base_name}.mp4"
        else:
            new_filename = f"{album_slug}.mp4"

        files_to_download = [{'url': final_url, 'filename': new_filename, 'type': link_type}]
        logger_func(f" [Fap-Nation] ✅ Ready to download '{new_filename}' ({link_type} method).")
        return album_title, files_to_download

    except Exception as e:
        # Top-level boundary: report the failure through the logger and signal
        # it to the caller via the (None, []) return value.
        logger_func(f" [Fap-Nation] ❌ Error fetching Fap-Nation data: {e}")
        return None, []
    finally:
        # The scraper is a requests-style session; release its pooled connections.
        scraper.close()