import re
import os
import cloudscraper
from urllib.parse import urlparse, urljoin
from ..utils.file_utils import clean_folder_name
def fetch_fap_nation_data(album_url, logger_func):
"""
Scrapes a fap-nation page by prioritizing HLS streams first, then falling
back to direct download links. Selects the highest quality available.
"""
logger_func(f" [Fap-Nation] Fetching album data from: {album_url}")
scraper = cloudscraper.create_scraper()
try:
response = scraper.get(album_url, timeout=45)
response.raise_for_status()
html_content = response.text
title_match = re.search(r'
]*itemprop="name"[^>]*>(.*?)
', html_content, re.IGNORECASE)
album_slug = clean_folder_name(os.path.basename(urlparse(album_url).path.strip('/')))
album_title = clean_folder_name(title_match.group(1).strip()) if title_match else album_slug
files_to_download = []
final_url = None
link_type = None
filename_from_video_tag = None
video_tag_title_match = re.search(r'data-plyr-config=.*?"title":.*?"([^&]+?\.mp4)"', html_content, re.IGNORECASE)
if video_tag_title_match:
filename_from_video_tag = clean_folder_name(video_tag_title_match.group(1))
logger_func(f" [Fap-Nation] Found high-quality filename in video tag: {filename_from_video_tag}")
# --- REVISED LOGIC: HLS FIRST ---
# 1. Prioritize finding an HLS stream.
logger_func(" [Fap-Nation] Priority 1: Searching for HLS stream...")
iframe_match = re.search(r'