import re
import os
import cloudscraper
from urllib.parse import urlparse, urljoin
from ..utils.file_utils import clean_folder_name


# Quality tags searched for in direct-link URLs, highest preference first.
_DIRECT_QUALITY_TAGS = ('1080p', '720p', '480p', '360p')


def _pick_best_hls_variant(playlist_text):
    """
    Finds the highest-resolution variant listed in an HLS master playlist.

    Each '#EXT-X-STREAM-INF' line that carries a RESOLUTION attribute is paired
    with the next non-comment, non-blank line, which is taken as its URI.

    Returns a (height, uri) tuple for the variant with the largest pixel area
    (the first one wins on ties), or None when the playlist lists no variant
    with a resolution.
    """
    best = None      # (pixel_area, height, uri)
    pending = None   # (width, height) of the STREAM-INF line awaiting its URI
    for raw_line in playlist_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('#EXT-X-STREAM-INF:'):
            res_match = re.search(r'RESOLUTION=(\d+)x(\d+)', line)
            pending = (int(res_match.group(1)), int(res_match.group(2))) if res_match else None
            continue
        if line.startswith('#'):
            # Some other tag or comment; keep waiting for the URI line.
            continue
        if pending is not None:
            width, height = pending
            area = width * height
            if best is None or area > best[0]:
                best = (area, height, line)
            pending = None
    if best is None:
        return None
    return best[1], best[2]


def _pick_best_direct_link(links):
    """
    Chooses a direct media link by quality tag.

    Returns (link, quality) for the first link containing the most preferred
    tag from _DIRECT_QUALITY_TAGS, or (links[0], None) when no link carries a
    known tag. `links` must be non-empty.
    """
    for quality in _DIRECT_QUALITY_TAGS:
        for link in links:
            if quality in link.lower():
                return link, quality
    return links[0], None


def fetch_fap_nation_data(album_url, logger_func):
    """
    Scrapes a fap-nation page by prioritizing HLS streams first, then falling
    back to direct download links. Selects the highest quality available.

    Args:
        album_url: URL of the page to scrape.
        logger_func: Callable taking a single string; receives progress and
            error messages.

    Returns:
        A tuple (album_title, files). On success `files` is a one-element list
        holding a dict with the keys 'url', 'filename' and 'type' ('hls' or
        'direct'). On any failure the result is (None, []); errors are logged
        through logger_func rather than raised.
    """
    logger_func(f" [Fap-Nation] Fetching album data from: {album_url}")
    scraper = cloudscraper.create_scraper()

    try:
        response = scraper.get(album_url, timeout=45)
        response.raise_for_status()
        html_content = response.text

        album_slug = clean_folder_name(os.path.basename(urlparse(album_url).path.strip('/')))

        # Use the text of the element tagged itemprop="name" as the title. The
        # capture is anchored on the element's closing tag so it is not empty,
        # and the slug is used whenever no usable text is found.
        album_title = album_slug
        title_match = re.search(
            r'<(\w+)[^>]*itemprop="name"[^>]*>(.*?)</\1>',
            html_content,
            re.IGNORECASE | re.DOTALL,
        )
        if title_match:
            title_text = re.sub(r'<[^>]+>', '', title_match.group(2)).strip()
            if title_text:
                album_title = clean_folder_name(title_text)

        final_url = None
        link_type = None
        filename_from_video_tag = None

        video_tag_title_match = re.search(r'data-plyr-config=.*?"title":.*?"([^&]+?\.mp4)"', html_content, re.IGNORECASE)
        if video_tag_title_match:
            filename_from_video_tag = clean_folder_name(video_tag_title_match.group(1))
            logger_func(f" [Fap-Nation] Found high-quality filename in video tag: {filename_from_video_tag}")

        # --- HLS FIRST ---
        # 1. Prioritize finding an HLS stream, starting with the embedded player iframe.
        logger_func(" [Fap-Nation] Priority 1: Searching for HLS stream...")
        iframe_match = re.search(r'<iframe[^>]+src="([^"]+mediadelivery\.net[^"]+)"', html_content, re.IGNORECASE)

        if iframe_match:
            # urljoin resolves relative and protocol-relative ("//host/...") sources.
            iframe_url = urljoin(album_url, iframe_match.group(1))
            logger_func(f" [Fap-Nation] Found video iframe. Visiting: {iframe_url}")
            try:
                iframe_response = scraper.get(iframe_url, timeout=30)
                iframe_response.raise_for_status()
                iframe_html = iframe_response.text

                playlist_match = re.search(r'<[^>]+src="([^"]+\.m3u8)"', iframe_html, re.IGNORECASE)
                if playlist_match:
                    final_url = urljoin(iframe_url, playlist_match.group(1))
                    link_type = 'hls'
                    logger_func(f" [Fap-Nation] Found embedded HLS stream in iframe: {final_url}")
            except Exception as e:
                # Best effort: an unreachable iframe must not prevent the fallbacks below.
                logger_func(f" [Fap-Nation] ⚠️ Error fetching or parsing iframe content: {e}")

        if not final_url:
            logger_func(" [Fap-Nation] No stream found in iframe. Checking main page content as a last resort...")
            js_var_match = re.search(r'"(https?://[^"]+\.m3u8)"', html_content, re.IGNORECASE)
            if js_var_match:
                final_url = js_var_match.group(1)
                link_type = 'hls'
                logger_func(f" [Fap-Nation] Found HLS stream on main page: {final_url}")

        # 2. Fallback: If no HLS stream was found, search for direct links.
        if not final_url:
            logger_func(" [Fap-Nation] No HLS stream found. Priority 2 (Fallback): Searching for direct download links...")
            direct_link_pattern = r'href="([^"]+\.(?:mp4|webm|mkv|mov))"[^>]*>'
            direct_links_found = re.findall(direct_link_pattern, html_content, re.IGNORECASE)

            if direct_links_found:
                logger_func(f" [Fap-Nation] Found {len(direct_links_found)} direct media link(s). Selecting the best quality...")
                best_link, quality = _pick_best_direct_link(direct_links_found)
                if quality:
                    logger_func(f" [Fap-Nation] Found '{quality}' link: {best_link}")
                else:
                    logger_func(f" [Fap-Nation] ⚠️ No quality tags (1080p, 720p, etc.) found in links. Defaulting to first link: {best_link}")

                # hrefs may be relative to the page; absolute URLs pass through unchanged.
                final_url = urljoin(album_url, best_link)
                link_type = 'direct'
                logger_func(f" [Fap-Nation] Identified direct media link: {final_url}")

        # If after all checks, we still have no URL, then fail.
        if not final_url:
            logger_func(" [Fap-Nation] ❌ Stage 1 Failed: Could not find any HLS stream or direct link.")
            return None, []

        # --- HLS Quality Selection Logic ---
        if link_type == 'hls':
            logger_func(" [Fap-Nation] HLS stream found. Checking for higher quality variants...")
            try:
                master_playlist_response = scraper.get(final_url, timeout=20)
                master_playlist_response.raise_for_status()

                best_variant = _pick_best_hls_variant(master_playlist_response.text)
                if best_variant:
                    height, variant_uri = best_variant
                    # Variant URIs are usually relative to the master playlist.
                    new_final_url = urljoin(final_url, variant_uri)
                    logger_func(f" [Fap-Nation] ✅ Best quality found: {height}p. Updating URL to: {new_final_url}")
                    final_url = new_final_url
                else:
                    logger_func(" [Fap-Nation] ℹ️ No alternate quality streams found in playlist. Using original.")
            except Exception as e:
                # Best effort: keep the playlist URL already found.
                logger_func(f" [Fap-Nation] ⚠️ Could not parse HLS master playlist for quality selection: {e}. Using original URL.")

        # HLS output is always named .mp4; a direct link keeps its own container
        # extension so that e.g. a .webm file is not mislabelled as .mp4.
        extension = '.mp4'
        if link_type == 'direct':
            extension = os.path.splitext(urlparse(final_url).path)[1].lower() or '.mp4'

        if filename_from_video_tag:
            base_name, _ = os.path.splitext(filename_from_video_tag)
        else:
            base_name = album_slug
        new_filename = f"{base_name}{extension}"

        logger_func(f" [Fap-Nation] ✅ Ready to download '{new_filename}' ({link_type} method).")
        return album_title, [{'url': final_url, 'filename': new_filename, 'type': link_type}]

    except Exception as e:
        # Top-level boundary: callers receive (None, []) instead of an exception.
        logger_func(f" [Fap-Nation] ❌ Error fetching Fap-Nation data: {e}")
        return None, []