import os import re import html import time import urllib.parse import requests from datetime import datetime import cloudscraper def extr(txt, begin, end, default=""): """Stripped-down version of 'extract()' to find text between two delimiters.""" try: first = txt.index(begin) + len(begin) return txt[first:txt.index(end, first)] except (ValueError, IndexError): return default def extract_iter(txt, begin, end): """Yields all occurrences of text between two delimiters.""" try: index = txt.index lbeg = len(begin) lend = len(end) pos = 0 while True: first = index(begin, pos) + lbeg last = index(end, first) pos = last + lend yield txt[first:last] except (ValueError, IndexError): return def nameext_from_url(url): """Extracts filename and extension from a URL.""" data = {} filename = urllib.parse.unquote(url.partition("?")[0].rpartition("/")[2]) name, _, ext = filename.rpartition(".") if name and len(ext) <= 16: data["filename"], data["extension"] = name, ext.lower() else: data["filename"], data["extension"] = filename, "" return data def parse_timestamp(ts, default=None): """Creates a datetime object from a Unix timestamp.""" try: return datetime.fromtimestamp(int(ts)) except (ValueError, TypeError): return default def fetch_erome_data(url, logger): """ Identifies and extracts all media files from an Erome album URL. Args: url (str): The Erome album URL (e.g., https://www.erome.com/a/albumID). logger (function): A function to log progress messages. Returns: tuple: A tuple containing (album_folder_name, list_of_file_dicts). Returns (None, []) if data extraction fails. """ album_id_match = re.search(r"/a/(\w+)", url) if not album_id_match: logger(f"Error: The URL '{url}' does not appear to be a valid Erome album link.") return None, [] album_id = album_id_match.group(1) page_url = f"https://www.erome.com/a/{album_id}" session = cloudscraper.create_scraper() try: logger(f" Fetching Erome album page: {page_url}") for attempt in range(5): response = session.get(page_url, timeout=30) response.raise_for_status() page_content = response.text if "