import os import re as re_module import html import urllib.parse import requests PATTERN_CACHE = {} def re(pattern): """Compile a regular expression pattern and cache it.""" try: return PATTERN_CACHE[pattern] except KeyError: p = PATTERN_CACHE[pattern] = re_module.compile(pattern) return p def extract_from(txt, pos=None, default=""): """Returns a function that extracts text between two delimiters from 'txt'.""" def extr(begin, end, index=txt.find, txt=txt): nonlocal pos try: start_pos = pos if pos is not None else 0 first = index(begin, start_pos) + len(begin) last = index(end, first) if pos is not None: pos = last + len(end) return txt[first:last] except (ValueError, IndexError): return default return extr def nameext_from_url(url): """Extract filename and extension from a URL.""" data = {} filename = urllib.parse.unquote(url.partition("?")[0].rpartition("/")[2]) name, _, ext = filename.rpartition(".") if name and len(ext) <= 16: data["filename"], data["extension"] = name, ext.lower() else: data["filename"], data["extension"] = filename, "" return data class BaseExtractor: """A simplified base class for extractors.""" def __init__(self, match, session, logger): self.match = match self.groups = match.groups() self.session = session self.log = logger def request(self, url, **kwargs): """Makes an HTTP request using the session.""" try: response = self.session.get(url, **kwargs) response.raise_for_status() return response except requests.exceptions.RequestException as e: self.log(f"Error making request to {url}: {e}") return None class SaintAlbumExtractor(BaseExtractor): """Extractor for saint.su albums.""" root = "https://saint2.su" pattern = re(r"(?:https?://)?saint\d*\.(?:su|pk|cr|to)/a/([^/?#]+)") def items(self): """Generator that yields all files from an album.""" album_id = self.groups[0] response = self.request(f"{self.root}/a/{album_id}") if not response: return None, [] extr = extract_from(response.text) title = extr("