diff --git a/scanner/scan_disc.py b/scanner/scan_disc.py index c2e6bbf..66a733f 100644 --- a/scanner/scan_disc.py +++ b/scanner/scan_disc.py @@ -2,8 +2,11 @@ """ MeDBia Disc Scanner — macOS client =================================== -Polls the optical drive, reads disc metadata, posts to the remote -videoDB API, then ejects the disc. +Polls the optical drive, reads the disc content (video/data files), +posts to the remote videoDB API, then ejects the disc. + +All discs are treated as data discs containing media files (mp4, mkv, etc.). +Media type is inferred from disc capacity reported by drutil. Setup: pip3 install requests @@ -24,174 +27,176 @@ try: except ImportError: sys.exit("Install requests first: pip3 install requests") -# ── Config (override with environment variables) ─────────────────────────────── +# ── Config ───────────────────────────────────────────────────────────────────── API_URL = os.environ.get("VIDEODB_URL", "http://your-server:6761").rstrip("/") + "/api_ingest.php" API_TOKEN = os.environ.get("VIDEODB_TOKEN", "change_this_secret_token") POLL_SEC = int(os.environ.get("POLL_INTERVAL", "5")) -# videoDB mediatype IDs (must match install.sql) -MEDIATYPE = { - "dvd": 1, - "bluray": 16, - "cd": 18, - "data_cd": 18, +# videoDB mediatype IDs (from install.sql) +MT_DVD = 1 +MT_BLURAY = 16 +MT_CD = 18 + +# Video file extensions to list in the index +VIDEO_EXT = { + ".mp4", ".mkv", ".avi", ".mov", ".m4v", ".ts", ".m2ts", + ".wmv", ".flv", ".webm", ".vob", ".mpg", ".mpeg", ".iso", } -# Known system volumes to ignore when scanning /Volumes/ +# System volumes to ignore when scanning /Volumes/ IGNORE_VOLUMES = {"Macintosh HD", "Preboot", "Recovery", "VM", "Data", "Update"} -# ── Shell helpers ────────────────────────────────────────────────────────────── +# ── Shell helper ─────────────────────────────────────────────────────────────── def run(cmd: list) -> str: - """Run a command, return stdout (empty string on error).""" try: - r = subprocess.run(cmd, capture_output=True, text=True, timeout=15) + r = subprocess.run(cmd, capture_output=True, text=True, timeout=20) return r.stdout except Exception: return "" -# ── Disc detection ───────────────────────────────────────────────────────────── +# ── Disc presence ────────────────────────────────────────────────────────────── def disc_status() -> dict | None: - """ - Returns a dict with keys: drutil_type, device, tracks - Returns None when no disc is present. - """ + """Returns disc info dict, or None when no disc is present.""" out = run(["drutil", "status"]) if not out or "No Media" in out: return None info: dict = {} - # Type: DVD-ROM / Audio CD / CD-ROM / BD-ROM … m = re.search(r"Type:\s+(.+?)(?:\s{3,}|$)", out, re.MULTILINE) if m: info["drutil_type"] = m.group(1).strip() - # Device node: /dev/disk2 m = re.search(r"Name:\s+(/dev/\S+)", out) if m: info["device"] = m.group(1) - # Track count - m = re.search(r"Tracks:\s+(\d+)", out) + # Space Used in GB (e.g. "Space Used: 7.88 GB") + m = re.search(r"Space Used:\s+([\d.]+)\s*GB", out) if m: - info["tracks"] = int(m.group(1)) + info["used_gb"] = float(m.group(1)) return info +# ── Mount ────────────────────────────────────────────────────────────────────── + def find_mount(device: str) -> str | None: - """Find where the optical disc is mounted.""" - # Try 'mount' output first + """Return the mount point for the optical disc.""" for line in run(["mount"]).splitlines(): if device in line: - # "... on /Volumes/FOO (…)" - m = re.search(r" on (/Volumes/\S+)", line) + m = re.search(r" on (/Volumes/[^\s(]+)", line) if m: - return m.group(1).rstrip("()") + return m.group(1) - # Fallback: new entry in /Volumes/ that isn't a system volume - volumes = set(os.listdir("/Volumes/")) - IGNORE_VOLUMES - if volumes: - # Return the first one alphabetically - return f"/Volumes/{sorted(volumes)[0]}" + # Fallback: first non-system entry in /Volumes/ + try: + volumes = set(os.listdir("/Volumes/")) - IGNORE_VOLUMES + if volumes: + return f"/Volumes/{sorted(volumes)[0]}" + except Exception: + pass return None -# ── Disc classification ─────────────────────────────────────────────────────── +# ── Media type from disc capacity ────────────────────────────────────────────── -def classify(mount: str | None, drutil_type: str) -> tuple[int, str]: +def mediatype_from_size(used_gb: float) -> tuple[int, str]: """ - Returns (mediatype_id, label) for videoDB. - Checks filesystem structure first, falls back to drutil type string. + Infer videoDB mediatype from used disc capacity. + Blu-ray discs hold 25/50 GB; DVDs hold ~4.7/8.5 GB; CDs ~0.7 GB. """ - if mount: - p = Path(mount) - if (p / "BDMV").exists(): - return MEDIATYPE["bluray"], "Blu-ray" - if (p / "VIDEO_TS").exists() or (p / "VIDEO_TS.IFO").exists(): - return MEDIATYPE["dvd"], "DVD" + if used_gb > 8.0: + return MT_BLURAY, "Blu-ray" + if used_gb > 0.68: + return MT_DVD, "DVD" + return MT_CD, "CD" + +def mediatype_from_drutil(drutil_type: str) -> tuple[int, str] | None: + """Parse drutil type string if available.""" t = drutil_type.upper() - if "AUDIO" in t: - return MEDIATYPE["cd"], "Audio CD" if "BD" in t: - return MEDIATYPE["bluray"], "Blu-ray" + return MT_BLURAY, "Blu-ray" if "DVD" in t: - return MEDIATYPE["dvd"], "DVD" - - return MEDIATYPE["cd"], "CD/Data" + return MT_DVD, "DVD" + if "CD" in t: + return MT_CD, "CD" + return None -def volume_label(mount: str | None, device: str) -> str: - """Get the disc's volume label.""" - if mount: - label = os.path.basename(mount) - if label: - return label +# ── File listing ─────────────────────────────────────────────────────────────── - if device: - for line in run(["diskutil", "info", device]).splitlines(): - if "Volume Name" in line: - return line.split(":", 1)[-1].strip() - - return "Unknown Disc" +def list_video_files(mount: str) -> list[str]: + """Return relative paths of all video files on the disc.""" + found = [] + try: + for root, _dirs, files in os.walk(mount): + for f in files: + if Path(f).suffix.lower() in VIDEO_EXT: + rel = os.path.relpath(os.path.join(root, f), mount) + found.append(rel) + except PermissionError: + pass + return sorted(found) -def disc_size_bytes(mount: str | None) -> int: - """Total used space on the disc in bytes.""" - if not mount: - return 0 +def all_files_count(mount: str) -> int: + """Count every file on the disc (for discs with no video files).""" + count = 0 + try: + for _root, _dirs, files in os.walk(mount): + count += len(files) + except PermissionError: + pass + return count + + +def disc_size_bytes(mount: str) -> int: + """Used space in bytes via df.""" out = run(["df", "-k", mount]) for line in out.splitlines()[1:]: parts = line.split() if len(parts) >= 3: try: - return int(parts[2]) * 1024 # 'Used' column, KB→B + return int(parts[2]) * 1024 except ValueError: pass return 0 -def sample_files(mount: str | None, limit: int = 30) -> list[str]: - """Return a sample of file paths on the disc.""" - if not mount or not os.path.exists(mount): - return [] - found = [] - try: - for root, _dirs, files in os.walk(mount): - for f in files: - rel = os.path.relpath(os.path.join(root, f), mount) - found.append(rel) - if len(found) >= limit: - return found - except PermissionError: - pass - return found +# ── Volume label ─────────────────────────────────────────────────────────────── + +def volume_label(mount: str | None, device: str) -> str: + if mount: + label = os.path.basename(mount) + if label: + return label + if device: + for line in run(["diskutil", "info", device]).splitlines(): + if "Volume Name" in line: + return line.split(":", 1)[-1].strip() + return "Unknown Disc" -# ── Eject ───────────────────────────────────────────────────────────────────── +# ── Eject ────────────────────────────────────────────────────────────────────── def eject(mount: str | None): - """Eject the disc. Try diskutil first, fall back to drutil.""" if mount: - result = subprocess.run( - ["diskutil", "eject", mount], - capture_output=True, text=True - ) - if result.returncode == 0: + r = subprocess.run(["diskutil", "eject", mount], capture_output=True) + if r.returncode == 0: return run(["drutil", "eject"]) -# ── API submission ──────────────────────────────────────────────────────────── +# ── API submission ───────────────────────────────────────────────────────────── def submit(payload: dict) -> bool: - """POST disc data to the videoDB API. Returns True on success.""" try: resp = requests.post( API_URL, @@ -201,20 +206,21 @@ def submit(payload: dict) -> bool: ) if resp.status_code == 200: data = resp.json() - print(f" [OK] Indexed as entry #{data.get('id', '?')}: {payload['title']}") + print(f" [OK] Entry #{data.get('id', '?')}: {payload['title']}") return True else: - print(f" [ERR] API {resp.status_code}: {resp.text[:200]}") + print(f" [ERR] API {resp.status_code}: {resp.text[:300]}") return False except requests.ConnectionError: - print(f" [ERR] Cannot reach {API_URL} — check VIDEODB_URL and network") + print(f" [ERR] Cannot reach {API_URL}") + print(f" Check VIDEODB_URL and that the server is up.") return False except Exception as e: print(f" [ERR] {e}") return False -# ── Main scan ───────────────────────────────────────────────────────────────── +# ── Main scan routine ────────────────────────────────────────────────────────── def scan_and_submit(): status = disc_status() @@ -223,58 +229,70 @@ def scan_and_submit(): drutil_type = status.get("drutil_type", "") device = status.get("device", "") - tracks = status.get("tracks", 0) + used_gb = status.get("used_gb", 0.0) print(f" drutil type : {drutil_type}") print(f" device : {device}") - print(f" tracks : {tracks}") + print(f" used : {used_gb:.2f} GB") - # Give macOS a moment to finish mounting - time.sleep(3) + # Give macOS a moment to finish mounting the filesystem + time.sleep(4) mount = find_mount(device) - print(f" mount point : {mount or '(not mounted)'}") + print(f" mount : {mount or '(not mounted)'}") - mediatype_id, mediatype_name = classify(mount, drutil_type) - title = volume_label(mount, device) - size = disc_size_bytes(mount) - files = sample_files(mount) - - # Build a short content summary for the 'comment' field - if tracks and mediatype_id == MEDIATYPE["cd"]: - summary = f"{tracks} audio tracks" - elif files: - summary = f"{len(files)} files" - if len(files) <= 10: - summary += ": " + ", ".join(Path(f).name for f in files[:10]) + # Determine media type — prefer drutil string, fall back to size + mt = mediatype_from_drutil(drutil_type) + if mt: + mediatype_id, mediatype_name = mt else: - summary = "" + mediatype_id, mediatype_name = mediatype_from_size(used_gb) + + title = volume_label(mount, device) + size_b = disc_size_bytes(mount) if mount else int(used_gb * 1024**3) + + # Build file listing + video_files = list_video_files(mount) if mount else [] + total_files = all_files_count(mount) if mount else 0 + + if video_files: + # List video file names (not full paths) for the comment field + names = [Path(f).name for f in video_files] + summary = f"{len(video_files)} video files: " + ", ".join(names[:10]) + if len(names) > 10: + summary += f" … +{len(names) - 10} more" + # Store full file list in custom2 (255 char limit — truncate gracefully) + file_detail = "\n".join(video_files) + else: + summary = f"{total_files} files (no video files detected)" + file_detail = "" payload = { "title": title, "mediatype": mediatype_id, "comment": summary[:255], - "filesize": size, + "filesize": size_b, "disklabel": title[:32], - "custom1": drutil_type[:255], # raw disc type string - "custom2": str(tracks) if tracks else str(len(files)), # track/file count + "custom1": drutil_type[:255], + "custom2": str(len(video_files) or total_files), } - print(f"\n Submitting [{mediatype_name}] \"{title}\" ({size // (1024*1024)} MB)") + print(f"\n [{mediatype_name}] \"{title}\"") + print(f" {summary[:120]}") ok = submit(payload) - print(" Ejecting...") + print(" Ejecting disc...") eject(mount) return ok -# ── Entry point ─────────────────────────────────────────────────────────────── +# ── Entry point ──────────────────────────────────────────────────────────────── def main(): print("=" * 55) print(" MeDBia Disc Scanner") - print(f" API : {API_URL}") - print(f" Poll: every {POLL_SEC}s") + print(f" API : {API_URL}") + print(f" Poll : every {POLL_SEC}s") print("=" * 55) print("Insert a disc to index it. Ctrl-C to stop.\n") @@ -287,11 +305,10 @@ def main(): was_present = True print("Disc detected!") scan_and_submit() - was_present = False # reset — disc was ejected + was_present = False print("\nReady — insert next disc.\n") elif not status and was_present: - # Disc manually removed before scan completed was_present = False time.sleep(POLL_SEC) diff --git a/videodb/Dockerfile b/videodb/Dockerfile index 6af5bc9..b458b72 100644 --- a/videodb/Dockerfile +++ b/videodb/Dockerfile @@ -5,6 +5,7 @@ RUN apt-get update && apt-get install -y \ libpng-dev \ libjpeg-dev \ libfreetype6-dev \ + libonig-dev \ default-mysql-client \ && rm -rf /var/lib/apt/lists/*