fix: add libonig-dev for mbstring; update scanner for data discs

Dockerfile: add libonig-dev (oniguruma) — required by mbstring extension

scanner/scan_disc.py:
- Treat all discs as data discs (mp4/mkv/etc), no VIDEO_TS/BDMV logic
- List video files by extension (.mp4 .mkv .avi .mov .m4v .ts .m2ts …)
- Infer media type from drutil type string, fall back to used_gb capacity
  (>8 GB → Blu-ray, >0.68 GB → DVD, smaller → CD)
- Store video file names in comment field, count in custom2

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-11 10:12:34 +02:00
parent 6002fc6e58
commit 451afc0440
2 changed files with 140 additions and 122 deletions

View File

@@ -2,8 +2,11 @@
"""
MeDBia Disc Scanner — macOS client
===================================
Polls the optical drive, reads disc metadata, posts to the remote
videoDB API, then ejects the disc.
Polls the optical drive, reads the disc content (video/data files),
posts to the remote videoDB API, then ejects the disc.
All discs are treated as data discs containing media files (mp4, mkv, etc.).
Media type is taken from the drutil type string, falling back to the used capacity reported by drutil.
Setup:
pip3 install requests
@@ -24,174 +27,176 @@ try:
except ImportError:
sys.exit("Install requests first: pip3 install requests")
# ── Config (override with environment variables) ───────────────────────────────
# ── Config ─────────────────────────────────────────────────────────────────────
API_URL = os.environ.get("VIDEODB_URL", "http://your-server:6761").rstrip("/") + "/api_ingest.php"
API_TOKEN = os.environ.get("VIDEODB_TOKEN", "change_this_secret_token")
POLL_SEC = int(os.environ.get("POLL_INTERVAL", "5"))
# videoDB mediatype IDs (must match install.sql)
MEDIATYPE = {
"dvd": 1,
"bluray": 16,
"cd": 18,
"data_cd": 18,
# videoDB mediatype IDs (from install.sql)
MT_DVD = 1
MT_BLURAY = 16
MT_CD = 18
# Video file extensions to list in the index
VIDEO_EXT = {
".mp4", ".mkv", ".avi", ".mov", ".m4v", ".ts", ".m2ts",
".wmv", ".flv", ".webm", ".vob", ".mpg", ".mpeg", ".iso",
}
# Known system volumes to ignore when scanning /Volumes/
# System volumes to ignore when scanning /Volumes/
IGNORE_VOLUMES = {"Macintosh HD", "Preboot", "Recovery", "VM", "Data", "Update"}
# ── Shell helpers ──────────────────────────────────────────────────────────────
# ── Shell helper ──────────────────────────────────────────────────────────────
def run(cmd: list) -> str:
    """Run a command once and return its stdout.

    Args:
        cmd: Argument vector handed to ``subprocess.run`` (no shell involved).

    Returns:
        The captured stdout as text. The exit status is not inspected, so
        output of a failing command is still returned. An empty string is
        returned when the command cannot be started, raises, or exceeds the
        20-second timeout.
    """
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, timeout=20)
    except Exception:
        # Deliberate best-effort: callers treat "" as "no information".
        return ""
    return completed.stdout
# ── Disc detection ─────────────────────────────────────────────────────────────
# ── Disc presence ──────────────────────────────────────────────────────────────
def disc_status() -> dict | None:
"""
Returns a dict with keys: drutil_type, device, tracks
Returns None when no disc is present.
"""
"""Returns disc info dict, or None when no disc is present."""
out = run(["drutil", "status"])
if not out or "No Media" in out:
return None
info: dict = {}
# Type: DVD-ROM / Audio CD / CD-ROM / BD-ROM …
m = re.search(r"Type:\s+(.+?)(?:\s{3,}|$)", out, re.MULTILINE)
if m:
info["drutil_type"] = m.group(1).strip()
# Device node: /dev/disk2
m = re.search(r"Name:\s+(/dev/\S+)", out)
if m:
info["device"] = m.group(1)
# Track count
m = re.search(r"Tracks:\s+(\d+)", out)
# Space Used in GB (e.g. "Space Used: 7.88 GB")
m = re.search(r"Space Used:\s+([\d.]+)\s*GB", out)
if m:
info["tracks"] = int(m.group(1))
info["used_gb"] = float(m.group(1))
return info
# ── Mount ──────────────────────────────────────────────────────────────────────
def find_mount(device: str) -> str | None:
    """Return the mount point for the optical disc.

    Args:
        device: Device node such as ``/dev/disk2``; may be empty when drutil
            did not report one.

    Returns:
        The ``/Volumes/...`` path of the matching entry in ``mount`` output.
        If none matches, the alphabetically first entry of ``/Volumes/`` that
        is not listed in IGNORE_VOLUMES. None when neither exists.
    """
    if device:
        # Accept the node itself or one of its slices (disk2, disk2s1), but
        # not a different disk that merely shares the prefix (disk20).
        node_re = re.compile(re.escape(device) + r"(?:s\d+)*")
        for line in run(["mount"]).splitlines():
            # "<node> on /Volumes/NAME (opts, ...)" - NAME may contain spaces,
            # so capture up to the trailing parenthesised option list.
            m = re.match(r"(\S+) on (/Volumes/.+?)(?: \([^()]*\))?$", line)
            if m and node_re.fullmatch(m.group(1)):
                return m.group(2)

    # Fallback: first non-system entry in /Volumes/
    try:
        candidates = sorted(set(os.listdir("/Volumes/")) - IGNORE_VOLUMES)
    except OSError:
        return None
    if candidates:
        return f"/Volumes/{candidates[0]}"
    return None
# ── Disc classification ───────────────────────────────────────────────────────
# ── Media type from disc capacity ──────────────────────────────────────────────
def classify(mount: str | None, drutil_type: str) -> tuple[int, str]:
def mediatype_from_size(used_gb: float) -> tuple[int, str]:
    """
    Map the amount of data on the disc to a videoDB mediatype.

    Returns an (id, label) pair: Blu-ray above 8.0, DVD above 0.68,
    otherwise CD. The decision uses *used* space, so a lightly filled
    large-format disc lands in a smaller tier.
    """
    tiers = (
        (8.0, MT_BLURAY, "Blu-ray"),
        (0.68, MT_DVD, "DVD"),
    )
    for floor, mt_id, label in tiers:
        if used_gb > floor:
            return mt_id, label
    return MT_CD, "CD"
def mediatype_from_drutil(drutil_type: str) -> tuple[int, str] | None:
    """Translate a drutil type string into a videoDB (id, label) pair.

    Matching is case-insensitive and checked in the order BD, DVD, CD.
    Returns None when none of those tokens occurs in the string.
    """
    upper = drutil_type.upper()
    known = (
        ("BD", MT_BLURAY, "Blu-ray"),
        ("DVD", MT_DVD, "DVD"),
        ("CD", MT_CD, "CD"),
    )
    for token, mt_id, label in known:
        if token in upper:
            return mt_id, label
    return None
def volume_label(mount: str | None, device: str) -> str:
"""Get the disc's volume label."""
if mount:
label = os.path.basename(mount)
if label:
return label
# ── File listing ───────────────────────────────────────────────────────────────
if device:
for line in run(["diskutil", "info", device]).splitlines():
if "Volume Name" in line:
return line.split(":", 1)[-1].strip()
return "Unknown Disc"
def list_video_files(mount: str, extensions=None) -> list[str]:
    """Return relative paths of all video files on the disc.

    Args:
        mount: Directory to walk recursively.
        extensions: Optional iterable of suffixes including the dot
            (e.g. ``{".mp4", ".mkv"}``); compared case-insensitively.
            Defaults to the module-level VIDEO_EXT set.

    Returns:
        Sorted paths relative to ``mount``. A missing or unreadable mount
        yields an empty list.
    """
    if extensions is None:
        wanted = VIDEO_EXT
    else:
        wanted = {ext.lower() for ext in extensions}

    found = []
    try:
        for root, _dirs, files in os.walk(mount):
            for name in files:
                # "._name" entries are macOS AppleDouble metadata sidecars that
                # share the real file's extension; they are not playable media.
                if name.startswith("._"):
                    continue
                if Path(name).suffix.lower() in wanted:
                    found.append(os.path.relpath(os.path.join(root, name), mount))
    except OSError:
        # Best effort: keep whatever was collected before the failure.
        pass
    return sorted(found)
def disc_size_bytes(mount: str | None) -> int:
"""Total used space on the disc in bytes."""
if not mount:
return 0
def all_files_count(mount: str) -> int:
    """Return how many files exist anywhere below ``mount``.

    Used for discs on which no video files were found. If a
    PermissionError interrupts the walk, the count so far is returned.
    """
    total = 0
    walker = os.walk(mount)
    try:
        for entry in walker:
            # entry is (dirpath, dirnames, filenames)
            total += len(entry[2])
    except PermissionError:
        pass
    return total
def disc_size_bytes(mount: str) -> int:
    """Return the used space of ``mount`` in bytes, as reported by ``df -k``.

    The third column of the first parsable data row is read as a count of
    1024-byte blocks. Returns 0 when no row can be parsed.
    """
    rows = run(["df", "-k", mount]).splitlines()
    for row in rows[1:]:  # rows[0] is the column header
        columns = row.split()
        if len(columns) < 3:
            continue
        try:
            used_kb = int(columns[2])
        except ValueError:
            continue
        return used_kb * 1024
    return 0
def sample_files(mount: str | None, limit: int = 30) -> list[str]:
"""Return a sample of file paths on the disc."""
if not mount or not os.path.exists(mount):
return []
found = []
try:
for root, _dirs, files in os.walk(mount):
for f in files:
rel = os.path.relpath(os.path.join(root, f), mount)
found.append(rel)
if len(found) >= limit:
return found
except PermissionError:
pass
return found
# ── Volume label ───────────────────────────────────────────────────────────────
def volume_label(mount: str | None, device: str) -> str:
if mount:
label = os.path.basename(mount)
if label:
return label
if device:
for line in run(["diskutil", "info", device]).splitlines():
if "Volume Name" in line:
return line.split(":", 1)[-1].strip()
return "Unknown Disc"
# ── Eject ─────────────────────────────────────────────────────────────────────
# ── Eject ─────────────────────────────────────────────────────────────────────
def eject(mount: str | None):
    """Eject the disc. Try diskutil first, fall back to drutil.

    Args:
        mount: Mount point of the disc, or None when it is not mounted.

    ``diskutil eject <mount>`` is attempted once when a mount point is
    known. If it cannot be started, times out, or exits non-zero,
    ``drutil eject`` is issued instead.
    """
    if mount:
        try:
            done = subprocess.run(
                ["diskutil", "eject", mount],
                capture_output=True,
                timeout=60,
            )
        except (OSError, subprocess.SubprocessError):
            # A missing binary or a hung diskutil must not block the fallback.
            done = None
        if done is not None and done.returncode == 0:
            return
    run(["drutil", "eject"])
# ── API submission ────────────────────────────────────────────────────────────
# ── API submission ────────────────────────────────────────────────────────────
def submit(payload: dict) -> bool:
"""POST disc data to the videoDB API. Returns True on success."""
try:
resp = requests.post(
API_URL,
@@ -201,20 +206,21 @@ def submit(payload: dict) -> bool:
)
if resp.status_code == 200:
data = resp.json()
print(f" [OK] Indexed as entry #{data.get('id', '?')}: {payload['title']}")
print(f" [OK] Entry #{data.get('id', '?')}: {payload['title']}")
return True
else:
print(f" [ERR] API {resp.status_code}: {resp.text[:200]}")
print(f" [ERR] API {resp.status_code}: {resp.text[:300]}")
return False
except requests.ConnectionError:
print(f" [ERR] Cannot reach {API_URL} — check VIDEODB_URL and network")
print(f" [ERR] Cannot reach {API_URL}")
print(f" Check VIDEODB_URL and that the server is up.")
return False
except Exception as e:
print(f" [ERR] {e}")
return False
# ── Main scan ─────────────────────────────────────────────────────────────────
# ── Main scan routine ──────────────────────────────────────────────────────────
def scan_and_submit():
status = disc_status()
@@ -223,58 +229,70 @@ def scan_and_submit():
drutil_type = status.get("drutil_type", "")
device = status.get("device", "")
tracks = status.get("tracks", 0)
used_gb = status.get("used_gb", 0.0)
print(f" drutil type : {drutil_type}")
print(f" device : {device}")
print(f" tracks : {tracks}")
print(f" used : {used_gb:.2f} GB")
# Give macOS a moment to finish mounting
time.sleep(3)
# Give macOS a moment to finish mounting the filesystem
time.sleep(4)
mount = find_mount(device)
print(f" mount point : {mount or '(not mounted)'}")
print(f" mount : {mount or '(not mounted)'}")
mediatype_id, mediatype_name = classify(mount, drutil_type)
title = volume_label(mount, device)
size = disc_size_bytes(mount)
files = sample_files(mount)
# Build a short content summary for the 'comment' field
if tracks and mediatype_id == MEDIATYPE["cd"]:
summary = f"{tracks} audio tracks"
elif files:
summary = f"{len(files)} files"
if len(files) <= 10:
summary += ": " + ", ".join(Path(f).name for f in files[:10])
# Determine media type — prefer drutil string, fall back to size
mt = mediatype_from_drutil(drutil_type)
if mt:
mediatype_id, mediatype_name = mt
else:
summary = ""
mediatype_id, mediatype_name = mediatype_from_size(used_gb)
title = volume_label(mount, device)
size_b = disc_size_bytes(mount) if mount else int(used_gb * 1024**3)
# Build file listing
video_files = list_video_files(mount) if mount else []
total_files = all_files_count(mount) if mount else 0
if video_files:
# List video file names (not full paths) for the comment field
names = [Path(f).name for f in video_files]
summary = f"{len(video_files)} video files: " + ", ".join(names[:10])
if len(names) > 10:
summary += f" … +{len(names) - 10} more"
# Full relative paths of the video files. NOTE: not sent yet — custom2 below carries only the file count.
file_detail = "\n".join(video_files)
else:
summary = f"{total_files} files (no video files detected)"
file_detail = ""
payload = {
"title": title,
"mediatype": mediatype_id,
"comment": summary[:255],
"filesize": size,
"filesize": size_b,
"disklabel": title[:32],
"custom1": drutil_type[:255], # raw disc type string
"custom2": str(tracks) if tracks else str(len(files)), # track/file count
"custom1": drutil_type[:255],
"custom2": str(len(video_files) or total_files),
}
print(f"\n Submitting [{mediatype_name}] \"{title}\" ({size // (1024*1024)} MB)")
print(f"\n [{mediatype_name}] \"{title}\"")
print(f" {summary[:120]}")
ok = submit(payload)
print(" Ejecting...")
print(" Ejecting disc...")
eject(mount)
return ok
# ── Entry point ───────────────────────────────────────────────────────────────
# ── Entry point ───────────────────────────────────────────────────────────────
def main():
print("=" * 55)
print(" MeDBia Disc Scanner")
print(f" API : {API_URL}")
print(f" Poll: every {POLL_SEC}s")
print(f" API : {API_URL}")
print(f" Poll : every {POLL_SEC}s")
print("=" * 55)
print("Insert a disc to index it. Ctrl-C to stop.\n")
@@ -287,11 +305,10 @@ def main():
was_present = True
print("Disc detected!")
scan_and_submit()
was_present = False # reset — disc was ejected
was_present = False
print("\nReady — insert next disc.\n")
elif not status and was_present:
# Disc manually removed before scan completed
was_present = False
time.sleep(POLL_SEC)

View File

@@ -5,6 +5,7 @@ RUN apt-get update && apt-get install -y \
libpng-dev \
libjpeg-dev \
libfreetype6-dev \
libonig-dev \
default-mysql-client \
&& rm -rf /var/lib/apt/lists/*