feat: photo gallery indexing — one record per folder, full filename search

scanner/scan_disc.py:
- Inventory disc into video files + photo folders (grouped by directory)
- Photo extensions: jpg/jpeg/png/gif/tiff/tif/bmp/heic/heif/webp + raw formats
  (raw cr2 cr3 nef arw dng orf rw2 raf)
- Video disc   → 1 record per disc with video file list
- Photo disc   → 1 record per gallery folder with photo list
- Mixed disc   → both: 1 video record + 1 record per photo folder
- Unknown disc → 1 fallback record with total file count
- Folder title format: "DISC_LABEL — Folder / Subfolder"

videodb/api_ingest.php:
- Add subtitle field (gallery folder path)
- Add plot field (TEXT — full filename list, no 255-char limit)
- Add custom3 field (content type: video | photo | data | unknown; a mixed disc
  is stored as one "video" record plus one "photo" record per folder)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-11 10:15:56 +02:00
parent 451afc0440
commit 7d401b1963
2 changed files with 198 additions and 104 deletions

View File

@@ -2,11 +2,12 @@
""" """
MeDBia Disc Scanner — macOS client MeDBia Disc Scanner — macOS client
=================================== ===================================
Polls the optical drive, reads the disc content (video/data files), Polls the optical drive, inspects disc contents, and submits records
posts to the remote videoDB API, then ejects the disc. to the remote videoDB API:
All discs are treated as data discs containing media files (mp4, mkv, etc.). • Video discs → one record per disc, listing video filenames
Media type is inferred from disc capacity reported by drutil. • Photo discs → one record per gallery folder, listing photo filenames
• Mixed discs → both: a video record + one record per photo folder
Setup: Setup:
pip3 install requests pip3 install requests
@@ -20,6 +21,7 @@ import re
import sys import sys
import time import time
import subprocess import subprocess
from collections import defaultdict
from pathlib import Path from pathlib import Path
try: try:
@@ -37,13 +39,17 @@ MT_DVD = 1
MT_BLURAY = 16 MT_BLURAY = 16
MT_CD = 18 MT_CD = 18
# Video file extensions to list in the index
VIDEO_EXT = { VIDEO_EXT = {
".mp4", ".mkv", ".avi", ".mov", ".m4v", ".ts", ".m2ts", ".mp4", ".mkv", ".avi", ".mov", ".m4v", ".ts", ".m2ts", ".mts",
".wmv", ".flv", ".webm", ".vob", ".mpg", ".mpeg", ".iso", ".wmv", ".flv", ".webm", ".vob", ".mpg", ".mpeg", ".iso", ".divx",
}
PHOTO_EXT = {
".jpg", ".jpeg", ".png", ".gif", ".tiff", ".tif", ".bmp",
".heic", ".heif", ".webp",
".raw", ".cr2", ".cr3", ".nef", ".arw", ".dng", ".orf", ".rw2", ".raf",
} }
# System volumes to ignore when scanning /Volumes/
IGNORE_VOLUMES = {"Macintosh HD", "Preboot", "Recovery", "VM", "Data", "Update"} IGNORE_VOLUMES = {"Macintosh HD", "Preboot", "Recovery", "VM", "Data", "Update"}
@@ -60,66 +66,43 @@ def run(cmd: list) -> str:
# ── Disc presence ────────────────────────────────────────────────────────────── # ── Disc presence ──────────────────────────────────────────────────────────────
def disc_status() -> dict | None: def disc_status() -> dict | None:
"""Returns disc info dict, or None when no disc is present."""
out = run(["drutil", "status"]) out = run(["drutil", "status"])
if not out or "No Media" in out: if not out or "No Media" in out:
return None return None
info: dict = {} info: dict = {}
m = re.search(r"Type:\s+(.+?)(?:\s{3,}|$)", out, re.MULTILINE) m = re.search(r"Type:\s+(.+?)(?:\s{3,}|$)", out, re.MULTILINE)
if m: if m:
info["drutil_type"] = m.group(1).strip() info["drutil_type"] = m.group(1).strip()
m = re.search(r"Name:\s+(/dev/\S+)", out) m = re.search(r"Name:\s+(/dev/\S+)", out)
if m: if m:
info["device"] = m.group(1) info["device"] = m.group(1)
# Space Used in GB (e.g. "Space Used: 7.88 GB")
m = re.search(r"Space Used:\s+([\d.]+)\s*GB", out) m = re.search(r"Space Used:\s+([\d.]+)\s*GB", out)
if m: if m:
info["used_gb"] = float(m.group(1)) info["used_gb"] = float(m.group(1))
return info return info
# ── Mount ────────────────────────────────────────────────────────────────────── # ── Mount ──────────────────────────────────────────────────────────────────────
def find_mount(device: str) -> str | None: def find_mount(device: str) -> str | None:
"""Return the mount point for the optical disc."""
for line in run(["mount"]).splitlines(): for line in run(["mount"]).splitlines():
if device in line: if device in line:
m = re.search(r" on (/Volumes/[^\s(]+)", line) m = re.search(r" on (/Volumes/[^\s(]+)", line)
if m: if m:
return m.group(1) return m.group(1)
# Fallback: first non-system entry in /Volumes/
try: try:
volumes = set(os.listdir("/Volumes/")) - IGNORE_VOLUMES volumes = set(os.listdir("/Volumes/")) - IGNORE_VOLUMES
if volumes: if volumes:
return f"/Volumes/{sorted(volumes)[0]}" return f"/Volumes/{sorted(volumes)[0]}"
except Exception: except Exception:
pass pass
return None return None
# ── Media type from disc capacity ────────────────────────────────────────────── # ── Media type inference ───────────────────────────────────────────────────────
def mediatype_from_size(used_gb: float) -> tuple[int, str]:
"""
Infer videoDB mediatype from used disc capacity.
Blu-ray discs hold 25/50 GB; DVDs hold ~4.7/8.5 GB; CDs ~0.7 GB.
"""
if used_gb > 8.0:
return MT_BLURAY, "Blu-ray"
if used_gb > 0.68:
return MT_DVD, "DVD"
return MT_CD, "CD"
def mediatype_from_drutil(drutil_type: str) -> tuple[int, str] | None: def mediatype_from_drutil(drutil_type: str) -> tuple[int, str] | None:
"""Parse drutil type string if available."""
t = drutil_type.upper() t = drutil_type.upper()
if "BD" in t: if "BD" in t:
return MT_BLURAY, "Blu-ray" return MT_BLURAY, "Blu-ray"
@@ -129,36 +112,49 @@ def mediatype_from_drutil(drutil_type: str) -> tuple[int, str] | None:
return MT_CD, "CD" return MT_CD, "CD"
return None return None
def mediatype_from_size(used_gb: float) -> tuple[int, str]:
    """Infer the videoDB media type from the used disc capacity in GB.

    Anything above 8.0 GB is classified as Blu-ray, anything above 0.68 GB
    as DVD, and everything else as CD.

    Returns:
        A ``(mediatype_id, display_name)`` tuple.
    """
    # Ordered from largest to smallest so the first matching floor wins.
    size_floors = (
        (8.0, MT_BLURAY, "Blu-ray"),
        (0.68, MT_DVD, "DVD"),
    )
    for floor_gb, type_id, type_name in size_floors:
        if used_gb > floor_gb:
            return type_id, type_name
    return MT_CD, "CD"
# ── File listing ───────────────────────────────────────────────────────────────
def list_video_files(mount: str) -> list[str]: # ── Disc inventory ─────────────────────────────────────────────────────────────
"""Return relative paths of all video files on the disc."""
found = [] def inventory_disc(mount: str) -> dict:
"""
Walk the disc and return:
video_files: list of relative paths to video files
photo_folders: dict of { relative_folder_path -> [photo filenames] }
"""
video_files: list[str] = []
# folder path (relative to mount) -> list of photo filenames in that folder
photo_folders: dict[str, list[str]] = defaultdict(list)
try: try:
for root, _dirs, files in os.walk(mount): for root, _dirs, files in os.walk(mount):
for f in files: for fname in files:
if Path(f).suffix.lower() in VIDEO_EXT: ext = Path(fname).suffix.lower()
rel = os.path.relpath(os.path.join(root, f), mount) rel_path = os.path.relpath(os.path.join(root, fname), mount)
found.append(rel) rel_folder = os.path.relpath(root, mount)
if rel_folder == ".":
rel_folder = "(root)"
if ext in VIDEO_EXT:
video_files.append(rel_path)
elif ext in PHOTO_EXT:
photo_folders[rel_folder].append(fname)
except PermissionError: except PermissionError:
pass pass
return sorted(found)
return {
def all_files_count(mount: str) -> int: "video_files": sorted(video_files),
"""Count every file on the disc (for discs with no video files).""" "photo_folders": dict(photo_folders),
count = 0 }
try:
for _root, _dirs, files in os.walk(mount):
count += len(files)
except PermissionError:
pass
return count
def disc_size_bytes(mount: str) -> int: def disc_size_bytes(mount: str) -> int:
"""Used space in bytes via df."""
out = run(["df", "-k", mount]) out = run(["df", "-k", mount])
for line in out.splitlines()[1:]: for line in out.splitlines()[1:]:
parts = line.split() parts = line.split()
@@ -170,8 +166,6 @@ def disc_size_bytes(mount: str) -> int:
return 0 return 0
# ── Volume label ───────────────────────────────────────────────────────────────
def volume_label(mount: str | None, device: str) -> str: def volume_label(mount: str | None, device: str) -> str:
if mount: if mount:
label = os.path.basename(mount) label = os.path.basename(mount)
@@ -206,20 +200,85 @@ def submit(payload: dict) -> bool:
) )
if resp.status_code == 200: if resp.status_code == 200:
data = resp.json() data = resp.json()
print(f" [OK] Entry #{data.get('id', '?')}: {payload['title']}") print(f" [OK] Entry #{data.get('id', '?')}: {payload['title']}")
return True return True
else: else:
print(f" [ERR] API {resp.status_code}: {resp.text[:300]}") print(f" [ERR] API {resp.status_code}: {resp.text[:300]}")
return False return False
except requests.ConnectionError: except requests.ConnectionError:
print(f" [ERR] Cannot reach {API_URL}") print(f" [ERR] Cannot reach {API_URL}")
print(f" Check VIDEODB_URL and that the server is up.")
return False return False
except Exception as e: except Exception as e:
print(f" [ERR] {e}") print(f" [ERR] {e}")
return False return False
# ── Record builders ────────────────────────────────────────────────────────────
def build_video_record(disc_label: str, disklabel: str, video_files: list[str],
                       mediatype_id: int, drutil_type: str, size_b: int) -> dict:
    """Build the single API payload that describes the video files on a disc.

    Args:
        disc_label: Disc title, used verbatim as the record title.
        disklabel: Value for the ``disklabel`` field.
        video_files: Paths of the video files found on the disc; only the
            final path component of each is kept.
        mediatype_id: videoDB media type id.
        drutil_type: Disc type string, stored in ``custom1``.
        size_b: Used disc size in bytes, stored in ``filesize``.

    Returns:
        A dict ready to be submitted to the ingest API, with ``custom3``
        set to ``"video"``.
    """
    names = [Path(f).name for f in video_files]
    count = len(names)

    # Short preview for the comment field (VARCHAR 255); the complete list
    # goes into the TEXT ``plot`` field below, which has no length limit.
    preview = ", ".join(names[:8])
    if count > 8:
        preview += f" … +{count - 8} more"
    noun = "video file" if count == 1 else "video files"
    comment = f"{count} {noun}: {preview}"

    return {
        "title": disc_label,
        "subtitle": "",
        "mediatype": mediatype_id,
        "comment": comment[:255],
        "plot": "\n".join(names),
        "filesize": size_b,
        "disklabel": disklabel,
        "custom1": drutil_type,
        "custom2": str(count),
        "custom3": "video",
    }
def build_photo_records(disc_label: str, disklabel: str, photo_folders: dict[str, list[str]],
                        mediatype_id: int, drutil_type: str, size_b: int) -> list[dict]:
    """Build one API payload per photo gallery folder.

    Args:
        disc_label: Disc title; prefixes every record title.
        disklabel: Value for the ``disklabel`` field.
        photo_folders: Mapping of folder path (``"(root)"`` for the disc
            root) to the photo filenames found in that folder.
        mediatype_id: videoDB media type id.
        drutil_type: Disc type string, stored in ``custom1``.
        size_b: Accepted for signature parity with the video builder but not
            used; per-folder records are submitted with ``filesize`` 0.

    Returns:
        A list of payload dicts, one per folder, in the iteration order of
        ``photo_folders``. Each has ``custom3`` set to ``"photo"``.
    """
    records = []
    single_root_gallery = len(photo_folders) == 1

    for folder, photos in photo_folders.items():
        photos_sorted = sorted(photos)
        count = len(photos_sorted)
        is_root = folder == "(root)"

        # Title format: "DISC_LABEL — Folder / Subfolder". When the disc root
        # is the only gallery, the bare disc label is enough.
        if is_root and single_root_gallery:
            title = disc_label
        else:
            folder_display = folder.replace("/", " / ")
            title = f"{disc_label} — {folder_display}"

        # Comment: short summary with a preview of the first few filenames.
        preview = ", ".join(photos_sorted[:6])
        if count > 6:
            preview += f" … +{count - 6} more"
        comment = f"{count} photo{'s' if count != 1 else ''}: {preview}"

        records.append({
            "title": title[:255],
            "subtitle": "" if is_root else folder,
            "mediatype": mediatype_id,
            "comment": comment[:255],
            # Full filename list, one per line, so every name is stored.
            "plot": "\n".join(photos_sorted),
            "filesize": 0,  # folder-level size not easily available
            "disklabel": disklabel,
            "custom1": drutil_type,
            "custom2": str(count),
            "custom3": "photo",
        })

    return records
# ── Main scan routine ────────────────────────────────────────────────────────── # ── Main scan routine ──────────────────────────────────────────────────────────
def scan_and_submit(): def scan_and_submit():
@@ -227,7 +286,7 @@ def scan_and_submit():
if not status: if not status:
return False return False
drutil_type = status.get("drutil_type", "") drutil_type = status.get("drutil_type", "Unknown")
device = status.get("device", "") device = status.get("device", "")
used_gb = status.get("used_gb", 0.0) used_gb = status.get("used_gb", 0.0)
@@ -235,55 +294,87 @@ def scan_and_submit():
print(f" device : {device}") print(f" device : {device}")
print(f" used : {used_gb:.2f} GB") print(f" used : {used_gb:.2f} GB")
# Give macOS a moment to finish mounting the filesystem time.sleep(4) # let macOS finish mounting
time.sleep(4)
mount = find_mount(device) mount = find_mount(device)
print(f" mount : {mount or '(not mounted)'}") print(f" mount : {mount or '(not mounted)'}")
# Determine media type — prefer drutil string, fall back to size mt = mediatype_from_drutil(drutil_type) or mediatype_from_size(used_gb)
mt = mediatype_from_drutil(drutil_type) mediatype_id, mediatype_name = mt
if mt: disc_label = volume_label(mount, device)
mediatype_id, mediatype_name = mt disklabel = disc_label[:32]
else:
mediatype_id, mediatype_name = mediatype_from_size(used_gb)
title = volume_label(mount, device) if not mount:
size_b = disc_size_bytes(mount) if mount else int(used_gb * 1024**3) print(" [WARN] Disc not mounted — submitting with disc label only")
submit({
"title": disc_label,
"subtitle": "",
"mediatype": mediatype_id,
"comment": f"Disc not mounted ({drutil_type})",
"plot": "",
"filesize": int(used_gb * 1024**3),
"disklabel": disklabel,
"custom1": drutil_type,
"custom2": "0",
"custom3": "unknown",
})
eject(None)
return True
# Build file listing size_b = disc_size_bytes(mount)
video_files = list_video_files(mount) if mount else [] print(f" Scanning contents...")
total_files = all_files_count(mount) if mount else 0 inv = inventory_disc(mount)
video_files = inv["video_files"]
photo_folders = inv["photo_folders"]
n_videos = len(video_files)
n_photos = sum(len(v) for v in photo_folders.values())
n_galleries = len(photo_folders)
print(f" Found: {n_videos} video files, {n_photos} photos in {n_galleries} folder(s)")
records = []
if video_files: if video_files:
# List video file names (not full paths) for the comment field records.append(build_video_record(
names = [Path(f).name for f in video_files] disc_label, disklabel, video_files, mediatype_id, drutil_type, size_b
summary = f"{len(video_files)} video files: " + ", ".join(names[:10]) ))
if len(names) > 10:
summary += f" … +{len(names) - 10} more"
# Store full file list in custom2 (255 char limit — truncate gracefully)
file_detail = "\n".join(video_files)
else:
summary = f"{total_files} files (no video files detected)"
file_detail = ""
payload = { if photo_folders:
"title": title, records += build_photo_records(
"mediatype": mediatype_id, disc_label, disklabel, photo_folders, mediatype_id, drutil_type, size_b
"comment": summary[:255], )
"filesize": size_b,
"disklabel": title[:32],
"custom1": drutil_type[:255],
"custom2": str(len(video_files) or total_files),
}
print(f"\n [{mediatype_name}] \"{title}\"") if not records:
print(f" {summary[:120]}") # Disc has neither video nor photo files — index with raw file count
ok = submit(payload) total = sum(
len(files)
for _, _, files in os.walk(mount)
)
records.append({
"title": disc_label,
"subtitle": "",
"mediatype": mediatype_id,
"comment": f"{total} files (no video or photo files detected)",
"plot": "",
"filesize": size_b,
"disklabel": disklabel,
"custom1": drutil_type,
"custom2": str(total),
"custom3": "data",
})
print(f"\n Submitting {len(records)} record(s)...")
ok_count = 0
for rec in records:
if submit(rec):
ok_count += 1
print(f" {ok_count}/{len(records)} records submitted.")
print(" Ejecting disc...") print(" Ejecting disc...")
eject(mount) eject(mount)
return ok return ok_count > 0
# ── Entry point ──────────────────────────────────────────────────────────────── # ── Entry point ────────────────────────────────────────────────────────────────

View File

@@ -62,19 +62,22 @@ if (!$dbh) {
} }
// ── Sanitize inputs ─────────────────────────────────────────────────────────── // ── Sanitize inputs ───────────────────────────────────────────────────────────
$title = mysqli_real_escape_string($dbh, substr($data['title'], 0, 255)); $title = mysqli_real_escape_string($dbh, substr($data['title'] ?? '', 0, 255));
$mediatype = (int)($data['mediatype'] ?? 1); // 1=DVD, 16=Blu-ray, 18=CD $subtitle = mysqli_real_escape_string($dbh, substr($data['subtitle'] ?? '', 0, 255)); // gallery folder path
$comment = mysqli_real_escape_string($dbh, substr($data['comment'] ?? '', 0, 255)); $mediatype = (int)($data['mediatype'] ?? 1);
$filesize = (int)($data['filesize'] ?? 0); $comment = mysqli_real_escape_string($dbh, substr($data['comment'] ?? '', 0, 255));
$custom1 = mysqli_real_escape_string($dbh, substr($data['custom1'] ?? '', 0, 255)); // raw drutil type $plot = mysqli_real_escape_string($dbh, $data['plot'] ?? ''); // full file listing (TEXT, no limit)
$custom2 = mysqli_real_escape_string($dbh, substr($data['custom2'] ?? '', 0, 255)); // track count / file count $filesize = (int)($data['filesize'] ?? 0);
$custom1 = mysqli_real_escape_string($dbh, substr($data['custom1'] ?? '', 0, 255)); // disc type string
$custom2 = mysqli_real_escape_string($dbh, substr($data['custom2'] ?? '', 0, 255)); // file/photo count
$custom3 = mysqli_real_escape_string($dbh, substr($data['custom3'] ?? '', 0, 255)); // content type: video|photo|mixed
$disklabel = mysqli_real_escape_string($dbh, substr($data['disklabel'] ?? '', 0, 32)); $disklabel = mysqli_real_escape_string($dbh, substr($data['disklabel'] ?? '', 0, 32));
// ── Insert ──────────────────────────────────────────────────────────────────── // ── Insert ────────────────────────────────────────────────────────────────────
$sql = "INSERT INTO " . TBL_DATA . " $sql = "INSERT INTO " . TBL_DATA . "
(title, mediatype, comment, filesize, disklabel, custom1, custom2, created, owner_id) (title, subtitle, mediatype, comment, plot, filesize, disklabel, custom1, custom2, custom3, created, owner_id)
VALUES VALUES
('$title', $mediatype, '$comment', $filesize, '$disklabel', '$custom1', '$custom2', NOW(), 1)"; ('$title', '$subtitle', $mediatype, '$comment', '$plot', $filesize, '$disklabel', '$custom1', '$custom2', '$custom3', NOW(), 1)";
if (mysqli_query($dbh, $sql)) { if (mysqli_query($dbh, $sql)) {
$id = (int)mysqli_insert_id($dbh); $id = (int)mysqli_insert_id($dbh);