feat: photo gallery indexing — one record per folder, full filename search

scanner/scan_disc.py:
- Inventory disc into video files + photo folders (grouped by directory)
- Photo extensions: jpg/jpeg/png/gif/tiff/bmp/heic/heif/webp + raw formats
  (cr2 cr3 nef arw dng orf rw2 raf)
- Video disc   → 1 record per disc with video file list
- Photo disc   → 1 record per gallery folder with photo list
- Mixed disc   → both: 1 video record + 1 record per photo folder
- Unknown disc → 1 fallback record with total file count
- Folder title format: "DISC_LABEL — Folder / Subfolder"

videodb/api_ingest.php:
- Add subtitle field (gallery folder path)
- Add plot field (TEXT — full filename list, no 255-char limit)
- Add custom3 field (content type: video | photo | mixed | data)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-11 10:15:56 +02:00
parent 451afc0440
commit 7d401b1963
2 changed files with 198 additions and 104 deletions

View File

@@ -2,11 +2,12 @@
"""
MeDBia Disc Scanner — macOS client
===================================
Polls the optical drive, reads the disc content (video/data files),
posts to the remote videoDB API, then ejects the disc.
Polls the optical drive, inspects disc contents, and submits records
to the remote videoDB API:
All discs are treated as data discs containing media files (mp4, mkv, etc.).
Media type is inferred from disc capacity reported by drutil.
• Video discs → one record per disc, listing video filenames
• Photo discs → one record per gallery folder, listing photo filenames
• Mixed discs → both: a video record + one record per photo folder
Setup:
pip3 install requests
@@ -20,6 +21,7 @@ import re
import sys
import time
import subprocess
from collections import defaultdict
from pathlib import Path
try:
@@ -37,13 +39,17 @@ MT_DVD = 1
MT_BLURAY = 16
MT_CD = 18
# Video file extensions to list in the index
VIDEO_EXT = {
".mp4", ".mkv", ".avi", ".mov", ".m4v", ".ts", ".m2ts",
".wmv", ".flv", ".webm", ".vob", ".mpg", ".mpeg", ".iso",
".mp4", ".mkv", ".avi", ".mov", ".m4v", ".ts", ".m2ts", ".mts",
".wmv", ".flv", ".webm", ".vob", ".mpg", ".mpeg", ".iso", ".divx",
}
PHOTO_EXT = {
".jpg", ".jpeg", ".png", ".gif", ".tiff", ".tif", ".bmp",
".heic", ".heif", ".webp",
".raw", ".cr2", ".cr3", ".nef", ".arw", ".dng", ".orf", ".rw2", ".raf",
}
# System volumes to ignore when scanning /Volumes/
IGNORE_VOLUMES = {"Macintosh HD", "Preboot", "Recovery", "VM", "Data", "Update"}
@@ -60,66 +66,43 @@ def run(cmd: list) -> str:
# ── Disc presence ──────────────────────────────────────────────────────────────
def disc_status() -> dict | None:
    """Query ``drutil status`` and summarise the inserted disc.

    Returns:
        ``None`` when drutil produces no output or reports "No Media".
        Otherwise a dict containing whichever of these keys could be parsed
        (missing keys mean the field was not found in the output):

        * ``drutil_type`` -- text following ``Type:``
        * ``device``      -- the ``/dev/...`` path following ``Name:``
        * ``used_gb``     -- used space as a float, in gigabytes
    """
    out = run(["drutil", "status"])
    if not out or "No Media" in out:
        return None

    info: dict = {}

    # The type value ends at a run of 3+ spaces (start of the next column)
    # or at the end of the line.
    type_match = re.search(r"Type:\s+(.+?)(?:\s{3,}|$)", out, re.MULTILINE)
    if type_match:
        info["drutil_type"] = type_match.group(1).strip()

    device_match = re.search(r"Name:\s+(/dev/\S+)", out)
    if device_match:
        info["device"] = device_match.group(1)

    # Take the first "<number>MB" / "<number>GB" figure on the Space Used
    # line.  This accepts the simple "Space Used: 7.88 GB" form as well as a
    # row where other fields precede the size.
    # NOTE(review): drutil appears to print this row as
    # "time  blocks: N / x.xxGB / y.yyGiB" -- confirm against real output.
    used_match = re.search(
        r"Space Used:.*?(\d+(?:\.\d+)?)\s*(MB|GB)\b", out
    )
    if used_match:
        amount = float(used_match.group(1))
        # Small discs (CDs) are reported in MB; normalise to GB so the
        # size-based media type inference still receives a value.
        if used_match.group(2) == "MB":
            amount /= 1000.0
        info["used_gb"] = amount

    return info
# ── Mount ──────────────────────────────────────────────────────────────────────
def find_mount(device: str) -> str | None:
    """Return the mount point of the optical disc, or ``None`` if not found.

    Args:
        device: Device node of the drive (e.g. ``/dev/disk2``).  May be an
            empty string when drutil did not report one; the ``mount``
            lookup is then skipped.

    Returns:
        The ``/Volumes/...`` path taken from the ``mount`` output line that
        mentions *device*.  If no such line exists, the alphabetically first
        entry of ``/Volumes/`` that is not in ``IGNORE_VOLUMES``.  ``None``
        when neither lookup yields a result.
    """
    # An empty device string is a substring of every line, so only consult
    # the mount table when we actually know which device to look for.
    if device:
        for line in run(["mount"]).splitlines():
            if device not in line:
                continue
            # Preferred form: everything up to the trailing "(options)"
            # group, so volume names containing spaces or parentheses
            # survive intact.
            m = re.search(r" on (/Volumes/.+) \([^()]*\)\s*$", line)
            if not m:
                # Line without an options group: stop at whitespace or "(".
                m = re.search(r" on (/Volumes/[^\s(]+)", line)
            if m:
                return m.group(1)

    # Fallback: first non-system entry in /Volumes/
    try:
        candidates = set(os.listdir("/Volumes/")) - IGNORE_VOLUMES
    except OSError:
        # Best effort: /Volumes/ missing or unreadable means "not mounted".
        return None
    if candidates:
        return f"/Volumes/{sorted(candidates)[0]}"
    return None
# ── Media type from disc capacity ──────────────────────────────────────────────
def mediatype_from_size(used_gb: float) -> tuple[int, str]:
    """
    Guess the videoDB mediatype from how much of the disc is used.

    Blu-ray discs hold 25/50 GB; DVDs hold ~4.7/8.5 GB; CDs ~0.7 GB.
    Returns a ``(mediatype_id, display_name)`` pair.
    """
    # Checked from the largest threshold down; the first one exceeded wins.
    size_classes = (
        (8.0, (MT_BLURAY, "Blu-ray")),
        (0.68, (MT_DVD, "DVD")),
    )
    for lower_bound, media in size_classes:
        if used_gb > lower_bound:
            return media
    # Nothing exceeded: treat as a CD.
    return MT_CD, "CD"
# ── Media type inference ───────────────────────────────────────────────────────
def mediatype_from_drutil(drutil_type: str) -> tuple[int, str] | None:
"""Parse drutil type string if available."""
t = drutil_type.upper()
if "BD" in t:
return MT_BLURAY, "Blu-ray"
@@ -129,36 +112,49 @@ def mediatype_from_drutil(drutil_type: str) -> tuple[int, str] | None:
return MT_CD, "CD"
return None
def mediatype_from_size(used_gb: float) -> tuple[int, str]:
    """Map used capacity in GB to a ``(mediatype_id, display_name)`` pair.

    More than 8.0 GB is classed as Blu-ray, more than 0.68 GB as DVD, and
    anything else as CD.
    """
    if used_gb > 8.0:
        media = (MT_BLURAY, "Blu-ray")
    elif used_gb > 0.68:
        media = (MT_DVD, "DVD")
    else:
        media = (MT_CD, "CD")
    return media
# ── File listing ───────────────────────────────────────────────────────────────
def list_video_files(mount: str) -> list[str]:
"""Return relative paths of all video files on the disc."""
found = []
# ── Disc inventory ─────────────────────────────────────────────────────────────
def inventory_disc(mount: str) -> dict:
"""
Walk the disc and return:
video_files: list of relative paths to video files
photo_folders: dict of { relative_folder_path -> [photo filenames] }
"""
video_files: list[str] = []
# folder path (relative to mount) -> list of photo filenames in that folder
photo_folders: dict[str, list[str]] = defaultdict(list)
try:
for root, _dirs, files in os.walk(mount):
for f in files:
if Path(f).suffix.lower() in VIDEO_EXT:
rel = os.path.relpath(os.path.join(root, f), mount)
found.append(rel)
for fname in files:
ext = Path(fname).suffix.lower()
rel_path = os.path.relpath(os.path.join(root, fname), mount)
rel_folder = os.path.relpath(root, mount)
if rel_folder == ".":
rel_folder = "(root)"
if ext in VIDEO_EXT:
video_files.append(rel_path)
elif ext in PHOTO_EXT:
photo_folders[rel_folder].append(fname)
except PermissionError:
pass
return sorted(found)
def all_files_count(mount: str) -> int:
"""Count every file on the disc (for discs with no video files)."""
count = 0
try:
for _root, _dirs, files in os.walk(mount):
count += len(files)
except PermissionError:
pass
return count
return {
"video_files": sorted(video_files),
"photo_folders": dict(photo_folders),
}
def disc_size_bytes(mount: str) -> int:
"""Used space in bytes via df."""
out = run(["df", "-k", mount])
for line in out.splitlines()[1:]:
parts = line.split()
@@ -170,8 +166,6 @@ def disc_size_bytes(mount: str) -> int:
return 0
# ── Volume label ───────────────────────────────────────────────────────────────
def volume_label(mount: str | None, device: str) -> str:
if mount:
label = os.path.basename(mount)
@@ -213,13 +207,78 @@ def submit(payload: dict) -> bool:
return False
except requests.ConnectionError:
print(f" [ERR] Cannot reach {API_URL}")
print(f" Check VIDEODB_URL and that the server is up.")
return False
except Exception as e:
print(f" [ERR] {e}")
return False
# ── Record builders ────────────────────────────────────────────────────────────
def build_video_record(disc_label: str, disklabel: str, video_files: list[str],
                       mediatype_id: int, drutil_type: str, size_b: int) -> dict:
    """Build the single videoDB record describing a disc's video files.

    Args:
        disc_label: Value stored in the record's ``title`` field.
        disklabel: Value stored in the record's ``disklabel`` field.
        video_files: Paths of the video files; only the final path
            component of each is stored.
        mediatype_id: videoDB mediatype id.
        drutil_type: Disc type string, stored in ``custom1``.
        size_b: Size in bytes, stored in ``filesize``.

    Returns:
        A payload dict with the keys ``title``, ``subtitle``, ``mediatype``,
        ``comment``, ``plot``, ``filesize``, ``disklabel``, ``custom1``,
        ``custom2`` (file count as a string) and ``custom3`` (``"video"``).
    """
    names = [Path(f).name for f in video_files]
    count = len(names)

    # Full list goes in the TEXT field, which has no length limit.
    plot = "\n".join(names)

    # Short preview for the comment field (VARCHAR 255): first 8 names.
    preview = ", ".join(names[:8])
    if count > 8:
        preview += f" … +{count - 8} more"

    # Pluralise correctly ("1 video file", "2 video files") and omit the
    # trailing colon when there is nothing to preview.
    noun = "video file" if count == 1 else "video files"
    comment = f"{count} {noun}: {preview}" if names else f"{count} {noun}"

    return {
        "title": disc_label,
        "subtitle": "",
        "mediatype": mediatype_id,
        "comment": comment[:255],
        "plot": plot,
        "filesize": size_b,
        "disklabel": disklabel,
        "custom1": drutil_type,
        "custom2": str(count),
        "custom3": "video",
    }
def build_photo_records(disc_label: str, disklabel: str, photo_folders: dict[str, list[str]],
                        mediatype_id: int, drutil_type: str, size_b: int) -> list[dict]:
    """Build one videoDB record per gallery folder.

    Args:
        disc_label: Disc label; used as the title or the title prefix.
        disklabel: Value stored in each record's ``disklabel`` field.
        photo_folders: Mapping of folder path to the photo filenames in that
            folder.  The key ``"(root)"`` stands for the disc's top level.
        mediatype_id: videoDB mediatype id.
        drutil_type: Disc type string, stored in ``custom1``.
        size_b: Accepted for signature parity with ``build_video_record``;
            not used, since every record is written with ``filesize`` 0.

    Returns:
        A list of payload dicts in the iteration order of *photo_folders*.
        Titles follow the "DISC_LABEL — Folder / Subfolder" format, except
        that a disc whose only folder is "(root)" is titled with the bare
        disc label.
    """
    records = []
    only_root = len(photo_folders) == 1

    for folder, photos in photo_folders.items():
        photos_sorted = sorted(photos)
        count = len(photos_sorted)

        # Title: disc label + folder name (skip "(root)" clutter if it is
        # the only folder on the disc).
        if folder == "(root)" and only_root:
            title = disc_label
        else:
            folder_display = folder.replace("/", " / ")
            # Separator keeps the label and folder path visually distinct.
            title = f"{disc_label} — {folder_display}"

        # Comment: short summary with the first 6 filenames.
        preview = ", ".join(photos_sorted[:6])
        if count > 6:
            preview += f" … +{count - 6} more"
        comment = f"{count} photo{'s' if count != 1 else ''}: {preview}"

        records.append({
            "title": title[:255],
            "subtitle": folder if folder != "(root)" else "",
            "mediatype": mediatype_id,
            "comment": comment[:255],
            # Plot: full filename list, one name per line.
            "plot": "\n".join(photos_sorted),
            "filesize": 0,  # folder-level size not easily available
            "disklabel": disklabel,
            "custom1": drutil_type,
            "custom2": str(count),
            "custom3": "photo",
        })

    return records
# ── Main scan routine ──────────────────────────────────────────────────────────
def scan_and_submit():
@@ -227,7 +286,7 @@ def scan_and_submit():
if not status:
return False
drutil_type = status.get("drutil_type", "")
drutil_type = status.get("drutil_type", "Unknown")
device = status.get("device", "")
used_gb = status.get("used_gb", 0.0)
@@ -235,55 +294,87 @@ def scan_and_submit():
print(f" device : {device}")
print(f" used : {used_gb:.2f} GB")
# Give macOS a moment to finish mounting the filesystem
time.sleep(4)
time.sleep(4) # let macOS finish mounting
mount = find_mount(device)
print(f" mount : {mount or '(not mounted)'}")
# Determine media type — prefer drutil string, fall back to size
mt = mediatype_from_drutil(drutil_type)
if mt:
mt = mediatype_from_drutil(drutil_type) or mediatype_from_size(used_gb)
mediatype_id, mediatype_name = mt
else:
mediatype_id, mediatype_name = mediatype_from_size(used_gb)
disc_label = volume_label(mount, device)
disklabel = disc_label[:32]
title = volume_label(mount, device)
size_b = disc_size_bytes(mount) if mount else int(used_gb * 1024**3)
if not mount:
print(" [WARN] Disc not mounted — submitting with disc label only")
submit({
"title": disc_label,
"subtitle": "",
"mediatype": mediatype_id,
"comment": f"Disc not mounted ({drutil_type})",
"plot": "",
"filesize": int(used_gb * 1024**3),
"disklabel": disklabel,
"custom1": drutil_type,
"custom2": "0",
"custom3": "unknown",
})
eject(None)
return True
# Build file listing
video_files = list_video_files(mount) if mount else []
total_files = all_files_count(mount) if mount else 0
size_b = disc_size_bytes(mount)
print(f" Scanning contents...")
inv = inventory_disc(mount)
video_files = inv["video_files"]
photo_folders = inv["photo_folders"]
n_videos = len(video_files)
n_photos = sum(len(v) for v in photo_folders.values())
n_galleries = len(photo_folders)
print(f" Found: {n_videos} video files, {n_photos} photos in {n_galleries} folder(s)")
records = []
if video_files:
# List video file names (not full paths) for the comment field
names = [Path(f).name for f in video_files]
summary = f"{len(video_files)} video files: " + ", ".join(names[:10])
if len(names) > 10:
summary += f" … +{len(names) - 10} more"
# Store full file list in custom2 (255 char limit — truncate gracefully)
file_detail = "\n".join(video_files)
else:
summary = f"{total_files} files (no video files detected)"
file_detail = ""
records.append(build_video_record(
disc_label, disklabel, video_files, mediatype_id, drutil_type, size_b
))
payload = {
"title": title,
if photo_folders:
records += build_photo_records(
disc_label, disklabel, photo_folders, mediatype_id, drutil_type, size_b
)
if not records:
# Disc has neither video nor photo files — index with raw file count
total = sum(
len(files)
for _, _, files in os.walk(mount)
)
records.append({
"title": disc_label,
"subtitle": "",
"mediatype": mediatype_id,
"comment": summary[:255],
"comment": f"{total} files (no video or photo files detected)",
"plot": "",
"filesize": size_b,
"disklabel": title[:32],
"custom1": drutil_type[:255],
"custom2": str(len(video_files) or total_files),
}
"disklabel": disklabel,
"custom1": drutil_type,
"custom2": str(total),
"custom3": "data",
})
print(f"\n [{mediatype_name}] \"{title}\"")
print(f" {summary[:120]}")
ok = submit(payload)
print(f"\n Submitting {len(records)} record(s)...")
ok_count = 0
for rec in records:
if submit(rec):
ok_count += 1
print(f" {ok_count}/{len(records)} records submitted.")
print(" Ejecting disc...")
eject(mount)
return ok
return ok_count > 0
# ── Entry point ────────────────────────────────────────────────────────────────

View File

@@ -62,19 +62,22 @@ if (!$dbh) {
}
// ── Sanitize inputs ───────────────────────────────────────────────────────────
$title = mysqli_real_escape_string($dbh, substr($data['title'], 0, 255));
$mediatype = (int)($data['mediatype'] ?? 1); // 1=DVD, 16=Blu-ray, 18=CD
$title = mysqli_real_escape_string($dbh, substr($data['title'] ?? '', 0, 255));
$subtitle = mysqli_real_escape_string($dbh, substr($data['subtitle'] ?? '', 0, 255)); // gallery folder path
$mediatype = (int)($data['mediatype'] ?? 1);
$comment = mysqli_real_escape_string($dbh, substr($data['comment'] ?? '', 0, 255));
$plot = mysqli_real_escape_string($dbh, $data['plot'] ?? ''); // full file listing (TEXT, no limit)
$filesize = (int)($data['filesize'] ?? 0);
$custom1 = mysqli_real_escape_string($dbh, substr($data['custom1'] ?? '', 0, 255)); // raw drutil type
$custom2 = mysqli_real_escape_string($dbh, substr($data['custom2'] ?? '', 0, 255)); // track count / file count
$custom1 = mysqli_real_escape_string($dbh, substr($data['custom1'] ?? '', 0, 255)); // disc type string
$custom2 = mysqli_real_escape_string($dbh, substr($data['custom2'] ?? '', 0, 255)); // file/photo count
$custom3 = mysqli_real_escape_string($dbh, substr($data['custom3'] ?? '', 0, 255)); // content type: video|photo|mixed
$disklabel = mysqli_real_escape_string($dbh, substr($data['disklabel'] ?? '', 0, 32));
// ── Insert ────────────────────────────────────────────────────────────────────
$sql = "INSERT INTO " . TBL_DATA . "
(title, mediatype, comment, filesize, disklabel, custom1, custom2, created, owner_id)
(title, subtitle, mediatype, comment, plot, filesize, disklabel, custom1, custom2, custom3, created, owner_id)
VALUES
('$title', $mediatype, '$comment', $filesize, '$disklabel', '$custom1', '$custom2', NOW(), 1)";
('$title', '$subtitle', $mediatype, '$comment', '$plot', $filesize, '$disklabel', '$custom1', '$custom2', '$custom3', NOW(), 1)";
if (mysqli_query($dbh, $sql)) {
$id = (int)mysqli_insert_id($dbh);