Add cutter report and auto-regen on each match

- New CUTTER_REPORT.md: per-beat hand-off table for the video editor doing the manual recut. Per beat: trailer SMPTE in/out, source SMPTE in/out, scene id, score, status (OK / ? / MAN.), and a one-line phase description from the cached vision text.
- New scripts/generate_cutter_report.py: pure renderer that reads the current cache (match_results.json + trailer_beats.json + optional vision_descriptions.json) and writes CUTTER_REPORT.md. No side effects on the cache.
- cli.py: after every successful match, the cutter report is regenerated automatically (best-effort; failures are logged and do not abort).
- README.md: new top section "Fuer den Cutter" describing exactly what the editor needs (which two files to look at, how the status flag works, the recommended NLE workflow). The technical algorithm description follows below.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-04 13:09:16 +02:00
parent 06a2326bf1
commit 97a8f9e305
4 changed files with 390 additions and 25 deletions
@@ -92,6 +92,22 @@ def _save_results(results: list, cfg: "AppConfig") -> None:  # type: ignore[name
    logging.getLogger(__name__).info("Match results cached → %s", p)


+def _regenerate_cutter_report(cfg: "AppConfig") -> None:  # type: ignore[name-defined]
+    """Re-render CUTTER_REPORT.md in the cache dir's parent; best-effort: any Exception is logged as a warning, not raised."""
+    try:
+        from scripts.generate_cutter_report import render_report  # function-scope import: a failure here only skips the report
+    except Exception as exc:
+        logging.getLogger(__name__).warning("Cutter report regen skipped: %s", exc)
+        return
+    try:
+        project_root = cfg.paths.cache_dir.parent  # the report is written beside the cache directory, not inside it
+        out = project_root / "CUTTER_REPORT.md"
+        out.write_text(render_report(project_root), encoding="utf-8")
+        logging.getLogger(__name__).info("Cutter report regenerated → %s", out)
+    except Exception as exc:  # rendering or writing failed: log and return normally
+        logging.getLogger(__name__).warning("Cutter report regen failed: %s", exc)
+
+
 def _load_results(cfg: "AppConfig") -> list:  # type: ignore[name-defined]
    from src.core.models import MatchResult, MatchSegment
    p = _results_cache_path(cfg)
@@ -676,18 +692,23 @@ def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list
            islands = _reference_scoreable_segments(beat, cfg)
        except Exception:
            islands = []
-        if not islands:
-            # Pure fade/title material — no recovery possible by design.
-            continue

-        # Use the longest visible island as the target for the recovery search.
-        anchor_start_s, anchor_end_s = max(islands, key=lambda iv: iv[1] - iv[0])
+        # Anchor selection: prefer the longest visible island; if none exists,
+        # fall back to the full beat. The latter handles dark / low-contrast
+        # close-ups that drop below the scoreable luma/contrast thresholds but
+        # are still semantically describable. The strict vision phase
+        # validation later in this pass keeps us from accepting pure title-card
+        # or logo material.
        from dataclasses import replace as _replace
-        anchor_beat = _replace(
-            beat,
-            start_s=beat.start_s + anchor_start_s,
-            end_s=beat.start_s + anchor_end_s,
-        )
+        if islands:
+            anchor_start_s, anchor_end_s = max(islands, key=lambda iv: iv[1] - iv[0])
+            anchor_beat = _replace(
+                beat,
+                start_s=beat.start_s + anchor_start_s,
+                end_s=beat.start_s + anchor_end_s,
+            )
+        else:
+            anchor_beat = beat

        try:
            hits = run_vibe_check(
@@ -1469,6 +1490,7 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
        results_to_save = results

    _save_results(results_to_save, cfg)
+    _regenerate_cutter_report(cfg)

    print(f"\n✅  {len(results)} / {len(beats)} beats matched.")
    for r in results: