Add cutter report and auto-regen on each match

- New CUTTER_REPORT.md: per-beat hand-off table for the video editor doing
  the manual recut. Per beat: trailer SMPTE in/out, source SMPTE in/out,
  scene id, score, status (OK / ? / MAN.), and a one-line phase
  description from the cached vision text.
- New scripts/generate_cutter_report.py: pure renderer that reads the
  current cache (match_results.json + trailer_beats.json + optional
  vision_descriptions.json) and writes CUTTER_REPORT.md. No side effects on
  the cache.
- cli.py: after every successful match the cutter report is regenerated
  automatically (best-effort; failures are logged and do not abort).
- README.md: new top-section "Fuer den Cutter" describing exactly what the
  editor needs (which two files to look at, how the status flag works,
  the recommended NLE workflow). The technical algorithm description
  follows below.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Melbar
2026-05-04 13:09:16 +02:00
parent 06a2326bf1
commit 97a8f9e305
4 changed files with 390 additions and 25 deletions
+32 -10
View File
@@ -92,6 +92,22 @@ def _save_results(results: list, cfg: "AppConfig") -> None: # type: ignore[name
logging.getLogger(__name__).info("Match results cached → %s", p)
def _regenerate_cutter_report(cfg: "AppConfig") -> None:  # type: ignore[name-defined]
    """Re-render CUTTER_REPORT.md after a cache write so it stays in sync.

    Best-effort by design: every failure is logged as a warning and
    swallowed, so a missing or broken renderer never aborts the caller.

    Args:
        cfg: Application config. Only ``cfg.paths.cache_dir`` is read; the
            report is written into its parent directory, which is also the
            directory handed to the renderer.
    """
    log = logging.getLogger(__name__)

    # Imported lazily so an unavailable renderer only skips the report
    # instead of breaking the import of this module.
    try:
        from scripts.generate_cutter_report import render_report
    except Exception as exc:
        log.warning("Cutter report regen skipped: %s", exc)
        return

    try:
        project_root = cfg.paths.cache_dir.parent
        out = project_root / "CUTTER_REPORT.md"
        # Render completely before touching the target file, so a renderer
        # error leaves the previously written report untouched.
        markdown = render_report(project_root)
        out.write_text(markdown, encoding="utf-8")
    except Exception as exc:
        # Keep the traceback: str(exc) alone is often useless for renderer
        # bugs (a KeyError would log nothing but the missing key).
        log.warning("Cutter report regen failed: %s", exc, exc_info=True)
    else:
        log.info("Cutter report regenerated → %s", out)
def _load_results(cfg: "AppConfig") -> list: # type: ignore[name-defined]
from src.core.models import MatchResult, MatchSegment
p = _results_cache_path(cfg)
@@ -676,18 +692,23 @@ def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list
islands = _reference_scoreable_segments(beat, cfg)
except Exception:
islands = []
if not islands:
# Pure fade/title material — no recovery possible by design.
continue
# Use the longest visible island as the target for the recovery search.
anchor_start_s, anchor_end_s = max(islands, key=lambda iv: iv[1] - iv[0])
# Anchor selection: prefer the longest visible island; if none exists,
# fall back to the full beat. The latter handles dark / low-contrast
# close-ups that drop below the scoreable luma/contrast thresholds but
# are still semantically describable. The strict vision phase
# validation later in this pass keeps us from accepting pure title-card
# or logo material.
from dataclasses import replace as _replace
anchor_beat = _replace(
beat,
start_s=beat.start_s + anchor_start_s,
end_s=beat.start_s + anchor_end_s,
)
if islands:
anchor_start_s, anchor_end_s = max(islands, key=lambda iv: iv[1] - iv[0])
anchor_beat = _replace(
beat,
start_s=beat.start_s + anchor_start_s,
end_s=beat.start_s + anchor_end_s,
)
else:
anchor_beat = beat
try:
hits = run_vibe_check(
@@ -1469,6 +1490,7 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
results_to_save = results
_save_results(results_to_save, cfg)
_regenerate_cutter_report(cfg)
print(f"\n{len(results)} / {len(beats)} beats matched.")
for r in results: