Improve multi-shot phase retune
This commit is contained in:
+1
-2
File diff suppressed because one or more lines are too long
+9
-9
File diff suppressed because one or more lines are too long
@@ -36,6 +36,10 @@ Was du bekommst sind zwei Dateien, mit denen du arbeitest:
|
||||
5. Bei `MAN.`-Beats selbst die passende Stelle im Spielfilm suchen — die
|
||||
Beschreibung im Report sagt dir, was du suchst.
|
||||
|
||||
Für die visuelle Kontrolle ist zusätzlich **`CUTTER_REPORT.html`** relevant:
|
||||
er enthält die frame-locked Compare-Clips. Der alte `match_report.html` ist
|
||||
nicht mehr Teil des Workflows.
|
||||
|
||||
Alles andere unten ist Hintergrund für den Tool-Verantwortlichen.
|
||||
|
||||
---
|
||||
@@ -48,7 +52,7 @@ Alles andere unten ist Hintergrund für den Tool-Verantwortlichen.
|
||||
| **1** | Schneller Vibe-Check: für jeden Beat die Top-K ähnlichsten Szenen aus dem Spielfilm vorauswählen (Histogramm + pHash). |
|
||||
| **2** | Optional: Vision-LLM beschreibt unsichere Szenen mit 3-Frame-Samples; die Beschreibungen liegen gecached vor. |
|
||||
| **3** | Frame-genaue Verfeinerung pro Beat (OpenCV-Templatematching, Bewegungsphasen-Vergleich). |
|
||||
| **4** | Phasen-Reparatur: bei segmentierten Beats wird die Bewegungsphase im Source mit der sichtbaren Trailerphase abgeglichen. |
|
||||
| **4** | Phasen-Reparatur: bei segmentierten Beats wird die Bewegungsphase im Source saliency-gewichtet mit der sichtbaren Trailerphase abgeglichen. |
|
||||
| **5** | Recovery: Beats ohne Treffer werden via Vision-Phasensuche in den Top-K Szenen nochmal probiert. |
|
||||
| **6** | Export als FCPXML 1.10 oder CMX-3600-EDL plus `CUTTER_REPORT.md`. |
|
||||
|
||||
@@ -56,6 +60,10 @@ Alles andere unten ist Hintergrund für den Tool-Verantwortlichen.
|
||||
Vergleich ausgeblendet, damit Title-Cards, Logos und Letterbox die Treffer
|
||||
nicht verfälschen.
|
||||
|
||||
**Cutter-Report-Caching:** Vorhandene Compare-Clips werden wiederverwendet.
|
||||
Bei gezielten Rematches wird nur der betroffene Beat neu gerendert, damit der
|
||||
Report schnell aktuell bleibt und keine unnötigen Videoartefakte neu entstehen.
|
||||
|
||||
**Wichtig:** Auch wenn Vision aktiviert ist — der finale Match bleibt
|
||||
CV-verifiziert. Das LLM liefert nur zusätzliche Suchanker.
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ def _auto_commit_push_reports(project_root: "Path") -> None: # type: ignore[nam
|
||||
log.warning("Auto-commit/push failed (non-fatal): %s", exc)
|
||||
|
||||
|
||||
def _regenerate_cutter_report(cfg: "AppConfig") -> None: # type: ignore[name-defined]
|
||||
def _regenerate_cutter_report(cfg: "AppConfig", force_beats: set[int] | None = None) -> None: # type: ignore[name-defined]
|
||||
"""Re-render CUTTER_REPORT.{md,html} with Frame-Locked Compare clips.
|
||||
|
||||
Called from every match-style command after the cache is written so all
|
||||
@@ -141,8 +141,19 @@ def _regenerate_cutter_report(cfg: "AppConfig") -> None: # type: ignore[name-de
|
||||
"""
|
||||
project_root = cfg.paths.cache_dir.parent
|
||||
try:
|
||||
import os
|
||||
from scripts.generate_cutter_report import render_report
|
||||
md, html = render_report(project_root, with_stills=True, with_clips=True)
|
||||
old_force = os.environ.get("CUTTER_REPORT_FORCE_BEATS")
|
||||
try:
|
||||
if force_beats:
|
||||
os.environ["CUTTER_REPORT_FORCE_BEATS"] = ",".join(str(b) for b in sorted(force_beats))
|
||||
md, html = render_report(project_root, with_stills=True, with_clips=True)
|
||||
finally:
|
||||
if force_beats:
|
||||
if old_force is None:
|
||||
os.environ.pop("CUTTER_REPORT_FORCE_BEATS", None)
|
||||
else:
|
||||
os.environ["CUTTER_REPORT_FORCE_BEATS"] = old_force
|
||||
(project_root / "CUTTER_REPORT.md").write_text(md, encoding="utf-8")
|
||||
(project_root / "CUTTER_REPORT.html").write_text(html, encoding="utf-8")
|
||||
|
||||
@@ -293,6 +304,8 @@ def _normalize_cached_results(beats: list, results: list, cfg) -> list:
|
||||
segment,
|
||||
in_point_s=in_point_s,
|
||||
out_point_s=in_point_s + float(segment.duration_s),
|
||||
match_score=max(float(segment.match_score), float(_phase_score)),
|
||||
is_confirmed=float(_phase_score) >= cfg.cv.deep_scan.match_threshold,
|
||||
)
|
||||
repaired_segments.append(segment)
|
||||
|
||||
@@ -1430,6 +1443,8 @@ def _attach_visual_segments(results: list, beats: list, cfg) -> list:
|
||||
seg,
|
||||
in_point_s=in_point_s,
|
||||
out_point_s=in_point_s + seg.duration_s,
|
||||
match_score=max(seg.match_score, _phase_score),
|
||||
is_confirmed=_phase_score >= cfg.cv.deep_scan.match_threshold,
|
||||
)
|
||||
else:
|
||||
seg = repaired
|
||||
@@ -1693,27 +1708,34 @@ def _local_same_scene_segment_match(segment_beat, beat, segment_offset_s: float,
|
||||
|
||||
|
||||
def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: float, cfg):
|
||||
"""Retune a weak multi-shot segment inside its own scene using cheap frame features."""
|
||||
"""Retune a weak multi-shot segment inside its own scene using saliency-weighted frames."""
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
offsets = [0.0, 0.28, 0.56, 0.84, 1.12]
|
||||
offsets = [0.0, 0.16, 0.32, 0.48, 0.64, 0.80, 0.96, 1.12]
|
||||
size = (160, 90)
|
||||
|
||||
def feature(frame):
|
||||
def prepared_gray(frame):
|
||||
if frame is None:
|
||||
return None
|
||||
h, w = frame.shape[:2]
|
||||
frame = frame.copy()
|
||||
frame[: int(h * 0.16), : int(w * 0.28)] = 0
|
||||
# Timecode overlays and letterbox edges are trailer/source-specific and
|
||||
# should not pull the phase toward the wrong moment.
|
||||
frame[: int(h * 0.16), : int(w * 0.32)] = 0
|
||||
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||||
gray = cv2.resize(gray, size)
|
||||
edges = cv2.Canny(gray, 40, 120)
|
||||
vec = np.concatenate([
|
||||
gray.reshape(-1).astype("float32") / 255.0,
|
||||
edges.reshape(-1).astype("float32") / 255.0,
|
||||
])
|
||||
return (vec - vec.mean()) / (vec.std() + 1e-6)
|
||||
return cv2.equalizeHist(gray).astype("float32") / 255.0
|
||||
|
||||
def edge(gray):
|
||||
return cv2.Canny((gray * 255).astype("uint8"), 45, 130).astype("float32") / 255.0
|
||||
|
||||
def pair_score(ref_gray, src_gray, mask):
|
||||
if ref_gray is None or src_gray is None:
|
||||
return None
|
||||
pixel = 1.0 - float((np.abs(ref_gray - src_gray) * mask).sum())
|
||||
edge_score = 1.0 - float((np.abs(edge(ref_gray) - edge(src_gray)) * mask).sum())
|
||||
return 0.65 * pixel + 0.35 * edge_score
|
||||
|
||||
def frame_at(cap, t_s):
|
||||
cap.set(cv2.CAP_PROP_POS_MSEC, t_s * 1000.0)
|
||||
@@ -1722,39 +1744,69 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
|
||||
|
||||
trailer_cap = cv2.VideoCapture(str(cfg.paths.reference_trailer))
|
||||
refs = [
|
||||
feature(frame_at(trailer_cap, segment_beat.start_s + offset))
|
||||
prepared_gray(frame_at(trailer_cap, segment_beat.start_s + offset))
|
||||
for offset in offsets
|
||||
if offset <= segment_beat.duration_s + 0.04
|
||||
]
|
||||
refs = [ref for ref in refs if ref is not None]
|
||||
if len(refs) < 3:
|
||||
if len(refs) < 4:
|
||||
return None
|
||||
|
||||
ref_stack = np.stack(refs, axis=0)
|
||||
edge_stack = np.stack([edge(ref) for ref in refs], axis=0)
|
||||
saliency = ref_stack.std(axis=0) * 1.25 + edge_stack.mean(axis=0) * 0.75
|
||||
saliency[:, : int(size[0] * 0.12)] *= 0.15
|
||||
saliency[: int(size[1] * 0.16), : int(size[0] * 0.32)] = 0.0
|
||||
threshold = np.quantile(saliency, 0.72)
|
||||
mask = (saliency >= threshold).astype("float32")
|
||||
mask /= mask.sum() + 1e-6
|
||||
|
||||
scene_start = float(scene["start_s"])
|
||||
scene_end = float(scene["end_s"])
|
||||
scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s))
|
||||
max_points = 96
|
||||
max_points = 400
|
||||
step_s = max(0.08, (scan_end - scene_start) / max_points)
|
||||
|
||||
source_cap = cv2.VideoCapture(str(cfg.paths.source_movie))
|
||||
source_fps = source_cap.get(cv2.CAP_PROP_FPS) or _scene_fps_light(scene, cfg)
|
||||
stride = max(1, int(round(step_s * source_fps)))
|
||||
start_frame = max(0, int(round(scene_start * source_fps)))
|
||||
end_frame = max(start_frame, int(round(scene_end * source_fps)))
|
||||
times: list[float] = []
|
||||
source_frames: list = []
|
||||
frame_idx = start_frame
|
||||
while frame_idx <= end_frame:
|
||||
source_cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
|
||||
ok, frame = source_cap.read()
|
||||
if not ok:
|
||||
break
|
||||
times.append(frame_idx / source_fps)
|
||||
source_frames.append(prepared_gray(frame))
|
||||
frame_idx += stride
|
||||
|
||||
candidates: list[tuple[float, float, float]] = []
|
||||
t = scene_start
|
||||
while t <= scan_end:
|
||||
for i, t in enumerate(times):
|
||||
if t > scan_end:
|
||||
break
|
||||
vals = []
|
||||
for offset, ref in zip(offsets, refs):
|
||||
src = feature(frame_at(source_cap, t + offset))
|
||||
if src is not None:
|
||||
vals.append(float(np.dot(ref, src) / len(ref)))
|
||||
if len(vals) >= 3:
|
||||
candidates.append((sum(vals) / len(vals), min(vals), t))
|
||||
t = round(t + step_s, 6)
|
||||
j = int(round((t + offset - scene_start) / step_s))
|
||||
if 0 <= j < len(source_frames):
|
||||
score = pair_score(ref, source_frames[j], mask)
|
||||
else:
|
||||
score = None
|
||||
if score is not None:
|
||||
vals.append(score)
|
||||
if len(vals) >= 4:
|
||||
avg_score = sum(vals) / len(vals)
|
||||
candidates.append((0.55 * avg_score + 0.45 * min(vals), min(vals), t))
|
||||
|
||||
if not candidates:
|
||||
return None
|
||||
|
||||
candidates.sort(reverse=True)
|
||||
best_score = candidates[0][0]
|
||||
near_tie = [c for c in candidates if c[0] >= best_score - 0.01]
|
||||
near_tie = [c for c in candidates if c[0] >= best_score - 0.002]
|
||||
chosen = min(near_tie, key=lambda c: abs(c[2] - original_in_s))
|
||||
return chosen[2], chosen[0]
|
||||
|
||||
@@ -1858,7 +1910,8 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
|
||||
results_to_save = results
|
||||
|
||||
_save_results(results_to_save, cfg)
|
||||
_regenerate_cutter_report(cfg)
|
||||
force_report_beats = {int(args.beat)} if getattr(args, "beat", None) is not None else None
|
||||
_regenerate_cutter_report(cfg, force_beats=force_report_beats)
|
||||
|
||||
print(f"\n✅ {len(results)} / {len(beats)} beats matched.")
|
||||
for r in results:
|
||||
|
||||
+11
-2
@@ -148,8 +148,17 @@ Phasenfehler nicht durch Schwarz verdeckt werden.
|
||||
Für Multi-Shot-Beats gilt zusätzlich eine Segment-Schwelle pro sichtbarer
|
||||
Einstellung. Ein gutes erstes Segment darf kein zweites Segment mit schwachem
|
||||
Score mitziehen. Segmente unter `multi_shot_segment_threshold` werden nicht als
|
||||
Source-Material ausgegeben; der entsprechende Beat-Bereich bleibt im
|
||||
Cutter-Report offen, bis ein eigenständig belastbarer Treffer gefunden wird.
|
||||
stabile Wahrheit behandelt, sondern innerhalb derselben plausiblen Source-Scene
|
||||
nachjustiert. Die Nachjustierung nutzt eine saliency-gewichtete Mehrframe-Prüfung:
|
||||
Timecodes und statische Randbereiche werden entwertet, kontrastreiche und über
|
||||
mehrere Trailerframes unterscheidbare Bildbereiche zählen stärker. Dadurch kann
|
||||
eine schwache zweite Einstellung phasengenauer repariert werden, ohne den Fehler
|
||||
durch Schwarzbild zu verdecken oder einen Beat manuell zu kuratieren.
|
||||
|
||||
Der Cutter-Report verwendet Clip-Caching. Bereits vorhandene Compare-Clips werden
|
||||
wiederverwendet; bei gezielten Rematches wird nur der betroffene Beat neu gerendert
|
||||
(`CUTTER_REPORT_FORCE_BEATS`). So bleibt der Report aktuell, ohne alle Beats jedes
|
||||
Mal neu zu kodieren.
|
||||
|
||||
## Vision-Seeds vs. Vollscan
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -22,6 +22,7 @@ from __future__ import annotations
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
@@ -134,6 +135,19 @@ def _run(cmd: list[str], timeout: int = 120) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _forced_beats() -> set[int]:
    """Return the beat ids listed in the ``CUTTER_REPORT_FORCE_BEATS`` env var.

    The variable is split on commas, semicolons and whitespace. Empty tokens
    and tokens that ``int()`` rejects are skipped, so a malformed entry never
    raises. An unset or empty variable yields an empty set.
    """
    raw = os.environ.get("CUTTER_REPORT_FORCE_BEATS", "")
    forced: set[int] = set()
    for part in re.split(r"[,;\s]+", raw):
        if not part:
            # re.split yields empty strings for leading/trailing separators.
            continue
        try:
            forced.add(int(part))
        except ValueError:
            # Best effort: ignore tokens that are not integers.
            continue
    return forced
|
||||
|
||||
|
||||
def extract_still(video_path: Path, t_s: float, out: Path) -> bool:
|
||||
"""Always render fresh."""
|
||||
if not video_path.exists():
|
||||
@@ -410,6 +424,7 @@ def collect_rows(
|
||||
stills_dir.mkdir(parents=True, exist_ok=True)
|
||||
if with_clips:
|
||||
clips_dir.mkdir(parents=True, exist_ok=True)
|
||||
force_beats = _forced_beats()
|
||||
|
||||
rows: list[BeatRow] = []
|
||||
for beat in beats:
|
||||
@@ -454,13 +469,17 @@ def collect_rows(
|
||||
if with_stills:
|
||||
t_still = beat_still_time(beat["start_s"], beat["end_s"])
|
||||
tjpg = stills_dir / f"beat_{bid:02d}_trailer.jpg"
|
||||
if extract_still(trailer_path, t_still, tjpg):
|
||||
if tjpg.exists() and bid not in force_beats:
|
||||
trailer_still = tjpg
|
||||
elif extract_still(trailer_path, t_still, tjpg):
|
||||
trailer_still = tjpg
|
||||
if rec is not None:
|
||||
src_dur = max(0.04, rec["out_point_s"] - rec["in_point_s"])
|
||||
s_still = rec["in_point_s"] + min(0.4, src_dur * 0.3)
|
||||
sjpg = stills_dir / f"beat_{bid:02d}_source.jpg"
|
||||
if extract_still(source_path, s_still, sjpg):
|
||||
if sjpg.exists() and bid not in force_beats:
|
||||
source_still = sjpg
|
||||
elif extract_still(source_path, s_still, sjpg):
|
||||
source_still = sjpg
|
||||
|
||||
if with_clips:
|
||||
@@ -468,12 +487,16 @@ def collect_rows(
|
||||
|
||||
# Trailer clip (cutter-side, simple)
|
||||
tmp4 = clips_dir / f"beat_{bid:02d}_trailer.mp4"
|
||||
if extract_clip(trailer_path, beat["start_s"], beat_dur, tmp4):
|
||||
if tmp4.exists() and bid not in force_beats:
|
||||
trailer_clip = tmp4
|
||||
elif extract_clip(trailer_path, beat["start_s"], beat_dur, tmp4):
|
||||
trailer_clip = tmp4
|
||||
|
||||
if rec is not None:
|
||||
smp4 = clips_dir / f"beat_{bid:02d}_source.mp4"
|
||||
if num_segs >= 2:
|
||||
if smp4.exists() and bid not in force_beats:
|
||||
source_clip = smp4
|
||||
elif num_segs >= 2:
|
||||
seg_specs = [
|
||||
(float(s["in_point_s"]),
|
||||
max(0.04, float(s["out_point_s"]) - float(s["in_point_s"])))
|
||||
@@ -502,7 +525,9 @@ def collect_rows(
|
||||
"match_score": rec.get("match_score", 0.0),
|
||||
"is_confirmed": rec.get("is_confirmed", False),
|
||||
}]
|
||||
if build_compare_clip(
|
||||
if cmp4.exists() and bid not in force_beats:
|
||||
compare_clip = cmp4
|
||||
elif build_compare_clip(
|
||||
trailer_path, beat["start_s"], beat_dur,
|
||||
source_path, compare_segs,
|
||||
cmp4,
|
||||
|
||||
Reference in New Issue
Block a user