Retune weak multi-shot segment phases

This commit is contained in:
Melbar
2026-05-09 04:45:56 +02:00
parent fab6c53698
commit a275b2efb6
8 changed files with 178 additions and 11 deletions
+1 -1
View File
File diff suppressed because one or more lines are too long
+2 -2
View File
@@ -277,9 +277,9 @@ Trailer-TC in Trailer-Framerate, Source-TC in Source-Framerate.
### Beat 15 — ? / Vorläufig ### Beat 15 — ? / Vorläufig
- **Trailer**: 00:00:57:02 00:01:01:12 (4.40 s) - **Trailer**: 00:00:57:02 00:01:01:12 (4.40 s)
- **Source** : 00:02:10:08 00:02:13:03 (scenes 17, 309 (2 Segmente), score 0.650) - **Source** : 00:02:10:08 00:02:13:03 (scenes 17, 309 (2 Segmente), score 0.621)
- Seg 1: TC 00:02:10:08 dur 2.80s @ Trailer-Offset 0.24s (scene 17) - Seg 1: TC 00:02:10:08 dur 2.80s @ Trailer-Offset 0.24s (scene 17)
- Seg 2: TC 00:43:38:19 dur 1.28s @ Trailer-Offset 3.04s (scene 309) - Seg 2: TC 00:45:27:10 dur 1.28s @ Trailer-Offset 3.04s (scene 309, retuned phase)
- **Rematch**: `python cli.py rematch --beat 15` - **Rematch**: `python cli.py rematch --beat 15`
- **Phase**: static conversation - **Phase**: static conversation
- **Bild**: medium shot, profile view of two characters facing each other, indoor room with a large window overlooking a snowy forest - **Bild**: medium shot, profile view of two characters facing each other, indoor room with a large window overlooking a snowy forest
+156 -6
View File
@@ -270,9 +270,38 @@ def _normalize_cached_results(beats: list, results: list, cfg) -> list:
for result in results: for result in results:
beat = beats_by_id.get(result.beat_id) beat = beats_by_id.get(result.beat_id)
if getattr(result, "segments", ()): if getattr(result, "segments", ()):
segment_duration = sum(max(0.0, float(s.duration_s)) for s in result.segments) segment_threshold = cfg.cv.deep_scan.multi_shot_segment_threshold
repaired_segments = []
for segment in result.segments:
if float(segment.match_score) < segment_threshold:
scene = _scene_by_id_light(scenes, segment.scene_id)
if beat is not None and scene is not None:
segment_beat = replace(
beat,
start_s=beat.start_s + float(segment.trailer_offset_s),
end_s=beat.start_s + float(segment.trailer_offset_s) + float(segment.duration_s),
)
probe = _phase_probe_segment_in_scene(
segment_beat,
scene,
float(segment.in_point_s),
cfg,
)
if probe is not None:
in_point_s, _phase_score = probe
segment = replace(
segment,
in_point_s=in_point_s,
out_point_s=in_point_s + float(segment.duration_s),
)
repaired_segments.append(segment)
valid_segments = tuple(repaired_segments)
if not valid_segments:
continue
segment_duration = sum(max(0.0, float(s.duration_s)) for s in valid_segments)
weighted_score = ( weighted_score = (
sum(max(0.0, float(s.duration_s)) * float(s.match_score) for s in result.segments) sum(max(0.0, float(s.duration_s)) * float(s.match_score) for s in valid_segments)
/ segment_duration / segment_duration
if segment_duration > 0 else result.match_score if segment_duration > 0 else result.match_score
) )
@@ -287,7 +316,15 @@ def _normalize_cached_results(beats: list, results: list, cfg) -> list:
coverage = segment_duration / coverage_target coverage = segment_duration / coverage_target
if coverage < cfg.cv.deep_scan.min_duration_coverage: if coverage < cfg.cv.deep_scan.min_duration_coverage:
continue continue
normalized.append(replace(result, match_score=weighted_score)) first_segment = valid_segments[0]
normalized.append(replace(
result,
scene_id=first_segment.scene_id,
in_point_s=first_segment.in_point_s,
out_point_s=first_segment.out_point_s,
match_score=weighted_score,
segments=valid_segments,
))
continue continue
if result.match_score < cfg.cv.deep_scan.provisional_match_threshold: if result.match_score < cfg.cv.deep_scan.provisional_match_threshold:
@@ -1363,6 +1400,39 @@ def _attach_visual_segments(results: list, beats: list, cfg) -> list:
if not segment_matches: if not segment_matches:
continue continue
seg = segment_matches[0] seg = segment_matches[0]
if seg.match_score < cfg.cv.deep_scan.multi_shot_segment_threshold:
repaired = _local_same_scene_segment_match(
segment_beat,
beat,
start_s,
cached + expanded,
cfg,
)
if (
repaired is None
or repaired.match_score
< max(
cfg.cv.deep_scan.multi_shot_segment_threshold,
seg.match_score + cfg.cv.deep_scan.duration_tie_break_score_delta,
)
):
scenes = _load_scene_cache_light(cfg)
scene = _scene_by_id_light(scenes, seg.scene_id)
probe = (
_phase_probe_segment_in_scene(segment_beat, scene, seg.in_point_s, cfg)
if scene is not None else None
)
if probe is None:
continue
in_point_s, _phase_score = probe
from dataclasses import replace as _replace
seg = _replace(
seg,
in_point_s=in_point_s,
out_point_s=in_point_s + seg.duration_s,
)
else:
seg = repaired
seg_dur = min(max(0.0, end_s - start_s), max(0.0, seg.duration_s)) seg_dur = min(max(0.0, end_s - start_s), max(0.0, seg.duration_s))
segments.append( segments.append(
MatchSegment( MatchSegment(
@@ -1484,7 +1554,10 @@ def _match_unmatched_visual_segments(
if recovered: if recovered:
rec = recovered[0] rec = recovered[0]
seg_dur = min(max(0.0, end_s - start_s), max(0.0, rec.duration_s)) seg_dur = min(max(0.0, end_s - start_s), max(0.0, rec.duration_s))
if seg_dur > 0: if (
seg_dur > 0
and rec.match_score >= cfg.cv.deep_scan.multi_shot_segment_threshold
):
segments.append(MatchSegment( segments.append(MatchSegment(
trailer_offset_s=start_s, trailer_offset_s=start_s,
duration_s=seg_dur, duration_s=seg_dur,
@@ -1506,6 +1579,8 @@ def _match_unmatched_visual_segments(
segments.append(local_segment) segments.append(local_segment)
continue continue
seg = segment_matches[0] seg = segment_matches[0]
if seg.match_score < cfg.cv.deep_scan.multi_shot_segment_threshold:
continue
seg_dur = min(max(0.0, end_s - start_s), max(0.0, seg.duration_s)) seg_dur = min(max(0.0, end_s - start_s), max(0.0, seg.duration_s))
segments.append( segments.append(
MatchSegment( MatchSegment(
@@ -1577,7 +1652,13 @@ def _local_same_scene_segment_match(segment_beat, beat, segment_offset_s: float,
cfg.cv.deep_scan.provisional_content_threshold * 0.70, cfg.cv.deep_scan.provisional_content_threshold * 0.70,
cfg.cv.deep_scan.provisional_match_threshold, cfg.cv.deep_scan.provisional_match_threshold,
) )
step_s = max(1.0 / cfg.export.edl_frame_rate, 0.04) # Coarse repair scan over already plausible neighbouring scenes. A frame-step
# sweep across long dialogue scenes is slow and can overfit static layouts.
step_s = max(
cfg.vision.local_scan_step_s,
cfg.cv.deep_scan.content_align_sample_step_s,
0.25,
)
best: tuple[float, float, int] | None = None best: tuple[float, float, int] | None = None
with open_video(cfg.paths.source_movie) as cap: with open_video(cfg.paths.source_movie) as cap:
for scene_id in scene_ids: for scene_id in scene_ids:
@@ -1586,12 +1667,14 @@ def _local_same_scene_segment_match(segment_beat, beat, segment_offset_s: float,
continue continue
start_s = max(0.0, float(scene["start_s"]) - 0.25) start_s = max(0.0, float(scene["start_s"]) - 0.25)
end_s = max(start_s, float(scene["end_s"]) - max(0.04, segment_beat.duration_s) + 0.25) end_s = max(start_s, float(scene["end_s"]) - max(0.04, segment_beat.duration_s) + 0.25)
max_points = max(4, min(48, int(cfg.vision.local_scan_max_points_per_scene)))
scene_step_s = max(step_s, (end_s - start_s) / max_points)
t = start_s t = start_s
while t <= end_s: while t <= end_s:
score = _content_alignment_score(cap, t, templates, cfg) score = _content_alignment_score(cap, t, templates, cfg)
if best is None or score > best[0]: if best is None or score > best[0]:
best = (score, t, int(scene_id)) best = (score, t, int(scene_id))
t = round(t + step_s, 6) t = round(t + scene_step_s, 6)
if best is None or best[0] < min_score: if best is None or best[0] < min_score:
return None return None
@@ -1609,6 +1692,73 @@ def _local_same_scene_segment_match(segment_beat, beat, segment_offset_s: float,
) )
def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: float, cfg):
    """Retune a weak multi-shot segment inside its own scene using cheap frame features.

    A handful of trailer frames sampled from the start of ``segment_beat`` are
    compared against source frames at the same relative offsets, for candidate
    in-points swept across ``scene``. Each frame is reduced to a z-normalised
    vector of downscaled grayscale pixels plus Canny edges, so the per-frame
    similarity is (up to the epsilon in the normalisation) a correlation
    coefficient.

    Args:
        segment_beat: Object exposing ``start_s`` and ``duration_s`` (seconds
            on the trailer timeline) for the sub-shot being retuned.
        scene: Mapping with ``"start_s"`` and ``"end_s"`` (seconds on the
            source timeline) bounding the sweep.
        original_in_s: The segment's current source in-point; used only to
            break near-ties in favour of the closest candidate.
        cfg: Configuration exposing ``paths.reference_trailer`` and
            ``paths.source_movie``.

    Returns:
        ``(in_point_s, mean_score)`` for the chosen candidate, or ``None`` when
        fewer than three trailer reference frames could be read or no candidate
        produced at least three comparable frames.
    """
    import cv2
    import numpy as np

    offsets = [0.0, 0.28, 0.56, 0.84, 1.12]
    size = (160, 90)

    def feature(frame):
        """Return the z-normalised gray+edge vector for *frame*, or None."""
        if frame is None:
            return None
        h, w = frame.shape[:2]
        frame = frame.copy()
        # Blank the top-left corner before extracting features.
        # NOTE(review): presumably masks a burned-in logo/timecode overlay — confirm.
        frame[: int(h * 0.16), : int(w * 0.28)] = 0
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.resize(gray, size)
        edges = cv2.Canny(gray, 40, 120)
        vec = np.concatenate([
            gray.reshape(-1).astype("float32") / 255.0,
            edges.reshape(-1).astype("float32") / 255.0,
        ])
        return (vec - vec.mean()) / (vec.std() + 1e-6)

    def frame_at(cap, t_s):
        """Seek *cap* to *t_s* seconds and return the decoded frame, or None."""
        cap.set(cv2.CAP_PROP_POS_MSEC, t_s * 1000.0)
        ok, frame = cap.read()
        return frame if ok else None

    # Keep every reference feature paired with the offset it was sampled at.
    # Filtering failed reads first and re-zipping with ``offsets`` afterwards
    # would shift later references onto the wrong source offsets.
    ref_pairs = []
    trailer_cap = cv2.VideoCapture(str(cfg.paths.reference_trailer))
    try:
        for offset in offsets:
            if offset <= segment_beat.duration_s + 0.04:
                ref = feature(frame_at(trailer_cap, segment_beat.start_s + offset))
                if ref is not None:
                    ref_pairs.append((offset, ref))
    finally:
        # Always free the decoder handle, including on early return or error.
        trailer_cap.release()
    if len(ref_pairs) < 3:
        return None

    scene_start = float(scene["start_s"])
    scene_end = float(scene["end_s"])
    # Last in-point at which the whole segment still fits inside the scene.
    scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s))
    max_points = 96
    # Cap the sweep at roughly ``max_points`` probes, never finer than 80 ms.
    step_s = max(0.08, (scan_end - scene_start) / max_points)

    # Each candidate is (mean score, worst per-frame score, in-point).
    candidates: list[tuple[float, float, float]] = []
    source_cap = cv2.VideoCapture(str(cfg.paths.source_movie))
    try:
        t = scene_start
        while t <= scan_end:
            vals = []
            for offset, ref in ref_pairs:
                src = feature(frame_at(source_cap, t + offset))
                if src is not None:
                    vals.append(float(np.dot(ref, src) / len(ref)))
            if len(vals) >= 3:
                candidates.append((sum(vals) / len(vals), min(vals), t))
            # Rounding keeps float drift from accumulating across the sweep.
            t = round(t + step_s, 6)
    finally:
        source_cap.release()
    if not candidates:
        return None

    # Best mean score first; the worst per-frame score acts as tie-breaker.
    candidates.sort(reverse=True)
    best_score = candidates[0][0]
    # Among candidates within 0.01 of the best, stay closest to the original
    # in-point so that flat score plateaus do not cause arbitrary jumps.
    near_tie = [c for c in candidates if c[0] >= best_score - 0.01]
    chosen = min(near_tie, key=lambda c: abs(c[2] - original_in_s))
    return chosen[2], chosen[0]
def cmd_match(args: argparse.Namespace, cfg) -> list: def cmd_match(args: argparse.Namespace, cfg) -> list:
from src.pipeline.matcher import run_matching from src.pipeline.matcher import run_matching
from dataclasses import replace from dataclasses import replace
+3
View File
@@ -87,6 +87,9 @@ coarse_score_weight = 0.10
duration_score_weight = 0.20 duration_score_weight = 0.20
duration_tie_break_score_delta = 0.03 duration_tie_break_score_delta = 0.03
min_duration_coverage = 0.55 min_duration_coverage = 0.55
# Every visible sub-shot in a multi-shot beat must pass this stricter gate.
# A weak segment is left unmatched instead of being hidden by a strong neighbor.
multi_shot_segment_threshold = 0.50
continuity_seed_offsets_s = [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0] continuity_seed_offsets_s = [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0]
scene_seed_top_k = 30 scene_seed_top_k = 30
scene_seed_points_per_scene = 6 scene_seed_points_per_scene = 6
+14 -2
View File
@@ -131,13 +131,25 @@ bereits auf die sichtbare Aktionsphase ausgerichtet.
Der Segment-Offset zählt nur über vorherige scorebare Bildinseln, nicht über Der Segment-Offset zählt nur über vorherige scorebare Bildinseln, nicht über
schwarze oder blendige Lücken. Nach dem Retiming wird die nutzbare Source- schwarze oder blendige Lücken. Nach dem Retiming wird die nutzbare Source-
Dauer erneut geschätzt; läuft die Source am Ende in eine sichtbar andere`r`nAktionsphase, wird der Treffer im Cutter-Report klar als phasenkritisch markiert.`r`nSchwarz/Placeholder wird nur für wirklich ungematchte Trailer-Bereiche oder Fades`r`nverwendet, nicht um sichtbare Kandidatenbewegung im Review zu verstecken. Dauer erneut geschätzt; läuft die Source am Ende in eine sichtbar andere
Aktionsphase, wird der Treffer im Cutter-Report klar als phasenkritisch
markiert. Schwarz/Placeholder wird nur für wirklich ungematchte Trailer-
Bereiche oder Fades verwendet, nicht um sichtbare Kandidatenbewegung im Review
zu verstecken.
Diese Span-Schätzung ist strenger als der grobe Suchscore: Ein fast stehender Diese Span-Schätzung ist strenger als der grobe Suchscore: Ein fast stehender
Anfang darf einen Match nicht retten, wenn spätere Frames sichtbar in eine Anfang darf einen Match nicht retten, wenn spätere Frames sichtbar in eine
andere Gestik, Körperposition oder eintretende Figur driften. Stabile andere Gestik, Körperposition oder eintretende Figur driften. Stabile
Score-Plateaus dürfen nur verlängern, wenn sie noch nah genug am Anfangsniveau Score-Plateaus dürfen nur verlängern, wenn sie noch nah genug am Anfangsniveau
liegen; sonst bleibt der Treffer vorläufig und muss neu gesucht oder visuell`r`ngeprüft werden. Der Review-Clip zeigt den Kandidaten weiterhin sichtbar, damit`r`nPhasenfehler nicht durch Schwarz verdeckt werden. liegen; sonst bleibt der Treffer vorläufig und muss neu gesucht oder visuell
geprüft werden. Der Review-Clip zeigt den Kandidaten weiterhin sichtbar, damit
Phasenfehler nicht durch Schwarz verdeckt werden.
Für Multi-Shot-Beats gilt zusätzlich eine Segment-Schwelle pro sichtbarer
Einstellung. Ein gutes erstes Segment darf kein zweites Segment mit schwachem
Score mitziehen. Segmente unter `multi_shot_segment_threshold` werden nicht als
Source-Material ausgegeben; der entsprechende Beat-Bereich bleibt im
Cutter-Report offen, bis ein eigenständig belastbarer Treffer gefunden wird.
## Vision-Seeds vs. Vollscan ## Vision-Seeds vs. Vollscan
Binary file not shown.
Binary file not shown.
+2
View File
@@ -64,6 +64,7 @@ class DeepScanConfig:
duration_score_weight: float duration_score_weight: float
duration_tie_break_score_delta: float duration_tie_break_score_delta: float
min_duration_coverage: float min_duration_coverage: float
multi_shot_segment_threshold: float
continuity_seed_offsets_s: tuple[float, ...] continuity_seed_offsets_s: tuple[float, ...]
scene_seed_top_k: int scene_seed_top_k: int
scene_seed_points_per_scene: int scene_seed_points_per_scene: int
@@ -267,6 +268,7 @@ def load_config(
duration_score_weight=float(cv_raw["deep_scan"].get("duration_score_weight", 0.20)), duration_score_weight=float(cv_raw["deep_scan"].get("duration_score_weight", 0.20)),
duration_tie_break_score_delta=float(cv_raw["deep_scan"].get("duration_tie_break_score_delta", 0.03)), duration_tie_break_score_delta=float(cv_raw["deep_scan"].get("duration_tie_break_score_delta", 0.03)),
min_duration_coverage=float(cv_raw["deep_scan"].get("min_duration_coverage", 0.65)), min_duration_coverage=float(cv_raw["deep_scan"].get("min_duration_coverage", 0.65)),
multi_shot_segment_threshold=float(cv_raw["deep_scan"].get("multi_shot_segment_threshold", 0.50)),
continuity_seed_offsets_s=tuple( continuity_seed_offsets_s=tuple(
float(v) for v in cv_raw["deep_scan"].get( float(v) for v in cv_raw["deep_scan"].get(
"continuity_seed_offsets_s", "continuity_seed_offsets_s",