Recover short low-light vibe matches

2026-05-09 10:38:57 +02:00
parent f63d65fcd2
commit bcaf0417b3
6 changed files with 102 additions and 7 deletions
@@ -1038,6 +1038,97 @@ def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list
    return sorted(new_results, key=lambda r: r.beat_id)


+def _recover_short_lowlight_vibe_matches(results: list, beats: list, cfg) -> list:
+    """Keep obvious short low-light scene hits as provisional instead of no-match.
+
+    Short blue/dark dialogue shots can be correctly ranked by scene-level
+    histogram/pHash but then rejected by the stricter content aligner because
+    the shot contains little texture, motion blur, or trailer timecode overlay.
+    This fallback only accepts the top vibe scene when it has a clear margin and
+    the local content scan still finds a usable in-point.
+    """
+    from src.core.models import MatchResult, Scene
+    from src.cv.global_scan import _content_alignment_score, _content_alignment_templates
+    from src.cv.vibe_check import run_vibe_check
+    from src.cv.frame_extractor import open_video
+
+    matched_ids = {r.beat_id for r in results}
+    targets = [b for b in beats if b.beat_id not in matched_ids and b.duration_s <= 2.25]
+    if not targets:
+        return results
+
+    raw_scenes = _load_scene_cache_light(cfg)
+    scenes = [
+        Scene(
+            scene_id=int(s["scene_id"]),
+            source_path=cfg.paths.source_movie,
+            start_s=float(s["start_s"]),
+            end_s=float(s["end_s"]),
+            start_frame=int(s["start_frame"]),
+            end_frame=int(s["end_frame"]),
+            luma_hist=bytes.fromhex(s["luma_hist"]) if s.get("luma_hist") else None,
+            sat_hist=bytes.fromhex(s["sat_hist"]) if s.get("sat_hist") else None,
+            phash=s.get("phash"),
+        )
+        for s in raw_scenes
+    ]
+    scenes_by_id = {s.scene_id: s for s in scenes}
+    recovered = list(results)
+
+    with open_video(cfg.paths.source_movie) as cap:
+        for beat in targets:
+            templates = _content_alignment_templates(beat, cfg)
+            if not templates:
+                continue
+            hits = run_vibe_check(
+                beat,
+                scenes,
+                top_k=6,
+                hist_method=cfg.cv.vibe_check.hist_compare_method,
+                phash_max_distance=64,
+            )
+            if len(hits) < 2:
+                continue
+            top, second = hits[0], hits[1]
+            if top.combined_score < 0.74 or top.combined_score - second.combined_score < 0.03:
+                continue
+            scene = scenes_by_id.get(top.scene_id)
+            if scene is None or scene.duration_s < max(0.5, beat.duration_s):
+                continue
+
+            best: tuple[float, float] | None = None
+            scan_end = max(scene.start_s, scene.end_s - beat.duration_s)
+            step_s = 0.12
+            t = scene.start_s
+            while t <= scan_end:
+                score = _content_alignment_score(cap, t, templates, cfg)
+                if best is None or score > best[0]:
+                    best = (score, t)
+                t = round(t + step_s, 6)
+            if best is None or best[0] < 0.15:
+                continue
+
+            content_score, in_point_s = best
+            final_score = max(
+                cfg.cv.deep_scan.provisional_match_threshold,
+                min(0.64, top.combined_score * 0.55 + content_score * 0.45),
+            )
+            recovered.append(MatchResult(
+                beat_id=beat.beat_id,
+                scene_id=scene.scene_id,
+                source_path=scene.source_path,
+                in_point_s=in_point_s,
+                out_point_s=in_point_s + beat.duration_s,
+                in_point_frame=int(in_point_s * cfg.export.edl_frame_rate),
+                match_score=final_score,
+                match_location=(0, 0),
+                is_confirmed=False,
+                segments=tuple(),
+            ))
+
+    return sorted(recovered, key=lambda r: r.beat_id)
+
+
 def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg) -> list:
    """Drop vision-enabled matches whose final action phase contradicts the beat."""
    if not cfg.vision.enabled or not results:
@@ -1929,6 +2020,7 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
    results = _attach_visual_segments(results, beats, cfg)
    results = _filter_semantically_invalid_vision_matches(results, beats, cfg)
    results = _recover_unmatched_beats_via_vision(results, beats, cfg)
+    results = _recover_short_lowlight_vibe_matches(results, beats, cfg)

    # A targeted one-beat match must NEVER delete or modify any other beat's
    # cache entry. We deliberately re-load the raw cache from disk here so