Recover short low-light vibe matches

2026-05-09 10:38:57 +02:00
parent f63d65fcd2
commit bcaf0417b3
6 changed files with 102 additions and 7 deletions
@@ -1038,6 +1038,97 @@ def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list
    return sorted(new_results, key=lambda r: r.beat_id)
 def _recover_short_lowlight_vibe_matches(results: list, beats: list, cfg) -> list:
    """Keep obvious short low-light scene hits as provisional instead of no-match.
    Short blue/dark dialogue shots can be correctly ranked by scene-level
    histogram/pHash but then rejected by the stricter content aligner because
    the shot contains little texture, motion blur, or trailer timecode overlay.
    This fallback only accepts the top vibe scene when it has a clear margin and
    the local content scan still finds a usable in-point.
    """
    from src.core.models import MatchResult, Scene
    from src.cv.global_scan import _content_alignment_score, _content_alignment_templates
    from src.cv.vibe_check import run_vibe_check
    from src.cv.frame_extractor import open_video
    matched_ids = {r.beat_id for r in results}
    targets = [b for b in beats if b.beat_id not in matched_ids and b.duration_s <= 2.25]
    if not targets:
        return results
    raw_scenes = _load_scene_cache_light(cfg)
    scenes = [
        Scene(
            scene_id=int(s["scene_id"]),
            source_path=cfg.paths.source_movie,
            start_s=float(s["start_s"]),
            end_s=float(s["end_s"]),
            start_frame=int(s["start_frame"]),
            end_frame=int(s["end_frame"]),
            luma_hist=bytes.fromhex(s["luma_hist"]) if s.get("luma_hist") else None,
            sat_hist=bytes.fromhex(s["sat_hist"]) if s.get("sat_hist") else None,
            phash=s.get("phash"),
        )
        for s in raw_scenes
    ]
    scenes_by_id = {s.scene_id: s for s in scenes}
    recovered = list(results)
    with open_video(cfg.paths.source_movie) as cap:
        for beat in targets:
            templates = _content_alignment_templates(beat, cfg)
            if not templates:
                continue
            hits = run_vibe_check(
                beat,
                scenes,
                top_k=6,
                hist_method=cfg.cv.vibe_check.hist_compare_method,
                phash_max_distance=64,
            )
            if len(hits) < 2:
                continue
            top, second = hits[0], hits[1]
            if top.combined_score < 0.74 or top.combined_score - second.combined_score < 0.03:
                continue
            scene = scenes_by_id.get(top.scene_id)
            if scene is None or scene.duration_s < max(0.5, beat.duration_s):
                continue
            best: tuple[float, float] | None = None
            scan_end = max(scene.start_s, scene.end_s - beat.duration_s)
            step_s = 0.12
            t = scene.start_s
            while t <= scan_end:
                score = _content_alignment_score(cap, t, templates, cfg)
                if best is None or score > best[0]:
                    best = (score, t)
                t = round(t + step_s, 6)
            if best is None or best[0] < 0.15:
                continue
            content_score, in_point_s = best
            final_score = max(
                cfg.cv.deep_scan.provisional_match_threshold,
                min(0.64, top.combined_score * 0.55 + content_score * 0.45),
            )
            recovered.append(MatchResult(
                beat_id=beat.beat_id,
                scene_id=scene.scene_id,
                source_path=scene.source_path,
                in_point_s=in_point_s,
                out_point_s=in_point_s + beat.duration_s,
                in_point_frame=int(in_point_s * cfg.export.edl_frame_rate),
                match_score=final_score,
                match_location=(0, 0),
                is_confirmed=False,
                segments=tuple(),
            ))
    return sorted(recovered, key=lambda r: r.beat_id)
 def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg) -> list:
    """Drop vision-enabled matches whose final action phase contradicts the beat."""
    if not cfg.vision.enabled or not results:
@@ -1929,6 +2020,7 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
    results = _attach_visual_segments(results, beats, cfg)
    results = _filter_semantically_invalid_vision_matches(results, beats, cfg)
    results = _recover_unmatched_beats_via_vision(results, beats, cfg)
    results = _recover_short_lowlight_vibe_matches(results, beats, cfg)
    # A targeted one-beat match must NEVER delete or modify any other beat's
    # cache entry. We deliberately re-load the raw cache from disk here so