Recover short low-light vibe matches
This commit is contained in:
+1
-1
File diff suppressed because one or more lines are too long
+9
-6
File diff suppressed because one or more lines are too long
@@ -1038,6 +1038,97 @@ def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list
|
||||
return sorted(new_results, key=lambda r: r.beat_id)
|
||||
|
||||
|
||||
def _recover_short_lowlight_vibe_matches(results: list, beats: list, cfg) -> list:
    """Add provisional matches for short unmatched beats with a clear top vibe scene.

    Rationale: short blue/dark dialogue shots can be ranked correctly by the
    scene-level histogram/pHash comparison and still be rejected by the
    stricter content aligner, because the shot has little texture, motion
    blur, or a trailer timecode overlay. Rather than leaving such beats as
    no-match, this fallback keeps the top vibe scene as an unconfirmed match.

    A beat is considered only when it has no entry in ``results`` and its
    duration is at most 2.25 s. The top vibe hit is accepted only when:

    * at least two hits are returned, the best one scores >= 0.74 and leads
      the runner-up by >= 0.03;
    * the scene is cached and lasts at least ``max(0.5, beat.duration_s)``;
    * scanning the scene in 0.12 s steps yields a best content-alignment
      score >= 0.15, whose timestamp becomes the in-point.

    The stored score blends vibe (55%) and content (45%) scores, is capped at
    0.64, and is floored at ``cfg.cv.deep_scan.provisional_match_threshold``.

    Args:
        results: Existing match results; each exposes ``beat_id``.
        beats: All beats; each exposes ``beat_id`` and ``duration_s``.
        cfg: Project configuration (paths, cv and export settings are read).

    Returns:
        ``results`` itself (same object, unsorted) when no beat qualifies as a
        candidate; otherwise a new list holding the existing results plus any
        recovered matches, sorted by ``beat_id``.
    """
    from src.core.models import MatchResult, Scene
    from src.cv.global_scan import _content_alignment_score, _content_alignment_templates
    from src.cv.vibe_check import run_vibe_check
    from src.cv.frame_extractor import open_video

    # Tunables for this fallback, named once so the guards below read clearly.
    max_beat_duration_s = 2.25
    min_scene_duration_s = 0.5
    min_top_vibe_score = 0.74
    min_vibe_margin = 0.03
    scan_step_s = 0.12
    min_content_score = 0.15
    score_cap = 0.64
    vibe_weight = 0.55
    content_weight = 0.45

    already_matched = {match.beat_id for match in results}
    candidates = []
    for beat in beats:
        if beat.beat_id in already_matched:
            continue
        if beat.duration_s <= max_beat_duration_s:
            candidates.append(beat)
    if not candidates:
        # Nothing to recover: hand back the caller's list untouched.
        return results

    # Rebuild lightweight Scene objects from the cached scene records.
    scene_pool = []
    scene_lookup = {}
    for raw in _load_scene_cache_light(cfg):
        scene_obj = Scene(
            scene_id=int(raw["scene_id"]),
            source_path=cfg.paths.source_movie,
            start_s=float(raw["start_s"]),
            end_s=float(raw["end_s"]),
            start_frame=int(raw["start_frame"]),
            end_frame=int(raw["end_frame"]),
            luma_hist=bytes.fromhex(raw["luma_hist"]) if raw.get("luma_hist") else None,
            sat_hist=bytes.fromhex(raw["sat_hist"]) if raw.get("sat_hist") else None,
            phash=raw.get("phash"),
        )
        scene_pool.append(scene_obj)
        scene_lookup[scene_obj.scene_id] = scene_obj

    recovered = [*results]

    with open_video(cfg.paths.source_movie) as cap:
        for beat in candidates:
            templates = _content_alignment_templates(beat, cfg)
            if not templates:
                continue

            hits = run_vibe_check(
                beat,
                scene_pool,
                top_k=6,
                hist_method=cfg.cv.vibe_check.hist_compare_method,
                phash_max_distance=64,
            )
            # A margin check needs both a winner and a runner-up.
            if len(hits) < 2:
                continue
            top = hits[0]
            runner_up = hits[1]
            if top.combined_score < min_top_vibe_score:
                continue
            if top.combined_score - runner_up.combined_score < min_vibe_margin:
                continue

            scene = scene_lookup.get(top.scene_id)
            if scene is None:
                continue
            if scene.duration_s < max(min_scene_duration_s, beat.duration_s):
                continue

            # Slide a candidate in-point across the scene; the last start
            # still leaves room for the whole beat before the scene ends.
            last_start_s = max(scene.start_s, scene.end_s - beat.duration_s)
            best_score = None
            best_start_s = None
            cursor_s = scene.start_s
            while cursor_s <= last_start_s:
                candidate_score = _content_alignment_score(cap, cursor_s, templates, cfg)
                if best_score is None or candidate_score > best_score:
                    best_score = candidate_score
                    best_start_s = cursor_s
                # Rounding keeps repeated float additions from drifting.
                cursor_s = round(cursor_s + scan_step_s, 6)

            if best_score is None or best_score < min_content_score:
                continue

            blended = top.combined_score * vibe_weight + best_score * content_weight
            final_score = max(
                cfg.cv.deep_scan.provisional_match_threshold,
                min(score_cap, blended),
            )
            provisional = MatchResult(
                beat_id=beat.beat_id,
                scene_id=scene.scene_id,
                source_path=scene.source_path,
                in_point_s=best_start_s,
                out_point_s=best_start_s + beat.duration_s,
                in_point_frame=int(best_start_s * cfg.export.edl_frame_rate),
                match_score=final_score,
                match_location=(0, 0),
                is_confirmed=False,
                segments=tuple(),
            )
            recovered.append(provisional)

    recovered.sort(key=lambda match: match.beat_id)
    return recovered
|
||||
|
||||
|
||||
def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg) -> list:
|
||||
"""Drop vision-enabled matches whose final action phase contradicts the beat."""
|
||||
if not cfg.vision.enabled or not results:
|
||||
@@ -1929,6 +2020,7 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
|
||||
results = _attach_visual_segments(results, beats, cfg)
|
||||
results = _filter_semantically_invalid_vision_matches(results, beats, cfg)
|
||||
results = _recover_unmatched_beats_via_vision(results, beats, cfg)
|
||||
results = _recover_short_lowlight_vibe_matches(results, beats, cfg)
|
||||
|
||||
# A targeted one-beat match must NEVER delete or modify any other beat's
|
||||
# cache entry. We deliberately re-load the raw cache from disk here so
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 5.0 KiB After Width: | Height: | Size: 8.4 KiB |
Reference in New Issue
Block a user