Recover short low-light vibe matches
This commit is contained in:
+1
-1
File diff suppressed because one or more lines are too long
+9
-6
File diff suppressed because one or more lines are too long
@@ -1038,6 +1038,97 @@ def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list
|
|||||||
return sorted(new_results, key=lambda r: r.beat_id)
|
return sorted(new_results, key=lambda r: r.beat_id)
|
||||||
|
|
||||||
|
|
||||||
|
def _recover_short_lowlight_vibe_matches(results: list, beats: list, cfg) -> list:
    """Promote clear scene-level hits for short unmatched beats to provisional matches.

    Short blue/dark dialogue shots can rank correctly on the scene-level
    histogram/pHash comparison yet still be rejected by the stricter content
    aligner, because the shot carries little texture, has motion blur, or is
    covered by a trailer timecode overlay.  For every beat that has no entry
    in ``results`` and lasts at most 2.25 s, this fallback:

    1. runs the vibe check against all cached scenes and requires the top hit
       to score at least 0.74 and to beat the runner-up by at least 0.03;
    2. requires the winning scene to be at least as long as the beat (and at
       least 0.5 s);
    3. scans the scene in 0.12 s steps for the in-point with the highest
       content-alignment score and requires that score to reach 0.15.

    Beats passing all three gates get an unconfirmed ``MatchResult`` whose
    score is a 55/45 blend of vibe and content scores, capped at 0.64 and
    floored at ``cfg.cv.deep_scan.provisional_match_threshold``.

    Args:
        results: Matches found so far; not mutated.
        beats: All beats of the edit being matched.
        cfg: Project configuration (paths, cv, export sections are read).

    Returns:
        ``results`` itself when there is no short unmatched beat; otherwise a
        new list holding the previous results plus any recovered matches,
        sorted by ``beat_id``.
    """
    from src.core.models import MatchResult, Scene
    from src.cv.global_scan import _content_alignment_score, _content_alignment_templates
    from src.cv.vibe_check import run_vibe_check
    from src.cv.frame_extractor import open_video

    # Tunables of the fallback, named once so the gates below read as prose.
    max_beat_duration_s = 2.25
    vibe_top_k = 6
    vibe_phash_max_distance = 64
    min_vibe_score = 0.74
    min_vibe_margin = 0.03
    min_scene_duration_s = 0.5
    scan_step_s = 0.12
    min_content_score = 0.15
    score_cap = 0.64
    vibe_weight = 0.55
    content_weight = 0.45

    def _decode_hist(entry, key):
        # Histograms are stored hex-encoded in the cache; absent/empty -> None.
        hex_text = entry.get(key)
        return bytes.fromhex(hex_text) if hex_text else None

    def _scan_times(first_s, last_s):
        # Candidate in-points; rounding keeps float drift out of the stepping.
        current = first_s
        while current <= last_s:
            yield current
            current = round(current + scan_step_s, 6)

    already_matched = {match.beat_id for match in results}
    short_unmatched = [
        beat
        for beat in beats
        if beat.beat_id not in already_matched and beat.duration_s <= max_beat_duration_s
    ]
    if not short_unmatched:
        return results

    scene_pool = []
    for entry in _load_scene_cache_light(cfg):
        scene_pool.append(
            Scene(
                scene_id=int(entry["scene_id"]),
                source_path=cfg.paths.source_movie,
                start_s=float(entry["start_s"]),
                end_s=float(entry["end_s"]),
                start_frame=int(entry["start_frame"]),
                end_frame=int(entry["end_frame"]),
                luma_hist=_decode_hist(entry, "luma_hist"),
                sat_hist=_decode_hist(entry, "sat_hist"),
                phash=entry.get("phash"),
            )
        )
    scene_lookup = {candidate.scene_id: candidate for candidate in scene_pool}
    combined = list(results)

    with open_video(cfg.paths.source_movie) as cap:
        for beat in short_unmatched:
            templates = _content_alignment_templates(beat, cfg)
            if not templates:
                continue

            ranked = run_vibe_check(
                beat,
                scene_pool,
                top_k=vibe_top_k,
                hist_method=cfg.cv.vibe_check.hist_compare_method,
                phash_max_distance=vibe_phash_max_distance,
            )
            # A runner-up is required so the margin test below is meaningful.
            if len(ranked) < 2:
                continue
            winner, runner_up = ranked[0], ranked[1]
            if winner.combined_score < min_vibe_score:
                continue
            if winner.combined_score - runner_up.combined_score < min_vibe_margin:
                continue

            scene = scene_lookup.get(winner.scene_id)
            if scene is None:
                continue
            if scene.duration_s < max(min_scene_duration_s, beat.duration_s):
                continue

            # Latest in-point that still leaves room for the whole beat.
            last_in_point_s = max(scene.start_s, scene.end_s - beat.duration_s)
            # max() keeps the first of equally-scored candidates, i.e. the
            # earliest in-point, exactly like a strict ">" running maximum.
            best: tuple[float, float] | None = max(
                (
                    (_content_alignment_score(cap, when_s, templates, cfg), when_s)
                    for when_s in _scan_times(scene.start_s, last_in_point_s)
                ),
                key=lambda pair: pair[0],
                default=None,
            )
            if best is None or best[0] < min_content_score:
                continue

            content_score, in_point_s = best
            floor_score = cfg.cv.deep_scan.provisional_match_threshold
            blended = winner.combined_score * vibe_weight + content_score * content_weight
            final_score = max(floor_score, min(score_cap, blended))

            combined.append(
                MatchResult(
                    beat_id=beat.beat_id,
                    scene_id=scene.scene_id,
                    source_path=scene.source_path,
                    in_point_s=in_point_s,
                    out_point_s=in_point_s + beat.duration_s,
                    in_point_frame=int(in_point_s * cfg.export.edl_frame_rate),
                    match_score=final_score,
                    match_location=(0, 0),
                    is_confirmed=False,
                    segments=tuple(),
                )
            )

    combined.sort(key=lambda match: match.beat_id)
    return combined
|
||||||
|
|
||||||
|
|
||||||
def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg) -> list:
|
def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg) -> list:
|
||||||
"""Drop vision-enabled matches whose final action phase contradicts the beat."""
|
"""Drop vision-enabled matches whose final action phase contradicts the beat."""
|
||||||
if not cfg.vision.enabled or not results:
|
if not cfg.vision.enabled or not results:
|
||||||
@@ -1929,6 +2020,7 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
|
|||||||
results = _attach_visual_segments(results, beats, cfg)
|
results = _attach_visual_segments(results, beats, cfg)
|
||||||
results = _filter_semantically_invalid_vision_matches(results, beats, cfg)
|
results = _filter_semantically_invalid_vision_matches(results, beats, cfg)
|
||||||
results = _recover_unmatched_beats_via_vision(results, beats, cfg)
|
results = _recover_unmatched_beats_via_vision(results, beats, cfg)
|
||||||
|
results = _recover_short_lowlight_vibe_matches(results, beats, cfg)
|
||||||
|
|
||||||
# A targeted one-beat match must NEVER delete or modify any other beat's
|
# A targeted one-beat match must NEVER delete or modify any other beat's
|
||||||
# cache entry. We deliberately re-load the raw cache from disk here so
|
# cache entry. We deliberately re-load the raw cache from disk here so
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 5.0 KiB After Width: | Height: | Size: 8.4 KiB |
Reference in New Issue
Block a user