Improve vision matching for dissolve-heavy beats

This commit is contained in:
Melbar
2026-05-02 16:15:51 +02:00
parent 858a814db1
commit d9e470c877
4 changed files with 261 additions and 25 deletions
+103 -9
View File
@@ -458,7 +458,23 @@ def _find_scene_for_in_point(cfg, in_point_s: float):
def _reference_scoreable_segments(beat, cfg) -> list[tuple[float, float]]:
"""Find visible source-matchable islands inside a trailer beat."""
from src.cv.frame_extractor import grab_frame_at_path
from src.cv.global_scan import _is_scoreable_reference_frame
from src.cv.global_scan import (
_corr_same_size,
_is_scoreable_reference_frame,
_prepare_haystack,
_reference_visibility_stats,
)
def is_visible(frame) -> bool:
    """Return whether ``frame`` clears the relaxed luma and contrast gates.

    ``None`` is never visible. Otherwise the three statistics returned by
    ``_reference_visibility_stats`` (unpacked as mean luma, p90 luma and
    contrast) are compared against scaled-down ``cfg.cv.deep_scan``
    thresholds: either luma statistic may pass, and the contrast must pass.
    """
    if frame is None:
        return False
    mean_luma, p90_luma, contrast = _reference_visibility_stats(frame, cfg)
    limits = cfg.cv.deep_scan
    # Either luma statistic reaching its reduced threshold is sufficient.
    bright_enough = (
        mean_luma >= limits.scoreable_luma_mean_min * 0.45
        or p90_luma >= limits.scoreable_luma_p90_min * 0.50
    )
    # The contrast floor never drops below 8.0, whatever the configured minimum.
    contrast_floor = max(8.0, limits.scoreable_contrast_min * 0.30)
    contrasty_enough = contrast >= contrast_floor
    return bright_enough and contrasty_enough
step_s = max(0.08, cfg.cv.deep_scan.span_sample_step_s)
min_segment_s = max(0.32, step_s * 3.0)
@@ -487,7 +503,46 @@ def _reference_scoreable_segments(beat, cfg) -> list[tuple[float, float]]:
if end - start >= min_segment_s:
raw.append((start, end))
return raw
expanded: list[tuple[float, float]] = []
same_shot_corr_min = 0.72
for start_s, end_s in raw:
start_anchor = grab_frame_at_path(beat.trailer_path, beat.start_s + start_s)
end_anchor = grab_frame_at_path(beat.trailer_path, beat.start_s + max(start_s, end_s - step_s))
start_feature = _prepare_haystack(start_anchor, cfg) if start_anchor is not None else None
end_feature = _prepare_haystack(end_anchor, cfg) if end_anchor is not None else None
soft_start = start_s
t = round(start_s - step_s, 6)
while t >= 0.0:
frame = grab_frame_at_path(beat.trailer_path, beat.start_s + t)
if not is_visible(frame):
break
if start_feature is not None and _corr_same_size(_prepare_haystack(frame, cfg), start_feature) < same_shot_corr_min:
break
soft_start = max(0.0, t)
t = round(t - step_s, 6)
soft_end = end_s
t = round(end_s, 6)
while t <= beat.duration_s + 1e-6:
frame = grab_frame_at_path(beat.trailer_path, beat.start_s + t)
if not is_visible(frame):
break
if end_feature is not None and _corr_same_size(_prepare_haystack(frame, cfg), end_feature) < same_shot_corr_min:
break
soft_end = min(beat.duration_s, t + step_s)
t = round(t + step_s, 6)
if soft_end - soft_start >= min_segment_s:
expanded.append((soft_start, soft_end))
merged: list[tuple[float, float]] = []
for start_s, end_s in expanded:
if merged and start_s - merged[-1][1] <= bridge_gap_s:
merged[-1] = (merged[-1][0], max(merged[-1][1], end_s))
else:
merged.append((start_s, end_s))
return merged
def _trim_beats_to_single_visual_island(beats: list, cfg) -> tuple[list, dict[int, tuple[float, float]]]:
@@ -555,6 +610,28 @@ def _apply_single_island_segments(results: list, trims: dict[int, tuple[float, f
return expanded
def _merge_best_results(existing: list, candidates: list, cfg) -> list:
"""Merge matches by beat, preferring confirmed or higher-scoring results."""
by_id = {r.beat_id: r for r in existing}
for candidate in candidates:
old = by_id.get(candidate.beat_id)
if old is None:
by_id[candidate.beat_id] = candidate
continue
candidate_confirmed = candidate.match_score >= cfg.cv.deep_scan.match_threshold or candidate.is_confirmed
old_confirmed = old.match_score >= cfg.cv.deep_scan.match_threshold or old.is_confirmed
if (
candidate_confirmed and not old_confirmed
or candidate.match_score > old.match_score + cfg.cv.deep_scan.duration_tie_break_score_delta
or (
candidate.match_score >= old.match_score - cfg.cv.deep_scan.duration_tie_break_score_delta
and candidate.duration_s > old.duration_s
)
):
by_id[candidate.beat_id] = candidate
return sorted(by_id.values(), key=lambda r: r.beat_id)
def _attach_visual_segments(results: list, beats: list, cfg) -> list:
"""Attach automatic sub-shot matches for multi-island trailer beats."""
from dataclasses import replace
@@ -657,16 +734,21 @@ def _run_segment_match(segment_beat, continuity, cfg, allow_fullscan: bool = Tru
seed_in_points=continuity,
)
if fast_matches:
return fast_matches
if not allow_fullscan or all(
m.is_confirmed or m.match_score >= cfg.cv.deep_scan.match_threshold
for m in fast_matches
):
return fast_matches
if not allow_fullscan:
return []
return fast_matches if cfg.vision.enabled else []
return run_matching(
full_matches = run_matching(
cfg,
[segment_beat],
seed_in_points=continuity,
)
return _merge_best_results(fast_matches if cfg.vision.enabled else [], full_matches, cfg)
def _match_unmatched_visual_segments(
@@ -862,9 +944,21 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
seed_in_points=seed_in_points,
)
if len(results) < len(scan_beats):
matched_ids = {r.beat_id for r in results}
remaining_beats = [b for b in scan_beats if b.beat_id not in matched_ids]
if len(results) < len(scan_beats) or any(
not r.is_confirmed and r.match_score < cfg.cv.deep_scan.match_threshold
for r in results
):
results_by_id = {r.beat_id: r for r in results}
remaining_beats = [
b for b in scan_beats
if (
b.beat_id not in results_by_id
or (
not results_by_id[b.beat_id].is_confirmed
and results_by_id[b.beat_id].match_score < cfg.cv.deep_scan.match_threshold
)
)
]
if remaining_beats:
full_results = run_matching(
cfg,
@@ -872,7 +966,7 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
force_reindex=args.force_reindex,
seed_in_points=seed_in_points,
)
results = sorted([*results, *full_results], key=lambda r: r.beat_id)
results = _merge_best_results(results, full_results, cfg)
results = _apply_single_island_segments(results, single_island_trims)
results = _match_unmatched_visual_segments(
results,