Fix: prevent tail-trimming of valid matches at hard scene boundaries in global_scan.py
This commit is contained in:
+71
-4
@@ -260,12 +260,21 @@ def _fixed_content_sequence_score(
|
||||
in_point_s: float,
|
||||
templates: list[tuple[float, tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]]],
|
||||
cfg: AppConfig,
|
||||
max_source_duration_s: float | None = None,
|
||||
) -> float:
|
||||
if not templates:
|
||||
return 0.0
|
||||
|
||||
active = (
|
||||
[(off, feats) for off, feats in templates if off <= max_source_duration_s]
|
||||
if max_source_duration_s is not None and max_source_duration_s > 0
|
||||
else templates
|
||||
)
|
||||
if not active:
|
||||
return 0.0
|
||||
|
||||
scores: list[float] = []
|
||||
for offset_s, ref_features in templates:
|
||||
for offset_s, ref_features in active:
|
||||
frame = grab_frame_at(cap, in_point_s + offset_s)
|
||||
if frame is None:
|
||||
return 0.0
|
||||
@@ -802,6 +811,7 @@ def align_in_point_by_content(
|
||||
estimated_in_point_s: float,
|
||||
cfg: AppConfig,
|
||||
search_window_s: float | None = None,
|
||||
scene_end_s: float | None = None,
|
||||
) -> tuple[float, float]:
|
||||
"""
|
||||
Find the frame offset directly from image content around a rough match.
|
||||
@@ -809,6 +819,10 @@ def align_in_point_by_content(
|
||||
This is deliberately local: once a candidate shot is plausible, scanning a
|
||||
small window around it with many reference frames is faster and more robust
|
||||
than repeating a global scan or applying a fixed frame preroll.
|
||||
|
||||
scene_end_s: when provided, templates are dynamically filtered per candidate
|
||||
to only offsets that stay within the source scene, preventing cross-boundary
|
||||
frame reads from dragging scores into wrong-content territory.
|
||||
"""
|
||||
templates = _content_alignment_templates(beat, cfg)
|
||||
if not templates:
|
||||
@@ -830,7 +844,15 @@ def align_in_point_by_content(
|
||||
best_score = -1.0
|
||||
t = start_s
|
||||
while t <= end_s:
|
||||
score = _content_alignment_score(cap, t, templates, cfg)
|
||||
if scene_end_s is not None:
|
||||
avail_s = scene_end_s - t
|
||||
if avail_s > 0:
|
||||
active_templates = [(off, tpl) for off, tpl in templates if off <= avail_s]
|
||||
else:
|
||||
active_templates = []
|
||||
else:
|
||||
active_templates = templates
|
||||
score = _content_alignment_score(cap, t, active_templates, cfg) if active_templates else -1.0
|
||||
if score > best_score + tie_delta:
|
||||
best_score = score
|
||||
best_in = t
|
||||
@@ -981,6 +1003,7 @@ def estimate_usable_source_duration(
|
||||
cfg: AppConfig,
|
||||
sample_step_s: float | None = None,
|
||||
min_keep_s: float = 0.5,
|
||||
scene_end_s: float | None = None,
|
||||
) -> tuple[float, float]:
|
||||
"""
|
||||
Estimate how long the source stays visually aligned with the beat.
|
||||
@@ -1032,6 +1055,17 @@ def estimate_usable_source_duration(
|
||||
break
|
||||
|
||||
tail_safety_s = max(0.0, cfg.cv.deep_scan.trim_tail_frames / source_fps)
|
||||
|
||||
if scene_end_s is not None:
|
||||
avail_s = scene_end_s - in_point_s
|
||||
if avail_s > 0 and last_good >= avail_s - (step_s * 2):
|
||||
logger.info('Beat %d: Boundary hit: last_good=%.3f, avail_s=%.3f, step_s=%.3f. Disabling tail safety.', beat.beat_id, last_good, avail_s, step_s)
|
||||
tail_safety_s = 0.0
|
||||
last_good = max(last_good, avail_s)
|
||||
else:
|
||||
if avail_s > 0:
|
||||
logger.info('Beat %d: Boundary NOT hit: last_good=%.3f, avail_s=%.3f, thresh=%.3f', beat.beat_id, last_good, avail_s, avail_s - (step_s * 2))
|
||||
|
||||
usable = min(beat.duration_s, max(0.0, last_good - tail_safety_s))
|
||||
if usable < min_keep_s and scores:
|
||||
usable = min(beat.duration_s, max(min_keep_s, scores[0][0] + step_s - tail_safety_s))
|
||||
@@ -1071,6 +1105,7 @@ def refine_in_point_with_sequence(
|
||||
estimated_in_point_s: float,
|
||||
cfg: AppConfig,
|
||||
search_window_s: float | None = None,
|
||||
scene_end_s: float | None = None,
|
||||
) -> tuple[float, float]:
|
||||
"""
|
||||
Refine a rough source in-point by comparing several frames across the beat.
|
||||
@@ -1078,7 +1113,7 @@ def refine_in_point_with_sequence(
|
||||
Returns:
|
||||
(best_in_point_s, sequence_score)
|
||||
"""
|
||||
return align_in_point_by_content(beat, estimated_in_point_s, cfg, search_window_s)
|
||||
return align_in_point_by_content(beat, estimated_in_point_s, cfg, search_window_s, scene_end_s)
|
||||
|
||||
|
||||
def _find_scene_for_time(scenes: Sequence | None, t_sec: float, cfg: AppConfig):
|
||||
@@ -1357,23 +1392,51 @@ def run_global_scan(
|
||||
midpoint_t = coarse_in_s + (b.duration_s / 2)
|
||||
fine_t = refine_timestamp(midpoint_templates[i], midpoint_t, cfg)
|
||||
rough_in_s = max(0.0, fine_t - (b.duration_s / 2))
|
||||
# Don't let midpoint refinement jump to a different scene — a
|
||||
# scene boundary crossed here means a wrong-content frame won
|
||||
# the template match. Revert so the coarse candidate retains
|
||||
# its original scene context.
|
||||
if scenes:
|
||||
coarse_scene_check = _find_scene_for_time(scenes, coarse_in_s, cfg)
|
||||
rough_scene_check = _find_scene_for_time(scenes, rough_in_s, cfg)
|
||||
if (
|
||||
coarse_scene_check is not None
|
||||
and rough_scene_check is not None
|
||||
and coarse_scene_check.scene_id != rough_scene_check.scene_id
|
||||
):
|
||||
rough_in_s = coarse_in_s
|
||||
local_align_window_s = (
|
||||
min(cfg.vision.local_scan_step_s, cfg.cv.deep_scan.content_align_window_seconds)
|
||||
if is_weighted_seed_candidate
|
||||
else None
|
||||
)
|
||||
# When rough_in_s is near a scene boundary, filter templates to
|
||||
# stay within the available source window so cross-boundary frames
|
||||
# from unrelated content don't corrupt alignment and scoring.
|
||||
rough_scene_for_boundary = _find_scene_for_time(scenes, rough_in_s, cfg) if scenes else None
|
||||
rough_scene_end_s: float | None = None
|
||||
if rough_scene_for_boundary is not None and matchable_duration_s > 0:
|
||||
_avail = float(rough_scene_for_boundary.end_s) - rough_in_s
|
||||
if 0 < _avail < matchable_duration_s:
|
||||
rough_scene_end_s = float(rough_scene_for_boundary.end_s)
|
||||
refined_in_s, sequence_score = refine_in_point_with_sequence(
|
||||
b,
|
||||
rough_in_s,
|
||||
cfg,
|
||||
search_window_s=local_align_window_s,
|
||||
scene_end_s=rough_scene_end_s,
|
||||
)
|
||||
scene = _find_scene_for_time(scenes, refined_in_s, cfg)
|
||||
scene_fps = _source_fps_from_scene(scene) if scene is not None else source_fps
|
||||
adjusted_in_s = _apply_start_preroll(refined_in_s, scene_fps, cfg)
|
||||
adjusted_in_s = _clamp_to_scene_start(adjusted_in_s, scene)
|
||||
scene = _find_scene_for_time(scenes, adjusted_in_s, cfg)
|
||||
usable_duration_s, span_score = estimate_usable_source_duration(b, adjusted_in_s, cfg)
|
||||
usable_duration_s, span_score = estimate_usable_source_duration(
|
||||
b,
|
||||
adjusted_in_s,
|
||||
cfg,
|
||||
scene_end_s=float(scene.end_s) if scene is not None else None
|
||||
)
|
||||
out_s = adjusted_in_s + usable_duration_s
|
||||
if scene is not None:
|
||||
out_s = min(out_s, scene.end_s)
|
||||
@@ -1385,6 +1448,7 @@ def run_global_scan(
|
||||
adjusted_in_s,
|
||||
validation_templates,
|
||||
cfg,
|
||||
max_source_duration_s=duration_s if rough_scene_end_s is not None else None,
|
||||
)
|
||||
content_score = original_content_score
|
||||
content_in_s, align_content_score = align_in_point_by_content(
|
||||
@@ -1396,6 +1460,7 @@ def run_global_scan(
|
||||
if local_align_window_s is not None
|
||||
else min(0.8, cfg.cv.deep_scan.content_align_window_seconds)
|
||||
),
|
||||
scene_end_s=rough_scene_end_s,
|
||||
)
|
||||
if abs(content_in_s - adjusted_in_s) <= cfg.cv.deep_scan.content_align_window_seconds:
|
||||
with open_video(cfg.paths.source_movie) as validation_cap:
|
||||
@@ -1404,6 +1469,7 @@ def run_global_scan(
|
||||
content_in_s,
|
||||
validation_templates,
|
||||
cfg,
|
||||
max_source_duration_s=duration_s if rough_scene_end_s is not None else None,
|
||||
)
|
||||
if aligned_content_score >= original_content_score + 0.01:
|
||||
adjusted_in_s = content_in_s
|
||||
@@ -1459,6 +1525,7 @@ def run_global_scan(
|
||||
adjusted_in_s,
|
||||
validation_templates,
|
||||
cfg,
|
||||
max_source_duration_s=duration_s if rough_scene_end_s is not None else None,
|
||||
)
|
||||
else:
|
||||
motion_score = original_motion_score
|
||||
|
||||
Reference in New Issue
Block a user