Retiming long scene matches by action phase

This commit is contained in:
Melbar
2026-05-02 20:47:59 +02:00
parent 252f710396
commit 8415516f89
3 changed files with 158 additions and 11 deletions
+69
View File
@@ -871,6 +871,75 @@ def align_in_point_by_motion(
return best_in, max(0.0, best_score)
def align_in_point_by_content_and_motion(
    beat: TrailerBeat,
    estimated_in_point_s: float,
    cfg: AppConfig,
    search_window_s: float | None = None,
) -> tuple[float, float, float, float]:
    """
    Align a candidate using still-frame content and motion phase together.

    Running content and motion as separate passes can overshoot short action
    phases: one pass may land on the right broad gesture and the next can slide
    to a visually similar but later posture. A joint score keeps the in-point
    tied to the same frame hypothesis throughout the local search.

    The source movie is scanned one frame step at a time over
    ``estimated_in_point_s +/- window`` (clamped at 0.0 on the low side). Each
    candidate time gets ``0.64 * content + 0.36 * motion`` minus an anchor
    penalty of 0.05 per second of distance from the estimate, capped at 0.18.
    When fewer than two motion templates are available, the content score is
    used in place of the motion score.

    Args:
        beat: Beat whose content and motion templates are matched.
        estimated_in_point_s: Prior in-point estimate, in seconds; centre of
            the search window and anchor for the distance penalty.
        cfg: Application config (source movie path, deep-scan window and
            tie-break delta, fallback EDL frame rate).
        search_window_s: Half-width of the search window in seconds; when
            ``None`` the configured ``content_align_window_seconds`` is used.

    Returns:
        ``(in_point_s, score, content_score, motion_score)``, with the three
        scores clamped to be non-negative. If there are no content templates,
        no usable frame rate, or no candidate with non-negative scores, the
        estimate is returned with all scores 0.0. ``score`` is the best
        penalised score seen; it is intentionally not overwritten when a
        near-tied candidate replaces the in-point, while the content and
        motion scores always belong to the returned in-point.
    """
    content_weight = 0.64
    motion_weight = 0.36
    anchor_penalty_per_s = 0.05
    anchor_penalty_cap = 0.18

    templates = _prepare_beat_templates(beat, cfg)
    if not templates:
        return estimated_in_point_s, 0.0, 0.0, 0.0

    # Prepared only once we know a scan will actually happen.
    motion_templates = _prepare_motion_templates(beat, cfg)
    use_motion = len(motion_templates) >= 2

    window_s = (
        search_window_s
        if search_window_s is not None
        else cfg.cv.deep_scan.content_align_window_seconds
    )
    start_s = max(0.0, estimated_in_point_s - window_s)
    end_s = estimated_in_point_s + window_s
    tie_delta = cfg.cv.deep_scan.start_tie_break_score_delta

    best_in = estimated_in_point_s
    best_score = -1.0
    best_content = -1.0
    best_motion = -1.0

    with open_video(cfg.paths.source_movie) as cap:
        # Prefer the container frame rate; fall back to the EDL rate when it
        # is unusable. The chained comparison is False for NaN as well.
        fps = float(cap.get(cv2.CAP_PROP_FPS))
        if not 0.0 < fps < float("inf"):
            fps = float(cfg.export.edl_frame_rate)
        if not 0.0 < fps < float("inf"):
            # A non-positive or non-finite rate would divide by zero or give
            # a step that never advances the scan; report "no alignment".
            return estimated_in_point_s, 0.0, 0.0, 0.0
        frame_step_s = 1.0 / fps
        half_frame_s = frame_step_s * 0.5

        t = start_s
        while t <= end_s:
            content_score = _content_alignment_score(cap, t, templates, cfg)
            motion_score = (
                _motion_phase_score(cap, t, motion_templates, cfg)
                if use_motion
                else content_score
            )
            # Candidates with a negative component score are skipped.
            if not (content_score < 0 or motion_score < 0):
                distance_s = abs(t - estimated_in_point_s)
                raw_score = (
                    content_score * content_weight + motion_score * motion_weight
                )
                anchor_penalty = min(
                    anchor_penalty_cap, distance_s * anchor_penalty_per_s
                )
                score = raw_score - anchor_penalty

                if score > best_score + tie_delta:
                    # Clear winner: becomes the new reference score.
                    best_score = score
                    best_in = t
                    best_content = content_score
                    best_motion = motion_score
                elif score >= best_score - tie_delta:
                    # Near tie: prefer the candidate closer to the estimate,
                    # and the earlier one when both are about equally far.
                    # best_score stays as the reference so a chain of ties
                    # cannot walk the threshold downwards.
                    best_distance_s = abs(best_in - estimated_in_point_s)
                    if distance_s < best_distance_s or (
                        abs(distance_s - best_distance_s) <= half_frame_s
                        and t < best_in
                    ):
                        best_in = t
                        best_content = content_score
                        best_motion = motion_score

            # Rounding keeps accumulated float error out of the timestamps.
            t = round(t + frame_step_s, 6)

    return (
        best_in,
        max(0.0, best_score),
        max(0.0, best_content),
        max(0.0, best_motion),
    )
def estimate_usable_source_duration(
beat: TrailerBeat,
in_point_s: float,