Retiming long scene matches by action phase
This commit is contained in:
@@ -871,6 +871,75 @@ def align_in_point_by_motion(
|
||||
return best_in, max(0.0, best_score)
|
||||
|
||||
|
||||
def align_in_point_by_content_and_motion(
    beat: TrailerBeat,
    estimated_in_point_s: float,
    cfg: AppConfig,
    search_window_s: float | None = None,
) -> tuple[float, float, float, float]:
    """
    Refine an in-point by scoring content and motion on the same frame.

    Every frame time in ``estimated_in_point_s +/- window`` is scored once
    with a weighted blend of the content score and the motion-phase score,
    minus a small penalty that grows with distance from the estimate.
    Scoring both signals against one frame hypothesis avoids the overshoot
    that two independent passes can produce on short action phases, where
    the second pass may slide to a similar-looking but later posture.

    Args:
        beat: Beat whose content and motion templates are matched.
        estimated_in_point_s: Initial in-point guess, in seconds.
        cfg: Application configuration (source path, fallback frame rate,
            default window and tie-break delta are read from it).
        search_window_s: Half-width of the scan window in seconds. When
            ``None``, ``cfg.cv.deep_scan.content_align_window_seconds`` is
            used.

    Returns:
        ``(in_point_s, score, content_score, motion_score)``; the three
        scores are clamped to be at least ``0.0``. When no content
        templates are available the estimate is returned unchanged with
        all scores ``0.0``.
    """
    content_templates = _prepare_beat_templates(beat, cfg)
    phase_templates = _prepare_motion_templates(beat, cfg)
    if not content_templates:
        return estimated_in_point_s, 0.0, 0.0, 0.0

    with open_video(cfg.paths.source_movie) as cap:
        # A reported frame rate of 0 is falsy, so the EDL rate is used instead.
        source_fps = float(cap.get(cv2.CAP_PROP_FPS)) or cfg.export.edl_frame_rate
        step_s = 1.0 / source_fps

        if search_window_s is not None:
            half_window_s = search_window_s
        else:
            half_window_s = cfg.cv.deep_scan.content_align_window_seconds

        scan_from_s = max(0.0, estimated_in_point_s - half_window_s)
        scan_until_s = estimated_in_point_s + half_window_s
        tie_margin = cfg.cv.deep_scan.start_tie_break_score_delta
        half_step_s = step_s * 0.5

        best_in = estimated_in_point_s
        best_score = -1.0
        best_content = -1.0
        best_motion = -1.0

        cursor_s = scan_from_s
        while cursor_s <= scan_until_s:
            content = _content_alignment_score(
                cap, cursor_s, content_templates, cfg
            )
            # With fewer than two motion templates the content score is
            # reused as the motion term.
            if len(phase_templates) >= 2:
                motion = _motion_phase_score(cap, cursor_s, phase_templates, cfg)
            else:
                motion = content

            # A negative score from either scorer means this frame is skipped.
            if not (content < 0 or motion < 0):
                offset_s = abs(cursor_s - estimated_in_point_s)
                blended = content * 0.64 + motion * 0.36
                # The distance penalty is capped so far frames can still win.
                score = blended - min(0.18, offset_s * 0.05)

                if score > best_score + tie_margin:
                    best_score = score
                    best_in, best_content, best_motion = cursor_s, content, motion
                elif score >= best_score - tie_margin:
                    # Near-tie: prefer the frame closer to the estimate, and
                    # the earlier one when both are (almost) equally far.
                    # best_score is not updated here, so the returned score
                    # may belong to a different frame than best_in.
                    best_offset_s = abs(best_in - estimated_in_point_s)
                    if offset_s < best_offset_s or (
                        abs(offset_s - best_offset_s) <= half_step_s
                        and cursor_s < best_in
                    ):
                        best_in, best_content, best_motion = (
                            cursor_s,
                            content,
                            motion,
                        )

            # Rounding limits float drift while stepping frame by frame.
            cursor_s = round(cursor_s + step_s, 6)

    return (
        best_in,
        max(0.0, best_score),
        max(0.0, best_content),
        max(0.0, best_motion),
    )
|
||||
|
||||
|
||||
def estimate_usable_source_duration(
|
||||
beat: TrailerBeat,
|
||||
in_point_s: float,
|
||||
|
||||
Reference in New Issue
Block a user