Improve local phase retuning

This commit is contained in:
Melbar
2026-05-09 12:35:33 +02:00
parent 71117a8a3b
commit ae3c2b1b13
4 changed files with 52 additions and 10 deletions
+40 -9
View File
@@ -1912,15 +1912,19 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
scene_start = float(scene["start_s"])
scene_end = float(scene["end_s"])
scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s - align_offset))
center_t = max(scene_start, min(scene_end, original_in_s + align_offset))
retune_radius_s = max(4.0, min(12.0, segment_beat.duration_s * 2.5))
scan_start = max(scene_start, center_t - retune_radius_s)
scene_scan_end = min(scene_end, center_t + retune_radius_s)
scan_end = max(scan_start, scene_scan_end - max(0.04, segment_beat.duration_s - align_offset))
max_points = 400
step_s = max(0.08, (scan_end - scene_start) / max_points)
step_s = max(0.04, (scan_end - scan_start) / max_points)
source_cap = cv2.VideoCapture(str(cfg.paths.source_movie))
source_fps = source_cap.get(cv2.CAP_PROP_FPS) or _scene_fps_light(scene, cfg)
stride = max(1, int(round(step_s * source_fps)))
start_frame = max(0, int(round(scene_start * source_fps)))
end_frame = max(start_frame, int(round(scene_end * source_fps)))
start_frame = max(0, int(round(scan_start * source_fps)))
end_frame = max(start_frame, int(round(scene_scan_end * source_fps)))
times: list[float] = []
source_frames: list = []
frame_idx = start_frame
@@ -1932,33 +1936,60 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
times.append(frame_idx / source_fps)
source_frames.append(prepared_gray(frame))
frame_idx += stride
base_time = times[0] if times else scan_start
candidates: list[tuple[float, float, float]] = []
for i, t in enumerate(times):
if t > scan_end:
break
vals = []
src_for_offsets = []
for offset, ref in zip(ref_offsets, refs):
j = int(round((t + offset - scene_start) / step_s))
j = int(round((t + offset - base_time) / step_s))
if 0 <= j < len(source_frames):
score = pair_score(ref, source_frames[j], mask)
src = source_frames[j]
score = pair_score(ref, src, mask)
else:
src = None
score = None
if score is not None:
vals.append(score)
src_for_offsets.append(src)
if len(vals) >= 4:
avg_score = sum(vals) / len(vals)
candidates.append((0.55 * avg_score + 0.45 * min(vals), min(vals), t))
early_count = min(2, len(vals))
tail_count = min(2, len(vals))
early_score = sum(vals[:early_count]) / early_count
tail_score = sum(vals[-tail_count:]) / tail_count
motion_vals = []
for idx in range(1, min(len(refs), len(src_for_offsets))):
if src_for_offsets[idx - 1] is None or src_for_offsets[idx] is None:
continue
ref_motion = refs[idx] - refs[idx - 1]
src_motion = src_for_offsets[idx] - src_for_offsets[idx - 1]
motion_vals.append(1.0 - float((np.abs(ref_motion - src_motion) * mask).sum()))
motion_score = sum(motion_vals) / len(motion_vals) if motion_vals else avg_score
# Phase retuning must reject "same shot, wrong moment" matches.
# A plain average can hide a bad onset inside slow dialogue shots;
# keep the low-water mark, onset, and frame-to-frame motion influential.
phase_score = (
0.26 * avg_score
+ 0.24 * min(vals)
+ 0.24 * early_score
+ 0.08 * tail_score
+ 0.18 * motion_score
)
candidates.append((phase_score, min(vals), t))
if not candidates:
return None
candidates.sort(reverse=True)
best_score = candidates[0][0]
tie_window = 0.014 if transition_start else 0.002
tie_window = 0.006 if transition_start else 0.002
near_tie = [c for c in candidates if c[0] >= best_score - tie_window]
if transition_start:
chosen = max(near_tie, key=lambda c: c[2])
chosen = max(near_tie, key=lambda c: (c[1], c[0]))
else:
chosen = min(near_tie, key=lambda c: abs((c[2] - align_offset) - original_in_s))
return max(scene_start, chosen[2] - align_offset), chosen[0]