Improve local phase retuning
This commit is contained in:
@@ -1912,15 +1912,19 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
|
||||
|
||||
scene_start = float(scene["start_s"])
|
||||
scene_end = float(scene["end_s"])
|
||||
scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s - align_offset))
|
||||
center_t = max(scene_start, min(scene_end, original_in_s + align_offset))
|
||||
retune_radius_s = max(4.0, min(12.0, segment_beat.duration_s * 2.5))
|
||||
scan_start = max(scene_start, center_t - retune_radius_s)
|
||||
scene_scan_end = min(scene_end, center_t + retune_radius_s)
|
||||
scan_end = max(scan_start, scene_scan_end - max(0.04, segment_beat.duration_s - align_offset))
|
||||
max_points = 400
|
||||
step_s = max(0.08, (scan_end - scene_start) / max_points)
|
||||
step_s = max(0.04, (scan_end - scan_start) / max_points)
|
||||
|
||||
source_cap = cv2.VideoCapture(str(cfg.paths.source_movie))
|
||||
source_fps = source_cap.get(cv2.CAP_PROP_FPS) or _scene_fps_light(scene, cfg)
|
||||
stride = max(1, int(round(step_s * source_fps)))
|
||||
start_frame = max(0, int(round(scene_start * source_fps)))
|
||||
end_frame = max(start_frame, int(round(scene_end * source_fps)))
|
||||
start_frame = max(0, int(round(scan_start * source_fps)))
|
||||
end_frame = max(start_frame, int(round(scene_scan_end * source_fps)))
|
||||
times: list[float] = []
|
||||
source_frames: list = []
|
||||
frame_idx = start_frame
|
||||
@@ -1932,33 +1936,60 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
|
||||
times.append(frame_idx / source_fps)
|
||||
source_frames.append(prepared_gray(frame))
|
||||
frame_idx += stride
|
||||
base_time = times[0] if times else scan_start
|
||||
|
||||
candidates: list[tuple[float, float, float]] = []
|
||||
for i, t in enumerate(times):
|
||||
if t > scan_end:
|
||||
break
|
||||
vals = []
|
||||
src_for_offsets = []
|
||||
for offset, ref in zip(ref_offsets, refs):
|
||||
j = int(round((t + offset - scene_start) / step_s))
|
||||
j = int(round((t + offset - base_time) / step_s))
|
||||
if 0 <= j < len(source_frames):
|
||||
score = pair_score(ref, source_frames[j], mask)
|
||||
src = source_frames[j]
|
||||
score = pair_score(ref, src, mask)
|
||||
else:
|
||||
src = None
|
||||
score = None
|
||||
if score is not None:
|
||||
vals.append(score)
|
||||
src_for_offsets.append(src)
|
||||
if len(vals) >= 4:
|
||||
avg_score = sum(vals) / len(vals)
|
||||
candidates.append((0.55 * avg_score + 0.45 * min(vals), min(vals), t))
|
||||
early_count = min(2, len(vals))
|
||||
tail_count = min(2, len(vals))
|
||||
early_score = sum(vals[:early_count]) / early_count
|
||||
tail_score = sum(vals[-tail_count:]) / tail_count
|
||||
motion_vals = []
|
||||
for idx in range(1, min(len(refs), len(src_for_offsets))):
|
||||
if src_for_offsets[idx - 1] is None or src_for_offsets[idx] is None:
|
||||
continue
|
||||
ref_motion = refs[idx] - refs[idx - 1]
|
||||
src_motion = src_for_offsets[idx] - src_for_offsets[idx - 1]
|
||||
motion_vals.append(1.0 - float((np.abs(ref_motion - src_motion) * mask).sum()))
|
||||
motion_score = sum(motion_vals) / len(motion_vals) if motion_vals else avg_score
|
||||
# Phase retuning must reject "same shot, wrong moment" matches.
|
||||
# A plain average can hide a bad onset inside slow dialogue shots;
|
||||
# keep the low-water mark, onset, and frame-to-frame motion influential.
|
||||
phase_score = (
|
||||
0.26 * avg_score
|
||||
+ 0.24 * min(vals)
|
||||
+ 0.24 * early_score
|
||||
+ 0.08 * tail_score
|
||||
+ 0.18 * motion_score
|
||||
)
|
||||
candidates.append((phase_score, min(vals), t))
|
||||
|
||||
if not candidates:
|
||||
return None
|
||||
|
||||
candidates.sort(reverse=True)
|
||||
best_score = candidates[0][0]
|
||||
tie_window = 0.014 if transition_start else 0.002
|
||||
tie_window = 0.006 if transition_start else 0.002
|
||||
near_tie = [c for c in candidates if c[0] >= best_score - tie_window]
|
||||
if transition_start:
|
||||
chosen = max(near_tie, key=lambda c: c[2])
|
||||
chosen = max(near_tie, key=lambda c: (c[1], c[0]))
|
||||
else:
|
||||
chosen = min(near_tie, key=lambda c: abs((c[2] - align_offset) - original_in_s))
|
||||
return max(scene_start, chosen[2] - align_offset), chosen[0]
|
||||
|
||||
Reference in New Issue
Block a user