Improve local phase retuning

2026-05-09 12:35:33 +02:00
parent 71117a8a3b
commit ae3c2b1b13
4 changed files with 52 additions and 10 deletions
@@ -1912,15 +1912,19 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa

    scene_start = float(scene["start_s"])
    scene_end = float(scene["end_s"])
-    scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s - align_offset))
+    center_t = max(scene_start, min(scene_end, original_in_s + align_offset))
+    retune_radius_s = max(4.0, min(12.0, segment_beat.duration_s * 2.5))
+    scan_start = max(scene_start, center_t - retune_radius_s)
+    scene_scan_end = min(scene_end, center_t + retune_radius_s)
+    scan_end = max(scan_start, scene_scan_end - max(0.04, segment_beat.duration_s - align_offset))
    max_points = 400
-    step_s = max(0.08, (scan_end - scene_start) / max_points)
+    step_s = max(0.04, (scan_end - scan_start) / max_points)

    source_cap = cv2.VideoCapture(str(cfg.paths.source_movie))
    source_fps = source_cap.get(cv2.CAP_PROP_FPS) or _scene_fps_light(scene, cfg)
    stride = max(1, int(round(step_s * source_fps)))
-    start_frame = max(0, int(round(scene_start * source_fps)))
-    end_frame = max(start_frame, int(round(scene_end * source_fps)))
+    start_frame = max(0, int(round(scan_start * source_fps)))
+    end_frame = max(start_frame, int(round(scene_scan_end * source_fps)))
    times: list[float] = []
    source_frames: list = []
    frame_idx = start_frame
@@ -1932,33 +1936,60 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
        times.append(frame_idx / source_fps)
        source_frames.append(prepared_gray(frame))
        frame_idx += stride
+    base_time = times[0] if times else scan_start

    candidates: list[tuple[float, float, float]] = []
    for i, t in enumerate(times):
        if t > scan_end:
            break
        vals = []
+        src_for_offsets = []
        for offset, ref in zip(ref_offsets, refs):
-            j = int(round((t + offset - scene_start) / step_s))
+            j = int(round((t + offset - base_time) / step_s))
            if 0 <= j < len(source_frames):
-                score = pair_score(ref, source_frames[j], mask)
+                src = source_frames[j]
+                score = pair_score(ref, src, mask)
            else:
+                src = None
                score = None
            if score is not None:
                vals.append(score)
+                src_for_offsets.append(src)
        if len(vals) >= 4:
            avg_score = sum(vals) / len(vals)
-            candidates.append((0.55 * avg_score + 0.45 * min(vals), min(vals), t))
+            early_count = min(2, len(vals))
+            tail_count = min(2, len(vals))
+            early_score = sum(vals[:early_count]) / early_count
+            tail_score = sum(vals[-tail_count:]) / tail_count
+            motion_vals = []
+            for idx in range(1, min(len(refs), len(src_for_offsets))):
+                if src_for_offsets[idx - 1] is None or src_for_offsets[idx] is None:
+                    continue
+                ref_motion = refs[idx] - refs[idx - 1]
+                src_motion = src_for_offsets[idx] - src_for_offsets[idx - 1]
+                motion_vals.append(1.0 - float((np.abs(ref_motion - src_motion) * mask).sum()))
+            motion_score = sum(motion_vals) / len(motion_vals) if motion_vals else avg_score
+            # Phase retuning must reject "same shot, wrong moment" matches.
+            # A plain average can hide a bad onset inside slow dialogue shots;
+            # keep the low-water mark, onset, and frame-to-frame motion influential.
+            phase_score = (
+                0.26 * avg_score
+                + 0.24 * min(vals)
+                + 0.24 * early_score
+                + 0.08 * tail_score
+                + 0.18 * motion_score
+            )
+            candidates.append((phase_score, min(vals), t))

    if not candidates:
        return None

    candidates.sort(reverse=True)
    best_score = candidates[0][0]
-    tie_window = 0.014 if transition_start else 0.002
+    tie_window = 0.006 if transition_start else 0.002
    near_tie = [c for c in candidates if c[0] >= best_score - tie_window]
    if transition_start:
-        chosen = max(near_tie, key=lambda c: c[2])
+        chosen = max(near_tie, key=lambda c: (c[1], c[0]))
    else:
        chosen = min(near_tie, key=lambda c: abs((c[2] - align_offset) - original_in_s))
    return max(scene_start, chosen[2] - align_offset), chosen[0]