Handle fade-led segment phase ties

2026-05-09 10:11:36 +02:00
parent c08ba97d37
commit f63d65fcd2
5 changed files with 65 additions and 20 deletions
@@ -1743,27 +1743,68 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
        return frame if ok else None

    trailer_cap = cv2.VideoCapture(str(cfg.paths.reference_trailer))
-    refs = [
-        prepared_gray(frame_at(trailer_cap, segment_beat.start_s + offset))
-        for offset in offsets
-        if offset <= segment_beat.duration_s + 0.04
-    ]
-    refs = [ref for ref in refs if ref is not None]
-    if len(refs) < 4:
+    ref_candidates = []
+    fallback_items = []
+    for offset in offsets:
+        if offset > segment_beat.duration_s + 0.04:
+            continue
+        frame = frame_at(trailer_cap, segment_beat.start_s + offset)
+        ref = prepared_gray(frame)
+        if ref is None:
+            continue
+        fallback_items.append((offset, ref))
+        raw_gray = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), size)
+        h, w = raw_gray.shape[:2]
+        raw_gray[: int(h * 0.16), : int(w * 0.32)] = 0
+        roi = raw_gray[int(h * 0.12) : int(h * 0.90), :]
+        mean_luma = float(roi.mean() / 255.0)
+        p90_luma = float(np.percentile(roi, 90) / 255.0)
+        contrast = float(roi.std() / 255.0)
+        ref_candidates.append((offset, ref, mean_luma, p90_luma, contrast))
+
+    transition_start = False
+    ref_items = []
+    if ref_candidates:
+        max_mean = max(item[2] for item in ref_candidates)
+        max_p90 = max(item[3] for item in ref_candidates)
+        transition_start = (
+            ref_candidates[0][2] < max_mean * 0.90
+            or ref_candidates[0][3] < max_p90 * 0.90
+        )
+        ref_items = [
+            (offset, ref)
+            for offset, ref, mean_luma, p90_luma, contrast in ref_candidates
+            if (
+                mean_luma >= max(0.16, max_mean * 0.82)
+                and p90_luma >= max(0.28, max_p90 * 0.86)
+                and contrast >= 0.035
+            )
+        ]
+    if len(ref_items) < 4:
+        ref_items = fallback_items
+    if len(ref_items) < 4:
        return None
+    ref_offsets = [item[0] for item in ref_items]
+    refs = [item[1] for item in ref_items]
+
+    align_offset = ref_offsets[0]
+    ref_offsets = [offset - align_offset for offset in ref_offsets]

    ref_stack = np.stack(refs, axis=0)
    edge_stack = np.stack([edge(ref) for ref in refs], axis=0)
-    saliency = ref_stack.std(axis=0) * 1.25 + edge_stack.mean(axis=0) * 0.75
+    # Static window/room edges are useful for finding the scene, but toxic for
+    # phase retuning inside a repeated dialogue shot. Bias the mask toward
+    # areas that actually change across the reference segment.
+    saliency = ref_stack.std(axis=0) * 3.0 + edge_stack.std(axis=0) * 0.75 + edge_stack.mean(axis=0) * 0.15
    saliency[:, : int(size[0] * 0.12)] *= 0.15
    saliency[: int(size[1] * 0.16), : int(size[0] * 0.32)] = 0.0
-    threshold = np.quantile(saliency, 0.72)
+    threshold = np.quantile(saliency, 0.66)
    mask = (saliency >= threshold).astype("float32")
    mask /= mask.sum() + 1e-6

    scene_start = float(scene["start_s"])
    scene_end = float(scene["end_s"])
-    scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s))
+    scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s - align_offset))
    max_points = 400
    step_s = max(0.08, (scan_end - scene_start) / max_points)

@@ -1789,7 +1830,7 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
        if t > scan_end:
            break
        vals = []
-        for offset, ref in zip(offsets, refs):
+        for offset, ref in zip(ref_offsets, refs):
            j = int(round((t + offset - scene_start) / step_s))
            if 0 <= j < len(source_frames):
                score = pair_score(ref, source_frames[j], mask)
@@ -1806,9 +1847,13 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa

    candidates.sort(reverse=True)
    best_score = candidates[0][0]
-    near_tie = [c for c in candidates if c[0] >= best_score - 0.002]
-    chosen = min(near_tie, key=lambda c: abs(c[2] - original_in_s))
-    return chosen[2], chosen[0]
+    tie_window = 0.014 if transition_start else 0.002
+    near_tie = [c for c in candidates if c[0] >= best_score - tie_window]
+    if transition_start:
+        chosen = max(near_tie, key=lambda c: c[2])
+    else:
+        chosen = min(near_tie, key=lambda c: abs((c[2] - align_offset) - original_in_s))
+    return max(scene_start, chosen[2] - align_offset), chosen[0]


 def cmd_match(args: argparse.Namespace, cfg) -> list: