Handle fade-led segment phase ties

2026-05-09 10:11:36 +02:00
parent c08ba97d37
commit f63d65fcd2
5 changed files with 65 additions and 20 deletions
@@ -1743,27 +1743,68 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
        return frame if ok else None
    trailer_cap = cv2.VideoCapture(str(cfg.paths.reference_trailer))
-    refs = [
+    ref_candidates = []
-        prepared_gray(frame_at(trailer_cap, segment_beat.start_s + offset))
+    fallback_items = []
-        for offset in offsets
+    for offset in offsets:
-        if offset <= segment_beat.duration_s + 0.04
+        if offset > segment_beat.duration_s + 0.04:
            continue
        frame = frame_at(trailer_cap, segment_beat.start_s + offset)
        ref = prepared_gray(frame)
        if ref is None:
            continue
        fallback_items.append((offset, ref))
        raw_gray = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), size)
        h, w = raw_gray.shape[:2]
        raw_gray[: int(h * 0.16), : int(w * 0.32)] = 0
        roi = raw_gray[int(h * 0.12) : int(h * 0.90), :]
        mean_luma = float(roi.mean() / 255.0)
        p90_luma = float(np.percentile(roi, 90) / 255.0)
        contrast = float(roi.std() / 255.0)
        ref_candidates.append((offset, ref, mean_luma, p90_luma, contrast))
    transition_start = False
    ref_items = []
    if ref_candidates:
        max_mean = max(item[2] for item in ref_candidates)
        max_p90 = max(item[3] for item in ref_candidates)
        transition_start = (
            ref_candidates[0][2] < max_mean * 0.90
            or ref_candidates[0][3] < max_p90 * 0.90
        )
        ref_items = [
            (offset, ref)
            for offset, ref, mean_luma, p90_luma, contrast in ref_candidates
            if (
                mean_luma >= max(0.16, max_mean * 0.82)
                and p90_luma >= max(0.28, max_p90 * 0.86)
                and contrast >= 0.035
            )
        ]
-    refs = [ref for ref in refs if ref is not None]
+    if len(ref_items) < 4:
-    if len(refs) < 4:
+        ref_items = fallback_items
    if len(ref_items) < 4:
        return None
    ref_offsets = [item[0] for item in ref_items]
    refs = [item[1] for item in ref_items]
    align_offset = ref_offsets[0]
    ref_offsets = [offset - align_offset for offset in ref_offsets]
    ref_stack = np.stack(refs, axis=0)
    edge_stack = np.stack([edge(ref) for ref in refs], axis=0)
-    saliency = ref_stack.std(axis=0) * 1.25 + edge_stack.mean(axis=0) * 0.75
+    # Static window/room edges are useful for finding the scene, but toxic for
    # phase retuning inside a repeated dialogue shot. Bias the mask toward
    # areas that actually change across the reference segment.
    saliency = ref_stack.std(axis=0) * 3.0 + edge_stack.std(axis=0) * 0.75 + edge_stack.mean(axis=0) * 0.15
    saliency[:, : int(size[0] * 0.12)] *= 0.15
    saliency[: int(size[1] * 0.16), : int(size[0] * 0.32)] = 0.0
-    threshold = np.quantile(saliency, 0.72)
+    threshold = np.quantile(saliency, 0.66)
    mask = (saliency >= threshold).astype("float32")
    mask /= mask.sum() + 1e-6
    scene_start = float(scene["start_s"])
    scene_end = float(scene["end_s"])
-    scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s))
+    scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s - align_offset))
    max_points = 400
    step_s = max(0.08, (scan_end - scene_start) / max_points)
@@ -1789,7 +1830,7 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
        if t > scan_end:
            break
        vals = []
-        for offset, ref in zip(offsets, refs):
+        for offset, ref in zip(ref_offsets, refs):
            j = int(round((t + offset - scene_start) / step_s))
            if 0 <= j < len(source_frames):
                score = pair_score(ref, source_frames[j], mask)
@@ -1806,9 +1847,13 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
    candidates.sort(reverse=True)
    best_score = candidates[0][0]
-    near_tie = [c for c in candidates if c[0] >= best_score - 0.002]
+    tie_window = 0.014 if transition_start else 0.002
-    chosen = min(near_tie, key=lambda c: abs(c[2] - original_in_s))
+    near_tie = [c for c in candidates if c[0] >= best_score - tie_window]
-    return chosen[2], chosen[0]
+    if transition_start:
        chosen = max(near_tie, key=lambda c: c[2])
    else:
        chosen = min(near_tie, key=lambda c: abs((c[2] - align_offset) - original_in_s))
    return max(scene_start, chosen[2] - align_offset), chosen[0]
 def cmd_match(args: argparse.Namespace, cfg) -> list: