Handle fade-led segment phase ties

This commit is contained in:
Melbar
2026-05-09 10:11:36 +02:00
parent c08ba97d37
commit f63d65fcd2
5 changed files with 65 additions and 20 deletions
+1 -1
View File
File diff suppressed because one or more lines are too long
+5 -5
View File
File diff suppressed because one or more lines are too long
+59 -14
View File
@@ -1743,27 +1743,68 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
return frame if ok else None
trailer_cap = cv2.VideoCapture(str(cfg.paths.reference_trailer))
refs = [
prepared_gray(frame_at(trailer_cap, segment_beat.start_s + offset))
for offset in offsets
if offset <= segment_beat.duration_s + 0.04
]
refs = [ref for ref in refs if ref is not None]
if len(refs) < 4:
ref_candidates = []
fallback_items = []
for offset in offsets:
if offset > segment_beat.duration_s + 0.04:
continue
frame = frame_at(trailer_cap, segment_beat.start_s + offset)
ref = prepared_gray(frame)
if ref is None:
continue
fallback_items.append((offset, ref))
raw_gray = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), size)
h, w = raw_gray.shape[:2]
raw_gray[: int(h * 0.16), : int(w * 0.32)] = 0
roi = raw_gray[int(h * 0.12) : int(h * 0.90), :]
mean_luma = float(roi.mean() / 255.0)
p90_luma = float(np.percentile(roi, 90) / 255.0)
contrast = float(roi.std() / 255.0)
ref_candidates.append((offset, ref, mean_luma, p90_luma, contrast))
transition_start = False
ref_items = []
if ref_candidates:
max_mean = max(item[2] for item in ref_candidates)
max_p90 = max(item[3] for item in ref_candidates)
transition_start = (
ref_candidates[0][2] < max_mean * 0.90
or ref_candidates[0][3] < max_p90 * 0.90
)
ref_items = [
(offset, ref)
for offset, ref, mean_luma, p90_luma, contrast in ref_candidates
if (
mean_luma >= max(0.16, max_mean * 0.82)
and p90_luma >= max(0.28, max_p90 * 0.86)
and contrast >= 0.035
)
]
if len(ref_items) < 4:
ref_items = fallback_items
if len(ref_items) < 4:
return None
ref_offsets = [item[0] for item in ref_items]
refs = [item[1] for item in ref_items]
align_offset = ref_offsets[0]
ref_offsets = [offset - align_offset for offset in ref_offsets]
ref_stack = np.stack(refs, axis=0)
edge_stack = np.stack([edge(ref) for ref in refs], axis=0)
saliency = ref_stack.std(axis=0) * 1.25 + edge_stack.mean(axis=0) * 0.75
# Static window/room edges are useful for finding the scene, but toxic for
# phase retuning inside a repeated dialogue shot. Bias the mask toward
# areas that actually change across the reference segment.
saliency = ref_stack.std(axis=0) * 3.0 + edge_stack.std(axis=0) * 0.75 + edge_stack.mean(axis=0) * 0.15
saliency[:, : int(size[0] * 0.12)] *= 0.15
saliency[: int(size[1] * 0.16), : int(size[0] * 0.32)] = 0.0
threshold = np.quantile(saliency, 0.72)
threshold = np.quantile(saliency, 0.66)
mask = (saliency >= threshold).astype("float32")
mask /= mask.sum() + 1e-6
scene_start = float(scene["start_s"])
scene_end = float(scene["end_s"])
scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s))
scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s - align_offset))
max_points = 400
step_s = max(0.08, (scan_end - scene_start) / max_points)
@@ -1789,7 +1830,7 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
if t > scan_end:
break
vals = []
for offset, ref in zip(offsets, refs):
for offset, ref in zip(ref_offsets, refs):
j = int(round((t + offset - scene_start) / step_s))
if 0 <= j < len(source_frames):
score = pair_score(ref, source_frames[j], mask)
@@ -1806,9 +1847,13 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
candidates.sort(reverse=True)
best_score = candidates[0][0]
near_tie = [c for c in candidates if c[0] >= best_score - 0.002]
chosen = min(near_tie, key=lambda c: abs(c[2] - original_in_s))
return chosen[2], chosen[0]
tie_window = 0.014 if transition_start else 0.002
near_tie = [c for c in candidates if c[0] >= best_score - tie_window]
if transition_start:
chosen = max(near_tie, key=lambda c: c[2])
else:
chosen = min(near_tie, key=lambda c: abs((c[2] - align_offset) - original_in_s))
return max(scene_start, chosen[2] - align_offset), chosen[0]
def cmd_match(args: argparse.Namespace, cfg) -> list:
Binary file not shown.
Binary file not shown.