Handle fade-led segment phase ties
This commit is contained in:
+1
-1
File diff suppressed because one or more lines are too long
+5
-5
File diff suppressed because one or more lines are too long
@@ -1743,27 +1743,68 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
|
||||
return frame if ok else None
|
||||
|
||||
trailer_cap = cv2.VideoCapture(str(cfg.paths.reference_trailer))
|
||||
refs = [
|
||||
prepared_gray(frame_at(trailer_cap, segment_beat.start_s + offset))
|
||||
for offset in offsets
|
||||
if offset <= segment_beat.duration_s + 0.04
|
||||
]
|
||||
refs = [ref for ref in refs if ref is not None]
|
||||
if len(refs) < 4:
|
||||
ref_candidates = []
|
||||
fallback_items = []
|
||||
for offset in offsets:
|
||||
if offset > segment_beat.duration_s + 0.04:
|
||||
continue
|
||||
frame = frame_at(trailer_cap, segment_beat.start_s + offset)
|
||||
ref = prepared_gray(frame)
|
||||
if ref is None:
|
||||
continue
|
||||
fallback_items.append((offset, ref))
|
||||
raw_gray = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), size)
|
||||
h, w = raw_gray.shape[:2]
|
||||
raw_gray[: int(h * 0.16), : int(w * 0.32)] = 0
|
||||
roi = raw_gray[int(h * 0.12) : int(h * 0.90), :]
|
||||
mean_luma = float(roi.mean() / 255.0)
|
||||
p90_luma = float(np.percentile(roi, 90) / 255.0)
|
||||
contrast = float(roi.std() / 255.0)
|
||||
ref_candidates.append((offset, ref, mean_luma, p90_luma, contrast))
|
||||
|
||||
transition_start = False
|
||||
ref_items = []
|
||||
if ref_candidates:
|
||||
max_mean = max(item[2] for item in ref_candidates)
|
||||
max_p90 = max(item[3] for item in ref_candidates)
|
||||
transition_start = (
|
||||
ref_candidates[0][2] < max_mean * 0.90
|
||||
or ref_candidates[0][3] < max_p90 * 0.90
|
||||
)
|
||||
ref_items = [
|
||||
(offset, ref)
|
||||
for offset, ref, mean_luma, p90_luma, contrast in ref_candidates
|
||||
if (
|
||||
mean_luma >= max(0.16, max_mean * 0.82)
|
||||
and p90_luma >= max(0.28, max_p90 * 0.86)
|
||||
and contrast >= 0.035
|
||||
)
|
||||
]
|
||||
if len(ref_items) < 4:
|
||||
ref_items = fallback_items
|
||||
if len(ref_items) < 4:
|
||||
return None
|
||||
ref_offsets = [item[0] for item in ref_items]
|
||||
refs = [item[1] for item in ref_items]
|
||||
|
||||
align_offset = ref_offsets[0]
|
||||
ref_offsets = [offset - align_offset for offset in ref_offsets]
|
||||
|
||||
ref_stack = np.stack(refs, axis=0)
|
||||
edge_stack = np.stack([edge(ref) for ref in refs], axis=0)
|
||||
saliency = ref_stack.std(axis=0) * 1.25 + edge_stack.mean(axis=0) * 0.75
|
||||
# Static window/room edges are useful for finding the scene, but toxic for
|
||||
# phase retuning inside a repeated dialogue shot. Bias the mask toward
|
||||
# areas that actually change across the reference segment.
|
||||
saliency = ref_stack.std(axis=0) * 3.0 + edge_stack.std(axis=0) * 0.75 + edge_stack.mean(axis=0) * 0.15
|
||||
saliency[:, : int(size[0] * 0.12)] *= 0.15
|
||||
saliency[: int(size[1] * 0.16), : int(size[0] * 0.32)] = 0.0
|
||||
threshold = np.quantile(saliency, 0.72)
|
||||
threshold = np.quantile(saliency, 0.66)
|
||||
mask = (saliency >= threshold).astype("float32")
|
||||
mask /= mask.sum() + 1e-6
|
||||
|
||||
scene_start = float(scene["start_s"])
|
||||
scene_end = float(scene["end_s"])
|
||||
scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s))
|
||||
scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s - align_offset))
|
||||
max_points = 400
|
||||
step_s = max(0.08, (scan_end - scene_start) / max_points)
|
||||
|
||||
@@ -1789,7 +1830,7 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
|
||||
if t > scan_end:
|
||||
break
|
||||
vals = []
|
||||
for offset, ref in zip(offsets, refs):
|
||||
for offset, ref in zip(ref_offsets, refs):
|
||||
j = int(round((t + offset - scene_start) / step_s))
|
||||
if 0 <= j < len(source_frames):
|
||||
score = pair_score(ref, source_frames[j], mask)
|
||||
@@ -1806,9 +1847,13 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
|
||||
|
||||
candidates.sort(reverse=True)
|
||||
best_score = candidates[0][0]
|
||||
near_tie = [c for c in candidates if c[0] >= best_score - 0.002]
|
||||
chosen = min(near_tie, key=lambda c: abs(c[2] - original_in_s))
|
||||
return chosen[2], chosen[0]
|
||||
tie_window = 0.014 if transition_start else 0.002
|
||||
near_tie = [c for c in candidates if c[0] >= best_score - tie_window]
|
||||
if transition_start:
|
||||
chosen = max(near_tie, key=lambda c: c[2])
|
||||
else:
|
||||
chosen = min(near_tie, key=lambda c: abs((c[2] - align_offset) - original_in_s))
|
||||
return max(scene_start, chosen[2] - align_offset), chosen[0]
|
||||
|
||||
|
||||
def cmd_match(args: argparse.Namespace, cfg) -> list:
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user