Handle fade-led segment phase ties
This commit is contained in:
+1
-1
File diff suppressed because one or more lines are too long
+5
-5
File diff suppressed because one or more lines are too long
@@ -1743,27 +1743,68 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
|
|||||||
return frame if ok else None
|
return frame if ok else None
|
||||||
|
|
||||||
trailer_cap = cv2.VideoCapture(str(cfg.paths.reference_trailer))
|
trailer_cap = cv2.VideoCapture(str(cfg.paths.reference_trailer))
|
||||||
refs = [
|
ref_candidates = []
|
||||||
prepared_gray(frame_at(trailer_cap, segment_beat.start_s + offset))
|
fallback_items = []
|
||||||
for offset in offsets
|
for offset in offsets:
|
||||||
if offset <= segment_beat.duration_s + 0.04
|
if offset > segment_beat.duration_s + 0.04:
|
||||||
|
continue
|
||||||
|
frame = frame_at(trailer_cap, segment_beat.start_s + offset)
|
||||||
|
ref = prepared_gray(frame)
|
||||||
|
if ref is None:
|
||||||
|
continue
|
||||||
|
fallback_items.append((offset, ref))
|
||||||
|
raw_gray = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), size)
|
||||||
|
h, w = raw_gray.shape[:2]
|
||||||
|
raw_gray[: int(h * 0.16), : int(w * 0.32)] = 0
|
||||||
|
roi = raw_gray[int(h * 0.12) : int(h * 0.90), :]
|
||||||
|
mean_luma = float(roi.mean() / 255.0)
|
||||||
|
p90_luma = float(np.percentile(roi, 90) / 255.0)
|
||||||
|
contrast = float(roi.std() / 255.0)
|
||||||
|
ref_candidates.append((offset, ref, mean_luma, p90_luma, contrast))
|
||||||
|
|
||||||
|
transition_start = False
|
||||||
|
ref_items = []
|
||||||
|
if ref_candidates:
|
||||||
|
max_mean = max(item[2] for item in ref_candidates)
|
||||||
|
max_p90 = max(item[3] for item in ref_candidates)
|
||||||
|
transition_start = (
|
||||||
|
ref_candidates[0][2] < max_mean * 0.90
|
||||||
|
or ref_candidates[0][3] < max_p90 * 0.90
|
||||||
|
)
|
||||||
|
ref_items = [
|
||||||
|
(offset, ref)
|
||||||
|
for offset, ref, mean_luma, p90_luma, contrast in ref_candidates
|
||||||
|
if (
|
||||||
|
mean_luma >= max(0.16, max_mean * 0.82)
|
||||||
|
and p90_luma >= max(0.28, max_p90 * 0.86)
|
||||||
|
and contrast >= 0.035
|
||||||
|
)
|
||||||
]
|
]
|
||||||
refs = [ref for ref in refs if ref is not None]
|
if len(ref_items) < 4:
|
||||||
if len(refs) < 4:
|
ref_items = fallback_items
|
||||||
|
if len(ref_items) < 4:
|
||||||
return None
|
return None
|
||||||
|
ref_offsets = [item[0] for item in ref_items]
|
||||||
|
refs = [item[1] for item in ref_items]
|
||||||
|
|
||||||
|
align_offset = ref_offsets[0]
|
||||||
|
ref_offsets = [offset - align_offset for offset in ref_offsets]
|
||||||
|
|
||||||
ref_stack = np.stack(refs, axis=0)
|
ref_stack = np.stack(refs, axis=0)
|
||||||
edge_stack = np.stack([edge(ref) for ref in refs], axis=0)
|
edge_stack = np.stack([edge(ref) for ref in refs], axis=0)
|
||||||
saliency = ref_stack.std(axis=0) * 1.25 + edge_stack.mean(axis=0) * 0.75
|
# Static window/room edges are useful for finding the scene, but toxic for
|
||||||
|
# phase retuning inside a repeated dialogue shot. Bias the mask toward
|
||||||
|
# areas that actually change across the reference segment.
|
||||||
|
saliency = ref_stack.std(axis=0) * 3.0 + edge_stack.std(axis=0) * 0.75 + edge_stack.mean(axis=0) * 0.15
|
||||||
saliency[:, : int(size[0] * 0.12)] *= 0.15
|
saliency[:, : int(size[0] * 0.12)] *= 0.15
|
||||||
saliency[: int(size[1] * 0.16), : int(size[0] * 0.32)] = 0.0
|
saliency[: int(size[1] * 0.16), : int(size[0] * 0.32)] = 0.0
|
||||||
threshold = np.quantile(saliency, 0.72)
|
threshold = np.quantile(saliency, 0.66)
|
||||||
mask = (saliency >= threshold).astype("float32")
|
mask = (saliency >= threshold).astype("float32")
|
||||||
mask /= mask.sum() + 1e-6
|
mask /= mask.sum() + 1e-6
|
||||||
|
|
||||||
scene_start = float(scene["start_s"])
|
scene_start = float(scene["start_s"])
|
||||||
scene_end = float(scene["end_s"])
|
scene_end = float(scene["end_s"])
|
||||||
scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s))
|
scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s - align_offset))
|
||||||
max_points = 400
|
max_points = 400
|
||||||
step_s = max(0.08, (scan_end - scene_start) / max_points)
|
step_s = max(0.08, (scan_end - scene_start) / max_points)
|
||||||
|
|
||||||
@@ -1789,7 +1830,7 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
|
|||||||
if t > scan_end:
|
if t > scan_end:
|
||||||
break
|
break
|
||||||
vals = []
|
vals = []
|
||||||
for offset, ref in zip(offsets, refs):
|
for offset, ref in zip(ref_offsets, refs):
|
||||||
j = int(round((t + offset - scene_start) / step_s))
|
j = int(round((t + offset - scene_start) / step_s))
|
||||||
if 0 <= j < len(source_frames):
|
if 0 <= j < len(source_frames):
|
||||||
score = pair_score(ref, source_frames[j], mask)
|
score = pair_score(ref, source_frames[j], mask)
|
||||||
@@ -1806,9 +1847,13 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
|
|||||||
|
|
||||||
candidates.sort(reverse=True)
|
candidates.sort(reverse=True)
|
||||||
best_score = candidates[0][0]
|
best_score = candidates[0][0]
|
||||||
near_tie = [c for c in candidates if c[0] >= best_score - 0.002]
|
tie_window = 0.014 if transition_start else 0.002
|
||||||
chosen = min(near_tie, key=lambda c: abs(c[2] - original_in_s))
|
near_tie = [c for c in candidates if c[0] >= best_score - tie_window]
|
||||||
return chosen[2], chosen[0]
|
if transition_start:
|
||||||
|
chosen = max(near_tie, key=lambda c: c[2])
|
||||||
|
else:
|
||||||
|
chosen = min(near_tie, key=lambda c: abs((c[2] - align_offset) - original_in_s))
|
||||||
|
return max(scene_start, chosen[2] - align_offset), chosen[0]
|
||||||
|
|
||||||
|
|
||||||
def cmd_match(args: argparse.Namespace, cfg) -> list:
|
def cmd_match(args: argparse.Namespace, cfg) -> list:
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user