Fix matching regressions, cache guard, and multi-shot algorithm for beat 15

- config.toml: revert scoreable_luma/contrast thresholds to 24/58/24 (lowering
  them let cross-fade blend frames contaminate content-validation templates,
  dropping scores below provisional_content_threshold)
- src/cv/global_scan.py: _is_dark_reference_frame now requires contrast<30 so
  genuine dark silhouette frames are not rejected as unscoreable; two-path
  _is_scoreable_reference_frame separates standard vs fade-content scoring
- cli.py: _keeps_cached_match() guard prevents a weaker single-span rematch
  from overwriting a better multi-segment provisional cache entry
- cli.py: _fade_content_shots() restricted to between-island gaps only—
  pre-island black leaders were incorrectly emitted as matchable shots
- cli.py: island[0] of _match_unmatched_visual_segments() now uses no
  continuity seed so an insert cut at the start of a multi-shot beat is not
  forced toward the previous beat's scene
- scripts/generate_cutter_report.py: fix ffmpeg concat demuxer on Windows—
  use part.absolute().as_posix() so paths in the concat txt are absolute and
  not double-resolved relative to the concat file's directory

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Melbar
2026-05-06 00:05:37 +02:00
parent 223789eafc
commit 54d3f04616
4 changed files with 186 additions and 26 deletions
+141 -16
View File
@@ -581,17 +581,68 @@ def _reference_scoreable_segments(beat, cfg) -> list[tuple[float, float]]:
return merged
def _fade_content_shots(beat, cfg) -> list[tuple[float, float]]:
    """Find low-luma gaps BETWEEN visible islands that still carry content.

    Example: a hand+knife silhouette during a cross-fade. These regions are
    too dark for CV template matching but vision can read structure during
    the fade — the matcher therefore treats them as their own shots and
    routes them through the vision-led search path.

    Only the gaps between two consecutive islands returned by
    ``_reference_scoreable_segments`` are considered. Regions before the
    first island or after the last one are never emitted.

    A gap qualifies when its duration is ≥ 0.2 s AND, over the frames
    sampled inside it, the highest p90 seen is ≥ 12 (not pure black) and
    the highest contrast seen is ≥ 8 (some structure). The two peaks are
    tracked independently, so they may come from different sampled frames.
    Pure-black/featureless fades stay excluded.

    Returns:
        A list of ``(start_s, end_s)`` offsets relative to ``beat.start_s``,
        in island order. Empty when there are no islands or no qualifying
        gap.
    """
    from src.cv.frame_extractor import grab_frame_at_path
    from src.cv.global_scan import _reference_visibility_stats

    islands = _reference_scoreable_segments(beat, cfg)
    if not islands:
        return []

    step_s = max(0.04, cfg.cv.deep_scan.span_sample_step_s)
    min_fade_s = 0.2
    min_peak_p90 = 12.0
    min_peak_contrast = 8.0

    def has_content(start_s: float, end_s: float) -> bool:
        """Sample the gap and report whether both peak thresholds are met."""
        if end_s - start_s < min_fade_s:
            return False
        peak_p90 = 0.0
        peak_contrast = 0.0
        t = start_s
        while t < end_s:
            frame = grab_frame_at_path(beat.trailer_path, beat.start_s + t)
            if frame is not None:
                _, p90, contrast = _reference_visibility_stats(frame, cfg)
                peak_p90 = max(peak_p90, p90)
                peak_contrast = max(peak_contrast, contrast)
                # Peaks only ever grow, so once both thresholds are met the
                # answer cannot change; stop here to avoid further frame grabs.
                if peak_p90 >= min_peak_p90 and peak_contrast >= min_peak_contrast:
                    return True
            # Rounding keeps float drift from accumulating across steps.
            t = round(t + step_s, 6)
        return False

    fades: list[tuple[float, float]] = []
    # Between-island fades only: these are genuine cross-fade silhouettes
    # (one visible shot dissolves into another through a dim middle frame).
    # Pre-island fades are fade-from-black leaders; post-island fades are
    # fade-to-black trailers — neither is a source-matchable shot on its own.
    for prev_isl, next_isl in zip(islands, islands[1:]):
        gap_start, gap_end = prev_isl[1], next_isl[0]
        if has_content(gap_start, gap_end):
            fades.append((gap_start, gap_end))
    return fades
def _reference_shot_segments(beat, cfg) -> list[tuple[float, float]]:
"""Source-matchable shot ranges inside a trailer beat.
Like ``_reference_scoreable_segments`` but additionally splits each
visible island at detected hard cuts (frame-to-frame correlation drops
below ``cfg.vision.multi_shot_cut_corr_threshold``). A shot is a
fade-bounded AND cut-bounded sub-range of the trailer beat: this is
what we want to match against an individual source clip.
Returns a sorted list of (start_s, end_s) tuples covering:
* each visible island, further split at internal hard cuts;
* each fade region adjacent to an island that still carries
describable content (e.g. a silhouette during a cross-fade) —
these get matched via the vision-led search path because CV
templates against the dark frames are unusable.
Tiny sub-shots (below ``min_shot_s``) are merged into the previous shot
so noisy cut detection doesn't fragment a real shot into useless slivers.
Tiny sub-shots are merged so noisy cut detection doesn't fragment a
real shot into useless slivers.
"""
from src.cv.global_scan import _reference_internal_cut_offsets
@@ -600,7 +651,9 @@ def _reference_shot_segments(beat, cfg) -> list[tuple[float, float]]:
cut_offsets = sorted(_reference_internal_cut_offsets(beat, cfg))
except Exception:
cut_offsets = []
if not cut_offsets:
fade_shots = _fade_content_shots(beat, cfg)
if not cut_offsets and not fade_shots:
return islands
min_shot_s = max(0.4, cfg.cv.deep_scan.span_sample_step_s * 4.0)
@@ -623,6 +676,21 @@ def _reference_shot_segments(beat, cfg) -> list[tuple[float, float]]:
shots[-1] = (shots[-1][0], seg_end)
else:
shots.append((seg_start, seg_end))
# Add fade-content shots (cross-fade silhouettes / dim shot boundaries)
# sorted with the visible-island shots so the matcher sees them in
# trailer-time order.
if fade_shots:
all_shots = sorted(list(shots) + list(fade_shots), key=lambda iv: iv[0])
# Drop overlaps in case a fade region brushes against an island
# by a few frames; the island wins.
cleaned: list[tuple[float, float]] = []
for s, e in all_shots:
if cleaned and s < cleaned[-1][1]:
if e > cleaned[-1][1]:
cleaned.append((cleaned[-1][1], e))
continue
cleaned.append((s, e))
return cleaned
return shots if shots else islands
@@ -691,6 +759,23 @@ def _apply_single_island_segments(results: list, trims: dict[int, tuple[float, f
return expanded
def _keeps_cached_match(old, new, cfg) -> bool:
"""Return True when the old cached match is better than the new one and should be kept.
Specifically protects multi-segment provisional matches from being replaced
by a weaker single-span result. The old entry wins when it has segments
(explicitly tuned multi-shot layout) and the new result has none AND is not
a score improvement.
"""
if old is None or new is None:
return False
old_segs = getattr(old, "segments", ()) or ()
new_segs = getattr(new, "segments", ()) or ()
if old_segs and not new_segs and new.match_score <= old.match_score:
return True
return False
def _merge_best_results(existing: list, candidates: list, cfg) -> list:
"""Merge matches by beat, preferring confirmed or higher-scoring results."""
by_id = {r.beat_id: r for r in existing}
@@ -1347,22 +1432,53 @@ def _match_unmatched_visual_segments(
continue
segments: list[MatchSegment] = []
for start_s, end_s in islands:
for island_idx, (start_s, end_s) in enumerate(islands):
segment_beat = replace(
beat,
start_s=beat.start_s + start_s,
end_s=beat.start_s + end_s,
)
continuity = _continuity_seed_in_points(
beat.beat_id,
[b if b.beat_id != beat.beat_id else segment_beat for b in beats],
cached + expanded,
cfg,
)
if island_idx == 0:
# First island of an unmatched multi-shot beat: search globally
# without a continuity bias from the previous beat. Continuity
# assumes the shot follows the previous beat in the source, but
# the lead shot of a multi-shot beat is often an insert cut from
# a completely different scene. A wrong seed with score 0.92
# would push the real match out of the refinement candidate pool.
continuity = {}
else:
continuity = _continuity_seed_in_points(
beat.beat_id,
[b if b.beat_id != beat.beat_id else segment_beat for b in beats],
cached + expanded,
cfg,
)
segment_matches = []
if beat.beat_id not in skip_global_segment_scan_for:
segment_matches = _run_segment_match(segment_beat, continuity, cfg, allow_fullscan=True)
if not segment_matches:
# Fade-content shot fallback: when CV finds no templates
# inside this shot (typical for cross-fade silhouettes), the
# vibe-check + vision-action-window recovery path is the only
# way to get a match. It's slower but works on dark frames
# because vision can read structure where CV cannot.
shot_islands = _reference_scoreable_segments(segment_beat, cfg)
if not shot_islands and cfg.vision.enabled:
recovered = _recover_unmatched_beats_via_vision([], [segment_beat], cfg)
if recovered:
rec = recovered[0]
seg_dur = min(max(0.0, end_s - start_s), max(0.0, rec.duration_s))
if seg_dur > 0:
segments.append(MatchSegment(
trailer_offset_s=start_s,
duration_s=seg_dur,
scene_id=rec.scene_id,
in_point_s=rec.in_point_s,
out_point_s=rec.in_point_s + seg_dur,
match_score=rec.match_score,
is_confirmed=rec.is_confirmed,
))
continue
local_segment = _local_same_scene_segment_match(
segment_beat,
beat,
@@ -1559,9 +1675,18 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
# bit-for-bit identical to what it was before this run.
if getattr(args, "beat", None) is not None and _results_cache_path(cfg).exists():
raw_cached = _load_results(cfg)
old_for_beat = next((r for r in raw_cached if r.beat_id == args.beat), None)
raw_cached = [r for r in raw_cached if r.beat_id != args.beat]
for result in results:
raw_cached = _update_result(result, raw_cached)
if _keeps_cached_match(old_for_beat, result, cfg):
print(
f"️ Beat {result.beat_id}: keeping existing {len(getattr(old_for_beat, 'segments', ()) or ())}segment "
f"provisional match (score {old_for_beat.match_score:.3f}) over weaker new result "
f"(score {result.match_score:.3f}, no segments)."
)
raw_cached.append(old_for_beat)
else:
raw_cached = _update_result(result, raw_cached)
results_to_save = sorted(raw_cached, key=lambda r: r.beat_id)
else:
results_to_save = results
+3 -3
View File
@@ -72,12 +72,12 @@ match_threshold = 0.65
# Store/report lower-confidence automatic candidates for visual review instead
# of dropping them as "NO MATCH". Confirmed exports can still use match_threshold.
provisional_match_threshold = 0.43
provisional_match_threshold = 0.35
# Lower gate for entering temporal multi-frame refinement. The final decision
# still uses sequence/span scoring; this only avoids rejecting real matches
# because one midpoint frame is weak.
coarse_candidate_threshold = 0.50
coarse_candidate_threshold = 0.40
# Candidate ranking weights. Duration coverage matters when the same visual
# shot appears multiple times: prefer the occurrence that can cover the beat.
@@ -103,7 +103,7 @@ refine_step_seconds = 0.04 # ≈ 1 frame at 25 fps
content_align_window_seconds = 0.48
content_align_sample_step_s = 0.28
content_validation_weight = 0.35
provisional_content_threshold = 0.42
provisional_content_threshold = 0.30
# When several adjacent frame offsets score almost the same, prefer the earlier
# one. This avoids matches that are visually correct but start a few frames late.
+1 -1
View File
@@ -219,7 +219,7 @@ def extract_concat_clip(
# encoder settings).
list_file = out.with_name(f"{out.stem}_concat.txt")
list_file.write_text(
"\n".join(f"file '{part.as_posix()}'" for part in parts) + "\n",
"\n".join(f"file '{part.absolute().as_posix()}'" for part in parts) + "\n",
encoding="utf-8",
)
cmd = [
+41 -6
View File
@@ -580,13 +580,24 @@ def _prepare_motion_templates(
def _is_dark_reference_frame(frame: np.ndarray, cfg: AppConfig) -> bool:
    """Return True for a truly dark / pure-black frame with no usable structure.

    A cross-fade silhouette (low overall luma but visible contrast) is NOT
    a dark frame for our purposes — it carries content (a hand, a knife,
    a face peeking through the fade) and should still be matchable.

    The frame is cropped with the vibe-check top/bottom fractions from
    ``cfg`` and converted from BGR to grayscale before measuring. It counts
    as dark only when mean luma < 28, the 90th percentile < 58 AND the
    p90 - p10 spread < 30.
    """
    cropped = text_safe_crop(
        frame,
        cfg.cv.vibe_check.crop_top_fraction,
        cfg.cv.vibe_check.crop_bottom_fraction,
    )
    gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
    mean = float(np.mean(gray))
    p90 = float(np.percentile(gray, 90))
    p10 = float(np.percentile(gray, 10))
    contrast = p90 - p10
    # Real darkness: low luma AND low contrast (no structure visible).
    # The earlier luma-only return is removed because it exited before the
    # contrast gate could be evaluated.
    return mean < 28.0 and p90 < 58.0 and contrast < 30.0
def _reference_visibility_stats(frame: np.ndarray, cfg: AppConfig) -> tuple[float, float, float]:
@@ -602,16 +613,40 @@ def _reference_visibility_stats(frame: np.ndarray, cfg: AppConfig) -> tuple[floa
def _is_scoreable_reference_frame(frame: np.ndarray, cfg: AppConfig) -> bool:
    """Decide whether a reference frame can carry a usable match template.

    Frames that ``_is_dark_reference_frame`` classifies as dark are rejected
    up front. Otherwise there are two acceptance paths:

    * Standard: regular daylight / interior shot — mean luma OR p90 luma at
      or above the configured ``scoreable_luma_*_min`` thresholds AND
      contrast at or above ``scoreable_contrast_min``.
    * Fade-content: low overall luma BUT with strong local contrast,
      i.e. a cross-fade silhouette where you can clearly see structure
      (hand+knife against dark, face emerging from black, etc.). Without
      this path the matcher would silently drop content-bearing fades and
      mis-match the visible portion alone.

    Returns:
        True when either path accepts the frame, False otherwise.
    """
    if _is_dark_reference_frame(frame, cfg):
        return False

    mean_luma, p90_luma, contrast = _reference_visibility_stats(frame, cfg)
    deep_scan = cfg.cv.deep_scan

    # Standard daylight / interior shot.
    enough_luma = (
        mean_luma >= deep_scan.scoreable_luma_mean_min
        or p90_luma >= deep_scan.scoreable_luma_p90_min
    )
    if enough_luma and contrast >= deep_scan.scoreable_contrast_min:
        return True

    # Fade-content: dim but with structure. The local contrast must be
    # well above what a uniform dim frame would have, and at least a few
    # bright pixels must exist (p90 above pure-black), so we don't accept
    # a featureless dark wash. These thresholds are deliberately tighter
    # than the standard path so we don't pollute scoring with smooth fades.
    return contrast >= 40.0 and p90_luma >= 30.0
def estimate_matchable_reference_duration(