From 54d3f046166bd9678c7a0c87d7a2ef7d001c8222 Mon Sep 17 00:00:00 2001
From: Melbar <tangshode@gmail.com>
Date: Wed, 6 May 2026 00:05:37 +0200
Subject: [PATCH] Fix matching regressions, cache guard, and multi-shot
 algorithm for beat 15
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

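- config.toml: revert scoreable_luma/contrast thresholds to 24/58/24 (lowering
  them let cross-fade blend frames contaminate content-validation templates,
  dropping scores below provisional_content_threshold); lower
  provisional_match_threshold 0.43→0.35, coarse_candidate_threshold 0.50→0.40
  and provisional_content_threshold 0.42→0.30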
- src/cv/global_scan.py: _is_dark_reference_frame now also requires contrast<30,
  so low-luma silhouette frames with visible structure are no longer rejected
  as dark; two-path _is_scoreable_reference_frame separates standard vs
  fade-content scoring
- cli.py: _keeps_cached_match() guard prevents a weaker single-span rematch
  from overwriting a better multi-segment provisional cache entry
- cli.py: _fade_content_shots() restricted to between-island gaps only—
  pre-island black leaders were incorrectly emitted as matchable shots
- cli.py: island[0] of _match_unmatched_visual_segments() now uses no
  continuity seed so an insert cut at the start of a multi-shot beat is not
  forced toward the previous beat's scene
- scripts/generate_cutter_report.py: fix ffmpeg concat demuxer on Windows—
  use part.absolute().as_posix() so paths in the concat txt are absolute and
  not double-resolved relative to the concat file's directory

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 cli.py                            | 157 +++++++++++++++++++++++++++---
 config.toml                       |   6 +-
 scripts/generate_cutter_report.py |   2 +-
 src/cv/global_scan.py             |  47 +++++++--
 4 files changed, 186 insertions(+), 26 deletions(-)

diff --git a/cli.py b/cli.py
index 30e129c..eb04973 100644
--- a/cli.py
+++ b/cli.py
@@ -581,17 +581,68 @@ def _reference_scoreable_segments(beat, cfg) -> list[tuple[float, float]]:
     return merged
 
 
+def _fade_content_shots(beat, cfg) -> list[tuple[float, float]]:
+    """Find low-luma fade regions adjacent to visible islands that still carry
+    describable content (e.g. a hand+knife silhouette during a cross-fade).
+
+    These regions are too dark for CV template matching but vision can read
+    structure during the fade — the matcher therefore treats them as their
+    own shots and routes them through the vision-led search path.
+
+    A gap between two islands qualifies when it lasts ≥ 0.2 s and, over the
+    frames sampled inside it, the peak p90 is ≥ 12 (not pure black) and the
+    peak contrast is ≥ 8 (some structure); the peaks may come from different
+    frames. Pure-black/featureless and pre-/post-island fades stay excluded.
+    """
+    from src.cv.frame_extractor import grab_frame_at_path
+    from src.cv.global_scan import _reference_visibility_stats
+
+    islands = _reference_scoreable_segments(beat, cfg)
+    if not islands:
+        return []
+
+    step_s = max(0.04, cfg.cv.deep_scan.span_sample_step_s)
+    min_fade_s = 0.2
+
+    def has_content(start_s: float, end_s: float) -> bool:
+        if end_s - start_s < min_fade_s:
+            return False
+        peak_p90 = 0.0
+        peak_contrast = 0.0
+        t = start_s
+        while t < end_s:
+            frame = grab_frame_at_path(beat.trailer_path, beat.start_s + t)
+            if frame is not None:
+                _, p90, contrast = _reference_visibility_stats(frame, cfg)
+                peak_p90 = max(peak_p90, p90)
+                peak_contrast = max(peak_contrast, contrast)
+            t = round(t + step_s, 6)
+        return peak_p90 >= 12.0 and peak_contrast >= 8.0
+
+    fades: list[tuple[float, float]] = []
+    # Between-island fades only: these are genuine cross-fade silhouettes
+    # (one visible shot dissolves into another through a dim middle frame).
+    # Pre-island fades are fade-from-black leaders; post-island fades are
+    # fade-to-black trailers — neither is a source-matchable shot on its own.
+    for prev_isl, next_isl in zip(islands, islands[1:]):
+        gap_start, gap_end = prev_isl[1], next_isl[0]
+        if has_content(gap_start, gap_end):
+            fades.append((gap_start, gap_end))
+    return fades
+
+
 def _reference_shot_segments(beat, cfg) -> list[tuple[float, float]]:
     """Source-matchable shot ranges inside a trailer beat.
 
-    Like ``_reference_scoreable_segments`` but additionally splits each
-    visible island at detected hard cuts (frame-to-frame correlation drops
-    below ``cfg.vision.multi_shot_cut_corr_threshold``). A shot is a
-    fade-bounded AND cut-bounded sub-range of the trailer beat: this is
-    what we want to match against an individual source clip.
+    Returns a sorted list of (start_s, end_s) tuples covering:
+      * each visible island, further split at internal hard cuts;
+      * each fade region between two islands that still carries
+        describable content (e.g. a silhouette during a cross-fade) —
+        these get matched via the vision-led search path because CV
+        templates against the dark frames are unusable.
 
-    Tiny sub-shots (below ``min_shot_s``) are merged into the previous shot
-    so noisy cut detection doesn't fragment a real shot into useless slivers.
+    Tiny sub-shots are merged so noisy cut detection doesn't fragment a
+    real shot into useless slivers.
     """
     from src.cv.global_scan import _reference_internal_cut_offsets
 
@@ -600,7 +651,9 @@ def _reference_shot_segments(beat, cfg) -> list[tuple[float, float]]:
         cut_offsets = sorted(_reference_internal_cut_offsets(beat, cfg))
     except Exception:
         cut_offsets = []
-    if not cut_offsets:
+    fade_shots = _fade_content_shots(beat, cfg)
+
+    if not cut_offsets and not fade_shots:
         return islands
 
     min_shot_s = max(0.4, cfg.cv.deep_scan.span_sample_step_s * 4.0)
@@ -623,6 +676,21 @@ def _reference_shot_segments(beat, cfg) -> list[tuple[float, float]]:
                 shots[-1] = (shots[-1][0], seg_end)
             else:
                 shots.append((seg_start, seg_end))
+    # Add fade-content shots (cross-fade silhouettes / dim shot boundaries)
+    # sorted with the visible-island shots so the matcher sees them in
+    # trailer-time order.
+    if fade_shots:
+        all_shots = sorted(list(shots) + list(fade_shots), key=lambda iv: iv[0])
+        # Drop overlaps in case a fade region brushes against an island
+        # by a few frames; the earlier-starting range is kept as-is.
+        cleaned: list[tuple[float, float]] = []
+        for s, e in all_shots:
+            if cleaned and s < cleaned[-1][1]:
+                if e > cleaned[-1][1]:
+                    cleaned.append((cleaned[-1][1], e))
+                continue
+            cleaned.append((s, e))
+        return cleaned
     return shots if shots else islands
 
 
@@ -691,6 +759,23 @@ def _apply_single_island_segments(results: list, trims: dict[int, tuple[float, f
     return expanded
 
 
+def _keeps_cached_match(old, new, cfg) -> bool:
+    """Return True when the old cached match is better than the new one and should be kept.
+
+    Specifically protects multi-segment provisional matches from being replaced
+    by a weaker single-span result.  The old entry wins when it has segments
+    (explicitly tuned multi-shot layout) and the new result has none AND is not
+    a score improvement.
+    """
+    if old is None or new is None:
+        return False
+    old_segs = getattr(old, "segments", ()) or ()
+    new_segs = getattr(new, "segments", ()) or ()
+    if old_segs and not new_segs and new.match_score <= old.match_score:
+        return True
+    return False
+
+
 def _merge_best_results(existing: list, candidates: list, cfg) -> list:
     """Merge matches by beat, preferring confirmed or higher-scoring results."""
     by_id = {r.beat_id: r for r in existing}
@@ -1347,22 +1432,53 @@ def _match_unmatched_visual_segments(
             continue
 
         segments: list[MatchSegment] = []
-        for start_s, end_s in islands:
+        for island_idx, (start_s, end_s) in enumerate(islands):
             segment_beat = replace(
                 beat,
                 start_s=beat.start_s + start_s,
                 end_s=beat.start_s + end_s,
             )
-            continuity = _continuity_seed_in_points(
-                beat.beat_id,
-                [b if b.beat_id != beat.beat_id else segment_beat for b in beats],
-                cached + expanded,
-                cfg,
-            )
+            if island_idx == 0:
+                # First island of an unmatched multi-shot beat: search globally
+                # without a continuity bias from the previous beat.  Continuity
+                # assumes the shot follows the previous beat in the source, but
+                # the lead shot of a multi-shot beat is often an insert cut from
+                # a completely different scene.  A wrong seed with score 0.92
+                # would push the real match out of the refinement candidate pool.
+                continuity = {}
+            else:
+                continuity = _continuity_seed_in_points(
+                    beat.beat_id,
+                    [b if b.beat_id != beat.beat_id else segment_beat for b in beats],
+                    cached + expanded,
+                    cfg,
+                )
             segment_matches = []
             if beat.beat_id not in skip_global_segment_scan_for:
                 segment_matches = _run_segment_match(segment_beat, continuity, cfg, allow_fullscan=True)
             if not segment_matches:
+                # Fade-content shot fallback: when CV finds no templates
+                # inside this shot (typical for cross-fade silhouettes), the
+                # vibe-check + vision-action-window recovery path is the only
+                # way to get a match. It's slower but works on dark frames
+                # because vision can read structure where CV cannot.
+                shot_islands = _reference_scoreable_segments(segment_beat, cfg)
+                if not shot_islands and cfg.vision.enabled:
+                    recovered = _recover_unmatched_beats_via_vision([], [segment_beat], cfg)
+                    if recovered:
+                        rec = recovered[0]
+                        seg_dur = min(max(0.0, end_s - start_s), max(0.0, rec.duration_s))
+                        if seg_dur > 0:
+                            segments.append(MatchSegment(
+                                trailer_offset_s=start_s,
+                                duration_s=seg_dur,
+                                scene_id=rec.scene_id,
+                                in_point_s=rec.in_point_s,
+                                out_point_s=rec.in_point_s + seg_dur,
+                                match_score=rec.match_score,
+                                is_confirmed=rec.is_confirmed,
+                            ))
+                            continue
                 local_segment = _local_same_scene_segment_match(
                     segment_beat,
                     beat,
@@ -1559,9 +1675,18 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
     # bit-for-bit identical to what it was before this run.
     if getattr(args, "beat", None) is not None and _results_cache_path(cfg).exists():
         raw_cached = _load_results(cfg)
+        old_for_beat = next((r for r in raw_cached if r.beat_id == args.beat), None)
         raw_cached = [r for r in raw_cached if r.beat_id != args.beat]
         for result in results:
-            raw_cached = _update_result(result, raw_cached)
+            if _keeps_cached_match(old_for_beat, result, cfg):
+                print(
+                    f"ℹ️   Beat {result.beat_id}: keeping existing {len(getattr(old_for_beat, 'segments', ()) or ())}‑segment "
+                    f"provisional match (score {old_for_beat.match_score:.3f}) over weaker new result "
+                    f"(score {result.match_score:.3f}, no segments)."
+                )
+                raw_cached.append(old_for_beat)
+            else:
+                raw_cached = _update_result(result, raw_cached)
         results_to_save = sorted(raw_cached, key=lambda r: r.beat_id)
     else:
         results_to_save = results
diff --git a/config.toml b/config.toml
index 7b64b9a..0ffc710 100644
--- a/config.toml
+++ b/config.toml
@@ -72,12 +72,12 @@ match_threshold       = 0.65
 
 # Store/report lower-confidence automatic candidates for visual review instead
 # of dropping them as "NO MATCH". Confirmed exports can still use match_threshold.
-provisional_match_threshold = 0.43
+provisional_match_threshold = 0.35
 
 # Lower gate for entering temporal multi-frame refinement. The final decision
 # still uses sequence/span scoring; this only avoids rejecting real matches
 # because one midpoint frame is weak.
-coarse_candidate_threshold = 0.50
+coarse_candidate_threshold = 0.40
 
 # Candidate ranking weights. Duration coverage matters when the same visual
 # shot appears multiple times: prefer the occurrence that can cover the beat.
@@ -103,7 +103,7 @@ refine_step_seconds   = 0.04  # ≈ 1 frame at 25 fps
 content_align_window_seconds = 0.48
 content_align_sample_step_s  = 0.28
 content_validation_weight    = 0.35
-provisional_content_threshold = 0.42
+provisional_content_threshold = 0.30
 
 # When several adjacent frame offsets score almost the same, prefer the earlier
 # one. This avoids matches that are visually correct but start a few frames late.
diff --git a/scripts/generate_cutter_report.py b/scripts/generate_cutter_report.py
index d1d0e34..f0b792f 100644
--- a/scripts/generate_cutter_report.py
+++ b/scripts/generate_cutter_report.py
@@ -219,7 +219,7 @@ def extract_concat_clip(
     # encoder settings).
     list_file = out.with_name(f"{out.stem}_concat.txt")
     list_file.write_text(
-        "\n".join(f"file '{part.as_posix()}'" for part in parts) + "\n",
+        "\n".join(f"file '{part.absolute().as_posix()}'" for part in parts) + "\n",
         encoding="utf-8",
     )
     cmd = [
diff --git a/src/cv/global_scan.py b/src/cv/global_scan.py
index 8b917d2..ccd945b 100644
--- a/src/cv/global_scan.py
+++ b/src/cv/global_scan.py
@@ -580,13 +580,24 @@ def _prepare_motion_templates(
 
 
 def _is_dark_reference_frame(frame: np.ndarray, cfg: AppConfig) -> bool:
+    """Truly dark / pure-black frame: no usable structure for matching.
+
+    A cross-fade silhouette (low overall luma but visible contrast) is NOT
+    a dark frame for our purposes — it carries content (a hand, a knife,
+    a face peeking through the fade) and should still be matchable.
+    """
     cropped = text_safe_crop(
         frame,
         cfg.cv.vibe_check.crop_top_fraction,
         cfg.cv.vibe_check.crop_bottom_fraction,
     )
     gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
-    return float(np.mean(gray)) < 28.0 and float(np.percentile(gray, 90)) < 58.0
+    mean = float(np.mean(gray))
+    p90 = float(np.percentile(gray, 90))
+    p10 = float(np.percentile(gray, 10))
+    contrast = p90 - p10
+    # Real darkness: low luma AND low contrast (no structure visible)
+    return mean < 28.0 and p90 < 58.0 and contrast < 30.0
 
 
 def _reference_visibility_stats(frame: np.ndarray, cfg: AppConfig) -> tuple[float, float, float]:
@@ -602,16 +613,40 @@ def _reference_visibility_stats(frame: np.ndarray, cfg: AppConfig) -> tuple[floa
 
 
 def _is_scoreable_reference_frame(frame: np.ndarray, cfg: AppConfig) -> bool:
-    """Exclude black, fade, and low-visibility reference frames from scoring."""
+    """Decide whether a reference frame can carry a usable match template.
+
+    Two acceptance paths:
+
+    * Standard: regular daylight / interior shot — luma at or above the
+      configured thresholds AND enough contrast to be distinct.
+    * Fade-content: low overall luma BUT with strong local contrast,
+      i.e. a cross-fade silhouette where you can clearly see structure
+      (hand+knife against dark, face emerging from black, etc.). Without
+      this path the matcher would silently drop content-bearing fades and
+      mis-match the visible portion alone.
+    """
     if _is_dark_reference_frame(frame, cfg):
         return False
 
     mean_luma, p90_luma, contrast = _reference_visibility_stats(frame, cfg)
-    low_visibility = (
-        mean_luma < cfg.cv.deep_scan.scoreable_luma_mean_min
-        and p90_luma < cfg.cv.deep_scan.scoreable_luma_p90_min
+
+    # Standard daylight / interior shot
+    enough_luma = (
+        mean_luma >= cfg.cv.deep_scan.scoreable_luma_mean_min
+        or p90_luma >= cfg.cv.deep_scan.scoreable_luma_p90_min
     )
-    return not low_visibility and contrast >= cfg.cv.deep_scan.scoreable_contrast_min
+    if enough_luma and contrast >= cfg.cv.deep_scan.scoreable_contrast_min:
+        return True
+
+    # Fade-content: dim but with structure. The local contrast must be
+    # well above what a uniform dim frame would have, and at least a few
+    # bright pixels must exist (p90 above pure-black), so we don't accept
+    # a featureless dark wash. These thresholds are deliberately tighter
+    # than the standard path so we don't pollute scoring with smooth fades.
+    if contrast >= 40.0 and p90_luma >= 30.0:
+        return True
+
+    return False
 
 
 def estimate_matchable_reference_duration(