Per-shot match for beats with internal cuts; protect cache on --beat runs
Two issues fixed:

1. Beats with internal hard cuts (e.g. man-shot then back to woman) were being approximated by a single source clip because the multi-segment path only triggered for fade-bounded multi-island beats. Added _reference_shot_segments(), which returns the shot ranges by splitting each visible island at detected internal cuts. The multi-island gate in cmd_match and the per-island loop in _match_unmatched_visual_segments now use shots, so any beat with cuts > 0 produces one MatchSegment per shot. Each shot is matched independently against the source movie.

   Effect on Beat 10: 1 segment (3.32 s in scene 558) -> 3 segments covering shots 0-0.88 s, 0.88-2.64 s, 2.64-3.32 s in scenes 554, 559, 556 respectively, with the previously missing "back to woman" cut now correctly placed in scene 556.

2. Targeted --beat N runs were silently dropping cache entries for other beats whose old scores no longer pass current quality gates (_normalize_cached_results runs at load time and removes them). The save path now re-loads the raw cache from disk and writes back every non-targeted beat verbatim, so a per-beat run can never regress another beat's stored match.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+1
-1
File diff suppressed because one or more lines are too long
+3
-3
@@ -1,6 +1,6 @@
|
||||
# Cutter-Report — manuelles Nachschneiden
|
||||
|
||||
Stand: 2026-05-04
|
||||
Stand: 2026-05-05
|
||||
|
||||
- **Trailer**: `BehindTheRedDoor_Trailer_REFERENCE.mp4` @ 25.000 fps
|
||||
- **Source** : `BehindTheRedDoor_FTR_1080P_2398_Fixed.mp4` @ 23.976 fps
|
||||
@@ -33,7 +33,7 @@ Bilder sind base64-eingebettet (kein toter Link). Für Video-Vorschau siehe `CUT
|
||||
| 7 | 00:00:29:06-00:00:31:17 | 01:20:10:10-01:20:12:14 | 0.497 | ? | man appears to be engaged in conversation |
|
||||
| 8 | 00:00:31:17-00:00:33:16 | 00:00:51:07-00:00:53:01 | 0.620 | ? | static or slow drifting |
|
||||
| 9 | 00:00:33:16-00:00:36:19 | 01:20:29:03-01:20:32:06 | 0.674 | OK | speaking, transitioning from closed eyes to open mouth and focused gaze |
|
||||
| 10 | 00:00:36:19-00:00:40:02 | 01:20:35:16-01:20:39:00 | 0.560 | ? | conversation |
|
||||
| 10 | 00:00:36:19-00:00:40:02 | 01:20:13:13-01:20:14:11 | 0.660 | ? | conversation |
|
||||
| 11 | 00:00:40:02-00:00:42:03 | 01:20:40:18-01:20:42:18 | 0.636 | ? | static talking head with slight facial expression changes |
|
||||
| 12 | 00:00:42:03-00:00:50:06 | 01:14:26:06-01:14:29:18 | 0.701 | OK | static profile shot transitioning to black/darkness |
|
||||
| 13 | 00:00:50:06-00:00:53:21 | 00:43:19:13-00:43:23:04 | 0.636 | ? | static conversation; woman on right is standing and holding a cup |
|
||||
@@ -164,7 +164,7 @@ Bilder sind base64-eingebettet (kein toter Link). Für Video-Vorschau siehe `CUT
|
||||
### Beat 10 — Status `?`
|
||||
|
||||
- **Trailer**: 00:00:36:19 – 00:00:40:02
|
||||
- **Source** : 01:20:35:16 – 01:20:39:00 (scene 558, score 0.560)
|
||||
- **Source** : 01:20:13:13 – 01:20:14:11 (scene 554, score 0.660)
|
||||
- **Phase** : conversation
|
||||
- **Bild** : alternating close-ups and a medium two-shot, indoor living room with large windows showing a blue exterior landscape
|
||||
|
||||
|
||||
@@ -562,6 +562,51 @@ def _reference_scoreable_segments(beat, cfg) -> list[tuple[float, float]]:
|
||||
return merged
|
||||
|
||||
|
||||
def _reference_shot_segments(beat, cfg) -> list[tuple[float, float]]:
    """Return the source-matchable shot ranges inside a trailer beat.

    Starts from the visible islands produced by
    ``_reference_scoreable_segments`` and additionally splits each island at
    the hard cuts reported by ``_reference_internal_cut_offsets`` (described
    upstream as points where frame-to-frame correlation drops below
    ``cfg.vision.multi_shot_cut_corr_threshold``). A shot is therefore a
    fade-bounded AND cut-bounded sub-range of the beat, which is what gets
    matched against an individual source clip.

    Pieces shorter than ``min_shot_s`` are folded into the shot that ends
    where they begin, so noisy cut detection does not fragment a real shot
    into useless slivers. A short piece with no touching predecessor (for
    example the first piece of an island) is kept as its own shot; it is
    never merged into a shot from an earlier island, because that would make
    the shot span the fade between the two islands.

    Args:
        beat: Trailer beat; passed through unchanged to the helper functions.
        cfg: Configuration object; ``cfg.cv.deep_scan.span_sample_step_s`` is
            read to derive the minimum shot length.

    Returns:
        ``(start, end)`` tuples in the same time base as the islands
        (NOTE(review): presumably seconds relative to the beat start, and cut
        offsets are assumed to share that time base -- confirm against
        ``_reference_internal_cut_offsets``). When no cuts are detected the
        islands are returned unchanged.
    """
    from src.cv.global_scan import _reference_internal_cut_offsets

    islands = _reference_scoreable_segments(beat, cfg)
    try:
        cut_offsets = sorted(_reference_internal_cut_offsets(beat, cfg))
    except Exception:
        # Deliberate best-effort: if cut detection fails, fall back to the
        # fade-bounded islands instead of aborting the whole match run.
        cut_offsets = []
    if not cut_offsets:
        return islands

    eps = 1e-3  # tolerance for "cut sits on an island edge" / "pieces touch"
    min_shot_s = max(0.4, cfg.cv.deep_scan.span_sample_step_s * 4.0)

    shots: list[tuple[float, float]] = []
    for start_s, end_s in islands:
        # Only cuts strictly inside the island split it; cuts on (or within
        # eps of) an edge would create zero-length pieces.
        inner_cuts = [cut for cut in cut_offsets if start_s + eps < cut < end_s - eps]
        boundaries = [start_s, *inner_cuts, end_s]

        for seg_start, seg_end in zip(boundaries, boundaries[1:]):
            too_short = seg_end - seg_start < min_shot_s
            touches_previous = bool(shots) and shots[-1][1] >= seg_start - eps
            if too_short and touches_previous:
                # Absorb the sliver into the shot it directly follows.
                shots[-1] = (shots[-1][0], seg_end)
            else:
                # Long enough, or a sliver with nothing adjacent to absorb
                # it: keep it separate rather than bridging a fade gap.
                shots.append((seg_start, seg_end))

    return shots if shots else islands
|
||||
|
||||
|
||||
def _trim_beats_to_single_visual_island(beats: list, cfg) -> tuple[list, dict[int, tuple[float, float]]]:
|
||||
"""Use a single visible island as the primary match target for faded beats."""
|
||||
from dataclasses import replace
|
||||
@@ -1276,7 +1321,9 @@ def _match_unmatched_visual_segments(
|
||||
if beat.beat_id in matched_ids:
|
||||
continue
|
||||
|
||||
islands = _reference_scoreable_segments(beat, cfg)
|
||||
# Per-shot matching when the beat has either fade-bounded islands
|
||||
# OR internal hard cuts; each shot becomes its own MatchSegment.
|
||||
islands = _reference_shot_segments(beat, cfg)
|
||||
if not islands:
|
||||
continue
|
||||
|
||||
@@ -1423,10 +1470,15 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
|
||||
all_beats = _load_beats(cfg)
|
||||
beats = _select_beats(all_beats, getattr(args, "beat", None))
|
||||
cached = _normalize_cached_results(all_beats, _load_results(cfg), cfg) if _results_cache_path(cfg).exists() else []
|
||||
# Multi-shot beats: either fade-bounded multiple islands, OR a single
|
||||
# island with internal hard cuts (e.g. man-shot then back to woman). Both
|
||||
# cases are routed through the per-segment match path so each shot gets
|
||||
# its own source clip instead of being approximated by one continuous
|
||||
# span.
|
||||
multi_island_beat_ids = {
|
||||
beat.beat_id
|
||||
for beat in beats
|
||||
if len(_reference_scoreable_segments(beat, cfg)) > 1
|
||||
if len(_reference_shot_segments(beat, cfg)) > 1
|
||||
}
|
||||
scan_beats, single_island_trims = _trim_beats_to_single_visual_island(beats, cfg)
|
||||
scan_beats = [b for b in scan_beats if b.beat_id not in multi_island_beat_ids]
|
||||
@@ -1480,13 +1532,18 @@ def cmd_match(args: argparse.Namespace, cfg) -> list:
|
||||
results = _filter_semantically_invalid_vision_matches(results, beats, cfg)
|
||||
results = _recover_unmatched_beats_via_vision(results, beats, cfg)
|
||||
|
||||
# A targeted one-beat match should improve the cache without deleting
|
||||
# automatic matches for other beats.
|
||||
# A targeted one-beat match must NEVER delete or modify any other beat's
|
||||
# cache entry. We deliberately re-load the raw cache from disk here so
|
||||
# the upstream normalisation pass (which drops entries that no longer
|
||||
# pass current quality gates) cannot leak into the save: only the
|
||||
# targeted beat's slot gets replaced, every other entry is written back
|
||||
# bit-for-bit identical to what it was before this run.
|
||||
if getattr(args, "beat", None) is not None and _results_cache_path(cfg).exists():
|
||||
cached = [r for r in cached if r.beat_id != args.beat]
|
||||
raw_cached = _load_results(cfg)
|
||||
raw_cached = [r for r in raw_cached if r.beat_id != args.beat]
|
||||
for result in results:
|
||||
cached = _update_result(result, cached)
|
||||
results_to_save = cached
|
||||
raw_cached = _update_result(result, raw_cached)
|
||||
results_to_save = sorted(raw_cached, key=lambda r: r.beat_id)
|
||||
else:
|
||||
results_to_save = results
|
||||
|
||||
|
||||
Reference in New Issue
Block a user