From 884a0d4232ba54cd01a4ce68a7aa9d18d92029bc Mon Sep 17 00:00:00 2001 From: Melbar Date: Sat, 2 May 2026 13:03:15 +0200 Subject: [PATCH] Add vision prepass for targeted matches --- README.md | 10 +++++ cli.py | 126 +++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 125 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4786391..b4d9c03 100644 --- a/README.md +++ b/README.md @@ -228,6 +228,11 @@ Continuity- und Vision-Seeds allein schalten den globalen FFmpeg-Scan standardmäßig nicht ab. Sie sind Suchanker, keine Beweise; der volle CV-Scan bleibt aktiv, damit semantisch plausible, aber falsche Vision-Treffer echte Bildmatches nicht verdrängen. +Bei aktivierter Vision wird für gezielte Match-Läufe trotzdem zuerst ein +schneller seed-basierter CV-Prepass ausgeführt. Er überspringt den vollen +FFmpeg-Stream nur vorläufig und akzeptiert einen Treffer erst nach derselben +Bild-/Phasenvalidierung wie der normale Matcher. Nur nicht gelöste Beats fallen +danach auf den vollständigen Scan zurück. Lange Trailerbeats werden nicht mehr automatisch über ihre gesamte Beat-Länge gegen einen einzigen Source-Clip validiert. Sobald nach einem sichtbaren Source-Abschnitt eine anhaltende Schwarzblende oder Titel-/Credit-Insel beginnt, @@ -248,6 +253,11 @@ zusammengesetzt. Sehr kurze Inseln dürfen zusätzlich in den Source-Szenen benachbarter bereits gematchter Beats lokal nach ihrer Bewegungsphase suchen. Das ist weiterhin nur ein allgemeiner Continuity-Anker, kein manueller Override für bestimmte Beat-Nummern oder Szenen. +Besteht ein Beat nach automatischer Fade-/Titel-Filterung nur aus einer +einzigen sichtbaren Insel, wird diese Insel direkt als primäres Suchziel +verwendet. Dadurch scannt der Matcher denselben Bildinhalt nicht erst als +vollen Beat und danach noch einmal als Segment; der Report behält trotzdem die +korrekte Beat-Position und füllt echte Randlücken mit Schwarz. Zusätzlich werden sehr dunkle, kontrastarme oder noch nicht sauber auf-/abgeblendete Referenzframes aus Score, Inhalts-Reranking, Phasen-Alignment und Motion-Templates herausgenommen. Blenden sollen bestimmen, diff --git a/cli.py b/cli.py index ecb6436..db450c9 100644 --- a/cli.py +++ b/cli.py @@ -485,6 +485,71 @@ def _reference_scoreable_segments(beat, cfg) -> list[tuple[float, float]]: return raw +def _trim_beats_to_single_visual_island(beats: list, cfg) -> tuple[list, dict[int, tuple[float, float]]]: + """Use a single visible island as the primary match target for faded beats.""" + from dataclasses import replace + + trimmed = [] + trims: dict[int, tuple[float, float]] = {} + frame_s = 1.0 / max(1.0, float(cfg.export.edl_frame_rate)) + for beat in beats: + islands = _reference_scoreable_segments(beat, cfg) + if len(islands) == 1: + start_s, end_s = islands[0] + island_duration_s = max(0.0, end_s - start_s) + has_real_trim = ( + start_s > frame_s * 1.5 + or beat.duration_s - end_s > frame_s * 1.5 + ) + if island_duration_s > 0.0 and has_real_trim: + trimmed.append( + replace( + beat, + start_s=beat.start_s + start_s, + end_s=beat.start_s + end_s, + ) + ) + trims[beat.beat_id] = (start_s, island_duration_s) + continue + trimmed.append(beat) + return trimmed, trims + + +def _apply_single_island_segments(results: list, trims: dict[int, tuple[float, float]]) -> list: + """Restore beat-relative segment metadata after matching a trimmed island.""" + if not trims: + return results + + from dataclasses import replace + from src.core.models import MatchSegment + + expanded = [] + for result in results: + trim = trims.get(result.beat_id) + if trim is None or getattr(result, "segments", ()): + expanded.append(result) + continue + trailer_offset_s, island_duration_s = trim + duration_s = min(max(0.0, island_duration_s), max(0.0, result.duration_s)) + segment = MatchSegment( + trailer_offset_s=trailer_offset_s, + duration_s=duration_s, + scene_id=result.scene_id, + in_point_s=result.in_point_s, + out_point_s=result.in_point_s + duration_s, + match_score=result.match_score, + is_confirmed=result.is_confirmed, + ) + expanded.append( + replace( + result, + out_point_s=result.in_point_s + duration_s, + segments=(segment,), + ) + ) + return expanded + + def _attach_visual_segments(results: list, beats: list, cfg) -> list: """Attach automatic sub-shot matches for multi-island trailer beats.""" from dataclasses import replace @@ -558,7 +623,13 @@ def _attach_visual_segments(results: list, beats: list, cfg) -> list: return expanded -def _match_unmatched_visual_segments(results: list, beats: list, cached: list, cfg) -> list: +def _match_unmatched_visual_segments( + results: list, + beats: list, + cached: list, + cfg, + skip_global_segment_scan_for: set[int] | None = None, +) -> list: """Create segmented provisional matches when a whole beat has no single match.""" from dataclasses import replace from src.core.models import MatchResult, MatchSegment @@ -567,6 +638,7 @@ def _match_unmatched_visual_segments(results: list, beats: list, cached: list, c matched_ids = {r.beat_id for r in results} expanded = list(results) + skip_global_segment_scan_for = skip_global_segment_scan_for or set() try: fps = float(get_video_info(cfg.paths.source_movie)["fps"]) or cfg.export.edl_frame_rate except Exception: @@ -593,11 +665,13 @@ def _match_unmatched_visual_segments(results: list, beats: list, cached: list, c cached + expanded, cfg, ) - segment_matches = run_global_scan( - [segment_beat], - cfg, - seed_in_points=continuity, - ) + segment_matches = [] + if beat.beat_id not in skip_global_segment_scan_for: + segment_matches = run_global_scan( + [segment_beat], + cfg, + seed_in_points=continuity, + ) if not segment_matches: local_segment = _local_same_scene_segment_match( segment_beat, @@ -725,18 +799,48 @@ def cmd_match(args: argparse.Namespace, cfg) -> list: all_beats = _load_beats(cfg) beats = _select_beats(all_beats, getattr(args, "beat", None)) cached = _normalize_cached_results(all_beats, _load_results(cfg), cfg) if _results_cache_path(cfg).exists() else [] + scan_beats, single_island_trims = _trim_beats_to_single_visual_island(beats, cfg) seed_in_points = ( _continuity_seed_in_points(args.beat, all_beats, cached, cfg) if getattr(args, "beat", None) is not None else None ) - results = run_matching( - cfg, + results = [] + if cfg.vision.enabled: + fast_cfg = replace( + cfg, + cv=replace( + cfg.cv, + deep_scan=replace(cfg.cv.deep_scan, skip_coarse_scan_with_weighted_seeds=True), + ), + vision=replace(cfg.vision, fullscan_fallback=False), + ) + results = run_matching( + fast_cfg, + scan_beats, + force_reindex=args.force_reindex, + seed_in_points=seed_in_points, + ) + + if len(results) < len(scan_beats): + matched_ids = {r.beat_id for r in results} + remaining_beats = [b for b in scan_beats if b.beat_id not in matched_ids] + if remaining_beats: + full_results = run_matching( + cfg, + remaining_beats, + force_reindex=args.force_reindex, + seed_in_points=seed_in_points, + ) + results = sorted([*results, *full_results], key=lambda r: r.beat_id) + results = _apply_single_island_segments(results, single_island_trims) + results = _match_unmatched_visual_segments( + results, beats, - force_reindex=args.force_reindex, - seed_in_points=seed_in_points, + cached, + cfg, + skip_global_segment_scan_for=set(single_island_trims), ) - results = _match_unmatched_visual_segments(results, beats, cached, cfg) results = _attach_visual_segments(results, beats, cfg) # A targeted one-beat match should improve the cache without deleting