Realign wrong in-scene action matches

2026-05-02 17:13:22 +02:00
parent 1a177d6b89
commit 3ea5582b49
3 changed files with 178 additions and 1 deletions
@@ -595,3 +595,73 @@ def validate_match_window_with_vision(
    if missing_actions and score < threshold:
        return False, f"{reason} missing_actions={sorted(missing_actions)}"
    return True, reason
+
+
+def find_action_window_in_scene(
+    beat: TrailerBeat,
+    scene: Scene,
+    cfg: AppConfig,
+) -> tuple[float, float, float, str] | None:
+    """
+    Search one already selected source scene for the beat's action phase.
+
+    This is used after CV picked the right broad scene but the wrong time
+    inside that scene. It stays automatic and cached: windows are described
+    evenly across the scene until the per-run vision budget is consumed.
+    """
+    if not cfg.vision.enabled or scene.duration_s <= 0:
+        return None
+
+    cache = _load_cache(cfg)
+    budget = [max(0, cfg.vision.max_new_descriptions_per_run)]
+    beat_desc = _describe_sample(
+        kind="beat",
+        item_id=beat.beat_id,
+        label=f"trailer beat {beat.beat_id} action search",
+        video_path=beat.trailer_path,
+        start_s=beat.start_s,
+        end_s=beat.end_s,
+        cfg=cfg,
+        cache=cache,
+        budget=budget,
+    )
+    if not beat_desc:
+        return None
+
+    beat_actions = _semantic_action_groups(beat_desc) & _STRONG_ACTION_GROUPS
+    if not beat_actions:
+        return None
+
+    max_windows = max(
+        cfg.vision.seed_points_per_scene,
+        cfg.vision.max_new_descriptions_per_run,
+    )
+    best: tuple[float, float, float, str] | None = None
+    for start_s, end_s in _scene_window_ranges(scene, beat, max_windows):
+        desc = _describe_sample(
+            kind="action_window",
+            item_id=scene.scene_id,
+            label=f"source scene {scene.scene_id} action window {start_s:.2f}-{end_s:.2f}",
+            video_path=scene.source_path,
+            start_s=start_s,
+            end_s=end_s,
+            cfg=cfg,
+            cache=cache,
+            budget=budget,
+        )
+        if not desc:
+            continue
+        score, reason = _semantic_match_score(beat_desc, desc)
+        source_actions = _semantic_action_groups(desc)
+        missing_actions = beat_actions - source_actions
+        if missing_actions:
+            continue
+        threshold = max(0.38, cfg.vision.similarity_threshold + 0.18)
+        if score < threshold:
+            continue
+        candidate = (start_s, end_s, score, reason)
+        if best is None or candidate[2] > best[2]:
+            best = candidate
+
+    _save_cache(cfg, cache)
+    return best