Rank long-scene probes without action penalty

2026-05-02 18:53:27 +02:00
parent 2d9ba91a7b
commit 252f710396
2 changed files with 23 additions and 2 deletions
@@ -168,6 +168,9 @@ Dieser lokale Fenster-Probe ist bewusst breiter als die finale Seed-Auswahl:
 Eine lange Dialogszene kann in der Gesamtbeschreibung nur als Gespräch
 erscheinen, aber an einer späteren Stelle trotzdem genau die gesuchte
 Aktionsphase enthalten.
+Für diese Probe werden die Szenen deshalb nach grober Szenenähnlichkeit ohne
+harte Aktionsstrafe gerankt; die harte Aktionsprüfung greift erst bei den
+lokalen Fenstern und dem finalen Source-Zeitbereich.
 Nach dem CV-Match kann derselbe Vision-Layer den konkreten finalen Source-
 Zeitbereich nochmals gegen den Trailer-Beat prüfen. Starke Aktionsphasen wie
 Annäherung, Kuss/Stirnkontakt, Handbewegungen oder Schneiden müssen dann auch
@@ -365,7 +365,7 @@ def _add_window_seed_descriptions(

    probe_limit = max(
        cfg.vision.max_seed_scenes * 4,
-        cfg.vision.scene_candidate_top_k // 2,
+        cfg.vision.scene_candidate_top_k,
    )
    scenes_to_probe = ranked[: max(1, min(len(ranked), probe_limit))]
    windows_per_scene = max(1, min(6, cfg.vision.seed_points_per_scene // 2))
@@ -460,6 +460,7 @@ def build_vision_seed_in_points(
        )

        ranked_by_scene: dict[int, tuple[float, Scene, str]] = {}
+        probe_ranked_by_scene: dict[int, tuple[float, Scene, str]] = {}
        for hit in hits:
            scene = scenes_by_id.get(hit.scene_id)
            if scene is None:
@@ -477,6 +478,13 @@ def build_vision_seed_in_points(
            )
            if not scene_desc:
                continue
+            probe_score = _text_similarity(beat_desc, scene_desc)
+            if probe_score >= cfg.vision.similarity_threshold:
+                probe_ranked_by_scene[scene.scene_id] = (
+                    min(0.99, probe_score + 0.25),
+                    scene,
+                    "probe",
+                )
            score, _reason = _semantic_match_score(beat_desc, scene_desc)
            if score >= cfg.vision.similarity_threshold:
                ranked_by_scene[scene.scene_id] = (min(0.99, score + 0.25), scene, "vision")
@@ -488,8 +496,17 @@ def build_vision_seed_in_points(
            existing = ranked_by_scene.get(scene.scene_id)
            if existing is None or vibe_score > existing[0]:
                ranked_by_scene[scene.scene_id] = (vibe_score, scene, "vibe")
+            probe_existing = probe_ranked_by_scene.get(scene.scene_id)
+            if probe_existing is None or vibe_score > probe_existing[0]:
+                probe_ranked_by_scene[scene.scene_id] = (vibe_score, scene, "vibe")

        for scene, scene_desc in _cached_scene_descriptions(cache, scenes_by_id, cfg):
+            probe_score = _text_similarity(beat_desc, scene_desc)
+            if probe_score >= cfg.vision.similarity_threshold:
+                probe_semantic_score = min(0.99, probe_score + 0.25)
+                probe_existing = probe_ranked_by_scene.get(scene.scene_id)
+                if probe_existing is None or probe_semantic_score > probe_existing[0]:
+                    probe_ranked_by_scene[scene.scene_id] = (probe_semantic_score, scene, "probe-cache")
            score, _reason = _semantic_match_score(beat_desc, scene_desc)
            if score < cfg.vision.similarity_threshold:
                continue
@@ -499,10 +516,11 @@ def build_vision_seed_in_points(
                ranked_by_scene[scene.scene_id] = (semantic_score, scene, "cache")

        ranked = sorted(ranked_by_scene.values(), key=lambda item: item[0], reverse=True)
+        probe_ranked = sorted(probe_ranked_by_scene.values(), key=lambda item: item[0], reverse=True)
        window_points = _add_window_seed_descriptions(
            beat=beat,
            beat_desc=beat_desc,
-            ranked=ranked,
+            ranked=probe_ranked,
            cfg=cfg,
            cache=cache,
            budget=budget,