Rank long-scene probes without action penalty

2026-05-02 18:53:27 +02:00
parent 2d9ba91a7b
commit 252f710396
2 changed files with 23 additions and 2 deletions
@@ -168,6 +168,9 @@ Diese lokale Fenster-Probe ist bewusst breiter als die finale Seed-Auswahl:
 Eine lange Dialogszene kann in der Gesamtbeschreibung nur als Gespräch
 erscheinen, aber an einer späteren Stelle trotzdem genau die gesuchte
 Aktionsphase enthalten.
 Für diese Probe wird deshalb die grobe Szenenähnlichkeit ohne harte
 Aktionsstrafe gerankt; die harte Aktionsprüfung greift erst bei den lokalen
 Fenstern und beim finalen Source-Zeitbereich.
 Nach dem CV-Match kann derselbe Vision-Layer den konkreten finalen Source-
 Zeitbereich nochmals gegen den Trailer-Beat prüfen. Starke Aktionsphasen wie
 Annäherung, Kuss/Stirnkontakt, Handbewegungen oder Schneiden müssen dann auch
@@ -365,7 +365,7 @@ def _add_window_seed_descriptions(
    probe_limit = max(
        cfg.vision.max_seed_scenes * 4,
-        cfg.vision.scene_candidate_top_k // 2,
+        cfg.vision.scene_candidate_top_k,
    )
    scenes_to_probe = ranked[: max(1, min(len(ranked), probe_limit))]
    windows_per_scene = max(1, min(6, cfg.vision.seed_points_per_scene // 2))
@@ -460,6 +460,7 @@ def build_vision_seed_in_points(
        )
        ranked_by_scene: dict[int, tuple[float, Scene, str]] = {}
        probe_ranked_by_scene: dict[int, tuple[float, Scene, str]] = {}
        for hit in hits:
            scene = scenes_by_id.get(hit.scene_id)
            if scene is None:
@@ -477,6 +478,13 @@ def build_vision_seed_in_points(
            )
            if not scene_desc:
                continue
            probe_score = _text_similarity(beat_desc, scene_desc)
            if probe_score >= cfg.vision.similarity_threshold:
                probe_ranked_by_scene[scene.scene_id] = (
                    min(0.99, probe_score + 0.25),
                    scene,
                    "probe",
                )
            score, _reason = _semantic_match_score(beat_desc, scene_desc)
            if score >= cfg.vision.similarity_threshold:
                ranked_by_scene[scene.scene_id] = (min(0.99, score + 0.25), scene, "vision")
@@ -488,8 +496,17 @@ def build_vision_seed_in_points(
            existing = ranked_by_scene.get(scene.scene_id)
            if existing is None or vibe_score > existing[0]:
                ranked_by_scene[scene.scene_id] = (vibe_score, scene, "vibe")
            probe_existing = probe_ranked_by_scene.get(scene.scene_id)
            if probe_existing is None or vibe_score > probe_existing[0]:
                probe_ranked_by_scene[scene.scene_id] = (vibe_score, scene, "vibe")
        for scene, scene_desc in _cached_scene_descriptions(cache, scenes_by_id, cfg):
            probe_score = _text_similarity(beat_desc, scene_desc)
            if probe_score >= cfg.vision.similarity_threshold:
                probe_semantic_score = min(0.99, probe_score + 0.25)
                probe_existing = probe_ranked_by_scene.get(scene.scene_id)
                if probe_existing is None or probe_semantic_score > probe_existing[0]:
                    probe_ranked_by_scene[scene.scene_id] = (probe_semantic_score, scene, "probe-cache")
            score, _reason = _semantic_match_score(beat_desc, scene_desc)
            if score < cfg.vision.similarity_threshold:
                continue
@@ -499,10 +516,11 @@ def build_vision_seed_in_points(
                ranked_by_scene[scene.scene_id] = (semantic_score, scene, "cache")
        ranked = sorted(ranked_by_scene.values(), key=lambda item: item[0], reverse=True)
        probe_ranked = sorted(probe_ranked_by_scene.values(), key=lambda item: item[0], reverse=True)
        window_points = _add_window_seed_descriptions(
            beat=beat,
            beat_desc=beat_desc,
-            ranked=ranked,
+            ranked=probe_ranked,
            cfg=cfg,
            cache=cache,
            budget=budget,