Rank long-scene probes without action penalty

This commit is contained in:
Melbar
2026-05-02 18:53:27 +02:00
parent 2d9ba91a7b
commit 252f710396
2 changed files with 23 additions and 2 deletions
+3
View File
@@ -168,6 +168,9 @@ Diese lokale Fenster-Probe ist bewusst breiter als die finale Seed-Auswahl:
Eine lange Dialogszene kann in der Gesamtbeschreibung nur als Gespräch
erscheinen, aber an einer späteren Stelle trotzdem genau die gesuchte
Aktionsphase enthalten.
Für diese Probe wird deshalb die grobe Szenenähnlichkeit ohne harte
Aktionsstrafe gerankt; die harte Aktionsprüfung greift erst bei den lokalen
Fenstern und beim finalen Source-Zeitbereich.
Nach dem CV-Match kann derselbe Vision-Layer den konkreten finalen Source-
Zeitbereich nochmals gegen den Trailer-Beat prüfen. Starke Aktionsphasen wie
Annäherung, Kuss/Stirnkontakt, Handbewegungen oder Schneiden müssen dann auch
+20 -2
View File
@@ -365,7 +365,7 @@ def _add_window_seed_descriptions(
probe_limit = max(
cfg.vision.max_seed_scenes * 4,
cfg.vision.scene_candidate_top_k // 2,
cfg.vision.scene_candidate_top_k,
)
scenes_to_probe = ranked[: max(1, min(len(ranked), probe_limit))]
windows_per_scene = max(1, min(6, cfg.vision.seed_points_per_scene // 2))
@@ -460,6 +460,7 @@ def build_vision_seed_in_points(
)
ranked_by_scene: dict[int, tuple[float, Scene, str]] = {}
probe_ranked_by_scene: dict[int, tuple[float, Scene, str]] = {}
for hit in hits:
scene = scenes_by_id.get(hit.scene_id)
if scene is None:
@@ -477,6 +478,13 @@ def build_vision_seed_in_points(
)
if not scene_desc:
continue
probe_score = _text_similarity(beat_desc, scene_desc)
if probe_score >= cfg.vision.similarity_threshold:
probe_ranked_by_scene[scene.scene_id] = (
min(0.99, probe_score + 0.25),
scene,
"probe",
)
score, _reason = _semantic_match_score(beat_desc, scene_desc)
if score >= cfg.vision.similarity_threshold:
ranked_by_scene[scene.scene_id] = (min(0.99, score + 0.25), scene, "vision")
@@ -488,8 +496,17 @@ def build_vision_seed_in_points(
existing = ranked_by_scene.get(scene.scene_id)
if existing is None or vibe_score > existing[0]:
ranked_by_scene[scene.scene_id] = (vibe_score, scene, "vibe")
probe_existing = probe_ranked_by_scene.get(scene.scene_id)
if probe_existing is None or vibe_score > probe_existing[0]:
probe_ranked_by_scene[scene.scene_id] = (vibe_score, scene, "vibe")
for scene, scene_desc in _cached_scene_descriptions(cache, scenes_by_id, cfg):
probe_score = _text_similarity(beat_desc, scene_desc)
if probe_score >= cfg.vision.similarity_threshold:
probe_semantic_score = min(0.99, probe_score + 0.25)
probe_existing = probe_ranked_by_scene.get(scene.scene_id)
if probe_existing is None or probe_semantic_score > probe_existing[0]:
probe_ranked_by_scene[scene.scene_id] = (probe_semantic_score, scene, "probe-cache")
score, _reason = _semantic_match_score(beat_desc, scene_desc)
if score < cfg.vision.similarity_threshold:
continue
@@ -499,10 +516,11 @@ def build_vision_seed_in_points(
ranked_by_scene[scene.scene_id] = (semantic_score, scene, "cache")
ranked = sorted(ranked_by_scene.values(), key=lambda item: item[0], reverse=True)
probe_ranked = sorted(probe_ranked_by_scene.values(), key=lambda item: item[0], reverse=True)
window_points = _add_window_seed_descriptions(
beat=beat,
beat_desc=beat_desc,
ranked=ranked,
ranked=probe_ranked,
cfg=cfg,
cache=cache,
budget=budget,