Realign wrong in-scene action matches
This commit is contained in:
@@ -595,3 +595,73 @@ def validate_match_window_with_vision(
|
||||
if missing_actions and score < threshold:
|
||||
return False, f"{reason} missing_actions={sorted(missing_actions)}"
|
||||
return True, reason
|
||||
|
||||
|
||||
def find_action_window_in_scene(
    beat: TrailerBeat,
    scene: Scene,
    cfg: AppConfig,
) -> tuple[float, float, float, str] | None:
    """
    Search one already selected source scene for the beat's action phase.

    This is used after CV picked the right broad scene but the wrong time
    inside that scene. It stays automatic and cached: windows are described
    evenly across the scene until the per-run vision budget is consumed.

    A window is accepted only when it covers every strong action group found
    in the beat description and its semantic score reaches the threshold.

    Returns:
        ``(start_s, end_s, score, reason)`` for the highest-scoring accepted
        window (the earliest one yielded wins ties), or ``None`` when vision
        is disabled, the scene has no positive duration, the beat cannot be
        described, the beat has no strong action groups, or no window
        qualifies.
    """
    if not cfg.vision.enabled or scene.duration_s <= 0:
        return None

    cache = _load_cache(cfg)
    try:
        # One-element list shared by every _describe_sample call below.
        # NOTE(review): presumably the helper decrements it in place as new
        # descriptions are generated -- confirm against _describe_sample.
        budget = [max(0, cfg.vision.max_new_descriptions_per_run)]
        beat_desc = _describe_sample(
            kind="beat",
            item_id=beat.beat_id,
            label=f"trailer beat {beat.beat_id} action search",
            video_path=beat.trailer_path,
            start_s=beat.start_s,
            end_s=beat.end_s,
            cfg=cfg,
            cache=cache,
            budget=budget,
        )
        if not beat_desc:
            return None

        # Only strong action groups are worth realigning on; without any there
        # is nothing to search for inside the scene.
        beat_actions = _semantic_action_groups(beat_desc) & _STRONG_ACTION_GROUPS
        if not beat_actions:
            return None

        max_windows = max(
            cfg.vision.seed_points_per_scene,
            cfg.vision.max_new_descriptions_per_run,
        )
        # Loop-invariant acceptance bar: the configured similarity threshold
        # plus a 0.18 margin, never below 0.38.
        threshold = max(0.38, cfg.vision.similarity_threshold + 0.18)

        best: tuple[float, float, float, str] | None = None
        for start_s, end_s in _scene_window_ranges(scene, beat, max_windows):
            desc = _describe_sample(
                kind="action_window",
                item_id=scene.scene_id,
                label=f"source scene {scene.scene_id} action window {start_s:.2f}-{end_s:.2f}",
                video_path=scene.source_path,
                start_s=start_s,
                end_s=end_s,
                cfg=cfg,
                cache=cache,
                budget=budget,
            )
            if not desc:
                continue

            score, reason = _semantic_match_score(beat_desc, desc)
            # Reject windows that lack any strong action the beat shows.
            if beat_actions - _semantic_action_groups(desc):
                continue
            if score < threshold:
                continue

            # Strict '>' keeps the first window among equally scored ones.
            if best is None or score > best[2]:
                best = (start_s, end_s, score, reason)

        return best
    finally:
        # Persist the cache on every exit path, including the early returns
        # above, so descriptions gathered during this call are not discarded.
        _save_cache(cfg, cache)
|
||||
|
||||
Reference in New Issue
Block a user