Retiming long scene matches by action phase
This commit is contained in:
@@ -640,7 +640,7 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
from dataclasses import replace
|
||||
from src.llm.vision_cache import find_action_window_in_scene, validate_match_window_with_vision
|
||||
from src.cv.scene_indexer import build_scene_index
|
||||
from src.cv.global_scan import align_in_point_by_content, align_in_point_by_motion
|
||||
from src.cv.global_scan import align_in_point_by_content_and_motion
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
beats_by_id = {beat.beat_id: beat for beat in beats}
|
||||
@@ -654,19 +654,13 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
if found is None:
|
||||
return None
|
||||
start_s, end_s, semantic_score, reason = found
|
||||
window_s = max(1.0, min(4.0, (end_s - start_s) * 1.5))
|
||||
motion_in_s, motion_score = align_in_point_by_motion(
|
||||
window_s = max(3.0, min(8.0, (end_s - start_s) * 4.0))
|
||||
aligned_in_s, combined_score, content_score, motion_score = align_in_point_by_content_and_motion(
|
||||
check_beat,
|
||||
start_s,
|
||||
cfg,
|
||||
search_window_s=window_s,
|
||||
)
|
||||
aligned_in_s, content_score = align_in_point_by_content(
|
||||
check_beat,
|
||||
motion_in_s,
|
||||
cfg,
|
||||
search_window_s=min(window_s, 0.8),
|
||||
)
|
||||
aligned_in_s = max(scene.start_s, min(aligned_in_s, max(scene.start_s, scene.end_s - check_beat.duration_s)))
|
||||
ok, verify_reason = validate_match_window_with_vision(
|
||||
check_beat,
|
||||
@@ -685,7 +679,7 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
verify_reason,
|
||||
)
|
||||
return None
|
||||
score = max(content_score, min(0.99, semantic_score * 0.75 + motion_score * 0.25))
|
||||
score = max(combined_score, min(0.99, semantic_score * 0.70 + motion_score * 0.20 + content_score * 0.10))
|
||||
return scene, aligned_in_s, score, f"{reason}; {verify_reason}"
|
||||
|
||||
kept = []
|
||||
@@ -728,7 +722,82 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
valid = False
|
||||
break
|
||||
if valid:
|
||||
kept.append(result)
|
||||
repaired = False
|
||||
if getattr(result, "segments", ()):
|
||||
new_segments = []
|
||||
repair_reasons = []
|
||||
changed = False
|
||||
for segment in result.segments:
|
||||
scene = scenes_by_id.get(segment.scene_id)
|
||||
if scene is None or scene.duration_s <= max(segment.duration_s * 1.6, 6.0):
|
||||
new_segments.append(segment)
|
||||
continue
|
||||
segment_beat = replace(
|
||||
beat,
|
||||
start_s=beat.start_s + segment.trailer_offset_s,
|
||||
end_s=beat.start_s + segment.trailer_offset_s + segment.duration_s,
|
||||
)
|
||||
repair = realign_window(segment_beat, segment.scene_id)
|
||||
if repair is None:
|
||||
new_segments.append(segment)
|
||||
continue
|
||||
repair_scene, aligned_in_s, score, repair_reason = repair
|
||||
if abs(aligned_in_s - segment.in_point_s) <= 1.0 / cfg.export.edl_frame_rate:
|
||||
new_segments.append(segment)
|
||||
continue
|
||||
changed = True
|
||||
repair_reasons.append(repair_reason)
|
||||
new_segments.append(replace(
|
||||
segment,
|
||||
scene_id=repair_scene.scene_id,
|
||||
in_point_s=aligned_in_s,
|
||||
out_point_s=aligned_in_s + segment.duration_s,
|
||||
match_score=score,
|
||||
is_confirmed=score >= cfg.cv.deep_scan.match_threshold,
|
||||
))
|
||||
if changed and new_segments:
|
||||
first = new_segments[0]
|
||||
repaired_score = min(seg.match_score for seg in new_segments)
|
||||
logger.info(
|
||||
"Beat %d: realigned semantically valid long scene by motion/action windows (%s)",
|
||||
result.beat_id,
|
||||
"; ".join(repair_reasons),
|
||||
)
|
||||
kept.append(replace(
|
||||
result,
|
||||
scene_id=first.scene_id,
|
||||
in_point_s=first.in_point_s,
|
||||
out_point_s=first.out_point_s,
|
||||
in_point_frame=int(first.in_point_s * cfg.export.edl_frame_rate),
|
||||
match_score=repaired_score,
|
||||
is_confirmed=repaired_score >= cfg.cv.deep_scan.match_threshold,
|
||||
segments=tuple(new_segments),
|
||||
))
|
||||
repaired = True
|
||||
else:
|
||||
scene = scenes_by_id.get(result.scene_id)
|
||||
if scene is not None and scene.duration_s > max(result.duration_s * 1.6, 6.0):
|
||||
repair = realign_window(beat, result.scene_id)
|
||||
if repair is not None:
|
||||
repair_scene, aligned_in_s, score, repair_reason = repair
|
||||
if abs(aligned_in_s - result.in_point_s) > 1.0 / cfg.export.edl_frame_rate:
|
||||
logger.info(
|
||||
"Beat %d: realigned semantically valid long scene by motion/action window (%s)",
|
||||
result.beat_id,
|
||||
repair_reason,
|
||||
)
|
||||
kept.append(replace(
|
||||
result,
|
||||
scene_id=repair_scene.scene_id,
|
||||
in_point_s=aligned_in_s,
|
||||
out_point_s=aligned_in_s + result.duration_s,
|
||||
in_point_frame=int(aligned_in_s * cfg.export.edl_frame_rate),
|
||||
match_score=score,
|
||||
is_confirmed=score >= cfg.cv.deep_scan.match_threshold,
|
||||
))
|
||||
repaired = True
|
||||
if not repaired:
|
||||
kept.append(result)
|
||||
else:
|
||||
if getattr(result, "segments", ()):
|
||||
new_segments = []
|
||||
|
||||
Reference in New Issue
Block a user