Trim retimed segments when phase drifts
This commit is contained in:
@@ -191,6 +191,11 @@ sichtbare Segment begrenzt; der gefundene Source-Inpoint wird dabei um den
|
||||
Trailer-Offset des Segments verschoben. So geht die globale Aktionsbeschreibung
|
||||
eines Beats nicht verloren, nur weil der scorebare Teil erst nach einer Blende
|
||||
beginnt.
|
||||
Der Segment-Offset zählt dabei nur über vorherige scorebare Bildinseln, nicht
|
||||
über schwarze oder überblendete Lücken. Nach dem Retiming wird die nutzbare
|
||||
Source-Dauer erneut geschätzt; läuft die Source am Ende in eine sichtbar andere
|
||||
Aktionsphase, wird der Clip gekürzt und der Rest bleibt Placeholder/Fade statt
|
||||
einen falschen Bewegungsmoment zu zeigen.
|
||||
Der gewichtete Vision-Seed-Pfad ersetzt standardmäßig keinen normalen
|
||||
FFmpeg-Vollscan. Vision-Beschreibungen sind semantische Hinweise, aber keine
|
||||
Beweise; der volle CV-Scan bleibt deshalb aktiv, damit falsch bewertete
|
||||
|
||||
@@ -640,12 +640,24 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
from dataclasses import replace
|
||||
from src.llm.vision_cache import find_action_window_in_scene, validate_match_window_with_vision
|
||||
from src.cv.scene_indexer import build_scene_index
|
||||
from src.cv.global_scan import align_in_point_by_content_and_motion
|
||||
from src.cv.global_scan import align_in_point_by_content_and_motion, estimate_usable_source_duration
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
beats_by_id = {beat.beat_id: beat for beat in beats}
|
||||
scenes_by_id = {scene.scene_id: scene for scene in build_scene_index(cfg)}
|
||||
|
||||
def visible_content_offset(action_beat, segment_start_offset_s: float) -> float:
    """Return how much scoreable content precedes a segment inside a beat.

    Walks the (start, end) pairs yielded by ``_reference_scoreable_segments``
    for ``action_beat`` and sums the portion of each pair that lies before
    ``segment_start_offset_s``. Time between pairs is not counted, so the
    result only reflects the listed spans themselves.

    NOTE(review): the early ``break`` presumes the helper yields the pairs
    in ascending time order -- confirm against its implementation.

    Args:
        action_beat: Beat whose scoreable spans are inspected.
        segment_start_offset_s: Offset compared against the span bounds
            (same time base as the pairs returned by the helper).

    Returns:
        Accumulated length, in seconds, of the spans before the offset.
    """
    preceding_content_s = 0.0
    for span_start_s, span_end_s in _reference_scoreable_segments(action_beat, cfg):
        if span_end_s <= segment_start_offset_s:
            # Span ends at or before the offset: count it in full.
            preceding_content_s += max(0.0, span_end_s - span_start_s)
            continue
        if span_start_s < segment_start_offset_s:
            # Span straddles the offset: count only the part before it.
            preceding_content_s += max(0.0, segment_start_offset_s - span_start_s)
        # This span reaches past the offset, so stop scanning here.
        break
    return preceding_content_s
|
||||
|
||||
def realign_window(check_beat, scene_id: int, action_beat=None):
|
||||
scene = scenes_by_id.get(scene_id)
|
||||
if scene is None:
|
||||
@@ -655,9 +667,10 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
return None
|
||||
start_s, end_s, semantic_score, reason = found
|
||||
if action_beat is not None:
|
||||
offset_delta_s = max(0.0, check_beat.start_s - action_beat.start_s)
|
||||
start_s += offset_delta_s
|
||||
end_s += offset_delta_s
|
||||
segment_start_offset_s = max(0.0, check_beat.start_s - action_beat.start_s)
|
||||
content_offset_s = visible_content_offset(action_beat, segment_start_offset_s)
|
||||
start_s += content_offset_s
|
||||
end_s += content_offset_s
|
||||
window_s = max(3.0, min(8.0, (end_s - start_s) * 4.0))
|
||||
aligned_in_s, combined_score, content_score, motion_score = align_in_point_by_content_and_motion(
|
||||
check_beat,
|
||||
@@ -666,12 +679,16 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
search_window_s=window_s,
|
||||
)
|
||||
aligned_in_s = max(scene.start_s, min(aligned_in_s, max(scene.start_s, scene.end_s - check_beat.duration_s)))
|
||||
usable_duration_s, usable_score = estimate_usable_source_duration(check_beat, aligned_in_s, cfg)
|
||||
usable_duration_s = max(0.0, min(check_beat.duration_s, usable_duration_s))
|
||||
if usable_duration_s < max(0.32, check_beat.duration_s * 0.45):
|
||||
usable_duration_s = check_beat.duration_s
|
||||
ok, verify_reason = validate_match_window_with_vision(
|
||||
check_beat,
|
||||
source_path=scene.source_path,
|
||||
scene_id=scene.scene_id,
|
||||
in_point_s=aligned_in_s,
|
||||
out_point_s=aligned_in_s + check_beat.duration_s,
|
||||
out_point_s=aligned_in_s + usable_duration_s,
|
||||
cfg=cfg,
|
||||
)
|
||||
if not ok:
|
||||
@@ -683,8 +700,11 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
verify_reason,
|
||||
)
|
||||
return None
|
||||
score = max(combined_score, min(0.99, semantic_score * 0.70 + motion_score * 0.20 + content_score * 0.10))
|
||||
return scene, aligned_in_s, score, f"{reason}; {verify_reason}"
|
||||
score = max(
|
||||
combined_score,
|
||||
min(0.99, semantic_score * 0.65 + motion_score * 0.18 + content_score * 0.09 + usable_score * 0.08),
|
||||
)
|
||||
return scene, aligned_in_s, usable_duration_s, score, f"{reason}; {verify_reason}"
|
||||
|
||||
kept = []
|
||||
for result in results:
|
||||
@@ -745,7 +765,7 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
if repair is None:
|
||||
new_segments.append(segment)
|
||||
continue
|
||||
repair_scene, aligned_in_s, score, repair_reason = repair
|
||||
repair_scene, aligned_in_s, usable_duration_s, score, repair_reason = repair
|
||||
if abs(aligned_in_s - segment.in_point_s) <= 1.0 / cfg.export.edl_frame_rate:
|
||||
new_segments.append(segment)
|
||||
continue
|
||||
@@ -755,7 +775,8 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
segment,
|
||||
scene_id=repair_scene.scene_id,
|
||||
in_point_s=aligned_in_s,
|
||||
out_point_s=aligned_in_s + segment.duration_s,
|
||||
out_point_s=aligned_in_s + usable_duration_s,
|
||||
duration_s=usable_duration_s,
|
||||
match_score=score,
|
||||
is_confirmed=score >= cfg.cv.deep_scan.match_threshold,
|
||||
))
|
||||
@@ -783,7 +804,7 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
if scene is not None and scene.duration_s > max(result.duration_s * 1.6, 6.0):
|
||||
repair = realign_window(beat, result.scene_id)
|
||||
if repair is not None:
|
||||
repair_scene, aligned_in_s, score, repair_reason = repair
|
||||
repair_scene, aligned_in_s, usable_duration_s, score, repair_reason = repair
|
||||
if abs(aligned_in_s - result.in_point_s) > 1.0 / cfg.export.edl_frame_rate:
|
||||
logger.info(
|
||||
"Beat %d: realigned semantically valid long scene by motion/action window (%s)",
|
||||
@@ -794,7 +815,7 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
result,
|
||||
scene_id=repair_scene.scene_id,
|
||||
in_point_s=aligned_in_s,
|
||||
out_point_s=aligned_in_s + result.duration_s,
|
||||
out_point_s=aligned_in_s + usable_duration_s,
|
||||
in_point_frame=int(aligned_in_s * cfg.export.edl_frame_rate),
|
||||
match_score=score,
|
||||
is_confirmed=score >= cfg.cv.deep_scan.match_threshold,
|
||||
@@ -817,13 +838,14 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
if repair is None:
|
||||
all_repaired = False
|
||||
break
|
||||
scene, aligned_in_s, score, repair_reason = repair
|
||||
scene, aligned_in_s, usable_duration_s, score, repair_reason = repair
|
||||
repair_reasons.append(repair_reason)
|
||||
new_segments.append(replace(
|
||||
segment,
|
||||
scene_id=scene.scene_id,
|
||||
in_point_s=aligned_in_s,
|
||||
out_point_s=aligned_in_s + segment.duration_s,
|
||||
out_point_s=aligned_in_s + usable_duration_s,
|
||||
duration_s=usable_duration_s,
|
||||
match_score=score,
|
||||
is_confirmed=score >= cfg.cv.deep_scan.match_threshold,
|
||||
))
|
||||
@@ -849,7 +871,7 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
else:
|
||||
repair = realign_window(beat, result.scene_id)
|
||||
if repair is not None:
|
||||
scene, aligned_in_s, score, repair_reason = repair
|
||||
scene, aligned_in_s, usable_duration_s, score, repair_reason = repair
|
||||
logger.info(
|
||||
"Beat %d: realigned inside matched scene by vision action window (%s)",
|
||||
result.beat_id,
|
||||
@@ -859,7 +881,7 @@ def _filter_semantically_invalid_vision_matches(results: list, beats: list, cfg)
|
||||
result,
|
||||
scene_id=scene.scene_id,
|
||||
in_point_s=aligned_in_s,
|
||||
out_point_s=aligned_in_s + result.duration_s,
|
||||
out_point_s=aligned_in_s + usable_duration_s,
|
||||
in_point_frame=int(aligned_in_s * cfg.export.edl_frame_rate),
|
||||
match_score=score,
|
||||
is_confirmed=score >= cfg.cv.deep_scan.match_threshold,
|
||||
|
||||
Reference in New Issue
Block a user