diff --git a/cli.py b/cli.py index 651a377..3c6e071 100644 --- a/cli.py +++ b/cli.py @@ -891,7 +891,7 @@ def _merge_best_results(existing: list, candidates: list, cfg) -> list: def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list: - """Try a vision-led search for beats that ended up without a match. + """Try a vision-led search for beats that ended up weak or unmatched. For each unmatched beat that has scoreable visual content (i.e. not pure fade/title-card material), this pass: @@ -919,17 +919,28 @@ def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list from src.llm.vision_cache import find_action_window_in_scene, validate_match_window_with_vision logger = logging.getLogger(__name__) - matched_ids = {r.beat_id for r in results} - unmatched = [b for b in beats if b.beat_id not in matched_ids] - if not unmatched: + results_by_id = {r.beat_id: r for r in results} + recovery_targets = [ + b for b in beats + if ( + b.beat_id not in results_by_id + or ( + not results_by_id[b.beat_id].is_confirmed + and results_by_id[b.beat_id].match_score < cfg.cv.deep_scan.match_threshold + ) + ) + ] + if not recovery_targets: return results scenes = build_scene_index(cfg) if not scenes: return results - new_results = list(results) - for beat in unmatched: + target_ids = {b.beat_id for b in recovery_targets} + new_results = [r for r in results if r.beat_id not in target_ids] + replaced_results = {r.beat_id: r for r in results if r.beat_id in target_ids} + for beat in recovery_targets: try: islands = _reference_scoreable_segments(beat, cfg) except Exception: @@ -1029,6 +1040,9 @@ def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list best = candidate if best is None: + previous = replaced_results.get(beat.beat_id) + if previous is not None: + new_results.append(previous) continue score, scene, aligned_in_s, usable_duration_s, repair_reason = best logger.info( diff --git a/docs/ALGORITHM.md b/docs/ALGORITHM.md index d66c4b2..5f6c0e6 100644 --- a/docs/ALGORITHM.md +++ b/docs/ALGORITHM.md @@ -190,6 +190,10 @@ eine kurze Geste erst korrekt erkannt und anschließend in eine spätere ähnliche Körperhaltung verschoben wird. Wenn mehrere Vision-Kandidaten in derselben Source-Szene ähnlich gut scoren und die Beat-Dauer abdecken, bevorzugt der Matcher die frühere Phase. +Die Vision-Recovery läuft nicht nur für komplett fehlende Beats, sondern auch +für schwache unbestätigte Treffer. Gerade Low-Light-Beats dürfen nicht an einem +falschen dunklen CV-Treffer hängen bleiben, wenn der Cache semantisch eine +bessere Handlungsphase kennt. Der zusätzliche Hi-Res-Phasenrefine bleibt lokal um den bereits validierten Inpoint und übernimmt nur klare Verbesserungen. Er darf keine ganze lange