Recover weak low-light matches via vision

This commit is contained in:
Melbar
2026-05-09 17:26:10 +02:00
parent ae3c2b1b13
commit ed7b083dca
2 changed files with 24 additions and 6 deletions
+20 -6
View File
@@ -891,7 +891,7 @@ def _merge_best_results(existing: list, candidates: list, cfg) -> list:
def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list:
"""Try a vision-led search for beats that ended up without a match.
"""Try a vision-led search for beats that ended up weak or unmatched.
For each weak or unmatched beat that has scoreable visual content (i.e. not
pure fade/title-card material), this pass:
@@ -919,17 +919,28 @@ def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list
from src.llm.vision_cache import find_action_window_in_scene, validate_match_window_with_vision
logger = logging.getLogger(__name__)
matched_ids = {r.beat_id for r in results}
unmatched = [b for b in beats if b.beat_id not in matched_ids]
if not unmatched:
results_by_id = {r.beat_id: r for r in results}
recovery_targets = [
b for b in beats
if (
b.beat_id not in results_by_id
or (
not results_by_id[b.beat_id].is_confirmed
and results_by_id[b.beat_id].match_score < cfg.cv.deep_scan.match_threshold
)
)
]
if not recovery_targets:
return results
scenes = build_scene_index(cfg)
if not scenes:
return results
new_results = list(results)
for beat in unmatched:
target_ids = {b.beat_id for b in recovery_targets}
new_results = [r for r in results if r.beat_id not in target_ids]
replaced_results = {r.beat_id: r for r in results if r.beat_id in target_ids}
for beat in recovery_targets:
try:
islands = _reference_scoreable_segments(beat, cfg)
except Exception:
@@ -1029,6 +1040,9 @@ def _recover_unmatched_beats_via_vision(results: list, beats: list, cfg) -> list
best = candidate
if best is None:
previous = replaced_results.get(beat.beat_id)
if previous is not None:
new_results.append(previous)
continue
score, scene, aligned_in_s, usable_duration_s, repair_reason = best
logger.info(
+4
View File
@@ -190,6 +190,10 @@ eine kurze Geste erst korrekt erkannt und anschließend in eine spätere
ähnliche Körperhaltung verschoben wird. Wenn mehrere Vision-Kandidaten in
derselben Source-Szene ähnlich gut scoren und die Beat-Dauer abdecken,
bevorzugt der Matcher die frühere Phase.
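Die Vision-Recovery läuft nicht nur für komplett fehlende Beats, sondern auch
für schwache unbestätigte Treffer, also Ergebnisse, die nicht bestätigt sind
und deren Match-Score unter `cv.deep_scan.match_threshold` liegt. Gerade
Low-Light-Beats dürfen nicht an einem falschen dunklen CV-Treffer hängen
bleiben, wenn der Cache semantisch eine bessere Handlungsphase kennt. Findet
die Recovery keinen Kandidaten, bleibt der bisherige Treffer erhalten.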
Der zusätzliche Hi-Res-Phasenrefine bleibt lokal um den bereits validierten
Inpoint und übernimmt nur klare Verbesserungen. Er darf keine ganze lange