Improve local phase retuning
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
* text=auto
|
||||
.gitattributes text eol=lf
|
||||
*.py text eol=lf
|
||||
*.md text eol=lf
|
||||
*.html text eol=lf
|
||||
*.ps1 text eol=crlf
|
||||
@@ -52,7 +52,7 @@ Alles andere unten ist Hintergrund für den Tool-Verantwortlichen.
|
||||
| **1** | Schneller Vibe-Check: für jeden Beat die Top-K ähnlichsten Szenen aus dem Spielfilm vorauswählen (Histogramm + pHash). |
|
||||
| **2** | Optional: Vision-LLM beschreibt unsichere Szenen mit 3-Frame-Samples; die Beschreibungen liegen gecacht vor. |
|
||||
| **3** | Frame-genaue Verfeinerung pro Beat (OpenCV-Template-Matching, Bewegungsphasen-Vergleich). |
|
||||
| **4** | Phasen-Reparatur: bei segmentierten Beats wird die Bewegungsphase im Source saliency-gewichtet mit der sichtbaren Trailerphase abgeglichen. |
|
||||
| **4** | Phasen-Reparatur: bei segmentierten Beats wird die Bewegungsphase lokal um den gefundenen Inpoint saliency- und motion-gewichtet mit der sichtbaren Trailerphase abgeglichen. |
|
||||
| **5** | Recovery: Beats ohne Treffer werden via Vision-Phasensuche in den Top-K Szenen nochmal probiert. |
|
||||
| **6** | Export als FCPXML 1.10 oder CMX-3600-EDL plus `CUTTER_REPORT.md`. |
|
||||
|
||||
|
||||
@@ -1912,15 +1912,19 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
|
||||
|
||||
scene_start = float(scene["start_s"])
|
||||
scene_end = float(scene["end_s"])
|
||||
scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s - align_offset))
|
||||
center_t = max(scene_start, min(scene_end, original_in_s + align_offset))
|
||||
retune_radius_s = max(4.0, min(12.0, segment_beat.duration_s * 2.5))
|
||||
scan_start = max(scene_start, center_t - retune_radius_s)
|
||||
scene_scan_end = min(scene_end, center_t + retune_radius_s)
|
||||
scan_end = max(scan_start, scene_scan_end - max(0.04, segment_beat.duration_s - align_offset))
|
||||
max_points = 400
|
||||
step_s = max(0.08, (scan_end - scene_start) / max_points)
|
||||
step_s = max(0.04, (scan_end - scan_start) / max_points)
|
||||
|
||||
source_cap = cv2.VideoCapture(str(cfg.paths.source_movie))
|
||||
source_fps = source_cap.get(cv2.CAP_PROP_FPS) or _scene_fps_light(scene, cfg)
|
||||
stride = max(1, int(round(step_s * source_fps)))
|
||||
start_frame = max(0, int(round(scene_start * source_fps)))
|
||||
end_frame = max(start_frame, int(round(scene_end * source_fps)))
|
||||
start_frame = max(0, int(round(scan_start * source_fps)))
|
||||
end_frame = max(start_frame, int(round(scene_scan_end * source_fps)))
|
||||
times: list[float] = []
|
||||
source_frames: list = []
|
||||
frame_idx = start_frame
|
||||
@@ -1932,33 +1936,60 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
|
||||
times.append(frame_idx / source_fps)
|
||||
source_frames.append(prepared_gray(frame))
|
||||
frame_idx += stride
|
||||
base_time = times[0] if times else scan_start
|
||||
|
||||
candidates: list[tuple[float, float, float]] = []
|
||||
for i, t in enumerate(times):
|
||||
if t > scan_end:
|
||||
break
|
||||
vals = []
|
||||
src_for_offsets = []
|
||||
for offset, ref in zip(ref_offsets, refs):
|
||||
j = int(round((t + offset - scene_start) / step_s))
|
||||
j = int(round((t + offset - base_time) / step_s))
|
||||
if 0 <= j < len(source_frames):
|
||||
score = pair_score(ref, source_frames[j], mask)
|
||||
src = source_frames[j]
|
||||
score = pair_score(ref, src, mask)
|
||||
else:
|
||||
src = None
|
||||
score = None
|
||||
if score is not None:
|
||||
vals.append(score)
|
||||
src_for_offsets.append(src)
|
||||
if len(vals) >= 4:
|
||||
avg_score = sum(vals) / len(vals)
|
||||
candidates.append((0.55 * avg_score + 0.45 * min(vals), min(vals), t))
|
||||
early_count = min(2, len(vals))
|
||||
tail_count = min(2, len(vals))
|
||||
early_score = sum(vals[:early_count]) / early_count
|
||||
tail_score = sum(vals[-tail_count:]) / tail_count
|
||||
motion_vals = []
|
||||
for idx in range(1, min(len(refs), len(src_for_offsets))):
|
||||
if src_for_offsets[idx - 1] is None or src_for_offsets[idx] is None:
|
||||
continue
|
||||
ref_motion = refs[idx] - refs[idx - 1]
|
||||
src_motion = src_for_offsets[idx] - src_for_offsets[idx - 1]
|
||||
motion_vals.append(1.0 - float((np.abs(ref_motion - src_motion) * mask).sum()))
|
||||
motion_score = sum(motion_vals) / len(motion_vals) if motion_vals else avg_score
|
||||
# Phase retuning must reject "same shot, wrong moment" matches.
|
||||
# A plain average can hide a bad onset inside slow dialogue shots;
|
||||
# keep the low-water mark, onset, and frame-to-frame motion influential.
|
||||
phase_score = (
|
||||
0.26 * avg_score
|
||||
+ 0.24 * min(vals)
|
||||
+ 0.24 * early_score
|
||||
+ 0.08 * tail_score
|
||||
+ 0.18 * motion_score
|
||||
)
|
||||
candidates.append((phase_score, min(vals), t))
|
||||
|
||||
if not candidates:
|
||||
return None
|
||||
|
||||
candidates.sort(reverse=True)
|
||||
best_score = candidates[0][0]
|
||||
tie_window = 0.014 if transition_start else 0.002
|
||||
tie_window = 0.006 if transition_start else 0.002
|
||||
near_tie = [c for c in candidates if c[0] >= best_score - tie_window]
|
||||
if transition_start:
|
||||
chosen = max(near_tie, key=lambda c: c[2])
|
||||
chosen = max(near_tie, key=lambda c: (c[1], c[0]))
|
||||
else:
|
||||
chosen = min(near_tie, key=lambda c: abs((c[2] - align_offset) - original_in_s))
|
||||
return max(scene_start, chosen[2] - align_offset), chosen[0]
|
||||
|
||||
@@ -195,6 +195,11 @@ Der zusätzliche Hi-Res-Phasenrefine bleibt lokal um den bereits validierten
|
||||
Inpoint und übernimmt nur klare Verbesserungen. Er darf keine ganze lange
|
||||
Dialogszene nach ähnlichen Layouts durchsuchen, weil sonst dieselbe Location
|
||||
mit anderer Gestik als falsche Phase gewinnen kann und die Laufzeit explodiert.
|
||||
Die lokale Retune-Wertung nutzt deshalb nicht nur den mittleren Frame-Score,
|
||||
sondern auch den schlechtesten Einzelvergleich, die ersten sichtbaren Frames
|
||||
und die Frame-zu-Frame-Bewegung. Dadurch gewinnt nicht mehr ein späteres
|
||||
Standbild derselben Einstellung, nur weil Fenster, Gesichter und Licht fast
|
||||
identisch aussehen.
|
||||
Report-Clips werden zusätzlich an den bekannten Source-Szenenstart plus eine
|
||||
sehr kurze Ein-Frame-Guard-Zone geklemmt, damit ein knapp vor oder direkt auf
|
||||
der Schnittkante liegender Inpoint nicht mit Frames der vorherigen Einstellung
|
||||
|
||||
Reference in New Issue
Block a user