diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..61a200c
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,6 @@
+* text=auto
+.gitattributes text eol=lf
+*.py text eol=lf
+*.md text eol=lf
+*.html text eol=lf
+*.ps1 text eol=crlf
diff --git a/README.md b/README.md
index 7e117a3..a420e9f 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ Alles andere unten ist Hintergrund für den Tool-Verantwortlichen.
 | **1** | Schneller Vibe-Check: für jeden Beat die Top-K ähnlichsten Szenen aus dem Spielfilm vorauswählen (Histogramm + pHash). |
 | **2** | Optional: Vision-LLM beschreibt unsichere Szenen mit 3-Frame-Samples; die Beschreibungen liegen gecached vor. |
 | **3** | Frame-genaue Verfeinerung pro Beat (OpenCV-Templatematching, Bewegungsphasen-Vergleich). |
-| **4** | Phasen-Reparatur: bei segmentierten Beats wird die Bewegungsphase im Source saliency-gewichtet mit der sichtbaren Trailerphase abgeglichen. |
+| **4** | Phasen-Reparatur: bei segmentierten Beats wird die Bewegungsphase lokal um den gefundenen Inpoint saliency- und motion-gewichtet mit der sichtbaren Trailerphase abgeglichen. |
 | **5** | Recovery: Beats ohne Treffer werden via Vision-Phasensuche in den Top-K Szenen nochmal probiert. |
 | **6** | Export als FCPXML 1.10 oder CMX-3600-EDL plus `CUTTER_REPORT.md`. |
 
diff --git a/cli.py b/cli.py
index f33da73..651a377 100644
--- a/cli.py
+++ b/cli.py
@@ -1912,15 +1912,19 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
 
     scene_start = float(scene["start_s"])
     scene_end = float(scene["end_s"])
-    scan_end = max(scene_start, scene_end - max(0.04, segment_beat.duration_s - align_offset))
+    center_t = max(scene_start, min(scene_end, original_in_s + align_offset))
+    retune_radius_s = max(4.0, min(12.0, segment_beat.duration_s * 2.5))
+    scan_start = max(scene_start, center_t - retune_radius_s)
+    scene_scan_end = min(scene_end, center_t + retune_radius_s)
+    scan_end = max(scan_start, scene_scan_end - max(0.04, segment_beat.duration_s - align_offset))
     max_points = 400
-    step_s = max(0.08, (scan_end - scene_start) / max_points)
+    step_s = max(0.04, (scan_end - scan_start) / max_points)
 
     source_cap = cv2.VideoCapture(str(cfg.paths.source_movie))
     source_fps = source_cap.get(cv2.CAP_PROP_FPS) or _scene_fps_light(scene, cfg)
     stride = max(1, int(round(step_s * source_fps)))
-    start_frame = max(0, int(round(scene_start * source_fps)))
-    end_frame = max(start_frame, int(round(scene_end * source_fps)))
+    start_frame = max(0, int(round(scan_start * source_fps)))
+    end_frame = max(start_frame, int(round(scene_scan_end * source_fps)))
     times: list[float] = []
     source_frames: list = []
     frame_idx = start_frame
@@ -1932,33 +1936,60 @@ def _phase_probe_segment_in_scene(segment_beat, scene: dict, original_in_s: floa
         times.append(frame_idx / source_fps)
         source_frames.append(prepared_gray(frame))
         frame_idx += stride
+    base_time = times[0] if times else scan_start
 
     candidates: list[tuple[float, float, float]] = []
     for i, t in enumerate(times):
         if t > scan_end:
             break
         vals = []
+        src_for_offsets = []
         for offset, ref in zip(ref_offsets, refs):
-            j = int(round((t + offset - scene_start) / step_s))
+            j = int(round((t + offset - base_time) / step_s))
             if 0 <= j < len(source_frames):
-                score = pair_score(ref, source_frames[j], mask)
+                src = source_frames[j]
+                score = pair_score(ref, src, mask)
             else:
+                src = None
                 score = None
             if score is not None:
                 vals.append(score)
+                src_for_offsets.append(src)
         if len(vals) >= 4:
             avg_score = sum(vals) / len(vals)
-            candidates.append((0.55 * avg_score + 0.45 * min(vals), min(vals), t))
+            early_count = min(2, len(vals))
+            tail_count = min(2, len(vals))
+            early_score = sum(vals[:early_count]) / early_count
+            tail_score = sum(vals[-tail_count:]) / tail_count
+            motion_vals = []
+            for idx in range(1, min(len(refs), len(src_for_offsets))):
+                if src_for_offsets[idx - 1] is None or src_for_offsets[idx] is None:
+                    continue
+                ref_motion = refs[idx] - refs[idx - 1]
+                src_motion = src_for_offsets[idx] - src_for_offsets[idx - 1]
+                motion_vals.append(1.0 - float((np.abs(ref_motion - src_motion) * mask).sum()))
+            motion_score = sum(motion_vals) / len(motion_vals) if motion_vals else avg_score
+            # Phase retuning must reject "same shot, wrong moment" matches.
+            # A plain average can hide a bad onset inside slow dialogue shots;
+            # keep the low-water mark, onset, and frame-to-frame motion influential.
+            phase_score = (
+                0.26 * avg_score
+                + 0.24 * min(vals)
+                + 0.24 * early_score
+                + 0.08 * tail_score
+                + 0.18 * motion_score
+            )
+            candidates.append((phase_score, min(vals), t))
 
     if not candidates:
         return None
 
     candidates.sort(reverse=True)
     best_score = candidates[0][0]
-    tie_window = 0.014 if transition_start else 0.002
+    tie_window = 0.006 if transition_start else 0.002
     near_tie = [c for c in candidates if c[0] >= best_score - tie_window]
     if transition_start:
-        chosen = max(near_tie, key=lambda c: c[2])
+        chosen = max(near_tie, key=lambda c: (c[1], c[0]))
     else:
         chosen = min(near_tie, key=lambda c: abs((c[2] - align_offset) - original_in_s))
     return max(scene_start, chosen[2] - align_offset), chosen[0]
diff --git a/docs/ALGORITHM.md b/docs/ALGORITHM.md
index 8e5505b..d66c4b2 100644
--- a/docs/ALGORITHM.md
+++ b/docs/ALGORITHM.md
@@ -195,6 +195,11 @@ Der zusätzliche Hi-Res-Phasenrefine bleibt lokal um den bereits validierten
 Inpoint und übernimmt nur klare Verbesserungen. Er darf keine ganze lange
 Dialogszene nach ähnlichen Layouts durchsuchen, weil sonst dieselbe Location
 mit anderer Gestik als falsche Phase gewinnen kann und die Laufzeit explodiert.
+Die lokale Retune-Wertung nutzt deshalb nicht nur den mittleren Frame-Score,
+sondern auch den schlechtesten Einzelvergleich, die ersten sichtbaren Frames,
+(schwächer gewichtet) die letzten Frames und die Frame-zu-Frame-Bewegung. Dadurch gewinnt nicht mehr ein späteres
+Standbild derselben Einstellung, nur weil Fenster, Gesichter und Licht fast
+identisch aussehen.
 Report-Clips werden zusätzlich an den bekannten Source-Szenenstart plus eine
 sehr kurze Ein-Frame-Guard-Zone geklemmt, damit ein knapp vor oder direkt auf
 der Schnittkante liegender Inpoint nicht mit Frames der vorherigen Einstellung