Refactor report pipeline: redesign HTML, add motion alignment, remove legacy reporter

- scripts/generate_cutter_report.py: complete HTML redesign with glassmorphism dark-mode style, compare video links in markdown output
- cli.py: cmd_report now calls _regenerate_cutter_report directly; also writes legacy match_report.html; removes dependency on src/pipeline/reporter.py
- src/cv/global_scan.py: add motion-phase alignment refinement step after initial in-point search (align_in_point_by_motion, threshold +0.015)
- Remove HANDOVER.md and src/pipeline/reporter.py (superseded)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-06 12:44:10 +02:00
parent 3b90905d07
commit 45769aa366
5 changed files with 216 additions and 655 deletions
@@ -1422,12 +1422,46 @@ def run_global_scan(
                motion_score = 0.0
                if len(motion_templates) >= 2:
                    with open_video(cfg.paths.source_movie) as motion_cap:
-                        motion_score = _motion_phase_score(
+                        original_motion_score = _motion_phase_score(
                            motion_cap,
                            adjusted_in_s,
                            motion_templates,
                            cfg,
                        )
+                    
+                    motion_in_s, align_motion_score = align_in_point_by_motion(
+                        b,
+                        adjusted_in_s,
+                        cfg,
+                        search_window_s=(
+                            local_align_window_s
+                            if local_align_window_s is not None
+                            else min(1.0, cfg.cv.deep_scan.content_align_window_seconds)
+                        ),
+                    )
+                    
+                    if align_motion_score >= original_motion_score + 0.015:
+                        adjusted_in_s = motion_in_s
+                        motion_score = align_motion_score
+                        scene = _find_scene_for_time(scenes, adjusted_in_s, cfg)
+                        usable_duration_s = max(0.0, duration_s)
+                        out_s = adjusted_in_s + usable_duration_s
+                        if scene is not None:
+                            out_s = min(out_s, scene.end_s)
+                        duration_s = max(0.0, out_s - adjusted_in_s)
+                        duration_coverage = (
+                            min(1.0, duration_s / matchable_duration_s)
+                            if matchable_duration_s > 0 else 0.0
+                        )
+                        with open_video(cfg.paths.source_movie) as validation_cap:
+                            content_score = _fixed_content_sequence_score(
+                                validation_cap,
+                                adjusted_in_s,
+                                validation_templates,
+                                cfg,
+                            )
+                    else:
+                        motion_score = original_motion_score

                if is_weighted_seed_candidate and scene is not None and content_score >= content_gate:
                    contiguous_usable_s = _contiguous_scene_coverage_duration(
@@ -1,427 +0,0 @@
-"""
-src/pipeline/reporter.py — Visual Match Report Generator
-
-Generates an HTML file containing side-by-side video clips of:
-  Left:  The original beat from the reference trailer
-  Right: The matched scene from the source movie
-
-This allows instant visual verification of the CV pipeline's results.
-"""
-
-from __future__ import annotations
-
-import logging
-import subprocess
-from pathlib import Path
-
-from src.core.config import AppConfig
-
-logger = logging.getLogger(__name__)
-
-
-def _extract_clip(video_path: Path, start_s: float, duration_s: float, out_path: Path) -> None:
-    """Use ffmpeg to extract a silent, low-res preview clip."""
-    out_path.parent.mkdir(parents=True, exist_ok=True)
-
-    # Fast input seek close to the target, then accurate output seek for
-    # frame-faithful preview clips. A plain "-ss before -i" can land on a
-    # nearby keyframe and make the report look several frames out of sync.
-    preroll_s = 2.0 if start_s >= 2.0 else 0.0
-    input_seek_s = max(0.0, start_s - preroll_s)
-    accurate_seek_s = start_s - input_seek_s
-
-    cmd = [
-        "ffmpeg", "-y", "-loglevel", "error",
-        "-ss", str(input_seek_s),
-        "-i", str(video_path),
-        "-ss", str(accurate_seek_s),
-        "-t", str(duration_s),
-        "-map", "0:v:0",
-        "-c:v", "libx264",
-        "-preset", "ultrafast",
-        "-crf", "28",
-        "-vf", "scale=640:-2",   # scale down for lightweight report
-        "-an",                   # no audio
-        "-movflags", "+faststart",
-        str(out_path)
-    ]
-    
-    result = subprocess.run(cmd, capture_output=True)
-    if result.returncode != 0:
-        logger.error(
-            "ffmpeg clip extraction failed for %s:\n%s", 
-            out_path.name, result.stderr.decode(errors="replace")
-        )
-
-
-def _extract_clip_with_black_tail(
-    video_path: Path,
-    start_s: float,
-    source_duration_s: float,
-    total_duration_s: float,
-    out_path: Path,
-) -> None:
-    """Extract a source preview and append black frames for trailer-only tails."""
-    tail_s = max(0.0, total_duration_s - source_duration_s)
-    if tail_s <= 0.02:
-        _extract_clip(video_path, start_s, source_duration_s, out_path)
-        return
-
-    out_path.parent.mkdir(parents=True, exist_ok=True)
-    source_tmp = out_path.with_name(f"{out_path.stem}_source_tmp.mp4")
-    tail_tmp = out_path.with_name(f"{out_path.stem}_tail_tmp.mp4")
-    preroll_s = 2.0 if start_s >= 2.0 else 0.0
-    input_seek_s = max(0.0, start_s - preroll_s)
-    accurate_seek_s = start_s - input_seek_s
-
-    # First render the matched source portion with the same accurate seek path
-    # as _extract_clip(). Using trim=start=... after an input seek is brittle
-    # because FFmpeg may preserve non-zero packet timestamps around keyframes.
-    source_cmd = [
-        "ffmpeg", "-y", "-loglevel", "error",
-        "-ss", str(input_seek_s),
-        "-i", str(video_path),
-        "-ss", str(accurate_seek_s),
-        "-t", str(source_duration_s),
-        "-map", "0:v:0",
-        "-c:v", "libx264",
-        "-preset", "ultrafast",
-        "-crf", "28",
-        "-vf", "scale=640:360,setsar=1,fps=25,setpts=PTS-STARTPTS",
-        "-an",
-        "-movflags", "+faststart",
-        str(source_tmp),
-    ]
-
-    result = subprocess.run(source_cmd, capture_output=True)
-    if result.returncode != 0:
-        logger.error(
-            "ffmpeg source preview extraction failed for %s:\n%s",
-            out_path.name,
-            result.stderr.decode(errors="replace"),
-        )
-        return
-
-    tail_cmd = [
-        "ffmpeg", "-y", "-loglevel", "error",
-        "-f", "lavfi",
-        "-i", f"color=c=black:s=640x360:r=25:d={tail_s}",
-        "-c:v", "libx264",
-        "-preset", "ultrafast",
-        "-crf", "28",
-        "-an",
-        "-movflags", "+faststart",
-        str(tail_tmp),
-    ]
-    result = subprocess.run(tail_cmd, capture_output=True)
-    if result.returncode != 0:
-        logger.error(
-            "ffmpeg black tail render failed for %s:\n%s",
-            out_path.name,
-            result.stderr.decode(errors="replace"),
-        )
-        return
-
-    concat_cmd = [
-        "ffmpeg", "-y", "-loglevel", "error",
-        "-i", str(source_tmp),
-        "-i", str(tail_tmp),
-        "-filter_complex", "[0:v][1:v]concat=n=2:v=1:a=0[v]",
-        "-map", "[v]",
-        "-c:v", "libx264",
-        "-preset", "ultrafast",
-        "-crf", "28",
-        "-an",
-        "-movflags", "+faststart",
-        str(out_path),
-    ]
-    result = subprocess.run(concat_cmd, capture_output=True)
-    if result.returncode != 0:
-        logger.error(
-            "ffmpeg tailed preview concat failed for %s:\n%s",
-            out_path.name,
-            result.stderr.decode(errors="replace"),
-        )
-
-    for tmp in (source_tmp, tail_tmp):
-        try:
-            tmp.unlink(missing_ok=True)
-        except OSError:
-            pass
-
-
-def _extract_segmented_clip(
-    video_path: Path,
-    segments: list,
-    total_duration_s: float,
-    out_path: Path,
-) -> None:
-    """Render a beat-length source preview from multiple matched source islands."""
-    if not segments:
-        _extract_clip_with_black_tail(video_path, 0.0, 0.0, total_duration_s, out_path)
-        return
-
-    out_path.parent.mkdir(parents=True, exist_ok=True)
-    tmp_paths: list[Path] = []
-    cursor = 0.0
-
-    def add_black(duration_s: float) -> None:
-        if duration_s <= 0.02:
-            return
-        tmp = out_path.with_name(f"{out_path.stem}_part_{len(tmp_paths):03d}_black.mp4")
-        cmd = [
-            "ffmpeg", "-y", "-loglevel", "error",
-            "-f", "lavfi",
-            "-i", f"color=c=black:s=640x360:r=25:d={duration_s}",
-            "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
-            "-an", "-movflags", "+faststart",
-            str(tmp),
-        ]
-        result = subprocess.run(cmd, capture_output=True)
-        if result.returncode == 0:
-            tmp_paths.append(tmp)
-        else:
-            logger.error("ffmpeg black segment render failed:\n%s", result.stderr.decode(errors="replace"))
-
-    def add_source(start_s: float, duration_s: float) -> None:
-        if duration_s <= 0.02:
-            return
-        tmp = out_path.with_name(f"{out_path.stem}_part_{len(tmp_paths):03d}_src.mp4")
-        preroll_s = 2.0 if start_s >= 2.0 else 0.0
-        input_seek_s = max(0.0, start_s - preroll_s)
-        accurate_seek_s = start_s - input_seek_s
-        cmd = [
-            "ffmpeg", "-y", "-loglevel", "error",
-            "-ss", str(input_seek_s),
-            "-i", str(video_path),
-            "-ss", str(accurate_seek_s),
-            "-t", str(duration_s),
-            "-map", "0:v:0",
-            "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
-            "-vf", "scale=640:360,setsar=1,fps=25,setpts=PTS-STARTPTS",
-            "-an", "-movflags", "+faststart",
-            str(tmp),
-        ]
-        result = subprocess.run(cmd, capture_output=True)
-        if result.returncode == 0 and tmp.exists():
-            tmp_paths.append(tmp)
-        else:
-            logger.error("ffmpeg source segment render failed:\n%s", result.stderr.decode(errors="replace"))
-
-    for segment in sorted(segments, key=lambda s: s.trailer_offset_s):
-        offset_s = max(0.0, float(segment.trailer_offset_s))
-        duration_s = max(0.0, float(segment.duration_s))
-        add_black(offset_s - cursor)
-        add_source(float(segment.in_point_s), duration_s)
-        cursor = max(cursor, offset_s + duration_s)
-
-    add_black(total_duration_s - cursor)
-
-    if len(tmp_paths) == 1:
-        tmp_paths[0].replace(out_path)
-        return
-
-    inputs: list[str] = []
-    labels: list[str] = []
-    for idx, tmp in enumerate(tmp_paths):
-        inputs.extend(["-i", str(tmp)])
-        labels.append(f"[{idx}:v]")
-    filter_complex = "".join(labels) + f"concat=n={len(tmp_paths)}:v=1:a=0[v]"
-    cmd = [
-        "ffmpeg", "-y", "-loglevel", "error",
-        *inputs,
-        "-filter_complex", filter_complex,
-        "-map", "[v]",
-        "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
-        "-an", "-movflags", "+faststart",
-        str(out_path),
-    ]
-    result = subprocess.run(cmd, capture_output=True)
-    if result.returncode != 0:
-        logger.error("ffmpeg segmented preview concat failed:\n%s", result.stderr.decode(errors="replace"))
-
-    for tmp in tmp_paths:
-        try:
-            tmp.unlink(missing_ok=True)
-        except OSError:
-            pass
-
-
-def _build_frame_locked_compare(ref_path: Path, src_path: Path, out_path: Path) -> None:
-    """Render reference and source into one side-by-side video stream."""
-    out_path.parent.mkdir(parents=True, exist_ok=True)
-    normalize = (
-        "fps=25,scale=640:360:force_original_aspect_ratio=decrease,"
-        "pad=640:360:(ow-iw)/2:(oh-ih)/2,setsar=1,setpts=PTS-STARTPTS"
-    )
-    filter_complex = (
-        f"[0:v]{normalize}[ref];"
-        f"[1:v]{normalize}[src];"
-        "[ref][src]hstack=inputs=2[v]"
-    )
-    cmd = [
-        "ffmpeg", "-y", "-loglevel", "error",
-        "-i", str(ref_path),
-        "-i", str(src_path),
-        "-filter_complex", filter_complex,
-        "-map", "[v]",
-        "-c:v", "libx264",
-        "-preset", "ultrafast",
-        "-crf", "28",
-        "-an",
-        "-movflags", "+faststart",
-        str(out_path),
-    ]
-    result = subprocess.run(cmd, capture_output=True)
-    if result.returncode != 0:
-        logger.error(
-            "ffmpeg compare render failed for %s:\n%s",
-            out_path.name,
-            result.stderr.decode(errors="replace"),
-        )
-
-
-def generate_report(beats: list, results: list, cfg: AppConfig) -> Path:
-    """
-    Generate an HTML side-by-side report.
-    Returns the path to the .html file.
-    """
-    report_dir = cfg.paths.output_dir / "report"
-    report_dir.mkdir(parents=True, exist_ok=True)
-    
-    html_path = report_dir / "match_report.html"
-    results_by_beat = {r.beat_id: r for r in results}
-    
-    logger.info("Generating report clips in %s (this might take a moment) ...", report_dir)
-    
-    html = [
-        "<!DOCTYPE html>",
-        "<html><head><meta charset='utf-8'><title>AI Trailer Match Report</title>",
-        "<style>",
-        "body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: #0f0f0f; color: #e0e0e0; margin: 40px; }",
-        "h1 { color: #fff; border-bottom: 1px solid #333; padding-bottom: 10px; }",
-        ".stats { font-size: 1.2em; margin-bottom: 30px; color: #aaa; }",
-        ".beat-row { display: flex; margin-bottom: 30px; background: #1a1a1a; padding: 20px; border-radius: 12px; border: 1px solid #333; }",
-        ".info { width: 250px; padding-right: 20px; flex-shrink: 0; }",
-        ".info h3 { margin-top: 0; color: #fff; }",
-        ".video-container { display: flex; gap: 20px; flex-grow: 1; }",
-        ".videos { flex-grow: 1; }",
-        ".compare { margin-bottom: 18px; }",
-        ".video-col { flex: 1; }",
-        ".video-col p { margin-top: 0; font-weight: bold; color: #888; }",
-        "video { width: 100%; border-radius: 6px; box-shadow: 0 4px 6px rgba(0,0,0,0.5); background: #000; }",
-        ".status-match { color: #4ade80; font-weight: bold; font-size: 1.1em; }",
-        ".status-miss { color: #f87171; font-weight: bold; font-size: 1.1em; }",
-        ".score { font-family: monospace; font-size: 1.1em; color: #60a5fa; }",
-        ".code-hint { background: #000; padding: 10px; border-radius: 4px; font-family: monospace; font-size: 0.9em; margin-top: 15px; color: #a3e635; }",
-        "</style></head><body>",
-        f"<h1>AI Trailer Generator — Match Report</h1>",
-        f"<div class='stats'>Total Beats: {len(beats)} | Matched: {len(results)}</div>",
-        "<script>",
-        "function syncBeat(row) {",
-        "  const vids = row.querySelectorAll('video');",
-        "  if (vids.length < 2) return;",
-        "  const ref = vids[0];",
-        "  const src = vids[1];",
-        "  let syncing = false;",
-        "  function align() {",
-        "    if (syncing) return;",
-        "    syncing = true;",
-        "    const target = Math.min(ref.currentTime, Math.max(0, (src.duration || ref.currentTime) - 0.02));",
-        "    if (Math.abs(src.currentTime - target) > 0.035) src.currentTime = target;",
-        "    if (ref.paused && !src.paused) src.pause();",
-        "    if (!ref.paused && src.paused) src.play().catch(() => {});",
-        "    syncing = false;",
-        "  }",
-        "  ref.addEventListener('play', () => { src.currentTime = Math.min(ref.currentTime, Math.max(0, (src.duration || ref.currentTime) - 0.02)); src.play().catch(() => {}); });",
-        "  ref.addEventListener('pause', () => src.pause());",
-        "  ref.addEventListener('seeked', () => { src.currentTime = Math.min(ref.currentTime, Math.max(0, (src.duration || ref.currentTime) - 0.02)); });",
-        "  ref.addEventListener('timeupdate', align);",
-        "}",
-        "document.addEventListener('DOMContentLoaded', () => document.querySelectorAll('.beat-row').forEach(syncBeat));",
-        "</script>"
-    ]
-    
-    for beat in beats:
-        res = results_by_beat.get(beat.beat_id)
-        
-        # Extract Reference Clip
-        ref_mp4 = report_dir / f"beat_{beat.beat_id:03d}_ref.mp4"
-        _extract_clip(beat.trailer_path, beat.start_s, beat.duration_s, ref_mp4)
-        
-        html.append("<div class='beat-row'>")
-        
-        # Info Panel
-        html.append("<div class='info'>")
-        html.append(f"<h3>Beat {beat.beat_id:03d}</h3>")
-        html.append(f"<p><b>Type:</b> {beat.beat_type.name}</p>")
-        html.append(f"<p><b>Trailer:</b> {beat.start_s:.2f}s &rarr; {beat.end_s:.2f}s</p>")
-        
-        if res:
-            segments = list(getattr(res, "segments", ()) or [])
-            source_duration = sum(max(0.0, float(s.duration_s)) for s in segments)
-            if not segments:
-                source_duration = max(0.0, res.out_point_s - res.in_point_s)
-            preview_duration = min(beat.duration_s, source_duration) if source_duration > 0 else beat.duration_s
-            last_segment_end = max(
-                (float(s.trailer_offset_s) + float(s.duration_s) for s in segments),
-                default=preview_duration,
-            )
-            trailer_tail_s = max(0.0, beat.duration_s - last_segment_end)
-            if getattr(res, "is_confirmed", True):
-                html.append("<p class='status-match'>MATCHED</p>")
-            else:
-                html.append("<p style='color: #fbbf24; font-weight: bold; font-size: 1.1em;'>PROVISIONAL MATCH</p>")
-            html.append(f"<p><b>Scene ID:</b> {res.scene_id}</p>")
-            html.append(f"<p><b>Movie In:</b> {res.in_point_s:.2f}s</p>")
-            html.append(f"<p><b>Source Dur:</b> {source_duration:.2f}s</p>")
-            if len(segments) > 1:
-                html.append(f"<p><b>Segments:</b> {len(segments)} matched visual islands</p>")
-            if trailer_tail_s > 0:
-                html.append(f"<p><b>Unmatched Tail:</b> {trailer_tail_s:.2f}s placeholder</p>")
-            html.append(f"<p><b>Score:</b> <span class='score'>{res.match_score:.3f}</span></p>")
-            if trailer_tail_s > 0:
-                html.append("<p style='color: #fbbf24; font-size: 0.9em;'>Some trailer frames are still unmatched; report fills only those gaps with placeholder black.</p>")
-            
-            # Warn if score is low
-            if res.match_score < 0.80:
-                html.append("<p style='color: #fbbf24; font-size: 0.9em;'>⚠️ Score below 0.80. Verify visually.</p>")
-            
-            # Extract Source Clip
-            src_mp4 = report_dir / f"beat_{beat.beat_id:03d}_src.mp4"
-            compare_mp4 = report_dir / f"beat_{beat.beat_id:03d}_compare.mp4"
-            if segments:
-                _extract_segmented_clip(res.source_path, segments, beat.duration_s, src_mp4)
-            else:
-                _extract_clip_with_black_tail(
-                    res.source_path,
-                    res.in_point_s,
-                    preview_duration,
-                    beat.duration_s,
-                    src_mp4,
-                )
-            _build_frame_locked_compare(ref_mp4, src_mp4, compare_mp4)
-        else:
-            html.append("<p class='status-miss'>NO MATCH</p>")
-            src_mp4 = None
-            compare_mp4 = None
-            
-        html.append(f"<div class='code-hint'>python cli.py rematch --beat {beat.beat_id}</div>")
-        html.append("</div>") # /info
-        
-        # Video Panel
-        html.append("<div class='videos'>")
-        if compare_mp4:
-            html.append(f"<div class='compare'><p>Frame-Locked Compare</p><video src='{compare_mp4.name}' controls loop muted autoplay></video></div>")
-        else:
-            html.append("<div class='video-container'>")
-            html.append(f"<div class='video-col'><p>Reference Trailer</p><video src='{ref_mp4.name}' controls loop muted autoplay></video></div>")
-            html.append("<div class='video-col'><p>Matched Source</p><div style='width: 100%; aspect-ratio: 16/9; background: #222; display: flex; align-items: center; justify-content: center; border-radius: 6px; color: #555;'>No Match</div></div>")
-            html.append("</div>") # /video-container
-        html.append("</div>") # /videos
-        html.append("</div>") # /beat-row
-        
-    html.append("</body></html>")
-    
-    html_path.write_text("\n".join(html), encoding="utf-8")
-    return html_path