Initial project import

This commit is contained in:
Melbar
2026-05-02 09:07:41 +02:00
commit 8e1bcf142f
38 changed files with 7928 additions and 0 deletions
+3
View File
@@ -0,0 +1,3 @@
"""
src/pipeline/__init__.py — Orchestration layer
"""
+291
View File
@@ -0,0 +1,291 @@
"""
src/pipeline/matcher.py — Top-level CV matching orchestrator
This is the single entry point for the full 2-phase CV pipeline:
Phase 0: Load / build scene index (PySceneDetect + fingerprinting)
Phase 1: Vibe Check — histogram + pHash filter → Top-K candidates per beat
Phase 2: Deep Scan — template matching → frame-accurate MatchResult per beat
Usage:
from src.core.config import load_config
from src.pipeline.matcher import run_matching
cfg = load_config()
beats = [...] # list[TrailerBeat] from trailer analysis
results = run_matching(cfg, beats)
"""
from __future__ import annotations
import logging
from typing import Sequence
from src.core.config import AppConfig
from src.core.models import MatchResult, Scene, TrailerBeat
logger = logging.getLogger(__name__)
SeedPoint = float | tuple[float, float]
def _scene_seed_points(scene: Scene, max_points: int) -> list[float]:
    """Spread up to ``max_points`` evenly spaced seed timestamps over a scene.

    The final 0.2 s of the scene is excluded from the sampled span.  The scene
    start is returned as the only seed when a single point (or fewer) is
    requested, when the scene has no positive duration, or when nothing is
    left of the span after trimming.

    Args:
        scene: Scene providing ``start_s``, ``end_s`` and ``duration_s``.
        max_points: Number of seed timestamps to produce.

    Returns:
        Ascending timestamps in seconds, beginning at ``scene.start_s``.
    """
    first_s = scene.start_s
    single_seed_only = max_points <= 1 or scene.duration_s <= 0
    if single_seed_only:
        return [first_s]

    last_s = max(first_s, scene.end_s - 0.2)
    if last_s <= first_s:
        return [first_s]

    spacing = (last_s - first_s) / max(1, max_points - 1)
    timestamps: list[float] = []
    for slot in range(max_points):
        timestamps.append(first_s + spacing * slot)
    return timestamps
def _build_scene_seed_in_points(
    beats: Sequence[TrailerBeat],
    scenes: Sequence[Scene],
    cfg: AppConfig,
) -> dict[int, list[float]]:
    """Derive scene-level seed in-points for every beat.

    For each beat, ``run_vibe_check`` is asked for its top candidate scenes
    (with ``phash_max_distance=64``) and each known candidate scene is sampled
    with ``_scene_seed_points``.  The samples are clamped to >= 0, rounded to
    milliseconds, de-duplicated and sorted.

    Args:
        beats: Trailer beats to seed.
        scenes: Indexed source scenes the vibe check searches.
        cfg: Application configuration; reads ``cv.deep_scan`` and
            ``cv.vibe_check`` settings.

    Returns:
        Mapping of beat_id to ascending seed timestamps.  Beats that yielded
        no seed are absent from the mapping.
    """
    from src.cv.vibe_check import run_vibe_check

    scene_lookup = {scene.scene_id: scene for scene in scenes}
    seeds_by_beat: dict[int, list[float]] = {}

    for beat in beats:
        candidates = run_vibe_check(
            beat,
            scenes,
            top_k=cfg.cv.deep_scan.scene_seed_top_k,
            hist_method=cfg.cv.vibe_check.hist_compare_method,
            phash_max_distance=64,
        )

        unique_stamps: set[float] = set()
        for candidate in candidates:
            scene = scene_lookup.get(candidate.scene_id)
            if scene is not None:
                sampled = _scene_seed_points(
                    scene, cfg.cv.deep_scan.scene_seed_points_per_scene
                )
                unique_stamps.update(round(max(0.0, stamp), 3) for stamp in sampled)

        if not unique_stamps:
            continue

        seeds_by_beat[beat.beat_id] = sorted(unique_stamps)
        logger.info(
            "Beat %d: added %d scene-level seed candidates from %d source scenes.",
            beat.beat_id,
            len(seeds_by_beat[beat.beat_id]),
            len(candidates),
        )

    return seeds_by_beat
def _merge_seed_in_points(
    *seed_maps: dict[int, Sequence[SeedPoint]] | None,
) -> dict[int, list[SeedPoint]]:
    """Combine several per-beat seed maps into one de-duplicated map.

    A seed is either a bare timestamp or a ``(timestamp, score)`` tuple.
    Timestamps are clamped to >= 0 and rounded to milliseconds; seeds that
    land on the same timestamp collapse into one entry that keeps the highest
    score seen (an unscored seed never overrides a scored one).

    Args:
        *seed_maps: Mappings of beat_id to seeds; ``None`` and empty maps are
            ignored.

    Returns:
        Mapping of beat_id to seeds sorted by timestamp.  Scored seeds come
        back as ``(timestamp, score)`` tuples, unscored ones as plain floats.
    """
    best_by_beat: dict[int, dict[float, float | None]] = {}

    for mapping in seed_maps:
        for beat_id, raw_points in (mapping or {}).items():
            slots = best_by_beat.setdefault(beat_id, {})
            for raw in raw_points:
                has_score = isinstance(raw, tuple)
                stamp = round(max(0.0, float(raw[0] if has_score else raw)), 3)
                score = float(raw[1]) if has_score else None

                known = slots.get(stamp)
                if known is None:
                    # Either a new timestamp or one that was unscored so far.
                    slots[stamp] = score
                elif score is not None:
                    slots[stamp] = max(known, score)

    merged: dict[int, list[SeedPoint]] = {}
    for beat_id, slots in best_by_beat.items():
        entries: list[SeedPoint] = []
        for stamp in sorted(slots):
            score = slots[stamp]
            entries.append(stamp if score is None else (stamp, score))
        merged[beat_id] = entries
    return merged
# ---------------------------------------------------------------------------
# Beat fingerprinting
# ---------------------------------------------------------------------------
def fingerprint_beats(
    beats: Sequence[TrailerBeat],
    cfg: AppConfig,
) -> list[TrailerBeat]:
    """
    Enrich every TrailerBeat with its visual fingerprint (histogram + pHash).

    For each beat the frame at ``beat.midpoint_s`` is read from
    ``beat.trailer_path`` and fingerprinted with the ``cfg.cv.vibe_check``
    settings.  Beats whose frame cannot be decoded are passed through
    unchanged (a warning is logged).

    Args:
        beats: TrailerBeat list (fingerprints will be None initially).
        cfg: Application configuration.

    Returns:
        New list of TrailerBeat objects, in input order, with luma_hist,
        sat_hist and phash set wherever the midpoint frame could be decoded.
    """
    from dataclasses import replace

    from src.cv.fingerprinting import fingerprint_frame
    from src.cv.frame_extractor import grab_frame_at_path

    vc_cfg = cfg.cv.vibe_check
    enriched: list[TrailerBeat] = []
    for beat in beats:
        frame = grab_frame_at_path(beat.trailer_path, beat.midpoint_s)
        if frame is None:
            logger.warning(
                "Beat %d: cannot decode midpoint frame, leaving unfingerprinted.",
                beat.beat_id,
            )
            enriched.append(beat)
            continue
        luma_b, sat_b, phash = fingerprint_frame(frame, vc_cfg)
        enriched.append(replace(beat, luma_hist=luma_b, sat_hist=sat_b, phash=phash))

    # Compare against None instead of relying on truthiness: a valid
    # fingerprint may itself be falsy (e.g. an all-zero hash, if hashes are
    # integer-like — TODO confirm the phash type) and would be miscounted.
    fingerprinted = sum(1 for b in enriched if b.phash is not None)
    logger.info("Fingerprinted %d / %d beats.", fingerprinted, len(beats))
    return enriched
# ---------------------------------------------------------------------------
# Main pipeline entry point
# ---------------------------------------------------------------------------
def run_matching(
    cfg: AppConfig,
    beats: Sequence[TrailerBeat],
    force_reindex: bool = False,
    seed_in_points: dict[int, Sequence[SeedPoint]] | None = None,
) -> list[MatchResult]:
    """
    Execute the full CV matching pipeline.

    Steps: build the scene index, fingerprint the beats, collect seed
    in-points (caller-supplied, scene-level and — when ``cfg.vision.enabled``
    — vision-based), then hand everything to the global scan.

    Args:
        cfg: Application configuration.
        beats: All trailer beats to source (must have trailer_path set).
        force_reindex: Forwarded to ``build_scene_index``; intended to bypass
            the cached scene index.
        seed_in_points: Optional caller-supplied seeds per beat_id.  Each seed
            is a timestamp or a ``(timestamp, score)`` tuple; they are merged
            with the automatically derived seeds before the scan.

    Returns:
        The MatchResult list produced by ``run_global_scan``.  The completion
        log reports its length as the number of matched beats, so unmatched
        beats are presumably omitted.
    """
    from src.cv.scene_indexer import build_scene_index

    logger.info("=" * 60)
    logger.info("AI Trailer Generator v2 — CV Matching Pipeline")
    logger.info("Source : %s", cfg.paths.source_movie.name)
    logger.info("Trailer: %s", cfg.paths.reference_trailer.name)
    logger.info("Beats : %d", len(beats))
    logger.info("=" * 60)

    # ------------------------------------------------------------------
    # Phase 0: Scene index
    # ------------------------------------------------------------------
    logger.info("[Phase 0] Building scene index …")
    scenes: list[Scene] = build_scene_index(cfg, force_reindex=force_reindex)
    logger.info("[Phase 0] %d scenes indexed.", len(scenes))

    # ------------------------------------------------------------------
    # Phase 0b: Fingerprint the beats
    # ------------------------------------------------------------------
    logger.info("[Phase 0b] Fingerprinting %d trailer beats …", len(beats))
    beats = fingerprint_beats(beats, cfg)

    # ------------------------------------------------------------------
    # Phase 1 & 2: Global Scan.  The scene index and vibe check are only
    # used here to produce seed in-points for the scan.
    # ------------------------------------------------------------------
    logger.info("[Phase 1 & 2] Running FFmpeg Global Scan for %d beats ...", len(beats))
    from src.cv.global_scan import run_global_scan

    scene_seed_in_points = _build_scene_seed_in_points(beats, scenes, cfg)

    vision_seed_in_points = {}
    if cfg.vision.enabled:
        try:
            from src.llm.vision_cache import build_vision_seed_in_points

            vision_seed_in_points = build_vision_seed_in_points(beats, scenes, cfg)
        except Exception as exc:
            # Vision seeding is best-effort: any failure falls back to the
            # CV-derived seeds instead of aborting the whole run.
            logger.error("Vision seeding failed: %s — continuing with CV-only seeds.", exc)

    results = run_global_scan(
        beats,
        cfg,
        scenes=scenes,
        seed_in_points=_merge_seed_in_points(
            seed_in_points, scene_seed_in_points, vision_seed_in_points
        ),
    )
    logger.info("[Phase 1 & 2] Done. %d / %d beats matched.", len(results), len(beats))
    logger.info("=" * 60)
    return results
# ---------------------------------------------------------------------------
# Convenience: build an EditTimeline from match results
# ---------------------------------------------------------------------------
def build_timeline(
    beats: Sequence[TrailerBeat],
    results: Sequence[MatchResult],
    cfg: AppConfig,
) -> "src.core.models.EditTimeline":  # type: ignore[name-defined]
    """
    Combine beats + match results into an ordered EditTimeline.

    Every beat occupies its full trailer duration on the timeline.  Unmatched
    beats produce no clip but still advance the timeline position, leaving a
    gap.  For matched beats the usable source duration is capped at the beat
    duration; any remainder is recorded as the clip's trailer tail.

    Args:
        beats: All trailer beats (defines order + durations).
        results: MatchResult list from run_matching().
        cfg: Application configuration.

    Returns:
        EditTimeline built from the matched clips, titled after the reference
        trailer's file stem.
    """
    from src.core.models import EditClip, EditTimeline

    match_for_beat: dict[int, MatchResult] = {}
    for result in results:
        match_for_beat[result.beat_id] = result

    clips: list[EditClip] = []
    position_s = 0.0
    for beat in beats:
        slot_start_s = position_s
        # Matched or not, the beat's slot is consumed on the timeline.
        position_s += beat.duration_s

        match = match_for_beat.get(beat.beat_id)
        if match is None:
            logger.warning("Beat %d has no match — gap in timeline.", beat.beat_id)
            continue

        matched_s = max(0.0, match.duration_s)
        if matched_s > 0:
            source_duration = min(beat.duration_s, matched_s)
        else:
            source_duration = beat.duration_s

        trailer_tail_s = max(0.0, beat.duration_s - source_duration)
        if trailer_tail_s > 0:
            logger.warning(
                "Beat %d uses %.2fs source + %.2fs generated trailer tail.",
                beat.beat_id,
                source_duration,
                trailer_tail_s,
            )

        clips.append(
            EditClip(
                clip_index=len(clips),
                beat=beat,
                match=match,
                timeline_start_s=slot_start_s,
                timeline_end_s=position_s,
                source_duration_s=source_duration,
                trailer_tail_s=trailer_tail_s,
            )
        )

    timeline = EditTimeline(
        title=cfg.paths.reference_trailer.stem,
        frame_rate=cfg.export.edl_frame_rate,
        clips=tuple(clips),
    )
    logger.info(
        "Timeline built: %d clips, total duration %.2fs",
        timeline.clip_count, timeline.total_duration_s,
    )
    return timeline
+427
View File
@@ -0,0 +1,427 @@
"""
src/pipeline/reporter.py — Visual Match Report Generator
Generates an HTML file containing side-by-side video clips of:
Left: The original beat from the reference trailer
Right: The matched scene from the source movie
This allows instant visual verification of the CV pipeline's results.
"""
from __future__ import annotations
import logging
import subprocess
from pathlib import Path
from src.core.config import AppConfig
logger = logging.getLogger(__name__)
def _extract_clip(video_path: Path, start_s: float, duration_s: float, out_path: Path) -> None:
    """Cut a silent, 640 px wide H.264 preview clip out of a video with ffmpeg.

    A non-zero ffmpeg exit code is logged as an error; it is not raised.

    Args:
        video_path: Video to cut from.
        start_s: Clip start in seconds.
        duration_s: Clip length in seconds.
        out_path: Destination file; its parent directory is created if needed.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Two-stage seek: a fast input seek to (at most) 2 s before the target,
    # followed by an accurate output seek for the remainder.  Seeking only
    # before "-i" can land on a nearby keyframe and leave the preview several
    # frames out of sync.
    preroll_s = 0.0
    if start_s >= 2.0:
        preroll_s = 2.0
    coarse_seek_s = max(0.0, start_s - preroll_s)
    fine_seek_s = start_s - coarse_seek_s

    seek_args = [
        "-ss", str(coarse_seek_s),
        "-i", str(video_path),
        "-ss", str(fine_seek_s),
        "-t", str(duration_s),
    ]
    encode_args = [
        "-map", "0:v:0",
        "-c:v", "libx264",
        "-preset", "ultrafast",
        "-crf", "28",
        "-vf", "scale=640:-2",  # downscale to keep the report lightweight
        "-an",  # drop audio
        "-movflags", "+faststart",
    ]
    command = ["ffmpeg", "-y", "-loglevel", "error", *seek_args, *encode_args, str(out_path)]

    completed = subprocess.run(command, capture_output=True)
    if completed.returncode == 0:
        return
    logger.error(
        "ffmpeg clip extraction failed for %s:\n%s",
        out_path.name, completed.stderr.decode(errors="replace")
    )
def _extract_clip_with_black_tail(
    video_path: Path,
    start_s: float,
    source_duration_s: float,
    total_duration_s: float,
    out_path: Path,
) -> None:
    """Extract a source preview and append black frames for trailer-only tails.

    When the tail (``total_duration_s - source_duration_s``) is at most
    0.02 s the work is delegated to ``_extract_clip``.  Otherwise three ffmpeg
    passes run in order — source portion, black tail, concat — using two
    temporary files next to ``out_path``.  A failing pass is logged and stops
    the remaining passes; a non-zero ffmpeg exit code is never raised.  The
    temporary files are removed on every exit path.

    Args:
        video_path: Video to cut the source portion from.
        start_s: Start of the source portion in seconds.
        source_duration_s: Length of the source portion in seconds.
        total_duration_s: Desired total length of the preview in seconds.
        out_path: Destination file; its parent directory is created if needed.
    """
    tail_s = max(0.0, total_duration_s - source_duration_s)
    if tail_s <= 0.02:
        _extract_clip(video_path, start_s, source_duration_s, out_path)
        return

    out_path.parent.mkdir(parents=True, exist_ok=True)
    source_tmp = out_path.with_name(f"{out_path.stem}_source_tmp.mp4")
    tail_tmp = out_path.with_name(f"{out_path.stem}_tail_tmp.mp4")

    preroll_s = 2.0 if start_s >= 2.0 else 0.0
    input_seek_s = max(0.0, start_s - preroll_s)
    accurate_seek_s = start_s - input_seek_s

    # The source portion uses the same two-stage accurate seek as
    # _extract_clip().  Using trim=start=... after an input seek is brittle
    # because FFmpeg may preserve non-zero packet timestamps around keyframes.
    source_cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-ss", str(input_seek_s),
        "-i", str(video_path),
        "-ss", str(accurate_seek_s),
        "-t", str(source_duration_s),
        "-map", "0:v:0",
        "-c:v", "libx264",
        "-preset", "ultrafast",
        "-crf", "28",
        "-vf", "scale=640:360,setsar=1,fps=25,setpts=PTS-STARTPTS",
        "-an",
        "-movflags", "+faststart",
        str(source_tmp),
    ]
    tail_cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-f", "lavfi",
        "-i", f"color=c=black:s=640x360:r=25:d={tail_s}",
        "-c:v", "libx264",
        "-preset", "ultrafast",
        "-crf", "28",
        "-an",
        "-movflags", "+faststart",
        str(tail_tmp),
    ]
    concat_cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", str(source_tmp),
        "-i", str(tail_tmp),
        "-filter_complex", "[0:v][1:v]concat=n=2:v=1:a=0[v]",
        "-map", "[v]",
        "-c:v", "libx264",
        "-preset", "ultrafast",
        "-crf", "28",
        "-an",
        "-movflags", "+faststart",
        str(out_path),
    ]
    passes = (
        (source_cmd, "ffmpeg source preview extraction failed for %s:\n%s"),
        (tail_cmd, "ffmpeg black tail render failed for %s:\n%s"),
        (concat_cmd, "ffmpeg tailed preview concat failed for %s:\n%s"),
    )

    try:
        for cmd, failure_message in passes:
            result = subprocess.run(cmd, capture_output=True)
            if result.returncode != 0:
                logger.error(
                    failure_message,
                    out_path.name,
                    result.stderr.decode(errors="replace"),
                )
                # Each later pass consumes this pass's output, so stop here.
                break
    finally:
        # Always remove the intermediates, including after a failed pass or
        # an exception (e.g. ffmpeg missing), so the report directory is not
        # littered with *_tmp.mp4 files.
        for tmp in (source_tmp, tail_tmp):
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup; a leftover temp file is harmless.
                pass
def _extract_segmented_clip(
    video_path: Path,
    segments: list,
    total_duration_s: float,
    out_path: Path,
) -> None:
    """Render a beat-length source preview from multiple matched source islands.

    Segments are laid out by ``trailer_offset_s``; the gaps before, between
    and after them are filled with black.  Each part is rendered to a
    temporary file next to ``out_path`` and the parts are then concatenated
    (or, when there is exactly one part, moved into place).  Parts shorter
    than 0.02 s are skipped.  ffmpeg failures are logged, never raised, and
    all temporary parts are removed on every exit path.

    Args:
        video_path: Source video the segments are cut from.
        segments: Objects exposing ``trailer_offset_s``, ``duration_s`` and
            ``in_point_s``.  An empty list delegates to
            ``_extract_clip_with_black_tail`` with a zero-length source.
        total_duration_s: Desired total length of the preview in seconds.
        out_path: Destination file; its parent directory is created if needed.
    """
    if not segments:
        _extract_clip_with_black_tail(video_path, 0.0, 0.0, total_duration_s, out_path)
        return

    out_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_paths: list[Path] = []  # successfully rendered parts, in play order
    attempted: list[Path] = []  # every part file ffmpeg may have touched
    cursor = 0.0

    def add_black(duration_s: float) -> None:
        if duration_s <= 0.02:
            return
        tmp = out_path.with_name(f"{out_path.stem}_part_{len(tmp_paths):03d}_black.mp4")
        attempted.append(tmp)
        cmd = [
            "ffmpeg", "-y", "-loglevel", "error",
            "-f", "lavfi",
            "-i", f"color=c=black:s=640x360:r=25:d={duration_s}",
            "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
            "-an", "-movflags", "+faststart",
            str(tmp),
        ]
        result = subprocess.run(cmd, capture_output=True)
        if result.returncode == 0:
            tmp_paths.append(tmp)
        else:
            logger.error("ffmpeg black segment render failed:\n%s", result.stderr.decode(errors="replace"))

    def add_source(start_s: float, duration_s: float) -> None:
        if duration_s <= 0.02:
            return
        tmp = out_path.with_name(f"{out_path.stem}_part_{len(tmp_paths):03d}_src.mp4")
        attempted.append(tmp)
        # Same two-stage accurate seek as _extract_clip().
        preroll_s = 2.0 if start_s >= 2.0 else 0.0
        input_seek_s = max(0.0, start_s - preroll_s)
        accurate_seek_s = start_s - input_seek_s
        cmd = [
            "ffmpeg", "-y", "-loglevel", "error",
            "-ss", str(input_seek_s),
            "-i", str(video_path),
            "-ss", str(accurate_seek_s),
            "-t", str(duration_s),
            "-map", "0:v:0",
            "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
            "-vf", "scale=640:360,setsar=1,fps=25,setpts=PTS-STARTPTS",
            "-an", "-movflags", "+faststart",
            str(tmp),
        ]
        result = subprocess.run(cmd, capture_output=True)
        if result.returncode == 0 and tmp.exists():
            tmp_paths.append(tmp)
        else:
            logger.error("ffmpeg source segment render failed:\n%s", result.stderr.decode(errors="replace"))

    try:
        for segment in sorted(segments, key=lambda s: s.trailer_offset_s):
            offset_s = max(0.0, float(segment.trailer_offset_s))
            duration_s = max(0.0, float(segment.duration_s))
            add_black(offset_s - cursor)
            add_source(float(segment.in_point_s), duration_s)
            cursor = max(cursor, offset_s + duration_s)
        add_black(total_duration_s - cursor)

        if not tmp_paths:
            # Nothing rendered (every part failed or was below the 0.02 s
            # threshold); a concat with n=0 and no inputs is invalid.
            logger.error("No preview parts could be rendered for %s.", out_path.name)
            return

        if len(tmp_paths) == 1:
            tmp_paths[0].replace(out_path)
            return

        inputs: list[str] = []
        labels: list[str] = []
        for idx, tmp in enumerate(tmp_paths):
            inputs.extend(["-i", str(tmp)])
            labels.append(f"[{idx}:v]")
        filter_complex = "".join(labels) + f"concat=n={len(tmp_paths)}:v=1:a=0[v]"
        cmd = [
            "ffmpeg", "-y", "-loglevel", "error",
            *inputs,
            "-filter_complex", filter_complex,
            "-map", "[v]",
            "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
            "-an", "-movflags", "+faststart",
            str(out_path),
        ]
        result = subprocess.run(cmd, capture_output=True)
        if result.returncode != 0:
            logger.error("ffmpeg segmented preview concat failed:\n%s", result.stderr.decode(errors="replace"))
    finally:
        # Remove every part file, including partial output of failed renders.
        # A part that was moved onto out_path no longer exists under its
        # temporary name, so unlinking it is a no-op.
        for tmp in attempted:
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup; a leftover temp file is harmless.
                pass
def _build_frame_locked_compare(ref_path: Path, src_path: Path, out_path: Path) -> None:
    """Render reference and source side by side into a single video stream.

    Both inputs are normalised to 25 fps and letterboxed into 640x360 before
    being stacked horizontally (reference left, source right).  A non-zero
    ffmpeg exit code is logged as an error; it is not raised.

    Args:
        ref_path: Reference clip (left half).
        src_path: Source clip (right half).
        out_path: Destination file; its parent directory is created if needed.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    per_input_chain = ",".join(
        (
            "fps=25",
            "scale=640:360:force_original_aspect_ratio=decrease",
            "pad=640:360:(ow-iw)/2:(oh-ih)/2",
            "setsar=1",
            "setpts=PTS-STARTPTS",
        )
    )
    filter_graph = ";".join(
        (
            f"[0:v]{per_input_chain}[ref]",
            f"[1:v]{per_input_chain}[src]",
            "[ref][src]hstack=inputs=2[v]",
        )
    )

    command = ["ffmpeg", "-y", "-loglevel", "error"]
    command += ["-i", str(ref_path), "-i", str(src_path)]
    command += ["-filter_complex", filter_graph, "-map", "[v]"]
    command += ["-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"]
    command += ["-an", "-movflags", "+faststart", str(out_path)]

    completed = subprocess.run(command, capture_output=True)
    if completed.returncode == 0:
        return
    logger.error(
        "ffmpeg compare render failed for %s:\n%s",
        out_path.name,
        completed.stderr.decode(errors="replace"),
    )
def generate_report(beats: list, results: list, cfg: AppConfig) -> Path:
    """
    Generate an HTML side-by-side report.

    For every beat a reference clip is cut from the trailer.  Matched beats
    additionally get a source preview (segmented, or a single clip padded with
    black) and a combined side-by-side compare video; unmatched beats show the
    reference clip next to a "No Match" placeholder.  All clips and the HTML
    file are written to ``<output_dir>/report``.

    Args:
        beats: Trailer beats, in report order.
        results: Match results; looked up per beat via ``beat_id``.
        cfg: Application configuration (provides ``paths.output_dir``).

    Returns:
        The path to the .html file.
    """
    report_dir = cfg.paths.output_dir / "report"
    report_dir.mkdir(parents=True, exist_ok=True)
    html_path = report_dir / "match_report.html"

    match_by_beat = {}
    for result in results:
        match_by_beat[result.beat_id] = result

    logger.info("Generating report clips in %s (this might take a moment) ...", report_dir)

    # Static page head: stylesheet, headline, stats and the playback-sync script.
    page = [
        "<!DOCTYPE html>",
        "<html><head><meta charset='utf-8'><title>AI Trailer Match Report</title>",
        "<style>",
        "body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: #0f0f0f; color: #e0e0e0; margin: 40px; }",
        "h1 { color: #fff; border-bottom: 1px solid #333; padding-bottom: 10px; }",
        ".stats { font-size: 1.2em; margin-bottom: 30px; color: #aaa; }",
        ".beat-row { display: flex; margin-bottom: 30px; background: #1a1a1a; padding: 20px; border-radius: 12px; border: 1px solid #333; }",
        ".info { width: 250px; padding-right: 20px; flex-shrink: 0; }",
        ".info h3 { margin-top: 0; color: #fff; }",
        ".video-container { display: flex; gap: 20px; flex-grow: 1; }",
        ".videos { flex-grow: 1; }",
        ".compare { margin-bottom: 18px; }",
        ".video-col { flex: 1; }",
        ".video-col p { margin-top: 0; font-weight: bold; color: #888; }",
        "video { width: 100%; border-radius: 6px; box-shadow: 0 4px 6px rgba(0,0,0,0.5); background: #000; }",
        ".status-match { color: #4ade80; font-weight: bold; font-size: 1.1em; }",
        ".status-miss { color: #f87171; font-weight: bold; font-size: 1.1em; }",
        ".score { font-family: monospace; font-size: 1.1em; color: #60a5fa; }",
        ".code-hint { background: #000; padding: 10px; border-radius: 4px; font-family: monospace; font-size: 0.9em; margin-top: 15px; color: #a3e635; }",
        "</style></head><body>",
        "<h1>AI Trailer Generator — Match Report</h1>",
        f"<div class='stats'>Total Beats: {len(beats)} | Matched: {len(results)}</div>",
        "<script>",
        "function syncBeat(row) {",
        " const vids = row.querySelectorAll('video');",
        " if (vids.length < 2) return;",
        " const ref = vids[0];",
        " const src = vids[1];",
        " let syncing = false;",
        " function align() {",
        " if (syncing) return;",
        " syncing = true;",
        " const target = Math.min(ref.currentTime, Math.max(0, (src.duration || ref.currentTime) - 0.02));",
        " if (Math.abs(src.currentTime - target) > 0.035) src.currentTime = target;",
        " if (ref.paused && !src.paused) src.pause();",
        " if (!ref.paused && src.paused) src.play().catch(() => {});",
        " syncing = false;",
        " }",
        " ref.addEventListener('play', () => { src.currentTime = Math.min(ref.currentTime, Math.max(0, (src.duration || ref.currentTime) - 0.02)); src.play().catch(() => {}); });",
        " ref.addEventListener('pause', () => src.pause());",
        " ref.addEventListener('seeked', () => { src.currentTime = Math.min(ref.currentTime, Math.max(0, (src.duration || ref.currentTime) - 0.02)); });",
        " ref.addEventListener('timeupdate', align);",
        "}",
        "document.addEventListener('DOMContentLoaded', () => document.querySelectorAll('.beat-row').forEach(syncBeat));",
        "</script>",
    ]
    emit = page.append

    for beat in beats:
        match = match_by_beat.get(beat.beat_id)
        clip_stem = f"beat_{beat.beat_id:03d}"

        # Reference clip straight from the trailer.
        ref_mp4 = report_dir / f"{clip_stem}_ref.mp4"
        _extract_clip(beat.trailer_path, beat.start_s, beat.duration_s, ref_mp4)

        # --- Info panel -------------------------------------------------
        page.extend(
            [
                "<div class='beat-row'>",
                "<div class='info'>",
                f"<h3>Beat {beat.beat_id:03d}</h3>",
                f"<p><b>Type:</b> {beat.beat_type.name}</p>",
                f"<p><b>Trailer:</b> {beat.start_s:.2f}s &rarr; {beat.end_s:.2f}s</p>",
            ]
        )

        compare_mp4 = None
        if match:
            segments = list(getattr(match, "segments", ()) or [])
            if segments:
                source_duration = sum(max(0.0, float(s.duration_s)) for s in segments)
            else:
                source_duration = max(0.0, match.out_point_s - match.in_point_s)

            if source_duration > 0:
                preview_duration = min(beat.duration_s, source_duration)
            else:
                preview_duration = beat.duration_s

            # End of the last matched island on the beat's own time axis;
            # without segments the single preview clip defines it.
            last_segment_end = max(
                (float(s.trailer_offset_s) + float(s.duration_s) for s in segments),
                default=preview_duration,
            )
            trailer_tail_s = max(0.0, beat.duration_s - last_segment_end)
            has_tail = trailer_tail_s > 0

            if getattr(match, "is_confirmed", True):
                emit("<p class='status-match'>MATCHED</p>")
            else:
                emit("<p style='color: #fbbf24; font-weight: bold; font-size: 1.1em;'>PROVISIONAL MATCH</p>")

            emit(f"<p><b>Scene ID:</b> {match.scene_id}</p>")
            emit(f"<p><b>Movie In:</b> {match.in_point_s:.2f}s</p>")
            emit(f"<p><b>Source Dur:</b> {source_duration:.2f}s</p>")
            if len(segments) > 1:
                emit(f"<p><b>Segments:</b> {len(segments)} matched visual islands</p>")
            if has_tail:
                emit(f"<p><b>Unmatched Tail:</b> {trailer_tail_s:.2f}s placeholder</p>")
            emit(f"<p><b>Score:</b> <span class='score'>{match.match_score:.3f}</span></p>")
            if has_tail:
                emit("<p style='color: #fbbf24; font-size: 0.9em;'>Some trailer frames are still unmatched; report fills only those gaps with placeholder black.</p>")
            # Flag weak matches for manual verification.
            if match.match_score < 0.80:
                emit("<p style='color: #fbbf24; font-size: 0.9em;'>⚠️ Score below 0.80. Verify visually.</p>")

            # Source preview plus the combined side-by-side render.
            src_mp4 = report_dir / f"{clip_stem}_src.mp4"
            compare_mp4 = report_dir / f"{clip_stem}_compare.mp4"
            if segments:
                _extract_segmented_clip(match.source_path, segments, beat.duration_s, src_mp4)
            else:
                _extract_clip_with_black_tail(
                    match.source_path,
                    match.in_point_s,
                    preview_duration,
                    beat.duration_s,
                    src_mp4,
                )
            _build_frame_locked_compare(ref_mp4, src_mp4, compare_mp4)
        else:
            emit("<p class='status-miss'>NO MATCH</p>")

        emit(f"<div class='code-hint'>python cli.py rematch --beat {beat.beat_id}</div>")
        emit("</div>")  # /info

        # --- Video panel ------------------------------------------------
        emit("<div class='videos'>")
        if compare_mp4 is not None:
            emit(f"<div class='compare'><p>Frame-Locked Compare</p><video src='{compare_mp4.name}' controls loop muted autoplay></video></div>")
        else:
            page.extend(
                [
                    "<div class='video-container'>",
                    f"<div class='video-col'><p>Reference Trailer</p><video src='{ref_mp4.name}' controls loop muted autoplay></video></div>",
                    "<div class='video-col'><p>Matched Source</p><div style='width: 100%; aspect-ratio: 16/9; background: #222; display: flex; align-items: center; justify-content: center; border-radius: 6px; color: #555;'>No Match</div></div>",
                    "</div>",  # /video-container
                ]
            )
        emit("</div>")  # /videos
        emit("</div>")  # /beat-row

    emit("</body></html>")
    html_path.write_text("\n".join(page), encoding="utf-8")
    return html_path
+175
View File
@@ -0,0 +1,175 @@
"""
src/pipeline/trailer_analyzer.py — Reference trailer → list[TrailerBeat]
Responsibility:
1. Run PySceneDetect on the REFERENCE TRAILER (not the source movie)
to detect cut boundaries → raw beat intervals
2. Fingerprint the midpoint frame of each beat (for Vibe Check)
3. Transcribe dialogue per beat via Whisper (optional, injected)
4. Optionally classify BeatType via the LLM dramaturg (injected)
Returns: list[TrailerBeat] ready to feed into run_matching().
"""
from __future__ import annotations
import logging
from dataclasses import replace
from pathlib import Path
from typing import Callable, Sequence
from src.core.config import AppConfig
from src.core.models import BeatType, DialogueLine, TrailerBeat
from src.cv.fingerprinting import fingerprint_frame
from src.cv.frame_extractor import grab_midpoint_frame, open_video
logger = logging.getLogger(__name__)
# Injection type aliases — keeps this module free of hard audio/LLM imports
TranscribeCallback = Callable[[Path, float, float, float], list[DialogueLine]]
ClassifyCallback = Callable[[list[TrailerBeat]], list[TrailerBeat]]
# ---------------------------------------------------------------------------
# Step 1: Scene detection on the reference trailer
# ---------------------------------------------------------------------------
def _detect_trailer_beats(cfg: AppConfig) -> list[tuple[float, float, int, int]]:
    """
    Run PySceneDetect on the reference trailer.

    A ContentDetector is configured from ``cfg.scene_detection`` (presumably
    the same settings the source-movie indexer uses — confirm against the
    indexer); the minimum scene length is converted from seconds to frames
    using the trailer's frame rate.

    Args:
        cfg: Application configuration (provides ``paths.reference_trailer``
            and ``scene_detection``).

    Returns:
        List of (start_s, end_s, start_frame, end_frame), one per detected cut
        interval.

    Raises:
        ImportError: If PySceneDetect is not installed; the original import
            failure is attached as the cause.
    """
    try:
        from scenedetect import open_video as sd_open_video, SceneManager
        from scenedetect.detectors import ContentDetector
    except ImportError as exc:
        # Chain the cause so the traceback shows which module actually failed
        # to import (scenedetect itself or one of its dependencies).
        raise ImportError("pip install scenedetect[opencv]") from exc

    trailer_path = cfg.paths.reference_trailer
    video = sd_open_video(str(trailer_path))
    manager = SceneManager()
    manager.add_detector(
        ContentDetector(
            threshold=cfg.scene_detection.content_threshold,
            min_scene_len=int(
                cfg.scene_detection.min_scene_duration_s * video.frame_rate
            ),
        )
    )

    logger.info("Detecting beats in reference trailer: %s", trailer_path.name)
    manager.detect_scenes(video=video, show_progress=False)
    raw = manager.get_scene_list()

    result = [
        (s.get_seconds(), e.get_seconds(), s.get_frames(), e.get_frames())
        for s, e in raw
    ]
    logger.info("Detected %d beats in reference trailer.", len(result))
    return result
# ---------------------------------------------------------------------------
# Step 2: Fingerprint beats
# ---------------------------------------------------------------------------
def _fingerprint_beats(
    raw_beats: list[tuple[float, float, int, int]],
    cfg: AppConfig,
) -> list[TrailerBeat]:
    """Turn raw cut intervals into TrailerBeat objects with fingerprints.

    The reference trailer is opened once; for each interval the midpoint frame
    is grabbed and fingerprinted with the ``cfg.cv.vibe_check`` settings.  If
    the frame cannot be decoded a warning is logged and the beat is created
    without fingerprint fields.

    Args:
        raw_beats: (start_s, end_s, start_frame, end_frame) tuples; the list
            index becomes the beat_id.
        cfg: Application configuration.

    Returns:
        One TrailerBeat per raw interval, in input order.
    """
    vc_cfg = cfg.cv.vibe_check
    trailer_path = cfg.paths.reference_trailer

    beats: list[TrailerBeat] = []
    with open_video(trailer_path) as cap:
        for beat_id, (start_s, end_s, start_frame, end_frame) in enumerate(raw_beats):
            fields = {
                "beat_id": beat_id,
                "trailer_path": trailer_path,
                "start_s": start_s,
                "end_s": end_s,
                "start_frame": start_frame,
                "end_frame": end_frame,
            }

            frame = grab_midpoint_frame(cap, start_s, end_s)
            if frame is None:
                logger.warning("Beat %d: midpoint frame decode failed.", beat_id)
            else:
                luma_b, sat_b, phash = fingerprint_frame(frame, vc_cfg)
                # Only set fingerprint fields when they exist, so a failed
                # decode keeps the TrailerBeat defaults.
                fields.update(luma_hist=luma_b, sat_hist=sat_b, phash=phash)

            beats.append(TrailerBeat(**fields))
    return beats
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def analyze_reference_trailer(
    cfg: AppConfig,
    transcribe_callback: TranscribeCallback | None = None,
    classify_callback: ClassifyCallback | None = None,
) -> list[TrailerBeat]:
    """
    Full reference-trailer analysis pipeline.

    Runs cut detection and fingerprinting, then the optional dialogue and
    classification stages.  Failures inside either callback are logged as
    warnings and the affected beat(s) are kept as they were.

    Args:
        cfg: Application configuration.
        transcribe_callback: Optional fn(path, start_s, end_s, offset_s)
            → list[DialogueLine]. Called once per beat with the beat start as
            the offset. Injected to keep this module free of audio imports.
        classify_callback: Optional fn(beats) → beats with BeatType set.
            Injected to keep this module LLM-free.

    Returns:
        List of TrailerBeat objects with fingerprints (and optionally
        dialogue + BeatType) populated.
    """
    # Steps 1 + 2 — cut detection, then fingerprinting.
    beats = _fingerprint_beats(_detect_trailer_beats(cfg), cfg)

    # Step 3 — dialogue (optional)
    if transcribe_callback is not None:
        with_dialogue: list[TrailerBeat] = []
        for beat in beats:
            try:
                lines = transcribe_callback(
                    beat.trailer_path,
                    beat.start_s,
                    beat.end_s,
                    beat.start_s,  # time_offset so timestamps are absolute
                )
                updated = replace(beat, dialogue=tuple(lines))
            except Exception as exc:
                logger.warning("Beat %d transcription failed: %s", beat.beat_id, exc)
                updated = beat
            with_dialogue.append(updated)
        beats = with_dialogue

    # Step 4 — LLM dramaturgy (optional)
    if classify_callback is not None:
        try:
            beats = classify_callback(beats)
        except Exception as exc:
            logger.warning("Beat classification failed: %s — keeping UNKNOWN.", exc)

    total = len(beats)
    with_lines = len([b for b in beats if b.dialogue])
    classified = len([b for b in beats if b.beat_type != BeatType.UNKNOWN])
    logger.info(
        "Trailer analysis complete: %d beats, %d with dialogue, %d classified.",
        total,
        with_lines,
        classified,
    )
    return beats