Initial project import
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
src/pipeline/__init__.py — Orchestration layer
|
||||
"""
|
||||
@@ -0,0 +1,291 @@
|
||||
"""
|
||||
src/pipeline/matcher.py — Top-level CV matching orchestrator
|
||||
|
||||
This is the single entry point for the full 2-phase CV pipeline:
|
||||
|
||||
Phase 0: Load / build scene index (PySceneDetect + fingerprinting)
|
||||
Phase 1: Vibe Check — histogram + pHash filter → Top-K candidates per beat
|
||||
Phase 2: Deep Scan — template matching → frame-accurate MatchResult per beat
|
||||
|
||||
Usage:
|
||||
from src.core.config import load_config
|
||||
from src.pipeline.matcher import run_matching
|
||||
|
||||
cfg = load_config()
|
||||
beats = [...] # list[TrailerBeat] from trailer analysis
|
||||
results = run_matching(cfg, beats)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Sequence
|
||||
|
||||
from src.core.config import AppConfig
|
||||
from src.core.models import MatchResult, Scene, TrailerBeat
|
||||
|
||||
logger = logging.getLogger(__name__)

# A seed in-point is either a bare timestamp (seconds) or a
# (timestamp_s, score) pair; _merge_seed_in_points() accepts both shapes.
SeedPoint = float | tuple[float, float]
|
||||
|
||||
|
||||
def _scene_seed_points(scene: Scene, max_points: int) -> list[float]:
|
||||
if max_points <= 1 or scene.duration_s <= 0:
|
||||
return [scene.start_s]
|
||||
usable_end = max(scene.start_s, scene.end_s - 0.2)
|
||||
if usable_end <= scene.start_s:
|
||||
return [scene.start_s]
|
||||
step = (usable_end - scene.start_s) / max(1, max_points - 1)
|
||||
return [scene.start_s + step * idx for idx in range(max_points)]
|
||||
|
||||
|
||||
def _build_scene_seed_in_points(
    beats: Sequence[TrailerBeat],
    scenes: Sequence[Scene],
    cfg: AppConfig,
) -> dict[int, list[float]]:
    """Derive coarse seed timestamps per beat from vibe-check scene hits.

    For each beat, run_vibe_check() is called with
    ``top_k=cfg.cv.deep_scan.scene_seed_top_k``; every hit whose scene_id is
    known contributes the timestamps produced by _scene_seed_points().
    Beats that collect no points are absent from the result.

    Returns:
        Mapping of beat_id to a sorted, de-duplicated list of timestamps,
        each clamped at 0.0 and rounded to 3 decimals.
    """
    from src.cv.vibe_check import run_vibe_check

    scene_lookup: dict[int, Scene] = {}
    for candidate in scenes:
        scene_lookup[candidate.scene_id] = candidate

    seeds: dict[int, list[float]] = {}
    for beat in beats:
        hits = run_vibe_check(
            beat,
            scenes,
            top_k=cfg.cv.deep_scan.scene_seed_top_k,
            hist_method=cfg.cv.vibe_check.hist_compare_method,
            # NOTE(review): 64 presumably disables the pHash cutoff for a
            # 64-bit hash — confirm against run_vibe_check.
            phash_max_distance=64,
        )

        # Hits that reference an unknown scene_id are skipped.
        raw_points = [
            timestamp
            for hit in hits
            if (scene := scene_lookup.get(hit.scene_id)) is not None
            for timestamp in _scene_seed_points(
                scene, cfg.cv.deep_scan.scene_seed_points_per_scene
            )
        ]
        if not raw_points:
            continue

        unique_points = {round(max(0.0, p), 3) for p in raw_points}
        seeds[beat.beat_id] = sorted(unique_points)
        logger.info(
            "Beat %d: added %d scene-level seed candidates from %d source scenes.",
            beat.beat_id,
            len(seeds[beat.beat_id]),
            len(hits),
        )

    return seeds
|
||||
|
||||
|
||||
def _merge_seed_in_points(
|
||||
*seed_maps: dict[int, Sequence[SeedPoint]] | None,
|
||||
) -> dict[int, list[SeedPoint]]:
|
||||
merged: dict[int, dict[float, float | None]] = {}
|
||||
for seed_map in seed_maps:
|
||||
if not seed_map:
|
||||
continue
|
||||
for beat_id, points in seed_map.items():
|
||||
beat_points = merged.setdefault(beat_id, {})
|
||||
for point in points:
|
||||
if isinstance(point, tuple):
|
||||
t_sec = round(max(0.0, float(point[0])), 3)
|
||||
score = float(point[1])
|
||||
else:
|
||||
t_sec = round(max(0.0, float(point)), 3)
|
||||
score = None
|
||||
old_score = beat_points.get(t_sec)
|
||||
if old_score is None:
|
||||
beat_points[t_sec] = score
|
||||
elif score is not None:
|
||||
beat_points[t_sec] = max(old_score, score)
|
||||
|
||||
result: dict[int, list[SeedPoint]] = {}
|
||||
for beat_id, points in merged.items():
|
||||
result[beat_id] = [
|
||||
(t_sec, score) if score is not None else t_sec
|
||||
for t_sec, score in sorted(points.items())
|
||||
]
|
||||
return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Beat fingerprinting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def fingerprint_beats(
    beats: Sequence[TrailerBeat],
    cfg: AppConfig,
) -> list[TrailerBeat]:
    """
    Enrich every TrailerBeat with its visual fingerprint (histogram + pHash).

    Grabs the frame at each beat's ``midpoint_s`` from its ``trailer_path``
    and fingerprints it with ``cfg.cv.vibe_check``. A beat whose frame cannot
    be decoded is passed through unchanged.

    Args:
        beats: TrailerBeat list (fingerprints will be None initially).
        cfg:   Application configuration.

    Returns:
        New list of TrailerBeat objects, in input order, with luma_hist,
        sat_hist and phash set wherever a frame could be decoded.
    """
    from dataclasses import replace
    from src.cv.fingerprinting import fingerprint_frame
    from src.cv.frame_extractor import grab_frame_at_path

    vc_cfg = cfg.cv.vibe_check
    enriched: list[TrailerBeat] = []

    for beat in beats:
        frame = grab_frame_at_path(beat.trailer_path, beat.midpoint_s)
        if frame is None:
            logger.warning("Beat %d: cannot decode midpoint frame, leaving unfingerpinted.", beat.beat_id)
            enriched.append(beat)
            continue

        luma_b, sat_b, phash = fingerprint_frame(frame, vc_cfg)
        enriched.append(replace(beat, luma_hist=luma_b, sat_hist=sat_b, phash=phash))

    # Count by "is not None" rather than truthiness: a hash that happens to be
    # falsy (e.g. zero) is still a fingerprint, and an array-like hash has no
    # single truth value.
    fingerprinted = sum(1 for b in enriched if b.phash is not None)
    logger.info("Fingerprinted %d / %d beats.", fingerprinted, len(beats))
    return enriched
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main pipeline entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run_matching(
    cfg: AppConfig,
    beats: Sequence[TrailerBeat],
    force_reindex: bool = False,
    seed_in_points: dict[int, Sequence[SeedPoint]] | None = None,
) -> list[MatchResult]:
    """
    Execute the full CV matching pipeline.

    Builds the scene index, fingerprints the beats, collects seed in-points
    (caller-supplied, scene-level, and optionally vision-based) and hands
    everything to the global scan.

    Args:
        cfg:            Application configuration (loaded from config.toml).
        beats:          All trailer beats to source (must have trailer_path set).
        force_reindex:  If True, ignore the scene cache and re-run PySceneDetect.
        seed_in_points: Optional caller-supplied seeds keyed by beat_id. Each
                        seed is a timestamp or a (timestamp, score) pair. They
                        are merged with the seeds computed here.

    Returns:
        List of MatchResult, one per beat (unmatched beats are omitted).
        Results are in the same order as the input beats.
    """
    from src.cv.scene_indexer import build_scene_index

    logger.info("=" * 60)
    logger.info("AI Trailer Generator v2 — CV Matching Pipeline")
    logger.info("Source : %s", cfg.paths.source_movie.name)
    logger.info("Trailer: %s", cfg.paths.reference_trailer.name)
    logger.info("Beats  : %d", len(beats))
    logger.info("=" * 60)

    # ------------------------------------------------------------------
    # Phase 0: Scene index
    # ------------------------------------------------------------------
    logger.info("[Phase 0] Building scene index …")
    scenes: list[Scene] = build_scene_index(cfg, force_reindex=force_reindex)
    logger.info("[Phase 0] %d scenes indexed.", len(scenes))

    # ------------------------------------------------------------------
    # Phase 0b: Fingerprint the beats
    # ------------------------------------------------------------------
    logger.info("[Phase 0b] Fingerprinting %d trailer beats …", len(beats))
    beats = fingerprint_beats(beats, cfg)

    # ------------------------------------------------------------------
    # Phase 1 & 2: Global Scan, seeded with scene-level vibe-check hits,
    # optional vision seeds and any caller-supplied seeds
    # ------------------------------------------------------------------
    logger.info("[Phase 1 & 2] Running FFmpeg Global Scan for %d beats ...", len(beats))
    from src.cv.global_scan import run_global_scan

    scene_seed_in_points = _build_scene_seed_in_points(beats, scenes, cfg)
    vision_seed_in_points = {}
    if cfg.vision.enabled:
        # Vision seeding is best-effort: any failure is logged and the scan
        # proceeds with the remaining seed sources.
        try:
            from src.llm.vision_cache import build_vision_seed_in_points

            vision_seed_in_points = build_vision_seed_in_points(beats, scenes, cfg)
        except Exception as exc:
            logger.error("Vision seeding failed: %s — continuing with CV-only seeds.", exc)

    results = run_global_scan(
        beats,
        cfg,
        scenes=scenes,
        seed_in_points=_merge_seed_in_points(seed_in_points, scene_seed_in_points, vision_seed_in_points),
    )

    logger.info("[Phase 1 & 2] Done. %d / %d beats matched.", len(results), len(beats))
    logger.info("=" * 60)

    return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Convenience: build an EditTimeline from match results
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_timeline(
    beats: Sequence[TrailerBeat],
    results: Sequence[MatchResult],
    cfg: AppConfig,
) -> "src.core.models.EditTimeline":  # type: ignore[name-defined]
    """
    Assemble an ordered EditTimeline from beats and their match results.

    Beats are walked in order and each one occupies its own trailer duration
    on the timeline. A beat without a MatchResult produces no clip but still
    advances the cursor, leaving a gap. For a matched beat, the usable source
    duration is capped at the beat duration; whatever the match cannot cover
    is recorded as the clip's trailer tail.

    Args:
        beats:   All trailer beats (defines order + durations).
        results: MatchResult list from run_matching().
        cfg:     Application configuration.

    Returns:
        EditTimeline titled after the reference trailer's file stem, using
        ``cfg.export.edl_frame_rate``.
    """
    from src.core.models import EditClip, EditTimeline

    match_for_beat: dict[int, MatchResult] = {}
    for result in results:
        match_for_beat[result.beat_id] = result

    clips: list[EditClip] = []
    position = 0.0

    for beat in beats:
        match = match_for_beat.get(beat.beat_id)
        if match is None:
            logger.warning("Beat %d has no match — gap in timeline.", beat.beat_id)
            position += beat.duration_s
            continue

        available = max(0.0, match.duration_s)
        if available > 0:
            source_duration = min(beat.duration_s, available)
        else:
            # A match without a positive duration is treated as covering the beat.
            source_duration = beat.duration_s

        tail = max(0.0, beat.duration_s - source_duration)
        if tail > 0:
            logger.warning(
                "Beat %d uses %.2fs source + %.2fs generated trailer tail.",
                beat.beat_id,
                source_duration,
                tail,
            )

        clips.append(
            EditClip(
                clip_index=len(clips),
                beat=beat,
                match=match,
                timeline_start_s=position,
                timeline_end_s=position + beat.duration_s,
                source_duration_s=source_duration,
                trailer_tail_s=tail,
            )
        )
        position += beat.duration_s

    timeline = EditTimeline(
        title=cfg.paths.reference_trailer.stem,
        frame_rate=cfg.export.edl_frame_rate,
        clips=tuple(clips),
    )

    logger.info(
        "Timeline built: %d clips, total duration %.2fs",
        timeline.clip_count, timeline.total_duration_s,
    )
    return timeline
|
||||
@@ -0,0 +1,427 @@
|
||||
"""
|
||||
src/pipeline/reporter.py — Visual Match Report Generator
|
||||
|
||||
Generates an HTML file containing side-by-side video clips of:
|
||||
Left: The original beat from the reference trailer
|
||||
Right: The matched scene from the source movie
|
||||
|
||||
This allows instant visual verification of the CV pipeline's results.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from src.core.config import AppConfig
|
||||
|
||||
# Module-level logger; the ffmpeg helpers below report failures through it.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _extract_clip(video_path: Path, start_s: float, duration_s: float, out_path: Path) -> None:
    """Use ffmpeg to extract a silent, low-res preview clip.

    Args:
        video_path: Video to cut from.
        start_s:    Clip start in seconds; negative values are treated as 0.
        duration_s: Clip length in seconds.
        out_path:   Destination file; its parent directory is created.

    A non-zero ffmpeg exit status is logged, not raised.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Fast input seek close to the target, then accurate output seek for
    # frame-faithful preview clips. A plain "-ss before -i" can land on a
    # nearby keyframe and make the report look several frames out of sync.
    # The pre-roll is up to 2 s but never reaches before the start of the
    # file, so clips that begin within the first 2 s also take the accurate
    # output-seek path.
    start_s = max(0.0, start_s)
    input_seek_s = max(0.0, start_s - 2.0)
    accurate_seek_s = start_s - input_seek_s

    cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-ss", str(input_seek_s),
        "-i", str(video_path),
        "-ss", str(accurate_seek_s),
        "-t", str(duration_s),
        "-map", "0:v:0",
        "-c:v", "libx264",
        "-preset", "ultrafast",
        "-crf", "28",
        "-vf", "scale=640:-2",  # scale down for lightweight report
        "-an",                   # no audio
        "-movflags", "+faststart",
        str(out_path),
    ]

    result = subprocess.run(cmd, capture_output=True)
    if result.returncode != 0:
        logger.error(
            "ffmpeg clip extraction failed for %s:\n%s",
            out_path.name, result.stderr.decode(errors="replace")
        )
|
||||
|
||||
|
||||
def _extract_clip_with_black_tail(
    video_path: Path,
    start_s: float,
    source_duration_s: float,
    total_duration_s: float,
    out_path: Path,
) -> None:
    """Extract a source preview and append black frames for trailer-only tails.

    Args:
        video_path:        Video to cut the source portion from.
        start_s:           Source start in seconds; negative values are treated as 0.
        source_duration_s: Length of real source footage to show.
        total_duration_s:  Length the finished preview should have.
        out_path:          Destination file; its parent directory is created.

    When the tail is 0.02 s or shorter this simply delegates to
    _extract_clip(). When there is no usable source footage (0.02 s or less)
    the preview is rendered as black for the whole ``total_duration_s``.
    ffmpeg failures are logged, not raised, and the intermediate files are
    removed on every exit path.
    """
    tail_s = max(0.0, total_duration_s - source_duration_s)
    if tail_s <= 0.02:
        _extract_clip(video_path, start_s, source_duration_s, out_path)
        return

    out_path.parent.mkdir(parents=True, exist_ok=True)
    source_tmp = out_path.with_name(f"{out_path.stem}_source_tmp.mp4")
    tail_tmp = out_path.with_name(f"{out_path.stem}_tail_tmp.mp4")

    # Same threshold the segmented renderer uses for "too short to render".
    has_source = source_duration_s > 0.02
    if not has_source:
        # Nothing to show from the source: the whole preview is placeholder.
        tail_s = total_duration_s

    # Up to 2 s of pre-roll for the fast input seek, never reaching before
    # the start of the file; the remainder is an accurate output seek.
    start_s = max(0.0, start_s)
    input_seek_s = max(0.0, start_s - 2.0)
    accurate_seek_s = start_s - input_seek_s

    # First render the matched source portion with the same accurate seek path
    # as _extract_clip(). Using trim=start=... after an input seek is brittle
    # because FFmpeg may preserve non-zero packet timestamps around keyframes.
    source_cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-ss", str(input_seek_s),
        "-i", str(video_path),
        "-ss", str(accurate_seek_s),
        "-t", str(source_duration_s),
        "-map", "0:v:0",
        "-c:v", "libx264",
        "-preset", "ultrafast",
        "-crf", "28",
        "-vf", "scale=640:360,setsar=1,fps=25,setpts=PTS-STARTPTS",
        "-an",
        "-movflags", "+faststart",
        str(source_tmp),
    ]

    # The black clip is an intermediate when it follows source footage and
    # the final output when it stands alone.
    tail_cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-f", "lavfi",
        "-i", f"color=c=black:s=640x360:r=25:d={tail_s}",
        "-c:v", "libx264",
        "-preset", "ultrafast",
        "-crf", "28",
        "-an",
        "-movflags", "+faststart",
        str(tail_tmp if has_source else out_path),
    ]

    concat_cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", str(source_tmp),
        "-i", str(tail_tmp),
        "-filter_complex", "[0:v][1:v]concat=n=2:v=1:a=0[v]",
        "-map", "[v]",
        "-c:v", "libx264",
        "-preset", "ultrafast",
        "-crf", "28",
        "-an",
        "-movflags", "+faststart",
        str(out_path),
    ]

    if has_source:
        steps = [
            ("ffmpeg source preview extraction failed for %s:\n%s", source_cmd),
            ("ffmpeg black tail render failed for %s:\n%s", tail_cmd),
            ("ffmpeg tailed preview concat failed for %s:\n%s", concat_cmd),
        ]
    else:
        steps = [("ffmpeg black tail render failed for %s:\n%s", tail_cmd)]

    try:
        for failure_msg, cmd in steps:
            result = subprocess.run(cmd, capture_output=True)
            if result.returncode != 0:
                logger.error(
                    failure_msg,
                    out_path.name,
                    result.stderr.decode(errors="replace"),
                )
                # Later steps depend on this one; stop here.
                return
    finally:
        # Runs on success and on every failure path so intermediates never
        # pile up in the report directory.
        for tmp in (source_tmp, tail_tmp):
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup: a leftover temp file is not worth failing for.
                pass
|
||||
|
||||
|
||||
def _extract_segmented_clip(
    video_path: Path,
    segments: list,
    total_duration_s: float,
    out_path: Path,
) -> None:
    """Render a beat-length source preview from multiple matched source islands.

    Args:
        video_path:       Source video the segments are cut from.
        segments:         Objects exposing ``trailer_offset_s``, ``duration_s``
                          and ``in_point_s``; processed in ``trailer_offset_s`` order.
        total_duration_s: Length the finished preview should have.
        out_path:         Destination file; its parent directory is created.

    Gaps before, between and after the segments are filled with black. Parts
    of 0.02 s or less are skipped. ffmpeg failures are logged, not raised; if
    no part could be rendered at all, nothing is written.
    """
    if not segments:
        _extract_clip_with_black_tail(video_path, 0.0, 0.0, total_duration_s, out_path)
        return

    out_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_paths: list[Path] = []
    cursor = 0.0

    def add_black(duration_s: float) -> None:
        """Render a black filler part and queue it, unless it is negligible."""
        if duration_s <= 0.02:
            return
        tmp = out_path.with_name(f"{out_path.stem}_part_{len(tmp_paths):03d}_black.mp4")
        cmd = [
            "ffmpeg", "-y", "-loglevel", "error",
            "-f", "lavfi",
            "-i", f"color=c=black:s=640x360:r=25:d={duration_s}",
            "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
            "-an", "-movflags", "+faststart",
            str(tmp),
        ]
        result = subprocess.run(cmd, capture_output=True)
        if result.returncode == 0:
            tmp_paths.append(tmp)
        else:
            logger.error("ffmpeg black segment render failed:\n%s", result.stderr.decode(errors="replace"))

    def add_source(start_s: float, duration_s: float) -> None:
        """Render one source island and queue it, unless it is negligible."""
        if duration_s <= 0.02:
            return
        tmp = out_path.with_name(f"{out_path.stem}_part_{len(tmp_paths):03d}_src.mp4")
        # Up to 2 s of pre-roll for the fast input seek, never reaching
        # before the start of the file; the rest is an accurate output seek.
        start_s = max(0.0, start_s)
        input_seek_s = max(0.0, start_s - 2.0)
        accurate_seek_s = start_s - input_seek_s
        cmd = [
            "ffmpeg", "-y", "-loglevel", "error",
            "-ss", str(input_seek_s),
            "-i", str(video_path),
            "-ss", str(accurate_seek_s),
            "-t", str(duration_s),
            "-map", "0:v:0",
            "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
            "-vf", "scale=640:360,setsar=1,fps=25,setpts=PTS-STARTPTS",
            "-an", "-movflags", "+faststart",
            str(tmp),
        ]
        result = subprocess.run(cmd, capture_output=True)
        if result.returncode == 0 and tmp.exists():
            tmp_paths.append(tmp)
        else:
            logger.error("ffmpeg source segment render failed:\n%s", result.stderr.decode(errors="replace"))

    for segment in sorted(segments, key=lambda s: s.trailer_offset_s):
        offset_s = max(0.0, float(segment.trailer_offset_s))
        duration_s = max(0.0, float(segment.duration_s))
        # Black up to this segment's offset; a negative gap (overlap) is a no-op.
        add_black(offset_s - cursor)
        add_source(float(segment.in_point_s), duration_s)
        cursor = max(cursor, offset_s + duration_s)

    add_black(total_duration_s - cursor)

    if not tmp_paths:
        # Every part was skipped or failed; running the concat with zero
        # inputs would only produce a second, misleading ffmpeg error.
        logger.error("ffmpeg segmented preview produced no parts for %s", out_path.name)
        return

    if len(tmp_paths) == 1:
        # A single part already is the finished preview; just move it into place.
        tmp_paths[0].replace(out_path)
        return

    inputs: list[str] = []
    labels: list[str] = []
    for idx, tmp in enumerate(tmp_paths):
        inputs.extend(["-i", str(tmp)])
        labels.append(f"[{idx}:v]")
    filter_complex = "".join(labels) + f"concat=n={len(tmp_paths)}:v=1:a=0[v]"
    cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        *inputs,
        "-filter_complex", filter_complex,
        "-map", "[v]",
        "-c:v", "libx264", "-preset", "ultrafast", "-crf", "28",
        "-an", "-movflags", "+faststart",
        str(out_path),
    ]
    result = subprocess.run(cmd, capture_output=True)
    if result.returncode != 0:
        logger.error("ffmpeg segmented preview concat failed:\n%s", result.stderr.decode(errors="replace"))

    for tmp in tmp_paths:
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            # Best-effort cleanup: a leftover part is not worth failing for.
            pass
|
||||
|
||||
|
||||
def _build_frame_locked_compare(ref_path: Path, src_path: Path, out_path: Path) -> None:
    """Render reference and source into one side-by-side video stream.

    Both inputs go through the same filter chain (25 fps, fitted and padded
    to 640x360, timestamps reset) and are then stacked horizontally with the
    reference on the left. A non-zero ffmpeg exit status is logged, not raised.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    per_input_chain = ",".join(
        [
            "fps=25",
            "scale=640:360:force_original_aspect_ratio=decrease",
            "pad=640:360:(ow-iw)/2:(oh-ih)/2",
            "setsar=1",
            "setpts=PTS-STARTPTS",
        ]
    )
    graph = ";".join(
        [
            f"[0:v]{per_input_chain}[ref]",
            f"[1:v]{per_input_chain}[src]",
            "[ref][src]hstack=inputs=2[v]",
        ]
    )

    ffmpeg_args = ["ffmpeg", "-y", "-loglevel", "error"]
    ffmpeg_args += ["-i", str(ref_path), "-i", str(src_path)]
    ffmpeg_args += ["-filter_complex", graph, "-map", "[v]"]
    ffmpeg_args += ["-c:v", "libx264", "-preset", "ultrafast", "-crf", "28"]
    ffmpeg_args += ["-an", "-movflags", "+faststart", str(out_path)]

    completed = subprocess.run(ffmpeg_args, capture_output=True)
    if completed.returncode == 0:
        return
    logger.error(
        "ffmpeg compare render failed for %s:\n%s",
        out_path.name,
        completed.stderr.decode(errors="replace"),
    )
|
||||
|
||||
|
||||
def generate_report(beats: list, results: list, cfg: AppConfig) -> Path:
    """
    Generate an HTML side-by-side report.

    For every beat a reference clip is cut from the trailer. Matched beats
    additionally get a source preview and a combined compare video; unmatched
    beats show the reference clip next to a "No Match" placeholder. All files
    are written to ``cfg.paths.output_dir / "report"``.

    Args:
        beats:   Trailer beats to report on, in display order.
        results: Match results; looked up by ``beat_id``.
        cfg:     Application configuration (only ``paths.output_dir`` is read here).

    Returns the path to the .html file.
    """
    report_dir = cfg.paths.output_dir / "report"
    report_dir.mkdir(parents=True, exist_ok=True)

    html_path = report_dir / "match_report.html"
    results_by_beat = {r.beat_id: r for r in results}

    logger.info("Generating report clips in %s (this might take a moment) ...", report_dir)

    # Static page head: inline CSS, the summary line and the sync script.
    html = [
        "<!DOCTYPE html>",
        "<html><head><meta charset='utf-8'><title>AI Trailer Match Report</title>",
        "<style>",
        "body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: #0f0f0f; color: #e0e0e0; margin: 40px; }",
        "h1 { color: #fff; border-bottom: 1px solid #333; padding-bottom: 10px; }",
        ".stats { font-size: 1.2em; margin-bottom: 30px; color: #aaa; }",
        ".beat-row { display: flex; margin-bottom: 30px; background: #1a1a1a; padding: 20px; border-radius: 12px; border: 1px solid #333; }",
        ".info { width: 250px; padding-right: 20px; flex-shrink: 0; }",
        ".info h3 { margin-top: 0; color: #fff; }",
        ".video-container { display: flex; gap: 20px; flex-grow: 1; }",
        ".videos { flex-grow: 1; }",
        ".compare { margin-bottom: 18px; }",
        ".video-col { flex: 1; }",
        ".video-col p { margin-top: 0; font-weight: bold; color: #888; }",
        "video { width: 100%; border-radius: 6px; box-shadow: 0 4px 6px rgba(0,0,0,0.5); background: #000; }",
        ".status-match { color: #4ade80; font-weight: bold; font-size: 1.1em; }",
        ".status-miss { color: #f87171; font-weight: bold; font-size: 1.1em; }",
        ".score { font-family: monospace; font-size: 1.1em; color: #60a5fa; }",
        ".code-hint { background: #000; padding: 10px; border-radius: 4px; font-family: monospace; font-size: 0.9em; margin-top: 15px; color: #a3e635; }",
        "</style></head><body>",
        f"<h1>AI Trailer Generator — Match Report</h1>",
        # "Matched" is the number of results passed in, not a count of beats
        # that found a result in the lookup.
        f"<div class='stats'>Total Beats: {len(beats)} | Matched: {len(results)}</div>",
        # syncBeat() slaves a row's second <video> to its first.
        # NOTE(review): every row emitted below contains a single <video>
        # (either the compare video or the reference clip), so the
        # "vids.length < 2" guard returns for all of them — confirm whether
        # this script is still needed.
        "<script>",
        "function syncBeat(row) {",
        " const vids = row.querySelectorAll('video');",
        " if (vids.length < 2) return;",
        " const ref = vids[0];",
        " const src = vids[1];",
        " let syncing = false;",
        " function align() {",
        " if (syncing) return;",
        " syncing = true;",
        " const target = Math.min(ref.currentTime, Math.max(0, (src.duration || ref.currentTime) - 0.02));",
        " if (Math.abs(src.currentTime - target) > 0.035) src.currentTime = target;",
        " if (ref.paused && !src.paused) src.pause();",
        " if (!ref.paused && src.paused) src.play().catch(() => {});",
        " syncing = false;",
        " }",
        " ref.addEventListener('play', () => { src.currentTime = Math.min(ref.currentTime, Math.max(0, (src.duration || ref.currentTime) - 0.02)); src.play().catch(() => {}); });",
        " ref.addEventListener('pause', () => src.pause());",
        " ref.addEventListener('seeked', () => { src.currentTime = Math.min(ref.currentTime, Math.max(0, (src.duration || ref.currentTime) - 0.02)); });",
        " ref.addEventListener('timeupdate', align);",
        "}",
        "document.addEventListener('DOMContentLoaded', () => document.querySelectorAll('.beat-row').forEach(syncBeat));",
        "</script>"
    ]

    for beat in beats:
        res = results_by_beat.get(beat.beat_id)

        # Extract Reference Clip
        ref_mp4 = report_dir / f"beat_{beat.beat_id:03d}_ref.mp4"
        _extract_clip(beat.trailer_path, beat.start_s, beat.duration_s, ref_mp4)

        html.append("<div class='beat-row'>")

        # Info Panel
        html.append("<div class='info'>")
        html.append(f"<h3>Beat {beat.beat_id:03d}</h3>")
        html.append(f"<p><b>Type:</b> {beat.beat_type.name}</p>")
        html.append(f"<p><b>Trailer:</b> {beat.start_s:.2f}s → {beat.end_s:.2f}s</p>")

        if res:
            # A result may or may not carry a "segments" attribute; a missing
            # or empty one means a single contiguous match.
            segments = list(getattr(res, "segments", ()) or [])
            # Source duration: sum of the segment durations, or the
            # in/out span when there are no segments.
            source_duration = sum(max(0.0, float(s.duration_s)) for s in segments)
            if not segments:
                source_duration = max(0.0, res.out_point_s - res.in_point_s)
            # The preview never runs longer than the beat; a zero-length
            # match falls back to the full beat duration.
            preview_duration = min(beat.duration_s, source_duration) if source_duration > 0 else beat.duration_s
            # Where matched footage ends, measured on the beat's own time axis.
            last_segment_end = max(
                (float(s.trailer_offset_s) + float(s.duration_s) for s in segments),
                default=preview_duration,
            )
            # Part of the beat after the last matched footage.
            trailer_tail_s = max(0.0, beat.duration_s - last_segment_end)
            # Results without an "is_confirmed" attribute count as confirmed.
            if getattr(res, "is_confirmed", True):
                html.append("<p class='status-match'>MATCHED</p>")
            else:
                html.append("<p style='color: #fbbf24; font-weight: bold; font-size: 1.1em;'>PROVISIONAL MATCH</p>")
            html.append(f"<p><b>Scene ID:</b> {res.scene_id}</p>")
            html.append(f"<p><b>Movie In:</b> {res.in_point_s:.2f}s</p>")
            html.append(f"<p><b>Source Dur:</b> {source_duration:.2f}s</p>")
            if len(segments) > 1:
                html.append(f"<p><b>Segments:</b> {len(segments)} matched visual islands</p>")
            if trailer_tail_s > 0:
                html.append(f"<p><b>Unmatched Tail:</b> {trailer_tail_s:.2f}s placeholder</p>")
            html.append(f"<p><b>Score:</b> <span class='score'>{res.match_score:.3f}</span></p>")
            if trailer_tail_s > 0:
                html.append("<p style='color: #fbbf24; font-size: 0.9em;'>Some trailer frames are still unmatched; report fills only those gaps with placeholder black.</p>")

            # Warn if score is low
            if res.match_score < 0.80:
                html.append("<p style='color: #fbbf24; font-size: 0.9em;'>⚠️ Score below 0.80. Verify visually.</p>")

            # Extract Source Clip
            src_mp4 = report_dir / f"beat_{beat.beat_id:03d}_src.mp4"
            compare_mp4 = report_dir / f"beat_{beat.beat_id:03d}_compare.mp4"
            if segments:
                _extract_segmented_clip(res.source_path, segments, beat.duration_s, src_mp4)
            else:
                _extract_clip_with_black_tail(
                    res.source_path,
                    res.in_point_s,
                    preview_duration,
                    beat.duration_s,
                    src_mp4,
                )
            # The helpers log ffmpeg failures instead of raising, so the
            # compare video is referenced below even if a render failed.
            _build_frame_locked_compare(ref_mp4, src_mp4, compare_mp4)
        else:
            html.append("<p class='status-miss'>NO MATCH</p>")
            src_mp4 = None
            compare_mp4 = None

        html.append(f"<div class='code-hint'>python cli.py rematch --beat {beat.beat_id}</div>")
        html.append("</div>")  # /info

        # Video Panel
        html.append("<div class='videos'>")
        if compare_mp4:
            # Clips sit next to the HTML file, so bare file names are enough.
            html.append(f"<div class='compare'><p>Frame-Locked Compare</p><video src='{compare_mp4.name}' controls loop muted autoplay></video></div>")
        else:
            html.append("<div class='video-container'>")
            html.append(f"<div class='video-col'><p>Reference Trailer</p><video src='{ref_mp4.name}' controls loop muted autoplay></video></div>")
            html.append("<div class='video-col'><p>Matched Source</p><div style='width: 100%; aspect-ratio: 16/9; background: #222; display: flex; align-items: center; justify-content: center; border-radius: 6px; color: #555;'>No Match</div></div>")
            html.append("</div>")  # /video-container
        html.append("</div>")  # /videos
        html.append("</div>")  # /beat-row

    html.append("</body></html>")

    html_path.write_text("\n".join(html), encoding="utf-8")
    return html_path
|
||||
@@ -0,0 +1,175 @@
|
||||
"""
|
||||
src/pipeline/trailer_analyzer.py — Reference trailer → list[TrailerBeat]
|
||||
|
||||
Responsibility:
|
||||
1. Run PySceneDetect on the REFERENCE TRAILER (not the source movie)
|
||||
to detect cut boundaries → raw beat intervals
|
||||
2. Fingerprint the midpoint frame of each beat (for Vibe Check)
|
||||
3. Transcribe dialogue per beat via Whisper (optional, injected)
|
||||
4. Optionally classify BeatType via the LLM dramaturg (injected)
|
||||
|
||||
Returns: list[TrailerBeat] ready to feed into run_matching().
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import replace
|
||||
from pathlib import Path
|
||||
from typing import Callable, Sequence
|
||||
|
||||
from src.core.config import AppConfig
|
||||
from src.core.models import BeatType, DialogueLine, TrailerBeat
|
||||
from src.cv.fingerprinting import fingerprint_frame
|
||||
from src.cv.frame_extractor import grab_midpoint_frame, open_video
|
||||
|
||||
# Module-level logger for the trailer analysis steps below.
logger = logging.getLogger(__name__)
|
||||
|
||||
# Injection type aliases — keeps this module free of hard audio/LLM imports

# fn(path, start_s, end_s, offset_s) → list[DialogueLine]
TranscribeCallback = Callable[[Path, float, float, float], list[DialogueLine]]

# fn(beats) → beats with BeatType set
ClassifyCallback = Callable[[list[TrailerBeat]], list[TrailerBeat]]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Step 1: Scene detection on the reference trailer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _detect_trailer_beats(cfg: AppConfig) -> list[tuple[float, float, int, int]]:
    """
    Detect cut boundaries in the reference trailer with PySceneDetect.

    A ContentDetector is configured from ``cfg.scene_detection``: its
    threshold is ``content_threshold`` and its minimum scene length is
    ``min_scene_duration_s`` converted to frames at the trailer's frame rate.

    Returns:
        One (start_s, end_s, start_frame, end_frame) tuple per detected scene.

    Raises:
        ImportError: If scenedetect cannot be imported.
    """
    try:
        from scenedetect import open_video as sd_open_video, SceneManager
        from scenedetect.detectors import ContentDetector
    except ImportError:
        raise ImportError("pip install scenedetect[opencv]")

    trailer_path = cfg.paths.reference_trailer
    video = sd_open_video(str(trailer_path))

    manager = SceneManager()
    min_len_frames = int(cfg.scene_detection.min_scene_duration_s * video.frame_rate)
    detector = ContentDetector(
        threshold=cfg.scene_detection.content_threshold,
        min_scene_len=min_len_frames,
    )
    manager.add_detector(detector)

    logger.info("Detecting beats in reference trailer: %s …", trailer_path.name)
    manager.detect_scenes(video=video, show_progress=False)

    intervals: list[tuple[float, float, int, int]] = []
    for start_tc, end_tc in manager.get_scene_list():
        intervals.append(
            (
                start_tc.get_seconds(),
                end_tc.get_seconds(),
                start_tc.get_frames(),
                end_tc.get_frames(),
            )
        )

    logger.info("Detected %d beats in reference trailer.", len(intervals))
    return intervals
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Step 2: Fingerprint beats
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _fingerprint_beats(
    raw_beats: list[tuple[float, float, int, int]],
    cfg: AppConfig,
) -> list[TrailerBeat]:
    """Turn raw beat intervals into TrailerBeat objects with fingerprints.

    The beat_id is the interval's position in ``raw_beats``. For each interval
    the midpoint frame is grabbed from the reference trailer and
    fingerprinted; if the frame cannot be decoded, the beat is still created
    but without luma_hist, sat_hist and phash.
    """
    trailer_path = cfg.paths.reference_trailer
    vc_cfg = cfg.cv.vibe_check
    beats: list[TrailerBeat] = []

    with open_video(trailer_path) as cap:
        for idx, (start_s, end_s, start_frame, end_frame) in enumerate(raw_beats):
            # Fields every beat gets, fingerprinted or not.
            fields = dict(
                beat_id=idx,
                trailer_path=trailer_path,
                start_s=start_s,
                end_s=end_s,
                start_frame=start_frame,
                end_frame=end_frame,
            )

            frame = grab_midpoint_frame(cap, start_s, end_s)
            if frame is None:
                logger.warning("Beat %d: midpoint frame decode failed.", idx)
            else:
                luma_b, sat_b, phash = fingerprint_frame(frame, vc_cfg)
                fields.update(luma_hist=luma_b, sat_hist=sat_b, phash=phash)

            beats.append(TrailerBeat(**fields))

    return beats
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def analyze_reference_trailer(
    cfg: AppConfig,
    transcribe_callback: TranscribeCallback | None = None,
    classify_callback: ClassifyCallback | None = None,
) -> list[TrailerBeat]:
    """
    Run the complete reference-trailer analysis.

    Cut detection and fingerprinting always run. Transcription and
    classification only run when their callback is supplied, and a failure in
    either one is logged as a warning rather than raised.

    Args:
        cfg:                  Application configuration.
        transcribe_callback:  Optional fn(path, start_s, end_s, offset_s)
                              → list[DialogueLine]. Called once per beat with
                              the beat's start as the offset.
        classify_callback:    Optional fn(beats) → beats with BeatType set.
                              Called once with the whole beat list.

    Returns:
        List of TrailerBeat objects with fingerprints (and optionally
        dialogue + BeatType) populated.
    """
    # Steps 1 + 2 — cut detection, then fingerprinting
    beats = _fingerprint_beats(_detect_trailer_beats(cfg), cfg)

    # Step 3 — dialogue (optional)
    if transcribe_callback is not None:

        def _with_dialogue(beat: TrailerBeat) -> TrailerBeat:
            """Return the beat with dialogue attached, or unchanged on failure."""
            try:
                lines = transcribe_callback(
                    beat.trailer_path,
                    beat.start_s,
                    beat.end_s,
                    beat.start_s,   # time_offset so timestamps are absolute
                )
                return replace(beat, dialogue=tuple(lines))
            except Exception as exc:
                logger.warning("Beat %d transcription failed: %s", beat.beat_id, exc)
                return beat

        beats = [_with_dialogue(beat) for beat in beats]

    # Step 4 — LLM dramaturgy (optional)
    if classify_callback is not None:
        try:
            beats = classify_callback(beats)
        except Exception as exc:
            logger.warning("Beat classification failed: %s — keeping UNKNOWN.", exc)

    with_dialogue = sum(1 for b in beats if b.dialogue)
    classified = sum(1 for b in beats if b.beat_type != BeatType.UNKNOWN)
    logger.info(
        "Trailer analysis complete: %d beats, %d with dialogue, %d classified.",
        len(beats),
        with_dialogue,
        classified,
    )
    return beats
|
||||
Reference in New Issue
Block a user