Initial project import

2026-05-02 09:07:41 +02:00
commit 8e1bcf142f
38 changed files with 7928 additions and 0 deletions
@@ -0,0 +1 @@
+# src.core package
@@ -0,0 +1,387 @@
+"""
+src/core/config.py — Configuration loader for AI Trailer Generator v2
+
+Loads config.toml and exposes typed, nested dataclasses.
+All CV thresholds, paths, and model settings are sourced exclusively here.
+API keys are NEVER stored in config.toml; they are loaded from .env.
+"""
+
+from __future__ import annotations
+
+import os
+import tomllib
+
+try:
+    from dotenv import load_dotenv as _load_dotenv
+    _HAS_DOTENV = True
+except ImportError:  # dotenv optional — falls back to existing env vars
+    _HAS_DOTENV = False
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Literal
+
+
+# ---------------------------------------------------------------------------
+# Leaf sections
+# ---------------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class PathsConfig:
+    """Filesystem locations taken from the ``[paths]`` table of config.toml.
+
+    ``load_config`` keeps absolute entries unchanged and resolves relative
+    entries against the directory that contains the config file, so every
+    attribute on a loaded instance is an absolute ``Path``.
+    """
+
+    source_movie: Path
+    reference_trailer: Path
+    output_dir: Path
+    cache_dir: Path
+    proxy_dir: Path
+
+
+@dataclass(frozen=True)
+class VideoConfig:
+    """Values of the ``[video]`` table of config.toml.
+
+    All three keys are required; ``load_config`` coerces them with
+    ``float()`` / ``int()``.
+    """
+
+    extract_fps: float
+    proxy_width: int
+    proxy_height: int
+
+
+@dataclass(frozen=True)
+class VibeCheckConfig:
+    """Values of the ``[cv.vibe_check]`` table of config.toml.
+
+    Every key is required: ``load_config`` indexes each one directly and
+    raises ``KeyError`` when one is missing.
+    """
+
+    top_k_candidates: int
+    # Stored as a plain int — presumably a cv2.HISTCMP_* constant; confirm
+    # against the code that consumes it.
+    hist_compare_method: int
+    hist_bins_hue: int
+    hist_bins_saturation: int
+    phash_max_distance: int
+    crop_top_fraction: float
+    crop_bottom_fraction: float
+
+
+@dataclass(frozen=True)
+class DeepScanConfig:
+    """Values of the ``[cv.deep_scan]`` table of config.toml.
+
+    Only ``coarse_step_seconds``, ``match_threshold``, ``match_method`` and
+    ``refine_step_seconds`` are required keys. Every other field falls back
+    to a default supplied by ``load_config`` when the key is absent;
+    ``coarse_candidate_threshold`` falls back to ``match_threshold``.
+    """
+
+    coarse_step_seconds: float
+    match_threshold: float
+    provisional_match_threshold: float
+    coarse_candidate_threshold: float
+    sequence_score_weight: float
+    span_score_weight: float
+    coarse_score_weight: float
+    duration_score_weight: float
+    duration_tie_break_score_delta: float
+    min_duration_coverage: float
+    # Stored as an immutable tuple; load_config converts each entry to float.
+    continuity_seed_offsets_s: tuple[float, ...]
+    scene_seed_top_k: int
+    scene_seed_points_per_scene: int
+    content_rerank_candidate_count: int
+    skip_coarse_scan_with_weighted_seeds: bool
+    max_refine_candidates: int
+    # Stored as a plain int — presumably a cv2.TM_* constant; confirm against
+    # the code that consumes it.
+    match_method: int
+    refine_window_seconds: float
+    refine_step_seconds: float
+    content_align_window_seconds: float
+    content_align_sample_step_s: float
+    content_validation_weight: float
+    provisional_content_threshold: float
+    start_tie_break_score_delta: float
+    start_preroll_frames: int
+    sequence_candidate_count: int
+    sequence_min_distance_s: float
+    span_sample_step_s: float
+    trim_tail_frames: int
+    scene_boundary_epsilon_s: float
+    scoreable_luma_mean_min: float
+    scoreable_luma_p90_min: float
+    scoreable_contrast_min: float
+
+
+@dataclass(frozen=True)
+class CVConfig:
+    """Container for the two sub-tables of ``[cv]`` in config.toml."""
+
+    vibe_check: VibeCheckConfig
+    deep_scan: DeepScanConfig
+
+
+@dataclass(frozen=True)
+class SceneDetectionConfig:
+    """Values of the ``[scene_detection]`` table of config.toml.
+
+    Both keys are required; ``load_config`` coerces them with ``float()``.
+    """
+
+    content_threshold: float
+    min_scene_duration_s: float
+
+
+@dataclass(frozen=True)
+class WhisperConfig:
+    """Values of the ``[whisper]`` table of config.toml.
+
+    ``load_config`` passes all four values through without conversion or
+    validation, so the ``Literal`` annotations are not enforced at load time.
+    """
+
+    model: str
+    language: str
+    device: Literal["cuda", "cpu"]
+    compute_type: Literal["float16", "int8", "float32"]
+
+
+@dataclass(frozen=True)
+class LLMConfig:
+    """Values of the ``[llm]`` table of config.toml plus the resolved API key.
+
+    ``provider``, ``base_url`` and ``model`` are passed through unvalidated
+    by ``load_config``; the numeric fields are coerced with ``int()`` /
+    ``float()``.
+    """
+
+    provider: Literal["ollama", "openai", "openrouter"]
+    base_url: str
+    model: str
+    timeout_seconds: int
+    temperature: float
+    max_tokens: int
+    # Loaded from .env — NEVER committed to version control.
+    # load_config fills this from OPENROUTER_API_KEY (provider "openrouter")
+    # or OPENAI_API_KEY (provider "openai"), falling back to LLM_API_KEY;
+    # it stays "" when none of them is set.
+    api_key: str = ""
+
+
+@dataclass(frozen=True)
+class VisionConfig:
+    """Values of the optional ``[vision]`` table of config.toml.
+
+    The whole table may be absent: ``load_config`` supplies a default for
+    every key. ``enabled`` defaults to False, while ``base_url``, ``model``
+    and ``timeout_seconds`` default to the corresponding ``[llm]`` values.
+    """
+
+    enabled: bool
+    provider: Literal["openai", "openrouter"]
+    base_url: str
+    model: str
+    timeout_seconds: int
+    temperature: float
+    max_tokens: int
+    scene_candidate_top_k: int
+    max_new_descriptions_per_run: int
+    max_seed_scenes: int
+    seed_points_per_scene: int
+    seed_score: float
+    max_refine_candidates: int
+    local_scan_step_s: float
+    local_scan_max_points_per_scene: int
+    local_scan_top_candidates: int
+    local_scan_tie_break_score_delta: float
+    multi_shot_cut_corr_threshold: float
+    multi_shot_boundary_tolerance_s: float
+    fullscan_fallback: bool
+    content_threshold: float
+    similarity_threshold: float
+    # Resolved by load_config from environment variables only:
+    # provider-specific key first, then VISION_API_KEY, then LLM_API_KEY.
+    api_key: str = ""
+
+
+@dataclass(frozen=True)
+class ExportConfig:
+    """Values of the ``[export]`` table of config.toml.
+
+    All keys are required. ``output_format`` is passed through by
+    ``load_config`` without validation against the ``Literal`` choices.
+    """
+
+    fcpxml_version: str
+    edl_frame_rate: float
+    output_format: Literal["fcpxml", "edl", "both"]
+
+
+# ---------------------------------------------------------------------------
+# Root config — single object passed through the entire application
+# ---------------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class AppConfig:
+    """Root configuration object returned by ``load_config``.
+
+    The three scalar fields come from the ``[project]`` table (keys ``name``,
+    ``version`` and ``log_level``); each remaining field holds the typed
+    section object built from the config.toml table of the same name.
+    """
+
+    project_name: str
+    version: str
+    # Passed through from config.toml unvalidated.
+    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"]
+
+    paths: PathsConfig
+    video: VideoConfig
+    cv: CVConfig
+    scene_detection: SceneDetectionConfig
+    whisper: WhisperConfig
+    llm: LLMConfig
+    vision: VisionConfig
+    export: ExportConfig
+
+
+# ---------------------------------------------------------------------------
+# Loader
+# ---------------------------------------------------------------------------
+
+# Default locations used by load_config(). parents[2] is the directory two
+# levels above this file's own directory; for src/core/config.py that is the
+# project root.
+_DEFAULT_CONFIG_PATH = Path(__file__).parents[2] / "config.toml"
+_DEFAULT_ENV_PATH    = Path(__file__).parents[2] / ".env"
+
+
+def load_config(
+    config_path: Path = _DEFAULT_CONFIG_PATH,
+    env_path: Path = _DEFAULT_ENV_PATH,
+) -> AppConfig:
+    """
+    Parse config.toml and return a fully-typed, immutable AppConfig.
+
+    API keys are read from the .env file (or existing environment variables);
+    they are never read from config.toml.
+
+    Args:
+        config_path: Absolute or relative path to the TOML file, given as a
+                     ``Path`` or a plain string.
+                     Defaults to <project_root>/config.toml.
+        env_path:    Path to the .env file. Only consulted when python-dotenv
+                     is installed; otherwise the existing process environment
+                     is used as-is.
+                     Defaults to <project_root>/.env.
+
+    Returns:
+        The populated AppConfig. Relative entries of ``[paths]`` are resolved
+        against the directory containing the config file.
+
+    Raises:
+        FileNotFoundError: If the TOML file does not exist.
+        tomllib.TOMLDecodeError: If the file is not valid TOML.
+        KeyError: If a required table or key is missing.
+        TypeError / ValueError: If a value cannot be converted to the type of
+            its field.
+    """
+    # Accept plain strings too; Path(Path(...)) is a no-op for existing callers.
+    config_path = Path(config_path)
+
+    # Load .env first so os.environ is populated before we read it below.
+    # override=False: variables already present in the environment win.
+    if _HAS_DOTENV:
+        _load_dotenv(dotenv_path=env_path, override=False)
+
+    if not config_path.exists():
+        raise FileNotFoundError(
+            f"Config file not found: {config_path}\n"
+            "Copy config.toml.example to config.toml and adjust your paths."
+        )
+
+    with config_path.open("rb") as fh:
+        raw: dict = tomllib.load(fh)
+
+    # Required top-level tables ([vision] is the only optional one).
+    project = raw["project"]
+    paths_raw = raw["paths"]
+    video_raw = raw["video"]
+    cv_raw = raw["cv"]
+    sd_raw = raw["scene_detection"]
+    whisper_raw = raw["whisper"]
+    llm_raw = raw["llm"]
+    vision_raw = raw.get("vision", {})
+    export_raw = raw["export"]
+
+    # Resolve paths relative to the config file's parent directory so the
+    # project is relocatable, but keep absolute paths as-is.
+    def _resolve(p: str) -> Path:
+        path = Path(p)
+        return path if path.is_absolute() else (config_path.parent / path).resolve()
+
+    paths = PathsConfig(
+        source_movie=_resolve(paths_raw["source_movie"]),
+        reference_trailer=_resolve(paths_raw["reference_trailer"]),
+        output_dir=_resolve(paths_raw["output_dir"]),
+        cache_dir=_resolve(paths_raw["cache_dir"]),
+        proxy_dir=_resolve(paths_raw["proxy_dir"]),
+    )
+
+    video = VideoConfig(
+        extract_fps=float(video_raw["extract_fps"]),
+        proxy_width=int(video_raw["proxy_width"]),
+        proxy_height=int(video_raw["proxy_height"]),
+    )
+
+    # [cv.vibe_check] — every key is required.
+    vc_raw = cv_raw["vibe_check"]
+    vibe_check = VibeCheckConfig(
+        top_k_candidates=int(vc_raw["top_k_candidates"]),
+        hist_compare_method=int(vc_raw["hist_compare_method"]),
+        hist_bins_hue=int(vc_raw["hist_bins_hue"]),
+        hist_bins_saturation=int(vc_raw["hist_bins_saturation"]),
+        phash_max_distance=int(vc_raw["phash_max_distance"]),
+        crop_top_fraction=float(vc_raw["crop_top_fraction"]),
+        crop_bottom_fraction=float(vc_raw["crop_bottom_fraction"]),
+    )
+
+    # [cv.deep_scan] — keys read with [] are required, keys read with .get()
+    # fall back to the default written here.
+    ds_raw = cv_raw["deep_scan"]
+    deep_scan = DeepScanConfig(
+        coarse_step_seconds=float(ds_raw["coarse_step_seconds"]),
+        match_threshold=float(ds_raw["match_threshold"]),
+        provisional_match_threshold=float(ds_raw.get("provisional_match_threshold", 0.45)),
+        coarse_candidate_threshold=float(ds_raw.get("coarse_candidate_threshold", ds_raw["match_threshold"])),
+        sequence_score_weight=float(ds_raw.get("sequence_score_weight", 0.55)),
+        span_score_weight=float(ds_raw.get("span_score_weight", 0.15)),
+        coarse_score_weight=float(ds_raw.get("coarse_score_weight", 0.10)),
+        duration_score_weight=float(ds_raw.get("duration_score_weight", 0.20)),
+        duration_tie_break_score_delta=float(ds_raw.get("duration_tie_break_score_delta", 0.03)),
+        min_duration_coverage=float(ds_raw.get("min_duration_coverage", 0.65)),
+        continuity_seed_offsets_s=tuple(
+            float(v) for v in ds_raw.get(
+                "continuity_seed_offsets_s",
+                [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0],
+            )
+        ),
+        scene_seed_top_k=int(ds_raw.get("scene_seed_top_k", 30)),
+        scene_seed_points_per_scene=int(ds_raw.get("scene_seed_points_per_scene", 6)),
+        content_rerank_candidate_count=int(ds_raw.get("content_rerank_candidate_count", 100)),
+        skip_coarse_scan_with_weighted_seeds=bool(ds_raw.get("skip_coarse_scan_with_weighted_seeds", False)),
+        max_refine_candidates=int(ds_raw.get("max_refine_candidates", 6)),
+        match_method=int(ds_raw["match_method"]),
+        refine_window_seconds=float(ds_raw.get("refine_window_seconds", 0.6)),
+        refine_step_seconds=float(ds_raw["refine_step_seconds"]),
+        content_align_window_seconds=float(ds_raw.get("content_align_window_seconds", 0.48)),
+        content_align_sample_step_s=float(ds_raw.get("content_align_sample_step_s", 0.28)),
+        content_validation_weight=float(ds_raw.get("content_validation_weight", 0.35)),
+        provisional_content_threshold=float(ds_raw.get("provisional_content_threshold", 0.42)),
+        start_tie_break_score_delta=float(ds_raw.get("start_tie_break_score_delta", 0.015)),
+        start_preroll_frames=int(ds_raw.get("start_preroll_frames", 0)),
+        sequence_candidate_count=int(ds_raw.get("sequence_candidate_count", 240)),
+        sequence_min_distance_s=float(ds_raw.get("sequence_min_distance_s", 1.0)),
+        span_sample_step_s=float(ds_raw.get("span_sample_step_s", 0.08)),
+        trim_tail_frames=int(ds_raw.get("trim_tail_frames", 2)),
+        scene_boundary_epsilon_s=float(ds_raw.get("scene_boundary_epsilon_s", 0.12)),
+        scoreable_luma_mean_min=float(ds_raw.get("scoreable_luma_mean_min", 24.0)),
+        scoreable_luma_p90_min=float(ds_raw.get("scoreable_luma_p90_min", 58.0)),
+        scoreable_contrast_min=float(ds_raw.get("scoreable_contrast_min", 24.0)),
+    )
+
+    scene_detection = SceneDetectionConfig(
+        content_threshold=float(sd_raw["content_threshold"]),
+        min_scene_duration_s=float(sd_raw["min_scene_duration_s"]),
+    )
+
+    # Passed through as-is; the Literal annotations are not checked here.
+    whisper = WhisperConfig(
+        model=whisper_raw["model"],
+        language=whisper_raw["language"],
+        device=whisper_raw["device"],
+        compute_type=whisper_raw["compute_type"],
+    )
+
+    # Resolve the LLM API key from the environment only (config.toml is never
+    # consulted for it). Lookup order:
+    #   OPENROUTER_API_KEY  → only when provider = openrouter
+    #   OPENAI_API_KEY      → only when provider = openai
+    #   LLM_API_KEY         → universal fallback (any provider, or when the
+    #                         provider-specific variable is unset or empty)
+    _provider = llm_raw["provider"]
+    if _provider == "openrouter":
+        _provider_key = os.environ.get("OPENROUTER_API_KEY", "")
+    elif _provider == "openai":
+        _provider_key = os.environ.get("OPENAI_API_KEY", "")
+    else:
+        _provider_key = ""
+    _api_key = _provider_key or os.environ.get("LLM_API_KEY", "")
+
+    llm = LLMConfig(
+        provider=_provider,
+        base_url=llm_raw["base_url"],
+        model=llm_raw["model"],
+        timeout_seconds=int(llm_raw["timeout_seconds"]),
+        temperature=float(llm_raw["temperature"]),
+        max_tokens=int(llm_raw["max_tokens"]),
+        api_key=_api_key,
+    )
+
+    # Vision provider: explicit [vision].provider wins; otherwise reuse the
+    # LLM provider when it is one of the vision-capable ones, else openrouter.
+    vision_provider = vision_raw.get("provider", _provider if _provider in ("openai", "openrouter") else "openrouter")
+    # Key lookup order: provider-specific variable, VISION_API_KEY, LLM_API_KEY.
+    vision_api_key = (
+        os.environ.get("OPENROUTER_API_KEY", "")
+        if vision_provider == "openrouter"
+        else os.environ.get("OPENAI_API_KEY", "")
+    ) or os.environ.get("VISION_API_KEY", "") or os.environ.get("LLM_API_KEY", "")
+
+    # NOTE(review): when [vision] omits base_url/model, the [llm] values are
+    # reused even if vision_provider differs from the LLM provider (e.g.
+    # llm.provider = "ollama") — confirm this fallback is intended.
+    vision = VisionConfig(
+        enabled=bool(vision_raw.get("enabled", False)),
+        provider=vision_provider,
+        base_url=str(vision_raw.get("base_url", llm.base_url)),
+        model=str(vision_raw.get("model", llm.model)),
+        timeout_seconds=int(vision_raw.get("timeout_seconds", llm.timeout_seconds)),
+        temperature=float(vision_raw.get("temperature", 0.0)),
+        max_tokens=int(vision_raw.get("max_tokens", 350)),
+        scene_candidate_top_k=int(vision_raw.get("scene_candidate_top_k", 8)),
+        max_new_descriptions_per_run=int(vision_raw.get("max_new_descriptions_per_run", 12)),
+        max_seed_scenes=int(vision_raw.get("max_seed_scenes", 3)),
+        seed_points_per_scene=int(vision_raw.get("seed_points_per_scene", 12)),
+        seed_score=float(vision_raw.get("seed_score", 0.88)),
+        max_refine_candidates=int(vision_raw.get("max_refine_candidates", 6)),
+        local_scan_step_s=float(vision_raw.get("local_scan_step_s", 0.12)),
+        local_scan_max_points_per_scene=int(vision_raw.get("local_scan_max_points_per_scene", 180)),
+        local_scan_top_candidates=int(vision_raw.get("local_scan_top_candidates", 18)),
+        local_scan_tie_break_score_delta=float(vision_raw.get("local_scan_tie_break_score_delta", 0.08)),
+        multi_shot_cut_corr_threshold=float(vision_raw.get("multi_shot_cut_corr_threshold", 0.20)),
+        multi_shot_boundary_tolerance_s=float(vision_raw.get("multi_shot_boundary_tolerance_s", 0.20)),
+        fullscan_fallback=bool(vision_raw.get("fullscan_fallback", False)),
+        content_threshold=float(vision_raw.get("content_threshold", 0.22)),
+        similarity_threshold=float(vision_raw.get("similarity_threshold", 0.18)),
+        api_key=vision_api_key,
+    )
+
+    export = ExportConfig(
+        fcpxml_version=str(export_raw["fcpxml_version"]),
+        edl_frame_rate=float(export_raw["edl_frame_rate"]),
+        output_format=export_raw["output_format"],
+    )
+
+    return AppConfig(
+        project_name=project["name"],
+        version=project["version"],
+        log_level=project["log_level"],
+        paths=paths,
+        video=video,
+        cv=CVConfig(vibe_check=vibe_check, deep_scan=deep_scan),
+        scene_detection=scene_detection,
+        whisper=whisper,
+        llm=llm,
+        vision=vision,
+        export=export,
+    )
@@ -0,0 +1,287 @@
+"""
+src/core/models.py — Canonical data models for AI Trailer Generator v2
+
+Rules:
+  - Every model is a frozen dataclass (immutable after creation).
+  - All fields are strictly typed; no bare dicts or untyped lists.
+  - Seconds are always float; frame numbers are always int.
+  - Confidence scores live in [0.0, 1.0].
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from enum import Enum, auto
+from pathlib import Path
+from typing import Optional
+
+
+# ===========================================================================
+# Enumerations
+# ===========================================================================
+
+class MatchMethod(Enum):
+    """CV template matching method (mirrors cv2.TM_* constants).
+
+    Each member's value is the integer of the cv2 constant with the same
+    name.
+    """
+    TM_SQDIFF         = 0
+    TM_SQDIFF_NORMED  = 1
+    TM_CCORR          = 2
+    TM_CCORR_NORMED   = 3
+    TM_CCOEFF         = 4
+    TM_CCOEFF_NORMED  = 5
+
+
+class BeatType(Enum):
+    """Narrative role of a trailer beat (for dramaturgy / LLM use only).
+
+    Values come from ``auto()``, so they are assigned in declaration order;
+    reordering the members changes their numeric values.
+    """
+    HOOK        = auto()   # Opening attention grabber
+    SETUP       = auto()   # World / character introduction
+    CONFLICT    = auto()   # Inciting incident / rising tension
+    CLIMAX      = auto()   # Peak action / emotion
+    RESOLUTION  = auto()   # Cool-down / tagline
+    UNKNOWN     = auto()   # Role not (yet) classified
+
+
+class ExportFormat(Enum):
+    """Export target selection; values are lowercase strings."""
+    FCPXML = "fcpxml"
+    EDL    = "edl"
+    BOTH   = "both"
+
+
+# ===========================================================================
+# Phase 0 — Source-movie scene index
+# ===========================================================================
+
+@dataclass(frozen=True)
+class DialogueLine:
+    """One transcribed utterance (Whisper output) with its time span."""
+    start_s: float                 # onset in seconds
+    end_s: float                   # offset in seconds
+    text: str                      # verbatim transcript
+    speaker: Optional[str] = None  # diarisation label if available
+
+    @property
+    def duration_s(self) -> float:
+        """Length of the line in seconds (``end_s - start_s``)."""
+        onset, offset = self.start_s, self.end_s
+        return offset - onset
+
+
+@dataclass(frozen=True)
+class Scene:
+    """
+    A single detected scene of the source movie.
+
+    Created from PySceneDetect output; Whisper dialogue and (optionally)
+    the Vibe Check fingerprints are attached afterwards.
+    """
+    scene_id: int       # zero-based index in source movie
+    source_path: Path   # absolute path to the source video file
+    start_s: float      # scene start in seconds
+    end_s: float        # scene end in seconds
+    start_frame: int    # first frame number
+    end_frame: int      # last frame number
+
+    # Populated after Vibe Check fingerprinting
+    luma_hist: Optional[bytes] = None  # serialised np.ndarray (pickle)
+    sat_hist: Optional[bytes] = None
+    phash: Optional[str] = None        # 64-bit hex string
+
+    # Populated after Whisper pass
+    dialogue: tuple[DialogueLine, ...] = field(default_factory=tuple)
+
+    @property
+    def duration_s(self) -> float:
+        """Scene length in seconds (``end_s - start_s``)."""
+        begin, finish = self.start_s, self.end_s
+        return finish - begin
+
+    @property
+    def midpoint_s(self) -> float:
+        """Timestamp halfway through the scene, in seconds."""
+        half_length = self.duration_s / 2.0
+        return self.start_s + half_length
+
+    def __repr__(self) -> str:
+        """Compact one-line summary: id, time span and duration."""
+        return "Scene(id={}, {:.2f}s–{:.2f}s, dur={:.2f}s)".format(
+            self.scene_id, self.start_s, self.end_s, self.duration_s
+        )
+
+
+# ===========================================================================
+# Phase 1 — Reference-trailer beat
+# ===========================================================================
+
+@dataclass(frozen=True)
+class TrailerBeat:
+    """
+    A single cut / segment of the reference trailer.
+
+    A beat is the atomic unit of a trailer: it corresponds to exactly one
+    clip that is later sourced from the original movie.
+    """
+    beat_id: int
+    trailer_path: Path
+    start_s: float
+    end_s: float
+    start_frame: int
+    end_frame: int
+
+    beat_type: BeatType = BeatType.UNKNOWN  # set by LLM dramaturgy pass
+
+    # Visual fingerprints of the *middle* frame (populated by CV pipeline)
+    luma_hist: Optional[bytes] = None
+    sat_hist: Optional[bytes] = None
+    phash: Optional[str] = None
+
+    # Dialogue extracted from this beat
+    dialogue: tuple[DialogueLine, ...] = field(default_factory=tuple)
+
+    @property
+    def duration_s(self) -> float:
+        """Beat length in seconds (``end_s - start_s``)."""
+        begin, finish = self.start_s, self.end_s
+        return finish - begin
+
+    @property
+    def midpoint_s(self) -> float:
+        """Timestamp halfway through the beat, in seconds."""
+        half_length = self.duration_s / 2.0
+        return self.start_s + half_length
+
+    def __repr__(self) -> str:
+        """Compact one-line summary: id, beat type name and time span."""
+        return "TrailerBeat(id={}, {}, {:.2f}s–{:.2f}s)".format(
+            self.beat_id, self.beat_type.name, self.start_s, self.end_s
+        )
+
+
+# ===========================================================================
+# Phase 2 — CV match result
+# ===========================================================================
+
+@dataclass(frozen=True)
+class VibeHit:
+    """
+    Intermediate result from Phase 1 (Vibe Check — histogram/pHash).
+
+    Represents a *candidate* scene that passed the coarse filter.
+    Not yet a confirmed match; forwarded to Deep Scan.
+    """
+    beat_id:            int
+    scene_id:           int
+    # NOTE(review): a raw correlation-based histogram comparison can be
+    # negative; confirm the producer clamps/normalises into [0.0, 1.0].
+    hist_score:         float   # histogram similarity [0.0, 1.0] (CORREL method)
+    phash_distance:     int     # Hamming distance [0, 64]; lower = more similar
+    combined_score:     float   # weighted aggregate used for ranking
+
+
+@dataclass(frozen=True)
+class MatchSegment:
+    """
+    One source-backed visual island inside a trailer beat.
+
+    Some trailer beats contain multiple shots separated by fades/title frames.
+    A single continuous source in/out cannot represent those beats accurately,
+    so a MatchResult can carry several of these in its ``segments`` tuple.
+    """
+    # Presumably measured from the start of the owning beat — confirm
+    # against the code that builds segments.
+    trailer_offset_s:   float
+    duration_s:         float
+    scene_id:           int
+    in_point_s:         float
+    out_point_s:        float
+    match_score:        float
+    is_confirmed:       bool = True
+
+
+@dataclass(frozen=True)
+class MatchResult:
+    """
+    Outcome of Phase 2 (Deep Scan — template matching) for one trailer beat.
+
+    There is one MatchResult per TrailerBeat: the best frame-accurate hit
+    found inside the source movie.
+    """
+    beat_id: int        # which trailer beat was matched
+    scene_id: int       # which source scene contains the match
+    source_path: Path   # absolute path to source video
+
+    # Frame-accurate in-point / out-point in the SOURCE movie
+    in_point_s: float     # matched frame onset in source seconds
+    out_point_s: float    # computed out-point (in_point + beat duration)
+    in_point_frame: int   # matched frame number in source movie
+
+    # Match quality
+    match_score: float    # cv2.matchTemplate peak value [0.0, 1.0]
+    # (x, y) pixel location of the best match within the source frame
+    match_location: tuple[int, int] = field(default_factory=lambda: (0, 0))
+
+    # Provenance
+    vibe_hit: Optional[VibeHit] = None  # the candidate that led here
+    is_confirmed: bool = True
+    segments: tuple[MatchSegment, ...] = field(default_factory=tuple)
+
+    @property
+    def duration_s(self) -> float:
+        """Length of the matched source span in seconds."""
+        entry, exit_ = self.in_point_s, self.out_point_s
+        return exit_ - entry
+
+    def __repr__(self) -> str:
+        """Compact one-line summary: beat → scene, in-point and score."""
+        return "MatchResult(beat={} → scene={}, in={:.3f}s, score={:.3f})".format(
+            self.beat_id, self.scene_id, self.in_point_s, self.match_score
+        )
+
+
+# ===========================================================================
+# Phase 3 — Edit timeline (pre-export)
+# ===========================================================================
+
+@dataclass(frozen=True)
+class EditClip:
+    """
+    A single clip on the final edit timeline, ready for FCPXML / EDL export.
+
+    Pairs the trailer beat with the source in/out points of its match.
+    """
+    clip_index: int   # position on the timeline (0-based)
+    beat: TrailerBeat
+    match: MatchResult
+
+    # Timeline position (in the OUTPUT trailer)
+    timeline_start_s: float
+    timeline_end_s: float
+    source_duration_s: float | None = None
+    trailer_tail_s: float = 0.0
+
+    # Optional audio override (e.g. VO or music)
+    audio_path: Optional[Path] = None
+    audio_offset_s: float = 0.0
+
+    @property
+    def timeline_duration_s(self) -> float:
+        """Length of the clip's slot on the output timeline, in seconds."""
+        begin, finish = self.timeline_start_s, self.timeline_end_s
+        return finish - begin
+
+    @property
+    def source_timeline_duration_s(self) -> float:
+        """Explicit ``source_duration_s`` floored at 0.0, else the slot length."""
+        explicit = self.source_duration_s
+        if explicit is None:
+            return self.timeline_duration_s
+        return max(0.0, explicit)
+
+    def __repr__(self) -> str:
+        """Compact one-line summary: index, timeline span and source in-point."""
+        return "EditClip(#{}, tl={:.2f}s–{:.2f}s, src={:.3f}s)".format(
+            self.clip_index,
+            self.timeline_start_s,
+            self.timeline_end_s,
+            self.match.in_point_s,
+        )
+
+
+@dataclass(frozen=True)
+class EditTimeline:
+    """
+    The full ordered run of EditClips that makes up the trailer.
+
+    This is the object handed to the export layer (FCPXML / EDL writer).
+    """
+    title: str
+    frame_rate: float             # e.g. 23.976
+    clips: tuple[EditClip, ...]   # ordered by clip_index
+
+    @property
+    def total_duration_s(self) -> float:
+        """Largest ``timeline_end_s`` among the clips; 0.0 when there are none."""
+        return max((clip.timeline_end_s for clip in self.clips), default=0.0)
+
+    @property
+    def clip_count(self) -> int:
+        """Number of clips on the timeline."""
+        return len(self.clips)