# aitrailer/src/core/config.py

"""
src/core/config.py — Configuration loader for AI Trailer Generator v2

Loads config.toml and exposes typed, nested dataclasses.
All CV thresholds, paths, and model settings are sourced exclusively here.
API keys are NEVER stored in config.toml; they are loaded from .env.
"""

from __future__ import annotations

import os
import tomllib

try:
    from dotenv import load_dotenv as _load_dotenv
    _HAS_DOTENV = True
except ImportError:  # dotenv optional — falls back to existing env vars
    _HAS_DOTENV = False
from dataclasses import dataclass, field
from pathlib import Path
from typing import Literal


# ---------------------------------------------------------------------------
# Leaf sections
# ---------------------------------------------------------------------------

@dataclass(frozen=True)
class PathsConfig:
    """Filesystem locations read from the ``[paths]`` table of config.toml.

    All five keys are required.  ``load_config`` keeps absolute entries as
    written and resolves relative entries against the directory that contains
    the config file.  Instances are immutable (``frozen=True``).
    """
    source_movie: Path       # from paths.source_movie
    reference_trailer: Path  # from paths.reference_trailer
    output_dir: Path         # from paths.output_dir
    cache_dir: Path          # from paths.cache_dir
    proxy_dir: Path          # from paths.proxy_dir


@dataclass(frozen=True)
class VideoConfig:
    """Settings read from the ``[video]`` table of config.toml.

    All three keys are required; ``load_config`` coerces them with
    ``float()`` / ``int()``.  Instances are immutable (``frozen=True``).
    """
    extract_fps: float  # presumably the frame-extraction rate in frames/second — TODO confirm
    proxy_width: int    # presumably proxy video width in pixels — TODO confirm
    proxy_height: int   # presumably proxy video height in pixels — TODO confirm


@dataclass(frozen=True)
class VibeCheckConfig:
    """Settings read from the ``[cv.vibe_check]`` table of config.toml.

    Every key is required; ``load_config`` coerces the values with ``int()``
    or ``float()`` according to the annotations below.  Instances are
    immutable (``frozen=True``).
    """
    top_k_candidates: int
    hist_compare_method: int  # NOTE(review): looks like a numeric method id for a CV library — confirm
    hist_bins_hue: int
    hist_bins_saturation: int
    phash_max_distance: int
    crop_top_fraction: float     # presumably a fraction of frame height — TODO confirm range
    crop_bottom_fraction: float  # presumably a fraction of frame height — TODO confirm range


@dataclass(frozen=True)
class DeepScanConfig:
    """Settings read from the ``[cv.deep_scan]`` table of config.toml.

    ``load_config`` requires ``coarse_step_seconds``, ``match_threshold``,
    ``match_method`` and ``refine_step_seconds``.  Every other key is optional
    and falls back to the default noted beside the field.  Numeric values are
    coerced with ``float()`` / ``int()``.  Instances are immutable
    (``frozen=True``).
    """
    coarse_step_seconds: float                  # required
    match_threshold: float                      # required
    provisional_match_threshold: float          # optional, default 0.43
    coarse_candidate_threshold: float           # optional, defaults to match_threshold
    sequence_score_weight: float                # optional, default 0.55
    span_score_weight: float                    # optional, default 0.15
    coarse_score_weight: float                  # optional, default 0.10
    duration_score_weight: float                # optional, default 0.20
    duration_tie_break_score_delta: float       # optional, default 0.03
    min_duration_coverage: float                # optional, default 0.65
    continuity_seed_offsets_s: tuple[float, ...]  # optional, default (-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0)
    scene_seed_top_k: int                       # optional, default 30
    scene_seed_points_per_scene: int            # optional, default 6
    content_rerank_candidate_count: int         # optional, default 100
    skip_coarse_scan_with_weighted_seeds: bool  # optional, default False
    max_refine_candidates: int                  # optional, default 6
    match_method: int                           # required; NOTE(review): looks like a numeric method id — confirm
    refine_window_seconds: float                # optional, default 0.6
    refine_step_seconds: float                  # required
    content_align_window_seconds: float         # optional, default 0.48
    content_align_sample_step_s: float          # optional, default 0.28
    content_validation_weight: float            # optional, default 0.35
    provisional_content_threshold: float        # optional, default 0.42
    start_tie_break_score_delta: float          # optional, default 0.015
    start_preroll_frames: int                   # optional, default 0
    sequence_candidate_count: int               # optional, default 240
    sequence_min_distance_s: float              # optional, default 1.0
    span_sample_step_s: float                   # optional, default 0.08
    trim_tail_frames: int                       # optional, default 2
    scene_boundary_epsilon_s: float             # optional, default 0.12
    scoreable_luma_mean_min: float              # optional, default 24.0
    scoreable_luma_p90_min: float               # optional, default 58.0
    scoreable_contrast_min: float               # optional, default 24.0


@dataclass(frozen=True)
class CVConfig:
    """Container for the two sub-tables of the ``[cv]`` table in config.toml.

    Instances are immutable (``frozen=True``).
    """
    vibe_check: VibeCheckConfig  # built from [cv.vibe_check]
    deep_scan: DeepScanConfig    # built from [cv.deep_scan]


@dataclass(frozen=True)
class SceneDetectionConfig:
    """Settings read from the ``[scene_detection]`` table of config.toml.

    Both keys are required and coerced with ``float()`` by ``load_config``.
    Instances are immutable (``frozen=True``).
    """
    content_threshold: float
    min_scene_duration_s: float  # presumably seconds, per the ``_s`` suffix — TODO confirm


@dataclass(frozen=True)
class WhisperConfig:
    """Settings read from the ``[whisper]`` table of config.toml.

    All four keys are required.  ``load_config`` copies the TOML values
    through without coercion, so the ``Literal`` annotations below document
    the expected values but are not enforced at load time.  Instances are
    immutable (``frozen=True``).
    """
    model: str
    language: str
    device: Literal["cuda", "cpu"]
    compute_type: Literal["float16", "int8", "float32"]


@dataclass(frozen=True)
class LLMConfig:
    """Settings read from the ``[llm]`` table of config.toml, plus the API key.

    All TOML keys are required.  ``provider``, ``base_url`` and ``model`` are
    copied through as-is (the ``Literal`` annotation on ``provider`` is not
    enforced at load time); the numeric fields are coerced with ``int()`` /
    ``float()``.  Instances are immutable (``frozen=True``).
    """
    provider: Literal["ollama", "openai", "openrouter"]
    base_url: str
    model: str
    timeout_seconds: int
    temperature: float
    max_tokens: int
    # Loaded from .env — NEVER committed to version control
    # load_config fills this from OPENROUTER_API_KEY (provider "openrouter") or
    # OPENAI_API_KEY (provider "openai"), falling back to LLM_API_KEY; it stays
    # "" when none of those environment variables is set.
    api_key: str = ""


@dataclass(frozen=True)
class VisionConfig:
    """Settings read from the optional ``[vision]`` table of config.toml.

    The whole table may be omitted: ``load_config`` supplies a default for
    every key (noted beside each field).  Instances are immutable
    (``frozen=True``).
    """
    enabled: bool                           # default False
    # Defaults to the [llm] provider when that is "openai" or "openrouter",
    # otherwise to "openrouter".  Not validated at load time.
    provider: Literal["openai", "openrouter"]
    base_url: str                           # defaults to the [llm] base_url
    model: str                              # defaults to the [llm] model
    timeout_seconds: int                    # defaults to the [llm] timeout_seconds
    temperature: float                      # default 0.0
    max_tokens: int                         # default 350
    scene_candidate_top_k: int              # default 8
    max_new_descriptions_per_run: int       # default 12
    max_seed_scenes: int                    # default 3
    seed_points_per_scene: int              # default 12
    seed_score: float                       # default 0.88
    max_refine_candidates: int              # default 6
    local_scan_step_s: float                # default 0.12
    local_scan_max_points_per_scene: int    # default 180
    local_scan_top_candidates: int          # default 18
    local_scan_tie_break_score_delta: float  # default 0.08
    multi_shot_cut_corr_threshold: float    # default 0.20
    multi_shot_boundary_tolerance_s: float  # default 0.20
    fullscan_fallback: bool                 # default False
    content_threshold: float                # default 0.22
    similarity_threshold: float             # default 0.18
    # Never read from config.toml.  load_config takes OPENROUTER_API_KEY when
    # provider is "openrouter" and OPENAI_API_KEY otherwise, then falls back to
    # VISION_API_KEY and finally LLM_API_KEY; "" when none is set.
    api_key: str = ""


@dataclass(frozen=True)
class ExportConfig:
    """Settings read from the ``[export]`` table of config.toml.

    All three keys are required.  ``fcpxml_version`` is coerced with ``str()``
    and ``edl_frame_rate`` with ``float()``; ``output_format`` is copied
    through, so its ``Literal`` annotation is not enforced at load time.
    Instances are immutable (``frozen=True``).
    """
    fcpxml_version: str
    edl_frame_rate: float
    output_format: Literal["fcpxml", "edl", "both"]


# ---------------------------------------------------------------------------
# Root config — single object passed through the entire application
# ---------------------------------------------------------------------------

@dataclass(frozen=True)
class AppConfig:
    """Root configuration object returned by ``load_config``.

    Bundles the ``[project]`` scalars with one nested dataclass per
    config.toml section.  Instances are immutable (``frozen=True``).
    """
    # From the [project] table (keys: name, version, log_level).  The values
    # are copied through as-is; the Literal on log_level is not enforced at
    # load time.
    project_name: str
    version: str
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"]

    # One nested, frozen dataclass per config section.
    paths: PathsConfig
    video: VideoConfig
    cv: CVConfig
    scene_detection: SceneDetectionConfig
    whisper: WhisperConfig
    llm: LLMConfig
    vision: VisionConfig
    export: ExportConfig


# ---------------------------------------------------------------------------
# Loader
# ---------------------------------------------------------------------------

# Default file locations used by load_config().  ``parents[2]`` is the directory
# two levels above the one containing this module; with this file living at
# src/core/config.py that is the project root.
_DEFAULT_CONFIG_PATH = Path(__file__).parents[2] / "config.toml"
_DEFAULT_ENV_PATH    = Path(__file__).parents[2] / ".env"


def _first_env(*names: str) -> str:
    """Return the first non-empty value among the environment variables *names*.

    Variables are tried in the order given; "" is returned when none is set
    (or all of them are empty).
    """
    for name in names:
        value = os.environ.get(name, "")
        if value:
            return value
    return ""


def _toml_bool(table: dict, key: str, default: bool = False) -> bool:
    """Return ``table[key]`` (or *default* when absent), insisting on a boolean.

    ``bool("false")`` is ``True``, so blindly coercing a mistyped TOML string
    would silently flip the flag.  A TypeError is raised instead.
    """
    value = table.get(key, default)
    if not isinstance(value, bool):
        raise TypeError(
            f"Config key {key!r} must be a TOML boolean (true/false), "
            f"got {type(value).__name__}: {value!r}"
        )
    return value


def _resolve_path(base_dir: Path, entry: str) -> Path:
    """Return *entry* unchanged if absolute, else resolved relative to *base_dir*."""
    path = Path(entry)
    return path if path.is_absolute() else (base_dir / path).resolve()


def _build_paths(paths_raw: dict, base_dir: Path) -> PathsConfig:
    """Build PathsConfig from the ``[paths]`` table; relative entries anchor at *base_dir*."""
    return PathsConfig(
        source_movie=_resolve_path(base_dir, paths_raw["source_movie"]),
        reference_trailer=_resolve_path(base_dir, paths_raw["reference_trailer"]),
        output_dir=_resolve_path(base_dir, paths_raw["output_dir"]),
        cache_dir=_resolve_path(base_dir, paths_raw["cache_dir"]),
        proxy_dir=_resolve_path(base_dir, paths_raw["proxy_dir"]),
    )


def _build_vibe_check(vc: dict) -> VibeCheckConfig:
    """Build VibeCheckConfig from the ``[cv.vibe_check]`` table (all keys required)."""
    return VibeCheckConfig(
        top_k_candidates=int(vc["top_k_candidates"]),
        hist_compare_method=int(vc["hist_compare_method"]),
        hist_bins_hue=int(vc["hist_bins_hue"]),
        hist_bins_saturation=int(vc["hist_bins_saturation"]),
        phash_max_distance=int(vc["phash_max_distance"]),
        crop_top_fraction=float(vc["crop_top_fraction"]),
        crop_bottom_fraction=float(vc["crop_bottom_fraction"]),
    )


def _build_deep_scan(ds: dict) -> DeepScanConfig:
    """Build DeepScanConfig from the ``[cv.deep_scan]`` table.

    Only ``coarse_step_seconds``, ``match_threshold``, ``match_method`` and
    ``refine_step_seconds`` are required; the rest fall back to built-in defaults.
    """
    return DeepScanConfig(
        coarse_step_seconds=float(ds["coarse_step_seconds"]),
        match_threshold=float(ds["match_threshold"]),
        provisional_match_threshold=float(ds.get("provisional_match_threshold", 0.43)),
        # Without an explicit value, coarse candidates use the main match threshold.
        coarse_candidate_threshold=float(
            ds.get("coarse_candidate_threshold", ds["match_threshold"])
        ),
        sequence_score_weight=float(ds.get("sequence_score_weight", 0.55)),
        span_score_weight=float(ds.get("span_score_weight", 0.15)),
        coarse_score_weight=float(ds.get("coarse_score_weight", 0.10)),
        duration_score_weight=float(ds.get("duration_score_weight", 0.20)),
        duration_tie_break_score_delta=float(ds.get("duration_tie_break_score_delta", 0.03)),
        min_duration_coverage=float(ds.get("min_duration_coverage", 0.65)),
        # Stored as a tuple so the frozen dataclass stays hashable and immutable.
        continuity_seed_offsets_s=tuple(
            float(v)
            for v in ds.get(
                "continuity_seed_offsets_s",
                [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0],
            )
        ),
        scene_seed_top_k=int(ds.get("scene_seed_top_k", 30)),
        scene_seed_points_per_scene=int(ds.get("scene_seed_points_per_scene", 6)),
        content_rerank_candidate_count=int(ds.get("content_rerank_candidate_count", 100)),
        skip_coarse_scan_with_weighted_seeds=_toml_bool(
            ds, "skip_coarse_scan_with_weighted_seeds", False
        ),
        max_refine_candidates=int(ds.get("max_refine_candidates", 6)),
        match_method=int(ds["match_method"]),
        refine_window_seconds=float(ds.get("refine_window_seconds", 0.6)),
        refine_step_seconds=float(ds["refine_step_seconds"]),
        content_align_window_seconds=float(ds.get("content_align_window_seconds", 0.48)),
        content_align_sample_step_s=float(ds.get("content_align_sample_step_s", 0.28)),
        content_validation_weight=float(ds.get("content_validation_weight", 0.35)),
        provisional_content_threshold=float(ds.get("provisional_content_threshold", 0.42)),
        start_tie_break_score_delta=float(ds.get("start_tie_break_score_delta", 0.015)),
        start_preroll_frames=int(ds.get("start_preroll_frames", 0)),
        sequence_candidate_count=int(ds.get("sequence_candidate_count", 240)),
        sequence_min_distance_s=float(ds.get("sequence_min_distance_s", 1.0)),
        span_sample_step_s=float(ds.get("span_sample_step_s", 0.08)),
        trim_tail_frames=int(ds.get("trim_tail_frames", 2)),
        scene_boundary_epsilon_s=float(ds.get("scene_boundary_epsilon_s", 0.12)),
        scoreable_luma_mean_min=float(ds.get("scoreable_luma_mean_min", 24.0)),
        scoreable_luma_p90_min=float(ds.get("scoreable_luma_p90_min", 58.0)),
        scoreable_contrast_min=float(ds.get("scoreable_contrast_min", 24.0)),
    )


def _build_llm(llm_raw: dict) -> LLMConfig:
    """Build LLMConfig from the ``[llm]`` table and the environment.

    API key lookup order: OPENROUTER_API_KEY (provider "openrouter") or
    OPENAI_API_KEY (provider "openai"), then the universal LLM_API_KEY.
    """
    provider = llm_raw["provider"]
    if provider == "openrouter":
        provider_vars = ("OPENROUTER_API_KEY",)
    elif provider == "openai":
        provider_vars = ("OPENAI_API_KEY",)
    else:
        # e.g. a local provider such as ollama: no provider-specific variable.
        provider_vars = ()

    return LLMConfig(
        provider=provider,
        base_url=llm_raw["base_url"],
        model=llm_raw["model"],
        timeout_seconds=int(llm_raw["timeout_seconds"]),
        temperature=float(llm_raw["temperature"]),
        max_tokens=int(llm_raw["max_tokens"]),
        api_key=_first_env(*provider_vars, "LLM_API_KEY"),
    )


def _build_vision(vision_raw: dict, llm: LLMConfig) -> VisionConfig:
    """Build VisionConfig from the optional ``[vision]`` table.

    Every key is optional.  Connection settings default to the LLM's; the
    provider defaults to the LLM provider when that is "openai" or
    "openrouter", else to "openrouter".  API key lookup order: the
    provider-specific variable (OPENROUTER_API_KEY for "openrouter",
    OPENAI_API_KEY otherwise), then VISION_API_KEY, then LLM_API_KEY.
    """
    default_provider = llm.provider if llm.provider in ("openai", "openrouter") else "openrouter"
    provider = vision_raw.get("provider", default_provider)
    provider_var = "OPENROUTER_API_KEY" if provider == "openrouter" else "OPENAI_API_KEY"
    api_key = _first_env(provider_var, "VISION_API_KEY", "LLM_API_KEY")

    return VisionConfig(
        enabled=_toml_bool(vision_raw, "enabled", False),
        provider=provider,
        # NOTE(review): when [vision] omits base_url/model, the LLM's values are
        # reused even if the vision provider differs from the LLM provider —
        # confirm this is intended.
        base_url=str(vision_raw.get("base_url", llm.base_url)),
        model=str(vision_raw.get("model", llm.model)),
        timeout_seconds=int(vision_raw.get("timeout_seconds", llm.timeout_seconds)),
        temperature=float(vision_raw.get("temperature", 0.0)),
        max_tokens=int(vision_raw.get("max_tokens", 350)),
        scene_candidate_top_k=int(vision_raw.get("scene_candidate_top_k", 8)),
        max_new_descriptions_per_run=int(vision_raw.get("max_new_descriptions_per_run", 12)),
        max_seed_scenes=int(vision_raw.get("max_seed_scenes", 3)),
        seed_points_per_scene=int(vision_raw.get("seed_points_per_scene", 12)),
        seed_score=float(vision_raw.get("seed_score", 0.88)),
        max_refine_candidates=int(vision_raw.get("max_refine_candidates", 6)),
        local_scan_step_s=float(vision_raw.get("local_scan_step_s", 0.12)),
        local_scan_max_points_per_scene=int(vision_raw.get("local_scan_max_points_per_scene", 180)),
        local_scan_top_candidates=int(vision_raw.get("local_scan_top_candidates", 18)),
        local_scan_tie_break_score_delta=float(vision_raw.get("local_scan_tie_break_score_delta", 0.08)),
        multi_shot_cut_corr_threshold=float(vision_raw.get("multi_shot_cut_corr_threshold", 0.20)),
        multi_shot_boundary_tolerance_s=float(vision_raw.get("multi_shot_boundary_tolerance_s", 0.20)),
        fullscan_fallback=_toml_bool(vision_raw, "fullscan_fallback", False),
        content_threshold=float(vision_raw.get("content_threshold", 0.22)),
        similarity_threshold=float(vision_raw.get("similarity_threshold", 0.18)),
        api_key=api_key,
    )


def load_config(
    config_path: str | os.PathLike[str] = _DEFAULT_CONFIG_PATH,
    env_path: str | os.PathLike[str] = _DEFAULT_ENV_PATH,
) -> AppConfig:
    """
    Parse config.toml and return a fully-typed, immutable AppConfig.

    API keys are read from the .env file (or existing environment variables);
    they are never stored in config.toml.

    Args:
        config_path: Absolute or relative path to the TOML file, as a
                     ``Path`` or a plain string.
                     Defaults to <project_root>/config.toml.
        env_path:    Path to the .env file, passed unchanged to
                     ``dotenv.load_dotenv`` when python-dotenv is installed.
                     Defaults to <project_root>/.env.

    Returns:
        The populated AppConfig.  Relative entries in ``[paths]`` are resolved
        against the directory containing the config file; absolute entries
        are kept as-is.

    Raises:
        FileNotFoundError: If the TOML file does not exist.
        KeyError / TypeError: If a required key is missing or has the wrong
            type (including a non-boolean value for a boolean flag).
        ValueError: If a numeric value cannot be converted by int()/float().
    """
    # Accept plain strings as well as Path objects.
    config_path = Path(config_path)

    # Load .env first so os.environ is populated before API keys are read below.
    # override=False: variables already present in the environment win.
    if _HAS_DOTENV:
        _load_dotenv(dotenv_path=env_path, override=False)

    if not config_path.exists():
        raise FileNotFoundError(
            f"Config file not found: {config_path}\n"
            "Copy config.toml.example to config.toml and adjust your paths."
        )

    with config_path.open("rb") as fh:
        raw: dict = tomllib.load(fh)

    # Pull every top-level table up front so a missing section fails fast,
    # before any values are converted.  Only [vision] is optional.
    project = raw["project"]
    paths_raw = raw["paths"]
    video_raw = raw["video"]
    cv_raw = raw["cv"]
    sd_raw = raw["scene_detection"]
    whisper_raw = raw["whisper"]
    llm_raw = raw["llm"]
    vision_raw = raw.get("vision", {})
    export_raw = raw["export"]

    # Relative paths are anchored at the config file's directory so the
    # project stays relocatable.
    paths = _build_paths(paths_raw, config_path.parent)

    video = VideoConfig(
        extract_fps=float(video_raw["extract_fps"]),
        proxy_width=int(video_raw["proxy_width"]),
        proxy_height=int(video_raw["proxy_height"]),
    )

    vibe_check = _build_vibe_check(cv_raw["vibe_check"])
    deep_scan = _build_deep_scan(cv_raw["deep_scan"])

    scene_detection = SceneDetectionConfig(
        content_threshold=float(sd_raw["content_threshold"]),
        min_scene_duration_s=float(sd_raw["min_scene_duration_s"]),
    )

    whisper = WhisperConfig(
        model=whisper_raw["model"],
        language=whisper_raw["language"],
        device=whisper_raw["device"],
        compute_type=whisper_raw["compute_type"],
    )

    llm = _build_llm(llm_raw)
    vision = _build_vision(vision_raw, llm)

    export = ExportConfig(
        fcpxml_version=str(export_raw["fcpxml_version"]),
        edl_frame_rate=float(export_raw["edl_frame_rate"]),
        output_format=export_raw["output_format"],
    )

    return AppConfig(
        project_name=project["name"],
        version=project["version"],
        log_level=project["log_level"],
        paths=paths,
        video=video,
        cv=CVConfig(vibe_check=vibe_check, deep_scan=deep_scan),
        scene_detection=scene_detection,
        whisper=whisper,
        llm=llm,
        vision=vision,
        export=export,
    )