Initial project import
This commit is contained in:
@@ -0,0 +1 @@
|
||||
# src.core package
|
||||
@@ -0,0 +1,387 @@
|
||||
"""
|
||||
src/core/config.py — Configuration loader for AI Trailer Generator v2
|
||||
|
||||
Loads config.toml and exposes typed, nested dataclasses.
|
||||
All CV thresholds, paths, and model settings are sourced exclusively here.
|
||||
API keys are NEVER stored in config.toml; they are loaded from .env.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import tomllib
|
||||
|
||||
try:
|
||||
from dotenv import load_dotenv as _load_dotenv
|
||||
_HAS_DOTENV = True
|
||||
except ImportError: # dotenv optional — falls back to existing env vars
|
||||
_HAS_DOTENV = False
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Leaf sections
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass(frozen=True)
class PathsConfig:
    """Filesystem locations taken from the ``[paths]`` table of config.toml.

    ``load_config`` keeps absolute entries as-is and resolves relative ones
    against the directory that contains the config file.
    """

    # Input media.
    source_movie: Path
    reference_trailer: Path

    # Working / output directories.
    output_dir: Path
    cache_dir: Path
    proxy_dir: Path
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VideoConfig:
    """Values of the ``[video]`` table; all three keys are required.

    ``load_config`` coerces ``extract_fps`` to float and the proxy
    dimensions to int.
    """

    extract_fps: float
    proxy_width: int
    proxy_height: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VibeCheckConfig:
    """Values of the ``[cv.vibe_check]`` table (histogram / pHash pre-filter).

    Every key is required by ``load_config``; there are no loader-side
    defaults for this section.
    """

    top_k_candidates: int

    # Histogram comparison settings.
    hist_compare_method: int  # presumably a cv2.HISTCMP_* constant — confirm with consumer
    hist_bins_hue: int
    hist_bins_saturation: int

    # Perceptual-hash cut-off.
    phash_max_distance: int

    # Fractions cropped from the top / bottom of a frame before comparison
    # (going by the field names; see the consumer for exact semantics).
    crop_top_fraction: float
    crop_bottom_fraction: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DeepScanConfig:
    """Values of the ``[cv.deep_scan]`` table (template-matching stage).

    Only ``coarse_step_seconds``, ``match_threshold``, ``match_method`` and
    ``refine_step_seconds`` are mandatory in config.toml; ``load_config``
    supplies a default for every other field.  Field order is part of the
    constructor signature and must not be changed.
    """

    # --- Coarse scan and acceptance thresholds ---------------------------
    coarse_step_seconds: float
    match_threshold: float
    provisional_match_threshold: float
    coarse_candidate_threshold: float  # loader default: same as match_threshold

    # --- Weights used when blending partial scores -----------------------
    sequence_score_weight: float
    span_score_weight: float
    coarse_score_weight: float
    duration_score_weight: float
    duration_tie_break_score_delta: float
    min_duration_coverage: float

    # --- Seeding / candidate selection -----------------------------------
    continuity_seed_offsets_s: tuple[float, ...]
    scene_seed_top_k: int
    scene_seed_points_per_scene: int
    content_rerank_candidate_count: int
    skip_coarse_scan_with_weighted_seeds: bool
    max_refine_candidates: int

    # --- Template matching and refinement --------------------------------
    match_method: int
    refine_window_seconds: float
    refine_step_seconds: float

    # --- Content alignment / validation ----------------------------------
    content_align_window_seconds: float
    content_align_sample_step_s: float
    content_validation_weight: float
    provisional_content_threshold: float

    # --- Start-point selection and sequence sampling ---------------------
    start_tie_break_score_delta: float
    start_preroll_frames: int
    sequence_candidate_count: int
    sequence_min_distance_s: float
    span_sample_step_s: float
    trim_tail_frames: int
    scene_boundary_epsilon_s: float

    # --- Minimum frame statistics for a frame to be scoreable ------------
    scoreable_luma_mean_min: float
    scoreable_luma_p90_min: float
    scoreable_contrast_min: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CVConfig:
    """Container for the two sub-tables of ``[cv]`` in config.toml."""

    vibe_check: VibeCheckConfig  # from [cv.vibe_check]
    deep_scan: DeepScanConfig  # from [cv.deep_scan]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SceneDetectionConfig:
    """Values of the ``[scene_detection]`` table; both keys are required."""

    content_threshold: float
    min_scene_duration_s: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class WhisperConfig:
    """Values of the ``[whisper]`` table.

    ``load_config`` passes these through without coercion, so the
    ``Literal`` annotations document the expected values but are not
    enforced at load time.
    """

    model: str
    language: str
    device: Literal["cuda", "cpu"]
    compute_type: Literal["float16", "int8", "float32"]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LLMConfig:
    """Connection and sampling settings for the text LLM (``[llm]`` table).

    ``api_key`` is not read from config.toml; ``load_config`` fills it from
    environment variables (optionally populated from ``.env``).
    """

    provider: Literal["ollama", "openai", "openrouter"]
    base_url: str
    model: str
    timeout_seconds: int
    temperature: float
    max_tokens: int
    # Loaded from .env — NEVER committed to version control.
    # repr=False keeps the secret out of repr()/str(), so logging this object
    # (or the AppConfig that contains it) cannot leak the key.
    api_key: str = field(default="", repr=False)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VisionConfig:
    """Settings for the optional vision model (``[vision]`` table).

    The whole table may be absent from config.toml: ``load_config`` provides
    a default for every field, borrowing ``base_url``, ``model`` and
    ``timeout_seconds`` from the LLM section when they are not given.
    ``api_key`` comes from environment variables, never from the TOML file.
    Field order is part of the constructor signature and must not change.
    """

    # --- Backend connection ------------------------------------------------
    enabled: bool
    provider: Literal["openai", "openrouter"]
    base_url: str
    model: str
    timeout_seconds: int
    temperature: float
    max_tokens: int

    # --- Scene description / seeding budget --------------------------------
    scene_candidate_top_k: int
    max_new_descriptions_per_run: int
    max_seed_scenes: int
    seed_points_per_scene: int
    seed_score: float
    max_refine_candidates: int

    # --- Local scan around seeded scenes -----------------------------------
    local_scan_step_s: float
    local_scan_max_points_per_scene: int
    local_scan_top_candidates: int
    local_scan_tie_break_score_delta: float

    # --- Multi-shot beat handling ------------------------------------------
    multi_shot_cut_corr_threshold: float
    multi_shot_boundary_tolerance_s: float

    # --- Fallback and acceptance thresholds --------------------------------
    fullscan_fallback: bool
    content_threshold: float
    similarity_threshold: float

    # Secret loaded from the environment.  repr=False keeps it out of
    # repr()/str() so that logging the config cannot leak the key.
    api_key: str = field(default="", repr=False)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ExportConfig:
    """Values of the ``[export]`` table; all keys are required.

    ``output_format`` is passed through by ``load_config`` unvalidated; the
    ``Literal`` annotation lists the values the application expects.
    """

    fcpxml_version: str
    edl_frame_rate: float
    output_format: Literal["fcpxml", "edl", "both"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Root config — single object passed through the entire application
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass(frozen=True)
class AppConfig:
    """Root configuration object returned by ``load_config``.

    The three scalar fields come from the ``[project]`` table (``name``,
    ``version``, ``log_level``); each remaining field wraps one section of
    config.toml in its own frozen dataclass.
    """

    # [project]
    project_name: str
    version: str
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"]

    # One typed sub-config per TOML section.
    paths: PathsConfig
    video: VideoConfig
    cv: CVConfig
    scene_detection: SceneDetectionConfig
    whisper: WhisperConfig
    llm: LLMConfig
    vision: VisionConfig
    export: ExportConfig
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Loader
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Directory two levels above the one containing this module; the default
# config.toml and .env files are looked up there.
_PROJECT_ROOT = Path(__file__).parents[2]
_DEFAULT_CONFIG_PATH = _PROJECT_ROOT / "config.toml"
_DEFAULT_ENV_PATH = _PROJECT_ROOT / ".env"
|
||||
|
||||
|
||||
def _first_env(*names: str) -> str:
    """Return the first non-empty value among the environment variables *names*.

    Returns "" when none of them is set (or all of them are empty).
    """
    for name in names:
        value = os.environ.get(name, "")
        if value:
            return value
    return ""


def load_config(
    config_path: str | os.PathLike[str] = _DEFAULT_CONFIG_PATH,
    env_path: str | os.PathLike[str] = _DEFAULT_ENV_PATH,
) -> AppConfig:
    """
    Parse config.toml and return a fully-typed, immutable AppConfig.

    API keys are read from the .env file (or existing environment variables);
    they are never stored in config.toml.

    LLM key lookup order:
        OPENROUTER_API_KEY  (only when provider == "openrouter")
        OPENAI_API_KEY      (only when provider == "openai")
        LLM_API_KEY         (universal fallback)

    Vision key lookup order:
        OPENROUTER_API_KEY when the vision provider is "openrouter",
        otherwise OPENAI_API_KEY; then VISION_API_KEY; then LLM_API_KEY.

    Args:
        config_path: Absolute or relative path to the TOML file (``Path`` or
                     ``str``).  Defaults to <project_root>/config.toml.
        env_path:    Path to the .env file.
                     Defaults to <project_root>/.env.

    Raises:
        FileNotFoundError: If the TOML file does not exist or is not a
                           regular file (e.g. a directory).
        KeyError / TypeError: If a required key is missing or has the wrong type.
    """
    # Load .env first so os.environ is populated before the keys are read below.
    # override=False: variables already present in the environment win.
    if _HAS_DOTENV:
        _load_dotenv(dotenv_path=env_path, override=False)

    # Accept plain strings as well as Path objects.
    config_path = Path(config_path)

    # is_file() rather than exists(): a directory must produce the documented
    # FileNotFoundError instead of a platform-specific error from open().
    if not config_path.is_file():
        raise FileNotFoundError(
            f"Config file not found: {config_path}\n"
            "Copy config.toml.example to config.toml and adjust your paths."
        )

    with config_path.open("rb") as fh:
        raw: dict = tomllib.load(fh)

    # Top-level tables.  Only [vision] is optional.
    project = raw["project"]
    paths_raw = raw["paths"]
    video_raw = raw["video"]
    cv_raw = raw["cv"]
    sd_raw = raw["scene_detection"]
    whisper_raw = raw["whisper"]
    llm_raw = raw["llm"]
    vision_raw = raw.get("vision", {})
    export_raw = raw["export"]

    # Resolve paths relative to the config file's parent directory so the
    # project is relocatable, but keep absolute paths as-is.
    config_dir = config_path.parent

    def _resolve(p: str) -> Path:
        path = Path(p)
        return path if path.is_absolute() else (config_dir / path).resolve()

    paths = PathsConfig(
        source_movie=_resolve(paths_raw["source_movie"]),
        reference_trailer=_resolve(paths_raw["reference_trailer"]),
        output_dir=_resolve(paths_raw["output_dir"]),
        cache_dir=_resolve(paths_raw["cache_dir"]),
        proxy_dir=_resolve(paths_raw["proxy_dir"]),
    )

    video = VideoConfig(
        extract_fps=float(video_raw["extract_fps"]),
        proxy_width=int(video_raw["proxy_width"]),
        proxy_height=int(video_raw["proxy_height"]),
    )

    vibe_raw = cv_raw["vibe_check"]
    vibe_check = VibeCheckConfig(
        top_k_candidates=int(vibe_raw["top_k_candidates"]),
        hist_compare_method=int(vibe_raw["hist_compare_method"]),
        hist_bins_hue=int(vibe_raw["hist_bins_hue"]),
        hist_bins_saturation=int(vibe_raw["hist_bins_saturation"]),
        phash_max_distance=int(vibe_raw["phash_max_distance"]),
        crop_top_fraction=float(vibe_raw["crop_top_fraction"]),
        crop_bottom_fraction=float(vibe_raw["crop_bottom_fraction"]),
    )

    # Required keys use [...] (KeyError when missing); optional ones use
    # .get() with the loader-side default.
    deep_raw = cv_raw["deep_scan"]
    deep_scan = DeepScanConfig(
        coarse_step_seconds=float(deep_raw["coarse_step_seconds"]),
        match_threshold=float(deep_raw["match_threshold"]),
        provisional_match_threshold=float(deep_raw.get("provisional_match_threshold", 0.45)),
        # Falls back to match_threshold when not configured separately.
        coarse_candidate_threshold=float(
            deep_raw.get("coarse_candidate_threshold", deep_raw["match_threshold"])
        ),
        sequence_score_weight=float(deep_raw.get("sequence_score_weight", 0.55)),
        span_score_weight=float(deep_raw.get("span_score_weight", 0.15)),
        coarse_score_weight=float(deep_raw.get("coarse_score_weight", 0.10)),
        duration_score_weight=float(deep_raw.get("duration_score_weight", 0.20)),
        duration_tie_break_score_delta=float(deep_raw.get("duration_tie_break_score_delta", 0.03)),
        min_duration_coverage=float(deep_raw.get("min_duration_coverage", 0.65)),
        continuity_seed_offsets_s=tuple(
            float(v)
            for v in deep_raw.get(
                "continuity_seed_offsets_s",
                [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0],
            )
        ),
        scene_seed_top_k=int(deep_raw.get("scene_seed_top_k", 30)),
        scene_seed_points_per_scene=int(deep_raw.get("scene_seed_points_per_scene", 6)),
        content_rerank_candidate_count=int(deep_raw.get("content_rerank_candidate_count", 100)),
        skip_coarse_scan_with_weighted_seeds=bool(
            deep_raw.get("skip_coarse_scan_with_weighted_seeds", False)
        ),
        max_refine_candidates=int(deep_raw.get("max_refine_candidates", 6)),
        match_method=int(deep_raw["match_method"]),
        refine_window_seconds=float(deep_raw.get("refine_window_seconds", 0.6)),
        refine_step_seconds=float(deep_raw["refine_step_seconds"]),
        content_align_window_seconds=float(deep_raw.get("content_align_window_seconds", 0.48)),
        content_align_sample_step_s=float(deep_raw.get("content_align_sample_step_s", 0.28)),
        content_validation_weight=float(deep_raw.get("content_validation_weight", 0.35)),
        provisional_content_threshold=float(deep_raw.get("provisional_content_threshold", 0.42)),
        start_tie_break_score_delta=float(deep_raw.get("start_tie_break_score_delta", 0.015)),
        start_preroll_frames=int(deep_raw.get("start_preroll_frames", 0)),
        sequence_candidate_count=int(deep_raw.get("sequence_candidate_count", 240)),
        sequence_min_distance_s=float(deep_raw.get("sequence_min_distance_s", 1.0)),
        span_sample_step_s=float(deep_raw.get("span_sample_step_s", 0.08)),
        trim_tail_frames=int(deep_raw.get("trim_tail_frames", 2)),
        scene_boundary_epsilon_s=float(deep_raw.get("scene_boundary_epsilon_s", 0.12)),
        scoreable_luma_mean_min=float(deep_raw.get("scoreable_luma_mean_min", 24.0)),
        scoreable_luma_p90_min=float(deep_raw.get("scoreable_luma_p90_min", 58.0)),
        scoreable_contrast_min=float(deep_raw.get("scoreable_contrast_min", 24.0)),
    )

    scene_detection = SceneDetectionConfig(
        content_threshold=float(sd_raw["content_threshold"]),
        min_scene_duration_s=float(sd_raw["min_scene_duration_s"]),
    )

    whisper = WhisperConfig(
        model=whisper_raw["model"],
        language=whisper_raw["language"],
        device=whisper_raw["device"],
        compute_type=whisper_raw["compute_type"],
    )

    # LLM API key: provider-specific variable first, LLM_API_KEY as fallback.
    llm_provider = llm_raw["provider"]
    if llm_provider == "openrouter":
        llm_key_vars = ("OPENROUTER_API_KEY", "LLM_API_KEY")
    elif llm_provider == "openai":
        llm_key_vars = ("OPENAI_API_KEY", "LLM_API_KEY")
    else:  # e.g. ollama: no provider-specific variable
        llm_key_vars = ("LLM_API_KEY",)

    llm = LLMConfig(
        provider=llm_provider,
        base_url=llm_raw["base_url"],
        model=llm_raw["model"],
        timeout_seconds=int(llm_raw["timeout_seconds"]),
        temperature=float(llm_raw["temperature"]),
        max_tokens=int(llm_raw["max_tokens"]),
        api_key=_first_env(*llm_key_vars),
    )

    # Vision reuses the LLM provider when that provider is one vision
    # supports; otherwise it defaults to openrouter.
    default_vision_provider = (
        llm_provider if llm_provider in ("openai", "openrouter") else "openrouter"
    )
    vision_provider = vision_raw.get("provider", default_vision_provider)
    vision_api_key = _first_env(
        "OPENROUTER_API_KEY" if vision_provider == "openrouter" else "OPENAI_API_KEY",
        "VISION_API_KEY",
        "LLM_API_KEY",
    )

    vision = VisionConfig(
        enabled=bool(vision_raw.get("enabled", False)),
        provider=vision_provider,
        # Connection settings fall back to the LLM section when omitted.
        base_url=str(vision_raw.get("base_url", llm.base_url)),
        model=str(vision_raw.get("model", llm.model)),
        timeout_seconds=int(vision_raw.get("timeout_seconds", llm.timeout_seconds)),
        temperature=float(vision_raw.get("temperature", 0.0)),
        max_tokens=int(vision_raw.get("max_tokens", 350)),
        scene_candidate_top_k=int(vision_raw.get("scene_candidate_top_k", 8)),
        max_new_descriptions_per_run=int(vision_raw.get("max_new_descriptions_per_run", 12)),
        max_seed_scenes=int(vision_raw.get("max_seed_scenes", 3)),
        seed_points_per_scene=int(vision_raw.get("seed_points_per_scene", 12)),
        seed_score=float(vision_raw.get("seed_score", 0.88)),
        max_refine_candidates=int(vision_raw.get("max_refine_candidates", 6)),
        local_scan_step_s=float(vision_raw.get("local_scan_step_s", 0.12)),
        local_scan_max_points_per_scene=int(vision_raw.get("local_scan_max_points_per_scene", 180)),
        local_scan_top_candidates=int(vision_raw.get("local_scan_top_candidates", 18)),
        local_scan_tie_break_score_delta=float(
            vision_raw.get("local_scan_tie_break_score_delta", 0.08)
        ),
        multi_shot_cut_corr_threshold=float(vision_raw.get("multi_shot_cut_corr_threshold", 0.20)),
        multi_shot_boundary_tolerance_s=float(
            vision_raw.get("multi_shot_boundary_tolerance_s", 0.20)
        ),
        fullscan_fallback=bool(vision_raw.get("fullscan_fallback", False)),
        content_threshold=float(vision_raw.get("content_threshold", 0.22)),
        similarity_threshold=float(vision_raw.get("similarity_threshold", 0.18)),
        api_key=vision_api_key,
    )

    export = ExportConfig(
        fcpxml_version=str(export_raw["fcpxml_version"]),
        edl_frame_rate=float(export_raw["edl_frame_rate"]),
        output_format=export_raw["output_format"],
    )

    return AppConfig(
        project_name=project["name"],
        version=project["version"],
        log_level=project["log_level"],
        paths=paths,
        video=video,
        cv=CVConfig(vibe_check=vibe_check, deep_scan=deep_scan),
        scene_detection=scene_detection,
        whisper=whisper,
        llm=llm,
        vision=vision,
        export=export,
    )
|
||||
@@ -0,0 +1,287 @@
|
||||
"""
|
||||
src/core/models.py — Canonical data models for AI Trailer Generator v2
|
||||
|
||||
Rules:
|
||||
- Every model is a frozen dataclass (immutable after creation).
|
||||
- All fields are strictly typed; no bare dicts or untyped lists.
|
||||
- Seconds are always float; frame numbers are always int.
|
||||
- Confidence scores live in [0.0, 1.0].
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum, auto
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Enumerations
|
||||
# ===========================================================================
|
||||
|
||||
class MatchMethod(Enum):
    """Template-matching method; member values mirror the cv2.TM_* constants."""

    # Squared difference.
    TM_SQDIFF = 0
    TM_SQDIFF_NORMED = 1

    # Cross-correlation.
    TM_CCORR = 2
    TM_CCORR_NORMED = 3

    # Correlation coefficient.
    TM_CCOEFF = 4
    TM_CCOEFF_NORMED = 5
|
||||
|
||||
|
||||
class BeatType(Enum):
    """Narrative function of a trailer beat.

    Used only by the dramaturgy / LLM layer.  Values are assigned by
    ``auto()`` in declaration order, so the order below must be kept.
    """

    # Opening attention grabber
    HOOK = auto()
    # World / character introduction
    SETUP = auto()
    # Inciting incident / rising tension
    CONFLICT = auto()
    # Peak action / emotion
    CLIMAX = auto()
    # Cool-down / tagline
    RESOLUTION = auto()
    # Not classified
    UNKNOWN = auto()
|
||||
|
||||
|
||||
class ExportFormat(Enum):
    """Timeline export target; values are the lowercase format identifiers."""

    FCPXML = "fcpxml"
    EDL = "edl"
    BOTH = "both"  # write FCPXML and EDL
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Phase 0 — Source-movie scene index
|
||||
# ===========================================================================
|
||||
|
||||
@dataclass(frozen=True)
class DialogueLine:
    """One line of dialogue as transcribed by Whisper."""

    # Onset and offset, in seconds.
    start_s: float
    end_s: float
    # Verbatim transcript.
    text: str
    # Diarisation label, when one is available.
    speaker: Optional[str] = None

    @property
    def duration_s(self) -> float:
        """Length of the line in seconds (offset minus onset)."""
        return self.end_s - self.start_s
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Scene:
    """
    A single scene detected in the source movie.

    Created from PySceneDetect output; later enriched with Whisper dialogue
    and, optionally, the visual fingerprints computed for the Vibe Check.
    """

    # Identity and location
    scene_id: int      # zero-based index in source movie
    source_path: Path  # absolute path to the source video file

    # Extent in seconds and in frame numbers
    start_s: float
    end_s: float
    start_frame: int   # first frame number
    end_frame: int     # last frame number

    # Visual fingerprints — filled in by Vibe Check fingerprinting
    luma_hist: Optional[bytes] = None  # serialised np.ndarray (pickle)
    sat_hist: Optional[bytes] = None
    phash: Optional[str] = None        # 64-bit hex string

    # Dialogue — filled in after the Whisper pass
    dialogue: tuple[DialogueLine, ...] = ()

    @property
    def duration_s(self) -> float:
        """Scene length in seconds."""
        return self.end_s - self.start_s

    @property
    def midpoint_s(self) -> float:
        """Timestamp halfway between scene start and scene end, in seconds."""
        half = self.duration_s / 2.0
        return self.start_s + half

    def __repr__(self) -> str:
        """Compact summary: scene id, time span and duration."""
        span = f"{self.start_s:.2f}s–{self.end_s:.2f}s"
        return f"Scene(id={self.scene_id}, {span}, dur={self.duration_s:.2f}s)"
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Phase 1 — Reference-trailer beat
|
||||
# ===========================================================================
|
||||
|
||||
@dataclass(frozen=True)
class TrailerBeat:
    """
    A single cut / segment of the reference trailer.

    A beat is the smallest unit of the trailer and corresponds to exactly
    one clip that is later pulled from the original movie.
    """

    # Identity and location
    beat_id: int
    trailer_path: Path

    # Extent in seconds and in frame numbers
    start_s: float
    end_s: float
    start_frame: int
    end_frame: int

    # Narrative role, assigned by the LLM dramaturgy pass
    beat_type: BeatType = BeatType.UNKNOWN

    # Visual fingerprints of the *middle* frame (filled in by the CV pipeline)
    luma_hist: Optional[bytes] = None
    sat_hist: Optional[bytes] = None
    phash: Optional[str] = None

    # Dialogue extracted from this beat
    dialogue: tuple[DialogueLine, ...] = ()

    @property
    def duration_s(self) -> float:
        """Beat length in seconds."""
        return self.end_s - self.start_s

    @property
    def midpoint_s(self) -> float:
        """Timestamp halfway between beat start and beat end, in seconds."""
        half = self.duration_s / 2.0
        return self.start_s + half

    def __repr__(self) -> str:
        """Compact summary: beat id, narrative role and time span."""
        span = f"{self.start_s:.2f}s–{self.end_s:.2f}s"
        return f"TrailerBeat(id={self.beat_id}, {self.beat_type.name}, {span})"
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Phase 2 — CV match result
|
||||
# ===========================================================================
|
||||
|
||||
@dataclass(frozen=True)
class VibeHit:
    """
    Candidate produced by the coarse Vibe Check pass (histogram / pHash).

    A VibeHit only says that a scene survived the coarse filter for a given
    beat; it is not a confirmed match and is handed on to Deep Scan.
    """

    # Which trailer beat / source scene pair this candidate refers to.
    beat_id: int
    scene_id: int

    # Histogram similarity [0.0, 1.0] (CORREL method).
    hist_score: float
    # Hamming distance [0, 64]; lower = more similar.
    phash_distance: int
    # Weighted aggregate used for ranking.
    combined_score: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MatchSegment:
    """
    A source-backed visual island within one trailer beat.

    A beat can hold several shots separated by fades or title frames; one
    continuous source in/out range cannot describe such a beat, so each
    island gets its own segment.
    """

    # Position and length of the island (field names suggest seconds
    # relative to the beat — confirm with the producer).
    trailer_offset_s: float
    duration_s: float

    # Where the island was found in the source movie.
    scene_id: int
    in_point_s: float
    out_point_s: float

    # Match quality.
    match_score: float
    is_confirmed: bool = True
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MatchResult:
    """
    Outcome of the Deep Scan (template matching) for one trailer beat.

    There is one MatchResult per TrailerBeat: the best frame-accurate hit
    located inside the source movie.
    """

    # What was matched, and where it lives
    beat_id: int       # which trailer beat was matched
    scene_id: int      # which source scene contains the match
    source_path: Path  # absolute path to source video

    # Frame-accurate in-point / out-point in the SOURCE movie
    in_point_s: float    # matched frame onset in source seconds
    out_point_s: float   # computed out-point (in_point + beat duration)
    in_point_frame: int  # matched frame number in source movie

    # Match quality
    match_score: float  # cv2.matchTemplate peak value [0.0, 1.0]
    # (x, y) pixel location of the best match within the source frame
    match_location: tuple[int, int] = (0, 0)

    # Provenance
    vibe_hit: Optional[VibeHit] = None  # the candidate that led here
    is_confirmed: bool = True
    segments: tuple[MatchSegment, ...] = ()

    @property
    def duration_s(self) -> float:
        """Length of the matched source range in seconds."""
        return self.out_point_s - self.in_point_s

    def __repr__(self) -> str:
        """Compact summary: beat → scene mapping, in-point and score."""
        mapping = f"beat={self.beat_id} → scene={self.scene_id}"
        return (
            f"MatchResult({mapping}, "
            f"in={self.in_point_s:.3f}s, score={self.match_score:.3f})"
        )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Phase 3 — Edit timeline (pre-export)
|
||||
# ===========================================================================
|
||||
|
||||
@dataclass(frozen=True)
class EditClip:
    """
    A clip placed on the final edit timeline, ready for FCPXML / EDL export.

    Pairs the beat (dramaturgy) with the CV-confirmed source in/out points.
    """

    clip_index: int  # position on the timeline (0-based)
    beat: TrailerBeat
    match: MatchResult

    # Position in the OUTPUT trailer, in seconds
    timeline_start_s: float
    timeline_end_s: float
    # Explicit source duration; None means "same as the timeline duration"
    source_duration_s: float | None = None
    trailer_tail_s: float = 0.0

    # Optional audio override (e.g. VO or music)
    audio_path: Optional[Path] = None
    audio_offset_s: float = 0.0

    @property
    def timeline_duration_s(self) -> float:
        """Length of the clip on the output timeline, in seconds."""
        return self.timeline_end_s - self.timeline_start_s

    @property
    def source_timeline_duration_s(self) -> float:
        """Source-backed duration: the explicit value clamped at 0, else the timeline duration."""
        if self.source_duration_s is None:
            return self.timeline_duration_s
        # Negative explicit durations are clamped to zero.
        return max(0.0, self.source_duration_s)

    def __repr__(self) -> str:
        """Compact summary: clip index, timeline span and source in-point."""
        span = f"{self.timeline_start_s:.2f}s–{self.timeline_end_s:.2f}s"
        return f"EditClip(#{self.clip_index}, tl={span}, src={self.match.in_point_s:.3f}s)"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EditTimeline:
    """
    Ordered sequence of EditClips that makes up the whole trailer.

    This is the object handed to the export layer (FCPXML / EDL writer).
    """

    title: str
    frame_rate: float            # e.g. 23.976
    clips: tuple[EditClip, ...]  # ordered by clip_index

    @property
    def total_duration_s(self) -> float:
        """Latest ``timeline_end_s`` over all clips; 0.0 for an empty timeline."""
        return max((clip.timeline_end_s for clip in self.clips), default=0.0)

    @property
    def clip_count(self) -> int:
        """Number of clips on the timeline."""
        return len(self.clips)
|
||||
Reference in New Issue
Block a user