# =============================================================================
# AI Trailer Generator v2 — Central Configuration
# =============================================================================
# All tunable parameters, thresholds, and file paths are defined here.
# NO hardcoded values are allowed in the Python source code.
# =============================================================================

# Project identity and logging verbosity.
[project]
name = "AI Trailer Generator v2"
version = "2.0.0"
log_level = "INFO"  # DEBUG | INFO | WARNING | ERROR

# -----------------------------------------------------------------------------
# [paths] — External video sources (read-only access) and output destinations
# -----------------------------------------------------------------------------
[paths]
# Source media on external drives. Forward slashes keep these Windows drive
# paths free of backslash escapes.
source_movie = "B:/Proxy/BehindTheRedDoor_FTR_1080P_2398_Fixed.mp4"
reference_trailer = "F:/Encodings/BehindTheRedDoor_Trailer_REFERENCE.mp4"

# Output destinations (inside project sandbox)
# NOTE(review): relative paths — presumably resolved against the project root;
# confirm in the config loader.
output_dir = "output"
cache_dir = ".cache"
proxy_dir = "proxy"

# -----------------------------------------------------------------------------
# [video] — Decode / proxy settings
# -----------------------------------------------------------------------------
[video]
# Target FPS for internal frame extraction (0 = use source FPS)
extract_fps = 1.0
# Proxy resolution for template matching (width x height).
# 640 x 360 is a 16:9 frame.
proxy_width = 640
proxy_height = 360

# -----------------------------------------------------------------------------
# [cv] — Computer Vision engine parameters
# Phase 1 — "Vibe Check" (histogram / perceptual hash scene-level filter)
# Phase 2 — "Deep Scan" (template matching frame-level precision)
# -----------------------------------------------------------------------------
# [cv] carries no keys of its own; each phase is configured in its sub-table.
[cv]

[cv.vibe_check]
# Number of top candidate scenes to forward to Deep Scan
top_k_candidates = 100

# Histogram comparison method:
# CORREL=0 | CHISQR=1 | INTERSECT=2 | BHATTACHARYYA=3
# (the codes match OpenCV's cv2.HISTCMP_* constants)
hist_compare_method = 0

# Histogram bins per channel (hue, saturation)
hist_bins_hue = 50
hist_bins_saturation = 60

# Maximum pHash distance accepted as a match (lower = stricter; 0–64 range)
# NOTE: 12 is for near-duplicate detection. Cross-video matching
# (trailer vs source movie with different grading/compression)
# needs 25–35. Start at 32 and tighten if you get false positives.
phash_max_distance = 32

# ---- Text-Safe Crop -------------------------------------------------------
# With the values below, 0.15 + 0.30 = 45% of the frame height is excluded.
# Fraction of frame height to EXCLUDE from the top (e.g. logos, title cards)
crop_top_fraction = 0.15
# Fraction of frame height to EXCLUDE from the bottom (e.g. letterbox, subs)
crop_bottom_fraction = 0.30

[cv.deep_scan]
# Step size in SECONDS between sampled frames during the coarse scan pass
coarse_step_seconds = 0.5

# Score gates, in ascending order as configured here:
#   provisional_match_threshold (0.45) < coarse_candidate_threshold (0.50)
#   < match_threshold (0.65)

# Minimum template match score (0.0–1.0) to accept a candidate as a hit
match_threshold = 0.65

# Store/report lower-confidence automatic candidates for visual review instead
# of dropping them as "NO MATCH". Confirmed exports can still use match_threshold.
provisional_match_threshold = 0.45

# Lower gate for entering temporal multi-frame refinement. The final decision
# still uses sequence/span scoring; this only avoids rejecting real matches
# because one midpoint frame is weak.
coarse_candidate_threshold = 0.50

# Candidate ranking weights. Duration coverage matters when the same visual
# shot appears multiple times: prefer the occurrence that can cover the beat.
# The four *_score_weight values below sum to 1.00.
sequence_score_weight = 0.55
span_score_weight = 0.15
coarse_score_weight = 0.10
duration_score_weight = 0.20
# NOTE(review): the keys from here to skip_coarse_scan_with_weighted_seeds are
# undocumented; the `_s` suffix presumably means seconds — confirm in the code.
duration_tie_break_score_delta = 0.03
min_duration_coverage = 0.65
continuity_seed_offsets_s = [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0]
scene_seed_top_k = 30
scene_seed_points_per_scene = 6
content_rerank_candidate_count = 100
skip_coarse_scan_with_weighted_seeds = false

# cv2.matchTemplate method:
# TM_CCOEFF_NORMED=5 (recommended), TM_CCORR_NORMED=3
match_method = 5

# If a coarse hit is found, refine by scanning ± this many seconds
refine_window_seconds = 0.6
# 0.04 s ≈ 1 frame at 25 fps (≈ 0.96 frame at the 23.976 fps set in
# [export].edl_frame_rate)
refine_step_seconds = 0.04
# NOTE(review): the four content_* / provisional_content_* keys below are
# undocumented; confirm their meaning in the matcher code.
content_align_window_seconds = 0.48
content_align_sample_step_s = 0.28
content_validation_weight = 0.35
provisional_content_threshold = 0.42

# When several adjacent frame offsets score almost the same, prefer the earlier
# one. This avoids matches that are visually correct but start a few frames late.
start_tie_break_score_delta = 0.015
start_preroll_frames = 0

# Automatic temporal verification after a coarse image hit.
# More candidates reduces false positives from visually similar shots.
sequence_candidate_count = 240
sequence_min_distance_s = 1.0
max_refine_candidates = 6

# Match-span detection: trim when the source starts drifting into a different shot.
span_sample_step_s = 0.08
trim_tail_frames = 4

# If a refined in-point lands this close to a detected scene end, treat it as
# the next scene. Scene detectors often place cuts a frame or two around the
# visible boundary.
scene_boundary_epsilon_s = 0.12
# NOTE(review): the scoreable_* minimums are undocumented; presumably they gate
# which frames are bright/contrasty enough to score — confirm in the code.
scoreable_luma_mean_min = 24.0
scoreable_luma_p90_min = 58.0
scoreable_contrast_min = 24.0

# -----------------------------------------------------------------------------
# [scene_detection] — PySceneDetect parameters (used to segment source movie)
# -----------------------------------------------------------------------------
[scene_detection]
# Threshold for ContentDetector (lower = more sensitive).
# Separate from the like-named content_threshold key in [vision].
content_threshold = 27.0
# Minimum scene duration in seconds
min_scene_duration_s = 1.5

# -----------------------------------------------------------------------------
# [whisper] — Dialogue / audio analysis
# -----------------------------------------------------------------------------
[whisper]
model = "large-v3"
language = "ar"  # ISO 639-1 language code (Arabic)
device = "cuda"  # cuda | cpu
compute_type = "float16"  # float16 | int8 | float32

# -----------------------------------------------------------------------------
# [llm] — Used ONLY for thematic segmentation / dramaturgy
# -----------------------------------------------------------------------------
[llm]
# Same provider, base_url and model as [vision]; the two tables are configured
# independently, so change both if the endpoint moves.
provider = "openrouter"
base_url = "https://openrouter.ai/api/v1"
model = "google/gemma-4-31b-it"
timeout_seconds = 120
temperature = 0.3
max_tokens = 4096

# -----------------------------------------------------------------------------
# [vision] — Optional cached visual descriptions for ambiguous matching
# -----------------------------------------------------------------------------
[vision]
# Disabled by default to avoid surprise API cost. Enable when you want the
# matcher to ask a vision-capable model for cached 3-frame scene descriptions.
enabled = false
provider = "openrouter"
base_url = "https://openrouter.ai/api/v1"
model = "google/gemma-4-31b-it"
timeout_seconds = 90
temperature = 0.0
max_tokens = 350

# Cost controls: per beat, only the top scene-level candidates are described,
# and cached descriptions in .cache/vision_descriptions.json are reused.
scene_candidate_top_k = 8
max_new_descriptions_per_run = 12
# NOTE(review): the remaining keys are undocumented tuning values; the `_s`
# suffix presumably means seconds — confirm against the consuming code.
# max_refine_candidates and content_threshold are separate from the like-named
# keys in [cv.deep_scan] and [scene_detection].
max_seed_scenes = 3
seed_points_per_scene = 12
seed_score = 0.88
max_refine_candidates = 6
local_scan_step_s = 0.12
local_scan_max_points_per_scene = 180
local_scan_top_candidates = 18
local_scan_tie_break_score_delta = 0.08
multi_shot_cut_corr_threshold = 0.20
multi_shot_boundary_tolerance_s = 0.20
fullscan_fallback = false
content_threshold = 0.22
similarity_threshold = 0.18

# -----------------------------------------------------------------------------
# [export] — FCPXML / EDL export settings
# -----------------------------------------------------------------------------
[export]
# Kept as a string: a bare 1.10 would be parsed as the float 1.1.
fcpxml_version = "1.10"
edl_frame_rate = 23.976  # fps used in EDL timecode generation
output_format = "fcpxml"  # fcpxml | edl | both