Files
aitrailer/config.toml
T
Melbar 54d3f04616 Fix matching regressions, cache guard, and multi-shot algorithm for beat 15
- config.toml: revert scoreable_luma/contrast thresholds to 24/58/24 (lowering
  them let cross-fade blend frames contaminate content-validation templates,
  dropping scores below provisional_content_threshold)
- src/cv/global_scan.py: _is_dark_reference_frame now requires contrast<30 so
  genuine dark silhouette frames are not rejected as scoreable; two-path
  _is_scoreable_reference_frame separates standard vs fade-content scoring
- cli.py: _keeps_cached_match() guard prevents a weaker single-span rematch
  from overwriting a better multi-segment provisional cache entry
- cli.py: _fade_content_shots() restricted to between-island gaps only—
  pre-island black leaders were incorrectly emitted as matchable shots
- cli.py: island[0] of _match_unmatched_visual_segments() now uses no
  continuity seed so an insert cut at the start of a multi-shot beat is not
  forced toward the previous beat's scene
- scripts/generate_cutter_report.py: fix ffmpeg concat demuxer on Windows—
  use part.absolute().as_posix() so paths in the concat txt are absolute and
  not double-resolved relative to the concat file's directory

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-06 00:05:37 +02:00

199 lines
7.9 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# =============================================================================
# AI Trailer Generator v2 — Central Configuration
# =============================================================================
# All tunable parameters, thresholds, and file paths are defined here.
# NO hardcoded values are allowed in the Python source code.
# =============================================================================
[project]
# Project identity plus the global logging verbosity.
name = "AI Trailer Generator v2"
version = "2.0.0"
# Logging verbosity; the accepted level names are listed in the trailing comment.
log_level = "INFO" # DEBUG | INFO | WARNING | ERROR
# -----------------------------------------------------------------------------
# [paths] — External video sources (read-only access)
# -----------------------------------------------------------------------------
[paths]
# Input media. Both values are absolute, machine-specific paths with Windows
# drive letters; forward slashes are used, so nothing needs escaping inside
# the basic (double-quoted) strings.
source_movie = "B:/Proxy/BehindTheRedDoor_FTR_1080P_2398_Fixed.mp4"
reference_trailer = "F:/Encodings/BehindTheRedDoor_Trailer_REFERENCE.mp4"
# Output destinations (inside project sandbox). These are relative paths.
# NOTE(review): presumably resolved against the project root -- confirm in the
# config loader.
output_dir = "output"
cache_dir = ".cache"
proxy_dir = "proxy"
# -----------------------------------------------------------------------------
# [video] — Decode / proxy settings
# -----------------------------------------------------------------------------
[video]
# Target FPS for internal frame extraction (0 = use source FPS).
# The current value of 1.0 means one extracted frame per second.
extract_fps = 1.0
# Proxy resolution for template matching (width x height), in pixels.
# 640 x 360 is a 16:9 frame.
proxy_width = 640
proxy_height = 360
# -----------------------------------------------------------------------------
# [cv] — Computer Vision engine parameters
# Phase 1 — "Vibe Check" (histogram / perceptual hash scene-level filter)
# Phase 2 — "Deep Scan" (template matching frame-level precision)
# -----------------------------------------------------------------------------
[cv]
# Parent table: it carries no keys of its own; all settings live in the
# [cv.vibe_check] and [cv.deep_scan] sub-tables.
[cv.vibe_check]
# Number of top candidate scenes to forward to Deep Scan
top_k_candidates = 100
# Histogram comparison method:
# CORREL=0 | CHISQR=1 | INTERSECT=2 | BHATTACHARYYA=3
# The current value 0 selects CORREL from the list above.
hist_compare_method = 0
# Histogram bins per channel (hue, saturation)
hist_bins_hue = 50
hist_bins_saturation = 60
# pHash similarity threshold (lower = stricter; 0-64 range)
# NOTE: 12 is for near-duplicate detection. Cross-video matching
# (trailer vs source movie with different grading/compression)
# needs 25-35. Start at 32 and tighten if you get false positives.
phash_max_distance = 32
# ---- Text-Safe Crop -------------------------------------------------------
# With the two fractions below, 0.15 + 0.30 = 0.45 of the frame height is
# excluded, leaving the middle 55% of the frame.
# Fraction of frame height to EXCLUDE from the top (e.g. logos, title cards)
crop_top_fraction = 0.15
# Fraction of frame height to EXCLUDE from the bottom (e.g. letterbox, subs)
crop_bottom_fraction = 0.30
[cv.deep_scan]
# Step size in SECONDS between sampled frames during the coarse scan pass
coarse_step_seconds = 0.5
# Minimum template match score (0.0-1.0) to accept a candidate as a hit
match_threshold = 0.65
# Store/report lower-confidence automatic candidates for visual review instead
# of dropping them as "NO MATCH". Confirmed exports can still use match_threshold.
provisional_match_threshold = 0.35
# Lower gate for entering temporal multi-frame refinement. The final decision
# still uses sequence/span scoring; this only avoids rejecting real matches
# because one midpoint frame is weak.
# As configured: provisional (0.35) < coarse gate (0.40) < match (0.65).
coarse_candidate_threshold = 0.40
# Candidate ranking weights. Duration coverage matters when the same visual
# shot appears multiple times: prefer the occurrence that can cover the beat.
# The four *_weight values below sum to 1.00.
# NOTE(review): presumably they should stay normalized when retuned -- confirm.
sequence_score_weight = 0.55
span_score_weight = 0.15
coarse_score_weight = 0.10
duration_score_weight = 0.20
# The next two keys are not weights.
# NOTE(review): presumably the score gap under which duration decides between
# candidates, and the minimum fraction of the beat a candidate must cover --
# confirm against the matcher.
duration_tie_break_score_delta = 0.03
min_duration_coverage = 0.65
# Seed offsets in seconds (negative = earlier).
# NOTE(review): presumably applied relative to the previous beat's match to
# seed continuity candidates -- confirm against the matcher.
continuity_seed_offsets_s = [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0]
# NOTE(review): presumably the number of top-ranked scenes used as seeds and
# the number of sample points taken inside each of them -- confirm.
scene_seed_top_k = 30
scene_seed_points_per_scene = 6
# NOTE(review): presumably how many candidates are re-ranked by content
# validation -- confirm.
content_rerank_candidate_count = 100
# Currently false.
# NOTE(review): the name suggests that true skips the coarse scan when
# weighted seeds are available -- confirm.
skip_coarse_scan_with_weighted_seeds = false
# cv2.matchTemplate method:
# TM_CCOEFF_NORMED=5 (recommended), TM_CCORR_NORMED=3
match_method = 5
# If a coarse hit is found, refine by scanning ± this many seconds
refine_window_seconds = 0.6
# NOTE(review): the trailing comment assumes 25 fps, but the source file name
# contains "2398" and [export].edl_frame_rate is 23.976, where one frame is
# about 0.0417 s -- confirm that 0.04 is the intended step.
refine_step_seconds = 0.04 # ≈ 1 frame at 25 fps
# Content alignment / validation settings.
# NOTE(review): window and step are presumably in seconds (per the key
# suffixes); content_validation_weight presumably blends the content score
# into the final score -- confirm.
content_align_window_seconds = 0.48
content_align_sample_step_s = 0.28
content_validation_weight = 0.35
# Per the commit note, scores fell below this threshold when the scoreable_*
# limits at the end of this table were lowered.
provisional_content_threshold = 0.30
# When several adjacent frame offsets score almost the same, prefer the earlier
# one. This avoids matches that are visually correct but start a few frames late.
start_tie_break_score_delta = 0.015
# Currently 0.
# NOTE(review): presumably extra frames prepended to the matched in-point --
# confirm.
start_preroll_frames = 0
# Automatic temporal verification after a coarse image hit.
# More candidates reduces false positives from visually similar shots.
sequence_candidate_count = 240
# NOTE(review): presumably the minimum spacing in seconds between kept
# candidates -- confirm.
sequence_min_distance_s = 1.0
# Separate from [vision].max_refine_candidates, which has its own value (12).
max_refine_candidates = 6
# Match-span detection: trim when the source starts drifting into a different shot.
span_sample_step_s = 0.08
# NOTE(review): presumably the number of frames removed from the end of a
# detected span -- confirm.
trim_tail_frames = 4
# If a refined in-point lands this close to a detected scene end, treat it as
# the next scene. Scene detectors often place cuts a frame or two around the
# visible boundary.
scene_boundary_epsilon_s = 0.12
# Minimum luma mean / luma 90th percentile / contrast for a reference frame to
# count as scoreable. Deliberately kept at 24 / 58 / 24: per the commit note,
# lowering them let cross-fade blend frames contaminate content-validation
# templates and dropped scores below provisional_content_threshold.
scoreable_luma_mean_min = 24.0
scoreable_luma_p90_min = 58.0
scoreable_contrast_min = 24.0
# -----------------------------------------------------------------------------
# [scene_detection] — PySceneDetect parameters (used to segment source movie)
# -----------------------------------------------------------------------------
[scene_detection]
# Threshold for ContentDetector (lower = more sensitive).
# Unrelated to [vision].content_threshold, which is a separate key in another
# table with its own value (0.22).
content_threshold = 27.0
# Minimum scene duration in seconds
min_scene_duration_s = 1.5
# -----------------------------------------------------------------------------
# [whisper] — Dialogue / audio analysis
# -----------------------------------------------------------------------------
[whisper]
# Whisper model name.
model = "large-v3"
# Spoken-language code; "ar" is the ISO 639-1 code for Arabic.
language = "ar"
# Inference device; accepted values are listed in the trailing comment.
device = "cuda" # cuda | cpu
# Numeric precision used for inference; accepted values are listed in the
# trailing comment.
# NOTE(review): float16 presumably requires the cuda device -- confirm before
# switching device to cpu.
compute_type = "float16" # float16 | int8 | float32
# -----------------------------------------------------------------------------
# [llm] — Used ONLY for thematic segmentation / dramaturgy
# -----------------------------------------------------------------------------
[llm]
# Text model endpoint. provider, base_url and model currently hold the same
# values as in [vision], but the two tables are independent.
# No API key is stored in this table.
# NOTE(review): presumably the key is supplied outside this file (e.g. via the
# environment) -- confirm.
provider = "openrouter"
base_url = "https://openrouter.ai/api/v1"
model = "google/gemma-4-31b-it"
# Request timeout in seconds.
timeout_seconds = 120
# Sampling temperature and response token cap for each request.
temperature = 0.3
max_tokens = 4096
# -----------------------------------------------------------------------------
# [vision] — Optional cached visual descriptions for ambiguous matching
# -----------------------------------------------------------------------------
[vision]
# Disabled by default to avoid surprise API cost. Enable when you want the
# matcher to ask a vision-capable model for cached 3-frame scene descriptions.
enabled = false
# Vision model endpoint. provider, base_url and model currently hold the same
# values as in [llm], but the two tables are independent.
provider = "openrouter"
base_url = "https://openrouter.ai/api/v1"
model = "google/gemma-4-31b-it"
# Request timeout in seconds.
timeout_seconds = 90
# Sampling temperature (0.0 here) and response token cap for each request.
temperature = 0.0
max_tokens = 350
# Cost controls: per beat, only the top scene-level candidates are described,
# and cached descriptions in .cache/vision_descriptions.json are reused.
scene_candidate_top_k = 48
max_new_descriptions_per_run = 24
# Seeding from described scenes.
# NOTE(review): presumably at most max_seed_scenes scenes are seeded, each with
# seed_points_per_scene sample points entered at score seed_score -- confirm.
max_seed_scenes = 8
seed_points_per_scene = 12
seed_score = 0.88
# Separate from [cv.deep_scan].max_refine_candidates, which has its own
# value (6).
max_refine_candidates = 12
# Local scan inside candidate scenes (the `_s` suffix denotes seconds, as
# elsewhere in this file).
# NOTE(review): exact semantics of the caps and the tie-break delta are not
# documented here -- confirm against the matcher.
local_scan_step_s = 0.12
local_scan_max_points_per_scene = 180
local_scan_top_candidates = 36
local_scan_tie_break_score_delta = 0.08
# Multi-shot beat handling.
# NOTE(review): presumably a correlation level that marks a cut inside a beat,
# and a boundary tolerance in seconds -- confirm.
multi_shot_cut_corr_threshold = 0.20
multi_shot_boundary_tolerance_s = 0.20
# Currently false.
# NOTE(review): the name suggests that true enables a full-movie scan as a
# fallback -- confirm.
fullscan_fallback = false
# Unrelated to [scene_detection].content_threshold (27.0); this key belongs to
# the vision matcher.
# NOTE(review): meaning and scale of both thresholds below are not documented
# here -- confirm.
content_threshold = 0.22
similarity_threshold = 0.18
# -----------------------------------------------------------------------------
# [export] — FCPXML / EDL export settings
# -----------------------------------------------------------------------------
[export]
# Keep this quoted: as a bare TOML float, 1.10 would parse as 1.1 and lose the
# trailing zero of the version string.
fcpxml_version = "1.10"
# Consistent with the "2398" in the [paths].source_movie file name.
edl_frame_rate = 23.976 # fps used in EDL timecode generation
# Which export file(s) to write; accepted values are listed in the trailing
# comment.
output_format = "fcpxml" # fcpxml | edl | both