Initial project import

This commit is contained in:
Melbar
2026-05-02 09:07:41 +02:00
commit 8e1bcf142f
38 changed files with 7928 additions and 0 deletions
+198
View File
@@ -0,0 +1,198 @@
# =============================================================================
# AI Trailer Generator v2 — Central Configuration
# =============================================================================
# All tunable parameters, thresholds, and file paths are defined here.
# NO hardcoded values are allowed in the Python source code.
# =============================================================================
[project]
# Project identity and logging verbosity.
name = "AI Trailer Generator v2"
version = "2.0.0"
# Must be one of the four names listed in the trailing comment.
# NOTE(review): these match Python's standard logging level names; confirm
# whether the loader accepts any others (e.g. CRITICAL) before using them.
log_level = "INFO" # DEBUG | INFO | WARNING | ERROR
# -----------------------------------------------------------------------------
# [paths] — External video sources (read-only access) and project-local output directories
# -----------------------------------------------------------------------------
[paths]
# Input videos. Both values are absolute paths on lettered drives (B:, F:),
# so they are specific to one workstation and must be edited on any other
# machine. Forward slashes are used, so no backslash escaping is needed
# inside the double-quoted (basic) TOML strings.
source_movie = "B:/Proxy/BehindTheRedDoor_FTR_1080P_2398_Fixed.mp4"
reference_trailer = "F:/Encodings/BehindTheRedDoor_Trailer_REFERENCE.mp4"
# Output destinations (inside project sandbox)
# These are relative paths.
# NOTE(review): presumably resolved against the project root rather than the
# current working directory — confirm in the config loader.
output_dir = "output"
cache_dir = ".cache"
proxy_dir = "proxy"
# -----------------------------------------------------------------------------
# [video] — Decode / proxy settings
# -----------------------------------------------------------------------------
[video]
# Target FPS for internal frame extraction (0 = use source FPS)
# The configured value, 1.0, requests one extracted frame per second of video.
extract_fps = 1.0
# Proxy resolution for template matching (width x height)
# 640 x 360 is a 16:9 frame; change both values together to keep that ratio.
# NOTE(review): whether a non-16:9 source is letterboxed or stretched to this
# size is not visible here — confirm in the proxy generation code.
proxy_width = 640
proxy_height = 360
# -----------------------------------------------------------------------------
# [cv] — Computer Vision engine parameters
# Phase 1 — "Vibe Check" (histogram / perceptual hash scene-level filter)
# Phase 2 — "Deep Scan" (template matching frame-level precision)
# -----------------------------------------------------------------------------
# The [cv] table carries no keys of its own; it only parents the
# [cv.vibe_check] and [cv.deep_scan] sub-tables.
[cv]
# Phase 1 ("Vibe Check"): scene-level filter using histograms and perceptual
# hashes. Its survivors are handed to Phase 2 ([cv.deep_scan]).
[cv.vibe_check]
# Number of top candidate scenes to forward to Deep Scan
top_k_candidates = 100
# Histogram comparison method:
# CORREL=0 | CHISQR=1 | INTERSECT=2 | BHATTACHARYYA=3
# The configured value, 0, selects CORREL.
# NOTE(review): in OpenCV, CORREL and INTERSECT score HIGHER for more similar
# histograms while CHISQR and BHATTACHARYYA score LOWER. Confirm the ranking
# code handles the direction before switching methods.
hist_compare_method = 0
# Histogram bins per channel (hue, saturation)
hist_bins_hue = 50
hist_bins_saturation = 60
# pHash similarity threshold (lower = stricter; 0-64 range)
# NOTE: 12 is for near-duplicate detection. Cross-video matching
# (trailer vs source movie with different grading/compression)
# needs 25-35. Start at 32 and tighten if you get false positives.
phash_max_distance = 32
# ---- Text-Safe Crop -------------------------------------------------------
# The two fractions below are applied to the frame height. As configured they
# discard 0.15 + 0.30 = 0.45 of the height, leaving the band between 15% and
# 70% of the height (measured from the top). Their sum must stay below 1.0,
# otherwise nothing is left to compare.
# Fraction of frame height to EXCLUDE from the top (e.g. logos, title cards)
crop_top_fraction = 0.15
# Fraction of frame height to EXCLUDE from the bottom (e.g. letterbox, subs)
crop_bottom_fraction = 0.30
# Phase 2 ("Deep Scan"): frame-level template matching and temporal
# refinement of the candidates produced by Phase 1.
[cv.deep_scan]
# Step size in SECONDS between sampled frames during the coarse scan pass
coarse_step_seconds = 0.5
# The three score gates below form a ladder. As configured:
#   provisional_match_threshold (0.45) < coarse_candidate_threshold (0.50)
#   < match_threshold (0.65)
# Keep that ordering when retuning any one of them.
# Minimum template match score (0.0-1.0) to accept a candidate as a hit
match_threshold = 0.65
# Store/report lower-confidence automatic candidates for visual review instead
# of dropping them as "NO MATCH". Confirmed exports can still use match_threshold.
provisional_match_threshold = 0.45
# Lower gate for entering temporal multi-frame refinement. The final decision
# still uses sequence/span scoring; this only avoids rejecting real matches
# because one midpoint frame is weak.
coarse_candidate_threshold = 0.50
# Candidate ranking weights. Duration coverage matters when the same visual
# shot appears multiple times: prefer the occurrence that can cover the beat.
# The four *_score_weight values sum to 1.00 (0.55 + 0.15 + 0.10 + 0.20).
# NOTE(review): confirm whether the scorer renormalizes the weights; if it
# does not, changing one in isolation rescales the combined score and shifts
# its meaning relative to the thresholds above.
sequence_score_weight = 0.55
span_score_weight = 0.15
coarse_score_weight = 0.10
duration_score_weight = 0.20
# NOTE(review): the keys from here down to skip_coarse_scan_with_weighted_seeds
# are not documented in this file. The readings below are inferred from the
# key names only and must be confirmed against the consuming code.
# Presumably: candidates whose scores differ by less than this are ordered by
# duration coverage instead.
duration_tie_break_score_delta = 0.03
# Presumably: minimum fraction of the beat's duration a candidate must cover.
min_duration_coverage = 0.65
# Offsets in seconds (per the _s suffix). The list is asymmetric: one negative
# offset, the rest at or after zero. What they are relative to is not visible here.
continuity_seed_offsets_s = [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0]
scene_seed_top_k = 30
scene_seed_points_per_scene = 6
content_rerank_candidate_count = 100
skip_coarse_scan_with_weighted_seeds = false
# cv2.matchTemplate method:
# TM_CCOEFF_NORMED=5 (recommended), TM_CCORR_NORMED=3
match_method = 5
# If a coarse hit is found, refine by scanning ± this many seconds
refine_window_seconds = 0.6
# The step is sized for 25 fps material. At the 23.976 fps configured in
# [export].edl_frame_rate one frame lasts about 0.0417 s, so 0.04 s is
# slightly less than one frame there.
refine_step_seconds = 0.04 # ≈ 1 frame at 25 fps
# NOTE(review): the four content_* / provisional_content keys below are
# undocumented here. content_validation_weight is separate from the four
# ranking weights above (those already sum to 1.00) — confirm how it is combined.
content_align_window_seconds = 0.48
content_align_sample_step_s = 0.28
content_validation_weight = 0.35
provisional_content_threshold = 0.42
# When several adjacent frame offsets score almost the same, prefer the earlier
# one. This avoids matches that are visually correct but start a few frames late.
start_tie_break_score_delta = 0.015
# Frame count (per the _frames suffix); 0 as configured.
# NOTE(review): presumably extra frames added before the matched in-point — confirm.
start_preroll_frames = 0
# Automatic temporal verification after a coarse image hit.
# More candidates reduces false positives from visually similar shots.
sequence_candidate_count = 240
sequence_min_distance_s = 1.0
# NOTE(review): [vision] defines a key with the same name and the same value (6);
# confirm which table each code path reads so the two do not drift apart.
max_refine_candidates = 6
# Match-span detection: trim when the source starts drifting into a different shot.
# 0.08 s is two steps of refine_step_seconds (0.04 s).
span_sample_step_s = 0.08
trim_tail_frames = 4
# If a refined in-point lands this close to a detected scene end, treat it as
# the next scene. Scene detectors often place cuts a frame or two around the
# visible boundary.
# 0.12 s is roughly three frames at 24-25 fps.
scene_boundary_epsilon_s = 0.12
# NOTE(review): undocumented. Presumably minimum brightness/contrast a frame
# needs before it is scored, on an 8-bit (0-255) scale — confirm both the
# purpose and the scale in the consuming code.
scoreable_luma_mean_min = 24.0
scoreable_luma_p90_min = 58.0
scoreable_contrast_min = 24.0
# -----------------------------------------------------------------------------
# [scene_detection] — PySceneDetect parameters (used to segment source movie)
# -----------------------------------------------------------------------------
[scene_detection]
# Threshold for ContentDetector (lower = more sensitive)
# Not to be confused with [vision].content_threshold, which shares the key
# name but sits on a different scale (0.22 there versus 27.0 here).
# NOTE(review): 27.0 appears to be PySceneDetect's documented default for
# ContentDetector — confirm against the installed version.
content_threshold = 27.0
# Minimum scene duration in seconds
# NOTE(review): PySceneDetect's own minimum scene length is expressed in
# frames; presumably the loader converts seconds to frames — confirm.
min_scene_duration_s = 1.5
# -----------------------------------------------------------------------------
# [whisper] — Dialogue / audio analysis
# -----------------------------------------------------------------------------
[whisper]
# Speech-to-text model settings for dialogue analysis.
model = "large-v3"
# "ar" is the ISO 639-1 code for Arabic.
# NOTE(review): presumably passed straight to the transcriber as the spoken
# language of the source movie — confirm, and change it for other titles.
language = "ar"
device = "cuda" # cuda | cpu
# NOTE(review): float16 is configured together with device = "cuda"; confirm
# the backend supports float16 before switching device to "cpu".
compute_type = "float16" # float16 | int8 | float32
# -----------------------------------------------------------------------------
# [llm] — Used ONLY for thematic segmentation / dramaturgy
# -----------------------------------------------------------------------------
[llm]
# Text-model endpoint and sampling settings. No API key is set in this table.
# NOTE(review): presumably the key is supplied outside this file (e.g. an
# environment variable) — confirm, and keep credentials out of this config.
# The provider, base_url and model values are repeated verbatim in [vision];
# TOML has no variable substitution, so a change must be made in both tables.
provider = "openrouter"
base_url = "https://openrouter.ai/api/v1"
model = "google/gemma-4-31b-it"
# Request timeout in seconds (per the key name).
timeout_seconds = 120
temperature = 0.3
max_tokens = 4096
# -----------------------------------------------------------------------------
# [vision] — Optional cached visual descriptions for ambiguous matching
# -----------------------------------------------------------------------------
[vision]
# Disabled by default to avoid surprise API cost. Enable when you want the
# matcher to ask a vision-capable model for cached 3-frame scene descriptions.
enabled = false
# provider, base_url and model currently hold the same values as in [llm].
# The tables are independent, so a change there is not picked up here.
provider = "openrouter"
base_url = "https://openrouter.ai/api/v1"
model = "google/gemma-4-31b-it"
# Compared with [llm]: shorter timeout (90 vs 120 s), temperature 0.0 instead
# of 0.3, and a much smaller token budget (350 vs 4096).
timeout_seconds = 90
temperature = 0.0
max_tokens = 350
# Cost controls: per beat, only the top scene-level candidates are described,
# and cached descriptions in .cache/vision_descriptions.json are reused.
# The ".cache" directory named above matches the current [paths].cache_dir;
# NOTE(review): presumably the real location follows cache_dir — confirm, and
# update this comment if cache_dir is changed.
scene_candidate_top_k = 8
max_new_descriptions_per_run = 12
# NOTE(review): the remaining keys in this table are not documented here and,
# judging by their names only, tune seeding, local scanning and multi-shot
# detection rather than API cost. Confirm each against the consuming code.
max_seed_scenes = 3
seed_points_per_scene = 12
seed_score = 0.88
# [cv.deep_scan] defines a key with the same name and the same value (6);
# confirm which table each code path reads so the two do not drift apart.
max_refine_candidates = 6
# Seconds, per the _s suffix.
local_scan_step_s = 0.12
local_scan_max_points_per_scene = 180
local_scan_top_candidates = 18
local_scan_tie_break_score_delta = 0.08
multi_shot_cut_corr_threshold = 0.20
# Seconds, per the _s suffix.
multi_shot_boundary_tolerance_s = 0.20
fullscan_fallback = false
# Shares its key name with [scene_detection].content_threshold but is on a
# different scale (0.22 here versus 27.0 there); the two are unrelated values.
content_threshold = 0.22
similarity_threshold = 0.18
# -----------------------------------------------------------------------------
# [export] — FCPXML / EDL export settings
# -----------------------------------------------------------------------------
[export]
# Must stay a quoted string: written as a bare TOML float, 1.10 is the same
# number as 1.1, and the trailing zero of the version would be lost.
fcpxml_version = "1.10"
# 23.976 agrees with the "2398" tag in the [paths].source_movie file name.
# NOTE(review): the NTSC film rate is exactly 24000/1001 (about 23.976024);
# confirm the exporter's timecode arithmetic does not drift over a
# feature-length timeline when given the rounded value.
edl_frame_rate = 23.976 # fps used in EDL timecode generation
output_format = "fcpxml" # fcpxml | edl | both