# =============================================================================
# AI Trailer Generator v2 — Central Configuration
# =============================================================================
# All tunable parameters, thresholds, and file paths are defined here.
# NO hardcoded values are allowed in the Python source code.
# =============================================================================

[project]
name = "AI Trailer Generator v2"
version = "2.0.0"
log_level = "DEBUG"  # DEBUG | INFO | WARNING | ERROR

# -----------------------------------------------------------------------------
# [paths] — External video sources (read-only access)
# -----------------------------------------------------------------------------
[paths]
source_movie = "B:/Proxy/BehindTheRedDoor_FTR_1080P_2398_Fixed.mp4"
reference_trailer = "F:/Encodings/BehindTheRedDoor_Trailer_REFERENCE.mp4"

# Output destinations (inside project sandbox)
output_dir = "output"
cache_dir = ".cache"
proxy_dir = "proxy"

# -----------------------------------------------------------------------------
# [video] — Decode / proxy settings
# -----------------------------------------------------------------------------
[video]
# Target FPS for internal frame extraction (0 = use source FPS)
extract_fps = 1.0

# Proxy resolution for template matching (width x height)
proxy_width = 640
proxy_height = 360

# -----------------------------------------------------------------------------
# [cv] — Computer Vision engine parameters
#   Phase 1 — "Vibe Check" (histogram / perceptual hash scene-level filter)
#   Phase 2 — "Deep Scan"  (template matching frame-level precision)
# -----------------------------------------------------------------------------
[cv]

[cv.vibe_check]
# Number of top candidate scenes to forward to Deep Scan
top_k_candidates = 100

# Histogram comparison method:
#   CORREL=0 | CHISQR=1 | INTERSECT=2 | BHATTACHARYYA=3
hist_compare_method = 0

# Histogram bins per channel (hue, saturation)
hist_bins_hue = 50
hist_bins_saturation = 60

# pHash similarity threshold (lower = stricter; 0–64 range)
# NOTE: 12 is for near-duplicate detection. Cross-video matching
# (trailer vs source movie with different grading/compression)
# needs 25–35. Start at 32 and tighten if you get false positives.
phash_max_distance = 32

# ---- Text-Safe Crop -------------------------------------------------------
# Fraction of frame height to EXCLUDE from the top (e.g. logos, title cards)
crop_top_fraction = 0.15
# Fraction of frame height to EXCLUDE from the bottom (e.g. letterbox, subs)
crop_bottom_fraction = 0.30

[cv.deep_scan]
# Step size in SECONDS between sampled frames during the coarse scan pass
coarse_step_seconds = 0.5

# Minimum template match score (0.0–1.0) to accept a candidate as a hit
match_threshold = 0.65

# Store/report lower-confidence automatic candidates for visual review instead
# of dropping them as "NO MATCH". Confirmed exports can still use match_threshold.
provisional_match_threshold = 0.35

# Lower gate for entering temporal multi-frame refinement. The final decision
# still uses sequence/span scoring; this only avoids rejecting real matches
# because one midpoint frame is weak.
coarse_candidate_threshold = 0.40

# Candidate ranking weights. Duration coverage matters when the same visual
# shot appears multiple times: prefer the occurrence that can cover the beat.
# The four *_score_weight values below currently sum to 1.0.
sequence_score_weight = 0.55
span_score_weight = 0.15
coarse_score_weight = 0.10
duration_score_weight = 0.20
duration_tie_break_score_delta = 0.03
min_duration_coverage = 0.55
continuity_seed_offsets_s = [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0]
scene_seed_top_k = 30
scene_seed_points_per_scene = 6
content_rerank_candidate_count = 100
skip_coarse_scan_with_weighted_seeds = false

# cv2.matchTemplate method:
#   TM_CCOEFF_NORMED=5 (recommended), TM_CCORR_NORMED=3
match_method = 5

# If a coarse hit is found, refine by scanning ± this many seconds
refine_window_seconds = 0.6
# 0.04 s is ≈ 1 frame at 25 fps (≈ 0.96 frame at the 23.976 fps set in [export]).
refine_step_seconds = 0.04
content_align_window_seconds = 0.48
content_align_sample_step_s = 0.28
content_validation_weight = 0.35
provisional_content_threshold = 0.30

# When several adjacent frame offsets score almost the same, prefer the earlier
# one. This avoids matches that are visually correct but start a few frames late.
start_tie_break_score_delta = 0.015
start_preroll_frames = 0

# Automatic temporal verification after a coarse image hit.
# More candidates reduce false positives from visually similar shots.
sequence_candidate_count = 240
sequence_min_distance_s = 1.0
max_refine_candidates = 6

# Match-span detection: trim when the source starts drifting into a different shot.
span_sample_step_s = 0.08
trim_tail_frames = 4

# If a refined in-point lands this close to a detected scene end, treat it as
# the next scene. Scene detectors often place cuts a frame or two around the
# visible boundary.
scene_boundary_epsilon_s = 0.12
scoreable_luma_mean_min = 24.0
scoreable_luma_p90_min = 58.0
scoreable_contrast_min = 24.0

# -----------------------------------------------------------------------------
# [scene_detection] — PySceneDetect parameters (used to segment source movie)
# -----------------------------------------------------------------------------
[scene_detection]
# Threshold for ContentDetector (lower = more sensitive)
content_threshold = 27.0
# Minimum scene duration in seconds
min_scene_duration_s = 1.5

# -----------------------------------------------------------------------------
# [whisper] — Dialogue / audio analysis
# -----------------------------------------------------------------------------
[whisper]
model = "large-v3"
language = "ar"
device = "cuda"  # cuda | cpu
compute_type = "float16"  # float16 | int8 | float32

# -----------------------------------------------------------------------------
# [llm] — Used ONLY for thematic segmentation / dramaturgy
# -----------------------------------------------------------------------------
[llm]
provider = "openrouter"
base_url = "https://openrouter.ai/api/v1"
model = "google/gemma-4-31b-it"
timeout_seconds = 120
temperature = 0.3
max_tokens = 4096

# -----------------------------------------------------------------------------
# [vision] — Optional cached visual descriptions for ambiguous matching
# -----------------------------------------------------------------------------
[vision]
# Disabled by default to avoid surprise API cost. Enable when you want the
# matcher to ask a vision-capable model for cached 3-frame scene descriptions.
enabled = false
provider = "openrouter"
base_url = "https://openrouter.ai/api/v1"
model = "google/gemma-4-31b-it"
timeout_seconds = 90
temperature = 0.0
max_tokens = 350

# Cost controls: per beat, only the top scene-level candidates are described,
# and cached descriptions in .cache/vision_descriptions.json are reused.
# NOTE: max_refine_candidates and content_threshold below are [vision]'s own
# keys; they are separate from the same-named keys in [cv.deep_scan] and
# [scene_detection].
scene_candidate_top_k = 48
max_new_descriptions_per_run = 24
max_seed_scenes = 8
seed_points_per_scene = 12
seed_score = 0.88
max_refine_candidates = 12
local_scan_step_s = 0.12
local_scan_max_points_per_scene = 180
local_scan_top_candidates = 36
local_scan_tie_break_score_delta = 0.08
multi_shot_cut_corr_threshold = 0.55
multi_shot_boundary_tolerance_s = 0.20
fullscan_fallback = false
content_threshold = 0.22
similarity_threshold = 0.18

# -----------------------------------------------------------------------------
# [export] — FCPXML / EDL export settings
# -----------------------------------------------------------------------------
[export]
# Kept as a string: a float would collapse "1.10" to 1.1.
fcpxml_version = "1.10"
edl_frame_rate = 23.976  # fps used in EDL timecode generation
output_format = "fcpxml"  # fcpxml | edl | both