Initial project import

2026-05-02 09:07:41 +02:00
commit 8e1bcf142f
38 changed files with 7928 additions and 0 deletions
@@ -0,0 +1,198 @@
+# =============================================================================
+# AI Trailer Generator v2 — Central Configuration
+# =============================================================================
+# All tunable parameters, thresholds, and file paths are defined here.
+# NO hardcoded values are allowed in the Python source code.
+# =============================================================================
+
+[project]
+name        = "AI Trailer Generator v2"
+version     = "2.0.0"
+log_level   = "INFO"   # DEBUG | INFO | WARNING | ERROR
+
+# -----------------------------------------------------------------------------
+# [paths] — External video sources (read-only access) and output folders
+# -----------------------------------------------------------------------------
+[paths]
+source_movie       = "B:/Proxy/BehindTheRedDoor_FTR_1080P_2398_Fixed.mp4"
+reference_trailer  = "F:/Encodings/BehindTheRedDoor_Trailer_REFERENCE.mp4"
+
+# Output destinations (inside project sandbox)
+output_dir         = "output"
+cache_dir          = ".cache"
+proxy_dir          = "proxy"
+
+# -----------------------------------------------------------------------------
+# [video] — Decode / proxy settings
+# -----------------------------------------------------------------------------
+[video]
+# Target FPS for internal frame extraction (0 = use source FPS)
+extract_fps         = 1.0
+# Proxy resolution for template matching (width x height)
+proxy_width         = 640
+proxy_height        = 360
+
+# -----------------------------------------------------------------------------
+# [cv] — Computer Vision engine parameters
+# Phase 1 — "Vibe Check" (histogram / perceptual hash scene-level filter)
+# Phase 2 — "Deep Scan"  (template matching frame-level precision)
+# -----------------------------------------------------------------------------
+[cv]
+
+[cv.vibe_check]
+# Number of top candidate scenes to forward to Deep Scan
+top_k_candidates      = 100
+
+# Histogram comparison method (values match OpenCV's cv2.HISTCMP_* constants):
+# CORREL=0 | CHISQR=1 | INTERSECT=2 | BHATTACHARYYA=3
+hist_compare_method   = 0
+
+# Histogram bins per channel (hue, saturation)
+hist_bins_hue         = 50
+hist_bins_saturation  = 60
+
+# Maximum pHash distance accepted as a match (lower = stricter; 0–64 range)
+# NOTE: A value of 12 suits near-duplicate detection only. Cross-video
+#       matching (trailer vs source movie with different grading/compression)
+#       needs 25–35. Start at 32 and tighten if you get false positives.
+phash_max_distance    = 32
+
+# ---- Text-Safe Crop -------------------------------------------------------
+# Fraction of frame height to EXCLUDE from the top (e.g. logos, title cards)
+crop_top_fraction    = 0.15
+# Fraction of frame height to EXCLUDE from the bottom (e.g. letterbox, subs)
+crop_bottom_fraction = 0.30
+
+[cv.deep_scan]
+# Step size in SECONDS between sampled frames during the coarse scan pass
+coarse_step_seconds   = 0.5
+
+# Minimum template match score (0.0–1.0) to accept a candidate as a hit
+match_threshold       = 0.65
+
+# Store/report lower-confidence automatic candidates for visual review instead
+# of dropping them as "NO MATCH". Confirmed exports can still use match_threshold.
+provisional_match_threshold = 0.45
+
+# Lower gate for entering temporal multi-frame refinement. The final decision
+# still uses sequence/span scoring; this only avoids rejecting real matches
+# because one midpoint frame is weak.
+coarse_candidate_threshold = 0.50
+
+# Candidate ranking weights (the four *_score_weight values below sum to 1.0).
+# Duration coverage matters when the same visual shot appears multiple times:
+# prefer the occurrence that can cover the beat.
+sequence_score_weight = 0.55
+span_score_weight     = 0.15
+coarse_score_weight   = 0.10
+duration_score_weight = 0.20
+duration_tie_break_score_delta = 0.03
+min_duration_coverage = 0.65
+continuity_seed_offsets_s = [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0]
+scene_seed_top_k = 30
+scene_seed_points_per_scene = 6
+content_rerank_candidate_count = 100
+skip_coarse_scan_with_weighted_seeds = false
+
+# cv2.matchTemplate method:
+# TM_CCOEFF_NORMED=5 (recommended), TM_CCORR_NORMED=3
+match_method          = 5
+
+# If a coarse hit is found, refine by scanning ± this many seconds
+refine_window_seconds = 0.6
+refine_step_seconds   = 0.04  # 1 frame at 25 fps (≈ 0.96 frame at 23.976 fps)
+content_align_window_seconds = 0.48
+content_align_sample_step_s  = 0.28
+content_validation_weight    = 0.35
+provisional_content_threshold = 0.42
+
+# When several adjacent frame offsets score almost the same, prefer the earlier
+# one. This avoids matches that are visually correct but start a few frames late.
+start_tie_break_score_delta = 0.015
+start_preroll_frames        = 0
+
+# Automatic temporal verification after a coarse image hit.
+# Using more candidates reduces false positives from visually similar shots.
+sequence_candidate_count = 240
+sequence_min_distance_s  = 1.0
+max_refine_candidates    = 6
+
+# Match-span detection: trim when the source starts drifting into a different shot.
+span_sample_step_s       = 0.08
+trim_tail_frames         = 4
+
+# If a refined in-point lands within this many seconds of a detected scene
+# end, treat it as belonging to the next scene. Scene detectors often place
+# cuts a frame or two away from the visible boundary.
+scene_boundary_epsilon_s = 0.12
+scoreable_luma_mean_min = 24.0
+scoreable_luma_p90_min  = 58.0
+scoreable_contrast_min  = 24.0
+
+# -----------------------------------------------------------------------------
+# [scene_detection] — PySceneDetect parameters (used to segment source movie)
+# -----------------------------------------------------------------------------
+[scene_detection]
+# Threshold for ContentDetector (lower = more sensitive)
+content_threshold     = 27.0
+# Minimum scene duration in seconds
+min_scene_duration_s  = 1.5
+
+# -----------------------------------------------------------------------------
+# [whisper] — Dialogue / audio analysis
+# -----------------------------------------------------------------------------
+[whisper]
+model              = "large-v3"
+language           = "ar"
+device             = "cuda"        # cuda | cpu
+compute_type       = "float16"     # float16 | int8 | float32
+
+# -----------------------------------------------------------------------------
+# [llm] — Used ONLY for thematic segmentation / dramaturgy
+# -----------------------------------------------------------------------------
+[llm]
+provider           = "openrouter"
+base_url           = "https://openrouter.ai/api/v1"
+model              = "google/gemma-4-31b-it"
+timeout_seconds    = 120
+temperature        = 0.3
+max_tokens         = 4096
+
+# -----------------------------------------------------------------------------
+# [vision] — Optional cached visual descriptions for ambiguous matching
+# -----------------------------------------------------------------------------
+[vision]
+# Disabled by default to avoid surprise API cost. Enable when you want the
+# matcher to ask a vision-capable model for cached 3-frame scene descriptions.
+enabled            = false
+provider           = "openrouter"
+base_url           = "https://openrouter.ai/api/v1"
+model              = "google/gemma-4-31b-it"
+timeout_seconds    = 90
+temperature        = 0.0
+max_tokens         = 350
+
+# Cost controls: per beat, only the top scene-level candidates are described,
+# and cached descriptions in .cache/vision_descriptions.json are reused.
+scene_candidate_top_k       = 8
+max_new_descriptions_per_run = 12
+max_seed_scenes             = 3
+seed_points_per_scene       = 12
+seed_score                  = 0.88
+max_refine_candidates       = 6
+local_scan_step_s           = 0.12
+local_scan_max_points_per_scene = 180
+local_scan_top_candidates   = 18
+local_scan_tie_break_score_delta = 0.08
+multi_shot_cut_corr_threshold = 0.20
+multi_shot_boundary_tolerance_s = 0.20
+fullscan_fallback           = false
+content_threshold           = 0.22
+similarity_threshold        = 0.18
+
+# -----------------------------------------------------------------------------
+# [export] — FCPXML / EDL export settings
+# -----------------------------------------------------------------------------
+[export]
+fcpxml_version     = "1.10"
+edl_frame_rate     = 23.976        # fps used in EDL timecode generation
+output_format      = "fcpxml"      # fcpxml | edl | both