Fix forehead_touch action group + always-fresh cutter assets
1. The action-group classifier conflated object touches with person touches. "man touches the red door with a small object" was being tagged as forehead_touch because "touch" was in the forehead_touch needle set. That made the realign pass yank Beat 16 from scene 451 (correct: man painting red door, IV stand) over to scene 623 (woman/man in bed) — a totally wrong shot at score 0.344.

Fix: removed the generic "touch*" verbs from forehead_touch's needle set. forehead_touch is now added in _semantic_action_groups() only when a touch verb is paired with an explicit body-part target (forehead, face, cheek, head, hand, ...) and is not paired with an object target (door, handle, brush, tool, lock, ...).

Effect on Beat 16 after `match --beat 16 --vision`: scene 623 in=5476.28 score=0.344 -> scene 451 in=3912.48 score=0.626.

2. Cutter-report stills/clips were cached by comparing the output file's mtime against the source video's mtime, so a match-position change without a video change served stale frames from the previous match. Dropped the mtime cache; both extractors now render fresh every time. Slower (about a minute per full regen) but correct.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
|
Before Width: | Height: | Size: 1.3 KiB After Width: | Height: | Size: 2.0 KiB |
|
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 8.4 KiB |
|
Before Width: | Height: | Size: 5.8 KiB After Width: | Height: | Size: 9.0 KiB |
|
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 8.9 KiB |
|
Before Width: | Height: | Size: 6.0 KiB After Width: | Height: | Size: 10 KiB |
|
Before Width: | Height: | Size: 4.9 KiB After Width: | Height: | Size: 8.2 KiB |
|
Before Width: | Height: | Size: 5.8 KiB After Width: | Height: | Size: 9.9 KiB |
|
Before Width: | Height: | Size: 6.2 KiB After Width: | Height: | Size: 10 KiB |
|
Before Width: | Height: | Size: 2.3 KiB After Width: | Height: | Size: 4.1 KiB |
|
Before Width: | Height: | Size: 4.0 KiB After Width: | Height: | Size: 6.5 KiB |
|
Before Width: | Height: | Size: 5.0 KiB After Width: | Height: | Size: 8.3 KiB |
|
Before Width: | Height: | Size: 5.3 KiB After Width: | Height: | Size: 8.6 KiB |
|
Before Width: | Height: | Size: 7.4 KiB After Width: | Height: | Size: 13 KiB |
|
Before Width: | Height: | Size: 6.6 KiB After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 2.5 KiB After Width: | Height: | Size: 4.4 KiB |
|
Before Width: | Height: | Size: 3.3 KiB After Width: | Height: | Size: 5.7 KiB |
|
Before Width: | Height: | Size: 6.0 KiB After Width: | Height: | Size: 10 KiB |
|
Before Width: | Height: | Size: 6.1 KiB After Width: | Height: | Size: 10 KiB |
|
Before Width: | Height: | Size: 6.4 KiB After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 6.6 KiB After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 6.5 KiB After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 6.6 KiB After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 5.2 KiB After Width: | Height: | Size: 9.1 KiB |
|
Before Width: | Height: | Size: 4.7 KiB After Width: | Height: | Size: 8.0 KiB |
|
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 25 KiB |
|
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 21 KiB |
|
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 20 KiB |
|
Before Width: | Height: | Size: 10 KiB After Width: | Height: | Size: 17 KiB |
|
Before Width: | Height: | Size: 4.6 KiB After Width: | Height: | Size: 20 KiB |
|
Before Width: | Height: | Size: 6.4 KiB After Width: | Height: | Size: 10 KiB |
|
Before Width: | Height: | Size: 9.4 KiB After Width: | Height: | Size: 16 KiB |
|
Before Width: | Height: | Size: 8.4 KiB After Width: | Height: | Size: 14 KiB |
|
Before Width: | Height: | Size: 6.3 KiB After Width: | Height: | Size: 10 KiB |
|
Before Width: | Height: | Size: 4.7 KiB After Width: | Height: | Size: 7.9 KiB |
|
Before Width: | Height: | Size: 5.8 KiB After Width: | Height: | Size: 9.5 KiB |
|
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 9.2 KiB |
|
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 9.1 KiB |
|
Before Width: | Height: | Size: 6.8 KiB After Width: | Height: | Size: 12 KiB |
|
Before Width: | Height: | Size: 5.6 KiB After Width: | Height: | Size: 9.5 KiB |
|
Before Width: | Height: | Size: 4.0 KiB After Width: | Height: | Size: 6.7 KiB |
|
Before Width: | Height: | Size: 5.6 KiB After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 5.6 KiB After Width: | Height: | Size: 9.7 KiB |
|
Before Width: | Height: | Size: 7.1 KiB After Width: | Height: | Size: 12 KiB |
|
Before Width: | Height: | Size: 3.8 KiB After Width: | Height: | Size: 6.1 KiB |
@@ -19,7 +19,7 @@ video { width: 100%; border-radius: 6px; box-shadow: 0 4px 6px rgba(0,0,0,0.5);
|
|||||||
.code-hint { background: #000; padding: 10px; border-radius: 4px; font-family: monospace; font-size: 0.9em; margin-top: 15px; color: #a3e635; }
|
.code-hint { background: #000; padding: 10px; border-radius: 4px; font-family: monospace; font-size: 0.9em; margin-top: 15px; color: #a3e635; }
|
||||||
</style></head><body>
|
</style></head><body>
|
||||||
<h1>AI Trailer Generator — Match Report</h1>
|
<h1>AI Trailer Generator — Match Report</h1>
|
||||||
<div class='stats'>Total Beats: 25 | Matched: 19</div>
|
<div class='stats'>Total Beats: 25 | Matched: 20</div>
|
||||||
<script>
|
<script>
|
||||||
function syncBeat(row) {
|
function syncBeat(row) {
|
||||||
const vids = row.querySelectorAll('video');
|
const vids = row.querySelectorAll('video');
|
||||||
@@ -348,14 +348,18 @@ document.addEventListener('DOMContentLoaded', () => document.querySelectorAll('.
|
|||||||
<h3>Beat 016</h3>
|
<h3>Beat 016</h3>
|
||||||
<p><b>Type:</b> UNKNOWN</p>
|
<p><b>Type:</b> UNKNOWN</p>
|
||||||
<p><b>Trailer:</b> 61.48s → 64.48s</p>
|
<p><b>Trailer:</b> 61.48s → 64.48s</p>
|
||||||
<p class='status-miss'>NO MATCH</p>
|
<p style='color: #fbbf24; font-weight: bold; font-size: 1.1em;'>PROVISIONAL MATCH</p>
|
||||||
|
<p><b>Scene ID:</b> 451</p>
|
||||||
|
<p><b>Movie In:</b> 3912.48s</p>
|
||||||
|
<p><b>Source Dur:</b> 2.80s</p>
|
||||||
|
<p><b>Unmatched Tail:</b> 0.12s placeholder</p>
|
||||||
|
<p><b>Score:</b> <span class='score'>0.626</span></p>
|
||||||
|
<p style='color: #fbbf24; font-size: 0.9em;'>Some trailer frames are still unmatched; report fills only those gaps with placeholder black.</p>
|
||||||
|
<p style='color: #fbbf24; font-size: 0.9em;'>⚠️ Score below 0.80. Verify visually.</p>
|
||||||
<div class='code-hint'>python cli.py rematch --beat 16</div>
|
<div class='code-hint'>python cli.py rematch --beat 16</div>
|
||||||
</div>
|
</div>
|
||||||
<div class='videos'>
|
<div class='videos'>
|
||||||
<div class='video-container'>
|
<div class='compare'><p>Frame-Locked Compare</p><video src='beat_016_compare.mp4' controls loop muted autoplay></video></div>
|
||||||
<div class='video-col'><p>Reference Trailer</p><video src='beat_016_ref.mp4' controls loop muted autoplay></video></div>
|
|
||||||
<div class='video-col'><p>Matched Source</p><div style='width: 100%; aspect-ratio: 16/9; background: #222; display: flex; align-items: center; justify-content: center; border-radius: 6px; color: #555;'>No Match</div></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class='beat-row'>
|
<div class='beat-row'>
|
||||||
|
|||||||
@@ -136,18 +136,12 @@ CLIP_WIDTH = 480
|
|||||||
CLIP_MAX_DURATION_S = 30.0
|
CLIP_MAX_DURATION_S = 30.0
|
||||||
|
|
||||||
|
|
||||||
def _stale(out: Path, src: Path) -> bool:
|
|
||||||
try:
|
|
||||||
return not (out.exists() and out.stat().st_mtime >= src.stat().st_mtime and out.stat().st_size > 0)
|
|
||||||
except OSError:
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def extract_still(video_path: Path, t_s: float, out: Path) -> bool:
|
def extract_still(video_path: Path, t_s: float, out: Path) -> bool:
|
||||||
|
"""Always render fresh. The match position can change without the source
|
||||||
|
video changing, so a mtime-based cache would silently serve stale frames
|
||||||
|
from the previous match. The cutter expects bit-current previews."""
|
||||||
if not video_path.exists():
|
if not video_path.exists():
|
||||||
return False
|
return False
|
||||||
if not _stale(out, video_path):
|
|
||||||
return True
|
|
||||||
out.parent.mkdir(parents=True, exist_ok=True)
|
out.parent.mkdir(parents=True, exist_ok=True)
|
||||||
cmd = [
|
cmd = [
|
||||||
"ffmpeg", "-y", "-loglevel", "error",
|
"ffmpeg", "-y", "-loglevel", "error",
|
||||||
@@ -166,10 +160,9 @@ def extract_still(video_path: Path, t_s: float, out: Path) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
def extract_clip(video_path: Path, start_s: float, duration_s: float, out: Path) -> bool:
|
def extract_clip(video_path: Path, start_s: float, duration_s: float, out: Path) -> bool:
|
||||||
|
"""Always render fresh — see extract_still for rationale."""
|
||||||
if not video_path.exists():
|
if not video_path.exists():
|
||||||
return False
|
return False
|
||||||
if not _stale(out, video_path):
|
|
||||||
return True
|
|
||||||
out.parent.mkdir(parents=True, exist_ok=True)
|
out.parent.mkdir(parents=True, exist_ok=True)
|
||||||
cmd = [
|
cmd = [
|
||||||
"ffmpeg", "-y", "-loglevel", "error",
|
"ffmpeg", "-y", "-loglevel", "error",
|
||||||
|
|||||||
@@ -53,7 +53,11 @@ _CREDIT_ERROR_PATTERNS = (
|
|||||||
|
|
||||||
_ACTION_GROUPS = {
|
_ACTION_GROUPS = {
|
||||||
"kiss": {"kiss", "kisses", "kissing", "kissed"},
|
"kiss": {"kiss", "kisses", "kissing", "kissed"},
|
||||||
"forehead_touch": {"forehead", "foreheads", "touch", "touches", "touching", "touched"},
|
# "touch" is intentionally NOT in forehead_touch — a generic "touch" can
|
||||||
|
# mean "touches the door / handle / brush" which is unrelated to person
|
||||||
|
# contact. forehead_touch is added in _semantic_action_groups() only
|
||||||
|
# when an explicit body-part target is present.
|
||||||
|
"forehead_touch": {"forehead", "foreheads"},
|
||||||
"approach": {"approach", "approaches", "approaching", "closer", "lean", "leans", "leaning"},
|
"approach": {"approach", "approaches", "approaching", "closer", "lean", "leans", "leaning"},
|
||||||
"talk": {"talk", "talking", "speak", "speaking", "conversation", "conversing"},
|
"talk": {"talk", "talking", "speak", "speaking", "conversation", "conversing"},
|
||||||
"hand": {"hand", "hands", "holding", "holds", "raise", "raises", "raising", "lift", "lifting"},
|
"hand": {"hand", "hands", "holding", "holds", "raise", "raises", "raising", "lift", "lifting"},
|
||||||
@@ -61,6 +65,21 @@ _ACTION_GROUPS = {
|
|||||||
"look_down": {"down", "lowering", "lowers"},
|
"look_down": {"down", "lowering", "lowers"},
|
||||||
"turn": {"turn", "turns", "turning"},
|
"turn": {"turn", "turns", "turning"},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Words that, when paired with "touch"-family verbs, signal an object touch
|
||||||
|
# (door, handle, brush, tool, ...) rather than a person-on-person touch.
|
||||||
|
_OBJECT_TOUCH_TARGETS = {
|
||||||
|
"door", "doors", "handle", "knob", "lock", "mechanism", "brush", "tool",
|
||||||
|
"pole", "stand", "rail", "button", "switch", "wall", "surface", "object",
|
||||||
|
"knife", "blade", "weapon", "phone", "glass", "bottle", "cup",
|
||||||
|
}
|
||||||
|
# Words that, when paired with "touch", signal a person-on-person touch
|
||||||
|
# (forehead/face/skin/...). These keep forehead_touch as a strong action.
|
||||||
|
_PERSON_TOUCH_TARGETS = {
|
||||||
|
"forehead", "foreheads", "face", "faces", "cheek", "cheeks",
|
||||||
|
"head", "skin", "lips", "lip", "neck", "shoulder", "shoulders",
|
||||||
|
"arm", "arms", "chest", "hand", "hands", "hair", "body",
|
||||||
|
}
|
||||||
_STRONG_ACTION_GROUPS = {"kiss", "forehead_touch", "approach", "hand", "cutting"}
|
_STRONG_ACTION_GROUPS = {"kiss", "forehead_touch", "approach", "hand", "cutting"}
|
||||||
|
|
||||||
|
|
||||||
@@ -285,6 +304,17 @@ def _semantic_action_groups(text: str) -> set[str]:
|
|||||||
for name, needles in _ACTION_GROUPS.items()
|
for name, needles in _ACTION_GROUPS.items()
|
||||||
if terms & needles
|
if terms & needles
|
||||||
}
|
}
|
||||||
|
# Distinguish person-on-person touches from object touches. "touches the
|
||||||
|
# red door" must NOT count as forehead_touch; "touches her forehead"
|
||||||
|
# must. We look at the action_phase first (most specific), fall back to
|
||||||
|
# the full description.
|
||||||
|
phase = _action_phase_text(text)
|
||||||
|
touch_present = any(w in phase for w in ("touch", "touches", "touching", "touched"))
|
||||||
|
if touch_present:
|
||||||
|
person_target = any(w in phase for w in _PERSON_TOUCH_TARGETS)
|
||||||
|
object_target = any(w in phase for w in _OBJECT_TOUCH_TARGETS)
|
||||||
|
if person_target and not object_target:
|
||||||
|
groups.add("forehead_touch")
|
||||||
if "moving closer" in lowered or "move closer" in lowered:
|
if "moving closer" in lowered or "move closer" in lowered:
|
||||||
groups.add("approach")
|
groups.add("approach")
|
||||||
if "face-to-face" in lowered or "faces facing" in lowered:
|
if "face-to-face" in lowered or "faces facing" in lowered:
|
||||||
|
|||||||