Improve segmented vision matching quality

This commit is contained in:
Melbar
2026-05-02 13:49:16 +02:00
parent 884a0d4232
commit e6bd0faf03
4 changed files with 114 additions and 22 deletions
+50 -7
View File
@@ -13,6 +13,7 @@ import base64
import json
import logging
import re
import time
import urllib.error
import urllib.request
from dataclasses import asdict
@@ -38,6 +39,17 @@ Return only compact JSON with these keys:
subject, setting, composition, action_phase, distinctive_objects, lighting_color, negatives.
Focus on stable visual facts and spatial layout. Ignore timecode overlays, subtitles, logos, compression, aspect ratio, and color grading differences."""
# HTTP status codes that _call_vision_model treats as transient: a response
# carrying one of these codes is retried after a delay while attempts remain.
_RETRYABLE_HTTP_CODES = {408, 409, 425, 429, 500, 502, 503, 504}
# Lowercase substrings searched for in the lowercased HTTP error body. A match
# (or an HTTP 402 status) makes _call_vision_model raise immediately instead
# of retrying, regardless of the status code.
_CREDIT_ERROR_PATTERNS = (
"insufficient credit",
"insufficient credits",
"no credits",
"out of credits",
"billing",
"quota exceeded",
"payment required",
)
def _cache_path(cfg: AppConfig) -> Path:
    """Return the path of the vision-description cache file.

    The file is named ``vision_descriptions.json`` and is located directly
    inside ``cfg.paths.cache_dir``.
    """
    cache_dir = cfg.paths.cache_dir
    return cache_dir / "vision_descriptions.json"
@@ -133,13 +145,44 @@ def _call_vision_model(label: str, image_urls: list[str], cfg: AppConfig) -> str
url = f"{vision.base_url.rstrip('/')}/chat/completions"
req = urllib.request.Request(url, data=body, headers=headers, method="POST")
try:
with urllib.request.urlopen(req, timeout=vision.timeout_seconds) as resp:
data = json.loads(resp.read().decode("utf-8"))
return str(data["choices"][0]["message"]["content"]).strip()
except urllib.error.HTTPError as exc:
body_text = exc.read().decode(errors="replace")
raise RuntimeError(f"Vision HTTP {exc.code} from {url}:\n{body_text}") from exc
delays_s = (8.0, 20.0, 45.0, 90.0)
for attempt in range(len(delays_s) + 1):
try:
with urllib.request.urlopen(req, timeout=vision.timeout_seconds) as resp:
data = json.loads(resp.read().decode("utf-8"))
return str(data["choices"][0]["message"]["content"]).strip()
except urllib.error.HTTPError as exc:
body_text = exc.read().decode(errors="replace")
lowered = body_text.lower()
if exc.code == 402 or any(pattern in lowered for pattern in _CREDIT_ERROR_PATTERNS):
raise RuntimeError(f"Vision HTTP {exc.code} from {url}:\n{body_text}") from exc
if exc.code not in _RETRYABLE_HTTP_CODES or attempt >= len(delays_s):
raise RuntimeError(f"Vision HTTP {exc.code} from {url}:\n{body_text}") from exc
delay_s = delays_s[attempt]
logger.warning(
"Vision HTTP %d for %s; waiting %.0fs before retry %d/%d.",
exc.code,
label,
delay_s,
attempt + 1,
len(delays_s),
)
time.sleep(delay_s)
except urllib.error.URLError as exc:
if attempt >= len(delays_s):
raise RuntimeError(f"Vision request failed for {url}: {exc}") from exc
delay_s = delays_s[attempt]
logger.warning(
"Vision request failed for %s (%s); waiting %.0fs before retry %d/%d.",
label,
exc.reason,
delay_s,
attempt + 1,
len(delays_s),
)
time.sleep(delay_s)
raise RuntimeError(f"Vision request failed unexpectedly for {url}")
def _description_key(kind: str, item_id: int, start_s: float, end_s: float, cfg: AppConfig) -> str: