Kapitel 13/Tutorial.md aktualisiert
This commit is contained in:
@@ -1352,55 +1352,91 @@ source "$ENV_FILE"
|
|||||||
VOD_ID="${1:?VOD-ID muss übergeben werden}"
|
VOD_ID="${1:?VOD-ID muss übergeben werden}"
|
||||||
MODEL="${WHISPER_MODEL:-small}"
|
MODEL="${WHISPER_MODEL:-small}"
|
||||||
|
|
||||||
|
LOG_FILE="/srv/clipper/logs/$VOD_ID/subtitle.log"
|
||||||
|
mkdir -p "$(dirname "$LOG_FILE")"
|
||||||
|
exec >> "$LOG_FILE" 2>&1
|
||||||
|
|
||||||
echo "[INFO] Starte Untertitelung für VOD: $VOD_ID mit Modell: $MODEL"
|
echo "[INFO] Starte Untertitelung für VOD: $VOD_ID mit Modell: $MODEL"
|
||||||
|
|
||||||
# === Pfade setzen ===
|
# Pfade
|
||||||
VOD_OUT="/srv/clipper/out/$VOD_ID"
|
VOD_OUT="/srv/clipper/out/$VOD_ID"
|
||||||
CLIPS_ROOT="$VOD_OUT/clips"
|
CLIPS_ROOT="$VOD_OUT/clips"
|
||||||
WAV_DIR="/srv/clipper/temp/$VOD_ID/whisper_wav"
|
WAV_DIR="/srv/clipper/temp/$VOD_ID/whisper_wav"
|
||||||
LOG_FILE="/srv/clipper/logs/$VOD_ID/subtitle.log"
|
|
||||||
|
|
||||||
mkdir -p "$WAV_DIR"
|
mkdir -p "$WAV_DIR"
|
||||||
exec >> "$LOG_FILE" 2>&1
|
|
||||||
|
|
||||||
echo "[INFO] WAV-Verzeichnis: $WAV_DIR"
|
# Vorprüfung: Whisper installiert?
|
||||||
echo "[INFO] Clipverzeichnis: $CLIPS_ROOT"
|
if ! /srv/clipper/.venv/bin/python3 -c "import whisper" 2>/dev/null; then
|
||||||
|
echo "[FATAL] Python-Modul 'whisper' fehlt im venv." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
# === Whisper auf alle Clips anwenden ===
|
for clip_folder in "$CLIPS_ROOT"/*; do
|
||||||
find "$CLIPS_ROOT" -mindepth 1 -maxdepth 1 -type d | sort | while read -r CLIP_FOLDER; do
|
[ -d "$clip_folder" ] || continue
|
||||||
CLIP_PATH=$(find "$CLIP_FOLDER" -maxdepth 1 -type f -name "*.mp4" | head -n1)
|
clip_file="$(find "$clip_folder" -maxdepth 1 -name '*.mp4' | head -n1)"
|
||||||
|
[ -f "$clip_file" ] || { echo "[WARN] Kein Clip in $clip_folder – überspringe"; continue; }
|
||||||
|
|
||||||
if [[ ! -f "$CLIP_PATH" ]]; then
|
basename="$(basename "$clip_file" .mp4)"
|
||||||
echo "[WARN] Kein Clip gefunden in $CLIP_FOLDER – überspringe"
|
wav_file="$WAV_DIR/$basename.wav"
|
||||||
continue
|
srt_file="$clip_folder/$basename.srt"
|
||||||
fi
|
json_file="$clip_folder/$basename.json"
|
||||||
|
txt_file="$clip_folder/$basename.txt"
|
||||||
|
|
||||||
BASENAME="$(basename "$CLIP_PATH" .mp4)"
|
echo "[INFO] → Clip: $basename"
|
||||||
WAV_FILE="$WAV_DIR/$BASENAME.wav"
|
|
||||||
SRT_FILE="$CLIP_FOLDER/$BASENAME.srt"
|
|
||||||
JSON_FILE="$CLIP_FOLDER/$BASENAME.json"
|
|
||||||
TXT_FILE="$CLIP_FOLDER/$BASENAME.txt"
|
|
||||||
|
|
||||||
echo "[INFO] → Clip: $BASENAME"
|
if [ ! -f "$wav_file" ]; then
|
||||||
|
|
||||||
if [[ ! -f "$WAV_FILE" ]]; then
|
|
||||||
echo "[INFO] Extrahiere WAV..."
|
echo "[INFO] Extrahiere WAV..."
|
||||||
ffmpeg -hide_banner -loglevel error -y -i "$CLIP_PATH" -vn -ac 1 -ar 16000 -f wav "$WAV_FILE"
|
# Extract mono 16 kHz audio for Whisper.
# -nostdin keeps ffmpeg from reading (and consuming) the script's stdin.
# If extraction fails, remove any partial output so the "WAV already exists"
# check cannot reuse a broken file on a later run, then skip this clip.
ffmpeg -hide_banner -loglevel error -nostdin -y \
  -i "$clip_file" -vn -ac 1 -ar 16000 "$wav_file" \
  || { echo "[ERROR] WAV-Extraktion fehlgeschlagen für $basename – überspringe"; rm -f -- "$wav_file"; continue; }
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "[INFO] Starte Whisper..."
|
echo "[INFO] Starte Whisper-Transkription für $basename..."
|
||||||
whisper "$WAV_FILE" \
|
|
||||||
--model "$MODEL" \
|
# Python-Block korrekt im venv ausführen
|
||||||
--output_format srt \
|
/srv/clipper/.venv/bin/python3 -u <<EOF >> "$LOG_FILE" 2>&1
|
||||||
--output_format json \
|
import os
|
||||||
--output_format txt \
|
import whisper
|
||||||
--output_dir "$CLIP_FOLDER" \
|
import json
|
||||||
--fp16 False
|
from pathlib import Path
|
||||||
|
import logging
|
||||||
|
|
||||||
|
print = lambda *a, **k: __builtins__.print(*a, flush=True, **k)
|
||||||
|
|
||||||
|
logger = logging.getLogger("subtitle_logger")
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
handler = logging.StreamHandler()
|
||||||
|
handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
|
||||||
|
logger.addHandler(handler)
|
||||||
|
|
||||||
|
# Inputs for this transcription run. The shell expands the dollar-prefixed
# names below inside the unquoted heredoc, so Python only ever sees the
# resulting literal strings.
# NOTE(review): a double quote or backslash in a clip name would break these
# literals; consider passing the values via the environment instead.
MODEL = os.environ.get("MODEL", "$MODEL")
|
||||||
|
wav_path = Path("$wav_file")
|
||||||
|
srt_path = Path("$srt_file")
|
||||||
|
json_path = Path("$json_file")
|
||||||
|
txt_path = Path("$txt_file")
|
||||||
|
|
||||||
|
logger.info(f"Modell laden: {MODEL}")
|
||||||
|
model = whisper.load_model(MODEL)
|
||||||
|
|
||||||
|
result = model.transcribe(str(wav_path), fp16=False)
|
||||||
|
|
||||||
|
logger.info("Schreibe SRT...")
|
||||||
|
def srt_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    # Work in whole milliseconds so rounding cannot produce a 1000 ms field.
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

# Write one SRT cue per segment: index, time range, text, blank line.
# Plain float seconds are not valid SRT, so both bounds go through
# srt_timestamp. The newline escapes keep their doubled backslash because
# this code sits in an unquoted shell heredoc, which collapses each pair to a
# single backslash before Python parses it.
with srt_path.open("w", encoding="utf-8") as f:
    for seg in result["segments"]:
        start = srt_timestamp(seg["start"])
        end = srt_timestamp(seg["end"])
        f.write(f"{seg['id']+1}\\n{start} --> {end}\\n{seg['text'].strip()}\\n\\n")
|
||||||
|
|
||||||
|
logger.info("Schreibe JSON...")
|
||||||
|
with json_path.open("w", encoding="utf-8") as f:
|
||||||
|
json.dump(result, f, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
|
logger.info("Schreibe TXT...")
|
||||||
|
with txt_path.open("w", encoding="utf-8") as f:
|
||||||
|
f.write(result["text"].strip())
|
||||||
|
|
||||||
|
# Plain string on purpose: the clip name is substituted by the shell, and as an
# f-string a brace in that name would make the whole script a syntax error.
logger.info("[DONE] $basename abgeschlossen.")
|
||||||
|
EOF
|
||||||
|
|
||||||
echo "[DONE] Untertitel erstellt: $SRT_FILE, $JSON_FILE, $TXT_FILE"
|
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "[DONE] Untertitel abgeschlossen."
|
echo "[DONE] Untertitelung abgeschlossen für VOD $VOD_ID."
|
||||||
```
|
```
|
||||||
Mit
|
Mit
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
Reference in New Issue
Block a user