diff --git a/Kapitel 13/Tutorial.md b/Kapitel 13/Tutorial.md index ce65ed0..8967841 100644 --- a/Kapitel 13/Tutorial.md +++ b/Kapitel 13/Tutorial.md @@ -25,7 +25,7 @@ Clips sind der beste Weg, lange Streams in kurze, teilbare Highlights zu verwand Wir beginnen mit einem frischen Debian‑12‑LXC in Proxmox, benennen ihn `clipper` und vergeben die im Abschnitt oben genannten Ressourcen. Danach bringen wir das System auf Stand und installieren die Grundwerkzeuge: ```bash apt update && apt upgrade -y -apt install -y curl unzip ffmpeg inotify-tools pv bc +apt install -y curl unzip ffmpeg inotify-tools pv bc git ``` Eine korrekte Systemzeit ist entscheidend, da Schnittmarken später auf exakten Sekunden basieren. Prüfe die Zeit mit: @@ -149,7 +149,7 @@ Erzeuge und fülle eine virtuelle Umgebung für die spätere Analyse: python3 -m venv /srv/clipper/.venv source /srv/clipper/.venv/bin/activate pip install --upgrade pip -pip install librosa soundfile numpy scipy +pip install librosa soundfile numpy scipy git+https://github.com/openai/whisper.git deactivate ``` Wechsle für die nächsten Schritte **im Benutzer clipper** weiter. 
@@ -1186,15 +1186,33 @@ Lege mit ```nano /bin/clipper-cut-vod ``` die benötigte Datei a ```bash #!/bin/bash +set -euo pipefail VOD_ID="$1" -# Abschnitt 1: Namensmodus erkennen +echo "[START] Starte Clip-Schnitt für VOD: $VOD_ID" +# Pfade ETC_DIR="/srv/clipper/etc" +TMP_DIR="/srv/clipper/temp/$VOD_ID" +OUT_DIR="/srv/clipper/out/$VOD_ID" +VOD_PATH="$OUT_DIR/original/$VOD_ID.mp4" +CANDIDATES_JSON="$TMP_DIR/candidates.json" +USED_PATH="$ETC_DIR/used.json" +LOG_DIR="/srv/clipper/logs/$VOD_ID" +LOG_FILE="$LOG_DIR/cut-clips.log" +CLIPS_DIR="$OUT_DIR/clips" + +mkdir -p "$LOG_DIR" "$CLIPS_DIR" +touch "$LOG_FILE" +echo 0 > /dev/null # Suppress debug=0 parse errors from unwanted stdin + +log() { echo "[INFO] $1" | tee -a "$LOG_FILE" >&2; } +warn() { echo "[WARN] $1" | tee -a "$LOG_FILE" >&2; } +error() { echo "[ERROR] $1" | tee -a "$LOG_FILE" >&2; exit 1; } + +# Namenslogik CODE_POOL="$ETC_DIR/codename_pool.txt" ADJ_POOL="$ETC_DIR/adjektive_de.txt" - -# Standardmodus NAMING_MODE="fallback" if [[ -s "$CODE_POOL" && -s "$ADJ_POOL" ]]; then @@ -1203,24 +1221,20 @@ elif [[ -s "$CODE_POOL" ]]; then NAMING_MODE="codename" fi -echo "[INFO] Namensmodus erkannt: $NAMING_MODE" +log "Namensmodus: $NAMING_MODE" -# Abschnitt 2: Namen wählen (robust & ohne Wiederholung) +declare -A USED_MAP +if [[ -f "$USED_PATH" ]]; then + while IFS= read -r name; do + USED_MAP["$name"]=1 + done < <(jq -r '.[]' "$USED_PATH" || echo "") +fi -USED_PATH="/srv/clipper/etc/used.json" -mkdir -p "$(dirname "$USED_PATH")" -touch "$USED_PATH" -USED_LIST=$(jq -r '.[]' "$USED_PATH" 2>/dev/null || echo "") - -is_used() { - grep -Fxq "$1" <<< "$USED_LIST" -} +is_used() { [[ -n "${USED_MAP[$1]+1}" ]]; } choose_final_name() { case "$NAMING_MODE" in - "codename") - mapfile -t CANDIDATES < "$CODE_POOL" - ;; + "codename") mapfile -t CANDIDATES < "$CODE_POOL" ;; "adjektiv_codename") mapfile -t CODES < "$CODE_POOL" mapfile -t ADJS < "$ADJ_POOL" @@ -1231,95 +1245,176 @@ choose_final_name() { done done ;; - "fallback") - echo 
"$VOD_ID" - return 0 - ;; + *) echo "$VOD_ID"; return 0 ;; esac - # Filtere bereits genutzte Namen - AVAILABLE=() - for name in "${CANDIDATES[@]}"; do - if ! is_used "$name"; then - AVAILABLE+=("$name") + FINAL_NAME="" + for candidate in $(printf "%s\n" "${CANDIDATES[@]}" | shuf); do + if ! is_used "$candidate"; then + FINAL_NAME="$candidate" + log "Gewählter Codename: $FINAL_NAME" + break fi done - if [[ ${#AVAILABLE[@]} -eq 0 ]]; then - echo "[WARN] Keine verfügbaren Namen mehr – nutze Fallback: $VOD_ID" - echo "$VOD_ID" - else - FINAL_NAME="${AVAILABLE[RANDOM % ${#AVAILABLE[@]}]}" - echo "[INFO] Gewählter Codename: $FINAL_NAME" - echo "$FINAL_NAME" - fi + [[ -n "$FINAL_NAME" ]] && echo "$FINAL_NAME" || { warn "Keine freien Namen – fallback: $VOD_ID"; echo "$VOD_ID"; } } -# Codename in used.json speichern (wenn nicht fallback) +FINAL_NAME=$(choose_final_name) + if [[ "$FINAL_NAME" != "$VOD_ID" ]]; then - if jq -e . >/dev/null 2>&1 <<< "$USED_LIST"; then - UPDATED=$(jq --arg name "$FINAL_NAME" '. + [$name]' <<< "$USED_LIST") - else - UPDATED="[$(printf '%s\n' "$USED_LIST" | jq -R . | jq -s .), \"$FINAL_NAME\"]" - fi - echo "$UPDATED" > "$USED_PATH" + jq --arg name "$FINAL_NAME" '. 
+ [$name]' "$USED_PATH" 2>/dev/null > "$USED_PATH.tmp" || echo "[\"$FINAL_NAME\"]" > "$USED_PATH.tmp" + mv "$USED_PATH.tmp" "$USED_PATH" fi -# Verzeichnisstruktur -TMP_DIR="/srv/clipper/temp/$VOD_ID" -VOD_PATH="/srv/clipper/out/$VOD_ID/original/$VOD_ID.mp4" -CANDIDATES_JSON="$TMP_DIR/candidates.json" -CLIPS_DIR="/srv/clipper/out/$VOD_ID/clips" - -mkdir -p "$CLIPS_DIR/combined" -mkdir -p "$CLIPS_DIR/audio" -mkdir -p "$CLIPS_DIR/video" - -# Ziel für temporären CSV-Index INDEX_CSV="$TMP_DIR/index.csv" echo "Typ;Quelle;Beginn;Ende;Dateiname;Pfad" > "$INDEX_CSV" - CLIP_NUM=1 for SECTION in combined only_audio only_video; do case "$SECTION" in - combined) SUBDIR="kombiniert"; QUELLE="kombiniert" ;; - only_audio) SUBDIR="audio"; QUELLE="audio" ;; - only_video) SUBDIR="video"; QUELLE="video" ;; + combined) QUELLE="kombiniert" ;; + only_audio) QUELLE="audio" ;; + only_video) QUELLE="video" ;; esac - mkdir -p "$CLIPS_DIR/$SUBDIR" + COUNT=$(jq ".\"$SECTION\" | length" "$CANDIDATES_JSON" 2>/dev/null || echo 0) + if [[ "$COUNT" -eq 0 ]]; then + warn "Keine Clips in $SECTION" + continue + fi - jq -c ".$SECTION[]" "$CANDIDATES_JSON" | while read -r clip; do - START=$(jq -r '.start' <<< "$clip") - END=$(jq -r '.end' <<< "$clip") + STARTS=($(jq -r ".\"$SECTION\"[].start" "$CANDIDATES_JSON")) + ENDS=($(jq -r ".\"$SECTION\"[].end" "$CANDIDATES_JSON")) + + for i in "${!STARTS[@]}"; do + START="${STARTS[$i]}" + END="${ENDS[$i]}" DURATION=$(awk "BEGIN { printf \"%.2f\", $END - $START }") - OUT_NAME=$(printf "%03d_%s.mp4" "$CLIP_NUM" "$FINAL_NAME") - OUT_PATH="$CLIPS_DIR/$SUBDIR/$OUT_NAME" + OUT_BASENAME=$(printf "%03d_%s" "$CLIP_NUM" "$FINAL_NAME") + CLIP_DIR="$CLIPS_DIR/$OUT_BASENAME" + mkdir -p "$CLIP_DIR" + OUT_PATH="$CLIP_DIR/$OUT_BASENAME.mp4" - echo "[INFO] Schneide Clip $OUT_NAME ($START - $END → $DURATION s)" - ffmpeg -hide_banner -loglevel error -ss "$START" -i "$VOD_PATH" -t "$DURATION" -c copy "$OUT_PATH" + START_FMT=$(date -u -d "@$START" +"%H:%M:%S" 2>/dev/null || echo 
"$START") + END_FMT=$(date -u -d "@$END" +"%H:%M:%S" 2>/dev/null || echo "$END") - # Zeitformat HH:MM:SS - START_FMT=$(date -u -d "@$START" +"%H:%M:%S") - END_FMT=$(date -u -d "@$END" +"%H:%M:%S") + log "→ Clip $OUT_BASENAME.mp4 (Start: $START_FMT, Dauer: ${DURATION}s)" - echo "Clip;$QUELLE;$START_FMT;$END_FMT;$OUT_NAME;clips/$SUBDIR/" >> "$INDEX_CSV" + if ! ffmpeg -hide_banner -loglevel error -ss "$START" -i "$VOD_PATH" -t "$DURATION" \ + -c:v libx264 -preset veryfast -crf 23 -c:a aac "$OUT_PATH" 2>>"$LOG_FILE"; then + warn "Fehler beim Clip $OUT_BASENAME" + continue + fi + echo "Clip;$QUELLE;$START_FMT;$END_FMT;$OUT_BASENAME.mp4;clips/$OUT_BASENAME/" >> "$INDEX_CSV" CLIP_NUM=$((CLIP_NUM + 1)) done + done + +log "[DONE] Schnitt abgeschlossen – $((CLIP_NUM - 1)) Clips erstellt." +echo '[]' ``` SSH Node – Clips schneiden (Node-Name: Cut Clips) + Node-Typ: SSH + Credentials: SSH Clipper + Working Dir: /srv/clipper + Command (Expression): -``set -euo pipefail; /srv/clipper/bin/clipper-cut-vod "{{ $('Loop Over Items').item.json.data.id }}"`` +``/srv/clipper/bin/clipper-cut-vod "{{ $('Loop Over Items').item.json.data.id }}"`` Setze auch hier die richtigen BErechtigungen, damit alles einwandfrei und problemlos durchlaufen kann. ```bash chmod 755 /srv/clipper/bin/clipper-cut-vod chown clipper:clipper /srv/clipper/bin/clipper-cut-vod -``` \ No newline at end of file +``` + +Als nächstes wollen wir die zuvor installierte KI Funktion von Whispser nutzen. +Dafür haben wir bereits alles vorbereitet. Einzig das Skript und der Node in n8n stehen noch aus. 
#!/usr/bin/env bash
#
# create-subtitle - run Whisper over every cut clip of one VOD.
#
# Install as /srv/clipper/bin/create-subtitle.
#
# Usage:   create-subtitle <VOD_ID>
# Env:     sources /etc/clipper/clipper.env; WHISPER_MODEL selects the model
#          (default: small).
# Input:   /srv/clipper/out/<VOD_ID>/clips/<clip>/<clip>.mp4
# Output:  transcripts written by Whisper into each clip folder; everything
#          printed after start-up is appended to
#          /srv/clipper/logs/<VOD_ID>/subtitle.log
# Exit:    non-zero if the env file, the VOD id or the clips directory is
#          missing, or if ffmpeg/whisper fails for a clip.
set -euo pipefail

# --- Load environment --------------------------------------------------------
ENV_FILE="/etc/clipper/clipper.env"
[[ -r "$ENV_FILE" ]] || { echo "[FATAL] ENV nicht lesbar: $ENV_FILE" >&2; exit 1; }
# shellcheck source=/dev/null
source "$ENV_FILE"

VOD_ID="${1:?VOD-ID muss übergeben werden}"
MODEL="${WHISPER_MODEL:-small}"

# A non-interactive SSH session does not activate a Python venv, so make the
# venv's bin directory reachable when `whisper` is not already on PATH.
# NOTE(review): assumes Whisper lives in the venv at /srv/clipper/.venv - confirm.
VENV_BIN="/srv/clipper/.venv/bin"
if ! command -v whisper >/dev/null 2>&1 && [[ -x "$VENV_BIN/whisper" ]]; then
  PATH="$VENV_BIN:$PATH"
fi

echo "[INFO] Starte Untertitelung für VOD: $VOD_ID mit Modell: $MODEL"

# --- Paths -------------------------------------------------------------------
VOD_OUT="/srv/clipper/out/$VOD_ID"
CLIPS_ROOT="$VOD_OUT/clips"
WAV_DIR="/srv/clipper/temp/$VOD_ID/whisper_wav"
LOG_FILE="/srv/clipper/logs/$VOD_ID/subtitle.log"

# Check before stdout/stderr are redirected so the caller sees the reason.
[[ -d "$CLIPS_ROOT" ]] || { echo "[FATAL] Clipverzeichnis nicht gefunden: $CLIPS_ROOT" >&2; exit 1; }

# The log directory must exist before the redirect below, otherwise `exec`
# fails and `set -e` aborts the script without a useful message.
mkdir -p "$WAV_DIR" "$(dirname "$LOG_FILE")"
exec >>"$LOG_FILE" 2>&1

echo "[INFO] WAV-Verzeichnis: $WAV_DIR"
echo "[INFO] Clipverzeichnis: $CLIPS_ROOT"

# --- Run Whisper on every clip folder ----------------------------------------
# The folder list is read from file descriptor 3 (NUL-delimited) instead of
# stdin: ffmpeg reads stdin by default and would otherwise consume the
# remaining folder names, ending the loop after the first clip.
while IFS= read -r -d '' -u 3 CLIP_FOLDER; do
  # -print -quit stops after the first match; no `head`, so no SIGPIPE that
  # `pipefail` could turn into a spurious abort.
  CLIP_PATH=$(find "$CLIP_FOLDER" -maxdepth 1 -type f -name '*.mp4' -print -quit)

  if [[ ! -f "$CLIP_PATH" ]]; then
    echo "[WARN] Kein Clip gefunden in $CLIP_FOLDER – überspringe"
    continue
  fi

  BASENAME="$(basename -- "$CLIP_PATH" .mp4)"
  WAV_FILE="$WAV_DIR/$BASENAME.wav"
  SRT_FILE="$CLIP_FOLDER/$BASENAME.srt"
  JSON_FILE="$CLIP_FOLDER/$BASENAME.json"
  TXT_FILE="$CLIP_FOLDER/$BASENAME.txt"

  echo "[INFO] → Clip: $BASENAME"

  if [[ ! -f "$WAV_FILE" ]]; then
    echo "[INFO] Extrahiere WAV..."
    # Write to a temporary name and rename on success, so an interrupted run
    # never leaves a truncated WAV that the next run would silently reuse.
    ffmpeg -nostdin -hide_banner -loglevel error -y -i "$CLIP_PATH" \
      -vn -ac 1 -ar 16000 -f wav "$WAV_FILE.part"
    mv -- "$WAV_FILE.part" "$WAV_FILE"
  fi

  echo "[INFO] Starte Whisper..."
  # --output_format takes a single value; repeating it keeps only the last
  # one. "all" writes srt, json and txt (plus vtt and tsv) in one pass.
  whisper "$WAV_FILE" \
    --model "$MODEL" \
    --output_format all \
    --output_dir "$CLIP_FOLDER" \
    --fp16 False </dev/null

  echo "[DONE] Untertitel erstellt: $SRT_FILE, $JSON_FILE, $TXT_FILE"
done 3< <(find "$CLIPS_ROOT" -mindepth 1 -maxdepth 1 -type d -print0 | sort -z)

echo "[DONE] Untertitel abgeschlossen."
+``` +Mit +```bash +chmod 755 /srv/clipper/bin/create-subtitle +chown clipper:clipper /srv/clipper/bin/create-subtitle +``` +setzen wir die Rechte korrekt. + +Der in n8n benötigte Node sieht wie folgt aus: +SSH Node – Untertitel erstellen (Node-Name: Create Subtitle) + +Node-Typ: SSH + +Credentials: SSH Clipper + +Command (Expression): +``/srv/clipper/bin/create-subtitle "{{ $('Loop Over Items').item.json.data.id }}"``