Kapitel 13/Tutorial.md aktualisiert
This commit is contained in:
@@ -25,7 +25,7 @@ Clips sind der beste Weg, lange Streams in kurze, teilbare Highlights zu verwand
|
||||
Wir beginnen mit einem frischen Debian‑12‑LXC in Proxmox, benennen ihn `clipper` und vergeben die im Abschnitt oben genannten Ressourcen. Danach bringen wir das System auf Stand und installieren die Grundwerkzeuge:
|
||||
```bash
|
||||
apt update && apt upgrade -y
|
||||
apt install -y curl unzip ffmpeg inotify-tools pv bc
|
||||
apt install -y curl unzip ffmpeg inotify-tools pv bc git
|
||||
```
|
||||
|
||||
Eine korrekte Systemzeit ist entscheidend, da Schnittmarken später auf exakten Sekunden basieren. Prüfe die Zeit mit:
|
||||
@@ -149,7 +149,7 @@ Erzeuge und fülle eine virtuelle Umgebung für die spätere Analyse:
|
||||
python3 -m venv /srv/clipper/.venv
|
||||
source /srv/clipper/.venv/bin/activate
|
||||
pip install --upgrade pip
|
||||
pip install librosa soundfile numpy scipy
|
||||
pip install librosa soundfile numpy scipy git+https://github.com/openai/whisper.git
|
||||
deactivate
|
||||
```
|
||||
Arbeite für die nächsten Schritte **als Benutzer clipper** weiter.
|
||||
@@ -1186,15 +1186,33 @@ Lege mit ```nano <clipper-ordner>/bin/clipper-cut-vod ``` die benötigte Datei a
|
||||
```bash
|
||||
#!/bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
VOD_ID="$1"
|
||||
# Abschnitt 1: Namensmodus erkennen
|
||||
echo "[START] Starte Clip-Schnitt für VOD: $VOD_ID"
|
||||
|
||||
# Pfade
|
||||
ETC_DIR="/srv/clipper/etc"
|
||||
TMP_DIR="/srv/clipper/temp/$VOD_ID"
|
||||
OUT_DIR="/srv/clipper/out/$VOD_ID"
|
||||
VOD_PATH="$OUT_DIR/original/$VOD_ID.mp4"
|
||||
CANDIDATES_JSON="$TMP_DIR/candidates.json"
|
||||
USED_PATH="$ETC_DIR/used.json"
|
||||
LOG_DIR="/srv/clipper/logs/$VOD_ID"
|
||||
LOG_FILE="$LOG_DIR/cut-clips.log"
|
||||
CLIPS_DIR="$OUT_DIR/clips"
|
||||
|
||||
mkdir -p "$LOG_DIR" "$CLIPS_DIR"
|
||||
touch "$LOG_FILE"
|
||||
echo 0 > /dev/null # Suppress debug=0 parse errors from unwanted stdin
|
||||
|
||||
log() { echo "[INFO] $1" | tee -a "$LOG_FILE" >&2; }
|
||||
warn() { echo "[WARN] $1" | tee -a "$LOG_FILE" >&2; }
|
||||
error() { echo "[ERROR] $1" | tee -a "$LOG_FILE" >&2; exit 1; }
|
||||
|
||||
# Namenslogik
|
||||
CODE_POOL="$ETC_DIR/codename_pool.txt"
|
||||
ADJ_POOL="$ETC_DIR/adjektive_de.txt"
|
||||
|
||||
# Standardmodus
|
||||
NAMING_MODE="fallback"
|
||||
|
||||
if [[ -s "$CODE_POOL" && -s "$ADJ_POOL" ]]; then
|
||||
@@ -1203,24 +1221,20 @@ elif [[ -s "$CODE_POOL" ]]; then
|
||||
NAMING_MODE="codename"
|
||||
fi
|
||||
|
||||
echo "[INFO] Namensmodus erkannt: $NAMING_MODE"
|
||||
log "Namensmodus: $NAMING_MODE"
|
||||
|
||||
# Abschnitt 2: Namen wählen (robust & ohne Wiederholung)
|
||||
declare -A USED_MAP
|
||||
if [[ -f "$USED_PATH" ]]; then
|
||||
while IFS= read -r name; do
|
||||
USED_MAP["$name"]=1
|
||||
done < <(jq -r '.[]' "$USED_PATH" || echo "")
|
||||
fi
|
||||
|
||||
USED_PATH="/srv/clipper/etc/used.json"
|
||||
mkdir -p "$(dirname "$USED_PATH")"
|
||||
touch "$USED_PATH"
|
||||
USED_LIST=$(jq -r '.[]' "$USED_PATH" 2>/dev/null || echo "")
|
||||
|
||||
is_used() {
|
||||
grep -Fxq "$1" <<< "$USED_LIST"
|
||||
}
|
||||
is_used() { [[ -n "${USED_MAP[$1]+1}" ]]; }
|
||||
|
||||
choose_final_name() {
|
||||
case "$NAMING_MODE" in
|
||||
"codename")
|
||||
mapfile -t CANDIDATES < "$CODE_POOL"
|
||||
;;
|
||||
"codename") mapfile -t CANDIDATES < "$CODE_POOL" ;;
|
||||
"adjektiv_codename")
|
||||
mapfile -t CODES < "$CODE_POOL"
|
||||
mapfile -t ADJS < "$ADJ_POOL"
|
||||
@@ -1231,95 +1245,176 @@ choose_final_name() {
|
||||
done
|
||||
done
|
||||
;;
|
||||
"fallback")
|
||||
echo "$VOD_ID"
|
||||
return 0
|
||||
;;
|
||||
*) echo "$VOD_ID"; return 0 ;;
|
||||
esac
|
||||
|
||||
# Filtere bereits genutzte Namen
|
||||
AVAILABLE=()
|
||||
for name in "${CANDIDATES[@]}"; do
|
||||
if ! is_used "$name"; then
|
||||
AVAILABLE+=("$name")
|
||||
FINAL_NAME=""
|
||||
for candidate in $(printf "%s\n" "${CANDIDATES[@]}" | shuf); do
|
||||
if ! is_used "$candidate"; then
|
||||
FINAL_NAME="$candidate"
|
||||
log "Gewählter Codename: $FINAL_NAME"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [[ ${#AVAILABLE[@]} -eq 0 ]]; then
|
||||
echo "[WARN] Keine verfügbaren Namen mehr – nutze Fallback: $VOD_ID"
|
||||
echo "$VOD_ID"
|
||||
else
|
||||
FINAL_NAME="${AVAILABLE[RANDOM % ${#AVAILABLE[@]}]}"
|
||||
echo "[INFO] Gewählter Codename: $FINAL_NAME"
|
||||
echo "$FINAL_NAME"
|
||||
fi
|
||||
[[ -n "$FINAL_NAME" ]] && echo "$FINAL_NAME" || { warn "Keine freien Namen – fallback: $VOD_ID"; echo "$VOD_ID"; }
|
||||
}
|
||||
|
||||
# Codename in used.json speichern (wenn nicht fallback)
|
||||
FINAL_NAME=$(choose_final_name)
|
||||
|
||||
if [[ "$FINAL_NAME" != "$VOD_ID" ]]; then
|
||||
if jq -e . >/dev/null 2>&1 <<< "$USED_LIST"; then
|
||||
UPDATED=$(jq --arg name "$FINAL_NAME" '. + [$name]' <<< "$USED_LIST")
|
||||
else
|
||||
UPDATED="[$(printf '%s\n' "$USED_LIST" | jq -R . | jq -s .), \"$FINAL_NAME\"]"
|
||||
fi
|
||||
echo "$UPDATED" > "$USED_PATH"
|
||||
jq --arg name "$FINAL_NAME" '. + [$name]' "$USED_PATH" 2>/dev/null > "$USED_PATH.tmp" || echo "[\"$FINAL_NAME\"]" > "$USED_PATH.tmp"
|
||||
mv "$USED_PATH.tmp" "$USED_PATH"
|
||||
fi
|
||||
|
||||
# Verzeichnisstruktur
|
||||
TMP_DIR="/srv/clipper/temp/$VOD_ID"
|
||||
VOD_PATH="/srv/clipper/out/$VOD_ID/original/$VOD_ID.mp4"
|
||||
CANDIDATES_JSON="$TMP_DIR/candidates.json"
|
||||
CLIPS_DIR="/srv/clipper/out/$VOD_ID/clips"
|
||||
|
||||
mkdir -p "$CLIPS_DIR/combined"
|
||||
mkdir -p "$CLIPS_DIR/audio"
|
||||
mkdir -p "$CLIPS_DIR/video"
|
||||
|
||||
# Ziel für temporären CSV-Index
|
||||
INDEX_CSV="$TMP_DIR/index.csv"
|
||||
echo "Typ;Quelle;Beginn;Ende;Dateiname;Pfad" > "$INDEX_CSV"
|
||||
|
||||
CLIP_NUM=1
|
||||
|
||||
for SECTION in combined only_audio only_video; do
|
||||
case "$SECTION" in
|
||||
combined) SUBDIR="kombiniert"; QUELLE="kombiniert" ;;
|
||||
only_audio) SUBDIR="audio"; QUELLE="audio" ;;
|
||||
only_video) SUBDIR="video"; QUELLE="video" ;;
|
||||
combined) QUELLE="kombiniert" ;;
|
||||
only_audio) QUELLE="audio" ;;
|
||||
only_video) QUELLE="video" ;;
|
||||
esac
|
||||
|
||||
mkdir -p "$CLIPS_DIR/$SUBDIR"
|
||||
COUNT=$(jq ".\"$SECTION\" | length" "$CANDIDATES_JSON" 2>/dev/null || echo 0)
|
||||
if [[ "$COUNT" -eq 0 ]]; then
|
||||
warn "Keine Clips in $SECTION"
|
||||
continue
|
||||
fi
|
||||
|
||||
jq -c ".$SECTION[]" "$CANDIDATES_JSON" | while read -r clip; do
|
||||
START=$(jq -r '.start' <<< "$clip")
|
||||
END=$(jq -r '.end' <<< "$clip")
|
||||
STARTS=($(jq -r ".\"$SECTION\"[].start" "$CANDIDATES_JSON"))
|
||||
ENDS=($(jq -r ".\"$SECTION\"[].end" "$CANDIDATES_JSON"))
|
||||
|
||||
for i in "${!STARTS[@]}"; do
|
||||
START="${STARTS[$i]}"
|
||||
END="${ENDS[$i]}"
|
||||
DURATION=$(awk "BEGIN { printf \"%.2f\", $END - $START }")
|
||||
|
||||
OUT_NAME=$(printf "%03d_%s.mp4" "$CLIP_NUM" "$FINAL_NAME")
|
||||
OUT_PATH="$CLIPS_DIR/$SUBDIR/$OUT_NAME"
|
||||
OUT_BASENAME=$(printf "%03d_%s" "$CLIP_NUM" "$FINAL_NAME")
|
||||
CLIP_DIR="$CLIPS_DIR/$OUT_BASENAME"
|
||||
mkdir -p "$CLIP_DIR"
|
||||
OUT_PATH="$CLIP_DIR/$OUT_BASENAME.mp4"
|
||||
|
||||
echo "[INFO] Schneide Clip $OUT_NAME ($START - $END → $DURATION s)"
|
||||
ffmpeg -hide_banner -loglevel error -ss "$START" -i "$VOD_PATH" -t "$DURATION" -c copy "$OUT_PATH"
|
||||
START_FMT=$(date -u -d "@$START" +"%H:%M:%S" 2>/dev/null || echo "$START")
|
||||
END_FMT=$(date -u -d "@$END" +"%H:%M:%S" 2>/dev/null || echo "$END")
|
||||
|
||||
# Zeitformat HH:MM:SS
|
||||
START_FMT=$(date -u -d "@$START" +"%H:%M:%S")
|
||||
END_FMT=$(date -u -d "@$END" +"%H:%M:%S")
|
||||
log "→ Clip $OUT_BASENAME.mp4 (Start: $START_FMT, Dauer: ${DURATION}s)"
|
||||
|
||||
echo "Clip;$QUELLE;$START_FMT;$END_FMT;$OUT_NAME;clips/$SUBDIR/" >> "$INDEX_CSV"
|
||||
if ! ffmpeg -hide_banner -loglevel error -ss "$START" -i "$VOD_PATH" -t "$DURATION" \
|
||||
-c:v libx264 -preset veryfast -crf 23 -c:a aac "$OUT_PATH" 2>>"$LOG_FILE"; then
|
||||
warn "Fehler beim Clip $OUT_BASENAME"
|
||||
continue
|
||||
fi
|
||||
|
||||
echo "Clip;$QUELLE;$START_FMT;$END_FMT;$OUT_BASENAME.mp4;clips/$OUT_BASENAME/" >> "$INDEX_CSV"
|
||||
CLIP_NUM=$((CLIP_NUM + 1))
|
||||
done
|
||||
|
||||
done
|
||||
|
||||
log "[DONE] Schnitt abgeschlossen – $((CLIP_NUM - 1)) Clips erstellt."
|
||||
echo '[]'
|
||||
```
|
||||
SSH Node – Clips schneiden (Node-Name: Cut Clips)
|
||||
|
||||
Node-Typ: SSH
|
||||
|
||||
Credentials: SSH Clipper
|
||||
|
||||
Working Dir: /srv/clipper
|
||||
|
||||
Command (Expression):
|
||||
``set -euo pipefail; /srv/clipper/bin/clipper-cut-vod "{{ $('Loop Over Items').item.json.data.id }}"``
|
||||
``/srv/clipper/bin/clipper-cut-vod "{{ $('Loop Over Items').item.json.data.id }}"``
|
||||
|
||||
Setze auch hier die richtigen Berechtigungen, damit alles einwandfrei und problemlos durchlaufen kann.
|
||||
```bash
|
||||
chmod 755 /srv/clipper/bin/clipper-cut-vod
|
||||
chown clipper:clipper /srv/clipper/bin/clipper-cut-vod
|
||||
```
|
||||
```
|
||||
|
||||
Als Nächstes wollen wir die zuvor installierte KI-Funktion von Whisper nutzen.
|
||||
Dafür haben wir bereits alles vorbereitet. Einzig das Skript und der Node in n8n stehen noch aus.
|
||||
|
||||
Erstelle mit
|
||||
``nano /srv/clipper/bin/create-subtitle``
|
||||
die entsprechende Datei und befülle sie mit
|
||||
```bash
|
||||
#!/usr/bin/env bash
#
# create-subtitle – transcribe every clip of one VOD with Whisper.
#
# Usage:  create-subtitle <VOD_ID>
# Env:    /etc/clipper/clipper.env is sourced; WHISPER_MODEL selects the
#         model (default: small).
# Output: for each clip folder below /srv/clipper/out/<VOD_ID>/clips the
#         transcript files are written next to the clip. Everything after
#         the first status line is appended to
#         /srv/clipper/logs/<VOD_ID>/subtitle.log.
# Exit:   non-zero if the env file is unreadable, the VOD id is missing,
#         the clips directory or the whisper binary cannot be found, or
#         ffmpeg/whisper fails for a clip (set -e).
set -euo pipefail

# === Read ENV ===
ENV_FILE="/etc/clipper/clipper.env"
[ -r "$ENV_FILE" ] || { echo "[FATAL] ENV nicht lesbar: $ENV_FILE" >&2; exit 1; }
# shellcheck source=/dev/null
source "$ENV_FILE"

VOD_ID="${1:?VOD-ID muss übergeben werden}"
MODEL="${WHISPER_MODEL:-small}"

echo "[INFO] Starte Untertitelung für VOD: $VOD_ID mit Modell: $MODEL"

# === Set paths ===
VOD_OUT="/srv/clipper/out/$VOD_ID"
CLIPS_ROOT="$VOD_OUT/clips"
WAV_DIR="/srv/clipper/temp/$VOD_ID/whisper_wav"
LOG_FILE="/srv/clipper/logs/$VOD_ID/subtitle.log"

# The log directory must exist before stdout/stderr are redirected into it.
mkdir -p "$WAV_DIR" "$(dirname "$LOG_FILE")"
exec >> "$LOG_FILE" 2>&1

echo "[INFO] WAV-Verzeichnis: $WAV_DIR"
echo "[INFO] Clipverzeichnis: $CLIPS_ROOT"

# Whisper was installed into the virtual environment earlier in this
# tutorial; fall back to that binary when it is not on PATH.
WHISPER_BIN="$(command -v whisper || true)"
if [[ -z "$WHISPER_BIN" && -x /srv/clipper/.venv/bin/whisper ]]; then
  WHISPER_BIN="/srv/clipper/.venv/bin/whisper"
fi
[[ -n "$WHISPER_BIN" ]] || { echo "[FATAL] whisper nicht gefunden"; exit 1; }

[[ -d "$CLIPS_ROOT" ]] || { echo "[FATAL] Clipverzeichnis fehlt: $CLIPS_ROOT"; exit 1; }

# === Run Whisper on all clips ===
# Collect the folder list up front instead of piping it into the loop:
# a command inside a `... | while read` loop shares the loop's stdin and
# ffmpeg would otherwise consume the remaining folder names.
mapfile -t CLIP_FOLDERS < <(find "$CLIPS_ROOT" -mindepth 1 -maxdepth 1 -type d | sort)

for CLIP_FOLDER in "${CLIP_FOLDERS[@]}"; do
  # -print -quit stops after the first match without a `head` pipeline
  # (which can trip pipefail through SIGPIPE).
  CLIP_PATH=$(find "$CLIP_FOLDER" -maxdepth 1 -type f -name "*.mp4" -print -quit)

  if [[ ! -f "$CLIP_PATH" ]]; then
    echo "[WARN] Kein Clip gefunden in $CLIP_FOLDER – überspringe"
    continue
  fi

  BASENAME="$(basename "$CLIP_PATH" .mp4)"
  WAV_FILE="$WAV_DIR/$BASENAME.wav"
  SRT_FILE="$CLIP_FOLDER/$BASENAME.srt"
  JSON_FILE="$CLIP_FOLDER/$BASENAME.json"
  TXT_FILE="$CLIP_FOLDER/$BASENAME.txt"

  echo "[INFO] → Clip: $BASENAME"

  # Reuse an already extracted WAV; otherwise extract mono 16 kHz audio.
  if [[ ! -f "$WAV_FILE" ]]; then
    echo "[INFO] Extrahiere WAV..."
    ffmpeg -nostdin -hide_banner -loglevel error -y -i "$CLIP_PATH" \
      -vn -ac 1 -ar 16000 -f wav "$WAV_FILE"
  fi

  echo "[INFO] Starte Whisper..."
  # --output_format accepts a single value; when the flag is repeated only
  # the last one is used. "all" writes srt, json and txt (plus vtt and tsv)
  # in one run.
  "$WHISPER_BIN" "$WAV_FILE" \
    --model "$MODEL" \
    --output_format all \
    --output_dir "$CLIP_FOLDER" \
    --fp16 False < /dev/null

  echo "[DONE] Untertitel erstellt: $SRT_FILE, $JSON_FILE, $TXT_FILE"
done

echo "[DONE] Untertitel abgeschlossen."
|
||||
```
|
||||
Mit
|
||||
```bash
|
||||
chmod 755 /srv/clipper/bin/create-subtitle
|
||||
chown clipper:clipper /srv/clipper/bin/create-subtitle
|
||||
```
|
||||
setzen wir die Rechte korrekt.
|
||||
|
||||
Der in n8n benötigte Node sieht wie folgt aus:
|
||||
SSH Node – Untertitel erstellen (Node-Name: Create Subtitle)
|
||||
|
||||
Node-Typ: SSH
|
||||
|
||||
Credentials: SSH Clipper
|
||||
|
||||
Command (Expression):
|
||||
``/srv/clipper/bin/create-subtitle "{{ $('Loop Over Items').item.json.data.id }}"``
|
||||
|
||||
Reference in New Issue
Block a user