+ jarvis-voice

2026-05-26 22:17:14 +02:00
parent b9d198497d
commit 16aa40492c
3 changed files with 365 additions and 168 deletions
--- a/jarvis.py
+++ b/jarvis.py
@@ -3,6 +3,8 @@ import re
 import sqlite3
 import asyncio
 import openai
+import sys
+import subprocess

 from google import genai
 from google.genai import types
@@ -143,6 +145,37 @@ def get_db():
    conn.row_factory = sqlite3.Row
    return conn

+# ====================================================
+# DYNAMISCHE PROGRAMM-ERKENNUNG (NEU)
+# ====================================================
+
+def get_installed_gui_apps():
+    """Scan /usr/share/applications/*.desktop and return a dict mapping each Name= to its Exec= command."""
+    apps_dir = Path("/usr/share/applications")
+    detected_apps = {}
+    
+    if apps_dir.exists():
+        for desktop_file in apps_dir.glob("*.desktop"):
+            try:
+                # Read the .desktop file; undecodable bytes are ignored instead of raising
+                content = desktop_file.read_text(encoding="utf-8", errors="ignore")
+                
+                # Take the first Name= and the first Exec= line (re.M anchors ^ and $ at every line)
+                name_match = re.search(r"^Name=(.+)$", content, re.M)
+                exec_match = re.search(r"^Exec=([^ \n%]+)", content, re.M) # bare command only: stops at the first space or % field code (%U etc.)
+                
+                if name_match and exec_match:
+                    app_name = name_match.group(1).strip()
+                    app_cmd = exec_match.group(1).strip()
+                    
+                    # Skip core system entries that would only clutter the list
+                    if not any(x in app_cmd.lower() for x in ["debian", "im-config", "openjdk", "systemd"]):
+                        detected_apps[app_name] = app_cmd
+            except Exception:  # best effort: an entry that cannot be read is skipped
+                continue
+                
+    return detected_apps
+

 # ====================================================
 # SYSTEM PROMPT
@@ -163,26 +196,29 @@ def get_system_prompt():
    prompt = prompt.replace("{notes_file}", str(NOTES_FILE))
    prompt = prompt.replace("{todo_file}", str(TODO_FILE))

+    # --- DYNAMISCHE PROGRAMME INJIZIEREN ---
+    installed_apps = get_installed_gui_apps()
+    apps_prompt_string = "VERFÜGBARE LOKALE DESKTOP-PROGRAMME (Nutze NUR diese Befehle zum Starten!):\n"
+    for app_name, app_cmd in installed_apps.items():
+        apps_prompt_string += f"- {app_name}: Befehl lautet '{app_cmd}'\n"
+        
+    # Wir hängen die Liste einfach an den Prompt an oder ersetzen einen Platzhalter
+    if "{installed_apps}" in prompt:
+        prompt = prompt.replace("{installed_apps}", apps_prompt_string)
+    else:
+        prompt += "\n\n" + apps_prompt_string
+    # ---------------------------------------
+
    conn = get_db()
-
-    nodes = conn.execute(
-        'SELECT * FROM nodes'
-    ).fetchall()
-
+    nodes = conn.execute('SELECT * FROM nodes').fetchall()
    conn.close()

    node_info = ""
-
    for n in nodes:
-        node_info += (
-            f"- Name: {n['name']}, "
-            f"IP: {n['ip']}, "
-            f"User: {n['user']}\n"
-        )
+        node_info += f"- Name: {n['name']}, IP: {n['ip']}, User: {n['user']}\n"

    return prompt.replace("{node_info}", node_info)

-
 # ====================================================
 # KI KOMMUNIKATION
 # ====================================================
@@ -306,21 +342,8 @@ async def run_task(target, cmd):
        # GUI APPS ERKENNEN
        # ========================================

-        gui_apps = [
-            "firefox",
-            "thunderbird",
-            "chromium",
-            "google-chrome",
-            "code",
-            "nautilus",
-            "pcmanfm",
-            "gedit",
-            "vlc",
-            "discord",
-            "steam",
-            "obs",
-            "spotify"
-        ]
+        # Holt sich alle bekannten System-GUI-Befehle dynamisch
+        gui_apps = list(get_installed_gui_apps().values())

        first_word = cmd.strip().split()[0]

@@ -448,49 +471,36 @@ async def run_task(target, cmd):

            collected_output = []

-            while True:
+            # ========================================
+            # ROBUST OUTPUT READING WITH TIMEOUT
+            # ========================================
+            try:
+                while True:
+                    # Wait at most 2 seconds for the next line
+                    line = await asyncio.wait_for(proc.stdout.readline(), timeout=2.0)
+                    
+                    if not line:
+                        break

-                line = await proc.stdout.readline()
+                    decoded = line.decode("utf-8", errors="ignore").rstrip()
+                    collected_output.append(decoded)

-                if not line:
-                    break
+                    print(f"{OUTPUT_COLOR}│ {decoded}{RESET}")
+            except asyncio.TimeoutError:
+                # If the tool keeps the pipe open, simply stop reading
+                print(f"{SYSTEM_COLOR}⏳ Ausgabe-Stream stagniert. Erzwinge Prozess-Check...{RESET}")

-                decoded = line.decode(
-                    "utf-8",
-                    errors="ignore"
-                ).rstrip()
-
-                collected_output.append(decoded)
-
-                print(
-                    f"{OUTPUT_COLOR}"
-                    f"│ {decoded}"
-                    f"{RESET}"
-                )
-
-            await proc.wait()
+            # Wait at most 2 seconds for the process to actually exit
+            try:
+                await asyncio.wait_for(proc.wait(), timeout=2.0)
+            except asyncio.TimeoutError:
+                print(f"{ERROR_COLOR}⚠️ Prozess reagiert nicht. Setze Ablauf trotzdem fort.{RESET}")
+                # Optional: proc.terminate() if a hard kill is wanted

            print()

+            # Hand the captured output back: the caller only reports a result when this is non-empty
+            return "\n".join(collected_output)
-            if proc.returncode == 0:
-
-                print(
-                    f"{JARVIS_COLOR}"
-                    f"✅ TASK ERFOLGREICH"
-                    f"{RESET}\n"
-                )
-
-            else:
-
-                print(
-                    f"{ERROR_COLOR}"
-                    f"❌ FEHLER CODE: "
-                    f"{proc.returncode}"
-                    f"{RESET}\n"
-                )
-
-            return "\n".join(collected_output)
-
    except Exception as e:

        err = f"❌ Fehler: {e}"
@@ -531,27 +539,33 @@ async def listen_to_user():
 # ====================================================

 async def speak_to_user(text):
-
-    print(
-        f"\n{JARVIS_COLOR}"
-        f"🤖 J.A.R.V.I.S."
-        f"{RESET}"
-    )
-
-    print(
-        f"{JARVIS_COLOR}"
-        f"{'-'*60}"
-        f"{RESET}"
-    )
-
+    print(f"\n{JARVIS_COLOR}🤖 J.A.R.V.I.S.{RESET}")
+    print(f"{JARVIS_COLOR}{'-'*60}{RESET}")
    print(text)
+    print(f"{JARVIS_COLOR}{'-'*60}{RESET}\n")

-    print(
-        f"{JARVIS_COLOR}"
-        f"{'-'*60}"
-        f"{RESET}\n"
-    )
+    clean_text = re.sub(r'[^\w\s\d.,!?-]', '', text)  # strip quotes/shell metacharacters before the text is echoed inside '...'
+    base_dir = Path(__file__).resolve().parent  # install directory; replaces the hard-coded /home/meik/jarvis-ai
+    piper_path = base_dir / "piper" / "piper"
+    model_path = base_dir / "de_DE-thorsten-high.onnx"
+    lock_file = Path("/tmp/.jarvis_speaking")  # lock file that exists while speech is playing

+    if os.path.exists(piper_path) and os.path.exists(model_path):
+        try:
+            # 1. Take the lock
+            lock_file.touch()
+            
+            piper_cmd = f"echo '{clean_text}' | {piper_path} --model {model_path} --output_raw | aplay -r 22050 -f S16_LE -t raw -D pipewire >/dev/null 2>&1"
+            
+            proc = await asyncio.create_subprocess_shell(piper_cmd)
+            await proc.wait()
+            
+        except Exception as e:
+            print(f"⚠️ TTS Fehler: {e}")
+        finally:
+            # 2. ALWAYS release the lock once Piper is done; missing_ok avoids the
+            #    exists()/unlink() race if something else removed the file first
+            lock_file.unlink(missing_ok=True)

 # ====================================================
 # MAIN LOOP
@@ -681,25 +695,14 @@ async def main_chat_loop():
            target = target.strip()
            cmd = cmd.strip()

-            # ========================================
-            # SICHTBARE SYSTEMAKTION
-            # ========================================
-
            action_msg = (
                f"⚙️ Ich führe jetzt folgenden Befehl "
                f"auf [{target}] aus:\n\n"
                f"{cmd}"
            )

-            await speak_to_user(action_msg)
-
-            chat_history.append({
-                "role": "assistant",
-                "content": action_msg,
-                "timestamp": now
-            })
-            
-            # Optional: Aktion auch ins Log
+            # ÄNDERUNG: Auch hier nur im Terminal anzeigen, NICHT vorlesen!
+            print(f"\n{SYSTEM_COLOR}{action_msg}{RESET}\n")
            log_to_file("SYSTEM", action_msg)

            # ========================================
@@ -716,33 +719,26 @@ async def main_chat_loop():
            # ========================================

            if output:
-
                output_msg = (
                    f"💻 Ergebnis der Ausführung "
                    f"auf [{target}]:\n\n"
                    f"{output}"
                )
-
            else:
-
                output_msg = (
                    f"✅ Befehl auf [{target}] "
                    f"erfolgreich abgeschlossen."
                )

+            # Das hier wird weiterhin laut vorgelesen!
            await speak_to_user(output_msg)

-            sys_now = datetime.now().strftime(
-                "%d.%m.%Y %H:%M"
-            )
-
+            sys_now = datetime.now().strftime("%d.%m.%Y %H:%M")
            chat_history.append({
                "role": "assistant",
                "content": output_msg,
                "timestamp": sys_now
            })
-            
-            # LOG: System Output hier schreiben!
            log_to_file("SYSTEM", output_msg)

        # ============================================
@@ -753,20 +749,77 @@ async def main_chat_loop():
            chat_history = chat_history[-20:]


+
+# ====================================================
+# EINZEL-BEFEHL MODUS (Für das Sprachskript)
+# ====================================================
+async def run_single_command(command_text):
+    """Process one externally supplied command (voice mode): query the AI, speak its reply, run its EXECUTE tags."""
+    init_db()
+    system_prompt = get_system_prompt()
+    
+    # Present the input as a one-entry chat history
+    now = datetime.now().strftime("%d.%m.%Y %H:%M")
+    chat_history = [{
+        "role": "user",
+        "content": command_text,
+        "timestamp": now
+    }]
+    
+    log_to_file("Voice-Input", command_text)
+    
+    ai_response = await get_ai_response(
+        command_text,
+        system_prompt,
+        chat_history
+    )
+    
+    if ai_response is None:
+        return
+
+    # Find EXECUTE tags; the lazy prefix sits INSIDE the optional group, otherwise target="..." is never captured
+    commands = []
+    execute_matches = re.finditer(
+        r'<EXECUTE(?:[^>]*?\btarget="([^">]*)")?[^>]*>(.*?)</EXECUTE>',
+        ai_response,
+        re.I | re.S
+    )
+
+    for match in execute_matches:
+        target = match.group(1) or "localhost"
+        cmd = match.group(2).strip()
+        cmd = re.sub(r'^```[a-zA-Z]*\n?', '', cmd)
+        cmd = re.sub(r'\n?```$', '', cmd)
+        commands.append((target.strip(), cmd.strip()))
+
+    clean_msg = re.sub(r'<EXECUTE[^>]*?>.*?</EXECUTE>', '', ai_response, flags=re.I | re.S).strip()
+    
+    if clean_msg:
+        await speak_to_user(clean_msg)
+        log_to_file("J.A.R.V.I.S.", clean_msg)
+
+    if commands:
+        for target, cmd in commands:
+            action_msg = f"⚙️ Führe Sprachbefehl auf [{target}] aus:\n{cmd}"
+            
+            # Show the action in the terminal only; it is NOT read aloud
+            print(f"\n{SYSTEM_COLOR}{action_msg}{RESET}\n")
+            log_to_file("SYSTEM", action_msg)
+            
+            # Run the command; its output is not spoken in voice mode
+            await run_task(target, cmd)
+
 # ====================================================
 # START
 # ====================================================
-
 if __name__ == "__main__":
-
    try:
-
-        asyncio.run(main_chat_loop())
-
+        # With arguments (e.g. python3 jarvis.py --voice-cmd "...") run that one command and exit
+        if len(sys.argv) > 2 and sys.argv[1] == "--voice-cmd":
+            command_text = sys.argv[2]
+            asyncio.run(run_single_command(command_text))
+        else:
+            # Normal interactive terminal mode
+            asyncio.run(main_chat_loop())
    except KeyboardInterrupt:
-
-        print(
-            f"\n{ERROR_COLOR}"
-            f"⛔ J.A.R.V.I.S. hart beendet."
-            f"{RESET}"
-        )
+        print(f"\n{ERROR_COLOR}⛔ J.A.R.V.I.S. hart beendet.{RESET}")
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
 openai
 google-genai
 python-dotenv
-colorama
+vosk
+sounddevice
+numpy
--- a/setup_wayland_jarvis.sh
+++ b/setup_wayland_jarvis.sh
@@ -21,7 +21,7 @@ JARVIS_DIR="$REAL_HOME/jarvis-ai"
 # 1. System aktualisieren & Basispakete installieren
 echo "📦 Aktualisiere Paketquellen und installiere Systemkomponenten..."
 sudo apt update
-sudo apt install -y labwc firefox-esr curl wget git sudo python3 python3-pip python3-venv original-awk tilix geany waybar wlr-randr fonts-noto-color-emoji wofi pipewire pipewire-audio-client-libraries pipewire-pulse wireplumber alsa-utils
+sudo apt install -y labwc firefox-esr curl wget git sudo python3 python3-pip python3-venv original-awk tilix geany waybar wlr-randr fonts-noto-color-emoji wofi pipewire pipewire-audio-client-libraries pipewire-pulse wireplumber alsa-utils libasound2-dev libportaudio2 unzip

 # 1.1 Gruppenrechte für Grafik und Eingabe
 echo "👥 Füge Benutzer '$REAL_USER' zu den Grafik- und Input-Gruppen hinzu..."
@@ -69,6 +69,8 @@ sudo cat << 'EOF' | sudo tee /usr/local/bin/jwin > /dev/null

 ACTION=$1
 APP_NAME=$2
+PARAM1=$3
+WD="wdotool --backend wlr-protocols"

 if [ -z "$ACTION" ] || [ -z "$APP_NAME" ]; then
    echo "❌ Fehler: Falsche Syntax."
@@ -80,41 +82,44 @@ fi
 # 1. SONDERFALL: PROGRAMM STARTEN (Mit Display-Erkennung)
 # =========================================================
 if [ "$ACTION" == "start" ]; then
-    shift 
-    
-    # Falls WAYLAND_DISPLAY nicht gesetzt ist (z.B. im JARVIS-Dienst), 
-    # versuchen wir es automatisch zu erraten (meistens wayland-0)
+    shift
+
    if [ -z "$WAYLAND_DISPLAY" ]; then
        export WAYLAND_DISPLAY=$(ls /run/user/$(id -u)/wayland-* 2>/dev/null | head -n 1 | xargs basename)
-        # Fallback auf Standard, falls obiges leer ist
        [ -z "$WAYLAND_DISPLAY" ] && export WAYLAND_DISPLAY="wayland-0"
    fi
-    
-    # Sicherheits-Fallback für ältere X11/XWayland Apps wie Geany
+
    if [ -z "$DISPLAY" ]; then
        export DISPLAY=":0"
    fi

-    # Das Programm mit den exportierten Variablen starten
    nohup "$@" >/dev/null 2>&1 &
-    
    echo "✅ Programm '$APP_NAME' wurde entkoppelt im Hintergrund gestartet (Display: $WAYLAND_DISPLAY)."
    exit 0
 fi

 # =========================================================
-# 2. WAYLAND AKTIONEN (Fokus-optimiert)
+# 2. SONDERFALL: PROGRAMM SCHLIESSEN (Direkt & unfehlbar)
 # =========================================================
-PARAM1=$3
-WD="wdotool --backend wlr-protocols"
+if [ "$ACTION" == "close" ]; then
+    if [ "$APP_NAME" == "active" ]; then
+        # "active" means the focused window, not a process name
+        $WD windowclose "$($WD getactivewindow 2>/dev/null | awk '{print $1}')"
+    else
+        pkill -i "$APP_NAME"  # no -f: that also matches "jwin close <app>" itself and its caller
+    fi
+    echo "✅ Schließ-Signal (pkill) an '$APP_NAME' gesendet."
+    exit 0
+fi

+# =========================================================
+# 3. WAYLAND-SUCHE (Nur noch für activate, snap, maximize)
+# =========================================================
 if [ "$APP_NAME" == "active" ]; then
-    # Absolut kugelsicher: Nimm einfach das Fenster, das GERADE aktiv ist
    WINDOW_ID=$($WD getactivewindow 2>/dev/null | awk '{print $1}')
 else
-    # Normale Suche für gezielte Befehle (wie "schließe geany")
    WINDOW_LINE=$($WD search --ignore-case --any --name "$APP_NAME" --class "$APP_NAME" 2>/dev/null | head -n 1)
-    
+
    if [ -z "$WINDOW_LINE" ]; then
        SAFE_APP_NAME=$(echo "$APP_NAME" | sed 's/[.[\*^$]/\\&/g')
        WINDOW_LINE=$($WD search --ignore-case --regex --any --name "$SAFE_APP_NAME" --class "$SAFE_APP_NAME" 2>/dev/null | head -n 1)
@@ -122,64 +127,55 @@ else
    WINDOW_ID=$(echo "$WINDOW_LINE" | awk '{print $1}')
 fi

-if [ -z "$WINDOW_ID" ]; then
-    echo "❌ Kein aktives oder passendes Fenster gefunden."
-    exit 1
-fi
-
 case "$ACTION" in
    activate)
-        $WD windowactivate "$WINDOW_ID"
-        echo "✅ Fenster '$APP_NAME' (ID: $WINDOW_ID) ist jetzt im Fokus."
-        ;;
-    close)
-        $WD windowclose "$WINDOW_ID"
-        echo "✅ Fenster '$APP_NAME' (ID: $WINDOW_ID) wurde geschlossen."
+        if [ -n "$WINDOW_ID" ]; then
+            $WD windowactivate "$WINDOW_ID"
+            echo "✅ Fenster '$APP_NAME' (ID: $WINDOW_ID) ist jetzt im Fokus."
+        else
+            $WD key alt+Tab
+            echo "⚠️ Keine direkte Fenster-ID gefunden, wechsle Fokus via Alt+Tab."
+        fi
        ;;
    maximize)
-        # 1. Fenster aktivieren
-        $WD windowactivate "$WINDOW_ID"
-        sleep 0.2
-        # 2. Labwc Vollbild-Shortcut senden (oft Super+Up oder Alt+F11, passe das an deine rc.xml an!)
-        $WD key super+up 
-        echo "✅ Fenster '$APP_NAME' maximiert."
+        if [ -n "$WINDOW_ID" ]; then
+            $WD windowactivate "$WINDOW_ID"
+            sleep 0.1
+        fi
+        $WD key super+up
+        echo "✅ Maximieren-Signal gesendet."
        ;;
    snap)
        if [ -z "$PARAM1" ]; then
-            echo "❌ Fehler: Für 'snap' wird eine Richtung (left, right, up, down) benötigt."
+            echo "❌ Fehler: Für 'snap' wird eine Richtung benötigt."
            exit 1
        fi
-        
-        # 1. Fenster MUSS zuerst fokussiert werden (Wayland-Sicherheitsregel für Input)
-        $WD windowactivate "$WINDOW_ID"
-        sleep 0.2 # Kurze Pause, damit der Fokus greift
-        
-        # 2. Sende die Tastenkombination an labwc zum Andocken
+
+        if [ -n "$WINDOW_ID" ]; then
+            $WD windowactivate "$WINDOW_ID"
+            sleep 0.1
+        fi
+
        case "$PARAM1" in
            left)  $WD key super+Left ;;
            right) $WD key super+Right ;;
            up)    $WD key super+Up ;;
            down)  $WD key super+Down ;;
-            # --- ECKEN (Super + Shift + Pfeiltasten für maximale Kompatibilität) ---
            top-left)     $WD key super+shift+Left ;;
            top-right)    $WD key super+shift+Up ;;
            bottom-right) $WD key super+shift+Down ;;
            bottom-left)  $WD key super+shift+Right ;;
-            *) echo "❌ Unbekannte Richtung. Nutze left, right, up, down." ; exit 1 ;;
+            *) echo "❌ Unbekannte Richtung: $PARAM1" ; exit 1 ;;
        esac
-        if [ $? -eq 0 ]; then
-            echo "✅ Fenster '$APP_NAME' nach $PARAM1 angedockt."
-        else
-            echo "❌ Fehler beim Senden der Tastenkombination an wdotool."
-            exit 1
-        fi
+        echo "✅ Fenster '$APP_NAME' nach $PARAM1 angedockt."
        ;;
    *)
-        echo "❌ Unbekannte Aktion: $ACTION. Erlaubt sind: start, activate, close, snap, maximize."
+        echo "❌ Unbekannte Aktion: $ACTION."
        exit 1
        ;;
 esac
 EOF
+
 sudo chmod +x /usr/local/bin/jwin


@@ -324,15 +320,31 @@ Beispiel-Verkettung für das System:
 3. System- & Display-Infos
 - Bildschirmauflösung ermitteln: <EXECUTE>wlr-randr | grep current | awk '{print $1}'</EXECUTE>

+{installed_apps}
+
 WICHTIGE REGELN FÜR DIE AUSFÜHRUNG:
-1. Wenn du eine Aktion ausführst, MUSST du den Linux-Befehl EXAKT in <EXECUTE> und </EXECUTE> Tags setzen.
-2. VERBOTEN: Verwende NIEMALS Markdown-Codeblöcke (```) um oder in den <EXECUTE>-Tags! Schreibe die Tags als simplen, rohen Text.
-3. ERZWUNGEN: Sag nicht nur, dass du etwas tust – du MUSST den <EXECUTE> Tag in deiner Antwort mitsenden, sonst passiert nichts!
+1. Wenn der Nutzer nach einer App fragt (z.B. "starte den Editor"), schaue in der obigen Liste nach dem passenden Programmnamen und nimm EXAKT den dort definierten Befehl.
+2. Rate niemals Befehle, die nicht in der Liste stehen!
+3. Wenn du eine Aktion ausführst, MUSST du den Linux-Befehl EXAKT in <EXECUTE> und </EXECUTE> Tags setzen.
+4. VERBOTEN: Verwende NIEMALS Markdown-Codeblöcke (```) um oder in den <EXECUTE>-Tags! Schreibe die Tags als simplen, rohen Text.
+5. ERZWUNGEN: Sag nicht nur, dass du etwas tust – du MUSST den <EXECUTE> Tag in deiner Antwort mitsenden, sonst passiert nichts!

 Beispiel für einen perfekten Workflow: 
 Das mache ich sofort für dich!
 <EXECUTE>jwin activate Firefox && sleep 1 && jwin move Firefox 0 0</EXECUTE>

+WICHTIGE REGELN FÜR TEXTE IN EDITOREN:
+1. Wenn der Nutzer einen Text (wie eine Einladung, Notiz oder Code) in einem Editor wie Geany erstellen möchte, erstelle den Text NIEMALS direkt mit "wdotool type" in einer langen Kette! Das ist zu fehleranfällig.
+2. Nutze stattdessen IMMER diesen zweistufigen, krisenfesten Weg:
+   Schritt A: Schreibe den generierten Text zuerst in eine temporäre Datei (z.B. mit echo oder cat).
+   Schritt B: Öffne diese Datei anschließend direkt mit Geany.
+
+Beispiel für das korrekte Vorgehen:
+<EXECUTE>cat << 'EOF' > /tmp/einladung.txt
+Liebe Familie...
+EOF
+geany /tmp/einladung.txt</EXECUTE>
+
 Schreibe immer eine kurze Textantwort dazu, was du gerade tust. Du duzt {user_name} konsequent, dein Tonfall ist locker und technisch versiert.
 EOF

@@ -1139,6 +1151,136 @@ EOF
 chmod +x "$REAL_HOME/.config/labwc/autostart"
 chown "$REAL_USER:$REAL_USER" "$REAL_HOME/.config/labwc/autostart"

+
+####################################
+# Voice setup
+####################################
+
+# Download the model (NOTE(review): all paths here are relative — confirm the working directory is the one wakeword.py runs from, since it opens "model")
+wget https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip
+# Unpack
+unzip vosk-model-small-de-0.15.zip
+# Rename the folder so the script can find it easily
+mv vosk-model-small-de-0.15 model
+rm vosk-model-small-de-0.15.zip
+
+
+cat << 'EOF' > "$JARVIS_DIR/wakeword.py"
+#!/usr/bin/env python3
+import os
+import sys
+import json
+import queue
+import time
+import subprocess
+import sounddevice as sd
+import numpy as np
+from vosk import Model, KaldiRecognizer
+from pathlib import Path
+
+MODEL_PATH = "model"
+AUDIO_RATE = 48000  
+LOCK_FILE = Path("/tmp/.jarvis_speaking")
+
+if not os.path.exists(MODEL_PATH):
+    print(f"❌ Modell-Ordner '{MODEL_PATH}' wurde nicht gefunden!")
+    sys.exit(1)
+
+audio_queue = queue.Queue()
+
+def audio_callback(indata, frames, time, status):
+    if status:
+        print(status, file=sys.stderr)
+    audio_queue.put(bytes(indata))
+
+print("🧠 J.A.R.V.I.S. lädt das Sprachmodell...")
+model = Model(MODEL_PATH)
+
+# Zwei Recognizer: Einer für das Wake-Word, einer für den eigentlichen Befehl (offen)
+wake_recognizer = KaldiRecognizer(model, AUDIO_RATE, '["jarvis", "[unk]"]')
+command_recognizer = KaldiRecognizer(model, AUDIO_RATE) # Sucht nach JEDEM deutschen Wort
+
+print("🎙️ J.A.R.V.I.S. ist online und lauscht... (Sag 'Jarvis')")
+
+with sd.RawInputStream(samplerate=AUDIO_RATE, blocksize=8000, dtype='int16',
+                        channels=1, callback=audio_callback):
+    
+    while True:
+        data = audio_queue.get()
+        
+        # While J.A.R.V.I.S. is speaking (lock file present), drain the queue and ignore the audio
+        if LOCK_FILE.exists():
+            while not audio_queue.empty():
+                audio_queue.get()
+            wake_recognizer.Reset() # drop partial results left over from before
+            continue
+
+        # Phase 1: wait for the wake word
+        if wake_recognizer.AcceptWaveform(data):
+            result = json.loads(wake_recognizer.Result())
+            if "jarvis" in result.get("text", ""):
+                print("\n⚡ [WAKEWORD DETECTED] Ja, Sir?")
+                
+                # Play a confirmation tone:
+                # a short beep (800 Hz, 0.1 seconds)
+                duration = 0.1
+                frequency = 800.0
+                t = np.linspace(0, duration, int(AUDIO_RATE * duration), endpoint=False)
+                beep = np.sin(2 * np.pi * frequency * t) * 0.3 # 0.3 amplitude keeps the volume comfortable
+                sd.play(beep, samplerate=AUDIO_RATE)
+                sd.wait()
+                # Drain the queue so audio captured during the beep is not treated as the command
+                while not audio_queue.empty():
+                    audio_queue.get()
+                
+                print("👂 Höre zu...")
+                command_text = ""
+                start_time = time.time()
+                
+                # Phase 2: record the following command for 4 seconds
+                while time.time() - start_time < 4.0:
+                    cmd_data = audio_queue.get()
+                    if command_recognizer.AcceptWaveform(cmd_data):
+                        res = json.loads(command_recognizer.Result())
+                        command_text += " " + res.get("text", "")
+                
+                # Collect whatever is still buffered in the recognizer
+                final_res = json.loads(command_recognizer.FinalResult())
+                command_text += " " + final_res.get("text", "")
+                command_text = command_text.strip()
+                
+                if command_text:
+                    print(f"🗣️ Erkannter Befehl: '{command_text}'")
+                    print("🧠 Übermittle an J.A.R.V.I.S. Gehirn...")
+                    
+                    # Run jarvis.py with the venv interpreter, both resolved next to this script
+                    # rather than under a fixed /home/meik path (setup installs into $REAL_HOME/jarvis-ai)
+                    subprocess.run([
+                        str(Path(__file__).resolve().parent / "venv" / "bin" / "python3"),
+                        str(Path(__file__).resolve().parent / "jarvis.py"),
+                        "--voice-cmd", 
+                        command_text
+                    ])
+                else:
+                    print("🔇 Kein Befehl verstanden.")
+                
+                print("\n🎙️ Zurück im Standby. Lausche auf 'Jarvis'...")
+                wake_recognizer.Reset()
+                command_recognizer.Reset()
+EOF
+
+# Install Piper (NOTE(review): downloaded and unpacked in the current directory; jarvis.py expects piper/ and the .onnx inside the jarvis-ai directory — confirm the working directory)
+wget https://github.com/rhasspy/piper/releases/download/v1.2.0/piper_amd64.tar.gz
+tar -xf piper_amd64.tar.gz
+rm piper_amd64.tar.gz
+
+# The actual voice model (.onnx)
+wget https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE/thorsten/high/de_DE-thorsten-high.onnx
+
+# The matching configuration file (.json)
+wget https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE/thorsten/high/de_DE-thorsten-high.onnx.json
+
+
 echo "===================================================="
 echo "✅ Lokales Setup erfolgreich abgeschlossen!"
 echo "👉 1. Trage deine API-Keys in $JARVIS_DIR/config/.env ein."