jarvis.py aktualisiert

2026-05-28 00:10:22 +00:00 · 2026-05-28 00:03:44 +00:00 · 2026-05-28 00:02:08 +00:00 · 2026-05-27 23:27:31 +00:00 · 2026-05-27 23:24:41 +00:00 · 2026-05-27 23:18:37 +00:00
7 changed files with 544 additions and 1338 deletions
--- a/jarvis.py
+++ b/jarvis.py
@@ -3,6 +3,9 @@ import re
 import sqlite3
 import asyncio
 import openai
+import sys
+import subprocess
+import edge_tts

 from google import genai
 from google.genai import types
@@ -143,6 +146,37 @@ def get_db():
    conn.row_factory = sqlite3.Row
    return conn

+# ====================================================
+# DYNAMISCHE PROGRAMM-ERKENNUNG (NEU)
+# ====================================================
+
+def get_installed_gui_apps():
+    """Map each desktop entry's Name to its Exec command, read from /usr/share/applications."""
+    apps_dir = Path("/usr/share/applications")
+    detected_apps = {}
+    
+    if apps_dir.exists():
+        for desktop_file in apps_dir.glob("*.desktop"):
+            try:
+                # Load the .desktop file; undecodable bytes are dropped instead of raising
+                content = desktop_file.read_text(encoding="utf-8", errors="ignore")
+                
+                # Take the first Name= line and the first Exec= line found anywhere in the file
+                name_match = re.search(r"^Name=(.+)$", content, re.M)
+                exec_match = re.search(r"^Exec=([^ \n%]+)", content, re.M) # Bare command only: stops at the first space or % field code (%U etc.)
+                
+                if name_match and exec_match:
+                    app_name = name_match.group(1).strip()
+                    app_cmd = exec_match.group(1).strip()
+                    
+                    # Leave out core system entries that would only clutter the list
+                    if not any(x in app_cmd.lower() for x in ["debian", "im-config", "openjdk", "systemd"]):
+                        detected_apps[app_name] = app_cmd
+            except Exception:
+                continue
+                
+    return detected_apps
+

 # ====================================================
 # SYSTEM PROMPT
@@ -163,26 +197,29 @@ def get_system_prompt():
    prompt = prompt.replace("{notes_file}", str(NOTES_FILE))
    prompt = prompt.replace("{todo_file}", str(TODO_FILE))

+    # --- DYNAMISCHE PROGRAMME INJIZIEREN ---
+    installed_apps = get_installed_gui_apps()
+    apps_prompt_string = "VERFÜGBARE LOKALE DESKTOP-PROGRAMME (Nutze NUR diese Befehe zum Starten!):\n"
+    for app_name, app_cmd in installed_apps.items():
+        apps_prompt_string += f"- {app_name}: Befehl lautet '{app_cmd}'\n"
+        
+    # Wir hängen die Liste einfach an den Prompt an oder ersetzen einen Platzhalter
+    if "{installed_apps}" in prompt:
+        prompt = prompt.replace("{installed_apps}", apps_prompt_string)
+    else:
+        prompt += "\n\n" + apps_prompt_string
+    # ---------------------------------------
+
    conn = get_db()
-
-    nodes = conn.execute(
-        'SELECT * FROM nodes'
-    ).fetchall()
-
+    nodes = conn.execute('SELECT * FROM nodes').fetchall()
    conn.close()

    node_info = ""
-
    for n in nodes:
-        node_info += (
-            f"- Name: {n['name']}, "
-            f"IP: {n['ip']}, "
-            f"User: {n['user']}\n"
-        )
+        node_info += f"- Name: {n['name']}, IP: {n['ip']}, User: {n['user']}\n"

    return prompt.replace("{node_info}", node_info)

-
 # ====================================================
 # KI KOMMUNIKATION
 # ====================================================
@@ -306,21 +343,8 @@ async def run_task(target, cmd):
        # GUI APPS ERKENNEN
        # ========================================

-        gui_apps = [
-            "firefox",
-            "thunderbird",
-            "chromium",
-            "google-chrome",
-            "code",
-            "nautilus",
-            "pcmanfm",
-            "gedit",
-            "vlc",
-            "discord",
-            "steam",
-            "obs",
-            "spotify"
-        ]
+        # Holt sich alle bekannten System-GUI-Befehle dynamisch
+        gui_apps = list(get_installed_gui_apps().values())

        first_word = cmd.strip().split()[0]

@@ -448,49 +472,36 @@ async def run_task(target, cmd):

            collected_output = []

-            while True:
+            # ========================================
+            # ROBUST OUTPUT READING WITH TIMEOUT
+            # ========================================
+            try:
+                while True:
+                    # Wait at most 2 seconds for the next line
+                    line = await asyncio.wait_for(proc.stdout.readline(), timeout=2.0)
+                    
+                    if not line:
+                        break

-                line = await proc.stdout.readline()
+                    decoded = line.decode("utf-8", errors="ignore").rstrip()
+                    collected_output.append(decoded)

-                if not line:
-                    break
+                    print(f"{OUTPUT_COLOR}│ {decoded}{RESET}")
+            except asyncio.TimeoutError:
+                # If the tool keeps the pipe open, simply stop reading
+                print(f"{SYSTEM_COLOR}⏳ Ausgabe-Stream stagniert. Erzwinge Prozess-Check...{RESET}")

-                decoded = line.decode(
-                    "utf-8",
-                    errors="ignore"
-                ).rstrip()
-
-                collected_output.append(decoded)
-
-                print(
-                    f"{OUTPUT_COLOR}"
-                    f"│ {decoded}"
-                    f"{RESET}"
-                )
-
-            await proc.wait()
+            # Wait at most 2 seconds for the process to exit
+            try:
+                await asyncio.wait_for(proc.wait(), timeout=2.0)
+            except asyncio.TimeoutError:
+                print(f"{ERROR_COLOR}⚠️ Prozess reagiert nicht. Setze Ablauf trotzdem fort.{RESET}")
+                # Optional: proc.terminate() if the process should be killed hard

            print()

-            if proc.returncode == 0:
-
-                print(
-                    f"{JARVIS_COLOR}"
-                    f"✅ TASK ERFOLGREICH"
-                    f"{RESET}\n"
-                )
-
-            else:
-
-                print(
-                    f"{ERROR_COLOR}"
-                    f"❌ FEHLER CODE: "
-                    f"{proc.returncode}"
-                    f"{RESET}\n"
-                )
-
+            # Keep returning the captured lines; without this the success path yields None
            return "\n".join(collected_output)
-
    except Exception as e:

        err = f"❌ Fehler: {e}"
@@ -531,27 +540,47 @@ async def listen_to_user():
 # ====================================================

 async def speak_to_user(text):
+    """Print the reply in a framed terminal block, then speak it aloud via Edge-TTS and mpv."""
-
-    print(
-        f"\n{JARVIS_COLOR}"
-        f"🤖 J.A.R.V.I.S."
-        f"{RESET}"
-    )
-
-    print(
-        f"{JARVIS_COLOR}"
-        f"{'-'*60}"
-        f"{RESET}"
-    )
-
+    print(f"\n{JARVIS_COLOR}🤖 J.A.R.V.I.S.{RESET}")
+    print(f"{JARVIS_COLOR}{'-'*60}{RESET}")
    print(text)
+    print(f"{JARVIS_COLOR}{'-'*60}{RESET}\n")

-    print(
-        f"{JARVIS_COLOR}"
-        f"{'-'*60}"
-        f"{RESET}\n"
-    )
+    # Nothing to synthesise for empty or whitespace-only text
+    if not text.strip():
+        return

+    # Voice selection (the original note recommends Killian and Conrad as German male voices)
+    VOICE = "de-DE-KillianNeural" 
+    OUTPUT_FILE = "/tmp/jarvis_response.mp3"
+    LOCK_FILE = "/tmp/.jarvis_speaking"
+
+    try:
+        # 1. Create the lock file; the wake-word script discards microphone audio while it exists
+        with open(LOCK_FILE, "w") as f:
+            f.write("1")
+
+        print(f"🔊 J.A.R.V.I.S. spricht: {text}")
+
+        # 2. Let Edge-TTS synthesise the text and store the audio as an MP3 file
+        communicate = edge_tts.Communicate(text, VOICE)
+        await communicate.save(OUTPUT_FILE)
+
+        # 3. Play the file in a child process so the event loop is not blocked
+        proc = await asyncio.create_subprocess_exec(
+            "mpv", "--no-video", OUTPUT_FILE,
+            stdout=asyncio.subprocess.DEVNULL,
+            stderr=asyncio.subprocess.DEVNULL
+        )
+        # Wait until mpv has finished playing
+        await proc.wait()
+
+    except Exception as e:
+        print(f"❌ Fehler bei der Sprachausgabe: {e}")
+    finally:
+        # 4. ALWAYS remove the lock file so the wake-word script listens again
+        if os.path.exists(LOCK_FILE):
+            os.remove(LOCK_FILE)

 # ====================================================
 # MAIN LOOP
@@ -681,25 +710,14 @@ async def main_chat_loop():
            target = target.strip()
            cmd = cmd.strip()

-            # ========================================
-            # SICHTBARE SYSTEMAKTION
-            # ========================================
-
            action_msg = (
                f"⚙️ Ich führe jetzt folgenden Befehl "
                f"auf [{target}] aus:\n\n"
                f"{cmd}"
            )

-            await speak_to_user(action_msg)
-
-            chat_history.append({
-                "role": "assistant",
-                "content": action_msg,
-                "timestamp": now
-            })
-            
-            # Optional: Aktion auch ins Log
+            # ÄNDERUNG: Auch hier nur im Terminal anzeigen, NICHT vorlesen!
+            print(f"\n{SYSTEM_COLOR}{action_msg}{RESET}\n")
            log_to_file("SYSTEM", action_msg)

            # ========================================
@@ -716,33 +734,26 @@ async def main_chat_loop():
            # ========================================

            if output:
-
                output_msg = (
                    f"💻 Ergebnis der Ausführung "
                    f"auf [{target}]:\n\n"
                    f"{output}"
                )
-
            else:
-
                output_msg = (
                    f"✅ Befehl auf [{target}] "
                    f"erfolgreich abgeschlossen."
                )

+            # Das hier wird weiterhin laut vorgelesen!
            await speak_to_user(output_msg)

-            sys_now = datetime.now().strftime(
-                "%d.%m.%Y %H:%M"
-            )
-
+            sys_now = datetime.now().strftime("%d.%m.%Y %H:%M")
            chat_history.append({
                "role": "assistant",
                "content": output_msg,
                "timestamp": sys_now
            })
-            
-            # LOG: System Output hier schreiben!
            log_to_file("SYSTEM", output_msg)

        # ============================================
@@ -753,20 +764,77 @@ async def main_chat_loop():
            chat_history = chat_history[-20:]


+
+# ====================================================
+# EINZEL-BEFEHL MODUS (Für das Sprachskript)
+# ====================================================
+async def run_single_command(command_text):
+    """Process one externally supplied command (voice mode) and return when it is done."""
+    init_db()
+    system_prompt = get_system_prompt()
+    
+    # Present the input to the model as a one-entry chat history
+    now = datetime.now().strftime("%d.%m.%Y %H:%M")
+    chat_history = [{
+        "role": "user",
+        "content": command_text,
+        "timestamp": now
+    }]
+    
+    log_to_file("Voice-Input", command_text)
+    
+    ai_response = await get_ai_response(
+        command_text,
+        system_prompt,
+        chat_history
+    )
+    
+    if ai_response is None:
+        return
+
+    # Collect the <EXECUTE> tags. The lazy attribute skip has to live INSIDE the
+    # optional target group: placed in front of it, it matches zero characters,
+    # the group is skipped, group(1) stays None and every command silently
+    # falls back to "localhost" even when a target attribute is present.
+    commands = []
+    execute_pattern = r'<EXECUTE\b(?:[^>]*?\btarget="([^"]*)")?[^>]*>(.*?)</EXECUTE>'
+    execute_matches = re.finditer(execute_pattern, ai_response, re.I | re.S)
+
+    for match in execute_matches:
+        target = match.group(1) or "localhost"
+        cmd = match.group(2).strip()
+        cmd = re.sub(r'^```[a-zA-Z]*\n?', '', cmd)
+        cmd = re.sub(r'\n?```$', '', cmd)
+        commands.append((target.strip(), cmd.strip()))
+
+    clean_msg = re.sub(r'<EXECUTE[^>]*?>.*?</EXECUTE>', '', ai_response, flags=re.I | re.S).strip()
+    
+    if clean_msg:
+        await speak_to_user(clean_msg)
+        log_to_file("J.A.R.V.I.S.", clean_msg)
+
+    if commands:
+        for target, cmd in commands:
+            action_msg = f"⚙️ Führe Sprachbefehl auf [{target}] aus:\n{cmd}"
+            
+            # Shown in the terminal only, deliberately not read aloud
+            print(f"\n{SYSTEM_COLOR}{action_msg}{RESET}\n")
+            log_to_file("SYSTEM", action_msg)
+            
+            # Run the command and wait for it; the returned output is not used here
+            await run_task(target, cmd)
+
 # ====================================================
 # START
 # ====================================================
-
 if __name__ == "__main__":
-
    try:
-
-        asyncio.run(main_chat_loop())
-
+        # One-shot mode when arguments are passed (e.g. python3 jarvis.py --voice-cmd "...")
+        if len(sys.argv) > 2 and sys.argv[1] == "--voice-cmd":
+            command_text = sys.argv[2]
+            asyncio.run(run_single_command(command_text))
+        else:
+            # Regular interactive terminal mode
+            asyncio.run(main_chat_loop())
    except KeyboardInterrupt:
-
-        print(
-            f"\n{ERROR_COLOR}"
-            f"⛔ J.A.R.V.I.S. hart beendet."
-            f"{RESET}"
-        )
+        print(f"\n{ERROR_COLOR}⛔ J.A.R.V.I.S. hart beendet.{RESET}")
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,8 @@
 openai
 google-genai
 python-dotenv
+vosk
+sounddevice
+numpy
+colorama
+edge-tts  # imported by jarvis.py (edge_tts) for speech output
--- a/setup.sh
+++ b/setup.sh
@@ -1,77 +0,0 @@
-#!/bin/bash
-set -e
-
-echo "=========================================="
-echo "🤖 J.A.R.V.I.S. Terminal - Setup"
-echo "=========================================="
-
-# 1. Virtual Environment
-echo "--- Richte Python-Umgebung ein..."
-if [ ! -d "venv" ]; then
-    python3 -m venv venv
-fi
-
-./venv/bin/pip install --upgrade pip
-if [ -f "requirements.txt" ]; then
-    ./venv/bin/pip install -r requirements.txt
-fi
-
-# 2. Ordnerstruktur
-mkdir -p config data workspace
-
-# 3. .env Setup
-ENV_FILE="config/.env"
-if [ ! -f "$ENV_FILE" ]; then
-    echo "--- Erstelle .env Konfiguration..."
-    read -p "Dein Name (Standard: Meik): " web_user
-    read -p "Primäre KI (google, openai, nvidia, ollama) [nvidia]: " ai_prov
-    read -p "NVIDIA API Key: " nvidia_key
-    read -p "Google API Key: " google_key
-    
-    cat <<EOF > "$ENV_FILE"
-WEB_USER_NAME=${web_user:-Meik}
-AI_PROVIDER=${ai_prov:-nvidia}
-GOOGLE_API_KEY=$google_key
-NVIDIA_API_KEY=$nvidia_key
-NVIDIA_MODEL=moonshotai/kimi-k2.5
-GOOGLE_MODEL=gemini-2.5-flash
-OLLAMA_BASE_URL=http://127.0.0.1:11434/v1
-OLLAMA_MODEL=llama3
-EOF
-    echo "✅ .env erstellt."
-fi
-
-# 4. System Prompt kopieren
-PROMPT_FILE="config/system_prompt.txt"
-if [ ! -f "$PROMPT_FILE" ]; then
-    echo "--- Erstelle Standard-Prompt..."
-    cat << 'EOF' > "$PROMPT_FILE"
-Dein Name ist JARVIS.
-Du bist ein präziser KI-Assistent für die Cluster-Verwaltung.
-WICHTIGSTE REGEL: Deine Sprache ist locker, technisch versiert und du verwendest NIEMALS die Höflichkeitsform "Sie". Wir sind per Du.
-
-DEIN WORKSPACE (GEDÄCHTNIS):
-Du hast Zugriff auf ein eigenes Arbeitsverzeichnis auf dem Host-System (localhost), um dir Notizen zu machen oder Todos für {user_name} zu speichern:
- Arbeitsverzeichnis: {workspace_dir}
- Notizen-Datei: {notes_file}
- Todo-Liste: {todo_file}
-
-Du kannst diese Dateien lesen oder beschreiben. Nutze dazu normale Shell-Befehle (z.B. cat, echo "text" >> datei) mit dem Ziel "localhost":
-<EXECUTE target="localhost">befehl</EXECUTE>
-
-PROTOKOLL FÜR BEFEHLE (2 Phasen):
-PHASE 1 (Vorschlag): Wenn {user_name} eine Aktion anfordert, erstelle NUR einen Text-Vorschlag.
- Beschreibe kurz, was du tun würdest. Nenne den Befehl als normalen Text.
- Frage explizit nach Erlaubnis: "Soll ich das ausführen, {user_name}?"
-PHASE 2 (Ausführung): NUR wenn {user_name} die Aktion bestätigt, gibst du den Befehl im XML-Format aus:
-<EXECUTE target="IP_ODER_LOCALHOST">befehl</EXECUTE>
-
-Bekannte Nodes: 
-{node_info}
-EOF
-    echo "✅ system_prompt.txt erstellt."
-fi
-
-echo "=========================================="
-echo "✅ Setup abgeschlossen! "
-echo "=========================================="
--- a/setup_wayland_jarvis.sh
+++ b/setup_wayland_jarvis.sh
--- a/setup_x11_jarvis.sh
+++ b/setup_x11_jarvis.sh
@@ -0,0 +1,212 @@
+#!/bin/bash
+
+set -e  # Abort on the first failing command
+
+echo "===================================================="
+echo "🚀 Starte J.A.R.V.I.S. Desktop OS - X11 Openbox Setup"
+echo "===================================================="
+
+# Detect the real desktop user, also when the script is invoked through sudo
+REAL_USER="${SUDO_USER:-$(logname 2>/dev/null || whoami)}"
+REAL_HOME=$(getent passwd "$REAL_USER" | cut -d: -f6)
+
+if [ -z "$REAL_USER" ] || [ "$REAL_USER" = "root" ]; then
+    REAL_USER=$(id -nu 1000 2>/dev/null || echo "meik")
+    REAL_HOME=$(getent passwd "$REAL_USER" | cut -d: -f6)
+fi
+
+JARVIS_DIR="$REAL_HOME/jarvis-ai"
+
+# 1. Update the system and install X11/Openbox packages (mpv is the player jarvis.py uses for speech output)
+echo "📦 Installiere X11-Server, Openbox und Automatisierungstools..."
+sudo apt update
+sudo apt install -y \
+    xserver-xorg \
+    xinit \
+    x11-xserver-utils \
+    openbox \
+    tint2 \
+    rofi \
+    xdotool \
+    wmctrl \
+    firefox-esr \
+    curl \
+    wget \
+    git \
+    sudo \
+    python3 \
+    python3-pip \
+    python3-venv \
+    tilix \
+    geany \
+    fonts-noto-color-emoji \
+    pipewire \
+    pipewire-audio-client-libraries \
+    pipewire-pulse \
+    wireplumber \
+    alsa-utils \
+    libasound2-dev \
+    libportaudio2 \
+    unzip \
+    mpv
+
+# 1.1 Add the user to the audio and graphics device groups
+echo "👥 Setze Gruppenrechte für '$REAL_USER'..."
+sudo usermod -aG video,render,input,audio "$REAL_USER"
+
+# 2. Create the Openbox configuration directory
+echo "⚙️ Konfiguriere Openbox für '$REAL_USER'..."
+mkdir -p "$REAL_HOME/.config/openbox"
+
+# 3. Write the Openbox autostart file (unquoted heredoc: $JARVIS_DIR is expanded while writing)
+cat << EOF > "$REAL_HOME/.config/openbox/autostart"
+# Tint2 Taskleiste im Hintergrund starten
+tint2 &
+# Deutsches Tastaturlayout
+setxkbmap de &
+# PipeWire Audio-Server starten
+gentle_pipewire_start() {
+    systemctl --user start pipewire pipewire-pulse wireplumber
+}
+gentle_pipewire_start &
+
+# Falls ein Hintergrundbild gewünscht ist (optional, benötigt 'feh'):
+# feh --bg-scale /pfad/zum/bild.jpg &
+
+# J.A.R.V.I.S.-AI starten
+tilix -e "$JARVIS_DIR/start.sh" &
+EOF
+
+chmod +x "$REAL_HOME/.config/openbox/autostart"
+
+# 4. Write .xinitrc (allows starting the session with 'startx' from the console)
+cat << EOF > "$REAL_HOME/.xinitrc"
+#!/bin/sh
+# X11 Umgebungsvariablen setzen falls nötig
+export QT_QPA_PLATFORM=xcb
+export GDK_BACKEND=x11
+
+# Openbox Session starten
+exec openbox-session
+EOF
+
+chmod +x "$REAL_HOME/.xinitrc"
+
+# Fix ownership of the files written above
+chown -R "$REAL_USER:$REAL_USER" "$REAL_HOME/.config" "$REAL_HOME/.xinitrc"
+
+# J.A.R.V.I.S. .env template; only written when no .env exists yet
+if [ ! -f "$JARVIS_DIR/config/.env" ]; then
+    echo "📝 Erstelle .env Konfigurationsdatei..."
+    # Make sure the config directory exists
+    mkdir -p "$JARVIS_DIR/config"
+    
+cat << EOF > "$JARVIS_DIR/config/.env"
+WEB_USER_NAME=$REAL_USER
+AI_PROVIDER=nvidia
+OPENAI_API_KEY=dein-openai-key
+OPENAI_MODEL=
+GOOGLE_API_KEY=dein-google-key
+NVIDIA_API_KEY=dein-nvidia-key
+NVIDIA_MODEL=moonshotai/kimi-k2.6
+GOOGLE_MODEL=gemini-2.5-flash
+OLLAMA_BASE_URL=http://127.0.0.1:11434/v1
+OLLAMA_MODEL=llama3
+GROQ_API_KEY=dein-groq-key
+GROQ_MODEL=groq/compound
+EOF
+fi
+	
+# J.A.R.V.I.S. system prompt incl. xdotool/wmctrl manual. The delimiter is PROMPT_EOF so the prompt itself can show a working "EOF" heredoc.
+cat << 'PROMPT_EOF' > "$JARVIS_DIR/config/system_prompt.txt"
+Du bist J.A.R.V.I.S., ein KI-Systemassistent, der direkt auf einem Debian X11-Desktop (Openbox) läuft. Du hast vollen lokalen Zugriff auf das System.
+UMGEBUNG & GEDÄCHTNIS:
+Arbeitsverzeichnis: {workspace_dir}
+Notizen: {notes_file}
+Todos: {todo_file}
+DESKTOP STEUERUNG & FENSTER-MANAGEMENT:
+Du steuerst die grafische Oberfläche (X11/Openbox) über native Befehlszeilen-Tools wie wmctrl und xdotool.
+Programme & Fenster verwalten
+Du steuerst Fenster direkt über Konsolenbefehle.
+Folgende Aktionen sind erlaubt:
+start (Startet ein Programm in den Hintergrund): firefox &
+activate (Holt ein Fenster in den Vordergrund): wmctrl -a "Firefox" (Sucht nach dem Namen im Titel)
+close (Schließt das Fenster sanft): wmctrl -c "Firefox"
+maximize (Maximiert das Fenster): wmctrl -r "Firefox" -b add,maximized_vert,maximized_horz
+positionieren/snappen: Nutze wmctrl mit dem Schalter -e (Format: gravity,X,Y,Width,Height).
+Beispiele:
+wmctrl -r "Firefox" -b add,maximized_vert,maximized_horz (Maximiert Firefox)
+wmctrl -a "Terminal" (Holt das Terminal in den Vordergrund)
+Regel für Multitasking-Fenster: Wenn der User mehrere Instanzen derselben App starten möchte, starte sie im Hintergrund, warte kurz, hole das aktive Fenster mit xdotool und passe es an.
+Beispiel-Verkettung für das System:
+firefox & sleep 2 && wmctrl -r "Mozilla Firefox" -e 0,0,0,960,1080 && firefox & sleep 2 && xdotool getactivewindow windowmove 960 0 windowsize 960 1080
+Tastatur & Maus (Tool: xdotool)
+Tastatur: xdotool key ctrl+l, xdotool key alt+Tab, xdotool type "Hallo"
+Maus: xdotool mousemove 500 400 (absolut), xdotool click 1 (1=links, 3=rechts)
+WICHTIG: Wenn du Tasten an ein Programm senden willst, sorge IMMER dafür, dass es vorher den Fokus hat (z.B. wmctrl -a "Firefox" && xdotool key F5).
+System- & Display-Infos
+Bildschirmauflösung ermitteln: xrandr | grep '*' | awk '{print $1}'
+{installed_apps}
+WICHTIGE REGELN FÜR DIE AUSFÜHRUNG VON BEFEHLEN:
+Das Backend-System wertet deine Befehle NUR aus, wenn sie exakt in XML-Tags eingeschlossen sind. Du darfst Systembefehle unter keinen Umständen als einfachen Text oder in Markdown-Codeblöcken (```) ausgeben!
+
+RICHTIG: <EXECUTE>firefox &</EXECUTE>
+FALSCH: ```bash firefox & ```
+
+SPEZIALREGEL FÜR WEBSEITEN & BROWSER:
+Navigiere NIEMALS mittels xdotool (ctrl+l) zu einer Webseite! Das ist zu fehleranfällig. Übergib die URL stattdessen IMMER direkt als Argument an den firefox-Befehl. Das öffnet die Seite direkt (oder in einem neuen Tab, falls Firefox bereits läuft).
+RICHTIG: <EXECUTE>firefox "google.de" &</EXECUTE>
+FALSCH: <EXECUTE>firefox & sleep 2 && xdotool key ctrl+l ...</EXECUTE>
+
+RICHTIGE VERKETTUNG (für andere Anwendungen):
+Wenn du mehrere Befehle verknüpfen musst (z.B. App starten und danach pflegen), nutze && innerhalb eines EINZIGEN <EXECUTE>-Blocks.
+Beispiel:
+<EXECUTE>tilix & sleep 1 && wmctrl -a "Tilix"</EXECUTE>
+
+WICHTIGE REGELN FÜR TEXTE IN EDITOREN:
+Wenn du Texte für den Nutzer in einem Editor wie Geany erstellen sollst, nutze immer temporäre Dateien.
+RICHTIG:
+<EXECUTE>cat << 'EOF' > /tmp/notiz.txt
+Dein generierter Text...
+EOF
+geany /tmp/notiz.txt &</EXECUTE>
+Antworte immer mit einem kurzen, lockeren Bestätigungssatz, was du tust, gefolgt von dem <EXECUTE>-Block.
+Du duzt {user_name} konsequent, dein Tonfall ist locker und technisch versiert.
+PROMPT_EOF
+
+
+# 5. Create the Python virtual environment and install the dependencies
+echo "🐍 Richte virtuelles Python-Environment ein..."
+mkdir -p "$JARVIS_DIR"
+python3 -m venv "$JARVIS_DIR/venv"
+"$JARVIS_DIR/venv/bin/pip" install --upgrade pip
+"$JARVIS_DIR/venv/bin/pip" install -r requirements.txt
+
+# Voice setup. start.sh changes into its own directory before launching
+# wakeword.py, which opens the relative path "model", so the voice assets
+# must be downloaded into $JARVIS_DIR rather than the caller's directory.
+cd "$JARVIS_DIR"
+# Vosk model; skipped on re-runs, where mv would nest a second copy inside model/
+if [ ! -d model ]; then
+    wget https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip
+    unzip vosk-model-small-de-0.15.zip
+    mv vosk-model-small-de-0.15 model
+    rm vosk-model-small-de-0.15.zip
+fi
+
+# Install Piper
+wget https://github.com/rhasspy/piper/releases/download/v1.2.0/piper_amd64.tar.gz
+tar -xf piper_amd64.tar.gz
+rm piper_amd64.tar.gz
+
+# Piper voice model (.onnx) and its config (.json); -nc skips files that are already present
+wget -nc https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE/thorsten/high/de_DE-thorsten-high.onnx
+wget -nc https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE/thorsten/high/de_DE-thorsten-high.onnx.json
+
+# Hand everything created above to the desktop user (relevant when the script ran via sudo)
+sudo chown -R "$REAL_USER:$REAL_USER" "$JARVIS_DIR"
+
+echo "===================================================="
+echo "✅ X11 Openbox-Umgebung erfolgreich eingerichtet!"
+echo "👉 Starte die grafische Oberfläche einfach mit dem Befehl: startx"
+echo "===================================================="
--- a/start.sh
+++ b/start.sh
@@ -1,4 +1,12 @@
 #!/bin/bash
 cd "$(dirname "$0")"
 source venv/bin/activate
+
+# IMPORTANT: when the script exits (intended to cover Ctrl+C), signal the whole process group so background jobs stop too
+trap 'echo -e "\n🛑 Beende alle J.A.R.V.I.S. Systeme..."; kill 0' EXIT
+
+echo "🎙️ Starte Wake-Word-Engine im Hintergrund..."
+python3 wakeword.py &
+
+echo "💬 Starte Chat-Interface..."
 python3 jarvis.py
--- a/wakeword.py
+++ b/wakeword.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+import os
+import sys
+import json
+import queue
+import time
+import subprocess
+import wave
+import sounddevice as sd
+import numpy as np
+from vosk import Model, KaldiRecognizer
+from pathlib import Path
+from dotenv import load_dotenv
+from openai import OpenAI
+
+# ====================================================
+# PATHS & ENV SETUP (values are read from config/.env)
+# ====================================================
+BASE_DIR = Path(__file__).resolve().parent
+CONFIG_DIR = BASE_DIR / "config"
+ENV_FILE = CONFIG_DIR / ".env"
+load_dotenv(ENV_FILE)
+
+MODEL_PATH = str(BASE_DIR / "model")  # anchored to the script like the .env, not to the working directory
+AUDIO_RATE = 48000  
+LOCK_FILE = Path("/tmp/.jarvis_speaking")
+
+if not os.path.exists(MODEL_PATH):
+    print(f"❌ Modell-Ordner '{MODEL_PATH}' wurde nicht gefunden!")
+    sys.exit(1)
+
+# Initialise the OpenAI client (only a warning is printed when the key is missing)
+if not os.getenv("OPENAI_API_KEY"):
+    print("⚠️ Warnung: Kein OPENAI_API_KEY in der .env gefunden!")
+openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+audio_queue = queue.Queue()
+
+def audio_callback(indata, frames, time, status):  # stream callback; `time` shadows the module only in here
+    if status:
+        print(status, file=sys.stderr)
+    audio_queue.put(bytes(indata))
+
+print("🧠 J.A.R.V.I.S. lädt das Sprachmodell für das Wake-Word...")
+model = Model(MODEL_PATH)
+
+# A single recognizer, restricted by its grammar list to the wake word ("jarvis")
+wake_recognizer = KaldiRecognizer(model, AUDIO_RATE, '["jarvis", "[unk]"]')
+
+print("🎙️ J.A.R.V.I.S. läuft im Hybrid-Modus (Vosk + Whisper) und lauscht... (Sag 'Jarvis')")
+
+with sd.RawInputStream(samplerate=AUDIO_RATE, blocksize=8000, dtype='int16',
+                        channels=1, callback=audio_callback):
+    
+    while True:
+        data = audio_queue.get()
+        
+        # While J.A.R.V.I.S. is speaking (lock file present), drain the queue and ignore the audio
+        if LOCK_FILE.exists():
+            while not audio_queue.empty():
+                audio_queue.get()
+            wake_recognizer.Reset() # Discards stale partial results
+            continue
+
+        # Phase 1: wait for the wake word (locally via Vosk)
+        if wake_recognizer.AcceptWaveform(data):
+            result = json.loads(wake_recognizer.Result())
+            if "jarvis" in result.get("text", ""):
+                print("\n⚡ [WAKEWORD DETECTED] Ja, Sir?")
+                
+                # Play a confirmation beep (800 Hz, 0.1 seconds)
+                duration = 0.1
+                frequency = 800.0
+                t = np.linspace(0, duration, int(AUDIO_RATE * duration), endpoint=False)
+                beep = np.sin(2 * np.pi * frequency * t) * 0.3
+                sd.play(beep, samplerate=AUDIO_RATE)
+                sd.wait()                
+                
+                # Drain the queue so the beep itself is not part of the recording
+                while not audio_queue.empty():
+                    audio_queue.get()
+                
+                print("👂 Höre zu (Befehlsaufnahme)...")
+                collected_chunks = []
+                start_time = time.time()
+                
+                # Phase 2: collect the raw stream data for 4 seconds
+                while time.time() - start_time < 4.0:
+                    try:
+                        # Short timeout so the loop re-checks the deadline regularly
+                        cmd_data = audio_queue.get(timeout=0.2)
+                        collected_chunks.append(cmd_data)
+                    except queue.Empty:
+                        continue
+                
+                print("🧠 Sende Audio an OpenAI Whisper API...")
+                
+                # Join the raw audio bytes and store them as a WAV file
+                wav_path = "/tmp/jarvis_cmd.wav"
+                all_bytes = b"".join(collected_chunks)
+                
+                try:
+                    with wave.open(wav_path, "wb") as wf:
+                        wf.setnchannels(1)
+                        wf.setsampwidth(2) # int16 is 2 bytes per sample
+                        wf.setframerate(AUDIO_RATE)
+                        wf.writeframes(all_bytes)
+                    
+                    # Call the Whisper API
+                    with open(wav_path, "rb") as audio_file:
+                        transcription = openai_client.audio.transcriptions.create(
+                            model="whisper-1", 
+                            file=audio_file,
+                            language="de"  # Requests German transcription
+                        )
+                    command_text = transcription.text.strip()
+                    
+                except Exception as e:
+                    print(f"❌ Fehler bei der Spracherkennung: {e}")
+                    command_text = ""
+                
+                # Phase 3: process the command if Whisper returned any text
+                if command_text:
+                    print(f"🗣️ Erkannt (Whisper): '{command_text}'")
+                    print("🧠 Übermittle an J.A.R.V.I.S. Gehirn...")
+                    # Script-relative paths plus cwd: independent of the caller's working directory
+                    jarvis_cmd = [str(BASE_DIR / "venv/bin/python3"), str(BASE_DIR / "jarvis.py")]
+                    subprocess.run(jarvis_cmd + ["--voice-cmd", command_text], cwd=BASE_DIR)
+                else:
+                    print("🔇 Kein Befehl verstanden.")
+                
+                # The stream kept recording during the blocking call above, possibly including
+                # the spoken reply: drop that backlog so J.A.R.V.I.S. cannot wake itself.
+                while not audio_queue.empty():
+                    audio_queue.get()
+                print("\n🎙️ Zurück im Standby. Lausche auf 'Jarvis'...")
+                wake_recognizer.Reset()
Author	SHA1	Message	Date
info@pi-farm.de	b44bd350f6	jarvis.py aktualisiert	2026-05-28 00:10:22 +00:00
info@pi-farm.de	082c055683	jarvis.py aktualisiert	2026-05-28 00:03:44 +00:00
info@pi-farm.de	cd239fde3c	jarvis.py aktualisiert	2026-05-28 00:02:08 +00:00
info@pi-farm.de	095d3edc03	wakeword.py aktualisiert	2026-05-27 23:27:31 +00:00
info@pi-farm.de	091a553452	wakeword.py aktualisiert	2026-05-27 23:24:41 +00:00
info@pi-farm.de	6703aa2082	setup_x11_jarvis.sh aktualisiert	2026-05-27 23:18:37 +00:00
info@pi-farm.de	0c4e377fa3	setup_x11_jarvis.sh aktualisiert	2026-05-27 23:15:35 +00:00
info@pi-farm.de	4eabde994d	setup_x11_jarvis.sh aktualisiert	2026-05-27 23:10:17 +00:00
info@pi-farm.de	c069c3d464	setup_x11_jarvis.sh aktualisiert	2026-05-27 22:56:43 +00:00
info@pi-farm.de	27fa7ed96f	setup_x11_jarvis.sh aktualisiert	2026-05-27 22:53:18 +00:00
info@pi-farm.de	ab0b8921c1	setup_x11_jarvis.sh aktualisiert	2026-05-27 22:45:32 +00:00
info@pi-farm.de	019815b5b1	setup_x11_jarvis.sh aktualisiert	2026-05-27 22:16:49 +00:00
info@pi-farm.de	baeca38fcf	setup_x11_jarvis.sh aktualisiert	2026-05-27 07:28:51 +00:00
info@pi-farm.de	de37178f7a	setup_x11_jarvis.sh aktualisiert	2026-05-27 07:15:27 +00:00
info@pi-farm.de	87fabe2489	+ system_prompt & .env	2026-05-27 06:52:51 +00:00
info@pi-farm.de	fa90f7b1e6	setup_x11_jarvis.sh aktualisiert	2026-05-26 23:14:49 +00:00
info@pi-farm.de	b83de316b3	wakeword.py aktualisiert	2026-05-26 22:53:11 +00:00
info@pi-farm.de	0578808fc9	requirements.txt aktualisiert	2026-05-26 22:51:50 +00:00
info@pi-farm.de	01319a2b8c	setup_x11_jarvis.sh aktualisiert	2026-05-26 22:49:49 +00:00
info@pi-farm.de	849ad23c64	setup_x11_jarvis.sh	2026-05-26 22:30:16 +00:00
info@pi-farm.de	be29a86f1b	setup_wayland_jarvis.sh aktualisiert	2026-05-26 22:18:48 +00:00
pi-farm	feb648d035	+ wakeword.py	2026-05-26 23:54:22 +02:00
info@pi-farm.de	3275c1ee69	setup_wayland_jarvis.sh aktualisiert	2026-05-26 20:57:27 +00:00
info@pi-farm.de	6a513768e1	setup.sh gelöscht	2026-05-26 20:33:34 +00:00
info@pi-farm.de	ddf0ac1211	shortcuts rc.xml	2026-05-26 20:33:06 +00:00
info@pi-farm.de	e11d55c077	starte chat + voice	2026-05-26 20:23:55 +00:00
pi-farm	16aa40492c	+ jarvis-voice	2026-05-26 22:17:14 +02:00
info@pi-farm.de	b9d198497d	colorama	2026-05-26 10:13:44 +00:00