jarvis.py aktualisiert

2026-05-28 00:10:22 +00:00 · 2026-05-28 00:03:44 +00:00 · 2026-05-28 00:02:08 +00:00 · 2026-05-27 23:27:31 +00:00
2 changed files with 99 additions and 49 deletions
--- a/jarvis.py
+++ b/jarvis.py
@@ -5,6 +5,7 @@ import asyncio
 import openai
 import sys
 import subprocess
 import edge_tts
 from google import genai
 from google.genai import types
@@ -544,28 +545,42 @@ async def speak_to_user(text):
    print(text)
    print(f"{JARVIS_COLOR}{'-'*60}{RESET}\n")
-    clean_text = re.sub(r'[^\w\s\d.,!?-]', '', text)
+    """Generiert eine hochauflösende KI-Stimme via Edge-TTS und spielt sie ab."""
-    
+    if not text.strip():
-    piper_path = "/home/meik/jarvis-ai/piper/piper"
+        return
    model_path = "/home/meik/jarvis-ai/de_DE-thorsten-high.onnx"
    lock_file = Path("/tmp/.jarvis_speaking") # Die Sperr-Datei
-    if os.path.exists(piper_path) and os.path.exists(model_path):
+    # Definition der Stimme (Killian und Conrad sind hervorragende deutsche Männerstimmen)
-        try:
+    VOICE = "de-DE-KillianNeural" 
-            # 1. Sperre setzen
+    OUTPUT_FILE = "/tmp/jarvis_response.mp3"
-            lock_file.touch()
+    LOCK_FILE = "/tmp/.jarvis_speaking"
-            
+
-            piper_cmd = f"echo '{clean_text}' | {piper_path} --model {model_path} --output_raw | aplay -r 22050 -f S16_LE -t raw -D pipewire >/dev/null 2>&1"
+    try:
-            
+        # 1. Erstelle die Lock-Datei, damit das Mikrofon im Wakeword-Skript stummschaltet
-            proc = await asyncio.create_subprocess_shell(piper_cmd)
+        with open(LOCK_FILE, "w") as f:
-            await proc.wait()
+            f.write("1")
-            
+
-        except Exception as e:
+        print(f"🔊 J.A.R.V.I.S. spricht: {text}")
-            print(f"⚠️ TTS Fehler: {e}")
+
-        finally:
+        # 2. Audio aus der Cloud abrufen (Jetzt sauber mit direktem await!)
-            # 2. Sperre IMMER wieder aufheben, wenn Piper fertig ist
+        communicate = edge_tts.Communicate(text, VOICE)
-            if lock_file.exists():
+        await communicate.save(OUTPUT_FILE)
-                lock_file.unlink()
+
        # 3. Audio ressourcenschonend & asynchron abspielen
        proc = await asyncio.create_subprocess_exec(
            "mpv", "--no-video", OUTPUT_FILE,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL
        )
        # Warten, bis mpv fertig gesprochen hat
        await proc.wait()
    except Exception as e:
        print(f"❌ Fehler bei der Sprachausgabe: {e}")
    finally:
        # 4. Lock-Datei IMMER löschen, damit J.A.R.V.I.S. wieder zuhört
        if os.path.exists(LOCK_FILE):
            os.remove(LOCK_FILE)
 # ====================================================
 # MAIN LOOP
--- a/wakeword.py
+++ b/wakeword.py
@@ -5,11 +5,21 @@ import json
 import queue
 import time
 import subprocess
-import colorama
+import wave
 import sounddevice as sd
 import numpy as np
 from vosk import Model, KaldiRecognizer
 from pathlib import Path
 from dotenv import load_dotenv
 from openai import OpenAI
 # ====================================================
 # PFADE & ENV SETUP (Aus config/.env lesen)
 # ====================================================
 BASE_DIR = Path(__file__).resolve().parent
 CONFIG_DIR = BASE_DIR / "config"
 ENV_FILE = CONFIG_DIR / ".env"
 load_dotenv(ENV_FILE)
 MODEL_PATH = "model"
 AUDIO_RATE = 48000  
@@ -19,6 +29,11 @@ if not os.path.exists(MODEL_PATH):
    print(f"❌ Modell-Ordner '{MODEL_PATH}' wurde nicht gefunden!")
    sys.exit(1)
 # OpenAI Client initialisieren
 if not os.getenv("OPENAI_API_KEY"):
    print("⚠️ Warnung: Kein OPENAI_API_KEY in der .env gefunden!")
 openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 audio_queue = queue.Queue()
 def audio_callback(indata, frames, time, status):
@@ -26,14 +41,13 @@ def audio_callback(indata, frames, time, status):
        print(status, file=sys.stderr)
    audio_queue.put(bytes(indata))
-print("🧠 J.A.R.V.I.S. lädt das Sprachmodell...")
+print("🧠 J.A.R.V.I.S. lädt das Sprachmodell für das Wake-Word...")
 model = Model(MODEL_PATH)
-# Zwei Recognizer: Einer für das Wake-Word, einer für den eigentlichen Befehl (offen)
+# Nur noch EIN Recognizer: Ausschließlich für das Wake-Word ("jarvis")
 wake_recognizer = KaldiRecognizer(model, AUDIO_RATE, '["jarvis", "[unk]"]')
 command_recognizer = KaldiRecognizer(model, AUDIO_RATE) # Sucht nach JEDEM deutschen Wort
-print("🎙️ J.A.R.V.I.S. ist online und lauscht... (Sag 'Jarvis')")
+print("🎙️ J.A.R.V.I.S. läuft im Hybrid-Modus (Vosk + Whisper) und lauscht... (Sag 'Jarvis')")
 with sd.RawInputStream(samplerate=AUDIO_RATE, blocksize=8000, dtype='int16',
                        channels=1, callback=audio_callback):
@@ -41,53 +55,75 @@ with sd.RawInputStream(samplerate=AUDIO_RATE, blocksize=8000, dtype='int16',
    while True:
        data = audio_queue.get()
-        # NEU: Wenn J.A.R.V.I.S. gerade spricht, leere die Queue und ignoriere das Audio
+        # Wenn J.A.R.V.I.S. gerade spricht, leere die Queue und ignoriere das Audio
        if LOCK_FILE.exists():
            while not audio_queue.empty():
                audio_queue.get()
-            wake_recognizer.Reset() # Verhindert, dass Bruchstücke von vorhin gespeichert bleiben
+            wake_recognizer.Reset() # Verhindert alte Bruchstücke
            continue
-        # Phase 1: Auf Wake-Word warten
+        # Phase 1: Auf Wake-Word warten (Lokal via Vosk)
        if wake_recognizer.AcceptWaveform(data):
            result = json.loads(wake_recognizer.Result())
            if "jarvis" in result.get("text", ""):
                print("\n⚡ [WAKEWORD DETECTED] Ja, Sir?")
-                # Bestätigungston abspielen
+                # Bestätigungston abspielen (800 Hz, 0.1 Sekunden)
                # Kurzer, smarter Beep-Ton (800 Hz, 0.1 Sekunden)
                duration = 0.1
                frequency = 800.0
                t = np.linspace(0, duration, int(AUDIO_RATE * duration), endpoint=False)
-                beep = np.sin(2 * np.pi * frequency * t) * 0.3 # 0.3 für angenehme Lautstärke
+                beep = np.sin(2 * np.pi * frequency * t) * 0.3
                sd.play(beep, samplerate=AUDIO_RATE)
                sd.wait()                
-                # Warteschlange leeren, um alten Ton nicht als Befehl zu interpretieren
+                
                # Warteschlange leeren, um den Beep nicht selbst aufzunehmen
                while not audio_queue.empty():
                    audio_queue.get()
-                print("👂 Höre zu...")
+                print("👂 Höre zu (Befehlsaufnahme)...")
-                command_text = ""
+                collected_chunks = []
                start_time = time.time()
-                # Phase 2: Für 4 Sekunden den darauffolgenden Befehl aufnehmen
+                # Phase 2: Für 4 Sekunden die Rohdaten aus dem Stream sammeln
                while time.time() - start_time < 4.0:
-                    cmd_data = audio_queue.get()
+                    try:
-                    if command_recognizer.AcceptWaveform(cmd_data):
+                        # Kurzer Timeout, damit die Schleife agil bleibt
-                        res = json.loads(command_recognizer.Result())
+                        cmd_data = audio_queue.get(timeout=0.2)
-                        command_text += " " + res.get("text", "")
+                        collected_chunks.append(cmd_data)
                    except queue.Empty:
                        continue
-                # Letzten Rest auslesen
+                print("🧠 Sende Audio an OpenAI Whisper API...")
                final_res = json.loads(command_recognizer.FinalResult())
                command_text += " " + final_res.get("text", "")
                command_text = command_text.strip()
                # Rohe Audio-Bytes zusammenfügen und als WAV speichern
                wav_path = "/tmp/jarvis_cmd.wav"
                all_bytes = b"".join(collected_chunks)
                try:
                    with wave.open(wav_path, "wb") as wf:
                        wf.setnchannels(1)
                        wf.setsampwidth(2) # int16 entspricht 2 Bytes
                        wf.setframerate(AUDIO_RATE)
                        wf.writeframes(all_bytes)
                    # Whisper API aufrufen
                    with open(wav_path, "rb") as audio_file:
                        transcription = openai_client.audio.transcriptions.create(
                            model="whisper-1", 
                            file=audio_file,
                            language="de"  # Erzwingt deutsche Texterkennung
                        )
                    command_text = transcription.text.strip()
                except Exception as e:
                    print(f"❌ Fehler bei der Spracherkennung: {e}")
                    command_text = ""
                # Phase 3: Befehl verarbeiten, falls Whisper etwas verstanden hat
                if command_text:
-                    print(f"🗣️ Erkannter Befehl: '{command_text}'")
+                    print(f"🗣️ Erkannt (Whisper): '{command_text}'")
                    print("🧠 Übermittle an J.A.R.V.I.S. Gehirn...")
                    # Rufe jarvis.py im virtuellen Environment auf und übergib den Befehl
                    # (Wir nutzen hier Google Gemini oder was auch immer in deiner .env aktiv ist!)
                    subprocess.run([
                        "venv/bin/python3", 
                        "jarvis.py", 
@@ -98,5 +134,4 @@ with sd.RawInputStream(samplerate=AUDIO_RATE, blocksize=8000, dtype='int16',
                    print("🔇 Kein Befehl verstanden.")
                print("\n🎙️ Zurück im Standby. Lausche auf 'Jarvis'...")
-                wake_recognizer.Reset()
+                wake_recognizer.Reset()
                command_recognizer.Reset()
Author	SHA1	Message	Date
info@pi-farm.de	b44bd350f6	jarvis.py aktualisiert	2026-05-28 00:10:22 +00:00
info@pi-farm.de	082c055683	jarvis.py aktualisiert	2026-05-28 00:03:44 +00:00
info@pi-farm.de	cd239fde3c	jarvis.py aktualisiert	2026-05-28 00:02:08 +00:00
info@pi-farm.de	095d3edc03	wakeword.py aktualisiert	2026-05-27 23:27:31 +00:00