+ wakeword.py
This commit is contained in:
101
wakeword.py
Normal file
101
wakeword.py
Normal file
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import queue
|
||||
import time
|
||||
import subprocess
|
||||
import sounddevice as sd
|
||||
import numpy as np
|
||||
from vosk import Model, KaldiRecognizer
|
||||
from pathlib import Path
|
||||
|
||||
# --- Configuration ---------------------------------------------------------
# Directory handed to vosk.Model; a relative path, so it is resolved against
# the current working directory.
MODEL_PATH = "model"
# Sample rate (Hz) shared by the input stream, both recognizers and the beep.
AUDIO_RATE = 48000
# While this file exists the main loop throws away all microphone audio.
LOCK_FILE = Path("/tmp/.jarvis_speaking")

# Raw audio chunks: filled by audio_callback, consumed by the main loop.
audio_queue = queue.Queue()

# Abort early with a readable message when the model folder is missing.
model_folder_present = os.path.exists(MODEL_PATH)
if not model_folder_present:
    print(f"❌ Modell-Ordner '{MODEL_PATH}' wurde nicht gefunden!")
    sys.exit(1)
|
||||
|
||||
def audio_callback(indata, frames, time, status):
    """Queue one captured audio chunk for the main loop.

    Registered as the ``callback`` of the ``sd.RawInputStream``.

    Args:
        indata: Buffer holding the captured samples; copied into ``bytes``.
        frames: Unused here.
        time: Unused here (shadows the ``time`` module only inside this
            function).
        status: Stream status; printed to stderr whenever it is truthy.
    """
    if status:
        print(status, file=sys.stderr)
    # Copy the buffer before queueing it so the chunk owns its data.
    chunk = bytes(indata)
    audio_queue.put(chunk)
|
||||
|
||||
print("🧠 J.A.R.V.I.S. lädt das Sprachmodell...")
model = Model(MODEL_PATH)

# Two recognizers share the one loaded model:
#   * wake_recognizer is restricted by a grammar to "jarvis" plus the
#     unknown-word token, so it only has to spot the wake word;
#   * command_recognizer gets no grammar and therefore transcribes freely
#     (any German word, per the original author's note).
_WAKE_GRAMMAR = '["jarvis", "[unk]"]'
wake_recognizer = KaldiRecognizer(model, AUDIO_RATE, _WAKE_GRAMMAR)
command_recognizer = KaldiRecognizer(model, AUDIO_RATE)

print("🎙️ J.A.R.V.I.S. ist online und lauscht... (Sag 'Jarvis')")
|
||||
|
||||
# Length of the window (seconds) in which the spoken command is recorded.
_COMMAND_WINDOW_SECONDS = 4.0
# Confirmation tone: short 800 Hz sine at a comfortable volume.
_BEEP_DURATION_SECONDS = 0.1
_BEEP_FREQUENCY_HZ = 800.0
_BEEP_VOLUME = 0.3


def _drain_audio_queue():
    """Discard every audio chunk currently buffered in ``audio_queue``."""
    while True:
        try:
            audio_queue.get_nowait()
        except queue.Empty:
            return


def _make_beep():
    """Return the confirmation tone as a NumPy array of sine samples."""
    sample_count = int(AUDIO_RATE * _BEEP_DURATION_SECONDS)
    t = np.linspace(0, _BEEP_DURATION_SECONDS, sample_count, endpoint=False)
    return np.sin(2 * np.pi * _BEEP_FREQUENCY_HZ * t) * _BEEP_VOLUME


def _record_command():
    """Transcribe microphone audio for the duration of the command window.

    Returns:
        The recognized text with surrounding whitespace removed; an empty
        string when nothing was recognized.
    """
    parts = []
    # Monotonic clock: the window length must not change when the wall clock
    # is adjusted while we are listening.
    deadline = time.monotonic() + _COMMAND_WINDOW_SECONDS
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        try:
            # Bounded wait so a stalled input stream cannot hang the window.
            cmd_data = audio_queue.get(timeout=remaining)
        except queue.Empty:
            break
        if command_recognizer.AcceptWaveform(cmd_data):
            res = json.loads(command_recognizer.Result())
            parts.append(res.get("text", ""))

    # Collect whatever the recognizer still holds for the unfinished utterance.
    final_res = json.loads(command_recognizer.FinalResult())
    parts.append(final_res.get("text", ""))
    # Skip empty partial results so the command contains no double spaces.
    return " ".join(part for part in parts if part).strip()


def _run_jarvis(command_text):
    """Hand the recognized command to jarvis.py and wait for it to finish.

    A failure to launch the process is reported on stderr instead of
    terminating the wake-word listener.
    """
    try:
        # Runs jarvis.py with the interpreter of its virtual environment and
        # passes the command as a single argument (no shell involved).
        subprocess.run([
            "/home/meik/jarvis-ai/venv/bin/python3",
            "/home/meik/jarvis-ai/jarvis.py",
            "--voice-cmd",
            command_text,
        ])
    except OSError as exc:
        print(f"❌ jarvis.py konnte nicht gestartet werden: {exc}",
              file=sys.stderr)


# The tone never changes, so synthesize it once instead of per detection.
_CONFIRMATION_BEEP = _make_beep()

with sd.RawInputStream(samplerate=AUDIO_RATE, blocksize=8000, dtype='int16',
                       channels=1, callback=audio_callback):

    while True:
        data = audio_queue.get()

        # While the lock file exists (J.A.R.V.I.S. is speaking), throw away
        # all captured audio and reset the wake recognizer so that no
        # fragments of earlier audio remain in its state.
        if LOCK_FILE.exists():
            _drain_audio_queue()
            wake_recognizer.Reset()
            continue

        # Phase 1: wait for the wake word.
        if not wake_recognizer.AcceptWaveform(data):
            continue
        result = json.loads(wake_recognizer.Result())
        if "jarvis" not in result.get("text", ""):
            continue

        print("\n⚡ [WAKEWORD DETECTED] Ja, Sir?")

        # Play the confirmation tone and wait until playback has finished.
        sd.play(_CONFIRMATION_BEEP, samplerate=AUDIO_RATE)
        sd.wait()
        # Drop audio captured meanwhile so the tone is not taken as a command.
        _drain_audio_queue()

        print("👂 Höre zu...")

        # Phase 2: record the command that follows the wake word.
        command_text = _record_command()

        if command_text:
            print(f"🗣️ Erkannter Befehl: '{command_text}'")
            print("🧠 Übermittle an J.A.R.V.I.S. Gehirn...")
            _run_jarvis(command_text)
        else:
            print("🔇 Kein Befehl verstanden.")

        print("\n🎙️ Zurück im Standby. Lausche auf 'Jarvis'...")
        # The stream kept recording while jarvis.py was running; discard that
        # backlog (presumably containing the assistant's own reply) so it is
        # not fed to the wake recognizer.
        _drain_audio_queue()
        wake_recognizer.Reset()
        command_recognizer.Reset()
|
||||
Reference in New Issue
Block a user