137 lines
5.2 KiB
Python
137 lines
5.2 KiB
Python
#!/usr/bin/env python3
|
|
import os
|
|
import sys
|
|
import json
|
|
import queue
|
|
import time
|
|
import subprocess
|
|
import wave
|
|
import sounddevice as sd
|
|
import numpy as np
|
|
from vosk import Model, KaldiRecognizer
|
|
from pathlib import Path
|
|
from dotenv import load_dotenv
|
|
from openai import OpenAI
|
|
|
|
# ====================================================
# PATHS & ENV SETUP (values are read from config/.env)
# ====================================================
BASE_DIR = Path(__file__).resolve().parent
CONFIG_DIR = BASE_DIR / "config"
ENV_FILE = CONFIG_DIR / ".env"
load_dotenv(ENV_FILE)

# NOTE: MODEL_PATH is a relative path, so it is resolved against the current
# working directory of the process, not against BASE_DIR.
MODEL_PATH = "model"
AUDIO_RATE = 48000
# While this file exists the main loop discards all microphone input.
LOCK_FILE = Path("/tmp/.jarvis_speaking")

if not os.path.exists(MODEL_PATH):
    print(f"❌ Modell-Ordner '{MODEL_PATH}' wurde nicht gefunden!")
    sys.exit(1)

# Initialise the OpenAI client.
# The client constructor raises when it is given no key and none is present in
# the environment, so a missing key cannot be treated as a mere warning: stop
# here with the same exit status as the model check instead of dying with a
# traceback one statement later.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if not _openai_api_key:
    print("⚠️ Warnung: Kein OPENAI_API_KEY in der .env gefunden!")
    sys.exit(1)
openai_client = OpenAI(api_key=_openai_api_key)

# Raw audio chunks travel from the stream callback to the main loop through
# this queue.
audio_queue = queue.Queue()
|
|
|
|
def audio_callback(indata, frames, time, status):
    """Hand one captured audio buffer over to the main loop.

    This function is registered as the ``callback`` of the
    ``sd.RawInputStream`` opened further down in this file. Only ``indata``
    and ``status`` are used; ``frames`` and ``time`` are accepted because the
    callback signature requires them.

    Args:
        indata: Buffer holding the captured audio; it is copied into an
            immutable ``bytes`` object before being queued.
        frames: Unused.
        time: Unused (note that it shadows the ``time`` module inside this
            function).
        status: Stream status object; when truthy it is printed to stderr.
    """
    if status:
        print(status, file=sys.stderr)
    chunk = bytes(indata)
    audio_queue.put(chunk)
|
|
|
|
# ====================================================
# WAKE-WORD LOOP (Vosk locally, Whisper for the command)
# ====================================================
_COMMAND_SECONDS = 4.0            # length of the command recording window
_COMMAND_WAV = "/tmp/jarvis_cmd.wav"  # scratch file uploaded to Whisper


def _drain_audio_queue():
    """Throw away every chunk that is currently waiting in ``audio_queue``."""
    while True:
        try:
            audio_queue.get_nowait()
        except queue.Empty:
            return


def _play_confirmation_beep(duration=0.1, frequency=800.0):
    """Play a short sine tone (800 Hz, 0.1 s by default) and wait for it to end."""
    t = np.linspace(0, duration, int(AUDIO_RATE * duration), endpoint=False)
    beep = np.sin(2 * np.pi * frequency * t) * 0.3
    sd.play(beep, samplerate=AUDIO_RATE)
    sd.wait()


def _record_command(seconds):
    """Collect raw chunks from ``audio_queue`` for ``seconds`` and join them.

    A monotonic clock is used so that system clock adjustments cannot stretch
    or shorten the recording window.
    """
    chunks = []
    deadline = time.monotonic() + seconds
    while time.monotonic() < deadline:
        try:
            # Short timeout so the deadline is re-checked regularly.
            chunks.append(audio_queue.get(timeout=0.2))
        except queue.Empty:
            continue
    return b"".join(chunks)


def _transcribe(pcm_bytes):
    """Write ``pcm_bytes`` to a WAV file and send it to the Whisper API.

    Returns the stripped transcription text, or ``""`` when the request (or
    writing the file) fails; the error is printed, not raised.
    """
    try:
        with wave.open(_COMMAND_WAV, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)  # int16 samples are 2 bytes wide
            wf.setframerate(AUDIO_RATE)
            wf.writeframes(pcm_bytes)

        with open(_COMMAND_WAV, "rb") as audio_file:
            transcription = openai_client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                language="de",  # force German recognition
            )
        return transcription.text.strip()
    except Exception as e:
        # Top-level boundary: a failed transcription must not stop the loop.
        print(f"❌ Fehler bei der Spracherkennung: {e}")
        return ""


def _handle_wake_word():
    """Run one full command cycle after the wake word was recognised."""
    print("\n⚡ [WAKEWORD DETECTED] Ja, Sir?")
    _play_confirmation_beep()

    # Drop whatever was captured during the beep so it is not recorded as
    # part of the command.
    _drain_audio_queue()

    print("👂 Höre zu (Befehlsaufnahme)...")
    pcm_bytes = _record_command(_COMMAND_SECONDS)

    command_text = ""
    if pcm_bytes:
        print("🧠 Sende Audio an OpenAI Whisper API...")
        command_text = _transcribe(pcm_bytes)
    # else: the stream delivered nothing in the window, so there is nothing
    # worth uploading.

    if command_text:
        print(f"🗣️ Erkannt (Whisper): '{command_text}'")
        print("🧠 Übermittle an J.A.R.V.I.S. Gehirn...")
        # NOTE: both paths are relative to the current working directory.
        subprocess.run([
            "venv/bin/python3",
            "jarvis.py",
            "--voice-cmd",
            command_text,
        ])
    else:
        print("🔇 Kein Befehl verstanden.")

    # The stream callback kept filling the queue while this function was
    # blocked in the API request and in the subprocess. Discard that backlog
    # so stale audio is not fed into the wake-word recognizer.
    _drain_audio_queue()

    print("\n🎙️ Zurück im Standby. Lausche auf 'Jarvis'...")
    wake_recognizer.Reset()


print("🧠 J.A.R.V.I.S. lädt das Sprachmodell für das Wake-Word...")
model = Model(MODEL_PATH)

# A single recognizer, restricted to the wake word ("jarvis") plus "[unk]".
wake_recognizer = KaldiRecognizer(model, AUDIO_RATE, '["jarvis", "[unk]"]')

print("🎙️ J.A.R.V.I.S. läuft im Hybrid-Modus (Vosk + Whisper) und lauscht... (Sag 'Jarvis')")

with sd.RawInputStream(samplerate=AUDIO_RATE, blocksize=8000, dtype='int16',
                       channels=1, callback=audio_callback):
    while True:
        data = audio_queue.get()

        # While the lock file exists, ignore the microphone entirely: empty
        # the queue and reset the recognizer so no partial result survives.
        if LOCK_FILE.exists():
            _drain_audio_queue()
            wake_recognizer.Reset()
            continue

        # Phase 1: wait for the wake word (local, via Vosk).
        if not wake_recognizer.AcceptWaveform(data):
            continue
        result = json.loads(wake_recognizer.Result())
        if "jarvis" not in result.get("text", ""):
            continue

        # Phases 2 and 3: record the command, transcribe it, dispatch it.
        _handle_wake_word()