app/src/main/java/com/example/jarvis_stts/MainActivity.kt aktualisiert

This commit is contained in:
2026-03-11 15:27:14 +00:00
parent 209571a5d1
commit fd012f767c

View File

@@ -41,10 +41,26 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn
private var availableVoices = mutableListOf<Voice>() private var availableVoices = mutableListOf<Voice>()
private var voiceNames = mutableListOf<String>() private var voiceNames = mutableListOf<String>()
// Launcher for the Google speech-recognition activity. Its result callback
// forwards the recognized utterance to the server and then restarts Vosk.
private val speechRecognizerLauncher = registerForActivityResult(
    ActivityResultContracts.StartActivityForResult()
) { result ->
    if (result.resultCode == RESULT_OK) {
        // EXTRA_RESULTS can be absent or an empty list; firstOrNull() avoids the
        // IndexOutOfBoundsException that get(0) throws on an empty list, and the
        // safe-call chain replaces the former `!!` on result.data.
        val spokenText = result.data
            ?.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)
            ?.firstOrNull()
            .orEmpty()
        // Nothing was recognized: neither display nor send an empty message.
        if (spokenText.isNotBlank()) {
            tvStatus.text = "Ich: $spokenText"
            webSocket?.send(spokenText)
        }
    }
    // Restart Vosk on every outcome (success, cancel, empty result), because it
    // was stopped before the Google recognizer was launched.
    startVosk()
}
override fun onCreate(savedInstanceState: Bundle?) { override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState) super.onCreate(savedInstanceState)
setContentView(R.layout.activity_main) setContentView(R.layout.activity_main)
// 1. UI initialisieren
tvStatus = findViewById(R.id.tvStatus) tvStatus = findViewById(R.id.tvStatus)
etUrl = findViewById(R.id.etUrl) etUrl = findViewById(R.id.etUrl)
spinnerVoices = findViewById(R.id.spinnerVoices) spinnerVoices = findViewById(R.id.spinnerVoices)
@@ -53,6 +69,7 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn
tts = TextToSpeech(this, this) tts = TextToSpeech(this, this)
// 2. SharedPreferences (Server URL laden)
val prefs = getSharedPreferences("JarvisPrefs", MODE_PRIVATE) val prefs = getSharedPreferences("JarvisPrefs", MODE_PRIVATE)
etUrl.setText(prefs.getString("server_url", "")) etUrl.setText(prefs.getString("server_url", ""))
@@ -65,10 +82,15 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn
} }
btnSpeak.setOnClickListener { btnSpeak.setOnClickListener {
voskService?.stop() // Vosk stoppen, wenn man manuell klickt voskService?.stop() // Stoppe Wake-Word, wenn man manuell klickt
startVoiceInput() startVoiceInput()
} }
// 3. Berechtigungen prüfen & Modell laden
checkPermissionsAndInit()
}
private fun checkPermissionsAndInit() {
if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) { if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
ActivityCompat.requestPermissions(this, arrayOf(Manifest.permission.RECORD_AUDIO), 1) ActivityCompat.requestPermissions(this, arrayOf(Manifest.permission.RECORD_AUDIO), 1)
} else { } else {
@@ -76,7 +98,6 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn
} }
} }
// KORREKTUR: Damit Jarvis sofort nach der Erlaubnis startet
override fun onRequestPermissionsResult(requestCode: Int, permissions: Array<out String>, grantResults: IntArray) { override fun onRequestPermissionsResult(requestCode: Int, permissions: Array<out String>, grantResults: IntArray) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults) super.onRequestPermissionsResult(requestCode, permissions, grantResults)
if (requestCode == 1 && grantResults.isNotEmpty() && grantResults[0] == PackageManager.PERMISSION_GRANTED) { if (requestCode == 1 && grantResults.isNotEmpty() && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
@@ -85,18 +106,16 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn
} }
private fun initVoskModel() { private fun initVoskModel() {
// Der erste Name "model-de" MUSS exakt dem Ordnernamen in 'assets' entsprechen! // "model-de" ist der Ordner in assets. "model" ist der Zielordner auf dem Handy.
StorageService.unpack(this, "model-de", "model", StorageService.unpack(this, "model-de", "model",
{ model: Model -> { model: Model ->
Log.d("JARVIS", "Vosk Modell erfolgreich geladen!")
voskModel = model voskModel = model
Log.d("JARVIS", "Modell erfolgreich geladen!")
startVosk() startVosk()
}, },
{ exception: IOException -> { exception: IOException ->
Log.e("JARVIS", "Vosk Fehler beim Entpacken: ${exception.message}") Log.e("JARVIS", "Vosk Entpack-Fehler: ${exception.message}")
runOnUiThread { runOnUiThread { tvStatus.text = "Fehler: Modell nicht gefunden" }
Toast.makeText(this, "Modell Fehler: ${exception.message}", Toast.LENGTH_LONG).show()
}
} }
) )
} }
@@ -104,50 +123,44 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn
private fun startVosk() { private fun startVosk() {
try { try {
if (voskModel == null) return if (voskModel == null) return
// Falls noch ein alter Service läuft, sicherheitshalber stoppen
// Alten Service sicherheitshalber beenden
voskService?.stop() voskService?.stop()
voskService?.shutdown() voskService?.shutdown()
// Wir horchen auf "computer" und "jarvis".
val rec = Recognizer(voskModel, 16000.0f, "[\"computer\", \"jarvis\", \"[unk]\"]") val rec = Recognizer(voskModel, 16000.0f, "[\"computer\", \"jarvis\", \"[unk]\"]")
voskService = SpeechService(rec, 16000.0f) voskService = SpeechService(rec, 16000.0f)
voskService?.startListening(this) voskService?.startListening(this)
runOnUiThread { tvStatus.text = "Bereit (Warte auf 'Computer')" }
runOnUiThread { tvStatus.text = "Bereit (Warte auf 'Jarvis' oder 'Computer')" }
} catch (e: Exception) { } catch (e: Exception) {
Log.e("JARVIS", "Vosk Start Fehler: ${e.message}") Log.e("JARVIS", "Vosk Start Fehler: ${e.message}")
} }
} }
// 1. Hilfsfunktion zum sauberen Filtern des Wortes // --- Vosk RecognitionListener ---
// Matches a colon, optional whitespace, and a double-quoted value; group 1 is the value.
// Hoisted so the pattern is compiled once rather than on every call.
private val quotedValueAfterColon = Regex(":\\s*\"([^\"]*)\"")

/**
 * Extracts the first quoted string value from a small JSON snippet such as
 * `{"partial" : "computer"}`.
 *
 * Whitespace around the colon is optional, so both `"partial" : "x"` and
 * `"partial":"x"` are accepted.
 *
 * @param json the raw JSON text to search.
 * @return the first quoted value that follows a colon, or an empty string when
 *         the input contains no such value (previously the unparsed input, or
 *         a fragment of it, leaked through in that case).
 */
private fun extractText(json: String): String =
    quotedValueAfterColon.find(json)?.groupValues?.get(1).orEmpty()
override fun onPartialResult(hypothesis: String) { override fun onPartialResult(hypothesis: String) {
val recognizedText = extractText(hypothesis) val recognizedText = extractText(hypothesis)
Log.d("JARVIS", "Vosk hört: $recognizedText") Log.d("JARVIS", "Vosk hört: $recognizedText")
// KORREKTUR: ignoreCase hinzugefügt für mehr Sicherheit // Wake-Word Check
if (recognizedText.contains("computer", true) || recognizedText.contains("jarvis", true)) { if (recognizedText.contains("computer", true) || recognizedText.contains("jarvis", true)) {
voskService?.stop() Log.d("JARVIS", "Wake-Word erkannt!")
voskService?.stop() // Stoppen, um Mikrofon für Google freizugeben
startVoiceInput() startVoiceInput()
} }
} }
// 2. Im SpeechRecognizerLauncher private fun extractText(json: String): String {
private val speechRecognizerLauncher = registerForActivityResult( // Hilft, den Text aus dem JSON {"partial" : "..."} zu ziehen
ActivityResultContracts.StartActivityForResult() return json.substringAfter(": \"").substringBefore("\"")
) { result -> }
if (result.resultCode == RESULT_OK && result.data != null) {
val spokenText = result.data!!.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)?.get(0) ?: "" override fun onResult(hypothesis: String) {
tvStatus.text = "Ich: $spokenText" // Hier könnte man das finale Wort prüfen, falls Partial nicht reicht
webSocket?.send(spokenText)
}
// KORREKTUR: Einfach die Funktion aufrufen, nicht über voskService
startVosk()
} }
override fun onResult(hypothesis: String) {}
override fun onFinalResult(hypothesis: String) {} override fun onFinalResult(hypothesis: String) {}
override fun onError(e: Exception) { Log.e("JARVIS", "Vosk Error: ${e.message}") } override fun onError(e: Exception) { Log.e("JARVIS", "Vosk Error: ${e.message}") }
override fun onTimeout() {} override fun onTimeout() {}
@@ -157,8 +170,14 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn
val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply { val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM) putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
putExtra(RecognizerIntent.EXTRA_LANGUAGE, "de-DE") putExtra(RecognizerIntent.EXTRA_LANGUAGE, "de-DE")
putExtra(RecognizerIntent.EXTRA_PROMPT, "Ich höre...")
}
try {
speechRecognizerLauncher.launch(intent)
} catch (e: Exception) {
Toast.makeText(this, "Google Spracheingabe nicht verfügbar", Toast.LENGTH_SHORT).show()
startVosk() // Falls Google scheitert, Vosk wieder an
} }
speechRecognizerLauncher.launch(intent)
} }
private fun connectToServer(url: String) { private fun connectToServer(url: String) {
@@ -203,7 +222,12 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn
} }
private fun speakOut(text: String) { private fun speakOut(text: String) {
tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, "") // Wir können hier Vosk stoppen, damit Jarvis sich nicht selbst hört
voskService?.stop()
tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, "TTS_DONE")
// Nach dem Sprechen müssen wir Vosk wieder starten.
// Das machen wir am besten über einen Listener (siehe unten).
} }
override fun onDestroy() { override fun onDestroy() {