// Source file: jarvis-ai-stts/app/src/main/java/com/example/jarvis_stts/MainActivity.kt
// (repository viewer listing: 253 lines, 9.7 KiB, Kotlin)

package com.example.jarvis_stts
import android.Manifest
import android.content.Intent
import android.content.pm.PackageManager
import android.os.Bundle
import android.speech.RecognizerIntent
import android.speech.tts.TextToSpeech
import android.speech.tts.Voice
import android.util.Log
import android.view.View
import android.widget.*
import androidx.activity.result.contract.ActivityResultContracts
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import okhttp3.*
import java.util.Locale
import ai.picovoice.porcupine.PorcupineManager
import ai.picovoice.porcupine.PorcupineManagerCallback
import ai.picovoice.porcupine.Porcupine
/**
 * Main screen of the J.A.R.V.I.S. speech client.
 *
 * The activity ties four pieces together:
 *  - a Porcupine wake-word engine listening for the built-in "JARVIS" keyword,
 *  - the system speech recognizer (launched via [RecognizerIntent]),
 *  - an OkHttp [WebSocket] that carries recognized text to the server and
 *    receives the server's answers,
 *  - a [TextToSpeech] engine that reads the answers aloud.
 *
 * The wake-word engine and the speech recognizer both need the microphone, so
 * the wake word is stopped before the recognizer is launched and resumed once
 * the recognizer returns.
 */
class MainActivity : AppCompatActivity(), TextToSpeech.OnInitListener {

    private lateinit var tts: TextToSpeech
    private lateinit var tvStatus: TextView
    private lateinit var etUrl: EditText
    private lateinit var spinnerVoices: Spinner

    private var porcupineManager: PorcupineManager? = null
    private val client = OkHttpClient()
    private var webSocket: WebSocket? = null

    // Voice objects and their display names, kept index-aligned for the spinner.
    private val availableVoices = mutableListOf<Voice>()
    private val voiceNames = mutableListOf<String>()

    /**
     * Receives the speech recognizer's result. Wake-word listening is resumed
     * first (the recognizer has released the microphone by now), then the best
     * hypothesis, if any, is shown and sent to the server.
     */
    private val speechRecognizerLauncher = registerForActivityResult(
        ActivityResultContracts.StartActivityForResult()
    ) { result ->
        resumeWakeWord()
        if (result.resultCode == RESULT_OK) {
            // firstOrNull(): the recognizer may legitimately return an empty list.
            val spokenText = result.data
                ?.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)
                ?.firstOrNull()
                .orEmpty()
            // Do not bother the server with empty recognitions.
            if (spokenText.isNotBlank()) {
                tvStatus.text = "Ich: $spokenText"
                webSocket?.send(spokenText)
            }
        }
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        // 1. Bind UI elements.
        tvStatus = findViewById(R.id.tvStatus)
        etUrl = findViewById(R.id.etUrl)
        spinnerVoices = findViewById(R.id.spinnerVoices)
        val btnConnect = findViewById<Button>(R.id.btnConnect)
        val btnSpeak = findViewById<Button>(R.id.btnSpeak)

        tts = TextToSpeech(this, this)

        // 2. Restore the last used server URL from SharedPreferences.
        val prefs = getSharedPreferences(PREFS_NAME, MODE_PRIVATE)
        val savedUrl = prefs.getString(PREF_SERVER_URL, "")
        if (!savedUrl.isNullOrEmpty()) {
            etUrl.setText(savedUrl)
        }

        // 3. Connect button: persist the URL, then open the WebSocket.
        btnConnect.setOnClickListener {
            val url = etUrl.text.toString()
            if (url.isNotEmpty()) {
                prefs.edit().putString(PREF_SERVER_URL, url).apply()
                connectToServer(url)
            } else {
                Toast.makeText(this, "Bitte URL eingeben!", Toast.LENGTH_SHORT).show()
            }
        }

        // 4. Speak button: start speech recognition manually.
        btnSpeak.setOnClickListener { startVoiceInput() }

        // 5. Ask for the microphone permission if it is missing. When it is
        //    already granted nothing is needed here: onResume() starts the
        //    wake-word engine.
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
            ActivityCompat.requestPermissions(this, arrayOf(Manifest.permission.RECORD_AUDIO), REQUEST_RECORD_AUDIO)
        }
    }

    override fun onRequestPermissionsResult(requestCode: Int, permissions: Array<out String>, grantResults: IntArray) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        if (requestCode == REQUEST_RECORD_AUDIO) {
            if (grantResults.isNotEmpty() && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
                // Microphone granted: Porcupine may start now.
                initWakeWord()
            } else {
                Toast.makeText(this, "Ohne Mikrofon-Erlaubnis funktioniert J.A.R.V.I.S. nicht!", Toast.LENGTH_LONG).show()
            }
        }
    }

    override fun onResume() {
        super.onResume()
        // Only start when the microphone permission is present. initWakeWord()
        // releases any previous manager before building a fresh one.
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) == PackageManager.PERMISSION_GRANTED) {
            initWakeWord()
        }
    }

    override fun onPause() {
        super.onPause()
        // Free the microphone while the app is not in the foreground.
        stopWakeWord()
    }

    /**
     * Opens a WebSocket to [url], replacing any existing connection.
     * Incoming text messages are shown in the status view and spoken aloud.
     * A malformed URL is reported in the status view instead of crashing.
     */
    private fun connectToServer(url: String) {
        // Request.Builder.url() throws IllegalArgumentException on malformed input;
        // validate before dropping the current connection.
        val request = try {
            Request.Builder().url(url).build()
        } catch (e: IllegalArgumentException) {
            Log.e(TAG, "Ungültige Server-URL: $url", e)
            tvStatus.text = "Fehler: ${e.message}"
            return
        }

        webSocket?.close(1000, "Neuverbindung")
        webSocket = client.newWebSocket(request, object : WebSocketListener() {
            override fun onOpen(webSocket: WebSocket, response: Response) {
                runOnUiThread { tvStatus.text = "Verbunden!" }
            }

            override fun onMessage(webSocket: WebSocket, text: String) {
                runOnUiThread { tvStatus.text = "J.A.R.V.I.S.: $text" }
                speakOut(text)
            }

            override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) {
                runOnUiThread { tvStatus.text = "Fehler: ${t.message}" }
            }
        })
    }

    /** [TextToSpeech.OnInitListener] callback: configures German and fills the voice spinner. */
    override fun onInit(status: Int) {
        if (status == TextToSpeech.SUCCESS) {
            tts.language = Locale.GERMAN
            setupVoiceSpinner()
        } else {
            Log.e(TAG, "TextToSpeech-Initialisierung fehlgeschlagen (Status $status)")
        }
    }

    /** Collects all German voices of the TTS engine and offers them in the spinner. */
    private fun setupVoiceSpinner() {
        availableVoices.clear()
        voiceNames.clear()

        // tts.voices is a platform type and may be null if the engine cannot be queried.
        val germanVoices = tts.voices.orEmpty().filter { it.locale.language == "de" }
        for (voice in germanVoices) {
            availableVoices.add(voice)
            // Human-readable label for the spinner.
            voiceNames.add("${voice.name} (${if (voice.isNetworkConnectionRequired) "Online" else "Lokal"})")
        }

        val adapter = ArrayAdapter(this, android.R.layout.simple_spinner_item, voiceNames)
        adapter.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item)
        runOnUiThread {
            spinnerVoices.adapter = adapter
            spinnerVoices.onItemSelectedListener = object : AdapterView.OnItemSelectedListener {
                override fun onItemSelected(parent: AdapterView<*>?, view: View?, position: Int, id: Long) {
                    // Guard against a position that no longer matches the voice list.
                    val selectedVoice = availableVoices.getOrNull(position) ?: return
                    tts.voice = selectedVoice
                    Toast.makeText(this@MainActivity, "Stimme geändert", Toast.LENGTH_SHORT).show()
                }

                override fun onNothingSelected(parent: AdapterView<*>?) {}
            }
        }
    }

    /**
     * Launches the system speech recognizer for free-form German input.
     * If no recognizer activity is installed, the user is informed and
     * wake-word listening is resumed, because no result callback will arrive.
     */
    private fun startVoiceInput() {
        val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
            putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
            putExtra(RecognizerIntent.EXTRA_LANGUAGE, "de-DE")
        }
        try {
            speechRecognizerLauncher.launch(intent)
        } catch (e: android.content.ActivityNotFoundException) {
            Log.e(TAG, "Keine Spracherkennung installiert", e)
            Toast.makeText(this, "Keine Spracherkennung verfügbar!", Toast.LENGTH_LONG).show()
            resumeWakeWord()
        }
    }

    /** Speaks [text], interrupting anything that is currently being spoken. */
    private fun speakOut(text: String) {
        tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, "")
    }

    /**
     * (Re)creates and starts the Porcupine wake-word engine.
     *
     * Any previously created manager is stopped and deleted first, so repeated
     * calls (every onResume, permission grant) do not leak native resources.
     * Failures are logged and shown in the status view.
     */
    private fun initWakeWord() {
        releaseWakeWord()
        try {
            porcupineManager = PorcupineManager.Builder()
                .setAccessKey(PORCUPINE_ACCESS_KEY)
                .setKeyword(Porcupine.BuiltInKeyword.JARVIS)
                .build(applicationContext, PorcupineManagerCallback { _ ->
                    runOnUiThread {
                        // 1. Stop the wake word so the recognizer can have the microphone.
                        stopWakeWord()
                        // 2. Start the speech recognizer.
                        startVoiceInput()
                    }
                })
            porcupineManager?.start()
            Log.d(TAG, "Porcupine erfolgreich gestartet")
        } catch (e: Exception) {
            // Map the known Porcupine failures to a readable reason.
            val errorMessage = when (e) {
                is ai.picovoice.porcupine.PorcupineActivationException -> "Key ungültig oder Limit erreicht"
                is ai.picovoice.porcupine.PorcupineActivationNetworkException -> "Keine Internetverbindung zur Key-Prüfung"
                is ai.picovoice.porcupine.PorcupineInvalidArgumentException -> "Falsches Keyword oder Argument"
                else -> e.message ?: "Unbekannter Fehler"
            }
            Log.e(TAG, "Detail-Fehler: $errorMessage")
            runOnUiThread {
                tvStatus.text = "Fehler: $errorMessage"
            }
        }
    }

    /** Resumes wake-word listening on the existing manager; failures are only logged. */
    private fun resumeWakeWord() {
        try {
            porcupineManager?.start()
        } catch (e: Exception) {
            Log.e(TAG, "Neustart nach Spracheingabe fehlgeschlagen", e)
        }
    }

    /** Stops wake-word listening; failures are only logged so lifecycle callbacks cannot crash. */
    private fun stopWakeWord() {
        try {
            porcupineManager?.stop()
        } catch (e: Exception) {
            Log.e(TAG, "Stoppen von Porcupine fehlgeschlagen", e)
        }
    }

    /** Stops the wake-word engine and frees its native resources. */
    private fun releaseWakeWord() {
        stopWakeWord()
        porcupineManager?.delete()
        porcupineManager = null
    }

    override fun onDestroy() {
        // Clean up everything that holds native or network resources.
        releaseWakeWord()
        webSocket?.close(1000, "App Ende")
        if (::tts.isInitialized) {
            tts.stop()
            tts.shutdown()
        }
        super.onDestroy()
    }

    private companion object {
        const val TAG = "JARVIS"
        const val PREFS_NAME = "JarvisPrefs"
        const val PREF_SERVER_URL = "server_url"
        const val REQUEST_RECORD_AUDIO = 1

        // Placeholder: must be replaced with a valid Picovoice access key.
        const val PORCUPINE_ACCESS_KEY = "DEIN_KEY_HIER"
    }
}