app/src/main/java/com/example/jarvis_stts/MainActivity.kt aktualisiert

This commit is contained in:
2026-03-11 12:13:05 +00:00
parent 423409cbc2
commit cd78b0fb93

View File

@@ -15,42 +15,38 @@ import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat import androidx.core.content.ContextCompat
import okhttp3.* import okhttp3.*
import java.util.Locale
//import ai.picovoice.porcupine.PorcupineManager
//import ai.picovoice.porcupine.PorcupineManagerCallback
//import ai.picovoice.porcupine.Porcupine
import org.vosk.Model import org.vosk.Model
import org.vosk.Recognizer import org.vosk.Recognizer
import org.vosk.android.RecognitionListener
import org.vosk.android.SpeechService import org.vosk.android.SpeechService
import org.vosk.android.StorageService import org.vosk.android.StorageService
import org.vosk.android.RecognitionLi import java.io.IOException
import java.util.Locale
class MainActivity : AppCompatActivity(), RecognitionListener { class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnInitListener {
// UI Elemente
private lateinit var tvStatus: TextView
private lateinit var etUrl: EditText
private lateinit var spinnerVoices: Spinner
private lateinit var tts: TextToSpeech
// Vosk & Netzwerk
private var voskService: SpeechService? = null private var voskService: SpeechService? = null
private var voskModel: Model? = null private var voskModel: Model? = null
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
initVosk()
}
private val client = OkHttpClient() private val client = OkHttpClient()
private var webSocket: WebSocket? = null private var webSocket: WebSocket? = null
// Liste für die Stimmen-Objekte und deren Namen // TTS Stimmen
private var availableVoices = mutableListOf<Voice>() private var availableVoices = mutableListOf<Voice>()
private var voiceNames = mutableListOf<String>() private var voiceNames = mutableListOf<String>()
// Launcher für Google Spracherkennung
private val speechRecognizerLauncher = registerForActivityResult( private val speechRecognizerLauncher = registerForActivityResult(
ActivityResultContracts.StartActivityForResult() ActivityResultContracts.StartActivityForResult()
) { result -> ) { result ->
// Wenn Google fertig ist, starten wir das Wake-word wieder // Nach der Google-Eingabe starten wir Vosk wieder
try { startVosk()
porcupineManager?.start()
} catch (e: Exception) {
Log.e("JARVIS", "Neustart nach Spracheingabe fehlgeschlagen: ${e.message}")
}
if (result.resultCode == RESULT_OK && result.data != null) { if (result.resultCode == RESULT_OK && result.data != null) {
val spokenText = result.data!!.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)?.get(0) ?: "" val spokenText = result.data!!.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)?.get(0) ?: ""
@@ -59,7 +55,42 @@ class MainActivity : AppCompatActivity(), RecognitionListener {
} }
} }
private fun initVosk() { override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
setContentView(R.layout.activity_main)
// 1. UI initialisieren
tvStatus = findViewById(R.id.tvStatus)
etUrl = findViewById(R.id.etUrl)
spinnerVoices = findViewById(R.id.spinnerVoices)
val btnConnect = findViewById<Button>(R.id.btnConnect)
val btnSpeak = findViewById<Button>(R.id.btnSpeak)
tts = TextToSpeech(this, this)
// 2. SharedPreferences
val prefs = getSharedPreferences("JarvisPrefs", MODE_PRIVATE)
etUrl.setText(prefs.getString("server_url", ""))
btnConnect.setOnClickListener {
val url = etUrl.text.toString()
if (url.isNotEmpty()) {
prefs.edit().putString("server_url", url).apply()
connectToServer(url)
}
}
btnSpeak.setOnClickListener { startVoiceInput() }
// 3. Vosk Modell laden & Berechtigungen
if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
ActivityCompat.requestPermissions(this, arrayOf(Manifest.permission.RECORD_AUDIO), 1)
} else {
initVoskModel()
}
}
private fun initVoskModel() {
StorageService.unpack(this, "model-de", "model", StorageService.unpack(this, "model-de", "model",
{ model: Model -> { model: Model ->
voskModel = model voskModel = model
@@ -73,125 +104,51 @@ class MainActivity : AppCompatActivity(), RecognitionListener {
private fun startVosk() { private fun startVosk() {
try { try {
// "jarvis" als Hotword im Recognizer definieren if (voskModel == null) return
// Wir horchen auf "jarvis". [unk] lässt unbekannte Wörter zu.
val rec = Recognizer(voskModel, 16000.0f, "[\"jarvis\", \"[unk]\"]") val rec = Recognizer(voskModel, 16000.0f, "[\"jarvis\", \"[unk]\"]")
voskService = SpeechService(rec, 16000.0f) voskService = SpeechService(rec, 16000.0f)
voskService?.startListening(this) voskService?.startListening(this)
tvStatus.text = "Bereit (Vosk)" runOnUiThread { tvStatus.text = "Bereit (Warte auf 'Jarvis')" }
} catch (e: Exception) { } catch (e: Exception) {
Log.e("JARVIS", "Vosk Start Fehler: ${e.message}") Log.e("JARVIS", "Vosk Start Fehler: ${e.message}")
} }
} }
// --- RecognitionListener Methoden --- // --- Vosk RecognitionListener ---
override fun onPartialResult(hypothesis: String) { override fun onPartialResult(hypothesis: String) {
// Hier prüfen wir, ob "jarvis" im Teil-Ergebnis steht // Vosk liefert JSON, z.B. {"partial" : "jarvis"}
if (hypothesis.contains("jarvis")) { if (hypothesis.contains("jarvis", ignoreCase = true)) {
voskService?.stop() // Kurz Pause machen voskService?.stop() // Mikrofon für Google frei machen
startSpeechRecognition() // Dein Google STT starten startVoiceInput()
} }
} }
override fun onResult(hypothesis: String) { } override fun onResult(hypothesis: String) {}
override fun onFinalResult(hypothesis: String) { } override fun onFinalResult(hypothesis: String) {}
override fun onError(exception: Exception) { } override fun onError(e: Exception) { Log.e("JARVIS", "Vosk Error: ${e.message}") }
override fun onTimeout() { } override fun onTimeout() {}
// --- Google STT & TTS ---
override fun onCreate(savedInstanceState: Bundle?) { private fun startVoiceInput() {
super.onCreate(savedInstanceState) val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
setContentView(R.layout.activity_main) putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
putExtra(RecognizerIntent.EXTRA_LANGUAGE, "de-DE")
// 1. UI-Elemente initialisieren
tvStatus = findViewById(R.id.tvStatus)
etUrl = findViewById(R.id.etUrl)
spinnerVoices = findViewById(R.id.spinnerVoices)
// Hier fehlten wahrscheinlich diese Definitionen:
val btnConnect = findViewById<Button>(R.id.btnConnect)
val btnSpeak = findViewById<Button>(R.id.btnSpeak)
tts = TextToSpeech(this, this)
// 2. URL aus dem Speicher laden (SharedPreferences)
val prefs = getSharedPreferences("JarvisPrefs", MODE_PRIVATE)
val savedUrl = prefs.getString("server_url", "")
if (!savedUrl.isNullOrEmpty()) {
etUrl.setText(savedUrl)
} }
speechRecognizerLauncher.launch(intent)
// 3. Click-Listener für den Verbinden-Button
btnConnect.setOnClickListener {
val url = etUrl.text.toString()
if (url.isNotEmpty()) {
// URL speichern
prefs.edit().putString("server_url", url).apply()
connectToServer(url)
} else {
Toast.makeText(this, "Bitte URL eingeben!", Toast.LENGTH_SHORT).show()
}
}
// 4. Click-Listener für den Sprechen-Button
btnSpeak.setOnClickListener { startVoiceInput() }
// 5. Mikrofon-Berechtigung prüfen
if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
// App hat noch keine Erlaubnis -> Wir fragen den Nutzer
ActivityCompat.requestPermissions(this, arrayOf(Manifest.permission.RECORD_AUDIO), 1)
} else {
// App hat die Erlaubnis schon (z.B. beim zweiten App-Start) -> Direkt starten!
}
}
override fun onRequestPermissionsResult(requestCode: Int, permissions: Array<out String>, grantResults: IntArray) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults)
if (requestCode == 1) {
if (grantResults.isNotEmpty() && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
// Juhu, Mikrofon ist freigegeben! Jetzt darf Porcupine starten.
initWakeWord()
} else {
Toast.makeText(this, "Ohne Mikrofon-Erlaubnis funktioniert J.A.R.V.I.S. nicht!", Toast.LENGTH_LONG).show()
}
}
}
override fun onResume() {
super.onResume()
// Nur starten, wenn wir die Erlaubnis haben
if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) == PackageManager.PERMISSION_GRANTED) {
// Falls der Manager schon läuft, stoppen wir ihn kurz, um ihn sauber neu zu starten
porcupineManager?.stop()
initWakeWord()
}
}
override fun onPause() {
super.onPause()
// Mikrofon pausieren, wenn die App nicht im Fokus ist, um den Fehler zu vermeiden
porcupineManager?.stop()
} }
private fun connectToServer(url: String) { private fun connectToServer(url: String) {
webSocket?.close(1000, "Neuverbindung") webSocket?.close(1000, "Neuverbindung")
val request = Request.Builder().url(url).build() val request = Request.Builder().url(url).build()
webSocket = client.newWebSocket(request, object : WebSocketListener() { webSocket = client.newWebSocket(request, object : WebSocketListener() {
override fun onOpen(webSocket: WebSocket, response: Response) { override fun onOpen(webSocket: WebSocket, response: Response) {
runOnUiThread { tvStatus.text = "Verbunden!" } runOnUiThread { tvStatus.text = "Verbunden!" }
} }
override fun onMessage(webSocket: WebSocket, text: String) { override fun onMessage(webSocket: WebSocket, text: String) {
runOnUiThread { tvStatus.text = "J.A.R.V.I.S.: $text" } runOnUiThread { tvStatus.text = "J.A.R.V.I.S.: $text" }
speakOut(text) speakOut(text)
} }
override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) {
runOnUiThread { tvStatus.text = "Fehler: ${t.message}" }
}
}) })
} }
@@ -205,84 +162,31 @@ class MainActivity : AppCompatActivity(), RecognitionListener {
private fun setupVoiceSpinner() { private fun setupVoiceSpinner() {
availableVoices.clear() availableVoices.clear()
voiceNames.clear() voiceNames.clear()
for (v in tts.voices) {
// Alle verfügbaren deutschen Stimmen finden
val allVoices = tts.voices
for (v in allVoices) {
if (v.locale.language == "de") { if (v.locale.language == "de") {
availableVoices.add(v) availableVoices.add(v)
// Ein lesbarer Name für den Spinner
voiceNames.add("${v.name} (${if(v.isNetworkConnectionRequired) "Online" else "Lokal"})") voiceNames.add("${v.name} (${if(v.isNetworkConnectionRequired) "Online" else "Lokal"})")
} }
} }
val adapter = ArrayAdapter(this, android.R.layout.simple_spinner_item, voiceNames) val adapter = ArrayAdapter(this, android.R.layout.simple_spinner_item, voiceNames)
adapter.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item) adapter.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item)
runOnUiThread {
spinnerVoices.adapter = adapter spinnerVoices.adapter = adapter
spinnerVoices.onItemSelectedListener = object : AdapterView.OnItemSelectedListener { spinnerVoices.onItemSelectedListener = object : AdapterView.OnItemSelectedListener {
override fun onItemSelected(parent: AdapterView<*>?, view: View?, position: Int, id: Long) { override fun onItemSelected(p0: AdapterView<*>?, p1: View?, pos: Int, p3: Long) {
tts.voice = availableVoices[position] tts.voice = availableVoices[pos]
Toast.makeText(this@MainActivity, "Stimme geändert", Toast.LENGTH_SHORT).show()
} }
override fun onNothingSelected(parent: AdapterView<*>?) {} override fun onNothingSelected(p0: AdapterView<*>?) {}
} }
} }
}
private fun startVoiceInput() {
val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
putExtra(RecognizerIntent.EXTRA_LANGUAGE, "de-DE")
}
speechRecognizerLauncher.launch(intent)
}
private fun speakOut(text: String) { private fun speakOut(text: String) {
tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, "") tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, "")
} }
private fun initWakeWord() {
try {
val accessKey = "DEIN_KEY_HIER" // Bitte nochmal prüfen!
porcupineManager = PorcupineManager.Builder()
.setAccessKey(accessKey)
.setKeyword(Porcupine.BuiltInKeyword.JARVIS)
.build(applicationContext, PorcupineManagerCallback { keywordIndex ->
runOnUiThread {
// 1. Wake-word stoppen, damit Google das Mikrofon kriegt
porcupineManager?.stop()
// 2. Google-Eingabe starten
startVoiceInput()
}
})
porcupineManager?.start()
Log.d("JARVIS", "Porcupine erfolgreich gestartet")
} catch (e: Exception) {
val errorMessage = when {
// Wir prüfen den Klassennamen als String, falls die Klasse beim Kompilieren nicht gefunden wird
e.javaClass.simpleName.contains("PorcupineActivationNetworkException") -> "Keine Internetverbindung zur Key-Prüfung"
e.javaClass.simpleName.contains("PorcupineActivationException") -> "Key ungültig oder Limit erreicht"
else -> e.message ?: "Initialisierungsfehler"
}
Log.e("JARVIS", "Detail-Fehler: $errorMessage")
runOnUiThread {
tvStatus.text = "Fehler: $errorMessage"
}
}
}
override fun onDestroy() { override fun onDestroy() {
// Wichtig: Beim Schließen der App aufräumen! voskService?.stop()
porcupineManager?.stop() voskService?.shutdown()
porcupineManager?.delete()
webSocket?.close(1000, "App Ende") webSocket?.close(1000, "App Ende")
tts.stop()
tts.shutdown() tts.shutdown()
super.onDestroy() super.onDestroy()
} }