// File: jarvis-ai-stts/app/src/main/java/com/example/jarvis_stts/MainActivity.kt
// (197 lines, 7.0 KiB, Kotlin)
package com.example.jarvis_stts
import android.Manifest
import android.content.Intent
import android.content.pm.PackageManager
import android.os.Bundle
import android.speech.RecognizerIntent
import android.speech.tts.TextToSpeech
import android.speech.tts.Voice
import android.util.Log
import android.view.View
import android.widget.*
import androidx.activity.result.contract.ActivityResultContracts
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import androidx.core.content.ContextCompat
import okhttp3.*
import org.vosk.Model
import org.vosk.Recognizer
import org.vosk.android.RecognitionListener
import org.vosk.android.SpeechService
import org.vosk.android.StorageService
import java.io.IOException
import java.util.Locale
/**
 * Voice-assistant front end.
 *
 * Pipeline: Vosk runs continuously as an offline wake-word detector; once the
 * wake word is heard, Google's speech recognizer captures a full utterance,
 * which is sent to the J.A.R.V.I.S. backend over a WebSocket. Server replies
 * are shown in the status view and spoken aloud via Android TextToSpeech.
 */
class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnInitListener {
    // UI elements
    private lateinit var tvStatus: TextView
    private lateinit var etUrl: EditText
    private lateinit var spinnerVoices: Spinner
    private lateinit var tts: TextToSpeech
    // Vosk & networking
    private var voskService: SpeechService? = null
    private var voskModel: Model? = null
    private val client = OkHttpClient()
    private var webSocket: WebSocket? = null
    // TTS voices shown in the spinner (parallel lists: same index = same voice)
    private val availableVoices = mutableListOf<Voice>()
    private val voiceNames = mutableListOf<String>()
    // Launcher for the Google speech-recognition dialog
    private val speechRecognizerLauncher = registerForActivityResult(
        ActivityResultContracts.StartActivityForResult()
    ) { result ->
        // Resume Vosk wake-word listening once the Google dialog is gone.
        startVosk()
        if (result.resultCode == RESULT_OK && result.data != null) {
            // firstOrNull() guards against an empty results list
            // (the original get(0) could throw IndexOutOfBoundsException).
            val spokenText = result.data!!
                .getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)
                ?.firstOrNull()
                .orEmpty()
            if (spokenText.isNotEmpty()) {
                tvStatus.text = "Ich: $spokenText"
                webSocket?.send(spokenText)
            }
        }
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)
        // 1. Initialize UI
        tvStatus = findViewById(R.id.tvStatus)
        etUrl = findViewById(R.id.etUrl)
        spinnerVoices = findViewById(R.id.spinnerVoices)
        val btnConnect = findViewById<Button>(R.id.btnConnect)
        val btnSpeak = findViewById<Button>(R.id.btnSpeak)
        tts = TextToSpeech(this, this)
        // 2. Restore the last server URL
        val prefs = getSharedPreferences("JarvisPrefs", MODE_PRIVATE)
        etUrl.setText(prefs.getString("server_url", ""))
        btnConnect.setOnClickListener {
            val url = etUrl.text.toString()
            if (url.isNotEmpty()) {
                prefs.edit().putString("server_url", url).apply()
                connectToServer(url)
            }
        }
        btnSpeak.setOnClickListener { startVoiceInput() }
        // 3. Load Vosk model (needs RECORD_AUDIO permission first)
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
            ActivityCompat.requestPermissions(this, arrayOf(Manifest.permission.RECORD_AUDIO), AUDIO_PERMISSION_REQUEST)
        } else {
            initVoskModel()
        }
    }

    /**
     * Bug fix: this override was missing, so on first launch the Vosk model
     * was never loaded after the user granted RECORD_AUDIO — the wake-word
     * engine silently never started until the app was restarted.
     */
    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray,
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        if (requestCode == AUDIO_PERMISSION_REQUEST &&
            grantResults.isNotEmpty() &&
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        ) {
            initVoskModel()
        }
    }

    /** Unpacks the bundled German Vosk model from assets, then starts listening. */
    private fun initVoskModel() {
        StorageService.unpack(this, "model-de", "model",
            { model: Model ->
                voskModel = model
                startVosk()
            },
            { exception: IOException ->
                Log.e(TAG, "Vosk Fehler: ${exception.message}")
            }
        )
    }

    /** (Re)starts offline wake-word detection. Safe to call repeatedly. */
    private fun startVosk() {
        val model = voskModel ?: return
        try {
            // Leak fix: release the previous SpeechService (and its recorder)
            // before creating a new one on every Google-STT round trip.
            voskService?.shutdown()
            // Grammar restricted to the wake word; "[unk]" absorbs all other speech.
            val recognizer = Recognizer(model, SAMPLE_RATE, "[\"$WAKE_WORD\", \"[unk]\"]")
            voskService = SpeechService(recognizer, SAMPLE_RATE)
            voskService?.startListening(this)
            // Consistency fix: the status now names the actual wake word
            // (the grammar listens for "computer", not "Jarvis").
            runOnUiThread { tvStatus.text = "Bereit (Warte auf '$WAKE_WORD')" }
        } catch (e: Exception) {
            Log.e(TAG, "Vosk Start Fehler: ${e.message}")
        }
    }

    // --- Vosk RecognitionListener ---

    override fun onPartialResult(hypothesis: String) {
        // Live Logcat trace of what Vosk currently understands.
        Log.d(TAG, "Vosk hört: $hypothesis")
        if (hypothesis.contains(WAKE_WORD, ignoreCase = true)) {
            // Pause wake-word detection while Google STT owns the microphone;
            // startVosk() is called again from the activity-result callback.
            voskService?.stop()
            startVoiceInput()
        }
    }

    override fun onResult(hypothesis: String) {}
    override fun onFinalResult(hypothesis: String) {}
    override fun onError(e: Exception) { Log.e(TAG, "Vosk Error: ${e.message}") }
    override fun onTimeout() {}

    // --- Google STT & TTS ---

    /** Opens the Google speech-recognition dialog for a free-form German utterance. */
    private fun startVoiceInput() {
        val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
            putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
            putExtra(RecognizerIntent.EXTRA_LANGUAGE, "de-DE")
        }
        speechRecognizerLauncher.launch(intent)
    }

    /** Closes any previous WebSocket and connects to [url]. */
    private fun connectToServer(url: String) {
        webSocket?.close(NORMAL_CLOSURE, "Neuverbindung")
        val request = Request.Builder().url(url).build()
        webSocket = client.newWebSocket(request, object : WebSocketListener() {
            override fun onOpen(webSocket: WebSocket, response: Response) {
                runOnUiThread { tvStatus.text = "Verbunden!" }
            }
            override fun onMessage(webSocket: WebSocket, text: String) {
                runOnUiThread { tvStatus.text = "J.A.R.V.I.S.: $text" }
                speakOut(text)
            }
            override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) {
                // Robustness fix: connection errors were silently dropped,
                // leaving the UI stuck on the last status.
                Log.e(TAG, "WebSocket Fehler: ${t.message}")
                runOnUiThread { tvStatus.text = "Verbindungsfehler: ${t.message}" }
            }
            override fun onClosed(webSocket: WebSocket, code: Int, reason: String) {
                runOnUiThread { tvStatus.text = "Verbindung getrennt" }
            }
        })
    }

    /** TextToSpeech init callback; populates the voice spinner on success. */
    override fun onInit(status: Int) {
        if (status == TextToSpeech.SUCCESS) {
            tts.language = Locale.GERMAN
            setupVoiceSpinner()
        }
    }

    /** Fills the spinner with all installed German voices. */
    private fun setupVoiceSpinner() {
        availableVoices.clear()
        voiceNames.clear()
        // tts.voices is a Java platform type and is null on some devices/engines.
        val voices = tts.voices ?: emptySet()
        for (v in voices) {
            if (v.locale.language == "de") {
                availableVoices.add(v)
                voiceNames.add("${v.name} (${if (v.isNetworkConnectionRequired) "Online" else "Lokal"})")
            }
        }
        val adapter = ArrayAdapter(this, android.R.layout.simple_spinner_item, voiceNames)
        adapter.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item)
        spinnerVoices.adapter = adapter
        spinnerVoices.onItemSelectedListener = object : AdapterView.OnItemSelectedListener {
            override fun onItemSelected(parent: AdapterView<*>?, view: View?, pos: Int, id: Long) {
                tts.voice = availableVoices[pos]
            }
            override fun onNothingSelected(parent: AdapterView<*>?) {}
        }
    }

    /** Speaks [text] with the currently selected voice, flushing any queued speech. */
    private fun speakOut(text: String) {
        // Non-empty utterance id so UtteranceProgressListener callbacks fire correctly.
        tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, UTTERANCE_ID)
    }

    override fun onDestroy() {
        voskService?.stop()
        voskService?.shutdown()
        webSocket?.close(NORMAL_CLOSURE, "App Ende")
        tts.stop() // abort any in-flight utterance before releasing the engine
        tts.shutdown()
        super.onDestroy()
    }

    companion object {
        private const val TAG = "JARVIS"
        // Single source of truth for the wake word (grammar, detection, status text).
        private const val WAKE_WORD = "computer"
        private const val SAMPLE_RATE = 16000.0f
        private const val AUDIO_PERMISSION_REQUEST = 1
        private const val NORMAL_CLOSURE = 1000
        private const val UTTERANCE_ID = "jarvis-tts"
    }
}