package com.example.jarvis_stts
|
|
|
|
import android.Manifest
|
|
import android.content.Intent
|
|
import android.content.pm.PackageManager
|
|
import android.os.Bundle
|
|
import android.speech.RecognizerIntent
|
|
import android.speech.tts.TextToSpeech
|
|
import android.speech.tts.Voice
|
|
import android.util.Log
|
|
import android.view.View
|
|
import android.widget.*
|
|
import androidx.activity.result.contract.ActivityResultContracts
|
|
import androidx.appcompat.app.AppCompatActivity
|
|
import androidx.core.app.ActivityCompat
|
|
import androidx.core.content.ContextCompat
|
|
import okhttp3.*
|
|
import org.vosk.Model
|
|
import org.vosk.Recognizer
|
|
import org.vosk.android.RecognitionListener
|
|
import org.vosk.android.SpeechService
|
|
import org.vosk.android.StorageService
|
|
import java.io.IOException
|
|
import java.util.Locale
|
|
|
|
/**
 * Voice client for the J.A.R.V.I.S. server.
 *
 * Pipeline: Vosk listens offline for the wake word; when heard, the Google
 * speech dialog captures the full utterance, which is sent over a WebSocket.
 * Server replies are shown on screen and spoken via Android TTS.
 */
class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnInitListener {

    // UI elements (bound in onCreate before any use)
    private lateinit var tvStatus: TextView
    private lateinit var etUrl: EditText
    private lateinit var spinnerVoices: Spinner
    private lateinit var tts: TextToSpeech

    // Vosk & networking
    private var voskService: SpeechService? = null
    private var voskModel: Model? = null
    private val client = OkHttpClient()
    private var webSocket: WebSocket? = null

    // TTS voices shown in the spinner; parallel lists (spinner index -> voice)
    private val availableVoices = mutableListOf<Voice>()
    private val voiceNames = mutableListOf<String>()

    // Launcher for the Google speech-recognition dialog.
    private val speechRecognizerLauncher = registerForActivityResult(
        ActivityResultContracts.StartActivityForResult()
    ) { result ->
        // Resume offline hotword listening as soon as the Google dialog closes.
        startVosk()

        if (result.resultCode == RESULT_OK && result.data != null) {
            // firstOrNull(): EXTRA_RESULTS may be absent OR an empty list;
            // the previous get(0) crashed on an empty result list.
            val spokenText = result.data
                ?.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)
                ?.firstOrNull()
                .orEmpty()
            if (spokenText.isNotEmpty()) {
                tvStatus.text = "Ich: $spokenText"
                webSocket?.send(spokenText)
            }
        }
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        // 1. Bind UI
        tvStatus = findViewById(R.id.tvStatus)
        etUrl = findViewById(R.id.etUrl)
        spinnerVoices = findViewById(R.id.spinnerVoices)
        val btnConnect = findViewById<Button>(R.id.btnConnect)
        val btnSpeak = findViewById<Button>(R.id.btnSpeak)

        tts = TextToSpeech(this, this)

        // 2. Restore the last server URL
        val prefs = getSharedPreferences("JarvisPrefs", MODE_PRIVATE)
        etUrl.setText(prefs.getString("server_url", ""))

        btnConnect.setOnClickListener {
            val url = etUrl.text.toString().trim()
            if (url.isNotEmpty()) {
                prefs.edit().putString("server_url", url).apply()
                connectToServer(url)
            }
        }

        btnSpeak.setOnClickListener { startVoiceInput() }

        // 3. Load the Vosk model once we have the microphone permission
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
            != PackageManager.PERMISSION_GRANTED
        ) {
            ActivityCompat.requestPermissions(
                this,
                arrayOf(Manifest.permission.RECORD_AUDIO),
                REQUEST_RECORD_AUDIO,
            )
        } else {
            initVoskModel()
        }
    }

    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray,
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        // Previously the model was never loaded after the user granted the
        // permission (the app had to be restarted); do it here instead.
        if (requestCode == REQUEST_RECORD_AUDIO &&
            grantResults.firstOrNull() == PackageManager.PERMISSION_GRANTED
        ) {
            initVoskModel()
        }
    }

    /** Unpacks the bundled "model-de" assets into app storage, then starts listening. */
    private fun initVoskModel() {
        StorageService.unpack(this, "model-de", "model",
            { model: Model ->
                voskModel = model
                startVosk()
            },
            { exception: IOException ->
                Log.e(TAG, "Vosk Fehler: ${exception.message}")
            }
        )
    }

    /** (Re)starts offline hotword listening. No-op until the model is unpacked. */
    private fun startVosk() {
        val model = voskModel ?: return
        try {
            // Release any previous service so the recognizer/microphone are not
            // leaked (only stop() was called when the hotword fired).
            voskService?.shutdown()
            // Restrict the grammar to the wake word; "[unk]" absorbs everything else.
            val rec = Recognizer(model, SAMPLE_RATE, "[\"$WAKE_WORD\", \"[unk]\"]")
            voskService = SpeechService(rec, SAMPLE_RATE).also { it.startListening(this) }
            // Status now shows the word actually listened for (was hard-coded 'Jarvis'
            // while the grammar listened for 'computer').
            runOnUiThread { tvStatus.text = "Bereit (Warte auf '$WAKE_WORD')" }
        } catch (e: Exception) {
            Log.e(TAG, "Vosk Start Fehler: ${e.message}")
        }
    }

    // --- Vosk RecognitionListener ---

    /** Called continuously with partial hypotheses while Vosk is listening. */
    override fun onPartialResult(hypothesis: String) {
        // Live view in Logcat of what Vosk currently understands.
        Log.d(TAG, "Vosk hört: $hypothesis")

        if (hypothesis.contains(WAKE_WORD, ignoreCase = true)) {
            // Wake word detected: pause hotword listening, hand off to Google STT.
            voskService?.stop()
            startVoiceInput()
        }
    }

    override fun onResult(hypothesis: String) {}
    override fun onFinalResult(hypothesis: String) {}
    override fun onError(e: Exception) { Log.e(TAG, "Vosk Error: ${e.message}") }
    override fun onTimeout() {}

    // --- Google STT & TTS ---

    /** Opens the system speech-recognition dialog (German, free-form). */
    private fun startVoiceInput() {
        val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
            putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
            putExtra(RecognizerIntent.EXTRA_LANGUAGE, "de-DE")
        }
        speechRecognizerLauncher.launch(intent)
    }

    /** (Re)connects the WebSocket to [url], closing any previous connection. */
    private fun connectToServer(url: String) {
        webSocket?.close(1000, "Neuverbindung")
        val request = Request.Builder().url(url).build()
        webSocket = client.newWebSocket(request, object : WebSocketListener() {
            override fun onOpen(webSocket: WebSocket, response: Response) {
                runOnUiThread { tvStatus.text = "Verbunden!" }
            }

            override fun onMessage(webSocket: WebSocket, text: String) {
                runOnUiThread { tvStatus.text = "J.A.R.V.I.S.: $text" }
                speakOut(text)
            }

            override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) {
                // Connection errors used to be silently dropped; surface them.
                Log.e(TAG, "WebSocket Fehler: ${t.message}")
                runOnUiThread { tvStatus.text = "Verbindungsfehler: ${t.message}" }
            }
        })
    }

    /** TextToSpeech init callback: configure German and populate the voice list. */
    override fun onInit(status: Int) {
        if (status == TextToSpeech.SUCCESS) {
            tts.language = Locale.GERMAN
            setupVoiceSpinner()
        } else {
            // Previously a failed init was silently ignored.
            Log.e(TAG, "TTS init failed: $status")
        }
    }

    /** Fills the spinner with the installed German TTS voices. */
    private fun setupVoiceSpinner() {
        availableVoices.clear()
        voiceNames.clear()
        // getVoices can return null on some devices/engines — guard with orEmpty().
        for (v in tts.voices.orEmpty()) {
            if (v.locale.language == "de") {
                availableVoices.add(v)
                voiceNames.add("${v.name} (${if (v.isNetworkConnectionRequired) "Online" else "Lokal"})")
            }
        }
        val adapter = ArrayAdapter(this, android.R.layout.simple_spinner_item, voiceNames)
        adapter.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item)
        spinnerVoices.adapter = adapter
        spinnerVoices.onItemSelectedListener = object : AdapterView.OnItemSelectedListener {
            override fun onItemSelected(parent: AdapterView<*>?, view: View?, pos: Int, id: Long) {
                tts.voice = availableVoices[pos]
            }

            override fun onNothingSelected(parent: AdapterView<*>?) {}
        }
    }

    /** Speaks [text] aloud, flushing anything currently queued. */
    private fun speakOut(text: String) {
        tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, "")
    }

    override fun onDestroy() {
        voskService?.stop()
        voskService?.shutdown()
        voskService = null
        webSocket?.close(1000, "App Ende")
        tts.stop()        // stop any in-flight utterance before releasing the engine
        tts.shutdown()
        super.onDestroy()
    }

    companion object {
        private const val TAG = "JARVIS"
        private const val SAMPLE_RATE = 16000.0f
        private const val REQUEST_RECORD_AUDIO = 1

        // Single source of truth for the hotword: the grammar, the partial-result
        // check and the status text all use it (the original mixed "computer"
        // in the grammar with "Jarvis" in the UI text).
        private const val WAKE_WORD = "computer"
    }
}