diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml index 57f60d7..bfd116e 100644 --- a/app/src/main/AndroidManifest.xml +++ b/app/src/main/AndroidManifest.xml @@ -1,35 +1,37 @@ - + package="com.example.jarvis_stts"> + - + + + android:theme="@style/Theme.AppCompat.DayNight.NoActionBar" + android:usesCleartextTraffic="true"> + + android:exported="true" + android:launchMode="singleTask"> - - + android:exported="false" + android:foregroundServiceType="microphone" /> - \ No newline at end of file diff --git a/app/src/main/assets/model-de/COPYING b/app/src/main/assets/model-de/COPYING new file mode 100644 index 0000000..ab6497f --- /dev/null +++ b/app/src/main/assets/model-de/COPYING @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS diff --git a/app/src/main/assets/model-de/README b/app/src/main/assets/model-de/README new file mode 100644 index 0000000..dd0ad0c --- /dev/null +++ b/app/src/main/assets/model-de/README @@ -0,0 +1,12 @@ +German model for mobile Vosk applications + +Copyright © 2020 Alpha Cephei Inc + +%WER 28.63 [ 3442 / 12023, 497 ins, 919 del, 2026 sub ] exp/chain_a/tdnn/decode_test-podcast-wb/wer_10_0.0 +%WER 30.67 [ 3688 / 12023, 520 ins, 954 del, 2214 sub ] exp/chain_a/tdnn/decode_test-podcast-wb_l/wer_10_0.0 +%WER 26.68 [ 3208 / 12023, 464 ins, 910 del, 1834 sub ] exp/chain_a/tdnn/decode_test-podcast-wb_rescore/wer_10_0.0 +%WER 11.04 [ 7683 / 69600, 1592 ins, 925 del, 5166 sub ] exp/chain_a/tdnn/decode_test-wb/wer_10_0.5 +%WER 13.75 [ 9567 / 69600, 1896 ins, 1126 del, 6545 sub ] exp/chain_a/tdnn/decode_test-wb_l/wer_10_0.5 +%WER 9.02 [ 6279 / 69600, 1241 ins, 879 del, 4159 sub ] exp/chain_a/tdnn/decode_test-wb_rescore/wer_11_0.5 + +Time taken 45.1052s: real-time factor assuming 100 frames/sec is 0.115468 diff --git a/app/src/main/assets/model-de/am/final.mdl b/app/src/main/assets/model-de/am/final.mdl new file mode 100644 index 0000000..cdc70f1 Binary files /dev/null and b/app/src/main/assets/model-de/am/final.mdl differ diff --git a/app/src/main/assets/model-de/conf/mfcc.conf b/app/src/main/assets/model-de/conf/mfcc.conf new file mode 100644 index 0000000..a233c47 --- /dev/null +++ b/app/src/main/assets/model-de/conf/mfcc.conf @@ -0,0 +1,6 @@ +--use-energy=false +--sample-frequency=16000 +--num-mel-bins=30 +--num-ceps=30 +--low-freq=100 +--high-freq=7600 diff --git a/app/src/main/assets/model-de/conf/model.conf b/app/src/main/assets/model-de/conf/model.conf new file mode 100644 index 0000000..64bc89e --- /dev/null +++ b/app/src/main/assets/model-de/conf/model.conf @@ -0,0 +1,10 @@ +--min-active=200 +--max-active=3000 +--beam=10.0 +--lattice-beam=2.0 +--acoustic-scale=1.0 +--frame-subsampling-factor=3 
+--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10 +--endpoint.rule2.min-trailing-silence=0.5 +--endpoint.rule3.min-trailing-silence=1.0 +--endpoint.rule4.min-trailing-silence=2.0 diff --git a/app/src/main/assets/model-de/graph/Gr.fst b/app/src/main/assets/model-de/graph/Gr.fst new file mode 100644 index 0000000..e30cfdf Binary files /dev/null and b/app/src/main/assets/model-de/graph/Gr.fst differ diff --git a/app/src/main/assets/model-de/graph/HCLr.fst b/app/src/main/assets/model-de/graph/HCLr.fst new file mode 100644 index 0000000..4380099 Binary files /dev/null and b/app/src/main/assets/model-de/graph/HCLr.fst differ diff --git a/app/src/main/assets/model-de/graph/disambig_tid.int b/app/src/main/assets/model-de/graph/disambig_tid.int new file mode 100644 index 0000000..e6cd439 --- /dev/null +++ b/app/src/main/assets/model-de/graph/disambig_tid.int @@ -0,0 +1,7 @@ +14297 +14298 +14299 +14300 +14301 +14302 +14303 diff --git a/app/src/main/assets/model-de/graph/phones/word_boundary.int b/app/src/main/assets/model-de/graph/phones/word_boundary.int new file mode 100644 index 0000000..23fb8c5 --- /dev/null +++ b/app/src/main/assets/model-de/graph/phones/word_boundary.int @@ -0,0 +1,410 @@ +1 nonword +2 begin +3 end +4 internal +5 singleton +6 nonword +7 begin +8 end +9 internal +10 singleton +11 begin +12 end +13 internal +14 singleton +15 begin +16 end +17 internal +18 singleton +19 begin +20 end +21 internal +22 singleton +23 begin +24 end +25 internal +26 singleton +27 begin +28 end +29 internal +30 singleton +31 begin +32 end +33 internal +34 singleton +35 begin +36 end +37 internal +38 singleton +39 begin +40 end +41 internal +42 singleton +43 begin +44 end +45 internal +46 singleton +47 begin +48 end +49 internal +50 singleton +51 begin +52 end +53 internal +54 singleton +55 begin +56 end +57 internal +58 singleton +59 begin +60 end +61 internal +62 singleton +63 begin +64 end +65 internal +66 singleton +67 begin +68 end +69 internal +70 singleton +71 begin +72 end 
+73 internal +74 singleton +75 begin +76 end +77 internal +78 singleton +79 begin +80 end +81 internal +82 singleton +83 begin +84 end +85 internal +86 singleton +87 begin +88 end +89 internal +90 singleton +91 begin +92 end +93 internal +94 singleton +95 begin +96 end +97 internal +98 singleton +99 begin +100 end +101 internal +102 singleton +103 begin +104 end +105 internal +106 singleton +107 begin +108 end +109 internal +110 singleton +111 begin +112 end +113 internal +114 singleton +115 begin +116 end +117 internal +118 singleton +119 begin +120 end +121 internal +122 singleton +123 begin +124 end +125 internal +126 singleton +127 begin +128 end +129 internal +130 singleton +131 begin +132 end +133 internal +134 singleton +135 begin +136 end +137 internal +138 singleton +139 begin +140 end +141 internal +142 singleton +143 begin +144 end +145 internal +146 singleton +147 begin +148 end +149 internal +150 singleton +151 begin +152 end +153 internal +154 singleton +155 begin +156 end +157 internal +158 singleton +159 begin +160 end +161 internal +162 singleton +163 begin +164 end +165 internal +166 singleton +167 begin +168 end +169 internal +170 singleton +171 begin +172 end +173 internal +174 singleton +175 begin +176 end +177 internal +178 singleton +179 begin +180 end +181 internal +182 singleton +183 begin +184 end +185 internal +186 singleton +187 begin +188 end +189 internal +190 singleton +191 begin +192 end +193 internal +194 singleton +195 begin +196 end +197 internal +198 singleton +199 begin +200 end +201 internal +202 singleton +203 begin +204 end +205 internal +206 singleton +207 begin +208 end +209 internal +210 singleton +211 begin +212 end +213 internal +214 singleton +215 begin +216 end +217 internal +218 singleton +219 begin +220 end +221 internal +222 singleton +223 begin +224 end +225 internal +226 singleton +227 begin +228 end +229 internal +230 singleton +231 begin +232 end +233 internal +234 singleton +235 begin +236 end +237 internal 
+238 singleton +239 begin +240 end +241 internal +242 singleton +243 begin +244 end +245 internal +246 singleton +247 begin +248 end +249 internal +250 singleton +251 begin +252 end +253 internal +254 singleton +255 begin +256 end +257 internal +258 singleton +259 begin +260 end +261 internal +262 singleton +263 begin +264 end +265 internal +266 singleton +267 begin +268 end +269 internal +270 singleton +271 begin +272 end +273 internal +274 singleton +275 begin +276 end +277 internal +278 singleton +279 begin +280 end +281 internal +282 singleton +283 begin +284 end +285 internal +286 singleton +287 begin +288 end +289 internal +290 singleton +291 begin +292 end +293 internal +294 singleton +295 begin +296 end +297 internal +298 singleton +299 begin +300 end +301 internal +302 singleton +303 begin +304 end +305 internal +306 singleton +307 begin +308 end +309 internal +310 singleton +311 begin +312 end +313 internal +314 singleton +315 begin +316 end +317 internal +318 singleton +319 begin +320 end +321 internal +322 singleton +323 begin +324 end +325 internal +326 singleton +327 begin +328 end +329 internal +330 singleton +331 begin +332 end +333 internal +334 singleton +335 begin +336 end +337 internal +338 singleton +339 begin +340 end +341 internal +342 singleton +343 begin +344 end +345 internal +346 singleton +347 begin +348 end +349 internal +350 singleton +351 begin +352 end +353 internal +354 singleton +355 begin +356 end +357 internal +358 singleton +359 begin +360 end +361 internal +362 singleton +363 begin +364 end +365 internal +366 singleton +367 begin +368 end +369 internal +370 singleton +371 begin +372 end +373 internal +374 singleton +375 begin +376 end +377 internal +378 singleton +379 begin +380 end +381 internal +382 singleton +383 begin +384 end +385 internal +386 singleton +387 begin +388 end +389 internal +390 singleton +391 begin +392 end +393 internal +394 singleton +395 begin +396 end +397 internal +398 singleton +399 begin +400 end +401 
internal +402 singleton +403 begin +404 end +405 internal +406 singleton +407 begin +408 end +409 internal +410 singleton diff --git a/app/src/main/assets/model-de/ivector/final.dubm b/app/src/main/assets/model-de/ivector/final.dubm new file mode 100644 index 0000000..5b4cd8b Binary files /dev/null and b/app/src/main/assets/model-de/ivector/final.dubm differ diff --git a/app/src/main/assets/model-de/ivector/final.ie b/app/src/main/assets/model-de/ivector/final.ie new file mode 100644 index 0000000..2f7eff7 Binary files /dev/null and b/app/src/main/assets/model-de/ivector/final.ie differ diff --git a/app/src/main/assets/model-de/ivector/final.mat b/app/src/main/assets/model-de/ivector/final.mat new file mode 100644 index 0000000..1072913 Binary files /dev/null and b/app/src/main/assets/model-de/ivector/final.mat differ diff --git a/app/src/main/assets/model-de/ivector/global_cmvn.stats b/app/src/main/assets/model-de/ivector/global_cmvn.stats new file mode 100644 index 0000000..bac4588 --- /dev/null +++ b/app/src/main/assets/model-de/ivector/global_cmvn.stats @@ -0,0 +1,3 @@ + [ + 5.71762e+10 -1.787531e+09 4.831925e+08 4.409361e+09 -1.932859e+09 -1.543599e+09 -3.678115e+09 -3.702029e+09 -3.746038e+09 -1.137944e+09 -9.079639e+08 -5.819128e+07 -2.0947e+09 1.309573e+09 -5.171984e+08 2.481896e+08 -8.396431e+08 1.52164e+08 -6.130926e+08 1.542617e+07 -2.69509e+08 -2281178 -6.453142e+07 7354178 1.033131e+08 -1.133166e+07 6.952426e+07 -6.868613e+07 8611552 -7.709294e+07 6.473713e+08 + 5.228252e+12 2.646175e+11 1.876415e+11 2.966005e+11 2.523884e+11 2.051858e+11 2.541094e+11 2.495341e+11 2.292826e+11 2.018152e+11 1.897722e+11 1.747636e+11 1.580083e+11 1.184324e+11 8.987214e+10 7.035342e+10 5.102559e+10 3.405446e+10 2.154957e+10 1.188595e+10 5.606266e+09 1.907599e+09 2.56771e+08 7.379798e+07 9.136799e+08 2.368205e+09 3.935658e+09 5.588645e+09 7.453864e+09 8.618391e+09 0 ] diff --git a/app/src/main/assets/model-de/ivector/online_cmvn.conf 
b/app/src/main/assets/model-de/ivector/online_cmvn.conf new file mode 100644 index 0000000..e69de29 diff --git a/app/src/main/assets/model-de/ivector/splice.conf b/app/src/main/assets/model-de/ivector/splice.conf new file mode 100644 index 0000000..960cd2e --- /dev/null +++ b/app/src/main/assets/model-de/ivector/splice.conf @@ -0,0 +1,2 @@ +--left-context=3 +--right-context=3 diff --git a/app/src/main/assets/model-de/uuid b/app/src/main/assets/model-de/uuid new file mode 100644 index 0000000..d800886 --- /dev/null +++ b/app/src/main/assets/model-de/uuid @@ -0,0 +1 @@ +123 \ No newline at end of file diff --git a/app/src/main/java/com/example/jarvis_stts/JarviceService.kt b/app/src/main/java/com/example/jarvis_stts/JarviceService.kt new file mode 100644 index 0000000..d0f6cfb --- /dev/null +++ b/app/src/main/java/com/example/jarvis_stts/JarviceService.kt @@ -0,0 +1,179 @@ +package com.example.jarvis_stts + +import android.app.NotificationChannel +import android.app.NotificationManager +import android.app.PendingIntent +import android.app.Service +import android.content.Intent +import android.os.Build +import android.os.IBinder +import android.util.Log +import androidx.core.app.NotificationCompat +import org.vosk.Model +import org.vosk.Recognizer +import org.vosk.android.RecognitionListener +import org.vosk.android.SpeechService +import java.io.File +import android.content.pm.ServiceInfo +import org.json.JSONObject + +class JarvisService : Service(), RecognitionListener { + + private var voskService: SpeechService? = null + private var voskModel: Model? 
= null + private var isInteracting = false + + companion object { + const val CHANNEL_ID = "JarvisServiceChannel" + const val ACTION_START = "ACTION_START" + const val ACTION_PAUSE = "ACTION_PAUSE" + const val ACTION_RESUME = "ACTION_RESUME" + } + + override fun onCreate() { + super.onCreate() + createNotificationChannel() + + // Für Android 14 (API 34) und höher müssen wir den Typ beim Starten mitgeben + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.UPSIDE_DOWN_CAKE) { + startForeground( + 1, + createNotification("J.A.R.V.I.S. hört zu..."), + ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE + ) + } else { + startForeground(1, createNotification("J.A.R.V.I.S. hört zu...")) + } + + initVosk() + } + + override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int { + when (intent?.action) { + ACTION_START -> resumeListening() + ACTION_PAUSE -> pauseListening() + ACTION_RESUME -> resumeListening() + } + return START_STICKY + } + + private fun createNotificationChannel() { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + val channel = NotificationChannel( + CHANNEL_ID, + "J.A.R.V.I.S. Background Service", + NotificationManager.IMPORTANCE_LOW + ) + val manager = getSystemService(NotificationManager::class.java) + manager.createNotificationChannel(channel) + } + } + + private fun createNotification(text: String): android.app.Notification { + val notificationIntent = Intent(this, MainActivity::class.java) + val pendingIntent = PendingIntent.getActivity( + this, 0, notificationIntent, PendingIntent.FLAG_IMMUTABLE + ) + + return NotificationCompat.Builder(this, CHANNEL_ID) + .setContentTitle("J.A.R.V.I.S. 
ist aktiv") + .setContentText(text) + .setSmallIcon(android.R.drawable.ic_btn_speak_now) // Standard Android Icon + .setContentIntent(pendingIntent) + .setOngoing(true) + .build() + } + + private fun updateNotification(text: String) { + val manager = getSystemService(NotificationManager::class.java) + manager.notify(1, createNotification(text)) + } + + private fun initVosk() { + val baseDir = getExternalFilesDir(null) + val modelFolder = File(baseDir, "model/model-de") + + if (modelFolder.exists()) { + Log.d("JARVIS", "Modell gefunden unter: ${modelFolder.absolutePath}") + try { + // Wir laden das Modell und speichern es in der Klassen-Variable 'voskModel' + voskModel = Model(modelFolder.absolutePath) + + Log.d("JARVIS", "Modell bereit, starte Hintergrund-Dienst...") + + // Jetzt, wo das Modell da ist, können wir das Zuhören starten + resumeListening() + + } catch (e: Exception) { + Log.e("JARVIS", "Vosk Fehler beim Modell-Laden: ${e.message}") + updateNotification("Fehler: Modell konnte nicht geladen werden") + } + } else { + Log.e("JARVIS", "ORDNER NICHT GEFUNDEN! 
Pfad: ${modelFolder.absolutePath}") + updateNotification("Fehler: Modell-Ordner fehlt") + } + } + + private fun pauseListening() { + Log.d("JARVIS", "Service: Pausiere Zuhören") + voskService?.stop() + updateNotification("Pausiert (verarbeitet Anfrage...)") + } + + private fun resumeListening() { + if (voskModel == null) return + Log.d("JARVIS", "Service: Starte Zuhören") + voskService?.stop() + + // Die Liste klein halten ist gut, aber wir brauchen den exakten Treffer + val rec = Recognizer(voskModel, 16000.0f, "[\"jarvis\", \"[unk]\"]") + voskService = SpeechService(rec, 16000.0f) + voskService?.startListening(this) + + isInteracting = false + updateNotification("Warte auf 'Jarvis'...") + } + + // --- Vosk Listener --- + override fun onPartialResult(hypothesis: String) { + if (isInteracting) return + + try { + val json = JSONObject(hypothesis) + // Bei onPartialResult heißt das Feld "partial" + val partialText = json.optString("partial").lowercase().trim() + + // Wir reagieren NUR, wenn das Wort exakt "jarvis" ist + // Das verhindert, dass Wörter wie "Service" oder "Nachtisch" triggern + if (partialText == "jarvis" ) { + Log.d("JARVIS", "Service: WAKE WORD EXAKT ERKANNT: $partialText") + + isInteracting = true + // Ein kurzer haptischer Feedback-Vibe wäre hier cool (optional) + + pauseListening() + + val intent = Intent(this, MainActivity::class.java).apply { + addFlags(Intent.FLAG_ACTIVITY_NEW_TASK or Intent.FLAG_ACTIVITY_SINGLE_TOP) + putExtra("WAKE_WORD_TRIGGERED", true) + } + startActivity(intent) + } + } catch (e: Exception) { + Log.e("JARVIS", "Fehler beim Parsen des PartialResults: ${e.message}") + } + } + + override fun onResult(hypothesis: String) {} + override fun onFinalResult(hypothesis: String) {} + override fun onError(e: Exception) { Log.e("JARVIS", "Service Error: ${e.message}") } + override fun onTimeout() {} + + override fun onDestroy() { + super.onDestroy() + voskService?.stop() + voskService?.shutdown() + } + + override fun onBind(intent: 
Intent?): IBinder? = null +} \ No newline at end of file diff --git a/app/src/main/java/com/example/jarvis_stts/JarvisService.kt b/app/src/main/java/com/example/jarvis_stts/JarvisService.kt deleted file mode 100644 index 520ac8b..0000000 --- a/app/src/main/java/com/example/jarvis_stts/JarvisService.kt +++ /dev/null @@ -1,69 +0,0 @@ -package com.example.jarvis_stts - -import android.app.Service -import android.content.Intent -import android.os.IBinder -import org.vosk.Model -import org.vosk.Recognizer -import org.vosk.android.SpeechService -import org.vosk.android.RecognitionListener -import java.io.IOException -import java.io.File - -class JarvisService : Service(), RecognitionListener { - - private var speechService: SpeechService? = null - - override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int { - setupVosk() - return START_STICKY // Sorgt dafür, dass der Service bei Beendung neu startet - } - - private fun setupVosk() { - try { - // MainActivity entpackt nach "model", also greifen wir hier darauf zu: - val modelPath = File(filesDir, "model").absolutePath - - val model = Model(modelPath) - - // WICHTIG: Nutze hier "computer" ODER "jarvis", - // je nachdem was du in der MainActivity definiert hast. - val recognizer = Recognizer(model, 16000f, "[\"computer\", \"jarvis\", \"[unk]\"]") - - speechService = SpeechService(recognizer, 16000f) - speechService?.startListening(this) - Log.d("JARVIS", "Service: Vosk hört jetzt zu...") - - } catch (e: Exception) { - Log.e("JARVIS", "Service: Fehler beim Laden des Modells: ${e.message}") - } - } - - override fun onResult(hypothesis: String?) { - // hypothesis ist ein JSON String, z.B.: { "text" : "jarvis" } - if (hypothesis != null && hypothesis.contains("jarvis")) { - println("WAKE WORD ERKANNT!") - // Hier triggerst du deine Antwort-Logik - } - } - - override fun onPartialResult(hypothesis: String?) { - // Wird während des Sprechens aufgerufen - } - - override fun onFinalResult(hypothesis: String?) 
{} - - override fun onError(e: Exception?) { - e?.printStackTrace() - } - - override fun onTimeout() {} - - override fun onDestroy() { - super.onDestroy() - speechService?.stop() - speechService?.shutdown() - } - - override fun onBind(intent: Intent?): IBinder? = null -} \ No newline at end of file diff --git a/app/src/main/java/com/example/jarvis_stts/MainActivity.kt b/app/src/main/java/com/example/jarvis_stts/MainActivity.kt index 7746f2f..6dc4fcd 100644 --- a/app/src/main/java/com/example/jarvis_stts/MainActivity.kt +++ b/app/src/main/java/com/example/jarvis_stts/MainActivity.kt @@ -3,9 +3,11 @@ package com.example.jarvis_stts import android.Manifest import android.content.Intent import android.content.pm.PackageManager +import android.os.Build import android.os.Bundle import android.speech.RecognizerIntent import android.speech.tts.TextToSpeech +import android.speech.tts.UtteranceProgressListener import android.speech.tts.Voice import android.util.Log import android.view.View @@ -16,55 +18,59 @@ import androidx.core.app.ActivityCompat import androidx.core.content.ContextCompat import okhttp3.* import org.vosk.Model -import org.vosk.Recognizer -import org.vosk.android.RecognitionListener -import org.vosk.android.SpeechService import org.vosk.android.StorageService import java.io.IOException import java.util.Locale -import android.speech.tts.UtteranceProgressListener +import android.provider.Settings +import android.net.Uri -class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnInitListener { +class MainActivity : AppCompatActivity(), TextToSpeech.OnInitListener { - // UI Elemente private lateinit var tvStatus: TextView private lateinit var etUrl: EditText private lateinit var spinnerVoices: Spinner private lateinit var tts: TextToSpeech - // Vosk & Netzwerk - private var voskService: SpeechService? = null - private var voskModel: Model? = null private val client = OkHttpClient() private var webSocket: WebSocket? 
= null - private var isInteracting = false - // TTS Stimmen private var availableVoices = mutableListOf() private var voiceNames = mutableListOf() - // Launcher für Google Spracherkennung - private val speechRecognizerLauncher = registerForActivityResult( - ActivityResultContracts.StartActivityForResult() - ) { result -> - isInteracting = false // WICHTIG: Sperre wieder aufheben! - - if (result.resultCode == RESULT_OK && result.data != null) { - val spokenText = result.data!!.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)?.get(0) ?: "" - tvStatus.text = "Ich: $spokenText" - webSocket?.send(spokenText) - // HIER STARTEN WIR VOSK NOCH NICHT! Wir warten auf die Antwort des Servers. - } else { - // Nur wenn wir nichts gesagt oder abgebrochen haben, geht Vosk direkt wieder an - startVosk() + private val speechRecognizerLauncher = registerForActivityResult( + ActivityResultContracts.StartActivityForResult() + ) { result -> + if (result.resultCode == RESULT_OK && result.data != null) { + val spokenText = result.data!!.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS)?.get(0) ?: "" + tvStatus.text = "Ich: $spokenText" + webSocket?.send(spokenText) + // Hier warten wir auf den Server. Der Service bleibt pausiert. + } else { + // Abbruch oder Fehler -> Service soll wieder zuhören + tellServiceTo(JarvisService.ACTION_RESUME) + } + } + + private fun checkOverlayPermission() { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) { + if (!Settings.canDrawOverlays(this)) { + Log.d("JARVIS", "Overlay-Berechtigung fehlt. Öffne Einstellungen...") + val intent = Intent( + Settings.ACTION_MANAGE_OVERLAY_PERMISSION, + Uri.parse("package:$packageName") + ) + // Wirft den Nutzer in die Einstellungen. + // Nach der Rückkehr muss die App meist neu gestartet/fokussiert werden. + startActivity(intent) + Toast.makeText(this, "Bitte erlaube J.A.R.V.I.S., über anderen Apps zu erscheinen", Toast.LENGTH_LONG).show() } } + } override fun onCreate(savedInstanceState: Bundle?) 
{ super.onCreate(savedInstanceState) setContentView(R.layout.activity_main) - // 1. UI initialisieren tvStatus = findViewById(R.id.tvStatus) etUrl = findViewById(R.id.etUrl) spinnerVoices = findViewById(R.id.spinnerVoices) @@ -73,7 +79,6 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn tts = TextToSpeech(this, this) - // 2. SharedPreferences (Server URL laden) val prefs = getSharedPreferences("JarvisPrefs", MODE_PRIVATE) etUrl.setText(prefs.getString("server_url", "")) @@ -85,18 +90,41 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn } } - btnSpeak.setOnClickListener { - voskService?.stop() // Stoppe Wake-Word, wenn man manuell klickt - startVoiceInput() + btnSpeak.setOnClickListener { + tellServiceTo(JarvisService.ACTION_PAUSE) + startVoiceInput() } - // 3. Berechtigungen prüfen & Modell laden checkPermissionsAndInit() + checkOverlayPermission() // <-- Hier aufrufen! + } + + // Wird aufgerufen, wenn die App im Hintergrund war und vom Service geweckt wird + override fun onNewIntent(intent: Intent?) { + super.onNewIntent(intent) + if (intent?.getBooleanExtra("WAKE_WORD_TRIGGERED", false) == true) { + Log.d("JARVIS", "MainActivity: Wake Word vom Service empfangen! 
Starte Google...") + // Kleiner Delay, damit die Audio-Hardware Zeit zum Umschalten hat + tvStatus.postDelayed({ + startVoiceInput() + }, 500) + } } private fun checkPermissionsAndInit() { - if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) { - ActivityCompat.requestPermissions(this, arrayOf(Manifest.permission.RECORD_AUDIO), 1) + val permissions = mutableListOf(Manifest.permission.RECORD_AUDIO) + + // Notification Permission für Android 13+ + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) { + permissions.add(Manifest.permission.POST_NOTIFICATIONS) + } + + val missingPermissions = permissions.filter { + ContextCompat.checkSelfPermission(this, it) != PackageManager.PERMISSION_GRANTED + } + + if (missingPermissions.isNotEmpty()) { + ActivityCompat.requestPermissions(this, missingPermissions.toTypedArray(), 1) } else { initVoskModel() } @@ -104,88 +132,48 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn override fun onRequestPermissionsResult(requestCode: Int, permissions: Array, grantResults: IntArray) { super.onRequestPermissionsResult(requestCode, permissions, grantResults) - if (requestCode == 1 && grantResults.isNotEmpty() && grantResults[0] == PackageManager.PERMISSION_GRANTED) { + if (requestCode == 1 && grantResults.all { it == PackageManager.PERMISSION_GRANTED }) { initVoskModel() } } private fun initVoskModel() { - // "model-de" ist der Ordner in assets. "model" ist der Zielordner auf dem Handy. StorageService.unpack(this, "model-de", "model", - { model: Model -> - voskModel = model - Log.d("JARVIS", "Modell erfolgreich geladen!") - startVosk() + { _: Model -> + Log.d("JARVIS", "Modell bereit, starte Hintergrund-Dienst...") + tellServiceTo(JarvisService.ACTION_START) + tvStatus.text = "Service läuft im Hintergrund!" 
}, - { exception: IOException -> - Log.e("JARVIS", "Vosk Entpack-Fehler: ${exception.message}") - runOnUiThread { tvStatus.text = "Fehler: Modell nicht gefunden" } + { exception: IOException -> + Log.e("JARVIS", "Vosk Entpack-Fehler: ${exception.message}") + tvStatus.text = "Fehler: Modell nicht gefunden" } ) } - private fun startVosk() { - try { - if (voskModel == null) return - - // Alten Service sicherheitshalber beenden - voskService?.stop() - voskService?.shutdown() - - // Wir horchen auf "computer" und "jarvis". - val rec = Recognizer(voskModel, 16000.0f, "[\"computer\", \"jarvis\", \"[unk]\"]") - voskService = SpeechService(rec, 16000.0f) - voskService?.startListening(this) - - runOnUiThread { tvStatus.text = "Bereit (Warte auf 'Jarvis' oder 'Computer')" } - } catch (e: Exception) { - Log.e("JARVIS", "Vosk Start Fehler: ${e.message}") + private fun tellServiceTo(action: String) { + val intent = Intent(this, JarvisService::class.java).apply { + this.action = action + } + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + startForegroundService(intent) + } else { + startService(intent) } } - // --- Vosk RecognitionListener --- - override fun onPartialResult(hypothesis: String) { - if (isInteracting) return // Wenn wir schon dabei sind, ignoriere weiteres - - val recognizedText = extractText(hypothesis) - if (recognizedText.contains("jarvis", true)) { - isInteracting = true // Sperre setzen - voskService?.stop() - - tvStatus.postDelayed({ - startVoiceInput() - }, 500) - } - } - - private fun extractText(json: String): String { - // Hilft, den Text aus dem JSON {"partial" : "..."} zu ziehen - return json.substringAfter(": \"").substringBefore("\"") - } - - override fun onResult(hypothesis: String) { - // Hier könnte man das finale Wort prüfen, falls Partial nicht reicht - } - - override fun onFinalResult(hypothesis: String) {} - override fun onError(e: Exception) { Log.e("JARVIS", "Vosk Error: ${e.message}") } - override fun onTimeout() {} - - // --- Google 
STT & TTS --- private fun startVoiceInput() { val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply { putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM) putExtra(RecognizerIntent.EXTRA_LANGUAGE, "de-DE") putExtra(RecognizerIntent.EXTRA_PROMPT, "Ich höre dir zu...") - - // Diese beiden sorgen dafür, dass Google geduldiger ist: putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 2000L) putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 2000L) } try { speechRecognizerLauncher.launch(intent) } catch (e: Exception) { - startVosk() + tellServiceTo(JarvisService.ACTION_RESUME) } } @@ -200,6 +188,12 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn runOnUiThread { tvStatus.text = "J.A.R.V.I.S.: $text" } speakOut(text) } + override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) { + runOnUiThread { + tvStatus.text = "Verbindungsfehler!" + tellServiceTo(JarvisService.ACTION_RESUME) + } + } }) } @@ -208,25 +202,19 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn tts.language = Locale.GERMAN setupVoiceSpinner() - // NEU: Wir horchen darauf, wann Jarvis aufhört zu sprechen tts.setOnUtteranceProgressListener(object : UtteranceProgressListener() { - override fun onStart(utteranceId: String?) { - // Jarvis fängt an zu sprechen - } - + override fun onStart(utteranceId: String?) {} override fun onDone(utteranceId: String?) { - // Jarvis ist fertig! Wake-Word wieder aktivieren. if (utteranceId == "TTS_DONE") { - // onDone läuft im Hintergrund, UI/Vosk Updates müssen in den Main Thread runOnUiThread { - startVosk() + // Wenn Jarvis fertig gesprochen hat, lauschen wir wieder! + tellServiceTo(JarvisService.ACTION_RESUME) } } } - @Deprecated("Deprecated in Java") override fun onError(utteranceId: String?) 
{ - runOnUiThread { startVosk() } // Bei einem Fehler auch wieder zuhören + runOnUiThread { tellServiceTo(JarvisService.ACTION_RESUME) } } }) } @@ -253,18 +241,13 @@ class MainActivity : AppCompatActivity(), RecognitionListener, TextToSpeech.OnIn } private fun speakOut(text: String) { - // Wir können hier Vosk stoppen, damit Jarvis sich nicht selbst hört - voskService?.stop() - - // Die ID "TTS_DONE" triggert unseren Listener, wenn der Text fertig gesprochen wurde tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, "TTS_DONE") } override fun onDestroy() { - voskService?.stop() - voskService?.shutdown() webSocket?.close(1000, "App Ende") tts.shutdown() + // Wir lassen den Service ABSICHTLICH nicht stoppen, wenn die Activity zerstört wird! super.onDestroy() } } \ No newline at end of file