1717import android .Manifest ;
1818import android .app .Activity ;
1919import android .content .pm .PackageManager ;
20- import android .os .AsyncTask ;
2120import android .os .Bundle ;
22- import androidx .annotation .NonNull ;
23- import androidx .core .app .ActivityCompat ;
24- import androidx .core .content .ContextCompat ;
25-
2621import android .text .method .ScrollingMovementMethod ;
27- import android .util .Log ;
28- import android .view .View ;
2922import android .widget .Button ;
3023import android .widget .TextView ;
24+ import android .widget .ToggleButton ;
3125
3226import org .vosk .LibVosk ;
3327import org .vosk .LogLevel ;
34- import org .vosk .android .Assets ;
35- import org .vosk .android .RecognitionListener ;
3628import org .vosk .Model ;
3729import org .vosk .Recognizer ;
30+ import org .vosk .android .RecognitionListener ;
3831import org .vosk .android .SpeechService ;
32+ import org .vosk .android .SpeechStreamService ;
33+ import org .vosk .android .StorageService ;
3934
40- import java .io .File ;
4135import java .io .IOException ;
4236import java .io .InputStream ;
43- import java .lang .ref .WeakReference ;
37+
38+ import androidx .annotation .NonNull ;
39+ import androidx .core .app .ActivityCompat ;
40+ import androidx .core .content .ContextCompat ;
4441
4542public class VoskActivity extends Activity implements
4643 RecognitionListener {
@@ -49,14 +46,14 @@ public class VoskActivity extends Activity implements
4946 static private final int STATE_READY = 1 ;
5047 static private final int STATE_DONE = 2 ;
5148 static private final int STATE_FILE = 3 ;
52- static private final int STATE_MIC = 4 ;
49+ static private final int STATE_MIC = 4 ;
5350
5451 /* Used to handle permission request */
5552 private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1 ;
5653
57-
5854 private Model model ;
5955 private SpeechService speechService ;
56+ private SpeechStreamService speechStreamService ;
6057 private TextView resultView ;
6158
6259 @ Override
@@ -68,108 +65,30 @@ public void onCreate(Bundle state) {
6865 resultView = findViewById (R .id .result_text );
6966 setUiState (STATE_START );
7067
71- findViewById (R .id .recognize_file ).setOnClickListener (new View .OnClickListener () {
72- @ Override
73- public void onClick (View view ) {
74- recognizeFile ();
75- }
76- });
68+ findViewById (R .id .recognize_file ).setOnClickListener (view -> recognizeFile ());
69+ findViewById (R .id .recognize_mic ).setOnClickListener (view -> recognizeMicrophone ());
70+ ((ToggleButton ) findViewById (R .id .pause )).setOnCheckedChangeListener ((view , isChecked ) -> pause (isChecked ));
7771
78- findViewById (R .id .recognize_mic ).setOnClickListener (new View .OnClickListener () {
79- @ Override
80- public void onClick (View view ) {
81- recognizeMicrophone ();
82- }
83- });
72+ LibVosk .setLogLevel (LogLevel .INFO );
8473
85- // Check if user has given permission to record audio
74+ // Check if user has given permission to record audio, init the model after permission is granted
8675 int permissionCheck = ContextCompat .checkSelfPermission (getApplicationContext (), Manifest .permission .RECORD_AUDIO );
8776 if (permissionCheck != PackageManager .PERMISSION_GRANTED ) {
8877 ActivityCompat .requestPermissions (this , new String []{Manifest .permission .RECORD_AUDIO }, PERMISSIONS_REQUEST_RECORD_AUDIO );
89- return ;
78+ } else {
79+ initModel ();
9080 }
91- // Recognizer initialization is a time-consuming and it involves IO,
92- // so we execute it in async task
93- new SetupTask (this ).execute ();
9481 }
9582
96- private static class SetupTask extends AsyncTask <Void , Void , Exception > {
97- final WeakReference <org .vosk .demo .VoskActivity > activityReference ;
98-
99- SetupTask (org .vosk .demo .VoskActivity activity ) {
100- this .activityReference = new WeakReference <>(activity );
101- }
102-
103- @ Override
104- protected Exception doInBackground (Void ... params ) {
105- try {
106- Assets assets = new Assets (activityReference .get ());
107- File assetDir = assets .syncAssets ();
108- Log .d ("KaldiDemo" , "Sync files in the folder " + assetDir .toString ());
109-
110- LibVosk .setLogLevel (LogLevel .INFO );
111-
112- activityReference .get ().model = new Model (assetDir .toString () + "/model-android" );
113- } catch (IOException e ) {
114- return e ;
115- }
116- return null ;
117- }
118-
119- @ Override
120- protected void onPostExecute (Exception result ) {
121- if (result != null ) {
122- activityReference .get ().setErrorState (String .format (activityReference .get ().getString (R .string .failed ), result ));
123- } else {
124- activityReference .get ().setUiState (STATE_READY );
125- }
126- }
83+ private void initModel () {
84+ StorageService .unpack (this , "model-en-us" , "model" ,
85+ (model ) -> {
86+ this .model = model ;
87+ setUiState (STATE_READY );
88+ },
89+ (exception ) -> setErrorState ("Failed to unpack the model" + exception .getMessage ()));
12790 }
12891
129- private static class RecognizeTask extends AsyncTask <Void , Void , String > {
130- final WeakReference <org .vosk .demo .VoskActivity > activityReference ;
131- final WeakReference <TextView > resultView ;
132-
133- RecognizeTask (org .vosk .demo .VoskActivity activity , TextView resultView ) {
134- this .activityReference = new WeakReference <>(activity );
135- this .resultView = new WeakReference <>(resultView );
136- }
137-
138- @ Override
139- protected String doInBackground (Void ... params ) {
140- Recognizer rec ;
141- long startTime = System .currentTimeMillis ();
142- StringBuilder result = new StringBuilder ();
143- try {
144- rec = new Recognizer (activityReference .get ().model , 16000.f , "[\" one zero zero zero one\" , " +
145- "\" oh zero one two three four five six seven eight nine\" , \" [unk]\" ]" );
146-
147- InputStream ais = activityReference .get ().getAssets ().open ("10001-90210-01803.wav" );
148- if (ais .skip (44 ) != 44 ) {
149- return "" ;
150- }
151- byte [] b = new byte [4096 ];
152- int nbytes ;
153- while ((nbytes = ais .read (b )) >= 0 ) {
154- if (rec .acceptWaveForm (b , nbytes )) {
155- result .append (rec .getResult ());
156- } else {
157- result .append (rec .getPartialResult ());
158- }
159- }
160- result .append (rec .getFinalResult ());
161- } catch (IOException e ) {
162- return "" ;
163- }
164- return String .format (activityReference .get ().getString (R .string .elapsed ), result .toString (), (System .currentTimeMillis () - startTime ));
165- }
166-
167- @ Override
168- protected void onPostExecute (String result ) {
169- activityReference .get ().setUiState (STATE_READY );
170- resultView .get ().append (result + "\n " );
171- }
172- }
17392
17493 @ Override
17594 public void onRequestPermissionsResult (int requestCode ,
@@ -180,7 +99,7 @@ public void onRequestPermissionsResult(int requestCode,
18099 if (grantResults .length > 0 && grantResults [0 ] == PackageManager .PERMISSION_GRANTED ) {
181100 // Model initialization is time-consuming and involves IO,
182101 // so initModel() delegates it to StorageService and finishes in callbacks
183- new SetupTask ( this ). execute ();
102+ initModel ();
184103 } else {
185104 finish ();
186105 }
@@ -192,17 +111,29 @@ public void onDestroy() {
192111 super .onDestroy ();
193112
194113 if (speechService != null ) {
195- speechService .cancel ();
114+ speechService .stop ();
196115 speechService .shutdown ();
197116 }
198- }
199117
118+ if (speechStreamService != null ) {
119+ speechStreamService .stop ();
120+ }
121+ }
200122
201123 @ Override
202124 public void onResult (String hypothesis ) {
203125 resultView .append (hypothesis + "\n " );
204126 }
205127
128+ @ Override
129+ public void onFinalResult (String hypothesis ) {
130+ resultView .append (hypothesis + "\n " );
131+ setUiState (STATE_DONE );
132+ if (speechStreamService != null ) {
133+ speechStreamService = null ;
134+ }
135+ }
136+
206137 @ Override
207138 public void onPartialResult (String hypothesis ) {
208139 resultView .append (hypothesis + "\n " );
@@ -215,9 +146,7 @@ public void onError(Exception e) {
215146
216147 @ Override
217148 public void onTimeout () {
218- speechService .cancel ();
219- speechService = null ;
220- setUiState (STATE_READY );
149+ setUiState (STATE_DONE );
221150 }
222151
223152 private void setUiState (int state ) {
@@ -227,29 +156,38 @@ private void setUiState(int state) {
227156 resultView .setMovementMethod (new ScrollingMovementMethod ());
228157 findViewById (R .id .recognize_file ).setEnabled (false );
229158 findViewById (R .id .recognize_mic ).setEnabled (false );
159+ findViewById (R .id .pause ).setEnabled ((false ));
230160 break ;
231161 case STATE_READY :
232162 resultView .setText (R .string .ready );
233163 ((Button ) findViewById (R .id .recognize_mic )).setText (R .string .recognize_microphone );
234164 findViewById (R .id .recognize_file ).setEnabled (true );
235165 findViewById (R .id .recognize_mic ).setEnabled (true );
166+ findViewById (R .id .pause ).setEnabled ((false ));
236167 break ;
237168 case STATE_DONE :
169+ ((Button ) findViewById (R .id .recognize_file )).setText (R .string .recognize_file );
238170 ((Button ) findViewById (R .id .recognize_mic )).setText (R .string .recognize_microphone );
239171 findViewById (R .id .recognize_file ).setEnabled (true );
240172 findViewById (R .id .recognize_mic ).setEnabled (true );
173+ findViewById (R .id .pause ).setEnabled ((false ));
241174 break ;
242175 case STATE_FILE :
176+ ((Button ) findViewById (R .id .recognize_file )).setText (R .string .stop_file );
243177 resultView .setText (getString (R .string .starting ));
244178 findViewById (R .id .recognize_mic ).setEnabled (false );
245- findViewById (R .id .recognize_file ).setEnabled (false );
179+ findViewById (R .id .recognize_file ).setEnabled (true );
180+ findViewById (R .id .pause ).setEnabled ((false ));
246181 break ;
247182 case STATE_MIC :
248183 ((Button ) findViewById (R .id .recognize_mic )).setText (R .string .stop_microphone );
249184 resultView .setText (getString (R .string .say_something ));
250185 findViewById (R .id .recognize_file ).setEnabled (false );
251186 findViewById (R .id .recognize_mic ).setEnabled (true );
187+ findViewById (R .id .pause ).setEnabled ((true ));
252188 break ;
189+ default :
190+ throw new IllegalStateException ("Unexpected value: " + state );
253191 }
254192 }
255193
@@ -261,26 +199,50 @@ private void setErrorState(String message) {
261199 }
262200
263201 private void recognizeFile () {
264- setUiState (STATE_FILE );
265- new RecognizeTask (this , resultView ).execute ();
202+ if (speechStreamService != null ) {
203+ setUiState (STATE_DONE );
204+ speechStreamService .stop ();
205+ speechStreamService = null ;
206+ } else {
207+ setUiState (STATE_FILE );
208+ try {
209+ Recognizer rec = new Recognizer (model , 16000.f , "[\" one zero zero zero one\" , " +
210+ "\" oh zero one two three four five six seven eight nine\" , \" [unk]\" ]" );
211+
212+ InputStream ais = getAssets ().open (
213+ "10001-90210-01803.wav" );
214+ if (ais .skip (44 ) != 44 ) throw new IOException ("File too short" );
215+
216+ speechStreamService = new SpeechStreamService (rec , ais , 16000 );
217+ speechStreamService .start (this );
218+ } catch (IOException e ) {
219+ setErrorState (e .getMessage ());
220+ }
221+ }
266222 }
267223
268224 private void recognizeMicrophone () {
269225 if (speechService != null ) {
270226 setUiState (STATE_DONE );
271- speechService .cancel ();
227+ speechService .stop ();
272228 speechService = null ;
273229 } else {
274230 setUiState (STATE_MIC );
275231 try {
276232 Recognizer rec = new Recognizer (model , 16000.0f );
277233 speechService = new SpeechService (rec , 16000.0f );
278- speechService .addListener (this );
279- speechService .startListening ();
234+ speechService .startListening (this );
280235 } catch (IOException e ) {
281236 setErrorState (e .getMessage ());
282237 }
283238 }
284239 }
285240
241+
242+ private void pause (boolean checked ) {
243+ if (speechService != null ) {
244+ speechService .setPause (checked );
245+ }
246+ }
247+
286248}
0 commit comments