| 
45 | 45 | import androidx.core.content.res.ResourcesCompat;  | 
46 | 46 | import com.google.gson.Gson;  | 
47 | 47 | import com.google.gson.reflect.TypeToken;  | 
 | 48 | +import java.io.IOException;  | 
48 | 49 | import java.lang.reflect.Type;  | 
 | 50 | +import java.nio.ByteBuffer;  | 
 | 51 | +import java.nio.ByteOrder;  | 
 | 52 | +import java.nio.file.Files;  | 
 | 53 | +import java.nio.file.Paths;  | 
49 | 54 | import java.util.ArrayList;  | 
50 | 55 | import java.util.List;  | 
51 | 56 | import java.util.concurrent.Executor;  | 
@@ -84,6 +89,7 @@ public class MainActivity extends AppCompatActivity implements Runnable, LlmCall  | 
84 | 89 |   private int promptID = 0;  | 
85 | 90 |   private Executor executor;  | 
86 | 91 |   private boolean sawStartHeaderId = false;  | 
 | 92 | +  private String mAudioFileToPrefill;  | 
87 | 93 | 
 
  | 
88 | 94 |   @Override  | 
89 | 95 |   public void onResult(String result) {  | 
@@ -477,6 +483,22 @@ private void setupMediaButton() {  | 
477 | 483 |     mAudioButton.setOnClickListener(  | 
478 | 484 |         view -> {  | 
479 | 485 |           mAddMediaLayout.setVisibility(View.GONE);  | 
 | 486 | +          String[] audioFiles =  | 
 | 487 | +              SettingsActivity.listLocalFile("/data/local/tmp/audio/", new String[] {".bin"});  | 
 | 488 | +          AlertDialog.Builder audioFilePathBuilder = new AlertDialog.Builder(this);  | 
 | 489 | +          audioFilePathBuilder.setTitle("Select audio feature path");  | 
 | 490 | +          audioFilePathBuilder.setSingleChoiceItems(  | 
 | 491 | +              audioFiles,  | 
 | 492 | +              -1,  | 
 | 493 | +              (dialog, item) -> {  | 
 | 494 | +                mAudioFileToPrefill = audioFiles[item];  | 
 | 495 | +                mMessageAdapter.add(  | 
 | 496 | +                    new Message(  | 
 | 497 | +                        "Selected audio: " + mAudioFileToPrefill, false, MessageType.SYSTEM, 0));  | 
 | 498 | +                mMessageAdapter.notifyDataSetChanged();  | 
 | 499 | +                dialog.dismiss();  | 
 | 500 | +              });  | 
 | 501 | +          audioFilePathBuilder.create().show();  | 
480 | 502 |         });  | 
481 | 503 |     mCameraButton = requireViewById(R.id.cameraButton);  | 
482 | 504 |     mCameraButton.setOnClickListener(  | 
@@ -773,8 +795,16 @@ public void run() {  | 
773 | 795 |                           mCurrentSettingsFields.getModelType(),  | 
774 | 796 |                           mCurrentSettingsFields.getBackendType())  | 
775 | 797 |                       == ModelUtils.VISION_MODEL) {  | 
776 |  | -                    mModule.generate(  | 
777 |  | -                        finalPrompt, ModelUtils.VISION_MODEL_SEQ_LEN, MainActivity.this, false);  | 
 | 798 | +                    if (mCurrentSettingsFields.getModelType() == ModelType.VOXTRAL  | 
 | 799 | +                        && mAudioFileToPrefill != null) {  | 
 | 800 | +                      prefillVoxtralAudio(mAudioFileToPrefill, finalPrompt);  | 
 | 801 | +                      mAudioFileToPrefill = null;  | 
 | 802 | +                      mModule.generate(  | 
 | 803 | +                          "", ModelUtils.VISION_MODEL_SEQ_LEN, MainActivity.this, false);  | 
 | 804 | +                    } else {  | 
 | 805 | +                      mModule.generate(  | 
 | 806 | +                          finalPrompt, ModelUtils.VISION_MODEL_SEQ_LEN, MainActivity.this, false);  | 
 | 807 | +                    }  | 
778 | 808 |                   } else if (mCurrentSettingsFields.getModelType() == ModelType.LLAMA_GUARD_3) {  | 
779 | 809 |                     String llamaGuardPromptForClassification =  | 
780 | 810 |                         PromptFormat.getFormattedLlamaGuardPrompt(rawPrompt);  | 
@@ -808,6 +838,28 @@ public void run() {  | 
808 | 838 |     mMessageAdapter.notifyDataSetChanged();  | 
809 | 839 |   }  | 
810 | 840 | 
 
  | 
 | 841 | +  private void prefillVoxtralAudio(String audioFeaturePath, String textPrompt) {  | 
 | 842 | +    try {  | 
 | 843 | +      byte[] byteData = Files.readAllBytes(Paths.get(audioFeaturePath));  | 
 | 844 | +      ByteBuffer buffer = ByteBuffer.wrap(byteData).order(ByteOrder.LITTLE_ENDIAN);  | 
 | 845 | +      int floatCount = byteData.length / Float.BYTES;  | 
 | 846 | +      float[] floats = new float[floatCount];  | 
 | 847 | + | 
 | 848 | +      // Read floats from the buffer  | 
 | 849 | +      for (int i = 0; i < floatCount; i++) {  | 
 | 850 | +        floats[i] = buffer.getFloat();  | 
 | 851 | +      }  | 
 | 852 | +      int bins = 128;  | 
 | 853 | +      int frames = 3000;  | 
 | 854 | +      int batchSize = floatCount / (bins * frames);  | 
 | 855 | +      mModule.prefillPrompt("<s>[INST][BEGIN_AUDIO]");  | 
 | 856 | +      mModule.prefillAudio(floats, batchSize, bins, frames);  | 
 | 857 | +      mModule.prefillPrompt(textPrompt + "[/INST]");  | 
 | 858 | +    } catch (IOException e) {  | 
 | 859 | +      Log.e("AudioPrefill", "Audio file error");  | 
 | 860 | +    }  | 
 | 861 | +  }  | 
 | 862 | + | 
811 | 863 |   @Override  | 
812 | 864 |   public void run() {  | 
813 | 865 |     runOnUiThread(  | 
 | 
0 commit comments