
Commit 6972f00

basic genai aidl
1 parent 0de2e66 commit 6972f00

14 files changed (+1208, -0 lines)
Lines changed: 157 additions & 0 deletions
@@ -0,0 +1,157 @@
package com.example.modeltest;

import java.io.File;
import java.util.Scanner;

import ai.onnxruntime.genai.GenAIException;
import ai.onnxruntime.genai.Generator;
import ai.onnxruntime.genai.GeneratorParams;
import ai.onnxruntime.genai.Model;
import ai.onnxruntime.genai.Sequences;
import ai.onnxruntime.genai.Tokenizer;
import ai.onnxruntime.genai.TokenizerStream;

/**
 * Standalone Java application to test the Phi-3 model locally.
 *
 * Usage:
 * 1. Make sure the model files are in the directory specified by MODEL_PATH
 * 2. Compile and run this application
 * 3. Enter prompts to test the model
 * 4. Type 'exit' to quit
 */
public class ModelTest {

    // Change this to the path where you have the model files locally
    private static final String MODEL_PATH = "C:\\Users\\shekadam\\.aitk\\models\\Microsoft\\Phi-3.5-mini-instruct-generic-cpu\\cpu-int4-rtn-block-32-acc-level-4";

    public static void main(String[] args) {
        System.out.println("Phi-3 Model Test Application");
        System.out.println("============================");

        // Check that the model directory exists
        File modelDir = new File(MODEL_PATH);
        if (!modelDir.exists() || !modelDir.isDirectory()) {
            System.err.println("Model directory not found at: " + MODEL_PATH);
            System.err.println("Please update the MODEL_PATH in the source code.");
            return;
        }

        // Check for model files
        File[] onnxFiles = modelDir.listFiles((dir, name) -> name.endsWith(".onnx"));
        if (onnxFiles == null || onnxFiles.length == 0) {
            System.err.println("No ONNX files found in directory: " + MODEL_PATH);
            return;
        }

        System.out.println("Found " + onnxFiles.length + " ONNX files in model directory");
        System.out.println("Initializing model, please wait...");

        Model model = null;
        Tokenizer tokenizer = null;

        try {
            // Initialize model and tokenizer
            long startTime = System.currentTimeMillis();
            model = new Model(MODEL_PATH);
            tokenizer = model.createTokenizer();
            long initTime = System.currentTimeMillis() - startTime;

            System.out.println("Model initialized successfully in " + initTime + "ms");
            System.out.println("Enter your prompts, or type 'exit' to quit:");

            // Process user prompts
            Scanner scanner = new Scanner(System.in);
            while (true) {
                System.out.print("\nPrompt> ");
                String input = scanner.nextLine().trim();

                if (input.equalsIgnoreCase("exit")) {
                    break;
                }

                if (input.isEmpty()) {
                    continue;
                }

                generateResponse(model, tokenizer, input);
            }

            scanner.close();

        } catch (GenAIException e) {
            System.err.println("Error initializing model: " + e.getMessage());
            e.printStackTrace();
        } finally {
            // Clean up native resources
            if (tokenizer != null) tokenizer.close();
            if (model != null) model.close();
        }
    }

    private static void generateResponse(Model model, Tokenizer tokenizer, String prompt) {
        TokenizerStream stream = null;
        GeneratorParams generatorParams = null;
        Sequences encodedPrompt = null;
        Generator generator = null;

        try {
            long startTime = System.currentTimeMillis();

            // Format the prompt with the Phi-3 chat template
            String promptFormatted = "<|system|>\nYou are a helpful AI assistant. Answer in two paragraphs or less.<|end|>\n<|user|>\n" +
                    prompt + "<|end|>\n<|assistant|>\n";

            // Create a tokenizer stream for incremental, token-by-token decoding
            stream = tokenizer.createStream();

            // Create generator parameters
            generatorParams = model.createGeneratorParams();
            generatorParams.setSearchOption("max_length", 100);
            generatorParams.setSearchOption("temperature", 0.7);
            generatorParams.setSearchOption("top_p", 0.9);

            // Encode the prompt
            encodedPrompt = tokenizer.encode(promptFormatted);
            generatorParams.setInput(encodedPrompt);

            // Create the generator
            generator = new Generator(model, generatorParams);

            StringBuilder result = new StringBuilder();
            System.out.print("\nGenerating: ");

            // Generate tokens until done
            int tokenCount = 0;
            while (!generator.isDone()) {
                generator.computeLogits();
                generator.generateNextToken();

                int token = generator.getLastTokenInSequence(0);
                String decodedToken = stream.decode(token);
                result.append(decodedToken);

                // Print progress as tokens arrive
                System.out.print(decodedToken);
                tokenCount++;
            }

            long totalTime = System.currentTimeMillis() - startTime;
            double tokensPerSecond = tokenCount * 1000.0 / totalTime;

            System.out.println("\n\nGeneration complete!");
            System.out.println("Time: " + totalTime + "ms for " + tokenCount + " tokens (" +
                    String.format("%.2f", tokensPerSecond) + " tokens/sec)");

        } catch (Exception e) {
            System.err.println("\nError generating response: " + e.getMessage());
            e.printStackTrace();
        } finally {
            // Clean up native resources in reverse order of creation
            if (generator != null) generator.close();
            if (encodedPrompt != null) encodedPrompt.close();
            if (stream != null) stream.close();
            if (generatorParams != null) generatorParams.close();
        }
    }
}
Lines changed: 159 additions & 0 deletions
@@ -0,0 +1,159 @@
# Android Dual App Project

This project demonstrates a dual-app architecture for on-device AI inference using ONNX Runtime on Android.

## Overview

The project consists of two separate Android applications:

1. **UI App**: Provides the user interface for input collection and result display.
2. **AI App**: Handles AI model loading and inference using ONNX Runtime and ONNX Runtime GenAI.

## Architecture

The apps communicate using Android's Inter-Process Communication (IPC) mechanism via AIDL (Android Interface Definition Language).

```
+-------------+           +-------------+
|             |           |             |
|   UI App    |<--------->|   AI App    |
| (User Input)|   AIDL    | (Inference) |
|             |           |             |
+-------------+           +-------------+
                                 |
                                 v
                            +----------+
                            |   ONNX   |
                            |  Model   |
                            +----------+
```

### Why Two Separate Apps?

This dual-app architecture provides several benefits:

1. **Memory Isolation**: The AI inference process runs in a separate memory space from the UI, preventing the UI from becoming unresponsive during inference.

2. **Process Separation**: If model inference crashes or uses excessive memory, it won't take down the UI app.

3. **Resource Management**: The AI app can be configured to run at a different priority level than the UI app.

4. **Update Flexibility**: The AI model implementation can be updated independently of the UI application.

5. **Security**: Sensitive model operations are isolated from the user-facing app.

### Communication Flow

1. The UI App binds to the AI App's `InferenceService` using AIDL (see the binding sketch after this list).
2. The user enters a prompt in the UI App.
3. The prompt is sent via AIDL to the AI App.
4. The AI App loads the model (if not already loaded) and performs inference.
5. The AI App returns the generated response to the UI App.
6. The UI App displays the response to the user.
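
To make step 1 concrete, here is a minimal sketch of how the UI App might bind to the service and call across the process boundary. The action string, package name, and `generateResponse` method are assumptions for illustration; the actual contract lives in `IInferenceService.aidl`.

```java
import android.content.ComponentName;
import android.content.Context;
import android.content.Intent;
import android.content.ServiceConnection;
import android.os.IBinder;
import android.os.RemoteException;

public class InferenceClient {

    private volatile IInferenceService service;

    private final ServiceConnection connection = new ServiceConnection() {
        @Override
        public void onServiceConnected(ComponentName name, IBinder binder) {
            // Convert the raw binder into the typed proxy generated from IInferenceService.aidl.
            service = IInferenceService.Stub.asInterface(binder);
        }

        @Override
        public void onServiceDisconnected(ComponentName name) {
            service = null; // the AI App's process died; rebind before the next call
        }
    };

    public void bind(Context context) {
        Intent intent = new Intent("com.example.aiapp.BIND_INFERENCE"); // hypothetical action
        intent.setPackage("com.example.aiapp");                         // hypothetical package name
        context.bindService(intent, connection, Context.BIND_AUTO_CREATE);
    }

    public String ask(String prompt) throws RemoteException {
        // AIDL calls are synchronous, so invoke this off the main thread for long inference.
        return service.generateResponse(prompt); // assumed method name
    }
}
```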

## Key Components

### UI App

- `MainActivity`: Manages the user interface and communication with the AI App.
- `IInferenceService.aidl`: Interface definition for communicating with the AI App's service.

### AI App

- `InferenceService`: Android Service that hosts the AI model and processes inference requests.
- `ModelManager`: Handles ONNX Runtime model initialization and inference.
- `ModelDownloader`: Utility for downloading or extracting model files.
- `IInferenceService.aidl`: The same interface definition; the service implements its generated Stub.
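
For illustration, a matching sketch of the service side: the `Stub` generated from `IInferenceService.aidl` is implemented inside `InferenceService` and handed out from `onBind`. The `ModelManager.getInstance().generate(...)` call is an assumed entry point, not a signature taken from this commit.

```java
import android.app.Service;
import android.content.Intent;
import android.os.IBinder;

public class InferenceService extends Service {

    private final IInferenceService.Stub binder = new IInferenceService.Stub() {
        @Override
        public String generateResponse(String prompt) {
            // Binder calls arrive on a binder thread pool, not the main thread,
            // so blocking here does not freeze the AI App's own UI.
            return ModelManager.getInstance().generate(prompt); // assumed API
        }
    };

    @Override
    public IBinder onBind(Intent intent) {
        return binder;
    }
}
```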

## Dependencies

- ONNX Runtime for Android: `com.microsoft.onnxruntime:onnxruntime-android:1.16.1`
- ONNX Runtime GenAI: `onnxruntime-genai-android-0.4.0-dev.aar`

## Setup and Running

### Building the Apps

1. Open the project in Android Studio
2. Build both the AIApp and UIApp modules
3. Make sure you have the `onnxruntime-genai-android-0.4.0-dev.aar` file in the `AIApp/libs/` folder

### Setting up the Model Files

1. Download the Microsoft Phi-3.5-mini model files (ONNX format)
2. Use the provided PowerShell script to push the model to your device:

```powershell
.\push_model.ps1 -modelDir "path\to\model\directory"
```

Or manually push the files via ADB:

```powershell
adb shell mkdir -p /sdcard/phi-3-model
adb push path\to\model-file.onnx /sdcard/phi-3-model/
adb push path\to\tokenizer.json /sdcard/phi-3-model/
```

### Running the Apps

1. Install both apps on your Android device
2. Launch the AI App first to initialize the service
3. Grant the necessary storage permissions when prompted
4. Start the service using the button in the AI App
5. Launch the UI App, which automatically connects to the AI service
6. Enter prompts in the UI App and receive model responses

### Monitoring Performance

The apps provide detailed performance logs via Android logcat. Connect your device to a computer and run:

```powershell
adb logcat -s ModelManager:D MemoryMonitor:D InferenceService:D UIApp:D
```

## Permissions

The applications require the following permissions:

- `READ_EXTERNAL_STORAGE` / `WRITE_EXTERNAL_STORAGE` - For accessing model files in external storage
- `READ_MEDIA_*` (Android 13+) - For accessing model files on newer Android versions

For Android 10+ devices, the app uses `requestLegacyExternalStorage="true"` to ensure compatibility with older storage access methods.
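
As a sketch, a runtime check for the legacy storage permission might look like the following (assuming the pre-Android-13 storage model that matches the `requestLegacyExternalStorage` approach above; the class name and request code are hypothetical):

```java
import android.Manifest;
import android.app.Activity;
import android.content.pm.PackageManager;

import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;

public final class StoragePermissionHelper {

    private static final int REQUEST_STORAGE = 1001; // arbitrary request code

    /** Requests the legacy read permission if it has not been granted yet. */
    public static void ensureStoragePermission(Activity activity) {
        if (ContextCompat.checkSelfPermission(activity,
                Manifest.permission.READ_EXTERNAL_STORAGE) != PackageManager.PERMISSION_GRANTED) {
            ActivityCompat.requestPermissions(activity,
                    new String[] { Manifest.permission.READ_EXTERNAL_STORAGE },
                    REQUEST_STORAGE);
        }
    }
}
```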

## Model Setup

### Pushing Model Files to the Device using ADB

The app is configured to load model files from the `/sdcard/phi-3-model/` directory on your Android device. Follow these steps to push the model files:

1. Download the Phi-3.5 model files (the ONNX format files) from Microsoft or Hugging Face
2. Connect your Android device to your computer
3. Enable USB debugging on your device
4. Open a terminal and use these ADB commands:

```bash
# Create directory on device
adb shell mkdir -p /sdcard/phi-3-model

# Push model files to device
adb push path/to/model-file.onnx /sdcard/phi-3-model/
adb push path/to/tokenizer.json /sdcard/phi-3-model/
# Push any other required model files
```

5. Verify the files were transferred correctly:

```bash
adb shell ls -la /sdcard/phi-3-model/
```

### Alternative Setup Methods

For a production app, you might want to:

1. Copy the model files to the app's assets folder
2. Extract them at runtime to the app's internal storage (see the sketch below)
3. Download them from a server at runtime
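
A minimal sketch of options 1 and 2, assuming a model file bundled under `assets/` (the class and file names here are hypothetical):

```java
import android.content.Context;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public final class AssetModelExtractor {

    /** Copies the named asset into the app's private files directory and returns it. */
    public static File extract(Context context, String assetName) throws IOException {
        File outFile = new File(context.getFilesDir(), assetName);
        if (outFile.exists()) {
            return outFile; // already extracted on a previous launch
        }
        try (InputStream in = context.getAssets().open(assetName);
             OutputStream out = new FileOutputStream(outFile)) {
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
        return outFile;
    }
}

// Usage (hypothetical file name):
// File model = AssetModelExtractor.extract(context, "phi-3-model.onnx");
```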

## License

[Your license information here]
Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id("com.android.application") version "8.2.0" apply false
    id("org.jetbrains.kotlin.android") version "1.9.0" apply false
}

tasks.register("clean", Delete::class) {
    delete(rootProject.buildDir)
}
Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
# PowerShell script to handle stubborn file locks
Write-Host "Attempting to resolve file lock issues..."

$buildPath = "AIApp\build"
$problematicFile = "AIApp\build\intermediates\compile_and_runtime_not_namespaced_r_class_jar\debug\R.jar"

# Stop all Gradle daemons so they release their file handles
Write-Host "Stopping Gradle daemons..."
& .\gradlew --stop

# Wait for processes to settle
Start-Sleep -Seconds 3

# Try to remove the specific problematic file first
if (Test-Path $problematicFile) {
    Write-Host "Attempting to remove problematic R.jar file..."
    try {
        Remove-Item -Path $problematicFile -Force -ErrorAction Stop
        Write-Host "Successfully removed R.jar"
    } catch {
        Write-Host "R.jar still locked, attempting workaround..."

        # Try to move the file to a temp location instead
        $tempFile = "$env:TEMP\R_$(Get-Date -Format 'yyyyMMdd_HHmmss').jar"
        try {
            Move-Item -Path $problematicFile -Destination $tempFile -Force
            Write-Host "Moved locked file to temp location: $tempFile"
        } catch {
            Write-Host "Could not move file, it's still locked by a process"
        }
    }
}

# Now try to remove the entire build directory
if (Test-Path $buildPath) {
    Write-Host "Removing build directory..."
    try {
        Remove-Item -Path $buildPath -Recurse -Force -ErrorAction Stop
        Write-Host "Successfully removed build directory"
    } catch {
        Write-Host "Some files in build directory are still locked"
    }
}

# Wait, then try a Gradle clean
Start-Sleep -Seconds 2
Write-Host "Attempting Gradle clean..."
& .\gradlew clean --no-daemon --no-build-cache

Write-Host "Script completed."
