@@ -125,9 +125,7 @@ public int generate(String prompt, LlmCallback llmCallback, boolean echo) {
125
125
* @param llmCallback callback object to receive results
126
126
* @param echo indicates whether to echo the input prompt or not (text completion vs chat)
127
127
*/
128
- public int generate (String prompt , int seqLen , LlmCallback llmCallback , boolean echo ) {
129
- return generate (null , 0 , 0 , 0 , prompt , seqLen , llmCallback , echo );
130
- }
128
+ public native int generate (String prompt , int seqLen , LlmCallback llmCallback , boolean echo );
131
129
132
130
/**
133
131
* Start generating tokens from the module.
@@ -154,16 +152,19 @@ public int generate(String prompt, LlmGenerationConfig config, LlmCallback llmCa
154
152
* @param llmCallback callback object to receive results.
155
153
* @param echo indicates whether to echo the input prompt or not (text completion vs chat)
156
154
*/
157
- @ DoNotStrip
158
- public native int generate (
155
+ public int generate (
159
156
int [] image ,
160
157
int width ,
161
158
int height ,
162
159
int channels ,
163
160
String prompt ,
164
161
int seqLen ,
165
162
LlmCallback llmCallback ,
166
- boolean echo );
163
+ boolean echo ) {
164
+ prefillPrompt (prompt );
165
+ prefillImages (image , width , height , channels );
166
+ return generate ("" , llmCallback , echo );
167
+ }
167
168
168
169
/**
169
170
* Prefill an LLaVA Module with the given images input.
@@ -172,16 +173,12 @@ public native int generate(
172
173
* @param width Input image width
173
174
* @param height Input image height
174
175
* @param channels Input image number of channels
175
- * @param startPos The starting position in KV cache of the input in the LLM.
176
176
* @return 0, as the updated starting position in KV cache of the input in the LLM is no longer
177
177
* exposed to user.
178
178
* @throws RuntimeException if the prefill failed
179
179
*/
180
180
@ Deprecated
181
- public long prefillImages (int [] image , int width , int height , int channels , long startPos ) {
182
- if (startPos == 0 ) {
183
- resetContext ();
184
- }
181
+ public long prefillImages (int [] image , int width , int height , int channels ) {
185
182
int nativeResult = appendImagesInput (image , width , height , channels );
186
183
if (nativeResult != 0 ) {
187
184
throw new RuntimeException ("Prefill failed with error code: " + nativeResult );
@@ -195,28 +192,21 @@ public long prefillImages(int[] image, int width, int height, int channels, long
195
192
* Prefill an LLaVA Module with the given text input.
196
193
*
197
194
* @param prompt The text prompt to LLaVA.
198
- * @param startPos The starting position in KV cache of the input in the LLM. It's passed as
199
- * reference and will be updated inside this function.
200
- * @param bos The number of BOS (begin of sequence) token.
201
- * @param eos The number of EOS (end of sequence) token.
202
195
* @return 0, as the updated starting position in KV cache of the input in the LLM is no longer
203
196
* exposed to user.
204
197
* @throws RuntimeException if the prefill failed
205
198
*/
206
199
@ Deprecated
207
- public long prefillPrompt (String prompt , long startPos , int bos , int eos ) {
208
- if (startPos == 0 ) {
209
- resetContext ();
210
- }
211
- int nativeResult = appendTextInput (prompt , bos , eos );
200
+ public long prefillPrompt (String prompt ) {
201
+ int nativeResult = appendTextInput (prompt );
212
202
if (nativeResult != 0 ) {
213
203
throw new RuntimeException ("Prefill failed with error code: " + nativeResult );
214
204
}
215
205
return 0 ;
216
206
}
217
207
218
- // returns a tuple of ( status, updated startPos)
219
- private native int appendTextInput (String prompt , int bos , int eos );
208
+ // returns status
209
+ private native int appendTextInput (String prompt );
220
210
221
211
/**
222
212
* Reset the context of the LLM. This will clear the KV cache and reset the state of the LLM.
0 commit comments