@@ -125,9 +125,7 @@ public int generate(String prompt, LlmCallback llmCallback, boolean echo) {
    * @param llmCallback callback object to receive results
    * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
    */
-  public int generate(String prompt, int seqLen, LlmCallback llmCallback, boolean echo) {
-    return generate(null, 0, 0, 0, prompt, seqLen, llmCallback, echo);
-  }
+  public native int generate(String prompt, int seqLen, LlmCallback llmCallback, boolean echo);

   /**
    * Start generating tokens from the module.
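For context, a minimal caller-side sketch of the text-only generate() that this hunk turns into a native method. The LlmModule constructor arguments and the LlmCallback method names used here (onResult, onStats) are assumptions about the surrounding ExecuTorch LLM Java API, not something this diff defines.

    // Hypothetical usage sketch; constructor arguments, file paths and the
    // exact LlmCallback surface are assumptions, not part of this diff.
    LlmModule module =
        new LlmModule("/data/local/tmp/llama.pte", "/data/local/tmp/tokenizer.bin", 0.8f);
    LlmCallback callback =
        new LlmCallback() {
          @Override
          public void onResult(String token) {
            System.out.print(token); // stream each generated token as it arrives
          }

          @Override
          public void onStats(String stats) {
            System.out.println("\nstats: " + stats); // generation statistics, assumed signature
          }
        };
    // echo = false: chat-style completion, do not repeat the prompt back.
    int status = module.generate("What is ExecuTorch?", /* seqLen */ 128, callback, /* echo */ false);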
@@ -154,16 +152,19 @@ public int generate(String prompt, LlmGenerationConfig config, LlmCallback llmCa
    * @param llmCallback callback object to receive results.
    * @param echo indicate whether to echo the input prompt or not (text completion vs chat)
    */
-  @DoNotStrip
-  public native int generate(
+  public int generate(
       int[] image,
       int width,
       int height,
       int channels,
       String prompt,
       int seqLen,
       LlmCallback llmCallback,
-      boolean echo);
+      boolean echo) {
+    prefillPrompt(prompt);
+    prefillImages(image, width, height, channels);
+    return generate("", llmCallback, echo);
+  }

   /**
    * Prefill an LLaVA Module with the given images input.
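After this hunk the multimodal overload is a plain Java wrapper: it prefills the prompt and the image, then delegates to the three-argument generate(). A hedged usage sketch, reusing the module and callback from the sketch above; the image dimensions and pixel packing are placeholders:

    // Sketch only: imageRgb is assumed to hold decoded RGB pixel values.
    int width = 336, height = 336, channels = 3;
    int[] imageRgb = new int[width * height * channels];
    // ... fill imageRgb from a decoded bitmap ...
    int status =
        module.generate(
            imageRgb, width, height, channels,
            "What is in this picture?",
            /* seqLen */ 256,
            callback,
            /* echo */ false);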
@@ -172,16 +173,12 @@ public native int generate(
    * @param width Input image width
    * @param height Input image height
    * @param channels Input image number of channels
-   * @param startPos The starting position in KV cache of the input in the LLM.
    * @return 0, as the updated starting position in KV cache of the input in the LLM is no longer
    *     exposed to user.
    * @throws RuntimeException if the prefill failed
    */
   @Deprecated
-  public long prefillImages(int[] image, int width, int height, int channels, long startPos) {
-    if (startPos == 0) {
-      resetContext();
-    }
+  public long prefillImages(int[] image, int width, int height, int channels) {
     int nativeResult = appendImagesInput(image, width, height, channels);
     if (nativeResult != 0) {
       throw new RuntimeException("Prefill failed with error code: " + nativeResult);
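With startPos gone, the KV-cache reset that prefillImages() used to perform when startPos == 0 is no longer implicit. A sketch of the caller-side migration, assuming resetContext() is public as the javadoc at the end of this diff suggests:

    // Before this change, prefillImages(image, width, height, channels, /* startPos */ 0)
    // reset the KV cache internally. Now the caller resets explicitly:
    module.resetContext();                                    // clear KV cache for a fresh sequence
    module.prefillImages(imageRgb, width, height, channels);  // returns 0; throws RuntimeException on failure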
@@ -195,28 +192,21 @@ public long prefillImages(int[] image, int width, int height, int channels, long
    * Prefill an LLaVA Module with the given text input.
    *
    * @param prompt The text prompt to LLaVA.
-   * @param startPos The starting position in KV cache of the input in the LLM. It's passed as
-   *     reference and will be updated inside this function.
-   * @param bos The number of BOS (begin of sequence) token.
-   * @param eos The number of EOS (end of sequence) token.
    * @return 0, as the updated starting position in KV cache of the input in the LLM is no longer
    *     exposed to user.
    * @throws RuntimeException if the prefill failed
    */
   @Deprecated
-  public long prefillPrompt(String prompt, long startPos, int bos, int eos) {
-    if (startPos == 0) {
-      resetContext();
-    }
-    int nativeResult = appendTextInput(prompt, bos, eos);
+  public long prefillPrompt(String prompt) {
+    int nativeResult = appendTextInput(prompt);
     if (nativeResult != 0) {
       throw new RuntimeException("Prefill failed with error code: " + nativeResult);
     }
     return 0;
   }

-  // returns a tuple of (status, updated startPos)
-  private native int appendTextInput(String prompt, int bos, int eos);
+  // returns status
+  private native int appendTextInput(String prompt);

   /**
    * Reset the context of the LLM. This will clear the KV cache and reset the state of the LLM.
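For completeness, a sketch of the deprecated prefill path after this change. prefillPrompt() no longer takes startPos, bos or eos, so BOS/EOS handling presumably moves into the native appendTextInput(); that is an inference from this diff, not something it states.

    module.resetContext();                                   // start a new conversation
    module.prefillPrompt("A chat between a user and an assistant. USER: ");
    module.prefillImages(imageRgb, width, height, channels); // optional image prefill
    // Generate from the prefilled context; the empty prompt mirrors the
    // wrapper body of the multimodal generate() above.
    int status = module.generate("", callback, /* echo */ false);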