1 file changed: +10 −28 lines

@@ -150,40 +150,22 @@ fn main() -> Result<()> {
     }
 
     std::io::stderr().flush()?;
-
-    // create a llama_batch with the size of the context
-    // we use this object to submit token data for decoding
-    let mut batch = LlamaBatch::new(n_ctx, 1);
-
-    let mut max_seq_id_batch = 0;
     let mut output = Vec::with_capacity(tokens_lines_list.len());
 
     let t_main_start = ggml_time_us();
 
     for tokens in &tokens_lines_list {
-        // Flush the batch if the next prompt would exceed our batch size
-        if (batch.n_tokens() as usize + tokens.len()) > n_ctx {
-            batch_decode(
-                &mut ctx,
-                &mut batch,
-                max_seq_id_batch,
-                &mut output,
-                normalise,
-            )?;
-            max_seq_id_batch = 0;
-        }
-
-        batch.add_sequence(tokens, max_seq_id_batch, false)?;
-        max_seq_id_batch += 1;
+        // Create a fresh batch for each sequence
+        let mut batch = LlamaBatch::new(n_ctx, 1);
+        batch.add_sequence(tokens, 0, false)?;
+        batch_decode(
+            &mut ctx,
+            &mut batch,
+            1, // Only one sequence in this batch
+            &mut output,
+            normalise,
+        )?;
     }
-    // Handle final batch
-    batch_decode(
-        &mut ctx,
-        &mut batch,
-        max_seq_id_batch,
-        &mut output,
-        normalise,
-    )?;
 
     let t_main_end = ggml_time_us();
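Net effect of the change: instead of packing several prompt lines into one shared `LlamaBatch` (flushing it whenever the next prompt would overflow `n_ctx` and decoding a final leftover batch after the loop), each line now gets its own single-sequence batch that is decoded immediately. Below is a minimal sketch of that pattern pulled out into a helper, using only the calls visible in the diff (`LlamaBatch::new`, `add_sequence`, the example's own `batch_decode`); the `embed_lines` name, its signature, the `Vec<Vec<f32>>` return type, and the assumption of an `anyhow`-style `Result` are illustrative, not part of the commit.

```rust
// Sketch only. Assumes the items used in the diff are in scope:
// LlamaContext, LlamaToken, LlamaBatch from the llama-cpp-2 bindings,
// the example's batch_decode helper, and an anyhow-style Result alias.
// The function name, signature, and Vec<Vec<f32>> output type are illustrative.
fn embed_lines(
    ctx: &mut LlamaContext,
    tokens_lines_list: &[Vec<LlamaToken>],
    n_ctx: usize,
    normalise: bool,
) -> Result<Vec<Vec<f32>>> {
    let mut output = Vec::with_capacity(tokens_lines_list.len());

    for tokens in tokens_lines_list {
        // One fresh batch per prompt line: no shared batch to flush or reset.
        let mut batch = LlamaBatch::new(n_ctx, 1);
        batch.add_sequence(tokens, 0, false)?;

        // Exactly one sequence in this batch, hence the `1`.
        batch_decode(ctx, &mut batch, 1, &mut output, normalise)?;
    }

    Ok(output)
}
```

The trade-off is simplicity over throughput: the removed code could decode several short lines in a single call by accumulating sequences up to `n_ctx` tokens, while the new loop issues one decode per line but needs no overflow flush, no `max_seq_id_batch` bookkeeping, and no trailing "final batch" call.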