@@ -151,7 +151,7 @@ static std::string get_gpu_info() {
     int count = ggml_backend_sycl_get_device_count();
     for (int i = 0; i < count; i++) {
         char buf[128];
-        ggml_sycl_get_device_description(i, buf, sizeof(buf));
+        ggml_backend_sycl_get_device_description(i, buf, sizeof(buf));
         id += buf;
         if (i < count - 1) {
             id += "/";
@@ -1428,7 +1428,7 @@ struct sql_printer : public printer {
     }
 };
 
-static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
+static void test_prompt(llama_context * ctx, int n_prompt, int n_batch, int n_threads) {
     llama_set_n_threads(ctx, n_threads, n_threads);
 
     const llama_model * model = llama_get_model(ctx);
@@ -1444,14 +1444,14 @@ static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
         for (int i = 1; i < n_tokens; i++) {
             tokens[i] = std::rand() % n_vocab;
         }
-        llama_decode(ctx, llama_batch_get_one(tokens.data(), n_tokens, n_past + n_processed, 0));
+        llama_decode(ctx, llama_batch_get_one(tokens.data(), n_tokens));
         n_processed += n_tokens;
     }
 
     llama_synchronize(ctx);
 }
 
-static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
+static void test_gen(llama_context * ctx, int n_gen, int n_threads) {
     llama_set_n_threads(ctx, n_threads, n_threads);
 
     const llama_model * model = llama_get_model(ctx);
@@ -1460,7 +1460,7 @@ static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads)
     llama_token token = llama_add_bos_token(model) ? llama_token_bos(model) : std::rand() % n_vocab;
 
     for (int i = 0; i < n_gen; i++) {
-        llama_decode(ctx, llama_batch_get_one(&token, 1, n_past + i, 0));
+        llama_decode(ctx, llama_batch_get_one(&token, 1));
         llama_synchronize(ctx);
         token = std::rand() % n_vocab;
     }
@@ -1596,13 +1596,13 @@ int main(int argc, char ** argv) {
                 fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup prompt run\n", params_idx, params_count);
             }
             //test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
-            test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
+            test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
         }
         if (t.n_gen > 0) {
             if (params.progress) {
                 fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup generation run\n", params_idx, params_count);
             }
-            test_gen(ctx, 1, 0, t.n_threads);
+            test_gen(ctx, 1, t.n_threads);
         }
 
         for (int i = 0; i < params.reps; i++) {
@@ -1614,13 +1614,13 @@ int main(int argc, char ** argv) {
             if (params.progress) {
                 fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count, i + 1, params.reps);
             }
-            test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
+            test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads);
         }
         if (t.n_gen > 0) {
             if (params.progress) {
                 fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count, i + 1, params.reps);
             }
-            test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads);
+            test_gen(ctx, t.n_gen, t.n_threads);
         }
 
         uint64_t t_ns = get_time_ns() - t_start;
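Note on the API change applied above: `llama_batch_get_one` now takes only a token pointer and a count; the starting position and sequence id are no longer supplied by the caller, since the context derives positions from its KV cache state. That is also why `test_prompt` and `test_gen` can drop their `n_past` parameters. Below is a minimal sketch of how a caller would drive the two updated helpers under that convention; `bench_once` is a hypothetical wrapper, the model path and the n_prompt/n_batch/n_gen/n_threads values are placeholders, and it assumes the code lives in llama-bench.cpp itself (the helpers are static).

// Minimal sketch, assuming the post-change llama.cpp API where positions are
// tracked by the context's KV cache rather than passed into the batch.
// bench_once, "model.gguf", and all numeric values are placeholders.
static void bench_once(const char * model_path) {
    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file(model_path, mparams);

    llama_context_params cparams = llama_context_default_params();
    llama_context * ctx = llama_new_context_with_model(model, cparams);

    llama_kv_cache_clear(ctx);      // reset state so decoding starts at position 0
    test_prompt(ctx, 512, 512, 8);  // n_prompt, n_batch, n_threads
    test_gen(ctx, 128, 8);          // n_gen, n_threads

    llama_free(ctx);
    llama_free_model(model);
}

Clearing the KV cache between runs takes the place of the old explicit `n_past` bookkeeping: each timed run starts from position 0 without the caller threading a position counter through every `llama_decode` call.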