Commit 4bf7ca3

llama_decode_ext

1 parent aed4a8e · commit 4bf7ca3

3 files changed: +8, -8 lines

examples/server/server.cpp

Lines changed: 2 additions & 2 deletions
@@ -3149,7 +3149,7 @@ struct server_context {
 
             llama_batch_ext_ptr batch_view(llama_batch_ext_get_view(batch.get(), i, n_tokens));
 
-            const int ret = llama_text_decode(ctx, batch_view.get());
+            const int ret = llama_decode_ext(ctx, batch_view.get());
 
             metrics.on_decoded(slots);
 
             if (ret != 0) {
@@ -3294,7 +3294,7 @@ struct server_context {
 
             SLT_DBG(slot, "decoding speculative batch, size = %d\n", llama_batch_ext_get_n_tokens(slot.batch_spec.get()));
 
-            llama_text_decode(ctx, slot.batch_spec.get());
+            llama_decode_ext(ctx, slot.batch_spec.get());
 
             // the accepted tokens from the speculation
             const auto ids = common_sampler_sample_and_accept_n(slot.smpl, ctx, draft);
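For context, the first hunk sits inside the server's batched decode loop: the pending batch is cut into views of at most n_batch tokens, each view is decoded through the renamed entry point, and the return code is checked before moving on. Below is a minimal sketch of that pattern, not part of the commit; it assumes ctx and batch (a llama_batch_ext_ptr) were prepared elsewhere, reuses the helpers visible in the diff, and takes the chunk size from llama_n_batch.

    // sketch only: chunked decode of a large llama_batch_ext
    const int32_t n_all   = llama_batch_ext_get_n_tokens(batch.get());
    const int32_t n_batch = llama_n_batch(ctx);

    for (int32_t i = 0; i < n_all; i += n_batch) {
        const int32_t n_tokens = std::min(n_batch, n_all - i);

        // non-owning view over tokens [i, i + n_tokens) of the full batch
        llama_batch_ext_ptr batch_view(llama_batch_ext_get_view(batch.get(), i, n_tokens));

        const int ret = llama_decode_ext(ctx, batch_view.get());
        if (ret != 0) {
            // the server retries with a smaller chunk; a simple caller can
            // treat any non-zero code as a failed decode and stop
            break;
        }
    }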

include/llama.h

Lines changed: 2 additions & 2 deletions
@@ -944,7 +944,7 @@ extern "C" {
     DEPRECATED(LLAMA_API int32_t llama_encode(
             struct llama_context * ctx,
             struct llama_batch batch), "use llama_batch_ext API instead");
-    LLAMA_API int32_t llama_text_encode(
+    LLAMA_API int32_t llama_encode_ext(
             struct llama_context * ctx,
             struct llama_batch_ext * batch);
 
@@ -955,7 +955,7 @@ extern "C" {
     DEPRECATED(LLAMA_API int32_t llama_decode(
             struct llama_context * ctx,
             struct llama_batch batch), "use llama_batch_ext API instead");
-    LLAMA_API int32_t llama_text_decode(
+    LLAMA_API int32_t llama_decode_ext(
             struct llama_context * ctx,
             struct llama_batch_ext * batch);
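On the caller side, the header change only renames the extended entry points; the deprecated llama_encode / llama_decode declarations above are untouched. A minimal sketch of how new code would call the renamed function (assumptions: a valid llama_context and an already-built llama_batch_ext, whose construction is outside this diff):

    #include "llama.h"
    #include <cstdio>

    // sketch: wrap the renamed decode entry point; the batch is assumed to
    // have been built with the llama_batch_ext API (not shown in this commit)
    static int32_t decode_one(llama_context * ctx, llama_batch_ext * batch) {
        const int32_t ret = llama_decode_ext(ctx, batch);   // was llama_text_decode
        if (ret != 0) {
            fprintf(stderr, "llama_decode_ext failed: %d\n", ret);
        }
        return ret;
    }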

src/llama.cpp

Lines changed: 4 additions & 4 deletions
@@ -9977,7 +9977,7 @@ int32_t llama_encode(
     // also convert llama_batch to llama_batch_ext
     llama_batch_allocr batch_allocr(batch, batch.pos ? -1 : ctx->kv_self.max_pos() + 1);
     llama_batch_ext * batch_ext = batch_allocr.batch;
-    return llama_text_encode(ctx, batch_ext);
+    return llama_encode_ext(ctx, batch_ext);
 }
 
 // DEPRECATED
@@ -9988,10 +9988,10 @@ int32_t llama_decode(
     // also convert llama_batch to llama_batch_ext
     llama_batch_allocr batch_allocr(batch, batch.pos ? -1 : ctx->kv_self.max_pos() + 1);
     llama_batch_ext * batch_ext = batch_allocr.batch;
-    return llama_text_decode(ctx, batch_ext);
+    return llama_decode_ext(ctx, batch_ext);
 }
 
-int32_t llama_text_encode(
+int32_t llama_encode_ext(
         struct llama_context * ctx,
         struct llama_batch_ext * batch) {
     const int ret = llama_encode_impl(*ctx, *batch);
@@ -10002,7 +10002,7 @@ int32_t llama_text_encode(
     return ret;
 }
 
-int32_t llama_text_decode(
+int32_t llama_decode_ext(
         struct llama_context * ctx,
         struct llama_batch_ext * batch) {
     const int ret = llama_decode_impl(*ctx, *batch);
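The implementation change keeps the deprecated wrappers intact: llama_encode / llama_decode still accept a legacy llama_batch, convert it through llama_batch_allocr (positions are auto-assigned starting at kv_self.max_pos() + 1 when batch.pos is null), and then forward to the renamed functions, so old and new call sites end up in the same llama_decode_impl. A hedged sketch of the two paths, with ctx, legacy_batch and ext_batch assumed to exist and be filled elsewhere:

    // path 1: legacy caller, source unchanged; the deprecated wrapper converts
    // the llama_batch to a llama_batch_ext internally and forwards it
    const int32_t ret_legacy = llama_decode(ctx, legacy_batch);

    // path 2: new caller using the renamed extended entry point directly
    const int32_t ret_ext = llama_decode_ext(ctx, ext_batch);

    // both paths return the code produced by llama_decode_impl, so existing
    // error handling (ret != 0 means failure) carries over unchanged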
