Nexesenex
diff --git a/common/chat.cpp b/common/chat.cpp
Lines changed: 3 additions & 1 deletion
Lines changed: 3 additions & 1 deletion
diff --git a/docs/ops/CANN.csv b/docs/ops/CANN.csv
Lines changed: 8133 additions & 0 deletions
Lines changed: 8133 additions & 0 deletions
diff --git a/docs/ops/OpenCL.csv b/docs/ops/OpenCL.csv
Lines changed: 8133 additions & 0 deletions
Lines changed: 8133 additions & 0 deletions
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
Lines changed: 8 additions & 0 deletions
Lines changed: 8 additions & 0 deletions
@@ -1944,6 +1944,8 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
         }
     }
     auto msg = builder.result();
-    LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
+    if (!is_partial) {
+        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
+    }
     return msg;
 }
@@ -81,6 +81,14 @@ int main(int argc, char ** argv) {
 
     params.embedding = true;
 
+    // if the number of prompts that will be encoded is known in advance, it's more efficient to specify the
+    //   --parallel argument accordingly. for convenience, if it is not specified, we fall back to a unified
+    //   KV cache in order to support any number of prompts
+    if (params.n_parallel == 1) {
+        LOG_INF("%s: n_parallel == 1 -> unified KV cache is enabled\n", __func__);
+        params.kv_unified = true;
+    }
+
     // utilize the full context
     if (params.n_batch < params.n_ctx) {
         LOG_WRN("%s: setting batch size to %d\n", __func__, params.n_ctx);
Original file line number	Diff line number	Diff line change
`@@ -1944,6 +1944,8 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co`
`1944`	`1944`	`}`
`1945`	`1945`	`}`
`1946`	`1946`	`auto msg = builder.result();`
`1947`		`- LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());`
	`1947`	`+ if (!is_partial) {`
	`1948`	`+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());`
	`1949`	`+ }`
`1948`	`1950`	`return msg;`
`1949`	`1951`	`}`