foldl
diff --git a/‎bindings/chatllm.py‎
Lines changed: 59 additions & 9 deletions b/‎bindings/chatllm.py‎
Lines changed: 59 additions & 9 deletions
diff --git a/‎bindings/libchatllm.h‎
Lines changed: 53 additions & 3 deletions b/‎bindings/libchatllm.h‎
Lines changed: 53 additions & 3 deletions
diff --git a/‎bindings/libchatllm.nim‎
Lines changed: 56 additions & 7 deletions b/‎bindings/libchatllm.nim‎
Lines changed: 56 additions & 7 deletions
@@ -2,7 +2,7 @@
 from enum import IntEnum
 import os, sys, signal, queue
 import threading
-import json
+import json, base64
 from typing import Any, Iterable, List, Union
 
 try:
@@ -37,6 +37,10 @@ class PrintType(IntEnum):
     PRINT_EVT_ASYNC_COMPLETED       = 100,   # last async operation completed (utf8_str is null)
     PRINT_EVT_THOUGHT_COMPLETED     = 101,   # thought completed
 
+class EmbeddingPurpose(IntEnum):
+    """What a text embedding is computed for; passed as `purpose` to `text_embedding`."""
+    Document = 0    # the text is a document
+    Query    = 1    # the text is a query
+
 class LibChatLLM:
 
     _obj2id = {}
@@ -100,11 +104,15 @@ def __init__(self, lib: str = '', model_storage: str = '', init_params: list[str
         self._chatllm_show_statistics   = self._lib.chatllm_show_statistics
         self._chatllm_save_session      = self._lib.chatllm_save_session
         self._chatllm_load_session      = self._lib.chatllm_load_session
+        self._chatllm_multimedia_msg_prepare        = self._lib.chatllm_multimedia_msg_prepare
+        self._chatllm_multimedia_msg_append         = self._lib.chatllm_multimedia_msg_append
+        self._chatllm_user_input_multimedia_msg     = self._lib.chatllm_user_input_multimedia_msg
 
         self._chatllm_async_user_input      = self._lib.chatllm_async_user_input
         self._chatllm_async_ai_continue     = self._lib.chatllm_async_ai_continue
         self._chatllm_async_tool_input      = self._lib.chatllm_async_tool_input
         self._chatllm_async_tool_completion = self._lib.chatllm_async_tool_completion
+        self._chatllm_async_user_input_multimedia_msg = self._lib.chatllm_async_user_input_multimedia_msg
 
         self._chatllm_create.restype = c_void_p
         self._chatllm_create.argtypes = []
@@ -123,11 +131,20 @@ def __init__(self, lib: str = '', model_storage: str = '', init_params: list[str
         self._chatllm_async_ai_continue.restype = c_int
         self._chatllm_async_ai_continue.argtypes = [c_void_p, c_char_p]
 
+        self._chatllm_multimedia_msg_prepare.argtypes = [c_void_p]
+        self._chatllm_multimedia_msg_append.restype = c_int
+        self._chatllm_multimedia_msg_append.argtypes = [c_void_p, c_char_p, c_char_p]
+
         self._chatllm_user_input.restype = c_int
         self._chatllm_user_input.argtypes = [c_void_p, c_char_p]
         self._chatllm_async_user_input.restype = c_int
         self._chatllm_async_user_input.argtypes = [c_void_p, c_char_p]
 
+        self._chatllm_user_input_multimedia_msg.restype         = c_int
+        self._chatllm_user_input_multimedia_msg.argtypes        = [c_void_p]
+        self._chatllm_async_user_input_multimedia_msg.restype   = c_int
+        self._chatllm_async_user_input_multimedia_msg.argtypes  = [c_void_p]
+
         self._chatllm_tool_input.restype = c_int
         self._chatllm_tool_input.argtypes = [c_void_p, c_char_p]
         self._chatllm_async_tool_input.restype = c_int
@@ -139,7 +156,7 @@ def __init__(self, lib: str = '', model_storage: str = '', init_params: list[str
         self._chatllm_async_tool_completion.argtypes = [c_void_p, c_char_p]
 
         self._chatllm_text_embedding.restype = c_int
-        self._chatllm_text_embedding.argtypes = [c_void_p, c_char_p]
+        self._chatllm_text_embedding.argtypes = [c_void_p, c_char_p, c_int]
 
         self._chatllm_text_tokenize.restype = c_int
         self._chatllm_text_tokenize.argtypes = [c_void_p, c_char_p]
@@ -241,11 +258,44 @@ def start(self, obj: c_void_p, callback_obj: Any) -> int:
     def set_ai_prefix(self, obj: c_void_p, prefix: str) -> int:
         return self._chatllm_set_ai_prefix(obj, c_char_p(prefix.encode()))
 
-    def chat(self, obj: c_void_p, user_input: str) -> int:
-        return self._chatllm_user_input(obj, c_char_p(user_input.encode()))
-
-    def async_chat(self, obj: c_void_p, user_input: str) -> int:
-        return self._chatllm_async_user_input(obj, c_char_p(user_input.encode()))
+    def _input_multimedia_msg(self, obj: c_void_p, user_input: List[dict | str]) -> int:
+        """Rebuild the multimedia message of `obj` from a list of pieces.
+
+        Each piece is either a plain `str` (appended as a 'text' piece) or a
+        `dict` with a 'type' key. A 'text' dict carries its content under
+        'text'. Any other type carries its payload under 'file' (a path, read
+        in binary mode) or 'url' (a `data:` URL with base64 content, or a URL
+        fetched through `model_downloader`); the payload is passed to the
+        library base64 encoded. Pieces that are neither `str` nor `dict` are
+        ignored.
+
+        Returns 0 if every piece was appended, otherwise the first non-zero
+        code returned by `chatllm_multimedia_msg_append`.
+        Raises `Exception` for a non-text piece without 'file' or 'url', and
+        `ValueError` for a `data:` URL that has no 'base64,' marker.
+        """
+        self._chatllm_multimedia_msg_prepare(obj)
+        result = 0
+        for x in user_input:
+            if isinstance(x, str):
+                # c_char_p accepts bytes only, so the type tag and the text are both encoded
+                t = 'text'
+                data = x.encode()
+            elif isinstance(x, dict):
+                t = x['type']
+                if t == 'text':
+                    data = x['text'].encode()
+                else:
+                    if 'file' in x:
+                        with open(x['file'], 'rb') as f:
+                            data = f.read()
+                    elif 'url' in x:
+                        url: str = x['url']
+                        if url.startswith('data:'):
+                            marker = 'base64,'
+                            i = url.find(marker)
+                            if i < 0:
+                                # without the marker the payload offset would be meaningless
+                                raise ValueError(f'data URL is not base64 encoded: {url[:64]}')
+                            data = base64.decodebytes(url[i + len(marker):].encode())
+                        else:
+                            data = model_downloader.download_file_to_bytes(url)
+                    else:
+                        raise Exception(f'unknown message piece: {x}')
+                    data = base64.b64encode(data)
+            else:
+                continue
+            r = self._chatllm_multimedia_msg_append(obj, c_char_p(t.encode()), c_char_p(data))
+            if result == 0 and r:
+                result = r
+        return result
+
+    def chat(self, obj: c_void_p, user_input: str | List[dict | str]) -> int:
+        """Send user input to the model and run generation synchronously.
+
+        A `str` is sent as plain text. Anything else is treated as a list of
+        message pieces (see `_input_multimedia_msg`) and sent as a multimedia
+        message, the same way `async_chat` handles it.
+
+        Returns the status code of the underlying library call.
+        """
+        if isinstance(user_input, str):
+            return self._chatllm_user_input(obj, c_char_p(user_input.encode()))
+        # every non-str input takes the multimedia path, so a value is always returned
+        self._input_multimedia_msg(obj, user_input)
+        return self._chatllm_user_input_multimedia_msg(obj)
+
+    def async_chat(self, obj: c_void_p, user_input: str | List[dict | str]) -> int:
+        """Asynchronous counterpart of `chat`.
+
+        A `str` is sent as plain text. Anything else is treated as a list of
+        message pieces (see `_input_multimedia_msg`) and sent as a multimedia
+        message.
+
+        Returns the status code of the underlying library call.
+        """
+        if isinstance(user_input, str):
+            return self._chatllm_async_user_input(obj, c_char_p(user_input.encode()))
+        self._input_multimedia_msg(obj, user_input)
+        # the status code must be returned here as well, as in the text branch
+        return self._chatllm_async_user_input_multimedia_msg(obj)
 
     def ai_continue(self, obj: c_void_p, suffix: str) -> int:
         return self._chatllm_ai_continue(obj, c_char_p(suffix.encode()))
@@ -268,8 +318,8 @@ def tool_completion(self, obj: c_void_p, user_input: str) -> int:
     def text_tokenize(self, obj: c_void_p, text: str) -> str:
         return self._chatllm_text_tokenize(obj, c_char_p(text.encode()))
 
-    def text_embedding(self, obj: c_void_p, text: str) -> str:
-        return self._chatllm_text_embedding(obj, c_char_p(text.encode()))
+    def text_embedding(self, obj: c_void_p, text: str, purpose: EmbeddingPurpose = EmbeddingPurpose.Document) -> int:
+        """Request the embedding of `text` for the given `purpose`.
+
+        `purpose` may be an `EmbeddingPurpose` member or its plain integer
+        value. Returns the integer status code of `chatllm_text_embedding`
+        (its `restype` is `c_int`), not the embedding itself.
+        """
+        # int() accepts both IntEnum members and plain ints, unlike `.value`
+        return self._chatllm_text_embedding(obj, c_char_p(text.encode()), c_int(int(purpose)))
 
     def qa_rank(self, obj: c_void_p, q: str, a: str) -> float:
         return self._chatllm_qa_rank(obj, c_char_p(q.encode()), c_char_p(a.encode()))
 
@@ -154,6 +154,29 @@ DLL_DECL void API_CALL chatllm_set_gen_max_tokens(struct chatllm_obj *obj, int g
  */
 DLL_DECL void API_CALL chatllm_restart(struct chatllm_obj *obj, const char *utf8_sys_prompt);
 
+/**
+ * @brief prepare to generate a multimedia input, i.e. clear previously added pieces.
+ *
+ * Each `chatllm_obj` has one multimedia message object, which can be used as user input,
+ * or chat history, etc.
+ *
+ * This function returns nothing.
+ *
+ * @param[in] obj               model object
+ */
+DLL_DECL void API_CALL chatllm_multimedia_msg_prepare(struct chatllm_obj *obj);
+
+/**
+ * @brief add a piece to a multimedia message
+ *
+ * Remember to clear the message with `chatllm_multimedia_msg_prepare` when starting a new message.
+ *
+ * @param[in] obj               model object
+ * @param[in] type              type ::= "text" | "image" | "video" | "audio" | ...
+ * @param[in] utf8_str          content: utf8 text for "text", or base64-encoded data for multimedia types.
+ * @return                      0 if succeeded
+ */
+DLL_DECL int API_CALL chatllm_multimedia_msg_append(struct chatllm_obj *obj, const char *type, const char *utf8_str);
+
 enum RoleType
 {
     ROLE_USER = 2,
@@ -165,7 +188,7 @@ enum RoleType
  * @brief push back a message to the end of chat history.
  *
  * This can be used to restore session after `chatllm_restart`.
- * This would not trigger generation. Use `chatllm_user_input`, etc  to start generation.
+ * This would not trigger generation. Use `chatllm_user_input`, etc to start generation.
  *
  * @param[in] obj               model object
  * @param[in] role_type         message type (see `RoleType`)
@@ -184,6 +207,16 @@ DLL_DECL void API_CALL chatllm_history_append(struct chatllm_obj *obj, int role_
  */
 DLL_DECL int API_CALL chatllm_user_input(struct chatllm_obj *obj, const char *utf8_str);
 
+/**
+ * @brief take current multimedia message as user input and run
+ *
+ * This function is synchronous, i.e. it returns after model generation ends and `f_end` is called.
+ *
+ * @param[in] obj               model object
+ * @return                      0 if succeeded
+ */
+DLL_DECL int API_CALL chatllm_user_input_multimedia_msg(struct chatllm_obj *obj);
+
 /**
  * @brief set prefix for AI generation
  *
@@ -248,16 +281,25 @@ DLL_DECL int chatllm_tool_completion(struct chatllm_obj *obj, const char *utf8_s
  */
 DLL_DECL int chatllm_text_tokenize(struct chatllm_obj *obj, const char *utf8_str);
 
+/**
+ * @brief purpose of a text embedding (the `purpose` argument of `chatllm_text_embedding`)
+ */
+enum EmbeddingPurpose
+{
+    EMBEDDING_FOR_DOC   = 0,    // for document
+    EMBEDDING_FOR_QUERY = 1,    // for query
+};
+
 /**
  * @brief text embedding
  *
  * embedding is emitted through `PRINTLN_EMBEDDING`.
  *
+ * Note: Not all models support specifying purpose (see _Qwen3-Embedding_).
+ *
  * @param[in] obj               model object
  * @param[in] utf8_str          text
+ * @param[in] purpose           purpose, see `EmbeddingPurpose`
  * @return                      0 if succeeded
  */
-DLL_DECL int chatllm_text_embedding(struct chatllm_obj *obj, const char *utf8_str);
+DLL_DECL int chatllm_text_embedding(struct chatllm_obj *obj, const char *utf8_str, int purpose);
 
 /**
  * @brief question & answer ranking
@@ -346,6 +388,14 @@ DLL_DECL int API_CALL chatllm_async_start(struct chatllm_obj *obj, f_chatllm_pri
  */
 DLL_DECL int API_CALL chatllm_async_user_input(struct chatllm_obj *obj, const char *utf8_str);
 
+/**
+ * @brief async version of `chatllm_user_input_multimedia_msg`
+ *
+ * Like the synchronous version, this takes the current multimedia message of `obj` as user input.
+ *
+ * @param   ...
+ * @return                      0 if started else -1
+ */
+DLL_DECL int API_CALL chatllm_async_user_input_multimedia_msg(struct chatllm_obj *obj);
+
 /**
  * @brief async version of `chatllm_ai_continue`
 
@@ -376,7 +426,7 @@ DLL_DECL int chatllm_async_tool_completion(struct chatllm_obj *obj, const char *
  * @param   ...
  * @return                      0 if started else -1
  */
-DLL_DECL int chatllm_async_text_embedding(struct chatllm_obj *obj, const char *utf8_str);
+DLL_DECL int chatllm_async_text_embedding(struct chatllm_obj *obj, const char *utf8_str, int purpose);
 
 /**
  * @brief async version of `chatllm_qa_rank`
 
@@ -87,6 +87,29 @@ proc chatllm_set_gen_max_tokens*(obj: ptr chatllm_obj; gen_max_tokens: cint) {.s
 ##
 proc chatllm_restart*(obj: ptr chatllm_obj; utf8_sys_prompt: cstring) {.stdcall, dynlib: libName, importc.}
 
+##
+## @brief prepare to generate a multimedia input, i.e. clear previously added pieces.
+##
+## Each `chatllm_obj` has one multimedia message object, which can be used as user input,
+## or chat history, etc.
+##
+## This proc returns nothing.
+##
+## @param[in] obj               model object
+##
+proc chatllm_multimedia_msg_prepare*(obj: ptr chatllm_obj) {.stdcall, dynlib: libName, importc.}
+
+##
+## @brief add a piece to a multimedia message
+##
+## Remember to clear the message with `chatllm_multimedia_msg_prepare` when starting a new message.
+##
+## @param[in] obj               model object
+## @param[in] content_type      type ::= "text" | "image" | "video" | "audio" | ...
+## @param[in] utf8_str          content: utf8 text for "text", or base64-encoded data for multimedia types.
+## @return                      0 if succeeded
+##
+proc chatllm_multimedia_msg_append*(obj: ptr chatllm_obj; content_type: cstring; utf8_str: cstring): cint {.stdcall, dynlib: libName, importc.}
+
 type
     RoleType* = enum
         ROLE_USER = 2,
@@ -116,6 +139,16 @@ proc chatllm_history_append*(obj: ptr chatllm_obj; role_type: int; utf8_str: cst
 ##
 proc chatllm_user_input*(obj: ptr chatllm_obj; utf8_str: cstring): cint {.stdcall, dynlib: libName, importc.}
 
+##
+## @brief take current multimedia message as user input and run
+##
+## This function is synchronous, i.e. it returns after model generation ends and `f_end` is called.
+##
+## @param[in] obj               model object
+## @return                      0 if succeeded
+##
+proc chatllm_user_input_multimedia_msg*(obj: ptr chatllm_obj): cint {.stdcall, dynlib: libName, importc.}
+
 ##
 ##  @brief set prefix for AI generation
 ##
@@ -171,16 +204,24 @@ proc chatllm_tool_completion*(obj: ptr chatllm_obj; utf8_str: cstring): cint {.s
 ##
 proc chatllm_text_tokenize*(obj: ptr chatllm_obj; utf8_str: cstring): cint {.stdcall, dynlib: libName, importc.}
 
+# purpose of a text embedding (the `purpose` argument of `chatllm_text_embedding`)
+type
+    EmbeddingPurpose* = enum
+        EMBEDDING_FOR_DOC   = 0,    # for document
+        EMBEDDING_FOR_QUERY = 1,    # for query
+
 ##
-##  @brief text embedding
+## @brief text embedding
 ##
-##  embedding is emitted through `PRINTLN_EMBEDDING`.
+## embedding is emitted through `PRINTLN_EMBEDDING`.
 ##
-##  @param[in] obj               model object
-##  @param[in] utf8_str          text
-##  @return                      0 if succeeded
+## Note: Not all models support specifying purpose (see _Qwen3-Embedding_).
+##
+## @param[in] obj               model object
+## @param[in] utf8_str          text
+## @param[in] purpose           purpose, see `EmbeddingPurpose`
+## @return                      0 if succeeded
 ##
-proc chatllm_text_embedding*(obj: ptr chatllm_obj; utf8_str: cstring): cint {.stdcall, dynlib: libName, importc.}
+proc chatllm_text_embedding*(obj: ptr chatllm_obj; utf8_str: cstring; purpose: cint): cint {.stdcall, dynlib: libName, importc.}
 
 ##
 ##  @brief question & answer ranking
@@ -271,6 +312,14 @@ proc chatllm_async_start*(obj: ptr chatllm_obj; f_print: f_chatllm_print;
 ##
 proc chatllm_async_user_input*(obj: ptr chatllm_obj; utf8_str: cstring): cint {.stdcall, dynlib: libName, importc.}
 
+##
+## @brief async version of `chatllm_user_input_multimedia_msg`
+##
+## Like the synchronous version, this takes the current multimedia message of `obj` as user input.
+##
+## @param   ...
+## @return                      0 if started else -1
+##
+proc chatllm_async_user_input_multimedia_msg*(obj: ptr chatllm_obj): cint {.stdcall, dynlib: libName, importc.}
+
 ##
 ##  @brief async version of `chatllm_tool_input`
 ##
@@ -293,7 +342,7 @@ proc chatllm_async_tool_completion*(obj: ptr chatllm_obj; utf8_str: cstring): ci
 ##  @param   ...
 ##  @return                      0 if started else -1
 ##
-proc chatllm_async_text_embedding*(obj: ptr chatllm_obj; utf8_str: cstring): cint {.stdcall, dynlib: libName, importc.}
+proc chatllm_async_text_embedding*(obj: ptr chatllm_obj; utf8_str: cstring; purpose: cint): cint {.stdcall, dynlib: libName, importc.}
 
 ##
 ##  @brief async version of `chatllm_qa_rank`