Progress on chatbot kernel

amikhail48 · amikhail48 · commit bc9ef0502d1a · 2025-04-04T15:38:42.000-07:00
diff --git a/MODULE.bazel b/MODULE.bazel
@@ -14,6 +14,8 @@ bazel_dep(name = "rules_python", version = "0.37.2")
 bazel_dep(name = "platforms", version = "0.0.10")
 bazel_dep(name = "googletest", version = "1.15.2")
 bazel_dep(name = "apple_support", version = "1.17.1", repo_name = "build_bazel_apple_support")
+bazel_dep(name = "curl", version = "8.8.0")
+bazel_dep(name = "nlohmann_json", version = "3.11.3")
 
 # Use archive_override to patch rules_foreign_cc to default to specific cmake version
 archive_override(
diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock
diff --git a/kernels/ai_server/BUILD b/kernels/ai_server/BUILD
@@ -0,0 +1,20 @@
+# C++ library bundling the AI-server (LLM) kernel sources found in this package.
+cc_library(
+    name = "llm_kernels",
+    # Compile every .cpp file in this package; expose every .h/.hpp file as a header.
+    srcs = glob(["*.cpp"]),
+    hdrs = glob(["*.h", "*.hpp"]),
+    # NOTE(review): the second entry is written like a label; confirm it resolves as intended.
+    includes = [
+        ".",
+        "//framework/include",
+    ],
+    deps = [
+        "//:corevx",
+        # Third-party modules; MODULE.bazel gains matching bazel_dep entries for both.
+        "@curl//:curl",
+        "@nlohmann_json//:json",
+    ],
+    # Any package in the workspace may depend on this target.
+    visibility = ["//visibility:public"],
+)
diff --git a/kernels/ai_server/chatbot.hpp b/kernels/ai_server/chatbot.hpp
@@ -0,0 +1,110 @@
+/**
+ * @file chatbot.hpp
+ * @brief Kernel for AI Model Server Chatbot
+ * @version 0.1
+ * @date 2025-04-04
+ *
+ * @copyright Copyright (c) 2025
+ *
+ */
+#include <curl/curl.h>
+#include <nlohmann/json.hpp>
+#include <string>
+#include <vector>
+#include <VX/vx.h>
+
+#define DEFAULT_MODEL "gpt-4o-mini"
+#define SERVER_URL "http://localhost:8000"
+#define API_KEY "hardcoded-api-key"
+
+// Minimal libcurl client that POSTs chat requests to SERVER_URL + api_path, sending API_KEY as a
+// bearer token. Every failure (transport, HTTP status, JSON) is reported as VX_FAILURE and leaves
+// output_text unchanged; no nlohmann::json exception escapes to the caller.
+class RemoteModelClient
+{
+private:
+    // libcurl write callback: appends the received bytes to the std::string behind userp.
+    static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp)
+    {
+        const size_t totalSize = size * nmemb;
+        static_cast<std::string *>(userp)->append(static_cast<const char *>(contents), totalSize);
+        return totalSize;
+    }
+
+    // Shared request path for both kernels: builds the JSON payload, POSTs it and stores the raw
+    // response in body. body is only written when the transfer succeeds with a 2xx HTTP status.
+    static vx_status PostChat(const std::string &input_text, bool stream, const std::string &api_path, std::string &body)
+    {
+        std::string request_payload;
+        try
+        {
+            const nlohmann::json request_json = {
+                {"model", DEFAULT_MODEL},
+                {"messages", {{{"role", "user"}, {"content", input_text}}}},
+                {"max_tokens", 100},
+                {"stream", stream}};
+            request_payload = request_json.dump(); // throws when input_text is not valid UTF-8
+        }
+        catch (const nlohmann::json::exception &)
+        {
+            return VX_FAILURE;
+        }
+        const std::string api_url = std::string(SERVER_URL) + api_path;
+        const std::string auth_header = "Authorization: Bearer " + std::string(API_KEY);
+        CURL *curl = curl_easy_init();
+        if (!curl)
+            return VX_FAILURE;
+
+        // curl_slist_append returns nullptr on failure; free what was built before bailing out.
+        struct curl_slist *first = curl_slist_append(nullptr, "Content-Type: application/json");
+        struct curl_slist *headers = first ? curl_slist_append(first, auth_header.c_str()) : nullptr;
+        if (!headers)
+        {
+            curl_slist_free_all(first);
+            curl_easy_cleanup(curl);
+            return VX_FAILURE;
+        }
+        std::string response;
+        curl_easy_setopt(curl, CURLOPT_URL, api_url.c_str());
+        curl_easy_setopt(curl, CURLOPT_POSTFIELDS, request_payload.c_str());
+        curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
+        curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
+
+        long http_status = 0;
+        const CURLcode res = curl_easy_perform(curl);
+        curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_status);
+        curl_slist_free_all(headers);
+        curl_easy_cleanup(curl);
+        // CURLE_OK only means the transfer completed; an HTTP error reply must still be rejected.
+        if (res != CURLE_OK || http_status < 200 || http_status >= 300)
+            return VX_FAILURE;
+        body.swap(response);
+        return VX_SUCCESS;
+    }
+
+public:
+    // kernel function (non-streaming): output_text receives choices[0].message.content.
+    vx_status AiServerQuery(const std::string &input_text, std::string &output_text, const std::string &api_path)
+    {
+        std::string response_string;
+        if (PostChat(input_text, false, api_path, response_string) != VX_SUCCESS)
+            return VX_FAILURE;
+        try
+        {
+            const nlohmann::json json_response = nlohmann::json::parse(response_string);
+            output_text = json_response.at("choices").at(0).at("message").at("content").get<std::string>();
+        }
+        catch (const nlohmann::json::exception &)
+        {
+            return VX_FAILURE; // malformed JSON, or at() found no such key/index in the reply
+        }
+        return VX_SUCCESS;
+    }
+
+    // kernel function (streaming): output_text receives the raw streamed body, unparsed.
+    vx_status AiServerQueryStream(const std::string &input_text, std::string &output_text, const std::string &api_path)
+    {
+        return PostChat(input_text, true, api_path, output_text);
+    }
+};
diff --git a/targets/ai_server/BUILD b/targets/ai_server/BUILD
@@ -8,11 +8,11 @@ cc_library(
     includes = [
         ".",
         "//framework/include",
-        # "//kernels/ai-server",
+        "//kernels/ai-server",
     ],
     deps = [
         "//:corevx",
-        # "//kernels/ai-server:ai-server-kernels",
+        "//kernels/ai_server:llm_kernels"
     ],
     visibility = ["//visibility:public"]
 )
diff --git a/targets/ai_server/vx_chatbot.cpp b/targets/ai_server/vx_chatbot.cpp
@@ -9,20 +9,51 @@
  */
 #include <iostream>
 #include <string>
+#include <unordered_map>
 
 #include <VX/vx.h>
 #include <VX/vx_compatibility.h>
 #include <VX/vx_helper.h>
 #include <VX/vx_lib_debug.h>
 
+#include "chatbot.hpp"
 #include "vx_internal.h"
 
+// Shared RemoteModelClient instance used to send queries to the AI model server
+static const std::shared_ptr<RemoteModelClient> kernel = std::make_shared<RemoteModelClient>();
+// Maps a short API name to the request path that the client appends to the server URL
+static std::unordered_map<std::string, const std::string> api_map = {
+    {"chat", "/v1/chat/completions"},
+};
+
 class VxRemoteModelClient
 {
+private:
+    static vx_status store_vx_string_to_array(vx_array arr, const vx_string &in)
+    {
+        // Replace the array contents with the characters of `in`: empty the array first and
+        // forward that status on failure, otherwise append the string with a stride of one char.
+        const vx_status cleared = vxTruncateArray(arr, 0);
+        return (cleared == VX_SUCCESS) ? vxAddArrayItems(arr, in.size(), in.data(), sizeof(char))
+                                       : cleared;
+    }
+
+    static vx_status load_vx_string_from_array(vx_array arr, vx_string &out)
+    {
+        // Copy the array's chars into `out`; a failed size query or an empty array is VX_FAILURE.
+        vx_size item_count = 0;
+        const vx_status queried = vxQueryArray(arr, VX_ARRAY_ATTRIBUTE_NUMITEMS, &item_count, sizeof(item_count));
+        const bool has_items = (queried == VX_SUCCESS) && (item_count != 0);
+        if (!has_items)
+            return VX_FAILURE;
+        out.resize(item_count); // make room so the copy writes straight into the string's buffer
+        return vxCopyArrayRange(arr, 0, item_count, sizeof(char), out.data(), VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
+    }
+
 public:
     static constexpr vx_param_description_t kernelParams[] = {
-        {VX_INPUT, VX_TYPE_STRING, VX_PARAMETER_STATE_REQUIRED},  // Parameter 0: Input text
-        {VX_OUTPUT, VX_TYPE_STRING, VX_PARAMETER_STATE_REQUIRED}, // Parameter 1: Output text
+        {VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED},  // Parameter 0: Input text
+        {VX_OUTPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED}, // Parameter 1: Output text
     };
 
     static vx_status VX_CALLBACK init(vx_node node, const vx_reference parameters[], vx_uint32 num)
@@ -47,7 +78,17 @@ class VxRemoteModelClient
         (void)node;
         (void)parameters;
         (void)num;
-        return VX_SUCCESS;
+        vx_status status = VX_SUCCESS;
+        vx_string input_text, output_text;
+
+        status = load_vx_string_from_array((vx_array)parameters[0], input_text);
+        status |= kernel->AiServerQuery(
+            input_text,       // Input text
+            output_text,      // Output text
+            api_map["chat"]); // API path
+        status |= store_vx_string_to_array((vx_array)parameters[1], output_text);
+
+        return status;
     }
 };