# On-device Machine LXM (LLM, LVM, etc.) Service API example

This example demonstrates how to implement a Large Language Model (LLM) service using `ml-lxm-service` (unreleased).

## Overview

The sample application provides an interactive CLI (command-line interface) to:
- Create/destroy LXM sessions
- Set instructions
- Create/destroy prompts
- Append text/instructions to prompts
- Generate token-streamed responses

## Prerequisites

- `ml-api-service` and `flare` development packages installed on your target device

## Building the Example

### Build

```sh
meson setup build --prefix=${NNST_ROOT} --libdir=lib --bindir=bin --includedir=include
ninja -C build install
```

### For rpm

```sh
./gen-ml-lxm-service-rpm.sh
```

### Run

```sh
cp config.conf tokenlizer.json sflare_if_4bit_3b.bin ${PROJECT}/
./build/ml-lxm-service-example
```

## API Reference

### LXM Service Availability Status

```cpp
typedef enum
{
  ML_LXM_AVAILABILITY_AVAILABLE = 0,
  ML_LXM_AVAILABILITY_DEVICE_NOT_ELIGIBLE,
  ML_LXM_AVAILABILITY_SERVICE_DISABLED,
  ML_LXM_AVAILABILITY_MODEL_NOT_READY,
  ML_LXM_AVAILABILITY_UNKNOWN
} ml_lxm_availability_e;
```

### Availability Check

```cpp
/**
 * @brief Checks LXM service availability.
 * @param[out] status Current availability status.
 * @return ML_ERROR_NONE on success, error code otherwise.
 */
int ml_lxm_check_availability (ml_lxm_availability_e *status);
```
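
Before creating a session, the caller can gate on this check. The following is a minimal sketch composed only from the API declared above; the helper name and the error chosen for the "unavailable" case are this example's own conventions, not part of the API:

```cpp
#include "ml-lxm-service.h"

/* Sketch: refuse to start when the LXM service is not ready. */
static int
ensure_lxm_available (void)
{
  ml_lxm_availability_e status = ML_LXM_AVAILABILITY_UNKNOWN;
  int ret = ml_lxm_check_availability (&status);

  if (ret != ML_ERROR_NONE)
    return ret; /* The availability check itself failed. */

  if (status != ML_LXM_AVAILABILITY_AVAILABLE)
    return ML_ERROR_INVALID_PARAMETER; /* Arbitrary mapping chosen for this sketch. */

  return ML_ERROR_NONE;
}
```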

### Data Types

```cpp
/**
 * @brief Token streaming callback type.
 * @param event Service event type (e.g., new data).
 * @param event_data Event data handle carrying the generated token.
 * @param user_data User-defined context.
 */
typedef void (*ml_lxm_token_cb)(ml_service_event_e event, ml_information_h event_data, void *user_data);

/**
 * @brief Generation options for LXM responses.
 */
typedef struct {
  double temperature;
  size_t max_tokens;
} ml_lxm_generation_options_s;
```

### Session Management

```cpp
/**
 * @brief Creates an LXM session.
 * @param[out] session Session handle.
 * @param[in] config_path Path to configuration file.
 * @param[in] instructions Initial instructions (optional).
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_session_create(ml_lxm_session_h *session, const char *config_path, const char *instructions);

/**
 * @brief Destroys an LXM session.
 * @param[in] session Session handle.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_session_destroy(ml_lxm_session_h session);

/**
 * @brief Sets runtime instructions for a session.
 * @param[in] session Session handle.
 * @param[in] instructions New instructions.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_session_set_instructions(ml_lxm_session_h session, const char *instructions);
```
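
For reference, a minimal sketch of changing a live session's behavior with `ml_lxm_session_set_instructions()`, using only the calls declared above; the config path and instruction strings are placeholders:

```cpp
ml_lxm_session_h session = NULL;

/* Create the session with initial instructions... */
if (ml_lxm_session_create(&session, "/path/to/config.conf", "You are a concise assistant.") == ML_ERROR_NONE) {
  /* ...then adjust its behavior at runtime without recreating it. */
  ml_lxm_session_set_instructions(session, "Answer in one sentence.");

  ml_lxm_session_destroy(session);
}
```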

### Prompt Handling

```cpp
/**
 * @brief Creates a prompt object.
 * @param[out] prompt Prompt handle.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_prompt_create(ml_lxm_prompt_h *prompt);

/**
 * @brief Destroys a prompt object.
 * @param[in] prompt Prompt handle.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_prompt_destroy(ml_lxm_prompt_h prompt);

/**
 * @brief Appends text to a prompt.
 * @param[in] prompt Prompt handle.
 * @param[in] text Text to append.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_prompt_append_text(ml_lxm_prompt_h prompt, const char *text);

/**
 * @brief Appends an instruction to a prompt.
 * @param[in] prompt Prompt handle.
 * @param[in] instruction Instruction to append.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_prompt_append_instruction(ml_lxm_prompt_h prompt, const char *instruction);
```
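
A prompt can carry both instructions and user text. The following sketch composes the calls declared above; the strings are placeholders:

```cpp
ml_lxm_prompt_h prompt = NULL;

if (ml_lxm_prompt_create(&prompt) == ML_ERROR_NONE) {
  /* Instructions steer the model; text is the user's actual request. */
  ml_lxm_prompt_append_instruction(prompt, "Respond in plain English.");
  ml_lxm_prompt_append_text(prompt, "Summarize the benefits of on-device inference.");

  /* ... pass the prompt to ml_lxm_session_respond() ... */

  ml_lxm_prompt_destroy(prompt);
}
```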

### Response Generation

```cpp
/**
 * @brief Generates a token-streamed response.
 * @param[in] session Session handle.
 * @param[in] prompt Prompt handle.
 * @param[in] options Generation parameters.
 * @param[in] token_cb Callback invoked for each generated token.
 * @param[in] user_data User context passed to the callback.
 * @return ML_ERROR_NONE on success.
 */
int ml_lxm_session_respond(
    ml_lxm_session_h session,
    ml_lxm_prompt_h prompt,
    const ml_lxm_generation_options_s *options,
    ml_lxm_token_cb token_cb,
    void *user_data
);
```

### Error Codes

- `ML_ERROR_NONE`: Operation successful
- `ML_ERROR_INVALID_PARAMETER`: Invalid parameters detected
- `ML_ERROR_OUT_OF_MEMORY`: Memory allocation failure
- `ML_ERROR_IO_ERROR`: File/DB operation failure
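
Since every API above returns these codes as plain integers, call sites typically check and bail out. Below is a hedged sketch of a helper macro one might use in sample code like this; the macro is this example's own convention, not part of the API:

```cpp
#include <glib.h>

/* Sketch: log the failing expression and propagate any non-zero error code. */
#define LXM_CHECK(expr)                                     \
  do {                                                      \
    int _ret = (expr);                                      \
    if (_ret != ML_ERROR_NONE) {                            \
      g_print("'%s' failed with error %d\n", #expr, _ret);  \
      return _ret;                                          \
    }                                                       \
  } while (0)

/* Usage: LXM_CHECK(ml_lxm_prompt_create(&prompt)); */
```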

## Sample Code Explanation

### Key Components

```cpp
#include <ml-api-service.h>
#include "ml-lxm-service.h"

// Global handles
ml_lxm_session_h g_session = NULL;
ml_lxm_prompt_h g_prompt = NULL;
```

### Main Workflow

1. Session Creation
   ```cpp
   ret = ml_lxm_session_create(&g_session, config_path, "Default instructions");
   ```
2. Prompt Handling
   ```cpp
   ml_lxm_prompt_create(&g_prompt);
   ml_lxm_prompt_append_text(g_prompt, "Explain quantum computing");
   ```
3. Response Generation
   ```cpp
   ml_lxm_generation_options_s options = {
     .temperature = 1.2,
     .max_tokens = 128
   };

   ml_lxm_session_respond(g_session, g_prompt, &options, token_handler, NULL);
   ```
4. Token Callback
   ```cpp
   static void token_handler(ml_service_event_e event,
                             ml_information_h event_data,
                             void *user_data) {
     /* Process tokens here */
   }
   ```
5. Cleanup
   ```cpp
   ml_lxm_prompt_destroy(g_prompt);
   ml_lxm_session_destroy(g_session);
   ```

### Full Example Snippet

```cpp
#include <iostream>

#include <glib.h>
#include <ml-api-service.h>
#include "ml-lxm-service.h"

static void token_handler(ml_service_event_e event,
                          ml_information_h event_data,
                          void *user_data);

int main() {
  ml_lxm_session_h session;
  ml_lxm_prompt_h prompt;

  // 1. Create session
  ml_lxm_session_create(&session, "/path/to/config", NULL);

  // 2. Create prompt
  ml_lxm_prompt_create(&prompt);
  ml_lxm_prompt_append_text(prompt, "Hello AI");

  // 3. Generate response
  ml_lxm_generation_options_s options = { 1.0, 50 };
  ml_lxm_session_respond(session, prompt, &options, token_handler, NULL);

  // 4. Cleanup
  ml_lxm_prompt_destroy(prompt);
  ml_lxm_session_destroy(session);

  return 0;
}

static void token_handler(ml_service_event_e event,
                          ml_information_h event_data,
                          void *user_data) {
  ml_tensors_data_h data = NULL;
  void *_raw = NULL;
  size_t _size = 0;
  int ret;

  switch (event) {
    case ML_SERVICE_EVENT_NEW_DATA:
      if (event_data != NULL) {
        ret = ml_information_get(event_data, "data", &data);
        if (ret != ML_ERROR_NONE) {
          g_print("Failed to get data from event_data\n");
          return;
        }

        ret = ml_tensors_data_get_tensor_data(data, 0U, &_raw, &_size);
        if (ret != ML_ERROR_NONE) {
          g_print("Failed to get tensor data\n");
          return;
        }

        // Each NEW_DATA event carries a token chunk; stream it to stdout as it arrives.
        std::cout.write(static_cast<const char *>(_raw), _size);
        std::cout.flush();
      }
      break;
    default:
      break;
  }
}
```