Skip to content

Commit dd435c8

Browse files
committed
[ml service] Support asynchronous output from sub-plugin in ML service extension single API
- Add invoke async callback to receive results from sub-plugins asynchronously. Signed-off-by: hyunil park <hyunil46.park@samsung.com>
1 parent ca9efae commit dd435c8

File tree

5 files changed

+114
-1
lines changed

5 files changed

+114
-1
lines changed

c/include/nnstreamer-tizen-internal.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ extern "C" {
2626
*/
2727
int ml_pipeline_construct_internal (const char *pipeline_description, ml_pipeline_state_cb cb, void *user_data, ml_pipeline_h *pipe);
2828

29+
/**
 * @brief The callback function pointer to be called every time the sub-plugin generates a new output tensor asynchronously.
 * @details A pointer of this type is stored in ml_single_preset::invoke_async_cb and is cast to
 *          NNSFilterInvokeAsyncCallback before being handed to the filter sub-plugin.
 * @param handle    NOTE(review): not documented by the declaration; the in-tree service-extension callback
 *                  treats its first argument as the data registered with the callback — confirm.
 * @param output    The output produced by the sub-plugin. NOTE(review): the in-tree callback receives this
 *                  as a GstTensorMemory pointer — confirm the expected type and who releases it.
 * @param user_data NOTE(review): the in-tree callback registered through this type takes only two
 *                  parameters (handle, output); confirm that the sub-plugin really passes a third argument.
 */
typedef void (*ml_single_invoke_async_cb) (void *handle, void *output, void *user_data);
30+
2931
/**
3032
* @brief Information required to create a single-shot instance.
3133
*/
@@ -39,6 +41,8 @@ typedef struct {
3941
char *fw_name; /**< The explicit framework name given by user */
4042
int invoke_dynamic; /**< True for supporting invoke with flexible output. */
4143
int invoke_async; /**< The sub-plugin must support asynchronous output to use this option. If set to TRUE, the sub-plugin can generate multiple outputs asynchronously per single input. Otherwise, only synchronous single-output is expected and async callback/handle are ignored. */
44+
void *invoke_async_data; /**< User data to be passed to async callback. */
45+
ml_single_invoke_async_cb invoke_async_cb; /**< Callback function to be called when the sub-plugin generates an output asynchronously. */
4246
} ml_single_preset;
4347

4448
/**

c/src/ml-api-inference-single.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,6 +1088,20 @@ ml_single_open_custom (ml_single_h * single, ml_single_preset * info)
10881088
g_object_set (filter_obj, "custom", info->custom_option, NULL);
10891089
}
10901090

1091+
if (single_h->klass && info->invoke_async) {
1092+
if (info->invoke_async_cb != NULL && info->invoke_async_data!= NULL) {
1093+
NNSFilterInvokeAsyncCallback invoke_async_cb =
1094+
(NNSFilterInvokeAsyncCallback) info->invoke_async_cb;
1095+
single_h->klass->set_invoke_async_callback (single_h->filter,
1096+
invoke_async_cb, info->invoke_async_data);
1097+
} else {
1098+
_ml_error_report
1099+
("The parameters invoke_async_cb and invoke_async_data in the info argument are invalid");
1100+
status = ML_ERROR_INVALID_PARAMETER;
1101+
goto error;
1102+
}
1103+
}
1104+
10911105
/* 4. Start the nnfw to get inout configurations if needed */
10921106
if (!single_h->klass->start (single_h->filter)) {
10931107
_ml_error_report
@@ -1235,6 +1249,14 @@ ml_single_open_with_option (ml_single_h * single, const ml_option_h option)
12351249
if (strcasecmp ((gchar *) value, "TRUE") == 0)
12361250
info.invoke_async = TRUE;
12371251
}
1252+
if (info.invoke_async) {
1253+
if (ML_ERROR_NONE == ml_option_get (option, "invoke_async_cb", &value)) {
1254+
info.invoke_async_cb = (ml_single_invoke_async_cb) value;
1255+
}
1256+
if (ML_ERROR_NONE == ml_option_get (option, "invoke_async_cb_data", &value)) {
1257+
info.invoke_async_data = (void *) value;
1258+
}
1259+
}
12381260

12391261
return ml_single_open_custom (single, &info);
12401262
}

c/src/ml-api-service-extension.c

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,52 @@ _ml_extension_destroy_tensors_info (void *data)
234234
ml_tensors_info_destroy (info);
235235
}
236236

237+
/**
238+
* @brief Internal function to invoke model asynchronously. It is called by the sub-plugin.
239+
*/
240+
static void
241+
_ml_extension_invoke_async_callback (void *handle, GstTensorMemory * output)
242+
{
243+
int ret;
244+
size_t len;
245+
ml_service_s *mls = NULL;
246+
ml_tensors_info_h info;
247+
ml_tensors_data_h data;
248+
ml_tensor_dimension dimension = { 0 };
249+
250+
mls = (ml_service_s *) handle;
251+
if (!output || !output->data || !mls) {
252+
_ml_loge ("Invalid callback parameters.");
253+
return;
254+
}
255+
// TODO: Use the tensor information received from the sub-plugin. This should not be a problem for llama.cpp
256+
dimension[0] = len = strlen ((char *) output->data);
257+
ml_tensors_info_create (&info);
258+
ml_tensors_info_set_count (info, 1U);
259+
ml_tensors_info_set_tensor_type (info, 0U, ML_TENSOR_TYPE_UINT8);
260+
ml_tensors_info_set_tensor_dimension (info, 0U, dimension);
261+
262+
ret = ml_tensors_data_create (info, &data);
263+
if (ret != ML_ERROR_NONE) {
264+
_ml_loge("Failed to create tensors info. error: %d", ret);
265+
ml_tensors_info_destroy(info);
266+
g_free(output->data);
267+
return;
268+
}
269+
270+
ret = ml_tensors_data_set_tensor_data (data, 0U, output->data, len);
271+
if (ret != ML_ERROR_NONE) {
272+
_ml_loge("Failed to set tensor data. error: %d", ret);
273+
ml_tensors_data_destroy(data);
274+
ml_tensors_info_destroy(info);
275+
g_free(output->data);
276+
return;
277+
}
278+
g_free (output->data);
279+
280+
_ml_service_invoke_event_new_data (mls, NULL, data);
281+
}
282+
237283
/**
238284
* @brief Internal function to parse single-shot info from json.
239285
*/
@@ -352,8 +398,14 @@ _ml_extension_conf_parse_single (ml_service_s * mls, JsonObject * single)
352398
const gchar *invoke_async =
353399
json_object_get_string_member (single, "invoke_async");
354400

355-
if (STR_IS_VALID (invoke_async))
401+
if (STR_IS_VALID (invoke_async)) {
356402
ml_option_set (option, "invoke_async", g_strdup (invoke_async), g_free);
403+
}
404+
if (strcasecmp (invoke_async, "TRUE") == 0) {
405+
ml_option_set (option, "invoke_async_cb",
406+
(void *) _ml_extension_invoke_async_callback, NULL);
407+
ml_option_set (option, "invoke_async_cb_data", (void *) mls, NULL);
408+
}
357409
}
358410

359411
error:

tests/capi/unittest_capi_service_extension.cc

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,31 @@ TEST_REQUIRE_TFLITE (MLServiceExtension, scenarioConfigLlamacpp)
496496
EXPECT_EQ (status, ML_ERROR_NONE);
497497
}
498498

499+
/**
500+
* @brief Usage of ml-service extension API.
501+
*/
502+
TEST_REQUIRE_TFLITE (MLServiceExtension, scenarioConfigLlamacppAsync)
503+
{
504+
ml_service_h handle;
505+
int status;
506+
507+
g_autofree gchar *model_file = _get_model_path ("llama-2-7b-chat.Q2_K.gguf");
508+
if (!g_file_test (model_file, G_FILE_TEST_EXISTS)) {
509+
g_critical ("Skipping scenarioConfigLlamacppAsync test due to missing model file. "
510+
"Please download model file from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF");
511+
return;
512+
}
513+
514+
g_autofree gchar *config = get_config_path ("config_single_llamacpp_async.conf");
515+
516+
status = ml_service_new (config, &handle);
517+
ASSERT_EQ (status, ML_ERROR_NONE);
518+
519+
_extension_test_llamacpp (handle, FALSE);
520+
521+
status = ml_service_destroy (handle);
522+
EXPECT_EQ (status, ML_ERROR_NONE);
523+
}
499524

500525
/**
501526
* @brief Usage of ml-service extension API.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"single" :
3+
{
4+
"framework" : "llamacpp",
5+
"model" : ["../tests/test_models/models/llama-2-7b-chat.Q2_K.gguf"],
6+
"custom" : "num_predict:32",
7+
"invoke_dynamic" : "true",
8+
"invoke_async" : "true"
9+
}
10+
}

0 commit comments

Comments
 (0)