feat: Support for request id field in generate API (#7392)

shreyas-samsung · web-flow · commit d1780d1fb04d · 2024-07-10T12:10:48.000-07:00
diff --git a/docs/protocol/extension_generate.md b/docs/protocol/extension_generate.md
@@ -1,5 +1,5 @@
 <!--
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -87,10 +87,12 @@ return an error.
 
     $generate_request =
     {
+      "id" : $string, #optional
       "text_input" : $string,
       "parameters" : $parameters #optional
     }
 
+* "id" : An identifier for this request. Optional, but if specified this identifier must be returned in the response.
 * "text_input" : The text input that the model should generate output from.
 * "parameters" : An optional object containing zero or more parameters for this
   generate request expressed as key/value pairs. See
@@ -121,14 +123,15 @@ specification to set the parameters.
 Below is an example to send generate request with additional model parameters `stream` and `temperature`.
 
 ```
-$ curl -X POST localhost:8000/v2/models/mymodel/generate -d '{"text_input": "client input", "parameters": {"stream": false, "temperature": 0}}'
+$ curl -X POST localhost:8000/v2/models/mymodel/generate -d '{"id": "42", "text_input": "client input", "parameters": {"stream": false, "temperature": 0}}'
 
 POST /v2/models/mymodel/generate HTTP/1.1
 Host: localhost:8000
 Content-Type: application/json
 Content-Length: <xx>
 {
-  "text_input":  "client input",
+  "id" : "42",
+  "text_input" :  "client input",
   "parameters" :
     {
       "stream": false,
@@ -145,11 +148,13 @@ the HTTP body.
 
     $generate_response =
     {
+      "id" : $string, #optional
       "model_name" : $string,
       "model_version" : $string,
       "text_output" : $string
     }
 
+* "id" : The "id" identifier given in the request, if any.
 * "model_name" : The name of the model used for inference.
 * "model_version" : The specific model version used for inference.
 * "text_output" : The output of the inference.
@@ -159,6 +164,7 @@ the HTTP body.
 ```
 200
 {
+  "id" : "42",
   "model_name" : "mymodel",
   "model_version" : "1",
   "text_output" : "model output"
diff --git a/qa/L0_http/generate_endpoint_test.py b/qa/L0_http/generate_endpoint_test.py
@@ -142,6 +142,49 @@ def test_generate(self):
         self.assertIn("TEXT", data)
         self.assertEqual(text, data["TEXT"])
 
+    def test_request_id(self):
+        # Setup text based input
+        text = "hello world"
+        request_id = "42"
+
+        # Test when request id in request body
+        inputs = {"PROMPT": text, "id": request_id, "STREAM": False}
+        r = self.generate(self._model_name, inputs)
+        r.raise_for_status()
+
+        self.assertIn("Content-Type", r.headers)
+        self.assertEqual(r.headers["Content-Type"], "application/json")
+
+        data = r.json()
+        self.assertIn("id", data)
+        self.assertEqual(request_id, data["id"])
+        self.assertIn("TEXT", data)
+        self.assertEqual(text, data["TEXT"])
+
+        # Test when request id not in request body
+        inputs = {"PROMPT": text, "STREAM": False}
+        r = self.generate(self._model_name, inputs)
+        r.raise_for_status()
+
+        self.assertIn("Content-Type", r.headers)
+        self.assertEqual(r.headers["Content-Type"], "application/json")
+
+        data = r.json()
+        self.assertNotIn("id", data)
+
+        # Test when request id is empty
+        inputs = {"PROMPT": text, "id": "", "STREAM": False}
+        r = self.generate(self._model_name, inputs)
+        r.raise_for_status()
+
+        self.assertIn("Content-Type", r.headers)
+        self.assertEqual(r.headers["Content-Type"], "application/json")
+
+        data = r.json()
+        self.assertNotIn("id", data)
+        self.assertIn("TEXT", data)
+        self.assertEqual(text, data["TEXT"])
+
     def test_generate_stream(self):
         # Setup text-based input
         text = "hello world"
diff --git a/qa/L0_http/test.sh b/qa/L0_http/test.sh
@@ -662,7 +662,7 @@ fi
 ## Python Unit Tests
 TEST_RESULT_FILE='test_results.txt'
 PYTHON_TEST=generate_endpoint_test.py
-EXPECTED_NUM_TESTS=15
+EXPECTED_NUM_TESTS=16
 set +e
 python $PYTHON_TEST >$CLIENT_LOG 2>&1
 if [ $? -ne 0 ]; then
diff --git a/src/http_server.cc b/src/http_server.cc
@@ -3327,6 +3327,8 @@ HTTPAPIServer::HandleGenerate(
   //   thus the string must live as long as the JSON message).
   triton::common::TritonJson::Value request;
   RETURN_AND_CALLBACK_IF_ERR(EVRequestToJson(req, &request), error_callback);
+  RETURN_AND_CALLBACK_IF_ERR(
+      ParseJsonTritonRequestID(request, irequest), error_callback);
 
   RETURN_AND_CALLBACK_IF_ERR(
       generate_request->ConvertGenerateRequest(

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`<!--`
`2`		`-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.`
	`2`	`+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.`
`3`	`3`	`#`
`4`	`4`	`# Redistribution and use in source and binary forms, with or without`
`5`	`5`	`# modification, are permitted provided that the following conditions`
`@@ -87,10 +87,12 @@ return an error.`
`87`	`87`
`88`	`88`	`$generate_request =`
`89`	`89`	`{`
	`90`	`+ "id" : $string, #optional`
`90`	`91`	`"text_input" : $string,`
`91`	`92`	`"parameters" : $parameters #optional`
`92`	`93`	`}`
`93`	`94`
	`95`	`+* "id" : An identifier for this request. Optional, but if specified this identifier must be returned in the response.`
`94`	`96`	`* "text_input" : The text input that the model should generate output from.`
`95`	`97`	`* "parameters" : An optional object containing zero or more parameters for this`
`96`	`98`	`generate request expressed as key/value pairs. See`
`@@ -121,14 +123,15 @@ specification to set the parameters.`
`121`	`123`	Below is an example to send generate request with additional model parameters `stream` and `temperature`.
`122`	`124`
`123`	`125`	```
`124`		`-$ curl -X POST localhost:8000/v2/models/mymodel/generate -d '{"text_input": "client input", "parameters": {"stream": false, "temperature": 0}}'`
	`126`	`+$ curl -X POST localhost:8000/v2/models/mymodel/generate -d '{"id": "42", "text_input": "client input", "parameters": {"stream": false, "temperature": 0}}'`
`125`	`127`
`126`	`128`	`POST /v2/models/mymodel/generate HTTP/1.1`
`127`	`129`	`Host: localhost:8000`
`128`	`130`	`Content-Type: application/json`
`129`	`131`	`Content-Length: <xx>`
`130`	`132`	`{`
`131`		`- "text_input": "client input",`
	`133`	`+ "id" : "42",`
	`134`	`+ "text_input" : "client input",`
`132`	`135`	`"parameters" :`
`133`	`136`	`{`
`134`	`137`	`"stream": false,`
`@@ -145,11 +148,13 @@ the HTTP body.`
`145`	`148`
`146`	`149`	`$generate_response =`
`147`	`150`	`{`
	`151`	`+ "id" : $string, #optional`
`148`	`152`	`"model_name" : $string,`
`149`	`153`	`"model_version" : $string,`
`150`	`154`	`"text_output" : $string`
`151`	`155`	`}`
`152`	`156`
	`157`	`+* "id" : The "id" identifier given in the request, if any.`
`153`	`158`	`* "model_name" : The name of the model used for inference.`
`154`	`159`	`* "model_version" : The specific model version used for inference.`
`155`	`160`	`* "text_output" : The output of the inference.`
`@@ -159,6 +164,7 @@ the HTTP body.`
`159`	`164`	```
`160`	`165`	`200`
`161`	`166`	`{`
	`167`	`+ "id" : "42",`
`162`	`168`	`"model_name" : "mymodel",`
`163`	`169`	`"model_version" : "1",`
`164`	`170`	`"text_output" : "model output"`