Merged
Changes from 6 commits
Commits
24 commits
65cb119
Adds custom inference service docs.
szabosteve Jul 9, 2025
ac77396
Adds response documentation.
szabosteve Jul 9, 2025
389ce57
Adds request params docs.
szabosteve Jul 9, 2025
a9a560e
Merge branch 'main' into szabosteve/infer-put-custom
szabosteve Jul 9, 2025
3c4eb3f
Fixes code style.
szabosteve Jul 9, 2025
3d90b42
Merge branch 'szabosteve/infer-put-custom' of github.com:elastic/elas…
szabosteve Jul 9, 2025
bc66328
Fixes data type.
szabosteve Jul 9, 2025
1b3fe33
Adds json_spec.
szabosteve Jul 9, 2025
fab50c4
Fixes typo.
szabosteve Jul 9, 2025
e185149
Adds doc_id to the table.csv file.
szabosteve Jul 9, 2025
859713d
Merge branch 'main' into szabosteve/infer-put-custom
szabosteve Jul 9, 2025
8051083
Makes it prettier.
szabosteve Jul 9, 2025
61c6a98
Merge branch 'szabosteve/infer-put-custom' of github.com:elastic/elas…
szabosteve Jul 9, 2025
e0963ae
Adds examples.
szabosteve Jul 10, 2025
bd8ec2b
Merge branch 'main' into szabosteve/infer-put-custom
szabosteve Jul 10, 2025
d568234
Format fix.
szabosteve Jul 10, 2025
b4c99e6
Addresses feedback.
szabosteve Jul 21, 2025
9dfe0ef
Adds more parameters and explanations.
szabosteve Jul 21, 2025
3961f12
Completes json_parser.
szabosteve Jul 21, 2025
a0baab6
Addresses feedback.
szabosteve Jul 21, 2025
d63c4d7
Format fix.
szabosteve Jul 21, 2025
294a143
Merge branch 'main' into szabosteve/infer-put-custom
szabosteve Jul 21, 2025
52c19aa
Addresses more feedback.
szabosteve Jul 21, 2025
f45f46f
Merge branch 'main' into szabosteve/infer-put-custom
szabosteve Jul 21, 2025
156 changes: 137 additions & 19 deletions specification/inference/_types/CommonTypes.ts
@@ -768,6 +768,38 @@ export class CustomServiceSettings {
* ```
*/
headers?: UserDefinedValue
/**
* Specifies the input type translation values that are used to replace the `${input_type}` template in the request body.
* For example:
* ```
* "input_type": {
* "translation": {
* "ingest": "do_ingest",
* "search": "do_search"
* },
* "default": "a_default"
* },
* ```
* If a subsequent inference request comes from a search context, the `search` key is used and the template is replaced with `do_search`.
* If it comes from the ingest context, `do_ingest` is used. If the request comes from a context that is not specified, the default value is used. If no default is specified, an empty string is used.
* `translation` can be:
* * `classification`
* * `clustering`
* * `ingest`
* * `search`
*/
input_type?: UserDefinedValue
/**
* Specifies the query parameters as a list of tuples. Each array inside `query_parameters` must contain exactly two items: a key and a value.
* For example:
* ```
* "query_parameters":[
* ["param_key", "some_value"],
* ["param_key", "another_value"]
* ]
* ```
*/
query_parameters?: UserDefinedValue
/**
* The request configuration object.
*/
@@ -797,9 +829,7 @@ export class CustomRequestParams {
* The body structure of the request. It requires passing in the string-escaped result of the JSON-formatted HTTP request body.
* For example:
* ```
* "request":{
* "content":"{\"input\":${input}}"
* }
* "request": "{\"input\":${input}}"
* ```
* > info
* > The content string needs to be a single line except when using the Kibana console.
@@ -808,39 +838,103 @@ export class CustomResponseParams {
}

export class CustomResponseParams {
/**
* Specifies the path to the error message in the response from the custom service.
* For example:
* ```
* "response": {
* "error_parser": {
* "path": "$.error.message"
* }
* }
* ```
*/
error_parser: UserDefinedValue
/**
* Specifies the JSON parser that is used to parse the response from the custom service.
* Different task types require different json_parser parameters.
* For example:
szabosteve (Contributor, Author) commented on Jul 9, 2025:
@jonathan-buttner Do you think we should specify a JsonParser class for each task type, or is this list sufficient?

Contributor reply:
Hmm I think it might be better if we give an example of the response structure for each task type and explain how to create the parser from that.

We should also say that the format is a less featured version of JSONPath: https://en.wikipedia.org/wiki/JSONPath

Here are some examples:

Text Embeddings

For a response that looks like:

            {
              "object": "list",
              "data": [
                  {
                      "object": "embedding",
                      "index": 0,
                      "embedding": [
                          0.014539449,
                          -0.015288644
                      ]
                  }
              ],
              "model": "text-embedding-ada-002-v2",
              "usage": {
                  "prompt_tokens": 8,
                  "total_tokens": 8
              }
            }

We'd need this definition:

        "response": {
            "json_parser": {
                "text_embeddings": "$.data[*].embedding[*]"
            }
        }
Rerank

For a response that looks like:

{
  "results": [
    {
      "index": 3,
      "relevance_score": 0.999071,
      "document": "abc"
    },
    {
      "index": 4,
      "relevance_score": 0.7867867,
      "document": "123"
    },
    {
      "index": 0,
      "relevance_score": 0.32713068,
      "document": "super"
    }
  ]
}

We'd need this definition:

        "response": {
            "json_parser": {
                "reranked_index":"$.results[*].index",
                "relevance_score":"$.results[*].relevance_score",
                "document_text":"$.results[*].document"
            }
        }

reranked_index and document_text are optional.

Completion

For a response that looks like:

{
  "id": "chatcmpl-B9MBs8CjcvOU2jLn4n570S5qMJKcT",
  "object": "chat.completion",
  "created": 1741569952,
  "model": "gpt-4.1-2025-04-14",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Hello! How can I assist you today?",
        "refusal": null,
        "annotations": []
      },
      "logprobs": null,
      "finish_reason": "stop"
    }
  ]
}

We'd need this definition:

        "response": {
            "json_parser": {
                "completion_result":"$.choices[*].message.content"
            }
        }
Sparse embedding

For a response that looks like:

{
    "request_id": "75C50B5B-E79E-4930-****-F48DBB392231",
    "latency": 22,
    "usage": {
        "token_count": 11
    },
    "result": {
        "sparse_embeddings": [
            {
                "index": 0,
                "embedding": [
                    {
                        "token_id": 6,
                        "weight": 0.101
                    },
                    {
                        "token_id": 163040,
                        "weight": 0.28417
                    }
                ]
            }
        ]
    }
}

We'd need this definition:

    "response": {
      "json_parser": {
        "token_path": "$.result.sparse_embeddings[*].embedding[*].token_id",
        "weight_path": "$.result.sparse_embeddings[*].embedding[*].weight"
      }
    }

If the token_path resulting value (token_id in this example) refers to a non-string (an integer in this example), it'll be converted to a string using Java's .toString() method. Not sure how we want to articulate that though 🤔
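To make the reduced-JSONPath idea concrete, here is a minimal TypeScript sketch of how paths such as `$.data[*].embedding[*]` could be resolved against a response, including the number-to-string conversion mentioned above. This is an illustration only; the type, function name, and behaviour are assumptions, not the service's actual implementation.

```typescript
// Sketch of a reduced JSONPath dialect: only `$`, dot-separated keys, and
// `[*]` wildcards are handled. Names and behaviour here are assumptions.
type Json = null | boolean | number | string | Json[] | { [key: string]: Json };

function extractPath(doc: Json, path: string): Json[] {
  // "$.data[*].embedding[*]" -> ["data[*]", "embedding[*]"]
  const segments = path.replace(/^\$\.?/, '').split('.').filter(Boolean);
  let current: Json[] = [doc];
  for (const segment of segments) {
    const wildcard = segment.endsWith('[*]');
    const key = wildcard ? segment.slice(0, -3) : segment;
    const next: Json[] = [];
    for (const node of current) {
      if (node === null || typeof node !== 'object' || Array.isArray(node)) continue;
      const value: Json | undefined = node[key];
      if (value === undefined) continue;
      if (wildcard && Array.isArray(value)) {
        next.push(...value); // `[*]` flattens one array level
      } else {
        next.push(value);
      }
    }
    current = next;
  }
  return current;
}

// Sparse-embedding example from the comment above: token IDs are numbers in the
// response, so they are converted to strings downstream (the `.toString()` note).
const response: Json = {
  result: { sparse_embeddings: [{ index: 0, embedding: [{ token_id: 6, weight: 0.101 }] }] },
};
const tokenIds = extractPath(response, '$.result.sparse_embeddings[*].embedding[*].token_id');
console.log(tokenIds.map((id) => String(id))); // ["6"]
```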

* ```
* # text_embedding
* # For a response like this:
*
* {
* "object": "list",
* "data": [
* {
* "object": "embedding",
* "index": 0,
* "embedding": [
* 0.014539449,
* -0.015288644
* ]
* }
* ],
* "model": "text-embedding-ada-002-v2",
* "usage": {
* "prompt_tokens": 8,
* "total_tokens": 8
* }
* }
*
* # the json_parser definition should look like this:
*
* "response":{
* "json_parser":{
* "text_embeddings":"$.result.embeddings[*].embedding"
* "text_embeddings":"$.data[*].embedding[*]"
* }
* }
*
* # sparse_embedding
* # For a response like this:
*
* {
* "request_id": "75C50B5B-E79E-4930-****-F48DBB392231",
* "latency": 22,
* "usage": {
* "token_count": 11
* },
* "result": {
* "sparse_embeddings": [
* {
* "index": 0,
* "embedding": [
* {
* "token_id": 6,
* "weight": 0.101
* },
* {
* "token_id": 163040,
* "weight": 0.28417
* }
* ]
* }
* ]
* }
* }
*
* # the json_parser definition should look like this:
*
* "response":{
* "json_parser":{
* "token_path":"$.result[*].embeddings[*].token",
* "weight_path":"$.result[*].embeddings[*].weight"
* "token_path":"$.result.sparse_embeddings[*].embedding[*].token_id",
* "weight_path":"$.result.sparse_embeddings[*].embedding[*].weight"
* }
* }
*
* # rerank
* # For a response like this:
*
* {
* "results": [
* {
* "index": 3,
* "relevance_score": 0.999071,
* "document": "abc"
* },
* {
* "index": 4,
* "relevance_score": 0.7867867,
* "document": "123"
* },
* {
* "index": 0,
* "relevance_score": 0.32713068,
* "document": "super"
* }
* ]
* }
*
* # the json_parser definition should look like this:
*
* "response":{
* "json_parser":{
* "reranked_index":"$.result.scores[*].index", // optional
@@ -850,9 +944,33 @@ export class CustomResponseParams {
* }
*
* # completion
* # For a response like this:
*
* {
* "id": "chatcmpl-B9MBs8CjcvOU2jLn4n570S5qMJKcT",
* "object": "chat.completion",
* "created": 1741569952,
* "model": "gpt-4.1-2025-04-14",
* "choices": [
* {
* "index": 0,
* "message": {
* "role": "assistant",
* "content": "Hello! How can I assist you today?",
* "refusal": null,
* "annotations": []
* },
* "logprobs": null,
* "finish_reason": "stop"
* }
* ]
* }
*
* # the json_parser definition should look like this:
*
* "response":{
* "json_parser":{
* "completion_result":"$.result.text"
* "completion_result":"$.choices[*].message.content"
* }
* }
*/
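As a rough illustration of the `input_type` fallback order documented in `CustomServiceSettings` above (context-specific translation, then `default`, then an empty string), here is a hedged TypeScript sketch; the interface and function names are assumptions, not part of the spec.

```typescript
// Sketch of the documented fallback order for `${input_type}`:
// translation[context] -> default -> empty string. Names are illustrative.
interface InputTypeSettings {
  translation?: { [context: string]: string }; // e.g. { ingest: "do_ingest", search: "do_search" }
  default?: string;
}

function resolveInputType(settings: InputTypeSettings | undefined, context: string): string {
  if (!settings) return '';
  const translated = settings.translation?.[context];
  if (translated !== undefined) return translated; // 1. context-specific translation
  if (settings.default !== undefined) return settings.default; // 2. configured default
  return ''; // 3. no default: empty string
}

const settings = { translation: { ingest: 'do_ingest', search: 'do_search' }, default: 'a_default' };
console.log(resolveInputType(settings, 'search'));     // "do_search"
console.log(resolveInputType(settings, 'clustering')); // "a_default"
console.log(resolveInputType({ translation: { ingest: 'do_ingest' } }, 'clustering')); // ""
```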
41 changes: 40 additions & 1 deletion specification/inference/put_custom/PutCustomRequest.ts
@@ -30,7 +30,46 @@ import { InferenceChunkingSettings } from '@inference/_types/Services'
/**
* Create a custom inference endpoint.
*
* You can create an inference endpoint to perform an inference task with a custom model that supports the HTTP format.
* The custom service gives you more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations.
* It gives you the ability to define the headers, URL, query parameters, request body, and secrets.
* The custom service supports template replacement: a template in the configuration is replaced with the value of the parameter whose key matches the template.
* Templates are portions of a string that start with `${` and end with `}`.
* The `secret_parameters` and `task_settings` parameters are checked for keys for template replacement. Template replacement is supported in the `request`, `headers`, `url`, and `query_parameters` fields.
* If the definition (key) is not found for a template, an error message is returned.
* For example, given an endpoint definition like the following:
* ```
* PUT _inference/text_embedding/test-text-embedding
* {
* "service": "custom",
* "service_settings": {
* "secret_parameters": {
* "api_key": "<some api key>"
* },
* "url": "...endpoints.huggingface.cloud/v1/embeddings",
* "headers": {
* "Authorization": "Bearer ${api_key}",
* "Content-Type": "application/json"
* },
* "request": "{\"input\": ${input}}",
* "response": {
* "json_parser": {
* "text_embeddings":"$.data[*].embedding[*]"
* }
* }
* }
* }
* ```
* To replace `${api_key}`, the `secret_parameters` and `task_settings` are checked for a key named `api_key`.
*
* > info
* > Templates should not be surrounded by quotes.
*
* Pre-defined templates:
* * `${input}` refers to the array of input strings that comes from the `input` field of the subsequent inference requests.
* * `${input_type}` refers to the input type translation values.
* * `${query}` refers to the query field used specifically for reranking tasks.
* * `${top_n}` refers to the `top_n` field available when performing rerank requests.
* * `${return_documents}` refers to the `return_documents` field available when performing rerank requests.
* @rest_spec_name inference.put_custom
* @availability stack since=8.13.0 stability=stable visibility=public
* @availability serverless stability=stable visibility=public
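For illustration, here is a minimal TypeScript sketch of the template-replacement rule described in the doc comment above: `${...}` placeholders are resolved from `secret_parameters` and `task_settings`, and a missing key produces an error. The function name and error message are assumptions, not the actual implementation.

```typescript
// Sketch of the documented template-replacement rule: `${key}` placeholders are
// looked up in secret_parameters and task_settings; a missing key is an error.
function replaceTemplates(
  text: string,
  secretParameters: { [key: string]: string },
  taskSettings: { [key: string]: string }
): string {
  return text.replace(/\$\{(\w+)\}/g, (_match, key: string) => {
    const value: string | undefined = secretParameters[key] ?? taskSettings[key];
    if (value === undefined) {
      throw new Error(`No definition found for template [${key}]`);
    }
    return value;
  });
}

// The Authorization header from the endpoint definition above:
console.log(replaceTemplates('Bearer ${api_key}', { api_key: '<some api key>' }, {}));
// -> "Bearer <some api key>"
```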