Skip to content

Commit 207638f

Browse files
Update specification
1 parent 16e0155 commit 207638f

File tree

7 files changed

+1193
-149
lines changed

7 files changed

+1193
-149
lines changed

output/openapi/elasticsearch-openapi.json

Lines changed: 267 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/openapi/elasticsearch-serverless-openapi.json

Lines changed: 264 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/schema/schema.json

Lines changed: 637 additions & 133 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/typescript/types.ts

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

specification/inference/_types/CommonTypes.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1524,7 +1524,7 @@ export class LlamaServiceSettings {
15241524
/**
15251525
* The URL of the Llama stack endpoint.
15261526
* URL must contain:
1527-
* * For `text_embedding` task - `/v1/inference/embeddings`.
1527+
* * For `text_embedding` task - `/v1/openai/v1/embeddings`.
15281528
* * For `completion` and `chat_completion` tasks - `/v1/openai/v1/chat/completions`.
15291529
*/
15301530
url: string
@@ -1552,7 +1552,9 @@ export class LlamaServiceSettings {
15521552
*/
15531553
max_input_tokens?: integer
15541554
/**
1555-
* For a `text_embedding` task, the number of dimensions the resulting output embeddings should have.
1555+
* For a `text_embedding` task, the number of dimensions the resulting output embeddings must have.
1556+
* It is supported only in `text-embedding-3` and later models. If it is not set by the user, it defaults to the number of dimensions returned by the model.
1557+
* If the model returns embeddings with a different number of dimensions, an error is returned.
15561558
*/
15571559
dimensions?: integer
15581560
/**
@@ -1566,6 +1568,14 @@ export class LlamaServiceSettings {
15661568
rate_limit?: RateLimitSetting
15671569
}
15681570

1571+
export class LlamaTaskSettings {
1572+
/**
1573+
* For a `completion` or `text_embedding` task, specify the user issuing the request.
1574+
* This information can be used for abuse detection.
1575+
*/
1576+
user?: string
1577+
}
1578+
15691579
export enum LlamaTaskType {
15701580
text_embedding,
15711581
completion,

specification/inference/put_llama/PutLlamaRequest.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import { Duration } from '@_types/Time'
2323
import {
2424
LlamaServiceSettings,
2525
LlamaServiceType,
26+
LlamaTaskSettings,
2627
LlamaTaskType
2728
} from '@inference/_types/CommonTypes'
2829
import { InferenceChunkingSettings } from '@inference/_types/Services'
@@ -75,5 +76,10 @@ export interface Request extends RequestBase {
7576
* Settings used to install the inference model. These settings are specific to the `llama` service.
7677
*/
7778
service_settings: LlamaServiceSettings
79+
/**
80+
* Settings to configure the inference task.
81+
* These settings are specific to the task type you specified.
82+
*/
83+
task_settings?: LlamaTaskSettings
7884
}
7985
}

specification/inference/put_llama/examples/request/PutLlamaRequestExample1.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ value: |-
66
{
77
"service": "llama",
88
"service_settings": {
9-
"url": "http://localhost:8321/v1/inference/embeddings"
9+
"url": "http://localhost:8321/v1/openai/v1/embeddings",
10+
"dimensions": 384,
1011
"api_key": "llama-api-key",
1112
"model_id": "all-MiniLM-L6-v2"
1213
}

0 commit comments

Comments
 (0)