Skip to content

Commit 559e6d2

Browse files
daniel-rodrigues809 and Daniel Rodrigues authored
Add deploymenttemplates to swagger (#36175)
* typespec changes
* fix compile
* add newline
* fix v2 in path and add list
* try with lro options
* recompile
* set parameter location
* fix package
* reset package lock and try config changes
* fixes
* add examples
* examples fixes
* fix examples 2.0
* compile and fix examples
* test
* does this work?
* try accepted response
* introduce long running response
* set polling location response
* fix examples
* fix location

---------

Co-authored-by: Daniel Rodrigues <[email protected]>
1 parent 8052426 commit 559e6d2

File tree

59 files changed

+2875
-47
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+2875
-47
lines changed

specification/machinelearningservices/AzureAI.Assets/common.tsp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,55 @@ model SystemData {
2828
@doc("The timestamp of resource last modification (UTC)")
2929
lastModifiedAt?: utcDateTime;
3030
}
31+
32+
// Request-handling settings with two members, both required (neither is
// marked `?`): a `duration` timeout and an `int32` per-instance concurrency cap.
// NOTE(review): the example payloads in this commit carry a `requestSettings`
// object with these two keys (e.g. "PT90S" and 10); presumably that property
// is typed with this model — confirm against the DeploymentTemplate model.
@doc("Settings for online request configuration.")
33+
model OnlineRequestSettings {
34+
@doc("The timeout duration for requests.")
35+
requestTimeout: duration;
36+
37+
@doc("The maximum number of concurrent requests per instance.")
38+
maxConcurrentRequestsPerInstance: int32;
39+
}
40+
41+
// The only member is `location`, marked with @pollingLocation and typed as a
// ResourceLocation of an operation status whose result is PutAssetLROResponse.
// The model description no longer repeats the type name, so the generated
// swagger description differs from the node name.
@doc("Response for a long-running operation that returns no resource body, only the location to poll for the operation status.")
42+
model LongRunningNullResponse {
43+
@doc("URI to poll for the status of the long-running operation.")
44+
@pollingLocation
45+
location: ResourceLocation<ResourceOperationStatus<PutAssetLROResponse>>;
46+
}
47+
48+
// Result type referenced by LongRunningNullResponse.location via
// ResourceOperationStatus<PutAssetLROResponse>.
// The model-level description now describes the model, and the property
// description now describes the property.
// NOTE(review): the `assetId` wording is inferred from the property name —
// confirm the exact ID format with the service owners.
@doc("Result of a completed put asset long-running operation.")
49+
model PutAssetLROResponse {
50+
@doc("Identifier of the asset returned when the put asset operation completes.")
51+
assetId: string;
52+
}
53+
54+
// Probe configuration model. Every member is required except `initialDelay`,
// the only one declared optional (`?`). The three timing members are
// `duration`, the thresholds and port are `int32`, the rest are `string`.
// NOTE(review): the example payloads in this commit use objects with exactly
// these keys for both `livenessProbe` and `readinessProbe`; presumably both
// properties are typed with this model — confirm.
@doc("Settings for probe configuration.")
55+
model ProbeSettings {
56+
@doc("The initial delay before starting probes.")
57+
initialDelay?: duration;
58+
59+
@doc("The period between probe executions.")
60+
period: duration;
61+
62+
@doc("The timeout duration for each probe.")
63+
timeout: duration;
64+
65+
@doc("The number of consecutive failures required to consider the probe as failed.")
66+
failureThreshold: int32;
67+
68+
@doc("The number of consecutive successes required to consider the probe as successful.")
69+
successThreshold: int32;
70+
71+
@doc("The path for the probe request.")
72+
path: string;
73+
74+
@doc("The port number for the probe.")
75+
port: int32;
76+
77+
// NOTE(review): declared as a free-form string although the description
// lists HTTP/HTTPS as examples; consider a union/enum if the set is closed —
// confirm with the service.
@doc("The scheme for the probe (e.g., HTTP, HTTPS).")
78+
scheme: string;
79+
80+
// NOTE(review): also a free-form string; the examples only show "GET".
// Confirm which methods the service accepts.
@doc("The HTTP method for the probe request.")
81+
httpMethod: string;
82+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
{
2+
"title": "DeploymentTemplates_Create - generated by [MaximumSet] rule",
3+
"operationId": "DeploymentTemplates_Create",
4+
"parameters": {
5+
"api-version": "2024-04-01-preview",
6+
"registryName": "myRegistry",
7+
"name": "ai21-jamba-template",
8+
"version": "1.0.0",
9+
"body": {
10+
"deploymentTemplateType": "Managed",
11+
"environmentId": "azureml://registries/myRegistry/environments/ai21-jamba-env/versions/1",
12+
"environmentVariables": {
13+
"AZUREML_MODEL_DIR": "/var/azureml-app/azureml-models/model-1/1",
14+
"WORLD_SIZE": "2",
15+
"MAX_PROMPT_LENGTH": "65536",
16+
"CUDA_VISIBLE_DEVICES": "0,1"
17+
},
18+
"allowedEnvironmentVariableOverrides": [
19+
"WORLD_SIZE",
20+
"MAX_PROMPT_LENGTH",
21+
"CUDA_VISIBLE_DEVICES"
22+
],
23+
"modelMountPath": "/var/azureml-app",
24+
"requestSettings": {
25+
"requestTimeout": "PT90S",
26+
"maxConcurrentRequestsPerInstance": 10
27+
},
28+
"livenessProbe": {
29+
"initialDelay": "PT300S",
30+
"period": "PT10S",
31+
"timeout": "PT2S",
32+
"failureThreshold": 30,
33+
"successThreshold": 1,
34+
"path": "/health",
35+
"port": 8080,
36+
"scheme": "HTTP",
37+
"httpMethod": "GET"
38+
},
39+
"readinessProbe": {
40+
"initialDelay": "PT300S",
41+
"period": "PT10S",
42+
"timeout": "PT2S",
43+
"failureThreshold": 30,
44+
"successThreshold": 1,
45+
"path": "/ready",
46+
"port": 8080,
47+
"scheme": "HTTP",
48+
"httpMethod": "GET"
49+
},
50+
"allowedInstanceType": [
51+
"Standard_NC24ads_A100_v4",
52+
"Standard_NC48ads_A100_v4",
53+
"Standard_NC96ads_A100_v4"
54+
],
55+
"defaultInstanceType": "Standard_NC24ads_A100_v4",
56+
"instanceCount": 1,
57+
"scoringPath": "/score",
58+
"scoringPort": 8080,
59+
"stage": "Development",
60+
"description": "AI21 Jamba deployment template for large language model inference",
61+
"tags": {
62+
"modelFamily": "ai21-jamba",
63+
"taskType": "text-generation",
64+
"framework": "transformers"
65+
},
66+
"properties": {
67+
"modelSize": "large",
68+
"accelerator": "gpu",
69+
"precision": "fp16"
70+
}
71+
}
72+
},
73+
"responses": {
74+
"200": {
75+
"body": {
76+
"location": "https://management.azure.com/subscriptions/12345678-1234-1234-1234-123456789abc/resourceGroups/myResourceGroup/providers/Microsoft.MachineLearningServices/registries/myRegistry/deploymenttemplates/ai21-jamba-template/versions/1.0.0/operations/operation-id-12345"
77+
}
78+
}
79+
}
80+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"title": "DeploymentTemplates_Delete - generated by [MaximumSet] rule",
3+
"operationId": "DeploymentTemplates_Delete",
4+
"parameters": {
5+
"api-version": "2024-04-01-preview",
6+
"registryName": "myRegistry",
7+
"name": "ai21-jamba-template",
8+
"version": "1.0.0"
9+
},
10+
"responses": {
11+
"200": {},
12+
"204": {}
13+
}
14+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
{
2+
"title": "DeploymentTemplates_Get - generated by [MaximumSet] rule",
3+
"operationId": "DeploymentTemplates_Get",
4+
"parameters": {
5+
"api-version": "2024-04-01-preview",
6+
"registryName": "myRegistry",
7+
"name": "ai21-jamba-template",
8+
"version": "1.0.0",
9+
"assetResourceTenantId": "72f988bf-86f1-41af-91ab-2d7cd011db47"
10+
},
11+
"responses": {
12+
"200": {
13+
"body": {
14+
"deploymentTemplateType": "Managed",
15+
"environmentId": "azureml://registries/myRegistry/environments/ai21-jamba-env/versions/1",
16+
"environmentVariables": {
17+
"AZUREML_MODEL_DIR": "/var/azureml-app/azureml-models/model-1/1",
18+
"WORLD_SIZE": "2",
19+
"MAX_PROMPT_LENGTH": "65536",
20+
"CUDA_VISIBLE_DEVICES": "0,1"
21+
},
22+
"allowedEnvironmentVariableOverrides": [
23+
"WORLD_SIZE",
24+
"MAX_PROMPT_LENGTH",
25+
"CUDA_VISIBLE_DEVICES"
26+
],
27+
"modelMountPath": "/var/azureml-app",
28+
"requestSettings": {
29+
"requestTimeout": "PT90S",
30+
"maxConcurrentRequestsPerInstance": 10
31+
},
32+
"livenessProbe": {
33+
"initialDelay": "PT300S",
34+
"period": "PT10S",
35+
"timeout": "PT2S",
36+
"failureThreshold": 30,
37+
"successThreshold": 1,
38+
"path": "/health",
39+
"port": 8080,
40+
"scheme": "HTTP",
41+
"httpMethod": "GET"
42+
},
43+
"readinessProbe": {
44+
"initialDelay": "PT300S",
45+
"period": "PT10S",
46+
"timeout": "PT2S",
47+
"failureThreshold": 30,
48+
"successThreshold": 1,
49+
"path": "/ready",
50+
"port": 8080,
51+
"scheme": "HTTP",
52+
"httpMethod": "GET"
53+
},
54+
"allowedInstanceType": [
55+
"Standard_NC24ads_A100_v4",
56+
"Standard_NC48ads_A100_v4",
57+
"Standard_NC96ads_A100_v4"
58+
],
59+
"defaultInstanceType": "Standard_NC24ads_A100_v4",
60+
"instanceCount": 1,
61+
"scoringPath": "/score",
62+
"scoringPort": 8080,
63+
"id": "azureml://registries/myRegistry/deploymenttemplates/ai21-jamba-template/versions/1.0.0",
64+
"stage": "Development",
65+
"description": "AI21 Jamba deployment template for large language model inference",
66+
"systemData": {
67+
"createdAt": "2024-09-17T10:30:15.123Z",
68+
"createdBy": "[email protected]",
69+
"createdByType": "User",
70+
"lastModifiedAt": "2024-09-17T10:30:15.123Z"
71+
},
72+
"tags": {
73+
"modelFamily": "ai21-jamba",
74+
"taskType": "text-generation",
75+
"framework": "transformers"
76+
},
77+
"properties": {
78+
"modelSize": "large",
79+
"accelerator": "gpu",
80+
"precision": "fp16"
81+
}
82+
}
83+
}
84+
}
85+
}
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
{
2+
"title": "DeploymentTemplates_List - generated by [MaximumSet] rule",
3+
"operationId": "DeploymentTemplates_List",
4+
"parameters": {
5+
"api-version": "2024-04-01-preview",
6+
"registryName": "myRegistry",
7+
"name": "ai21-jamba-template",
8+
"tags": "modelFamily=ai21-jamba,taskType=text-generation",
9+
"continuationToken": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
10+
"count": 10,
11+
"stage": "Development",
12+
"listViewType": "ActiveOnly"
13+
},
14+
"responses": {
15+
"200": {
16+
"body": {
17+
"value": [
18+
{
19+
"deploymentTemplateType": "Managed",
20+
"environmentId": "azureml://registries/myRegistry/environments/ai21-jamba-env/versions/1",
21+
"environmentVariables": {
22+
"AZUREML_MODEL_DIR": "/var/azureml-app/azureml-models/model-1/1",
23+
"WORLD_SIZE": "2",
24+
"MAX_PROMPT_LENGTH": "65536"
25+
},
26+
"allowedEnvironmentVariableOverrides": [
27+
"WORLD_SIZE",
28+
"MAX_PROMPT_LENGTH"
29+
],
30+
"modelMountPath": "/var/azureml-app",
31+
"requestSettings": {
32+
"requestTimeout": "PT90S",
33+
"maxConcurrentRequestsPerInstance": 10
34+
},
35+
"livenessProbe": {
36+
"initialDelay": "PT300S",
37+
"period": "PT10S",
38+
"timeout": "PT2S",
39+
"failureThreshold": 30,
40+
"successThreshold": 1,
41+
"path": "/health",
42+
"port": 8080,
43+
"scheme": "HTTP",
44+
"httpMethod": "GET"
45+
},
46+
"readinessProbe": {
47+
"initialDelay": "PT300S",
48+
"period": "PT10S",
49+
"timeout": "PT2S",
50+
"failureThreshold": 30,
51+
"successThreshold": 1,
52+
"path": "/ready",
53+
"port": 8080,
54+
"scheme": "HTTP",
55+
"httpMethod": "GET"
56+
},
57+
"allowedInstanceType": [
58+
"Standard_NC24ads_A100_v4",
59+
"Standard_NC48ads_A100_v4"
60+
],
61+
"defaultInstanceType": "Standard_NC24ads_A100_v4",
62+
"instanceCount": 1,
63+
"scoringPath": "/score",
64+
"scoringPort": 8080,
65+
"id": "azureml://registries/myRegistry/deploymenttemplates/ai21-jamba-template/versions/1.0.0",
66+
"stage": "Development",
67+
"description": "AI21 Jamba deployment template for large language model inference",
68+
"systemData": {
69+
"createdAt": "2024-09-17T10:30:15.123Z",
70+
"createdBy": "[email protected]",
71+
"createdByType": "User",
72+
"lastModifiedAt": "2024-09-17T10:30:15.123Z"
73+
},
74+
"tags": {
75+
"modelFamily": "ai21-jamba",
76+
"taskType": "text-generation",
77+
"framework": "transformers"
78+
},
79+
"properties": {
80+
"modelSize": "large",
81+
"accelerator": "gpu",
82+
"precision": "fp16"
83+
}
84+
},
85+
{
86+
"deploymentTemplateType": "Managed",
87+
"environmentId": "azureml://registries/myRegistry/environments/ai21-jamba-env/versions/2",
88+
"environmentVariables": {
89+
"AZUREML_MODEL_DIR": "/var/azureml-app/azureml-models/model-1/1",
90+
"WORLD_SIZE": "4",
91+
"MAX_PROMPT_LENGTH": "32768"
92+
},
93+
"allowedEnvironmentVariableOverrides": [
94+
"WORLD_SIZE",
95+
"MAX_PROMPT_LENGTH"
96+
],
97+
"modelMountPath": "/var/azureml-app",
98+
"requestSettings": {
99+
"requestTimeout": "PT120S",
100+
"maxConcurrentRequestsPerInstance": 5
101+
},
102+
"livenessProbe": {
103+
"initialDelay": "PT300S",
104+
"period": "PT10S",
105+
"timeout": "PT5S",
106+
"failureThreshold": 30,
107+
"successThreshold": 1,
108+
"path": "/health",
109+
"port": 8080,
110+
"scheme": "HTTP",
111+
"httpMethod": "GET"
112+
},
113+
"readinessProbe": {
114+
"initialDelay": "PT300S",
115+
"period": "PT10S",
116+
"timeout": "PT5S",
117+
"failureThreshold": 30,
118+
"successThreshold": 1,
119+
"path": "/ready",
120+
"port": 8080,
121+
"scheme": "HTTP",
122+
"httpMethod": "GET"
123+
},
124+
"allowedInstanceType": [
125+
"Standard_NC48ads_A100_v4",
126+
"Standard_NC96ads_A100_v4"
127+
],
128+
"defaultInstanceType": "Standard_NC48ads_A100_v4",
129+
"instanceCount": 2,
130+
"scoringPath": "/score",
131+
"scoringPort": 8080,
132+
"id": "azureml://registries/myRegistry/deploymenttemplates/ai21-jamba-template/versions/2.0.0",
133+
"stage": "Development",
134+
"description": "AI21 Jamba deployment template for large language model inference - optimized version",
135+
"systemData": {
136+
"createdAt": "2024-09-17T11:45:30.456Z",
137+
"createdBy": "[email protected]",
138+
"createdByType": "User",
139+
"lastModifiedAt": "2024-09-17T11:45:30.456Z"
140+
},
141+
"tags": {
142+
"modelFamily": "ai21-jamba",
143+
"taskType": "text-generation",
144+
"framework": "transformers",
145+
"optimized": "true"
146+
},
147+
"properties": {
148+
"modelSize": "large",
149+
"accelerator": "gpu",
150+
"precision": "fp16",
151+
"distributed": "true"
152+
}
153+
}
154+
],
155+
"nextLink": "https://management.azure.com/subscriptions/12345678-1234-1234-1234-123456789abc/resourceGroups/myResourceGroup/providers/Microsoft.MachineLearningServices/registries/myRegistry/deploymenttemplates?api-version=2024-04-01-preview&continuationToken=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.next"
156+
}
157+
}
158+
}
159+
}

0 commit comments

Comments
 (0)