Skip to content

Commit 912f691

Browse files
authored
Add deployment pause/resume APIs and service tier management (#37935)
* feat(cognitiveservices): Add deployment pause/resume APIs and service tier management
  - Add serviceTier enum property to DeploymentProperties (Default, Priority)
  - Add deploymentState enum property to DeploymentProperties (Running, Paused)
  - Add POST /deployments/{deploymentName}/pause endpoint for pausing deployments
  - Add POST /deployments/{deploymentName}/resume endpoint for resuming deployments
  - Create API examples for pause and resume operations
  - Update existing deployment examples to include new properties
  - Add comprehensive descriptions with usage guidance and SKU restrictions
  - Document HTTP 423 behavior for paused deployments
  - Mark both operations as idempotent

  This enables customers to control deployment processing priority through service tiers and pause/resume deployment inferencing for cost optimization and resource management.
* need to add pattern for lint errors
* deploymentNameForActionParameter
1 parent 0b1ad6b commit 912f691

File tree

7 files changed

+222
-1
lines changed

7 files changed

+222
-1
lines changed

specification/cognitiveservices/resource-manager/Microsoft.CognitiveServices/preview/2025-10-01-preview/cognitiveservices.json

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1664,6 +1664,98 @@
16641664
}
16651665
}
16661666
},
1667+
"/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.CognitiveServices/accounts/{accountName}/deployments/{deploymentName}/pause": {
1668+
"post": {
1669+
"tags": [
1670+
"Deployments"
1671+
],
1672+
"operationId": "Deployments_Pause",
1673+
"summary": "Pause a deployment",
1674+
"description": "Pauses inferencing on a deployment by setting the deploymentState to 'Paused' (see #/definitions/DeploymentProperties/properties/deploymentState). Only Standard, DataZoneStandard, and GlobalStandard SKUs support this operation. Inference requests to the paused deployment endpoint will receive HTTP 423 (Locked). This operation is idempotent.",
1675+
"x-ms-examples": {
1676+
"PauseDeployment": {
1677+
"$ref": "./examples/PauseDeployment.json"
1678+
}
1679+
},
1680+
"parameters": [
1681+
{
1682+
"$ref": "../../../../../common-types/resource-management/v3/types.json#/parameters/ResourceGroupNameParameter"
1683+
},
1684+
{
1685+
"$ref": "#/parameters/accountNameParameter"
1686+
},
1687+
{
1688+
"$ref": "../../../../../common-types/resource-management/v3/types.json#/parameters/ApiVersionParameter"
1689+
},
1690+
{
1691+
"$ref": "../../../../../common-types/resource-management/v3/types.json#/parameters/SubscriptionIdParameter"
1692+
},
1693+
{
1694+
"$ref": "#/parameters/deploymentNameForActionParameter"
1695+
}
1696+
],
1697+
"responses": {
1698+
"200": {
1699+
"description": "OK -- Deployment paused successfully.",
1700+
"schema": {
1701+
"$ref": "#/definitions/Deployment"
1702+
}
1703+
},
1704+
"default": {
1705+
"description": "Error response describing why the operation failed.",
1706+
"schema": {
1707+
"$ref": "../../../../../common-types/resource-management/v3/types.json#/definitions/ErrorResponse"
1708+
}
1709+
}
1710+
}
1711+
}
1712+
},
1713+
"/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.CognitiveServices/accounts/{accountName}/deployments/{deploymentName}/resume": {
1714+
"post": {
1715+
"tags": [
1716+
"Deployments"
1717+
],
1718+
"operationId": "Deployments_Resume",
1719+
"summary": "Resume a deployment",
1720+
"description": "Resumes inferencing on a previously paused deployment by setting the deploymentState to 'Running' (see #/definitions/DeploymentProperties/properties/deploymentState). This operation is idempotent and can be safely called on already running deployments.",
1721+
"x-ms-examples": {
1722+
"ResumeDeployment": {
1723+
"$ref": "./examples/ResumeDeployment.json"
1724+
}
1725+
},
1726+
"parameters": [
1727+
{
1728+
"$ref": "../../../../../common-types/resource-management/v3/types.json#/parameters/ResourceGroupNameParameter"
1729+
},
1730+
{
1731+
"$ref": "#/parameters/accountNameParameter"
1732+
},
1733+
{
1734+
"$ref": "../../../../../common-types/resource-management/v3/types.json#/parameters/ApiVersionParameter"
1735+
},
1736+
{
1737+
"$ref": "../../../../../common-types/resource-management/v3/types.json#/parameters/SubscriptionIdParameter"
1738+
},
1739+
{
1740+
"$ref": "#/parameters/deploymentNameForActionParameter"
1741+
}
1742+
],
1743+
"responses": {
1744+
"200": {
1745+
"description": "OK -- Deployment resumed successfully.",
1746+
"schema": {
1747+
"$ref": "#/definitions/Deployment"
1748+
}
1749+
},
1750+
"default": {
1751+
"description": "Error response describing why the operation failed.",
1752+
"schema": {
1753+
"$ref": "../../../../../common-types/resource-management/v3/types.json#/definitions/ErrorResponse"
1754+
}
1755+
}
1756+
}
1757+
}
1758+
},
16671759
"/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.CognitiveServices/accounts/{accountName}/commitmentPlans": {
16681760
"get": {
16691761
"tags": [
@@ -8116,6 +8208,52 @@
81168208
"spilloverDeploymentName": {
81178209
"type": "string",
81188210
"description": "Specifies the deployment name that should serve requests when the request would have otherwise been throttled due to reaching current deployment throughput limit."
8211+
},
8212+
"serviceTier": {
8213+
"type": "string",
8214+
"description": "The service tier for the deployment. Determines the pricing and performance level for request processing. Use 'Default' for standard pricing or 'Priority' for higher-priority processing with premium pricing. Note: Pause operations are only supported on Standard, DataZoneStandard, and GlobalStandard SKUs.",
8215+
"enum": [
8216+
"Default",
8217+
"Priority"
8218+
],
8219+
"x-nullable": true,
8220+
"x-ms-enum": {
8221+
"name": "ServiceTier",
8222+
"modelAsString": true,
8223+
"values": [
8224+
{
8225+
"value": "Default",
8226+
"description": "Default service tier: requests are processed with the standard pricing and performance for the selected model."
8227+
},
8228+
{
8229+
"value": "Priority",
8230+
"description": "Priority service tier: requests are processed with higher pricing and higher performance for the selected model."
8231+
}
8232+
]
8233+
}
8234+
},
8235+
"deploymentState": {
8236+
"type": "string",
8237+
"description": "The state of the deployment. Controls whether the deployment is accepting inference requests. Use 'Running' for active deployments that process requests, or 'Paused' to temporarily stop inference while preserving the deployment configuration.",
8238+
"enum": [
8239+
"Running",
8240+
"Paused"
8241+
],
8242+
"x-nullable": true,
8243+
"x-ms-enum": {
8244+
"name": "DeploymentState",
8245+
"modelAsString": true,
8246+
"values": [
8247+
{
8248+
"value": "Running",
8249+
"description": "The deployment is running and accepting inference requests."
8250+
},
8251+
{
8252+
"value": "Paused",
8253+
"description": "The deployment is paused and not accepting inference requests."
8254+
}
8255+
]
8256+
}
81198257
}
81208258
},
81218259
"description": "Properties of Cognitive Services account deployment."
@@ -11675,6 +11813,15 @@
1167511813
"description": "The name of the deployment associated with the Cognitive Services Account",
1167611814
"x-ms-parameter-location": "method"
1167711815
},
11816+
"deploymentNameForActionParameter": {
11817+
"name": "deploymentName",
11818+
"in": "path",
11819+
"required": true,
11820+
"type": "string",
11821+
"pattern": "^[a-zA-Z0-9][a-zA-Z0-9_.-]*$",
11822+
"description": "The name of the deployment for pause/resume actions",
11823+
"x-ms-parameter-location": "method"
11824+
},
1167811825
"raiPolicyNameParameter": {
1167911826
"name": "raiPolicyName",
1168011827
"in": "path",

specification/cognitiveservices/resource-manager/Microsoft.CognitiveServices/preview/2025-10-01-preview/examples/GetDeployment.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
"name": "ada",
2323
"version": "1"
2424
},
25+
"serviceTier": "Default",
26+
"deploymentState": "Running",
2527
"provisioningState": "Succeeded"
2628
}
2729
}

specification/cognitiveservices/resource-manager/Microsoft.CognitiveServices/preview/2025-10-01-preview/examples/ListDeployments.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
"name": "ada",
2424
"version": "1"
2525
},
26+
"serviceTier": "Default",
27+
"deploymentState": "Running",
2628
"provisioningState": "Succeeded"
2729
}
2830
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"parameters": {
3+
"api-version": "2025-10-01-preview",
4+
"subscriptionId": "00000000-1111-2222-3333-444444444444",
5+
"resourceGroupName": "resourceGroupName",
6+
"accountName": "accountName",
7+
"deploymentName": "deploymentName"
8+
},
9+
"responses": {
10+
"200": {
11+
"body": {
12+
"id": "/subscriptions/subscriptionId/resourceGroups/resourceGroupName/providers/Microsoft.CognitiveServices/accounts/accountName/deployments/deploymentName",
13+
"name": "deploymentName",
14+
"type": "Microsoft.CognitiveServices/accounts/deployments",
15+
"sku": {
16+
"name": "Standard",
17+
"capacity": 1
18+
},
19+
"properties": {
20+
"model": {
21+
"format": "OpenAI",
22+
"name": "gpt-4",
23+
"version": "0613"
24+
},
25+
"deploymentState": "Paused",
26+
"provisioningState": "Succeeded"
27+
}
28+
}
29+
}
30+
}
31+
}

specification/cognitiveservices/resource-manager/Microsoft.CognitiveServices/preview/2025-10-01-preview/examples/PutDeployment.json

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
"format": "OpenAI",
1616
"name": "ada",
1717
"version": "1"
18-
}
18+
},
19+
"serviceTier": "Priority",
20+
"deploymentState": "Running"
1921
}
2022
}
2123
},
@@ -35,6 +37,8 @@
3537
"name": "ada",
3638
"version": "1"
3739
},
40+
"serviceTier": "Priority",
41+
"deploymentState": "Running",
3842
"provisioningState": "Succeeded"
3943
}
4044
}
@@ -54,6 +58,8 @@
5458
"name": "ada",
5559
"version": "1"
5660
},
61+
"serviceTier": "Priority",
62+
"deploymentState": "Running",
5763
"provisioningState": "Accepted"
5864
}
5965
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"parameters": {
3+
"api-version": "2025-10-01-preview",
4+
"subscriptionId": "00000000-1111-2222-3333-444444444444",
5+
"resourceGroupName": "resourceGroupName",
6+
"accountName": "accountName",
7+
"deploymentName": "deploymentName"
8+
},
9+
"responses": {
10+
"200": {
11+
"body": {
12+
"id": "/subscriptions/subscriptionId/resourceGroups/resourceGroupName/providers/Microsoft.CognitiveServices/accounts/accountName/deployments/deploymentName",
13+
"name": "deploymentName",
14+
"type": "Microsoft.CognitiveServices/accounts/deployments",
15+
"sku": {
16+
"name": "Standard",
17+
"capacity": 1
18+
},
19+
"properties": {
20+
"model": {
21+
"format": "OpenAI",
22+
"name": "gpt-4",
23+
"version": "0613"
24+
},
25+
"deploymentState": "Running",
26+
"provisioningState": "Succeeded"
27+
}
28+
}
29+
}
30+
}
31+
}

specification/cognitiveservices/resource-manager/Microsoft.CognitiveServices/preview/2025-10-01-preview/examples/UpdateDeployment.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
"name": "ada",
2929
"version": "1"
3030
},
31+
"serviceTier": "Priority",
32+
"deploymentState": "Paused",
3133
"provisioningState": "Succeeded"
3234
}
3335
}

0 commit comments

Comments
 (0)