Azure
diff --git a/specification/machinelearningservices/AzureAI.Assets/common.tsp b/specification/machinelearningservices/AzureAI.Assets/common.tsp
Lines changed: 52 additions & 0 deletions
diff --git a/specification/machinelearningservices/AzureAI.Assets/examples/2024-04-01-preview/DeploymentTemplates_Create_MaximumSet_Gen.json b/specification/machinelearningservices/AzureAI.Assets/examples/2024-04-01-preview/DeploymentTemplates_Create_MaximumSet_Gen.json
Lines changed: 80 additions & 0 deletions
diff --git a/specification/machinelearningservices/AzureAI.Assets/examples/2024-04-01-preview/DeploymentTemplates_Delete_MaximumSet_Gen.json b/specification/machinelearningservices/AzureAI.Assets/examples/2024-04-01-preview/DeploymentTemplates_Delete_MaximumSet_Gen.json
Lines changed: 14 additions & 0 deletions
diff --git a/specification/machinelearningservices/AzureAI.Assets/examples/2024-04-01-preview/DeploymentTemplates_Get_MaximumSet_Gen.json b/specification/machinelearningservices/AzureAI.Assets/examples/2024-04-01-preview/DeploymentTemplates_Get_MaximumSet_Gen.json
Lines changed: 85 additions & 0 deletions
diff --git a/specification/machinelearningservices/AzureAI.Assets/examples/2024-04-01-preview/DeploymentTemplates_List_MaximumSet_Gen.json b/specification/machinelearningservices/AzureAI.Assets/examples/2024-04-01-preview/DeploymentTemplates_List_MaximumSet_Gen.json
Lines changed: 159 additions & 0 deletions
@@ -28,3 +28,55 @@ model SystemData {
   @doc("The timestamp of resource last modification (UTC)")
   lastModifiedAt?: utcDateTime;
 }
+
+/** Settings for online request configuration. */
+model OnlineRequestSettings {
+  /** The timeout duration for requests. */
+  requestTimeout: duration;
+
+  /** The maximum number of concurrent requests per instance. */
+  maxConcurrentRequestsPerInstance: int32;
+}
+
+/** Response of a long-running operation that carries only the location to poll for the operation status. */
+model LongRunningNullResponse {
+  /** Polling URI for the status of the long-running operation. */
+  @pollingLocation
+  location: ResourceLocation<ResourceOperationStatus<PutAssetLROResponse>>;
+}
+
+/** Response of a completed put asset long-running operation (LRO). */
+model PutAssetLROResponse {
+  /** The asset ID returned when the put asset operation completes. */
+  assetId: string;
+}
+
+/** Settings for probe configuration. */
+model ProbeSettings {
+  /** The initial delay before starting probes. */
+  initialDelay?: duration;
+
+  /** The period between probe executions. */
+  period: duration;
+
+  /** The timeout duration for each probe. */
+  timeout: duration;
+
+  /** The number of consecutive failures required to consider the probe as failed. */
+  failureThreshold: int32;
+
+  /** The number of consecutive successes required to consider the probe as successful. */
+  successThreshold: int32;
+
+  /** The path for the probe request. */
+  path: string;
+
+  /** The port number for the probe. */
+  port: int32;
+
+  /** The scheme for the probe (e.g., HTTP, HTTPS). */
+  scheme: string;
+
+  /** The HTTP method for the probe request. */
+  httpMethod: string;
+}
@@ -0,0 +1,80 @@
+{
+  "title": "DeploymentTemplates_Create - generated by [MaximumSet] rule",
+  "operationId": "DeploymentTemplates_Create",
+  "parameters": {
+    "api-version": "2024-04-01-preview",
+    "registryName": "myRegistry",
+    "name": "ai21-jamba-template",
+    "version": "1.0.0",
+    "body": {
+      "deploymentTemplateType": "Managed",
+      "environmentId": "azureml://registries/myRegistry/environments/ai21-jamba-env/versions/1",
+      "environmentVariables": {
+        "AZUREML_MODEL_DIR": "/var/azureml-app/azureml-models/model-1/1",
+        "WORLD_SIZE": "2",
+        "MAX_PROMPT_LENGTH": "65536",
+        "CUDA_VISIBLE_DEVICES": "0,1"
+      },
+      "allowedEnvironmentVariableOverrides": [
+        "WORLD_SIZE",
+        "MAX_PROMPT_LENGTH",
+        "CUDA_VISIBLE_DEVICES"
+      ],
+      "modelMountPath": "/var/azureml-app",
+      "requestSettings": {
+        "requestTimeout": "PT90S",
+        "maxConcurrentRequestsPerInstance": 10
+      },
+      "livenessProbe": {
+        "initialDelay": "PT300S",
+        "period": "PT10S",
+        "timeout": "PT2S",
+        "failureThreshold": 30,
+        "successThreshold": 1,
+        "path": "/health",
+        "port": 8080,
+        "scheme": "HTTP",
+        "httpMethod": "GET"
+      },
+      "readinessProbe": {
+        "initialDelay": "PT300S",
+        "period": "PT10S",
+        "timeout": "PT2S",
+        "failureThreshold": 30,
+        "successThreshold": 1,
+        "path": "/ready",
+        "port": 8080,
+        "scheme": "HTTP",
+        "httpMethod": "GET"
+      },
+      "allowedInstanceType": [
+        "Standard_NC24ads_A100_v4",
+        "Standard_NC48ads_A100_v4",
+        "Standard_NC96ads_A100_v4"
+      ],
+      "defaultInstanceType": "Standard_NC24ads_A100_v4",
+      "instanceCount": 1,
+      "scoringPath": "/score",
+      "scoringPort": 8080,
+      "stage": "Development",
+      "description": "AI21 Jamba deployment template for large language model inference",
+      "tags": {
+        "modelFamily": "ai21-jamba",
+        "taskType": "text-generation",
+        "framework": "transformers"
+      },
+      "properties": {
+        "modelSize": "large",
+        "accelerator": "gpu",
+        "precision": "fp16"
+      }
+    }
+  },
+  "responses": {
+    "200": {
+      "body": {
+        "location": "https://management.azure.com/subscriptions/12345678-1234-1234-1234-123456789abc/resourceGroups/myResourceGroup/providers/Microsoft.MachineLearningServices/registries/myRegistry/deploymenttemplates/ai21-jamba-template/versions/1.0.0/operations/operation-id-12345"
+      }
+    }
+  }
+}
@@ -0,0 +1,14 @@
+{
+  "title": "DeploymentTemplates_Delete - generated by [MaximumSet] rule",
+  "operationId": "DeploymentTemplates_Delete",
+  "parameters": {
+    "api-version": "2024-04-01-preview",
+    "registryName": "myRegistry",
+    "name": "ai21-jamba-template",
+    "version": "1.0.0"
+  },
+  "responses": {
+    "200": {},
+    "204": {}
+  }
+}
@@ -0,0 +1,85 @@
+{
+  "title": "DeploymentTemplates_Get - generated by [MaximumSet] rule",
+  "operationId": "DeploymentTemplates_Get",
+  "parameters": {
+    "api-version": "2024-04-01-preview",
+    "registryName": "myRegistry",
+    "name": "ai21-jamba-template",
+    "version": "1.0.0",
+    "assetResourceTenantId": "72f988bf-86f1-41af-91ab-2d7cd011db47"
+  },
+  "responses": {
+    "200": {
+      "body": {
+        "deploymentTemplateType": "Managed",
+        "environmentId": "azureml://registries/myRegistry/environments/ai21-jamba-env/versions/1",
+        "environmentVariables": {
+          "AZUREML_MODEL_DIR": "/var/azureml-app/azureml-models/model-1/1",
+          "WORLD_SIZE": "2",
+          "MAX_PROMPT_LENGTH": "65536",
+          "CUDA_VISIBLE_DEVICES": "0,1"
+        },
+        "allowedEnvironmentVariableOverrides": [
+          "WORLD_SIZE",
+          "MAX_PROMPT_LENGTH",
+          "CUDA_VISIBLE_DEVICES"
+        ],
+        "modelMountPath": "/var/azureml-app",
+        "requestSettings": {
+          "requestTimeout": "PT90S",
+          "maxConcurrentRequestsPerInstance": 10
+        },
+        "livenessProbe": {
+          "initialDelay": "PT300S",
+          "period": "PT10S",
+          "timeout": "PT2S",
+          "failureThreshold": 30,
+          "successThreshold": 1,
+          "path": "/health",
+          "port": 8080,
+          "scheme": "HTTP",
+          "httpMethod": "GET"
+        },
+        "readinessProbe": {
+          "initialDelay": "PT300S",
+          "period": "PT10S",
+          "timeout": "PT2S",
+          "failureThreshold": 30,
+          "successThreshold": 1,
+          "path": "/ready",
+          "port": 8080,
+          "scheme": "HTTP",
+          "httpMethod": "GET"
+        },
+        "allowedInstanceType": [
+          "Standard_NC24ads_A100_v4",
+          "Standard_NC48ads_A100_v4",
+          "Standard_NC96ads_A100_v4"
+        ],
+        "defaultInstanceType": "Standard_NC24ads_A100_v4",
+        "instanceCount": 1,
+        "scoringPath": "/score",
+        "scoringPort": 8080,
+        "id": "azureml://registries/myRegistry/deploymenttemplates/ai21-jamba-template/versions/1.0.0",
+        "stage": "Development",
+        "description": "AI21 Jamba deployment template for large language model inference",
+        "systemData": {
+          "createdAt": "2024-09-17T10:30:15.123Z",
+          "createdBy": "user@example.com",
+          "createdByType": "User",
+          "lastModifiedAt": "2024-09-17T10:30:15.123Z"
+        },
+        "tags": {
+          "modelFamily": "ai21-jamba",
+          "taskType": "text-generation",
+          "framework": "transformers"
+        },
+        "properties": {
+          "modelSize": "large",
+          "accelerator": "gpu",
+          "precision": "fp16"
+        }
+      }
+    }
+  }
+}
@@ -0,0 +1,159 @@
+{
+  "title": "DeploymentTemplates_List - generated by [MaximumSet] rule",
+  "operationId": "DeploymentTemplates_List",
+  "parameters": {
+    "api-version": "2024-04-01-preview",
+    "registryName": "myRegistry",
+    "name": "ai21-jamba-template",
+    "tags": "modelFamily=ai21-jamba,taskType=text-generation",
+    "continuationToken": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
+    "count": 10,
+    "stage": "Development",
+    "listViewType": "ActiveOnly"
+  },
+  "responses": {
+    "200": {
+      "body": {
+        "value": [
+          {
+            "deploymentTemplateType": "Managed",
+            "environmentId": "azureml://registries/myRegistry/environments/ai21-jamba-env/versions/1",
+            "environmentVariables": {
+              "AZUREML_MODEL_DIR": "/var/azureml-app/azureml-models/model-1/1",
+              "WORLD_SIZE": "2",
+              "MAX_PROMPT_LENGTH": "65536"
+            },
+            "allowedEnvironmentVariableOverrides": [
+              "WORLD_SIZE",
+              "MAX_PROMPT_LENGTH"
+            ],
+            "modelMountPath": "/var/azureml-app",
+            "requestSettings": {
+              "requestTimeout": "PT90S",
+              "maxConcurrentRequestsPerInstance": 10
+            },
+            "livenessProbe": {
+              "initialDelay": "PT300S",
+              "period": "PT10S",
+              "timeout": "PT2S",
+              "failureThreshold": 30,
+              "successThreshold": 1,
+              "path": "/health",
+              "port": 8080,
+              "scheme": "HTTP",
+              "httpMethod": "GET"
+            },
+            "readinessProbe": {
+              "initialDelay": "PT300S",
+              "period": "PT10S",
+              "timeout": "PT2S",
+              "failureThreshold": 30,
+              "successThreshold": 1,
+              "path": "/ready",
+              "port": 8080,
+              "scheme": "HTTP",
+              "httpMethod": "GET"
+            },
+            "allowedInstanceType": [
+              "Standard_NC24ads_A100_v4",
+              "Standard_NC48ads_A100_v4"
+            ],
+            "defaultInstanceType": "Standard_NC24ads_A100_v4",
+            "instanceCount": 1,
+            "scoringPath": "/score",
+            "scoringPort": 8080,
+            "id": "azureml://registries/myRegistry/deploymenttemplates/ai21-jamba-template/versions/1.0.0",
+            "stage": "Development",
+            "description": "AI21 Jamba deployment template for large language model inference",
+            "systemData": {
+              "createdAt": "2024-09-17T10:30:15.123Z",
+              "createdBy": "user@example.com",
+              "createdByType": "User",
+              "lastModifiedAt": "2024-09-17T10:30:15.123Z"
+            },
+            "tags": {
+              "modelFamily": "ai21-jamba",
+              "taskType": "text-generation",
+              "framework": "transformers"
+            },
+            "properties": {
+              "modelSize": "large",
+              "accelerator": "gpu",
+              "precision": "fp16"
+            }
+          },
+          {
+            "deploymentTemplateType": "Managed",
+            "environmentId": "azureml://registries/myRegistry/environments/ai21-jamba-env/versions/2",
+            "environmentVariables": {
+              "AZUREML_MODEL_DIR": "/var/azureml-app/azureml-models/model-1/1",
+              "WORLD_SIZE": "4",
+              "MAX_PROMPT_LENGTH": "32768"
+            },
+            "allowedEnvironmentVariableOverrides": [
+              "WORLD_SIZE",
+              "MAX_PROMPT_LENGTH"
+            ],
+            "modelMountPath": "/var/azureml-app",
+            "requestSettings": {
+              "requestTimeout": "PT120S",
+              "maxConcurrentRequestsPerInstance": 5
+            },
+            "livenessProbe": {
+              "initialDelay": "PT300S",
+              "period": "PT10S",
+              "timeout": "PT5S",
+              "failureThreshold": 30,
+              "successThreshold": 1,
+              "path": "/health",
+              "port": 8080,
+              "scheme": "HTTP",
+              "httpMethod": "GET"
+            },
+            "readinessProbe": {
+              "initialDelay": "PT300S",
+              "period": "PT10S",
+              "timeout": "PT5S",
+              "failureThreshold": 30,
+              "successThreshold": 1,
+              "path": "/ready",
+              "port": 8080,
+              "scheme": "HTTP",
+              "httpMethod": "GET"
+            },
+            "allowedInstanceType": [
+              "Standard_NC48ads_A100_v4",
+              "Standard_NC96ads_A100_v4"
+            ],
+            "defaultInstanceType": "Standard_NC48ads_A100_v4",
+            "instanceCount": 2,
+            "scoringPath": "/score",
+            "scoringPort": 8080,
+            "id": "azureml://registries/myRegistry/deploymenttemplates/ai21-jamba-template/versions/2.0.0",
+            "stage": "Development",
+            "description": "AI21 Jamba deployment template for large language model inference - optimized version",
+            "systemData": {
+              "createdAt": "2024-09-17T11:45:30.456Z",
+              "createdBy": "user@example.com",
+              "createdByType": "User",
+              "lastModifiedAt": "2024-09-17T11:45:30.456Z"
+            },
+            "tags": {
+              "modelFamily": "ai21-jamba",
+              "taskType": "text-generation",
+              "framework": "transformers",
+              "optimized": "true"
+            },
+            "properties": {
+              "modelSize": "large",
+              "accelerator": "gpu",
+              "precision": "fp16",
+              "distributed": "true"
+            }
+          }
+        ],
+        "nextLink": "https://management.azure.com/subscriptions/12345678-1234-1234-1234-123456789abc/resourceGroups/myResourceGroup/providers/Microsoft.MachineLearningServices/registries/myRegistry/deploymenttemplates?api-version=2024-04-01-preview&continuationToken=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.next"
+      }
+    }
+  }
+}