@@ -26,7 +26,7 @@ az ml model create --name $MODEL_NAME --path "model"

echo "Creating compute with GPU"
# <create_compute>
-az ml compute create -n gpu-cluster --type amlcompute --size STANDARD_NC6s_v3 --min-instances 0 --max-instances 2
+az ml compute create -n gpu-cluster --type amlcompute --size STANDARD_NC4AS_T4_V3 --min-instances 0 --max-instances 2
# </create_compute>

echo "Creating batch endpoint $ENDPOINT_NAME"
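A quick way to confirm the new size is actually offered before creating the cluster (a minimal sketch, not part of this change; the region value is a placeholder):

# Check that the T4 size is available in the target region; "eastus" is a placeholder.
az vm list-skus --location eastus --size Standard_NC4as_T4_v3 --output table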
@@ -7,4 +7,4 @@ model:
path: ./models
type: triton_model
instance_count: 1
-instance_type: Standard_NC6s_v3
+instance_type: STANDARD_NC4AS_T4_V3
@@ -13,8 +13,8 @@ compute_cluster_finetune="sample-finetune-cluster-gpu"
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
-compute_finetune_sku="Standard_NC6s_v3"
-compute_model_evaluation_sku="Standard_NC6s_v3"
+compute_finetune_sku="STANDARD_NC4AS_T4_V3"
+compute_model_evaluation_sku="STANDARD_NC4AS_T4_V3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
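Since these scripts also pin the number of GPUs per node, here is a hedged sketch for looking up how many GPUs the new SKU reports; the region is a placeholder and the JMESPath assumes the usual capabilities layout of az vm list-skus output:

# Report the GPU count for the new SKU (Standard_NC4as_T4_v3 exposes a single T4).
az vm list-skus --location eastus --size Standard_NC4as_T4_v3 \
  --query "[0].capabilities[?name=='GPUs'].value" --output tsv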
@@ -13,8 +13,8 @@ compute_cluster_finetune="sample-finetune-cluster-gpu"
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
-compute_finetune_sku="Standard_NC6s_v3"
-compute_model_evaluation_sku="Standard_NC6s_v3"
+compute_finetune_sku="STANDARD_NC4AS_T4_V3"
+compute_model_evaluation_sku="STANDARD_NC4AS_T4_V3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
@@ -15,8 +15,8 @@ compute_cluster_finetune="sample-finetune-cluster-gpu"
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
-compute_finetune_sku="Standard_NC6s_v3"
-compute_model_evaluation_sku="Standard_NC6s_v3"
+compute_finetune_sku="STANDARD_NC4AS_T4_V3"
+compute_model_evaluation_sku="STANDARD_NC4AS_T4_V3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
@@ -14,8 +14,8 @@ compute_cluster_finetune="sample-finetune-cluster-gpu"
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
-compute_finetune_sku="Standard_NC6s_v3"
-compute_model_evaluation_sku="Standard_NC6s_v3"
+compute_finetune_sku="STANDARD_NC4AS_T4_V3"
+compute_model_evaluation_sku="STANDARD_NC4AS_T4_V3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
@@ -10,7 +10,7 @@ registry_name="azureml"

compute_cluster="gpu-cluster-big"
# if above compute cluster does not exist, create it with the following vm size
-compute_sku="Standard_NC24rs_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"
# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
@@ -10,7 +10,7 @@ registry_name="azureml"

compute_cluster="gpu-cluster-big"
# if above compute cluster does not exist, create it with the following vm size
-compute_sku="Standard_NC24rs_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"
# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
@@ -10,7 +10,7 @@ registry_name="azureml"

compute_cluster="gpu-cluster-big"
# if above compute cluster does not exist, create it with the following vm size
-compute_sku="Standard_NC24rs_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"
# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
@@ -1,5 +1,5 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
-instance_type: Standard_NC6s_v3
+instance_type: STANDARD_NC4AS_T4_V3
instance_count: 1
liveness_probe:
initial_delay: 180
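For context, a managed online deployment spec like the one above is typically applied with the v2 CLI; the endpoint, deployment, and file names below are placeholders rather than values from this repo:

# Apply the deployment spec; names are placeholders.
az ml online-deployment create --endpoint-name <ENDPOINT_NAME> --name <DEPLOYMENT_NAME> \
  --file deployment.yaml --all-traffic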
@@ -10,7 +10,7 @@ workspace_name="<WORKSPACE_NAME>"
cluster_name="sample-finetune-cluster-gpu"

# If above compute cluster does not exist, create it with the following vm size
-cluster_sku="Standard_NC6s_v3"
+cluster_sku="STANDARD_NC4AS_T4_V3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
@@ -29,7 +29,7 @@ version=$(date +%s)
finetuned_huggingface_model_name="runwayml-stable-diffusion-2-1-dog-text-to-image"
huggingface_endpoint_name="text-to-image-dog-$version"
deployment_name="text2img-dog-mlflow-deploy"
-deployment_sku="Standard_NC6s_v3"
+deployment_sku="STANDARD_NC4AS_T4_V3"
request_file="request.json"
response_file="generated_image.json"

@@ -10,7 +10,7 @@ registry_name="azureml"

compute_cluster="gpu-cluster-big"
# if above compute cluster does not exist, create it with the following vm size
-compute_sku="Standard_NC24rs_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"
# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
@@ -11,7 +11,7 @@ registry_name="azureml"

compute_cluster="gpu-cluster-big"
# if above compute cluster does not exist, create it with the following vm size
-compute_sku="Standard_NC24rs_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"
# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: demo
-instance_type: Standard_NC6s_v3
+instance_type: STANDARD_NC4AS_T4_V3
instance_count: 1
liveness_probe:
initial_delay: 180
@@ -12,7 +12,7 @@ compute_cluster_model_import="sample-model-import-cluster"
compute_cluster_finetune="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
-compute_finetune_sku="Standard_NC6s_v3"
+compute_finetune_sku="STANDARD_NC4AS_T4_V3"

# This is the foundation model for finetuning
mmtracking_model_name="bytetrack_yolox_x_crowdhuman_mot17-private-half"
@@ -21,7 +21,7 @@ model_label="latest"
version=$(date +%s)
finetuned_mmtracking_model_name="$mmtracking_model_name-mot17-tiny"
mmtracking_endpoint_name="mmt-mot-mot17-tiny-$version"
-deployment_sku="Standard_NC6s_v3"
+deployment_sku="STANDARD_NC4AS_T4_V3"

# Scoring file
mmtracking_sample_request_data="./sample_request_data.json"
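Once the deployment is up, a request file like the one referenced above is usually sent with the v2 CLI; the endpoint and deployment names below are placeholders:

# Send the sample request to the deployed endpoint; names are placeholders.
az ml online-endpoint invoke --name <ENDPOINT_NAME> --deployment-name <DEPLOYMENT_NAME> \
  --request-file ./sample_request_data.json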
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: demo
-instance_type: Standard_NC6s_v3
+instance_type: STANDARD_NC4AS_T4_V3
instance_count: 1
liveness_probe:
initial_delay: 180
@@ -17,7 +17,7 @@ endpoint_name="image-text-to-image-$version"
deployment_name="image-text-to-image-batch-deploy"

deployment_compute="gpu-cluster"
-compute_sku="Standard_NC6s_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"

# 1. Setup pre-requisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
@@ -17,7 +17,7 @@ version=$(date +%s)
endpoint_name="image-text-to-image-$version"
deployment_name="image-text-to-image-deploy"

-deployment_sku="Standard_NC6s_v3"
+deployment_sku="STANDARD_NC4AS_T4_V3"

# sample_request_data
sample_request_data="inpainting_data/sample_request_data.json"
@@ -16,7 +16,7 @@ endpoint_name="text-to-image-$version"
deployment_name="stablediffusion-demo"

deployment_compute="gpu-cluster"
-compute_sku="Standard_NC6s_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"

# 1. Setup pre-requisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
@@ -18,7 +18,7 @@ endpoint_name="text-to-image-$version"
deployment_name="inpainting-batch-deploy"

deployment_compute="gpu-cluster"
-compute_sku="Standard_NC6s_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"

# 1. Setup pre-requisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
@@ -16,7 +16,7 @@ version=$(date +%s)
endpoint_name="inpainting-$version"
deployment_name="inpainting-deploy"

-deployment_sku="Standard_NC6s_v3"
+deployment_sku="STANDARD_NC4AS_T4_V3"

# sample_request_data
sample_request_data="inpainting_data/sample_request_data.json"
@@ -15,7 +15,7 @@ version=$(date +%s)
endpoint_name="text-to-image-$version"

# Todo: fetch deployment_sku from the min_inference_sku tag of the model
-deployment_sku="Standard_NC6s_v3"
+deployment_sku="STANDARD_NC4AS_T4_V3"

# sample_request_data
sample_request_data="./sample_request_data.json"
@@ -51,7 +51,7 @@ az ml online-endpoint create --name $endpoint_name $workspace_info || {
max_concurrent_request=2 # the maximum number of concurrent requests supported by the endpoint

# Note: We have set the value of `max_concurrent_request` to 2,
-# as we are utilizing the `Standard_NC6s_v3` SKU for deployment, which has one GPU.
+# as we are utilizing the `STANDARD_NC4AS_T4_V3` SKU for deployment, which has one GPU.
# If you are using a larger SKU, please increase this value to get the maximum performance.
# For model `stabilityai-stable-diffusion-xl-base-1-0`, set the value of `MAX_CONCURRENT_REQUESTS` to 1

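The note above ties max_concurrent_request to the single GPU on the new SKU. How this particular script wires the value in is not shown in this diff; as a hedged sketch, managed online deployments usually carry such a limit as request_settings.max_concurrent_requests_per_instance:

# Sketch only: set the per-instance concurrency limit on an existing deployment.
# $endpoint_name and $workspace_info mirror the variables used elsewhere in this script;
# the deployment name is a placeholder.
az ml online-deployment update --endpoint-name $endpoint_name --name <DEPLOYMENT_NAME> \
  --set request_settings.max_concurrent_requests_per_instance=2 $workspace_info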
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: demo
-instance_type: Standard_NC6s_V3
+instance_type: STANDARD_NC4AS_T4_V3
instance_count: 1
liveness_probe:
initial_delay: 180
@@ -15,7 +15,7 @@ version=$(date +%s)
endpoint_name="video-mot-$version"

# todo: fetch deployment_sku from the min_inference_sku tag of the model
-deployment_sku="Standard_NC6s_V3"
+deployment_sku="STANDARD_NC4AS_T4_V3"

# Prepare data for deployment
python ./prepare_data.py
6 changes: 3 additions & 3 deletions infra/bootstrapping/bootstrap.sh
@@ -105,10 +105,10 @@ if [[ ! -z "${RUN_BOOTSTRAP:-}" ]]; then
"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "cpu-cluster-lg" 0 4 "Standard_DS15_v2"

echo_title "Ensuring GPU compute"
-"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-cluster" 0 20 "STANDARD_NC6s_v3"
-"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "automl-gpu-cluster" 0 4 "STANDARD_NC6s_v3"
+"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-cluster" 0 20 "STANDARD_NC4AS_T4_V3"
+"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "automl-gpu-cluster" 0 4 "STANDARD_NC4AS_T4_V3"
# v100 single GPU cluster for pytorch 2.0 based notebooks
-"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-v100-1GPU-cluster" 0 4 "Standard_NC6s_v3"
+"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-v100-1GPU-cluster" 0 4 "STANDARD_NC4AS_T4_V3"
# v100 GPU cluster for deepspeed cli examples
"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-v100-cluster" 0 2 "Standard_ND40rs_v2"

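ensure_aml_compute lives in sdk_helpers.sh, which is not part of this diff. A minimal sketch of what such a helper typically does, assuming it wraps az ml compute create with the same arguments used above:

# Hypothetical sketch, not the actual sdk_helpers.sh implementation:
# create the cluster only if it does not already exist.
ensure_aml_compute() {
  local name="$1" min_instances="$2" max_instances="$3" size="$4"
  az ml compute show --name "$name" >/dev/null 2>&1 ||
    az ml compute create --name "$name" --type amlcompute --size "$size" \
      --min-instances "$min_instances" --max-instances "$max_instances"
}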
@@ -303,7 +303,7 @@
" endpoint_name=endpoint_name,\n",
" environment=enviroment,\n",
" model=model,\n",
-" instance_type=\"Standard_NC6s_v3\",\n",
+" instance_type=\"STANDARD_NC4AS_T4_V3\",\n",
" instance_count=1,\n",
" model_mount_path=\"/models\",\n",
")"
@@ -192,7 +192,7 @@
" name=\"blue\",\n",
" endpoint_name=endpoint_name,\n",
" model=Model(path=\"./models\", type=\"triton_model\"),\n",
-" instance_type=\"Standard_NC6s_v3\",\n",
+" instance_type=\"STANDARD_NC4AS_T4_V3\",\n",
" instance_count=1,\n",
")"
]
@@ -152,7 +152,7 @@
" name=deployment_name,\n",
" endpoint_name=endpoint_name,\n",
" model=model,\n",
-" instance_type=\"Standard_NC6s_v3\", # Use a GPU instance type like Standard_NC6s_v3 for fast inference\n",
+" instance_type=\"STANDARD_NC4AS_T4_V3\", # Use a GPU instance type like STANDARD_NC4AS_T4_V3 for fast inference\n",
" instance_count=1,\n",
" request_settings=OnlineRequestSettings(request_timeout_ms=90000),\n",
" app_insights_enabled=True,\n",
@@ -181,7 +181,7 @@
{
"data": {
"text/plain": [
"ManagedOnlineDeployment({'private_network_connection': None, 'package_model': False, 'provisioning_state': 'Succeeded', 'endpoint_name': 'medimageinsight-u2g5q', 'type': 'Managed', 'name': 'medimageinsight-v1', 'description': None, 'tags': {}, 'properties': {'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/providers/Microsoft.MachineLearningServices/locations/westus2/mfeOperationsStatus/odidp:681e8849-345b-4da8-b1b6-8697ba1ef038:d510a9dd-8938-4983-9e3d-e7abe2addacb?api-version=2023-04-01-preview'}, 'print_as_yaml': False, 'id': '/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/resourceGroups/fmmg-mars-collab/providers/Microsoft.MachineLearningServices/workspaces/fmmg-mars-collab/onlineEndpoints/medimageinsight-u2g5q/deployments/medimageinsight-v1', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/jmerkow-cpu4/code/Users/jmerkow/healthcareai-azureml-examples/sdk/python/foundation-models/healthcare-ai/medimageinsight', 'creation_context': <azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.SystemData object at 0x7f6b140a3550>, 'serialize': <msrest.serialization.Serializer object at 0x7f6b140a37c0>, 'model': 'azureml://registries/azureml-staging/models/MedImageInsight/versions/2', 'code_configuration': None, 'environment': '/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/resourceGroups/fmmg-mars-collab/providers/Microsoft.MachineLearningServices/workspaces/fmmg-mars-collab/environments/DefaultNcdEnv-mlflow-ubuntu20-04-py38-cpu-inference/versions/20240805v1', 'environment_variables': {'MLFLOW_MODEL_FOLDER': 'mlflow_model_folder', 'AZUREML_EXTRA_CONDA_YAML_ABS_PATH': '/var/azureml-app/azureml-models/MedImageInsight/2/mlflow_model_folder/conda.yaml', 'AML_APP_INSIGHTS_KEY': 'befe0234-c1a1-46b0-920c-09d6464016f9', 'AML_APP_INSIGHTS_ENDPOINT': 'https://dc.services.visualstudio.com/v2/track', 'AML_APP_INSIGHTS_ENABLED': 'true', 'AZUREML_MODEL_DIR': '/var/azureml-app/azureml-models/MedImageInsight/2'}, 'app_insights_enabled': True, 'scale_settings': <azure.ai.ml.entities._deployment.scale_settings.DefaultScaleSettings object at 0x7f6b140a28f0>, 'request_settings': <azure.ai.ml.entities._deployment.deployment_settings.OnlineRequestSettings object at 0x7f6b140a0d30>, 'liveness_probe': <azure.ai.ml.entities._deployment.deployment_settings.ProbeSettings object at 0x7f6b140a2410>, 'readiness_probe': <azure.ai.ml.entities._deployment.deployment_settings.ProbeSettings object at 0x7f6b140a1c00>, 'instance_count': 1, 'arm_type': 'online_deployment', 'model_mount_path': None, 'instance_type': 'Standard_NC6s_v3', 'data_collector': None, 'egress_public_network_access': 'Enabled'})"
"ManagedOnlineDeployment({'private_network_connection': None, 'package_model': False, 'provisioning_state': 'Succeeded', 'endpoint_name': 'medimageinsight-u2g5q', 'type': 'Managed', 'name': 'medimageinsight-v1', 'description': None, 'tags': {}, 'properties': {'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/providers/Microsoft.MachineLearningServices/locations/westus2/mfeOperationsStatus/odidp:681e8849-345b-4da8-b1b6-8697ba1ef038:d510a9dd-8938-4983-9e3d-e7abe2addacb?api-version=2023-04-01-preview'}, 'print_as_yaml': False, 'id': '/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/resourceGroups/fmmg-mars-collab/providers/Microsoft.MachineLearningServices/workspaces/fmmg-mars-collab/onlineEndpoints/medimageinsight-u2g5q/deployments/medimageinsight-v1', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/jmerkow-cpu4/code/Users/jmerkow/healthcareai-azureml-examples/sdk/python/foundation-models/healthcare-ai/medimageinsight', 'creation_context': <azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.SystemData object at 0x7f6b140a3550>, 'serialize': <msrest.serialization.Serializer object at 0x7f6b140a37c0>, 'model': 'azureml://registries/azureml-staging/models/MedImageInsight/versions/2', 'code_configuration': None, 'environment': '/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/resourceGroups/fmmg-mars-collab/providers/Microsoft.MachineLearningServices/workspaces/fmmg-mars-collab/environments/DefaultNcdEnv-mlflow-ubuntu20-04-py38-cpu-inference/versions/20240805v1', 'environment_variables': {'MLFLOW_MODEL_FOLDER': 'mlflow_model_folder', 'AZUREML_EXTRA_CONDA_YAML_ABS_PATH': '/var/azureml-app/azureml-models/MedImageInsight/2/mlflow_model_folder/conda.yaml', 'AML_APP_INSIGHTS_KEY': 'befe0234-c1a1-46b0-920c-09d6464016f9', 'AML_APP_INSIGHTS_ENDPOINT': 'https://dc.services.visualstudio.com/v2/track', 'AML_APP_INSIGHTS_ENABLED': 'true', 'AZUREML_MODEL_DIR': '/var/azureml-app/azureml-models/MedImageInsight/2'}, 'app_insights_enabled': True, 'scale_settings': <azure.ai.ml.entities._deployment.scale_settings.DefaultScaleSettings object at 0x7f6b140a28f0>, 'request_settings': <azure.ai.ml.entities._deployment.deployment_settings.OnlineRequestSettings object at 0x7f6b140a0d30>, 'liveness_probe': <azure.ai.ml.entities._deployment.deployment_settings.ProbeSettings object at 0x7f6b140a2410>, 'readiness_probe': <azure.ai.ml.entities._deployment.deployment_settings.ProbeSettings object at 0x7f6b140a1c00>, 'instance_count': 1, 'arm_type': 'online_deployment', 'model_mount_path': None, 'instance_type': 'STANDARD_NC4AS_T4_V3', 'data_collector': None, 'egress_public_network_access': 'Enabled'})"
]
},
"execution_count": 17,
@@ -125,7 +125,7 @@
},
{
"cell_type": "code",
-"execution_count": 5,
+"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -136,7 +136,7 @@
" name=deployment_name,\n",
" endpoint_name=endpoint_name,\n",
" model=model,\n",
-" instance_type=\"Standard_NC6s_v3\",\n",
+" instance_type=\"Standard_NC40ads_H100_v5\",\n",
" instance_count=1,\n",
" request_settings=OnlineRequestSettings(request_timeout_ms=90000),\n",
" app_insights_enabled=True,\n",