@@ -26,7 +26,7 @@ az ml model create --name $MODEL_NAME --path "model"

echo "Creating compute with GPU"
# <create_compute>
-az ml compute create -n gpu-cluster --type amlcompute --size STANDARD_NC6s_v3 --min-instances 0 --max-instances 2
+az ml compute create -n gpu-cluster --type amlcompute --size STANDARD_NC4AS_T4_V3 --min-instances 0 --max-instances 2
# </create_compute>

echo "Creating batch endpoint $ENDPOINT_NAME"
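A quick way to confirm the new size is actually offered before creating the cluster (a minimal sketch, not part of this change; the region value is a placeholder):

# Check that the T4 size is available in the target region; "eastus" is a placeholder.
az vm list-skus --location eastus --size Standard_NC4as_T4_v3 --output table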
@@ -7,4 +7,4 @@ model:
path: ./models
type: triton_model
instance_count: 1
-instance_type: Standard_NC6s_v3
+instance_type: STANDARD_NC4AS_T4_V3
@@ -13,8 +13,8 @@ compute_cluster_finetune="sample-finetune-cluster-gpu"
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
-compute_finetune_sku="Standard_NC6s_v3"
-compute_model_evaluation_sku="Standard_NC6s_v3"
+compute_finetune_sku="STANDARD_NC4AS_T4_V3"
+compute_model_evaluation_sku="STANDARD_NC4AS_T4_V3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
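Since these scripts also pin the number of GPUs per node, here is a hedged sketch for looking up how many GPUs the new SKU reports; the region is a placeholder and the JMESPath assumes the usual capabilities layout of az vm list-skus output:

# Report the GPU count for the new SKU (Standard_NC4as_T4_v3 exposes a single T4).
az vm list-skus --location eastus --size Standard_NC4as_T4_v3 \
  --query "[0].capabilities[?name=='GPUs'].value" --output tsv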
@@ -13,8 +13,8 @@ compute_cluster_finetune="sample-finetune-cluster-gpu"
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
-compute_finetune_sku="Standard_NC6s_v3"
-compute_model_evaluation_sku="Standard_NC6s_v3"
+compute_finetune_sku="STANDARD_NC4AS_T4_V3"
+compute_model_evaluation_sku="STANDARD_NC4AS_T4_V3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
@@ -15,8 +15,8 @@ compute_cluster_finetune="sample-finetune-cluster-gpu"
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
-compute_finetune_sku="Standard_NC6s_v3"
-compute_model_evaluation_sku="Standard_NC6s_v3"
+compute_finetune_sku="STANDARD_NC4AS_T4_V3"
+compute_model_evaluation_sku="STANDARD_NC4AS_T4_V3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
@@ -14,8 +14,8 @@ compute_cluster_finetune="sample-finetune-cluster-gpu"
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
-compute_finetune_sku="Standard_NC6s_v3"
-compute_model_evaluation_sku="Standard_NC6s_v3"
+compute_finetune_sku="STANDARD_NC4AS_T4_V3"
+compute_model_evaluation_sku="STANDARD_NC4AS_T4_V3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
@@ -10,7 +10,7 @@ registry_name="azureml"

compute_cluster="gpu-cluster-big"
# if above compute cluster does not exist, create it with the following vm size
-compute_sku="Standard_NC24rs_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"
# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
@@ -10,7 +10,7 @@ registry_name="azureml"

compute_cluster="gpu-cluster-big"
# if above compute cluster does not exist, create it with the following vm size
-compute_sku="Standard_NC24rs_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"
# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
@@ -10,7 +10,7 @@ registry_name="azureml"

compute_cluster="gpu-cluster-big"
# if above compute cluster does not exist, create it with the following vm size
-compute_sku="Standard_NC24rs_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"
# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
@@ -1,5 +1,5 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
-instance_type: Standard_NC6s_v3
+instance_type: STANDARD_NC4AS_T4_V3
instance_count: 1
liveness_probe:
initial_delay: 180
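For context, a managed online deployment spec like the one above is typically applied with the v2 CLI; the endpoint, deployment, and file names below are placeholders rather than values from this repo:

# Apply the deployment spec; names are placeholders.
az ml online-deployment create --endpoint-name <ENDPOINT_NAME> --name <DEPLOYMENT_NAME> \
  --file deployment.yaml --all-traffic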
@@ -10,7 +10,7 @@ workspace_name="<WORKSPACE_NAME>"
cluster_name="sample-finetune-cluster-gpu"

# If above compute cluster does not exist, create it with the following vm size
-cluster_sku="Standard_NC6s_v3"
+cluster_sku="STANDARD_NC4AS_T4_V3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
@@ -29,7 +29,7 @@ version=$(date +%s)
finetuned_huggingface_model_name="runwayml-stable-diffusion-2-1-dog-text-to-image"
huggingface_endpoint_name="text-to-image-dog-$version"
deployment_name="text2img-dog-mlflow-deploy"
-deployment_sku="Standard_NC6s_v3"
+deployment_sku="STANDARD_NC4AS_T4_V3"
request_file="request.json"
response_file="generated_image.json"

@@ -10,7 +10,7 @@ registry_name="azureml"

compute_cluster="gpu-cluster-big"
# if above compute cluster does not exist, create it with the following vm size
-compute_sku="Standard_NC24rs_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"
# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
@@ -11,7 +11,7 @@ registry_name="azureml"

compute_cluster="gpu-cluster-big"
# if above compute cluster does not exist, create it with the following vm size
-compute_sku="Standard_NC24rs_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"
# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
# Setting this to more than the number of GPUs will result in an error.
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: demo
-instance_type: Standard_NC6s_v3
+instance_type: STANDARD_NC4AS_T4_V3
instance_count: 1
liveness_probe:
initial_delay: 180
@@ -12,7 +12,7 @@ compute_cluster_model_import="sample-model-import-cluster"
compute_cluster_finetune="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
-compute_finetune_sku="Standard_NC6s_v3"
+compute_finetune_sku="STANDARD_NC4AS_T4_V3"

# This is the foundation model for finetuning
mmtracking_model_name="bytetrack_yolox_x_crowdhuman_mot17-private-half"
@@ -21,7 +21,7 @@ model_label="latest"
version=$(date +%s)
finetuned_mmtracking_model_name="$mmtracking_model_name-mot17-tiny"
mmtracking_endpoint_name="mmt-mot-mot17-tiny-$version"
-deployment_sku="Standard_NC6s_v3"
+deployment_sku="STANDARD_NC4AS_T4_V3"

# Scoring file
mmtracking_sample_request_data="./sample_request_data.json"
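Once the deployment is up, a request file like the one referenced above is usually sent with the v2 CLI; the endpoint and deployment names below are placeholders:

# Send the sample request to the deployed endpoint; names are placeholders.
az ml online-endpoint invoke --name <ENDPOINT_NAME> --deployment-name <DEPLOYMENT_NAME> \
  --request-file ./sample_request_data.json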
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: demo
-instance_type: Standard_NC6s_v3
+instance_type: STANDARD_NC4AS_T4_V3
instance_count: 1
liveness_probe:
initial_delay: 180
@@ -17,7 +17,7 @@ endpoint_name="image-text-to-image-$version"
deployment_name="image-text-to-image-batch-deploy"

deployment_compute="gpu-cluster"
-compute_sku="Standard_NC6s_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"

# 1. Setup pre-requisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
@@ -17,7 +17,7 @@ version=$(date +%s)
endpoint_name="image-text-to-image-$version"
deployment_name="image-text-to-image-deploy"

-deployment_sku="Standard_NC6s_v3"
+deployment_sku="STANDARD_NC4AS_T4_V3"

# sample_request_data
sample_request_data="inpainting_data/sample_request_data.json"
@@ -16,7 +16,7 @@ endpoint_name="text-to-image-$version"
deployment_name="stablediffusion-demo"

deployment_compute="gpu-cluster"
-compute_sku="Standard_NC6s_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"

# 1. Setup pre-requisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
@@ -18,7 +18,7 @@ endpoint_name="text-to-image-$version"
deployment_name="inpainting-batch-deploy"

deployment_compute="gpu-cluster"
-compute_sku="Standard_NC6s_v3"
+compute_sku="STANDARD_NC4AS_T4_V3"

# 1. Setup pre-requisites
if [ "$subscription_id" = "<SUBSCRIPTION_ID>" ] || \
@@ -16,7 +16,7 @@ version=$(date +%s)
endpoint_name="inpainting-$version"
deployment_name="inpainting-deploy"

-deployment_sku="Standard_NC6s_v3"
+deployment_sku="STANDARD_NC4AS_T4_V3"

# sample_request_data
sample_request_data="inpainting_data/sample_request_data.json"
@@ -15,7 +15,7 @@ version=$(date +%s)
endpoint_name="text-to-image-$version"

# Todo: fetch deployment_sku from the min_inference_sku tag of the model
-deployment_sku="Standard_NC6s_v3"
+deployment_sku="STANDARD_NC4AS_T4_V3"

# sample_request_data
sample_request_data="./sample_request_data.json"
@@ -51,7 +51,7 @@ az ml online-endpoint create --name $endpoint_name $workspace_info || {
max_concurrent_request=2 # the maximum number of concurrent requests supported by the endpoint

# Note: We have set the value of `max_concurrent_request` to 2,
-# as we are utilizing the `Standard_NC6s_v3` SKU for deployment, which has one GPU.
+# as we are utilizing the `STANDARD_NC4AS_T4_V3` SKU for deployment, which has one GPU.
# If you are using a larger SKU, please increase this value to get the maximum performance.
# For model `stabilityai-stable-diffusion-xl-base-1-0`, set the value of `MAX_CONCURRENT_REQUESTS` to 1

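The note above ties max_concurrent_request to the single GPU on the new SKU. How this particular script wires the value in is not shown in this diff; as a hedged sketch, managed online deployments usually carry such a limit as request_settings.max_concurrent_requests_per_instance:

# Sketch only: set the per-instance concurrency limit on an existing deployment.
# $endpoint_name and $workspace_info mirror the variables used elsewhere in this script;
# the deployment name is a placeholder.
az ml online-deployment update --endpoint-name $endpoint_name --name <DEPLOYMENT_NAME> \
  --set request_settings.max_concurrent_requests_per_instance=2 $workspace_info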
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: demo
-instance_type: Standard_NC6s_V3
+instance_type: STANDARD_NC4AS_T4_V3
instance_count: 1
liveness_probe:
initial_delay: 180
@@ -15,7 +15,7 @@ version=$(date +%s)
endpoint_name="video-mot-$version"

# todo: fetch deployment_sku from the min_inference_sku tag of the model
-deployment_sku="Standard_NC6s_V3"
+deployment_sku="STANDARD_NC4AS_T4_V3"

# Prepare data for deployment
python ./prepare_data.py
6 changes: 3 additions & 3 deletions infra/bootstrapping/bootstrap.sh
@@ -105,10 +105,10 @@ if [[ ! -z "${RUN_BOOTSTRAP:-}" ]]; then
"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "cpu-cluster-lg" 0 4 "Standard_DS15_v2"

echo_title "Ensuring GPU compute"
-"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-cluster" 0 20 "STANDARD_NC6s_v3"
-"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "automl-gpu-cluster" 0 4 "STANDARD_NC6s_v3"
+"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-cluster" 0 20 "STANDARD_NC4AS_T4_V3"
+"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "automl-gpu-cluster" 0 4 "STANDARD_NC4AS_T4_V3"
# v100 single GPU cluster for pytorch 2.0 based notebooks
-"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-v100-1GPU-cluster" 0 4 "Standard_NC6s_v3"
+"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-v100-1GPU-cluster" 0 4 "STANDARD_NC4AS_T4_V3"
# v100 GPU cluster for deepspeed cli examples
"$SCRIPT_DIR"/sdk_helpers.sh ensure_aml_compute "gpu-v100-cluster" 0 2 "Standard_ND40rs_v2"

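ensure_aml_compute lives in sdk_helpers.sh, which is not part of this diff. A minimal sketch of what such a helper typically does, assuming it wraps az ml compute create with the same arguments used above:

# Hypothetical sketch, not the actual sdk_helpers.sh implementation:
# create the cluster only if it does not already exist.
ensure_aml_compute() {
  local name="$1" min_instances="$2" max_instances="$3" size="$4"
  az ml compute show --name "$name" >/dev/null 2>&1 ||
    az ml compute create --name "$name" --type amlcompute --size "$size" \
      --min-instances "$min_instances" --max-instances "$max_instances"
}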
@@ -303,7 +303,7 @@
" endpoint_name=endpoint_name,\n",
" environment=enviroment,\n",
" model=model,\n",
-" instance_type=\"Standard_NC6s_v3\",\n",
+" instance_type=\"STANDARD_NC4AS_T4_V3\",\n",
" instance_count=1,\n",
" model_mount_path=\"/models\",\n",
")"
@@ -192,7 +192,7 @@
" name=\"blue\",\n",
" endpoint_name=endpoint_name,\n",
" model=Model(path=\"./models\", type=\"triton_model\"),\n",
-" instance_type=\"Standard_NC6s_v3\",\n",
+" instance_type=\"STANDARD_NC4AS_T4_V3\",\n",
" instance_count=1,\n",
")"
]
@@ -152,7 +152,7 @@
" name=deployment_name,\n",
" endpoint_name=endpoint_name,\n",
" model=model,\n",
-" instance_type=\"Standard_NC6s_v3\", # Use a GPU instance type like Standard_NC6s_v3 for fast inference\n",
+" instance_type=\"STANDARD_NC4AS_T4_V3\", # Use a GPU instance type like STANDARD_NC4AS_T4_V3 for fast inference\n",
" instance_count=1,\n",
" request_settings=OnlineRequestSettings(request_timeout_ms=90000),\n",
" app_insights_enabled=True,\n",
@@ -181,7 +181,7 @@
{
"data": {
"text/plain": [
"ManagedOnlineDeployment({'private_network_connection': None, 'package_model': False, 'provisioning_state': 'Succeeded', 'endpoint_name': 'medimageinsight-u2g5q', 'type': 'Managed', 'name': 'medimageinsight-v1', 'description': None, 'tags': {}, 'properties': {'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/providers/Microsoft.MachineLearningServices/locations/westus2/mfeOperationsStatus/odidp:681e8849-345b-4da8-b1b6-8697ba1ef038:d510a9dd-8938-4983-9e3d-e7abe2addacb?api-version=2023-04-01-preview'}, 'print_as_yaml': False, 'id': '/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/resourceGroups/fmmg-mars-collab/providers/Microsoft.MachineLearningServices/workspaces/fmmg-mars-collab/onlineEndpoints/medimageinsight-u2g5q/deployments/medimageinsight-v1', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/jmerkow-cpu4/code/Users/jmerkow/healthcareai-azureml-examples/sdk/python/foundation-models/healthcare-ai/medimageinsight', 'creation_context': <azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.SystemData object at 0x7f6b140a3550>, 'serialize': <msrest.serialization.Serializer object at 0x7f6b140a37c0>, 'model': 'azureml://registries/azureml-staging/models/MedImageInsight/versions/2', 'code_configuration': None, 'environment': '/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/resourceGroups/fmmg-mars-collab/providers/Microsoft.MachineLearningServices/workspaces/fmmg-mars-collab/environments/DefaultNcdEnv-mlflow-ubuntu20-04-py38-cpu-inference/versions/20240805v1', 'environment_variables': {'MLFLOW_MODEL_FOLDER': 'mlflow_model_folder', 'AZUREML_EXTRA_CONDA_YAML_ABS_PATH': '/var/azureml-app/azureml-models/MedImageInsight/2/mlflow_model_folder/conda.yaml', 'AML_APP_INSIGHTS_KEY': 'befe0234-c1a1-46b0-920c-09d6464016f9', 'AML_APP_INSIGHTS_ENDPOINT': 'https://dc.services.visualstudio.com/v2/track', 'AML_APP_INSIGHTS_ENABLED': 'true', 'AZUREML_MODEL_DIR': '/var/azureml-app/azureml-models/MedImageInsight/2'}, 'app_insights_enabled': True, 'scale_settings': <azure.ai.ml.entities._deployment.scale_settings.DefaultScaleSettings object at 0x7f6b140a28f0>, 'request_settings': <azure.ai.ml.entities._deployment.deployment_settings.OnlineRequestSettings object at 0x7f6b140a0d30>, 'liveness_probe': <azure.ai.ml.entities._deployment.deployment_settings.ProbeSettings object at 0x7f6b140a2410>, 'readiness_probe': <azure.ai.ml.entities._deployment.deployment_settings.ProbeSettings object at 0x7f6b140a1c00>, 'instance_count': 1, 'arm_type': 'online_deployment', 'model_mount_path': None, 'instance_type': 'Standard_NC6s_v3', 'data_collector': None, 'egress_public_network_access': 'Enabled'})"
"ManagedOnlineDeployment({'private_network_connection': None, 'package_model': False, 'provisioning_state': 'Succeeded', 'endpoint_name': 'medimageinsight-u2g5q', 'type': 'Managed', 'name': 'medimageinsight-v1', 'description': None, 'tags': {}, 'properties': {'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/providers/Microsoft.MachineLearningServices/locations/westus2/mfeOperationsStatus/odidp:681e8849-345b-4da8-b1b6-8697ba1ef038:d510a9dd-8938-4983-9e3d-e7abe2addacb?api-version=2023-04-01-preview'}, 'print_as_yaml': False, 'id': '/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/resourceGroups/fmmg-mars-collab/providers/Microsoft.MachineLearningServices/workspaces/fmmg-mars-collab/onlineEndpoints/medimageinsight-u2g5q/deployments/medimageinsight-v1', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/jmerkow-cpu4/code/Users/jmerkow/healthcareai-azureml-examples/sdk/python/foundation-models/healthcare-ai/medimageinsight', 'creation_context': <azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.SystemData object at 0x7f6b140a3550>, 'serialize': <msrest.serialization.Serializer object at 0x7f6b140a37c0>, 'model': 'azureml://registries/azureml-staging/models/MedImageInsight/versions/2', 'code_configuration': None, 'environment': '/subscriptions/6c180dd2-1ec4-4fad-8ba8-1f2d8d67c129/resourceGroups/fmmg-mars-collab/providers/Microsoft.MachineLearningServices/workspaces/fmmg-mars-collab/environments/DefaultNcdEnv-mlflow-ubuntu20-04-py38-cpu-inference/versions/20240805v1', 'environment_variables': {'MLFLOW_MODEL_FOLDER': 'mlflow_model_folder', 'AZUREML_EXTRA_CONDA_YAML_ABS_PATH': '/var/azureml-app/azureml-models/MedImageInsight/2/mlflow_model_folder/conda.yaml', 'AML_APP_INSIGHTS_KEY': 'befe0234-c1a1-46b0-920c-09d6464016f9', 'AML_APP_INSIGHTS_ENDPOINT': 'https://dc.services.visualstudio.com/v2/track', 'AML_APP_INSIGHTS_ENABLED': 'true', 'AZUREML_MODEL_DIR': '/var/azureml-app/azureml-models/MedImageInsight/2'}, 'app_insights_enabled': True, 'scale_settings': <azure.ai.ml.entities._deployment.scale_settings.DefaultScaleSettings object at 0x7f6b140a28f0>, 'request_settings': <azure.ai.ml.entities._deployment.deployment_settings.OnlineRequestSettings object at 0x7f6b140a0d30>, 'liveness_probe': <azure.ai.ml.entities._deployment.deployment_settings.ProbeSettings object at 0x7f6b140a2410>, 'readiness_probe': <azure.ai.ml.entities._deployment.deployment_settings.ProbeSettings object at 0x7f6b140a1c00>, 'instance_count': 1, 'arm_type': 'online_deployment', 'model_mount_path': None, 'instance_type': 'STANDARD_NC4AS_T4_V3', 'data_collector': None, 'egress_public_network_access': 'Enabled'})"
]
},
"execution_count": 17,
@@ -125,7 +125,7 @@
},
{
"cell_type": "code",
-"execution_count": 5,
+"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -136,7 +136,7 @@
" name=deployment_name,\n",
" endpoint_name=endpoint_name,\n",
" model=model,\n",
-" instance_type=\"Standard_NC6s_v3\",\n",
+" instance_type=\"Standard_NC40ads_H100_v5\",\n",
" instance_count=1,\n",
" request_settings=OnlineRequestSettings(request_timeout_ms=90000),\n",
" app_insights_enabled=True,\n",