Commit f03ecf5

Author: Mohamed Zeidan
Merge remote-tracking branch 'upstream/main'
2 parents: 512b0b3 + 935a4d9

63 files changed: +5876 −793 lines


.github/workflows/codebuild-ci.yml

Lines changed: 1 addition & 2 deletions
@@ -2,8 +2,7 @@ name: PR Checks
 on:
   pull_request_target:
     branches:
-      - "master*"
-      - "main*"
+      - "*"
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.head_ref }}

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
@@ -1,5 +1,11 @@
 # Changelog
 
+## v3.1.0 (2025-08-13)
+
+### Features
+
+ * Task Governance feature for training jobs.
+
 ## v3.0.2 (2025-07-31)
 
 ### Features

README.md

Lines changed: 8 additions & 22 deletions
@@ -54,24 +54,13 @@ SageMaker HyperPod CLI currently supports start training job with:
 
 1. Make sure that your local python version is 3.8, 3.9, 3.10 or 3.11.
 
-1. Install ```helm```.
-
-   The SageMaker Hyperpod CLI uses Helm to start training jobs. See also the [Helm installation guide](https://helm.sh/docs/intro/install/).
-
-   ```
-   curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
-   chmod 700 get_helm.sh
-   ./get_helm.sh
-   rm -f ./get_helm.sh
-   ```
-
-1. Clone and install the sagemaker-hyperpod-cli package.
+2. Install the sagemaker-hyperpod-cli package.
 
    ```
   pip install sagemaker-hyperpod
   ```
 
-1. Verify if the installation succeeded by running the following command.
+3. Verify if the installation succeeded by running the following command.
 
   ```
   hyp --help

@@ -171,7 +160,7 @@ hyp create hyp-pytorch-job \
   --priority "high" \
   --max-retry 3 \
   --volume name=model-data,type=hostPath,mount_path=/data,path=/data \
-  --volume name=training-output,type=pvc,mount_path=/data,claim_name=my-pvc,read_only=false
+  --volume name=training-output,type=pvc,mount_path=/data2,claim_name=my-pvc,read_only=false
 ```
 
 Key required parameters explained:

@@ -192,7 +181,6 @@ hyp create hyp-jumpstart-endpoint \
   --model-id jumpstart-model-id\
   --instance-type ml.g5.8xlarge \
   --endpoint-name endpoint-jumpstart \
-  --tls-output-s3-uri s3://sample-bucket
 ```
 

@@ -208,7 +196,7 @@ hyp invoke hyp-jumpstart-endpoint \
 
 ```
 hyp list hyp-jumpstart-endpoint
-hyp get hyp-jumpstart-endpoint --name endpoint-jumpstart
+hyp describe hyp-jumpstart-endpoint --name endpoint-jumpstart
 ```
 
 #### Creating a Custom Inference Endpoint

@@ -219,7 +207,8 @@ hyp create hyp-custom-endpoint \
   --endpoint-name my-custom-endpoint \
   --model-name my-pytorch-model \
   --model-source-type s3 \
-  --model-location my-pytorch-training/model.tar.gz \
+  --model-location my-pytorch-training \
+  --model-volume-mount-name test-volume \
   --s3-bucket-name your-bucket \
   --s3-region us-east-1 \
   --instance-type ml.g5.8xlarge \

@@ -333,20 +322,17 @@ from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Mod
 from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
 
 model=Model(
-    model_id='deepseek-llm-r1-distill-qwen-1-5b',
-    model_version='2.0.4',
+    model_id='deepseek-llm-r1-distill-qwen-1-5b'
 )
 server=Server(
     instance_type='ml.g5.8xlarge',
 )
 endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')
-tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')
 
 js_endpoint=HPJumpStartEndpoint(
     model=model,
     server=server,
-    sage_maker_endpoint=endpoint_name,
-    tls_config=tls_config,
+    sage_maker_endpoint=endpoint_name
 )
 
 js_endpoint.create()

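The changed `--volume` flag above packs several fields into one comma-separated key=value string. As a rough illustration of that format (a hypothetical helper, not the CLI's actual parser), such a spec could be split like this:

```python
def parse_volume_spec(spec: str) -> dict:
    """Split a comma-separated key=value volume flag into a dict."""
    volume = {}
    for field in spec.split(","):
        key, _, value = field.partition("=")
        volume[key.strip()] = value.strip()
    return volume

# The PVC volume from the updated README example:
pvc = parse_volume_spec(
    "name=training-output,type=pvc,mount_path=/data2,claim_name=my-pvc,read_only=false"
)
print(pvc["type"], pvc["mount_path"])  # pvc /data2
```

Note that values like `read_only=false` stay strings here; a real parser would also coerce booleans.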
doc/cli_training.md

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ hyp create hyp-pytorch-job [OPTIONS]
 - `--tasks-per-node INTEGER`: Number of tasks per node (minimum: 1)
 - `--label-selector OBJECT`: Node label selector as key-value pairs
 - `--deep-health-check-passed-nodes-only BOOLEAN`: Schedule pods only on nodes that passed deep health check (default: false)
-- `--scheduler-type TEXT`: Scheduler type
+- `--scheduler-type TEXT`: If specified, the training job pod will be dispatched by the specified scheduler. If not specified, the pod will be dispatched by the default scheduler.
 - `--queue-name TEXT`: Queue name for job scheduling (1-63 characters, alphanumeric with hyphens)
 - `--priority TEXT`: Priority class for job scheduling
 - `--max-retry INTEGER`: Maximum number of job retries (minimum: 0)
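The reworded `--scheduler-type` description matches standard Kubernetes behavior: a pod with an explicit `schedulerName` is picked up by that scheduler, and a pod without one goes to the default scheduler. A minimal sketch of that mapping, assuming a hypothetical helper (not part of the CLI):

```python
from typing import Optional

def pod_spec_with_scheduler(scheduler_type: Optional[str]) -> dict:
    """Build a minimal pod spec; set schedulerName only when a scheduler is chosen."""
    spec = {"containers": [{"name": "trainer", "image": "my-training-image"}]}
    if scheduler_type:
        # Kubernetes dispatches pods with an explicit schedulerName via that
        # scheduler; pods without the field fall back to the default scheduler.
        spec["schedulerName"] = scheduler_type
    return spec
```

The image name and helper are illustrative only; the real CLI renders the pod spec through its job templates.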

doc/inference.md

Lines changed: 3 additions & 7 deletions
@@ -37,8 +37,7 @@ from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import Mod
 from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
 
 model = Model(
-    model_id="deepseek-llm-r1-distill-qwen-1-5b",
-    model_version="2.0.4"
+    model_id="deepseek-llm-r1-distill-qwen-1-5b"
 )
 
 server = Server(

@@ -47,13 +46,10 @@ server = Server(
 
 endpoint_name = SageMakerEndpoint(name="endpoint-jumpstart")
 
-tls_config = TlsConfig(tls_certificate_output_s3_uri="s3://sample-bucket")
-
 js_endpoint = HPJumpStartEndpoint(
     model=model,
     server=server,
-    sage_maker_endpoint=endpoint_name,
-    tls_config=tls_config
+    sage_maker_endpoint=endpoint_name
 )
 
 js_endpoint.create()

@@ -85,7 +81,7 @@ from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint
 
 model = Model(
     model_source_type="s3",
-    model_location="test-pytorch-job/model.tar.gz",
+    model_location="test-pytorch-job",
     s3_bucket_name="my-bucket",
     s3_region="us-east-2",
     prefetch_enabled=True
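The hunks in this file consistently trim the same two things from the JumpStart example: the pinned `model_version` and the `TlsConfig` argument. A stand-in sketch of the resulting shape, using plain dataclasses (hypothetical, for illustration only; the real classes live in `sagemaker.hyperpod.inference` and require a HyperPod cluster):

```python
from dataclasses import dataclass, asdict
from typing import Optional

@dataclass
class Model:
    model_id: str
    model_version: Optional[str] = None  # no longer pinned to '2.0.4' in the examples

@dataclass
class HPJumpStartEndpoint:
    model: Model
    instance_type: str
    endpoint_name: str
    # no tls_config field: TLS settings are gone from the updated examples

spec = HPJumpStartEndpoint(
    model=Model(model_id="deepseek-llm-r1-distill-qwen-1-5b"),
    instance_type="ml.g5.8xlarge",
    endpoint_name="endpoint-jumpstart",
)
```

This only mirrors the fields the diff keeps; consult the SDK itself for the authoritative signatures.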

examples/inference/SDK/inference-jumpstart-e2e.ipynb

Lines changed: 2 additions & 5 deletions
@@ -107,21 +107,18 @@
    "source": [
     "# create configs\n",
     "model=Model(\n",
-    "    model_id='deepseek-llm-r1-distill-qwen-1-5b',\n",
-    "    model_version='2.0.4',\n",
+    "    model_id='deepseek-llm-r1-distill-qwen-1-5b'\n",
     ")\n",
     "server=Server(\n",
     "    instance_type='ml.g5.8xlarge',\n",
     ")\n",
     "endpoint_name=SageMakerEndpoint(name='<my-endpoint-name>')\n",
-    "tls_config=TlsConfig(tls_certificate_output_s3_uri='s3://<my-tls-bucket>')\n",
     "\n",
     "# create spec\n",
     "js_endpoint=HPJumpStartEndpoint(\n",
     "    model=model,\n",
     "    server=server,\n",
-    "    sage_maker_endpoint=endpoint_name,\n",
-    "    tls_config=tls_config,\n",
+    "    sage_maker_endpoint=endpoint_name\n",
     ")"
    ]
   },

helm_chart/HyperPodHelmChart/Chart.yaml

Lines changed: 4 additions & 0 deletions
@@ -24,6 +24,10 @@ version: 0.1.0
 appVersion: "1.16.0"
 
 dependencies:
+  - name: cert-manager
+    version: "v1.18.2"
+    repository: oci://quay.io/jetstack/charts
+    condition: cert-manager.enabled
   - name: training-operators
     version: "0.1.0"
     repository: "file://charts/training-operators"

helm_chart/HyperPodHelmChart/charts/health-monitoring-agent/templates/_helpers.tpl

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ Generate the health monitoring agent image URI based on AWS region
 */}}
 {{- define "health-monitoring-agent.imageUri" -}}
 {{- $region := "" -}}
-{{- $imageTag := .Values.imageTag | default "1.0.674.0_1.0.199.0" -}}
+{{- $imageTag := .Values.imageTag | default "1.0.742.0_1.0.241.0" -}}
 
 {{/* Debug: Show image tag selection if debug is enabled */}}
 {{- if .Values.debug -}}
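Helm's `| default` pipeline falls back whenever the piped value is empty or unset, which is why the chart can ship `imageTag: ""` in values.yaml and still resolve the new tag. The same selection logic, re-expressed in Python for clarity (a hypothetical helper, not part of the chart):

```python
def resolve_image_tag(values: dict) -> str:
    """Mirror Helm's `.Values.imageTag | default "..."`: empty string and
    missing key are both falsy, so either falls back to the new default tag."""
    return values.get("imageTag") or "1.0.742.0_1.0.241.0"
```

An explicit non-empty `imageTag` in values.yaml still wins, matching the template's behavior.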

helm_chart/HyperPodHelmChart/charts/health-monitoring-agent/templates/health-monitoring-agent.yaml

Lines changed: 1 addition & 0 deletions
@@ -111,6 +111,7 @@ spec:
       - ml.g6e.48xlarge
       - ml.trn2.48xlarge
       - ml.p6-b200.48xlarge
+      - ml.p6e-gb200.36xlarge
       containers:
       - name: health-monitoring-agent
         args:

helm_chart/HyperPodHelmChart/charts/health-monitoring-agent/values.yaml

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ imageTag: ""
 
 # Override the health monitoring agent image URI
 # If specified, this will override the automatic region-based URI selection
-# Example: "905418368575.dkr.ecr.us-west-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.674.0_1.0.199.0"
+# Example: "905418368575.dkr.ecr.us-west-2.amazonaws.com/hyperpod-health-monitoring-agent:1.0.742.0_1.0.241.0"
 hmaimage: ""
 
 # Enable debug output for region selection process
