Skip to content

Commit 1ec94a5

Browse files
Author: pintaoz
Commit message: add cli parameter
1 parent e4f440e commit 1ec94a5

File tree

4 files changed: 10 additions, 9 deletions

doc/cli/cluster_management/cli_cluster_management.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -358,7 +358,7 @@ The `config.yaml` file supports the following parameters:
358358
| `create_s3_endpoint_stack` | BOOLEAN | Create S3 Endpoint stack | true |
359359
| `enable_hp_inference_feature` | BOOLEAN | Enable inference operator | false |
360360
| `stage` | TEXT | Deployment stage ("gamma" or "prod") | "prod" |
361-
| `custom_bucket_name` | TEXT | S3 bucket name for templates | "aws-sagemaker-hyperpod-cluster" |
361+
| `custom_bucket_name` | TEXT | S3 bucket name for templates | "aws-sagemaker-hyperpod-cluster-setup" |
362362
| `create_life_cycle_script_stack` | BOOLEAN | Create Life Cycle Script Stack | true |
363363
| `create_s3_bucket_stack` | BOOLEAN | Create S3 Bucket Stack | true |
364364
| `s3_bucket_name` | TEXT | S3 bucket for cluster lifecycle scripts | "s3-bucket" |

hyperpod-cluster-stack-template/hyperpod_cluster_stack_template/v1_0/model.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ class ClusterStackBase(BaseModel):
3535
create_s3_endpoint_stack: Optional[bool] = Field(True, description="Boolean to Create S3 Endpoint stack")
3636
enable_hp_inference_feature: Optional[bool] = Field(False, description="Boolean to enable inference operator in Hyperpod cluster")
3737
stage: Optional[str] = Field("prod", description="Deployment stage used in S3 bucket naming for inference operator. Valid values: \"gamma\", \"prod\"")
38-
custom_bucket_name: str = Field("aws-sagemaker-hyperpod-cluster", description="S3 bucket name for templates")
38+
custom_bucket_name: str = Field("aws-sagemaker-hyperpod-cluster-setup", description="S3 bucket name for templates")
3939
create_life_cycle_script_stack: Optional[bool] = Field(True, description="Boolean to Create Life Cycle Script Stack")
4040
create_s3_bucket_stack: Optional[bool] = Field(True, description="Boolean to Create S3 Bucket Stack")
4141
s3_bucket_name: Optional[str] = Field("s3-bucket", description="The name of the S3 bucket used to store the cluster lifecycle scripts")
@@ -51,7 +51,6 @@ class ClusterStackBase(BaseModel):
5151
file_system_type_version: Optional[float] = Field(2.15, description="File system type version for the FSx file system")
5252
storage_capacity: Optional[int] = Field(1200, description="Storage capacity for the FSx file system in GiB")
5353
fsx_file_system_id: Optional[str] = Field("", description="Existing FSx file system ID")
54-
template_version: Optional[str] = Field("1", description="Version number of cluster creation template")
5554

5655
@field_validator('kubernetes_version', mode='before')
5756
@classmethod
@@ -121,7 +120,7 @@ def to_config(self, region: str = None):
121120

122121
# Set fixed defaults
123122
defaults = {
124-
'custom_bucket_name': 'aws-sagemaker-hyperpod-cluster',
123+
'custom_bucket_name': 'aws-sagemaker-hyperpod-cluster-setup',
125124
'github_raw_url': 'https://raw.githubusercontent.com/aws-samples/awsome-distributed-training/refs/heads/main/1.architectures/7.sagemaker-hyperpod-eks/LifecycleScripts/base-config/on_create.sh',
126125
'helm_repo_url': 'https://github.com/aws/sagemaker-hyperpod-cli.git',
127126
'helm_repo_path': 'helm_chart/HyperPodHelmChart'

src/sagemaker/hyperpod/cli/commands/cluster_stack.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -53,8 +53,9 @@ def parse_status_list(ctx, param, value):
5353
@click.argument("config-file", required=True)
5454
@click.argument("stack-name", required=True)
5555
@click.option("--region", help="AWS region")
56+
@click.option("--template-version", help="Version number of cluster creation template")
5657
@click.option("--debug", is_flag=True, help="Enable debug logging")
57-
def create_cluster_stack(config_file, region, debug):
58+
def create_cluster_stack(config_file, region, template_version, debug):
5859
"""Create a new HyperPod cluster stack using the provided configuration.
5960
6061
Creates a CloudFormation stack for a HyperPod cluster using settings from a YAML configuration file.
@@ -66,7 +67,7 @@ def create_cluster_stack(config_file, region, debug):
6667
.. code-block:: bash
6768
6869
# Create cluster stack with config file
69-
hyp create hyp-cluster cluster-config.yaml my-stack-name --region us-west-2
70+
hyp create hyp-cluster cluster-config.yaml my-stack-name --region us-west-2 --template-version 1
7071
7172
# Create with debug logging
7273
hyp create hyp-cluster cluster-config.yaml my-stack-name --debug
@@ -95,7 +96,7 @@ def create_cluster_stack(config_file, region, debug):
9596
config = model_instance.to_config(region=region)
9697

9798
# Create the cluster stack
98-
stack_id = HpClusterStack(**config).create(region)
99+
stack_id = HpClusterStack(**config).create(region, template_version)
99100

100101
logger.info(f"Stack creation initiated successfully with ID: {stack_id}")
101102
logger.info("You can monitor the stack creation in the AWS CloudFormation console.")

src/sagemaker/hyperpod/cluster_management/hp_cluster_stack.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,8 @@ def get_template() -> str:
6666

6767
@_hyperpod_telemetry_emitter(Feature.HYPERPOD, "create_cluster_stack")
6868
def create(self,
69-
region: Optional[str] = None) -> str:
69+
region: Optional[str] = None,
70+
template_version: Optional[int] = 1) -> str:
7071
"""Creates a new HyperPod cluster CloudFormation stack.
7172
7273
**Parameters:**
@@ -112,7 +113,7 @@ def create(self,
112113
stack_name = f"HyperpodClusterStack-{str(uuid.uuid4())[:5]}"
113114
# Use the fixed bucket name from the model
114115
bucket_name = self.custom_bucket_name
115-
template_key = f"{self.template_version}/main-stack-eks-based-template.yaml"
116+
template_key = f"{template_version}/templates/main-stack-eks-based-template.yaml"
116117

117118
try:
118119
# Use TemplateURL for large templates (>51KB)

0 commit comments

Comments
 (0)