|
35 | 35 | "metadata": {}, |
36 | 36 | "outputs": [], |
37 | 37 | "source": [ |
38 | | - "!hyp set-cluster-context --cluster-name hp-cluster-for-inf-Beta2try1" |
| 38 | + "!hyp set-cluster-context --cluster-name <cluster-name>" |
39 | 39 | ] |
40 | 40 | }, |
41 | 41 | { |
|
47 | 47 | "source": [ |
48 | 48 | "!hyp create hyp-custom-endpoint \\\n", |
49 | 49 | " --version 1.0 \\\n", |
50 | | - " --env \\\n", |
51 | | - " '{ \\\n", |
52 | | - " \"HF_MODEL_ID\": \"/opt/ml/model\", \\\n", |
53 | | - " \"SAGEMAKER_PROGRAM\": \"inference.py\", \\\n", |
54 | | - " \"SAGEMAKER_SUBMIT_DIRECTORY\": \"/opt/ml/model/code\", \\\n", |
55 | | - " \"MODEL_CACHE_ROOT\": \"/opt/ml/model\", \\\n", |
56 | | - " \"SAGEMAKER_ENV\": \"1\" \\\n", |
57 | | - " }' \\\n", |
| 50 | + " --env '{ \"key1\": \"val1\", \"key2\": \"val2\"}' \\\n", |
58 | 51 | " --metric-collection-period 30 \\\n", |
59 | 52 | " --metric-name Invocations \\\n", |
60 | 53 | " --metric-stat Sum \\\n", |
61 | 54 | " --metric-type Average \\\n", |
62 | 55 | " --min-value 0.0 \\\n", |
63 | | - " --cloud-watch-trigger-name SageMaker-Invocations-new \\\n", |
| 56 | + " --cloud-watch-trigger-name SageMaker-Invocations \\\n", |
64 | 57 | " --cloud-watch-trigger-namespace AWS/SageMaker \\\n", |
65 | 58 | " --target-value 10 \\\n", |
66 | 59 | " --use-cached-metrics true \\\n", |
67 | 60 | " --model-source-type s3 \\\n", |
68 | | - " --model-location deepseek15b \\\n", |
69 | | - " --s3-bucket-name test-model-s3-zhaoqi \\\n", |
70 | | - " --s3-region us-east-2 \\\n", |
71 | | - " --image-uri 763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0 \\\n", |
| 61 | + " --model-location <model-location-folder> \\\n", |
| 62 | + " --s3-bucket-name <bucket-name> \\\n", |
| 63 | + " --s3-region <bucket-region> \\\n", |
| 64 | + " --image-uri <image-uri> \\\n", |
72 | 65 | " --model-volume-mount-name model-weights \\\n", |
73 | 66 | " --container-port 8080 \\\n", |
74 | 67 | " --resources-requests '{\"cpu\": \"30000m\", \"nvidia.com/gpu\": 1, \"memory\": \"100Gi\"}' \\\n", |
75 | 68 | " --resources-limits '{\"nvidia.com/gpu\": 1}' \\\n", |
76 | | - " --tls-certificate-output-s3-uri s3://tls-bucket-inf1-beta2 \\\n", |
77 | | - " --instance-type ml.g5.8xlarge \\\n", |
78 | | - " --dimensions '{\"EndpointName\": \"endpoint-s3-test-cli\", \"VariantName\": \"AllTraffic\"}' \\\n", |
| 69 | + " --tls-certificate-output-s3-uri s3://sample-bucket \\\n", |
| 70 | + " --instance-type <instance-type> \\\n", |
| 71 | + " --dimensions '{\"EndpointName\": \"endpoint-s3\", \"VariantName\": \"AllTraffic\"}' \\\n", |
79 | 72 | " --metrics-enabled true \\\n", |
80 | | - " --endpoint-name endpoint-s3-test-cli \\\n", |
81 | | - " --model-name deepseek15b-s3-test-cli" |
| 73 | + " --endpoint-name endpoint-s3 \\\n", |
| 74 | + " --model-name <model-name>" |
82 | 75 | ] |
83 | 76 | }, |
84 | 77 | { |
|
98 | 91 | "metadata": {}, |
99 | 92 | "outputs": [], |
100 | 93 | "source": [ |
101 | | - "!hyp describe hyp-custom-endpoint --name endpoint-s3-test-cli" |
| 94 | + "!hyp describe hyp-custom-endpoint --name endpoint-s3" |
102 | 95 | ] |
103 | 96 | }, |
104 | 97 | { |
|
108 | 101 | "metadata": {}, |
109 | 102 | "outputs": [], |
110 | 103 | "source": [ |
111 | | - "!hyp invoke hyp-custom-endpoint --endpoint-name endpoint-s3-test-cli --body '{\"inputs\":\"What is the capital of USA?\"}'" |
| 104 | + "!hyp invoke hyp-custom-endpoint --endpoint-name endpoint-s3 --body '{\"inputs\":\"What is the capital of USA?\"}'" |
112 | 105 | ] |
113 | 106 | }, |
114 | 107 | { |
|
118 | 111 | "metadata": {}, |
119 | 112 | "outputs": [], |
120 | 113 | "source": [ |
121 | | - "!hyp delete hyp-custom-endpoint --name endpoint-s3-test-cli" |
| 114 | + "!hyp delete hyp-custom-endpoint --name endpoint-s3" |
122 | 115 | ] |
123 | 116 | }, |
124 | 117 | { |
|
0 commit comments