File tree Expand file tree Collapse file tree 10 files changed +17
-17
lines changed
model_analyzer/config/input Expand file tree Collapse file tree 10 files changed +17
-17
lines changed Original file line number Diff line number Diff line change 1212# See the License for the specific language governing permissions and
1313# limitations under the License.
1414
15- ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.05-py3
16- ARG TRITONSDK_BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.05-py3-sdk
15+ ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.06-py3
16+ ARG TRITONSDK_BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.06-py3-sdk
1717
18- ARG MODEL_ANALYZER_VERSION=1.41.0dev
19- ARG MODEL_ANALYZER_CONTAINER_VERSION=24.06dev
18+ ARG MODEL_ANALYZER_VERSION=1.41.0
19+ ARG MODEL_ANALYZER_CONTAINER_VERSION=24.06
2020FROM ${TRITONSDK_BASE_IMAGE} as sdk
2121
2222FROM $BASE_IMAGE
Original file line number Diff line number Diff line change 1- 1.41.0dev
1+ 1.41.0
Original file line number Diff line number Diff line change @@ -49,7 +49,7 @@ git pull origin main
4949** 1. Pull the SDK container:**
5050
5151```
52- docker pull nvcr.io/nvidia/tritonserver:24.05-py3-sdk
52+ docker pull nvcr.io/nvidia/tritonserver:24.06-py3-sdk
5353```
5454
5555** 2. Run the SDK container**
@@ -59,7 +59,7 @@ docker run -it --gpus 1 \
5959 --shm-size 2G \
6060 -v /var/run/docker.sock:/var/run/docker.sock \
6161 -v $(pwd)/examples/quick-start:$(pwd)/examples/quick-start \
62- --net=host nvcr.io/nvidia/tritonserver:24.05-py3-sdk
62+ --net=host nvcr.io/nvidia/tritonserver:24.06-py3-sdk
6363```
6464
6565** Important:** The example above uses a single GPU. If you are running on multiple GPUs, you may need to increase the shared memory size accordingly<br ><br >
Original file line number Diff line number Diff line change @@ -153,7 +153,7 @@ cpu_only_composing_models: <comma-delimited-string-list>
153153[ reload_model_disable: <bool> | default: false]
154154
155155# Triton Docker image tag used when launching using Docker mode
156- [ triton_docker_image: <string> | default: nvcr.io/nvidia/tritonserver:24.05-py3 ]
156+ [ triton_docker_image: <string> | default: nvcr.io/nvidia/tritonserver:24.06-py3 ]
157157
158158# Triton Server HTTP endpoint url used by Model Analyzer client"
159159[ triton_http_endpoint: <string> | default: localhost:8000 ]
Original file line number Diff line number Diff line change @@ -55,7 +55,7 @@ mkdir examples/quick-start/ensemble_add_sub/1
5555** 1. Pull the SDK container:**
5656
5757```
58- docker pull nvcr.io/nvidia/tritonserver:24.05-py3-sdk
58+ docker pull nvcr.io/nvidia/tritonserver:24.06-py3-sdk
5959```
6060
6161** 2. Run the SDK container**
@@ -65,7 +65,7 @@ docker run -it --gpus 1 \
6565 --shm-size 1G \
6666 -v /var/run/docker.sock:/var/run/docker.sock \
6767 -v $(pwd)/examples/quick-start:$(pwd)/examples/quick-start \
68- --net=host nvcr.io/nvidia/tritonserver:24.05-py3-sdk
68+ --net=host nvcr.io/nvidia/tritonserver:24.06-py3-sdk
6969```
7070
7171** Important:** The example above uses a single GPU. If you are running on multiple GPUs, you may need to increase the shared memory size accordingly<br ><br >
Original file line number Diff line number Diff line change @@ -79,7 +79,7 @@ images:
7979
8080 triton:
8181 image: nvcr.io/nvidia/tritonserver
82- tag: 24.05-py3
82+ tag: 24.06-py3
8383```
8484
8585The model analyzer executable uses the config file defined in ` helm-chart/templates/config-map.yaml ` . This config can be modified to supply arguments to model analyzer. Only the content under the ` config.yaml ` section of the file should be modified.
Original file line number Diff line number Diff line change @@ -49,7 +49,7 @@ git pull origin main
4949** 1. Pull the SDK container:**
5050
5151```
52- docker pull nvcr.io/nvidia/tritonserver:24.05-py3-sdk
52+ docker pull nvcr.io/nvidia/tritonserver:24.06-py3-sdk
5353```
5454
5555** 2. Run the SDK container**
@@ -58,7 +58,7 @@ docker pull nvcr.io/nvidia/tritonserver:24.05-py3-sdk
5858docker run -it --gpus all \
5959 -v /var/run/docker.sock:/var/run/docker.sock \
6060 -v $(pwd)/examples/quick-start:$(pwd)/examples/quick-start \
61- --net=host nvcr.io/nvidia/tritonserver:24.05-py3-sdk
61+ --net=host nvcr.io/nvidia/tritonserver:24.06-py3-sdk
6262```
6363
6464## ` Step 3: ` Profile both models concurrently
Original file line number Diff line number Diff line change @@ -49,7 +49,7 @@ git pull origin main
4949** 1. Pull the SDK container:**
5050
5151```
52- docker pull nvcr.io/nvidia/tritonserver:24.05-py3-sdk
52+ docker pull nvcr.io/nvidia/tritonserver:24.06-py3-sdk
5353```
5454
5555** 2. Run the SDK container**
@@ -58,7 +58,7 @@ docker pull nvcr.io/nvidia/tritonserver:24.05-py3-sdk
5858docker run -it --gpus all \
5959 -v /var/run/docker.sock:/var/run/docker.sock \
6060 -v $(pwd)/examples/quick-start:$(pwd)/examples/quick-start \
61- --net=host nvcr.io/nvidia/tritonserver:24.05-py3-sdk
61+ --net=host nvcr.io/nvidia/tritonserver:24.06-py3-sdk
6262```
6363
6464## ` Step 3: ` Profile the ` add_sub ` model
Original file line number Diff line number Diff line change @@ -41,4 +41,4 @@ images:
4141
4242 triton :
4343 image : nvcr.io/nvidia/tritonserver
44- tag : 24.05-py3
44+ tag : 24.06-py3
Original file line number Diff line number Diff line change 6363DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
6464DEFAULT_CONCURRENCY_SWEEP_DISABLE = False
6565DEFAULT_TRITON_LAUNCH_MODE = "local"
66- DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:24.05-py3"
66+ DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:24.06-py3"
6767DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000"
6868DEFAULT_TRITON_GRPC_ENDPOINT = "localhost:8001"
6969DEFAULT_TRITON_METRICS_URL = "http://localhost:8002/metrics"
You can’t perform that action at this time.
0 commit comments