# --- Benchmarking test configuration ---------------------------------------
# NOTE(review): reconstructed from a mangled diff paste; extraction had
# injected spaces inside quoted strings (e.g. " $( pwd) /...") which would
# produce paths with embedded spaces — removed here.
MODEL_NAME="gpt2_tensorrt_llm"
NAME="tensorrt_llm_benchmarking_test"
MODEL_REPOSITORY="$(pwd)/triton_model_repo"
TENSORRTLLM_BACKEND_DIR="/workspace/tensorrtllm_backend"
# Updated location of the GPT example in newer TensorRT-LLM layouts.
GPT_DIR="${TENSORRTLLM_BACKEND_DIR}/tensorrt_llm/examples/models/core/gpt"
TOKENIZER_DIR="${GPT_DIR}/gpt2"
# assumes BASE_DIR and NUM_GPUS are set earlier in the file — TODO confirm
ENGINES_DIR="${BASE_DIR}/engines/inflight_batcher_llm/${NUM_GPUS}-gpu"
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
CLIENT_LOG="${NAME}_orca_http_test.log"
source ../common/util.sh
#######################################
# Replace every occurrence of a placeholder tag in a config file, in place.
# Arguments:
#   $1 - tag (placeholder text) to replace
#   $2 - replacement value
#   $3 - path of the config file to edit
#######################################
function replace_config_tags {
  local tag_to_replace="${1}"
  local new_value="${2}"
  local config_file_path="${3}"
  # '|' delimiter avoids clashing with '/' in replacement paths; quote the
  # file path so paths containing spaces or glob characters still work.
  sed -i "s|${tag_to_replace}|${new_value}|g" "${config_file_path}"
}
51
58
function prepare_model_repository {
52
59
rm -rf ${MODEL_REPOSITORY} && mkdir ${MODEL_REPOSITORY}
53
60
cp -r ${TENSORRTLLM_BACKEND_DIR} /all_models/inflight_batcher_llm/* ${MODEL_REPOSITORY}
@@ -138,6 +145,42 @@ function kill_server {
138
145
done
139
146
}
140
147
148
#######################################
# Fetch a fresh copy of the tensorrtllm_backend repository (with git-lfs
# objects and submodules) into ${TENSORRTLLM_BACKEND_DIR}.
# Globals (read): TENSORRTLLM_BACKEND_REPO_TAG, TRITON_REPO_ORG
# Globals (recreated): TENSORRTLLM_BACKEND_DIR
#######################################
function clone_tensorrt_llm_backend_repo {
  rm -rf "${TENSORRTLLM_BACKEND_DIR}" && mkdir "${TENSORRTLLM_BACKEND_DIR}"
  apt-get update && apt-get install git-lfs -y --no-install-recommends
  git clone --single-branch --depth=1 -b "${TENSORRTLLM_BACKEND_REPO_TAG}" \
    "${TRITON_REPO_ORG}/tensorrtllm_backend.git" "${TENSORRTLLM_BACKEND_DIR}"
  # Guard the cd: running lfs/submodule commands in the wrong dir would be worse
  # than aborting outright.
  cd "${TENSORRTLLM_BACKEND_DIR}" || exit 1
  git lfs install && git submodule update --init --recursive
}
155
#######################################
# Download GPT-2 medium weights from Hugging Face and convert them into a
# TensorRT-LLM checkpoint under ${GPT_DIR}/c-model/gpt2/${NUM_GPUS}-gpu/.
# Globals (read): GPT_DIR, NUM_GPUS, BASE_DIR
# Exits non-zero if the weight download fails.
#######################################
function build_gpt2_base_model {
  # Download weights from HuggingFace Transformers
  cd "${GPT_DIR}" && rm -rf gpt2 && git clone https://huggingface.co/gpt2-medium gpt2 && cd gpt2
  # -f: when git-lfs is unavailable the clone ships only pointer files, and a
  # bare rm on missing files would abort the script; we re-download below.
  rm -f pytorch_model.bin model.safetensors
  if ! wget -q https://huggingface.co/gpt2-medium/resolve/main/pytorch_model.bin; then
    echo "Downloading pytorch_model.bin failed." >&2
    exit 1
  fi
  cd "${GPT_DIR}"

  # Convert weights from HF Transformers format to a TensorRT-LLM checkpoint
  python3 convert_checkpoint.py --model_dir gpt2 --dtype float16 \
    --tp_size "${NUM_GPUS}" --output_dir "./c-model/gpt2/${NUM_GPUS}-gpu/"
  cd "${BASE_DIR}"
}
170
#######################################
# Build the TensorRT engines for GPT-2 from the converted checkpoint.
# Globals (read): GPT_DIR, NUM_GPUS, ENGINES_DIR, BASE_DIR
#######################################
function build_gpt2_tensorrt_engine {
  # Build TensorRT engines
  cd "${GPT_DIR}" || exit 1
  trtllm-build --checkpoint_dir "./c-model/gpt2/${NUM_GPUS}-gpu/" \
    --gpt_attention_plugin float16 \
    --remove_input_padding enable \
    --paged_kv_cache enable \
    --gemm_plugin float16 \
    --workers "${NUM_GPUS}" \
    --output_dir "${ENGINES_DIR}"

  cd "${BASE_DIR}"
}
# Driver: fetch the backend repo, prepare the GPT-2 checkpoint, then build the
# TensorRT engines used by the benchmark.
# NOTE(review): trailing web-UI residue ("0 commit comments") removed — it is
# not shell and would have been executed as a (failing) command.
clone_tensorrt_llm_backend_repo
build_gpt2_base_model
build_gpt2_tensorrt_engine