@@ -36,7 +36,7 @@ MODEL_NAME="gpt2_tensorrt_llm"
 NAME="tensorrt_llm_benchmarking_test"
 MODEL_REPOSITORY="$(pwd)/triton_model_repo"
 TENSORRTLLM_BACKEND_DIR="/workspace/tensorrtllm_backend"
-GPT_DIR="$TENSORRTLLM_BACKEND_DIR/tensorrt_llm/examples/gpt"
+GPT_DIR="$TENSORRTLLM_BACKEND_DIR/tensorrt_llm/examples/models/core/gpt"
 TOKENIZER_DIR="$GPT_DIR/gpt2"
 ENGINES_DIR="${BASE_DIR}/engines/inflight_batcher_llm/${NUM_GPUS}-gpu"
 TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
@@ -48,9 +48,52 @@ CLIENT_PY=${BASE_DIR}/orca_http_test.py
 CLIENT_LOG="${NAME}_orca_http_test.log"
 source ../common/util.sh
 
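+# Fetch the tensorrtllm_backend sources at the pinned tag; the tensorrt_llm
+# submodule supplies the example and conversion scripts used below.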
+function clone_tensorrt_llm_backend_repo {
+    rm -rf $TENSORRTLLM_BACKEND_DIR && mkdir $TENSORRTLLM_BACKEND_DIR
+    apt-get update && apt-get install git-lfs -y --no-install-recommends
+    git clone --single-branch --depth=1 -b ${TENSORRTLLM_BACKEND_REPO_TAG} ${TRITON_REPO_ORG}/tensorrtllm_backend.git $TENSORRTLLM_BACKEND_DIR
+    cd $TENSORRTLLM_BACKEND_DIR && git lfs install && git submodule update --init --recursive
+}
+
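+# Download the HF gpt2-medium checkpoint (cloned into ./gpt2) and convert it
+# to the TensorRT-LLM checkpoint format.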
+function build_gpt2_base_model {
+    # Download weights from HuggingFace Transformers
+    cd ${GPT_DIR} && rm -rf gpt2 && git clone https://huggingface.co/gpt2-medium gpt2 && cd gpt2
+    rm pytorch_model.bin model.safetensors
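+    # Replace the checkpoint files from the clone with a direct download of pytorch_model.bin.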
+    if ! wget -q https://huggingface.co/gpt2-medium/resolve/main/pytorch_model.bin; then
+        echo "Downloading pytorch_model.bin failed."
+        exit 1
+    fi
+    cd ${GPT_DIR}
+
+    # Convert weights from HF Transformers to the TensorRT-LLM checkpoint format
+    python3 convert_checkpoint.py --model_dir gpt2 --dtype float16 --tp_size ${NUM_GPUS} --output_dir "./c-model/gpt2/${NUM_GPUS}-gpu/"
+    cd ${BASE_DIR}
+}
+
+function build_gpt2_tensorrt_engine {
+    # Build TensorRT engines
+    cd ${GPT_DIR}
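+    # remove_input_padding and paged_kv_cache are required by the in-flight batcher.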
+    trtllm-build --checkpoint_dir "./c-model/gpt2/${NUM_GPUS}-gpu/" \
+        --gpt_attention_plugin float16 \
+        --remove_input_padding enable \
+        --paged_kv_cache enable \
+        --gemm_plugin float16 \
+        --workers "${NUM_GPUS}" \
+        --output_dir "${ENGINES_DIR}"
+
+    cd ${BASE_DIR}
+}
+
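+# Substitute a template tag (e.g. '${triton_max_batch_size}' in the generated
+# config.pbtxt files) with a concrete value, in place.
+# Usage: replace_config_tags '<tag>' '<value>' <config_file>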
+function replace_config_tags {
+    tag_to_replace="${1}"
+    new_value="${2}"
+    config_file_path="${3}"
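+    # '|' is used as the sed delimiter so replacement values may contain '/'.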
+    sed -i "s|${tag_to_replace}|${new_value}|g" ${config_file_path}
+}
+
 function prepare_model_repository {
     rm -rf ${MODEL_REPOSITORY} && mkdir ${MODEL_REPOSITORY}
-    cp -r ${TENSORRTLLM_BACKEND_DIR}/all_models/inflight_batcher_llm/* ${MODEL_REPOSITORY}
+    cp -r ${TENSORRTLLM_BACKEND_DIR}/tensorrt_llm/triton_backend/all_models/inflight_batcher_llm/* ${MODEL_REPOSITORY}
     rm -rf ${MODEL_REPOSITORY}/tensorrt_llm_bls
     mv "${MODEL_REPOSITORY}/ensemble" "${MODEL_REPOSITORY}/${MODEL_NAME}"
 
@@ -113,7 +156,7 @@ function wait_for_server_ready() {
 }
 
 function run_server {
-    python3 ${TENSORRTLLM_BACKEND_DIR}/scripts/launch_triton_server.py --world_size="${NUM_GPUS}" --model_repo="${MODEL_REPOSITORY}" > ${SERVER_LOG} 2>&1 &
+    python3 ${TENSORRTLLM_BACKEND_DIR}/tensorrt_llm/triton_backend/scripts/launch_triton_server.py --world_size="${NUM_GPUS}" --model_repo="${MODEL_REPOSITORY}" > ${SERVER_LOG} 2>&1 &
     sleep 2 # allow time to obtain the pid(s)
     # Read PIDs into an array, trimming whitespace
     readarray -t SERVER_PID < <(pgrep "tritonserver")