|
145 | 145 | },
|
146 | 146 | {
|
147 | 147 | "cell_type": "code",
|
148 |
| - "execution_count": 3, |
| 148 | + "execution_count": null, |
149 | 149 | "id": "b6af94af-1de6-4cb1-959b-98fb3f4e1932",
|
150 | 150 | "metadata": {},
|
151 | 151 | "outputs": [],
|
152 | 152 | "source": [
|
153 |
| - "from transformers import AutoConfig\n", |
154 | 153 | "from trl import ModelConfig\n",
|
155 | 154 | "\n",
|
156 | 155 | "model_args = ModelConfig(\n",
|
|
608 | 607 | "metadata": {},
|
609 | 608 | "outputs": [],
|
610 | 609 | "source": [
|
| 610 | + "%%sh # [run in command line outside notebook]\n", |
| 611 | + "\n", |
611 | 612 | "docker run --rm --ipc=host -it \\\n",
|
612 | 613 | " --ulimit stack=67108864 --ulimit memlock=-1 \\\n",
|
613 | 614 | " --gpus all -p 8000:8000 -e TRTLLM_ENABLE_PDL=1 \\\n",
|
|
640 | 641 | "metadata": {},
|
641 | 642 | "outputs": [],
|
642 | 643 | "source": [
|
| 644 | + "%%sh # [run in TensorRT-LLM container]\n", |
| 645 | + "\n", |
643 | 646 | "git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git"
|
644 | 647 | ]
|
645 | 648 | },
|
|
658 | 661 | "metadata": {},
|
659 | 662 | "outputs": [],
|
660 | 663 | "source": [
|
| 664 | + "%%sh # [run in TensorRT-LLM container]\n", |
| 665 | + "\n", |
661 | 666 | "cd TensorRT-Model-Optimizer/\n",
|
662 | 667 | "pip install -e ."
|
663 | 668 | ]
|
|
679 | 684 | },
|
680 | 685 | "outputs": [],
|
681 | 686 | "source": [
|
| 687 | + "%%sh # [run in TensorRT-LLM container]\n", |
| 688 | + "\n", |
682 | 689 | "# set export path for converted checkpoints. The script saves the converted checkpoint in ${ROOT_SAVE_PATH}/saved_models_${MODEL_FULL_NAME}\n",
|
683 | 690 | "export ROOT_SAVE_PATH=/app/tensorrt_llm\n",
|
684 | 691 | "\n",
|
|
710 | 717 | "metadata": {},
|
711 | 718 | "outputs": [],
|
712 | 719 | "source": [
|
| 720 | + "%%sh # [run in TensorRT-LLM container]\n", |
| 721 | + "\n", |
713 | 722 | "trtllm-serve /app/tensorrt_llm/saved_models_checkpoint-450_nvfp4_hf/ \\\n",
|
714 | 723 | " --max_batch_size 1 --max_num_tokens 1024 \\\n",
|
715 | 724 | " --max_seq_len 4096 --tp_size 8 --pp_size 1 \\\n",
|
|
803 | 812 | },
|
804 | 813 | {
|
805 | 814 | "cell_type": "code",
|
806 |
| - "execution_count": 13, |
| 815 | + "execution_count": null, |
807 | 816 | "id": "fb78741b-30cb-46f2-a292-c5192cbca9ed",
|
808 | 817 | "metadata": {},
|
809 | 818 | "outputs": [
|
|
0 commit comments