|
145 | 145 | }, |
146 | 146 | { |
147 | 147 | "cell_type": "code", |
148 | | - "execution_count": 3, |
| 148 | + "execution_count": null, |
149 | 149 | "id": "b6af94af-1de6-4cb1-959b-98fb3f4e1932", |
150 | 150 | "metadata": {}, |
151 | 151 | "outputs": [], |
152 | 152 | "source": [ |
153 | | - "from transformers import AutoConfig\n", |
154 | 153 | "from trl import ModelConfig\n", |
155 | 154 | "\n", |
156 | 155 | "model_args = ModelConfig(\n", |
|
608 | 607 | "metadata": {}, |
609 | 608 | "outputs": [], |
610 | 609 | "source": [ |
| 610 | + "%%sh\n", |
| 611 | + "# [run in command line outside notebook]\n", |
611 | 612 | "docker run --rm --ipc=host -it \\\n", |
612 | 613 | " --ulimit stack=67108864 --ulimit memlock=-1 \\\n", |
613 | 614 | " --gpus all -p 8000:8000 -e TRTLLM_ENABLE_PDL=1 \\\n", |
|
640 | 641 | "metadata": {}, |
641 | 642 | "outputs": [], |
642 | 643 | "source": [ |
| 644 | + "%%sh\n", |
| 645 | + "# [run in TensorRT-LLM container]\n", |
643 | 646 | "git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git" |
644 | 647 | ] |
645 | 648 | }, |
|
658 | 661 | "metadata": {}, |
659 | 662 | "outputs": [], |
660 | 663 | "source": [ |
| 664 | + "%%sh\n", |
| 665 | + "# [run in TensorRT-LLM container]\n", |
661 | 666 | "cd TensorRT-Model-Optimizer/\n", |
662 | 667 | "pip install -e ." |
663 | 668 | ] |
|
679 | 684 | }, |
680 | 685 | "outputs": [], |
681 | 686 | "source": [ |
| 687 | + "%%sh\n", |
| 688 | + "# [run in TensorRT-LLM container]\n", |
682 | 689 | "# set export path for converted checkpoints. The script saves the converted checkpoint in ${ROOT_SAVE_PATH}/saved_models_${MODEL_FULL_NAME}\n", |
683 | 690 | "export ROOT_SAVE_PATH=/app/tensorrt_llm\n", |
684 | 691 | "\n", |
|
710 | 717 | "metadata": {}, |
711 | 718 | "outputs": [], |
712 | 719 | "source": [ |
| 720 | + "%%sh\n", |
| 721 | + "# [run in TensorRT-LLM container]\n", |
713 | 722 | "trtllm-serve /app/tensorrt_llm/saved_models_checkpoint-450_nvfp4_hf/ \\\n", |
714 | 723 | " --max_batch_size 1 --max_num_tokens 1024 \\\n", |
715 | 724 | " --max_seq_len 4096 --tp_size 8 --pp_size 1 \\\n", |
|
803 | 812 | }, |
804 | 813 | { |
805 | 814 | "cell_type": "code", |
806 | | - "execution_count": 13, |
| 815 | + "execution_count": null, |
807 | 816 | "id": "fb78741b-30cb-46f2-a292-c5192cbca9ed", |
808 | 817 | "metadata": {}, |
809 | 818 | "outputs": [ |
|
0 commit comments