File tree Expand file tree Collapse file tree 1 file changed +11
-1
lines changed
examples/llm_qat/notebooks Expand file tree Collapse file tree 1 file changed +11
-1
lines changed Original file line number Diff line number Diff line change 608
608
"metadata" : {},
609
609
"outputs" : [],
610
610
"source" : [
611
+ " %%sh # [run in command line outside notebook]\n " ,
612
+ " \n " ,
611
613
" docker run --rm --ipc=host -it \\\n " ,
612
614
" --ulimit stack=67108864 --ulimit memlock=-1 \\\n " ,
613
615
" --gpus all -p 8000:8000 -e TRTLLM_ENABLE_PDL=1 \\\n " ,
640
642
"metadata" : {},
641
643
"outputs" : [],
642
644
"source" : [
645
+ " %%sh # [run in TensorRT-LLM container]\n " ,
646
+ " \n " ,
643
647
" git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git"
644
648
]
645
649
},
658
662
"metadata" : {},
659
663
"outputs" : [],
660
664
"source" : [
665
+ " %%sh # [run in TensorRT-LLM container]\n " ,
666
+ " \n " ,
661
667
" cd TensorRT-Model-Optimizer/\n " ,
662
668
" pip install -e ."
663
669
]
679
685
},
680
686
"outputs" : [],
681
687
"source" : [
688
+ " %%sh # [run in TensorRT-LLM container]\n " ,
689
+ " \n " ,
682
690
" # set export path for converted checkpoints. The script saves the converted checkpoint in ${ROOT_SAVE_PATH}/saved_models_${MODEL_FULL_NAME}\n " ,
683
691
" export ROOT_SAVE_PATH=/app/tensorrt_llm\n " ,
684
692
" \n " ,
710
718
"metadata" : {},
711
719
"outputs" : [],
712
720
"source" : [
721
+ " %%sh # [run in TensorRT-LLM container]\n " ,
722
+ " \n " ,
713
723
" trtllm-serve /app/tensorrt_llm/saved_models_checkpoint-450_nvfp4_hf/ \\\n " ,
714
724
" --max_batch_size 1 --max_num_tokens 1024 \\\n " ,
715
725
" --max_seq_len 4096 --tp_size 8 --pp_size 1 \\\n " ,
803
813
},
804
814
{
805
815
"cell_type" : " code" ,
806
- "execution_count" : 13 ,
816
+ "execution_count" : null ,
807
817
"id" : " fb78741b-30cb-46f2-a292-c5192cbca9ed" ,
808
818
"metadata" : {},
809
819
"outputs" : [
You can’t perform that action at this time.
0 commit comments