TensorRT-LLM/.pre-commit-config.yaml at main · zhengd-nv/TensorRT-LLM · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
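# Files to be formatted by isort, yapf, and autoflake. All other files are formatted by ruff.
# The list is a single verbose-mode (?x) regex: one path alternative per line, separated by "|".
# Keep this list in sync with pyproject.toml.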
common-files: &common_files |
    (?x)^(
        .devcontainer/make_env.py |
        .github/scripts/label_community_user.py |
        .github/scripts/pr_checklist_check.py |
        benchmarks/cpp/__init__.py |
        benchmarks/cpp/prepare_dataset.py |
        benchmarks/cpp/utils/__init__.py |
        benchmarks/cpp/utils/convert_nemo_dataset.py |
        benchmarks/cpp/utils/generate_rand_loras.py |
        benchmarks/cpp/utils/prepare_real_data.py |
        benchmarks/cpp/utils/prepare_synthetic_data.py |
        benchmarks/cpp/utils/utils.py |
        cpp/conanfile.py |
        cpp/kernels/fmha_v2/conftest.py |
        cpp/kernels/fmha_v2/fmha_test.py |
        cpp/kernels/fmha_v2/setup.py |
        cpp/kernels/fmha_v2/test/conftest.py |
        cpp/kernels/fmha_v2/test/fmha/filter_rules.py |
        cpp/kernels/fmha_v2/test/fmha/test_fmha_exe.py |
        cpp/kernels/fmha_v2/test/fmha/test_fmhca_exe.py |
        cpp/kernels/fmha_v2/test/fmha/test_meta.py |
        cpp/kernels/fmha_v2/test/fmha/utils.py |
        cpp/kernels/fmha_v2/test/train_ops/test_train_ops.py |
        cpp/kernels/fmha_v2/train_ops/fmha_bmark.py |
        cpp/kernels/fmha_v2/train_ops/fmha_unit_test.py |
        cpp/kernels/fmha_v2/train_ops/my_utils.py |
        cpp/kernels/fmha_v2/train_ops/te_mha.py |
        cpp/kernels/fmha_v2/train_ops/train_setup.py |
        cpp/kernels/xqa/gen_cpp_header.py |
        cpp/kernels/xqa/gen_cubins.py |
        cpp/kernels/xqa/ref.py |
        cpp/libnuma_conan.py |
        cpp/micro_benchmarks/gen-moe-benchmark-file.py |
        cpp/tensorrt_llm/deep_ep/strip_nvshmem_helper.py |
        cpp/tensorrt_llm/kernels/cutlass_kernels/python/generate_kernels.py |
        cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/copy_cu.py |
        cpp/tests/resources/scripts/build_chatglm_engines.py |
        cpp/tests/resources/scripts/build_eagle_engines.py |
        cpp/tests/resources/scripts/build_enc_dec_engines.py |
        cpp/tests/resources/scripts/build_engines_utils.py |
        cpp/tests/resources/scripts/build_gpt_engines.py |
        cpp/tests/resources/scripts/build_gptj_engines.py |
        cpp/tests/resources/scripts/build_llama_engines.py |
        cpp/tests/resources/scripts/build_mamba_engines.py |
        cpp/tests/resources/scripts/build_medusa_engines.py |
        cpp/tests/resources/scripts/build_recurrentgemma_engines.py |
        cpp/tests/resources/scripts/build_redrafter_engines.py |
        cpp/tests/resources/scripts/generate_expected_chatglm_output.py |
        cpp/tests/resources/scripts/generate_expected_eagle_output.py |
        cpp/tests/resources/scripts/generate_expected_enc_dec_output.py |
        cpp/tests/resources/scripts/generate_expected_gpt_output.py |
        cpp/tests/resources/scripts/generate_expected_gptj_output.py |
        cpp/tests/resources/scripts/generate_expected_llama_output.py |
        cpp/tests/resources/scripts/generate_expected_mamba_output.py |
        cpp/tests/resources/scripts/generate_expected_medusa_output.py |
        cpp/tests/resources/scripts/generate_expected_recurrentgemma_output.py |
        cpp/tests/resources/scripts/generate_expected_redrafter_output.py |
        cpp/tests/resources/scripts/generate_hf_gpt_output.py |
        cpp/tests/resources/scripts/generate_test_lora_weights.py |
        cpp/tests/resources/scripts/io_converter.py |
        docs/source/conf.py |
        docs/source/helper.py |
        examples/apps/chat.py |
        examples/apps/fastapi_server.py |
        examples/bindings/executor/example_advanced.py |
        examples/bindings/executor/example_basic.py |
        examples/bindings/executor/example_debug.py |
        examples/bindings/executor/example_logits_processor.py |
        examples/disaggregated/clients/disagg_client.py |
        examples/disaggregated/slurm/benchmark/gen_server_config.py |
        examples/disaggregated/slurm/benchmark/gen_worker_config.py |
        examples/disaggregated/slurm/benchmark/submit.py |
        examples/dora/normalize_weights.py |
        examples/eagle/convert_checkpoint.py |
        examples/eval_long_context.py |
        examples/generate_checkpoint_config.py |
        examples/generate_xgrammar_tokenizer_info.py |
        examples/hf_lora_convert.py |
        examples/infinitebench/args.py |
        examples/infinitebench/compute_scores.py |
        examples/infinitebench/construct_synthetic_dataset.py |
        examples/infinitebench/eval_utils.py |
        examples/llm-api/_tensorrt_engine/llm_eagle_decoding.py |
        examples/llm-api/_tensorrt_engine/llm_eagle2_decoding.py |
        examples/llm-api/_tensorrt_engine/llm_inference_customize.py |
        examples/llm-api/_tensorrt_engine/llm_inference_kv_events.py |
        examples/llm-api/_tensorrt_engine/llm_lookahead_decoding.py |
        examples/llm-api/_tensorrt_engine/llm_medusa_decoding.py |
        examples/llm-api/_tensorrt_engine/llm_quantization.py |
        examples/llm-api/_tensorrt_engine/quickstart_example.py |
        examples/llm-api/llm_guided_decoding.py |
        examples/llm-api/llm_inference_async_streaming.py |
        examples/llm-api/llm_inference_async.py |
        examples/llm-api/llm_inference_distributed.py |
        examples/llm-api/llm_inference.py |
        examples/llm-api/llm_kv_cache_connector.py |
        examples/llm-api/llm_kv_cache_offloading.py |
        examples/llm-api/llm_logits_processor.py |
        examples/llm-api/llm_multilora.py |
        examples/llm-api/llm_runtime.py |
        examples/llm-api/llm_sampling.py |
        examples/llm-api/llm_sparse_attention.py |
        examples/llm-api/llm_speculative_decoding.py |
        examples/llm-api/out_of_tree_example/main.py |
        examples/llm-api/out_of_tree_example/modeling_opt.py |
        examples/llm-api/quickstart_advanced.py |
        examples/llm-api/quickstart_example.py |
        examples/llm-api/quickstart_multimodal.py |
        examples/llm-api/star_attention.py |
        examples/llm-eval/lm-eval-harness/lm_eval_tensorrt_llm.py |
        examples/longbench/eval_longbench_v1.py |
        examples/longbench/eval_longbench_v2.py |
        examples/medusa/convert_checkpoint.py |
        examples/mmlu.py |
        examples/models/contrib/baichuan/convert_checkpoint.py |
        examples/models/contrib/bloom/convert_checkpoint.py |
        examples/models/contrib/chatglm-6b/tokenization_chatglm.py |
        examples/models/contrib/chatglm2-6b/tokenization_chatglm.py |
        examples/models/contrib/chatglm3-6b-32k/tokenization_chatglm.py |
        examples/models/contrib/cogvlm/convert_checkpoint.py |
        examples/models/contrib/dbrx/convert_checkpoint.py |
        examples/models/contrib/deepseek_v1/__init__.py |
        examples/models/contrib/deepseek_v1/convert_checkpoint.py |
        examples/models/contrib/deepseek_v2/convert_checkpoint.py |
        examples/models/contrib/dit/convert_checkpoint.py |
        examples/models/contrib/dit/diffusion.py |
        examples/models/contrib/dit/sample.py |
        examples/models/contrib/dit/utils_modelopt.py |
        examples/models/contrib/dit/vae_decoder_trt.py |
        examples/models/contrib/falcon/convert_checkpoint.py |
        examples/models/contrib/gptj/convert_checkpoint.py |
        examples/models/contrib/gptneox/convert_checkpoint.py |
        examples/models/contrib/grok/convert_checkpoint.py |
        examples/models/contrib/mmdit/convert_checkpoint.py |
        examples/models/contrib/mmdit/sample.py |
        examples/models/contrib/mpt/convert_checkpoint.py |
        examples/models/contrib/opt/convert_checkpoint.py |
        examples/models/contrib/sdxl/build_sdxl_unet.py |
        examples/models/contrib/sdxl/pipeline_stable_diffusion_xl.py |
        examples/models/contrib/sdxl/run_sdxl.py |
        examples/models/contrib/stdit/aspect.py |
        examples/models/contrib/stdit/convert_checkpoint.py |
        examples/models/contrib/stdit/pipeline_tllm.py |
        examples/models/contrib/stdit/sample.py |
        examples/models/contrib/stdit/scheduler.py |
        examples/models/contrib/stdit/text_encoder.py |
        examples/models/contrib/stdit/utils.py |
        examples/models/contrib/stdit/vae.py |
        examples/models/contrib/stdit/video_transforms.py |
        examples/models/core/bert/__init__.py |
        examples/models/core/bert/convert_checkpoint.py |
        examples/models/core/bert/run.py |
        examples/models/core/bert/utils.py |
        examples/models/core/commandr/convert_checkpoint.py |
        examples/models/core/enc_dec/__init__.py |
        examples/models/core/enc_dec/convert_checkpoint.py |
        examples/models/core/enc_dec/helper.py |
        examples/models/core/enc_dec/run.py |
        examples/models/core/gemma/convert_checkpoint.py |
        examples/models/core/glm-4-9b/convert_checkpoint.py |
        examples/models/core/glm-4-9b/tokenization_chatglm.py |
        examples/models/core/gpt_oss/openai_chat_client_function_calling.py |
        examples/models/core/gpt/convert_checkpoint.py |
        examples/models/core/gpt/merge_ptuning_tables.py |
        examples/models/core/gpt/nemo_lora_convert.py |
        examples/models/core/gpt/nemo_prompt_convert.py |
        examples/models/core/gpt/run_hf.py |
        examples/models/core/internlm2/convert_checkpoint.py |
        examples/models/core/kimi_k2/kimi_k2_tool_calling_example.py |
        examples/models/core/llama/convert_checkpoint.py |
        examples/models/core/llama/summarize_long.py |
        examples/models/core/mamba/convert_checkpoint.py |
        examples/models/core/mllama/convert_checkpoint.py |
        examples/models/core/multimodal/__init__.py |
        examples/models/core/multimodal/build_multimodal_engine.py |
        examples/models/core/multimodal/eval.py |
        examples/models/core/multimodal/run.py |
        examples/models/core/multimodal/utils.py |
        examples/models/core/nemotron_nas/calibration_utils.py |
        examples/models/core/nemotron_nas/convert_checkpoint.py |
        examples/models/core/phi/convert_checkpoint.py |
        examples/models/core/qwen/convert_checkpoint.py |
        examples/models/core/qwen2audio/run_chat.py |
        examples/models/core/qwen2audio/run.py |
        examples/models/core/qwen2audio/utils.py |
        examples/models/core/qwenvl/run_chat.py |
        examples/models/core/qwenvl/run.py |
        examples/models/core/qwenvl/show_pic.py |
        examples/models/core/qwenvl/vit_onnx_trt.py |
        examples/models/core/recurrentgemma/convert_checkpoint.py |
        examples/models/core/vit/convert_checkpoint.py |
        examples/models/core/whisper/convert_checkpoint.py |
        examples/models/core/whisper/distil_whisper/convert_from_distil_whisper.py |
        examples/models/core/whisper/run.py |
        examples/models/core/whisper/tokenizer.py |
        examples/models/core/whisper/whisper_utils.py |
        examples/ngram/run_dtm_ngram.py |
        examples/openai_triton/manual_plugin/build.py |
        examples/openai_triton/manual_plugin/fmha_triton.py |
        examples/openai_triton/manual_plugin/plugin.py |
        examples/openai_triton/manual_plugin/run.py |
        examples/openai_triton/plugin_autogen/build_engine.py |
        examples/openai_triton/plugin_autogen/kernel_config.py |
        examples/openai_triton/plugin_autogen/run_engine.py |
        examples/python_plugin/build_lookup.py |
        examples/python_plugin/plugin_lib/__init__.py |
        examples/python_plugin/plugin_lib/lookup_kernel.py |
        examples/python_plugin/plugin_lib/lookup_plugin.py |
        examples/python_plugin/run_lookup.py |
        examples/quantization/quantize_mixed_precision_moe.py |
        examples/quantization/quantize.py |
        examples/ray_orchestrator/llm_inference_async_ray.py |
        examples/ray_orchestrator/llm_inference_distributed_ray.py |
        examples/redrafter/convert_checkpoint.py |
        examples/run.py |
        examples/scaffolding/contrib/AsyncGeneration/stream_generation_controller.py |
        examples/scaffolding/contrib/AsyncGeneration/stream_generation_run.py |
        examples/scaffolding/contrib/DeepConf/run_generation.py |
        examples/scaffolding/contrib/Dynasor/scaffolding_dynasor_run.py |
        examples/scaffolding/contrib/mcp/e2b/e2bserver.py |
        examples/scaffolding/contrib/mcp/e2b/main.py |
        examples/scaffolding/contrib/mcp/mcptest.py |
        examples/scaffolding/contrib/mcp/weather/weather.py |
        examples/scaffolding/contrib/mcp/websearch/main.py |
        examples/scaffolding/contrib/mcp/websearch/websearch.py |
        examples/scaffolding/contrib/TreeInference/run_mcts_example.py |
        examples/scaffolding/contrib/TreeInference/run_tot_example.py |
        examples/scaffolding/run_basic_generation.py |
        examples/scaffolding/run_best_of_n_with_reward.py |
        examples/scaffolding/run_majority_vote_aime24.py |
        examples/scaffolding/token_budget_majority_vote.py |
        examples/serve/openai_chat_client_for_multimodal.py |
        examples/serve/openai_chat_client.py |
        examples/serve/openai_completion_client_for_lora.py |
        examples/serve/openai_completion_client_json_schema.py |
        examples/serve/openai_completion_client.py |
        examples/summarize.py |
        examples/utils.py |
        examples/wide_ep/ep_load_balancer/generate_eplb_config.py |
        examples/wide_ep/ep_load_balancer/report_load_statistics.py |
        examples/wide_ep/ep_load_balancer/utils.py |
        examples/wide_ep/slurm_scripts/process_gen_iterlog.py |
        jenkins/scripts/mergeWaiveList.py |
        jenkins/scripts/open_search_db.py |
        jenkins/scripts/test_rerun.py |
        scripts/build_cpp_examples.py |
        scripts/build_wheel.py |
        scripts/check_test_list.py |
        scripts/dco_check.py |
        scripts/format_test_list.py |
        scripts/generate_duration.py |
        scripts/generate_lock_file.py |
        scripts/get_wheel_from_package.py |
        scripts/git_replace.py |
        scripts/package_trt_llm.py |
        scripts/release_check.py |
        scripts/rename_docker_images.py |
        scripts/test_to_stage_mapping.py |
        setup.py |
        tensorrt_llm/__init__.py |
        tensorrt_llm/_ray_utils.py |
        tensorrt_llm/_tensorrt_engine/__init__.py |
        tensorrt_llm/_torch/__init__.py |
        tensorrt_llm/_torch/attention_backend/__init__.py |
        tensorrt_llm/_torch/attention_backend/flashinfer.py |
        tensorrt_llm/_torch/attention_backend/interface.py |
        tensorrt_llm/_torch/attention_backend/sparse/__init__.py |
        tensorrt_llm/_torch/attention_backend/sparse/dsa.py |
        tensorrt_llm/_torch/attention_backend/sparse/kernel.py |
        tensorrt_llm/_torch/attention_backend/sparse/rocket.py |
        tensorrt_llm/_torch/attention_backend/sparse/utils.py |
        tensorrt_llm/_torch/attention_backend/star_flashinfer.py |
        tensorrt_llm/_torch/attention_backend/trtllm.py |
        tensorrt_llm/_torch/attention_backend/utils.py |
        tensorrt_llm/_torch/attention_backend/vanilla.py |
        tensorrt_llm/_torch/autotuner.py |
        tensorrt_llm/_torch/compilation/__init__.py |
        tensorrt_llm/_torch/compilation/backend.py |
        tensorrt_llm/_torch/compilation/multi_stream/__init__.py |
        tensorrt_llm/_torch/compilation/multi_stream/auto_multi_stream.py |
        tensorrt_llm/_torch/compilation/patterns/__init__.py |
        tensorrt_llm/_torch/compilation/patterns/ar_residual_norm.py |
        tensorrt_llm/_torch/compilation/patterns/residual_add_norm.py |
        tensorrt_llm/_torch/compilation/piecewise_optimizer.py |
        tensorrt_llm/_torch/compilation/recover_pass.py |
        tensorrt_llm/_torch/compilation/remove_copy_pass.py |
        tensorrt_llm/_torch/compilation/utils.py |
        tensorrt_llm/_torch/configs/deepseek_v3.py |
        tensorrt_llm/_torch/cublaslt_utils.py |
        tensorrt_llm/_torch/custom_ops/__init__.py |
        tensorrt_llm/_torch/custom_ops/cpp_custom_ops.py |
        tensorrt_llm/_torch/custom_ops/cute_dsl_custom_ops.py |
        tensorrt_llm/_torch/custom_ops/flashinfer_custom_ops.py |
        tensorrt_llm/_torch/custom_ops/torch_custom_ops.py |
        tensorrt_llm/_torch/custom_ops/trtllm_gen_custom_ops.py |
        tensorrt_llm/_torch/custom_ops/userbuffers_custom_ops.py |
        tensorrt_llm/_torch/cute_dsl_kernels/__init__.py |
        tensorrt_llm/_torch/cute_dsl_kernels/blackwell/__init__.py |
        tensorrt_llm/_torch/cute_dsl_kernels/blackwell/custom_pipeline.py |
        tensorrt_llm/_torch/cute_dsl_kernels/blackwell/dense_blockscaled_gemm_persistent.py |
        tensorrt_llm/_torch/cute_dsl_kernels/blackwell/utils.py |
        tensorrt_llm/_torch/cute_dsl_utils.py |
        tensorrt_llm/_torch/debug/__init__.py |
        tensorrt_llm/_torch/debug/debug_hook.py |
        tensorrt_llm/_torch/device_mesh.py |
        tensorrt_llm/_torch/distributed/__init__.py |
        tensorrt_llm/_torch/distributed/communicator.py |
        tensorrt_llm/_torch/distributed/moe_alltoall.py |
        tensorrt_llm/_torch/distributed/ops.py |
        tensorrt_llm/_torch/distributed/pg_utils.py |
        tensorrt_llm/_torch/expert_statistic.py |
        tensorrt_llm/_torch/flashinfer_utils.py |
        tensorrt_llm/_torch/hostfunc.py |
        tensorrt_llm/_torch/llm.py |
        tensorrt_llm/_torch/memory_buffer_utils.py |
        tensorrt_llm/_torch/metadata.py |
        tensorrt_llm/_torch/model_config.py |
        tensorrt_llm/_torch/models/__init__.py |
        tensorrt_llm/_torch/models/checkpoints/__init__.py |
        tensorrt_llm/_torch/models/checkpoints/auto_mapper.py |
        tensorrt_llm/_torch/models/checkpoints/base_checkpoint_loader.py |
        tensorrt_llm/_torch/models/checkpoints/base_config_loader.py |
        tensorrt_llm/_torch/models/checkpoints/base_weight_loader.py |
        tensorrt_llm/_torch/models/checkpoints/base_weight_mapper.py |
        tensorrt_llm/_torch/models/checkpoints/hf/__init__.py |
        tensorrt_llm/_torch/models/checkpoints/hf/checkpoint_loader.py |
        tensorrt_llm/_torch/models/checkpoints/hf/config_loader.py |
        tensorrt_llm/_torch/models/checkpoints/hf/gemma3_weight_mapper.py |
        tensorrt_llm/_torch/models/checkpoints/hf/llama4_weight_mapper.py |
        tensorrt_llm/_torch/models/checkpoints/hf/mixtral_weight_mapper.py |
        tensorrt_llm/_torch/models/checkpoints/hf/nemotron_h_weight_mapper.py |
        tensorrt_llm/_torch/models/checkpoints/hf/qwen2_moe_weight_mapper.py |
        tensorrt_llm/_torch/models/checkpoints/hf/qwen2vl_weight_mapper.py |
        tensorrt_llm/_torch/models/checkpoints/hf/qwen3_moe_weight_mapper.py |
        tensorrt_llm/_torch/models/checkpoints/hf/qwen3_next_weight_mapper.py |
        tensorrt_llm/_torch/models/checkpoints/hf/weight_loader.py |
        tensorrt_llm/_torch/models/checkpoints/hf/weight_mapper.py |
        tensorrt_llm/_torch/models/modeling_auto.py |
        tensorrt_llm/_torch/models/modeling_bert.py |
        tensorrt_llm/_torch/models/modeling_clip.py |
        tensorrt_llm/_torch/models/modeling_deepseekv3.py |
        tensorrt_llm/_torch/models/modeling_exaone4.py |
        tensorrt_llm/_torch/models/modeling_gemma3.py |
        tensorrt_llm/_torch/models/modeling_gemma3vl.py |
        tensorrt_llm/_torch/models/modeling_gpt_oss.py |
        tensorrt_llm/_torch/models/modeling_hunyuan_dense.py |
        tensorrt_llm/_torch/models/modeling_hunyuan_moe.py |
        tensorrt_llm/_torch/models/modeling_hyperclovax.py |
        tensorrt_llm/_torch/models/modeling_llama_min_latency.py |
        tensorrt_llm/_torch/models/modeling_llama.py |
        tensorrt_llm/_torch/models/modeling_llava_next.py |
        tensorrt_llm/_torch/models/modeling_mistral.py |
        tensorrt_llm/_torch/models/modeling_mixtral.py |
        tensorrt_llm/_torch/models/modeling_mllama.py |
        tensorrt_llm/_torch/models/modeling_multimodal_encoder.py |
        tensorrt_llm/_torch/models/modeling_multimodal_utils.py |
        tensorrt_llm/_torch/models/modeling_nanov2vlm.py |
        tensorrt_llm/_torch/models/modeling_nemotron_h.py |
        tensorrt_llm/_torch/models/modeling_nemotron_nas.py |
        tensorrt_llm/_torch/models/modeling_nemotron.py |
        tensorrt_llm/_torch/models/modeling_phi3.py |
        tensorrt_llm/_torch/models/modeling_phi4mm.py |
        tensorrt_llm/_torch/models/modeling_qwen_moe.py |
        tensorrt_llm/_torch/models/modeling_qwen.py |
        tensorrt_llm/_torch/models/modeling_qwen2vl.py |
        tensorrt_llm/_torch/models/modeling_qwen3_moe.py |
        tensorrt_llm/_torch/models/modeling_qwen3_next.py |
        tensorrt_llm/_torch/models/modeling_qwen3.py |
        tensorrt_llm/_torch/models/modeling_radio.py |
        tensorrt_llm/_torch/models/modeling_seedoss.py |
        tensorrt_llm/_torch/models/modeling_siglip.py |
        tensorrt_llm/_torch/models/modeling_speculative.py |
        tensorrt_llm/_torch/models/modeling_utils.py |
        tensorrt_llm/_torch/models/modeling_vila.py |
        tensorrt_llm/_torch/modules/__init__.py |
        tensorrt_llm/_torch/modules/attention.py |
        tensorrt_llm/_torch/modules/decoder_layer.py |
        tensorrt_llm/_torch/modules/embedding.py |
        tensorrt_llm/_torch/modules/fla/__init__.py |
        tensorrt_llm/_torch/modules/fla/chunk_delta_h.py |
        tensorrt_llm/_torch/modules/fla/chunk_o.py |
        tensorrt_llm/_torch/modules/fla/chunk_scaled_dot_kkt.py |
        tensorrt_llm/_torch/modules/fla/chunk.py |
        tensorrt_llm/_torch/modules/fla/cumsum.py |
        tensorrt_llm/_torch/modules/fla/fused_recurrent.py |
        tensorrt_llm/_torch/modules/fla/fused_sigmoid_gating_recurrent.py |
        tensorrt_llm/_torch/modules/fla/index.py |
        tensorrt_llm/_torch/modules/fla/l2norm.py |
        tensorrt_llm/_torch/modules/fla/layernorm_gated.py |
        tensorrt_llm/_torch/modules/fla/op.py |
        tensorrt_llm/_torch/modules/fla/solve_tril.py |
        tensorrt_llm/_torch/modules/fla/utils.py |
        tensorrt_llm/_torch/modules/fla/wy_fast.py |
        tensorrt_llm/_torch/modules/fused_moe/__init__.py |
        tensorrt_llm/_torch/modules/fused_moe/create_moe.py |
        tensorrt_llm/_torch/modules/fused_moe/deep_ep_utils.py |
        tensorrt_llm/_torch/modules/fused_moe/fused_moe_cute_dsl.py |
        tensorrt_llm/_torch/modules/fused_moe/fused_moe_cutlass.py |
        tensorrt_llm/_torch/modules/fused_moe/fused_moe_deepgemm.py |
        tensorrt_llm/_torch/modules/fused_moe/fused_moe_triton.py |
        tensorrt_llm/_torch/modules/fused_moe/fused_moe_trtllm_gen.py |
        tensorrt_llm/_torch/modules/fused_moe/fused_moe_vanilla.py |
        tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py |
        tensorrt_llm/_torch/modules/fused_moe/interface.py |
        tensorrt_llm/_torch/modules/fused_moe/moe_load_balancer.py |
        tensorrt_llm/_torch/modules/fused_moe/ops/__init__.py |
        tensorrt_llm/_torch/modules/fused_moe/ops/moe_op_cutlass.py |
        tensorrt_llm/_torch/modules/fused_moe/ops/moe_op_deepgemm.py |
        tensorrt_llm/_torch/modules/fused_moe/ops/moe_op.py |
        tensorrt_llm/_torch/modules/fused_moe/quantization.py |
        tensorrt_llm/_torch/modules/fused_moe/routing.py |
        tensorrt_llm/_torch/modules/gated_mlp.py |
        tensorrt_llm/_torch/modules/layer_norm.py |
        tensorrt_llm/_torch/modules/linear.py |
        tensorrt_llm/_torch/modules/logits_processor.py |
        tensorrt_llm/_torch/modules/mamba/__init__.py |
        tensorrt_llm/_torch/modules/mamba/causal_conv1d.py |
        tensorrt_llm/_torch/modules/mamba/layernorm_gated.py |
        tensorrt_llm/_torch/modules/mamba/mamba2_metadata.py |
        tensorrt_llm/_torch/modules/mamba/mamba2_mixer.py |
        tensorrt_llm/_torch/modules/mamba/selective_state_update.py |
        tensorrt_llm/_torch/modules/mamba/softplus.py |
        tensorrt_llm/_torch/modules/mamba/ssd_bmm.py |
        tensorrt_llm/_torch/modules/mamba/ssd_chunk_scan.py |
        tensorrt_llm/_torch/modules/mamba/ssd_chunk_state.py |
        tensorrt_llm/_torch/modules/mamba/ssd_combined.py |
        tensorrt_llm/_torch/modules/mamba/ssd_state_passing.py |
        tensorrt_llm/_torch/modules/mlp.py |
        tensorrt_llm/_torch/modules/multi_stream_utils.py |
        tensorrt_llm/_torch/modules/qk_norm_attention.py |
        tensorrt_llm/_torch/modules/rms_norm.py |
        tensorrt_llm/_torch/modules/rotary_embedding.py |
        tensorrt_llm/_torch/modules/swiglu.py |
        tensorrt_llm/_torch/modules/triton_linear.py |
        tensorrt_llm/_torch/peft/__init__.py |
        tensorrt_llm/_torch/peft/lora/__init__.py |
        tensorrt_llm/_torch/peft/lora/layer.py |
        tensorrt_llm/_torch/pyexecutor/__init__.py |
        tensorrt_llm/_torch/pyexecutor/_util.py |
        tensorrt_llm/_torch/pyexecutor/config_utils.py |
        tensorrt_llm/_torch/pyexecutor/config.py |
        tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py |
        tensorrt_llm/_torch/pyexecutor/executor_request_queue.py |
        tensorrt_llm/_torch/pyexecutor/finish_reason.py |
        tensorrt_llm/_torch/pyexecutor/grammar_matcher.py |
        tensorrt_llm/_torch/pyexecutor/guided_decoder.py |
        tensorrt_llm/_torch/pyexecutor/handle_additional_outputs.py |
        tensorrt_llm/_torch/pyexecutor/handle_logits.py |
        tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py |
        tensorrt_llm/_torch/pyexecutor/kv_cache_transceiver.py |
        tensorrt_llm/_torch/pyexecutor/layerwise_nvtx_marker.py |
        tensorrt_llm/_torch/pyexecutor/llm_request.py |
        tensorrt_llm/_torch/pyexecutor/make_decoding_batch_input_output.py |
        tensorrt_llm/_torch/pyexecutor/mamba_cache_manager.py |
        tensorrt_llm/_torch/pyexecutor/model_engine.py |
        tensorrt_llm/_torch/pyexecutor/model_loader.py |
        tensorrt_llm/_torch/pyexecutor/py_executor_creator.py |
        tensorrt_llm/_torch/pyexecutor/py_executor.py |
        tensorrt_llm/_torch/pyexecutor/resource_manager.py |
        tensorrt_llm/_torch/pyexecutor/scheduler.py |
        tensorrt_llm/_torch/pyexecutor/seq_slot_manager.py |
        tensorrt_llm/_torch/shared_tensor/__init__.py |
        tensorrt_llm/_torch/shared_tensor/shared_tensor.py |
        tensorrt_llm/_torch/speculative/__init__.py |
        tensorrt_llm/_torch/speculative/auto_heuristic.py |
        tensorrt_llm/_torch/speculative/drafter.py |
        tensorrt_llm/_torch/speculative/drafting_loops.py |
        tensorrt_llm/_torch/speculative/eagle3.py |
        tensorrt_llm/_torch/speculative/interface.py |
        tensorrt_llm/_torch/speculative/model_drafter.py |
        tensorrt_llm/_torch/speculative/mtp.py |
        tensorrt_llm/_torch/speculative/ngram.py |
        tensorrt_llm/_torch/speculative/save_hidden_state.py |
        tensorrt_llm/_torch/speculative/spec_tree_manager.py |
        tensorrt_llm/_torch/speculative/speculation_gate.py |
        tensorrt_llm/_torch/speculative/utils.py |
        tensorrt_llm/_torch/utils.py |
        tensorrt_llm/_torch/virtual_memory.py |
        tensorrt_llm/_utils.py |
        tensorrt_llm/bench/__init__.py |
        tensorrt_llm/bench/benchmark/__init__.py |
        tensorrt_llm/bench/benchmark/low_latency.py |
        tensorrt_llm/bench/benchmark/throughput.py |
        tensorrt_llm/bench/benchmark/utils/__init__.py |
        tensorrt_llm/bench/benchmark/utils/asynchronous.py |
        tensorrt_llm/bench/benchmark/utils/general.py |
        tensorrt_llm/bench/benchmark/utils/processes.py |
        tensorrt_llm/bench/build/__init__.py |
        tensorrt_llm/bench/build/build.py |
        tensorrt_llm/bench/build/dataclasses.py |
        tensorrt_llm/bench/build/tuning.py |
        tensorrt_llm/bench/build/utils.py |
        tensorrt_llm/bench/dataclasses/__init__.py |
        tensorrt_llm/bench/dataclasses/configuration.py |
        tensorrt_llm/bench/dataclasses/engine.py |
        tensorrt_llm/bench/dataclasses/enums.py |
        tensorrt_llm/bench/dataclasses/general.py |
        tensorrt_llm/bench/dataclasses/reporting.py |
        tensorrt_llm/bench/dataclasses/statistics.py |
        tensorrt_llm/bench/utils/__init__.py |
        tensorrt_llm/bench/utils/data.py |
        tensorrt_llm/builder.py |
        tensorrt_llm/commands/__init__.py |
        tensorrt_llm/commands/bench.py |
        tensorrt_llm/commands/build.py |
        tensorrt_llm/commands/eval.py |
        tensorrt_llm/commands/prune.py |
        tensorrt_llm/commands/refit.py |
        tensorrt_llm/commands/serve.py |
        tensorrt_llm/evaluate/__init__.py |
        tensorrt_llm/evaluate/cnn_dailymail.py |
        tensorrt_llm/evaluate/interface.py |
        tensorrt_llm/evaluate/json_mode_eval.py |
        tensorrt_llm/evaluate/lm_eval_tasks/gpqa/cot_zeroshot_aa/_generate_configs.py |
        tensorrt_llm/evaluate/lm_eval_tasks/gpqa/cot_zeroshot_aa/utils.py |
        tensorrt_llm/evaluate/lm_eval.py |
        tensorrt_llm/evaluate/longbench_v2.py |
        tensorrt_llm/evaluate/mmlu.py |
        tensorrt_llm/executor/__init__.py |
        tensorrt_llm/executor/base_worker.py |
        tensorrt_llm/executor/executor.py |
        tensorrt_llm/executor/ipc.py |
        tensorrt_llm/executor/postproc_worker.py |
        tensorrt_llm/executor/proxy.py |
        tensorrt_llm/executor/ray_executor.py |
        tensorrt_llm/executor/ray_gpu_worker.py |
        tensorrt_llm/executor/request.py |
        tensorrt_llm/executor/result.py |
        tensorrt_llm/executor/rpc_proxy.py |
        tensorrt_llm/executor/rpc_worker.py |
        tensorrt_llm/executor/rpc/__init__.py |
        tensorrt_llm/executor/rpc/rpc_client.py |
        tensorrt_llm/executor/rpc/rpc_common.py |
        tensorrt_llm/executor/rpc/rpc_server.py |
        tensorrt_llm/executor/utils.py |
        tensorrt_llm/executor/worker.py |
        tensorrt_llm/functional.py |
        tensorrt_llm/inputs/__init__.py |
        tensorrt_llm/inputs/data.py |
        tensorrt_llm/inputs/evs.py |
        tensorrt_llm/inputs/multimodal.py |
        tensorrt_llm/inputs/registry.py |
        tensorrt_llm/inputs/utils.py |
        tensorrt_llm/layers/__init__.py |
        tensorrt_llm/layers/activation.py |
        tensorrt_llm/layers/attention.py |
        tensorrt_llm/layers/cast.py |
        tensorrt_llm/layers/conv.py |
        tensorrt_llm/layers/embedding.py |
        tensorrt_llm/layers/language_adapter.py |
        tensorrt_llm/layers/linear.py |
        tensorrt_llm/layers/lora.py |
        tensorrt_llm/layers/mlp.py |
        tensorrt_llm/layers/moe.py |
        tensorrt_llm/layers/normalization.py |
        tensorrt_llm/layers/pooling.py |
        tensorrt_llm/layers/recurrent.py |
        tensorrt_llm/layers/ssm.py |
        tensorrt_llm/llmapi/__init__.py |
        tensorrt_llm/llmapi/build_cache.py |
        tensorrt_llm/llmapi/disagg_utils.py |
        tensorrt_llm/llmapi/kv_cache_type.py |
        tensorrt_llm/llmapi/llm_args.py |
        tensorrt_llm/llmapi/llm_utils.py |
        tensorrt_llm/llmapi/llm.py |
        tensorrt_llm/llmapi/mgmn_leader_node.py |
        tensorrt_llm/llmapi/mgmn_worker_node.py |
        tensorrt_llm/llmapi/mm_encoder.py |
        tensorrt_llm/llmapi/mpi_session.py |
        tensorrt_llm/llmapi/reasoning_parser.py |
        tensorrt_llm/llmapi/tokenizer.py |
        tensorrt_llm/llmapi/tracer.py |
        tensorrt_llm/llmapi/tracing.py |
        tensorrt_llm/llmapi/utils.py |
        tensorrt_llm/lora_helper.py |
        tensorrt_llm/mapping.py |
        tensorrt_llm/math_utils.py |
        tensorrt_llm/metrics/__init__.py |
        tensorrt_llm/metrics/collector.py |
        tensorrt_llm/metrics/enums.py |
        tensorrt_llm/models/__init__.py |
        tensorrt_llm/models/automodel.py |
        tensorrt_llm/models/baichuan/__init__.py |
        tensorrt_llm/models/baichuan/config.py |
        tensorrt_llm/models/baichuan/convert.py |
        tensorrt_llm/models/baichuan/model.py |
        tensorrt_llm/models/bert/__init__.py |
        tensorrt_llm/models/bert/config.py |
        tensorrt_llm/models/bert/convert.py |
        tensorrt_llm/models/bert/model.py |
        tensorrt_llm/models/bloom/__init__.py |
        tensorrt_llm/models/bloom/model.py |
        tensorrt_llm/models/chatglm/__init__.py |
        tensorrt_llm/models/chatglm/config.py |
        tensorrt_llm/models/chatglm/convert.py |
        tensorrt_llm/models/chatglm/model.py |
        tensorrt_llm/models/clip/__init__.py |
        tensorrt_llm/models/clip/model.py |
        tensorrt_llm/models/cogvlm/__init__.py |
        tensorrt_llm/models/cogvlm/config.py |
        tensorrt_llm/models/cogvlm/convert.py |
        tensorrt_llm/models/cogvlm/model.py |
        tensorrt_llm/models/commandr/__init__.py |
        tensorrt_llm/models/commandr/config.py |
        tensorrt_llm/models/commandr/model.py |
        tensorrt_llm/models/convert_utils.py |
        tensorrt_llm/models/dbrx/__init__.py |
        tensorrt_llm/models/dbrx/config.py |
        tensorrt_llm/models/dbrx/model.py |
        tensorrt_llm/models/deepseek_v1/__init__.py |
        tensorrt_llm/models/deepseek_v1/config.py |
        tensorrt_llm/models/deepseek_v1/convert.py |
        tensorrt_llm/models/deepseek_v1/model.py |
        tensorrt_llm/models/deepseek_v2/__init__.py |
        tensorrt_llm/models/deepseek_v2/config.py |
        tensorrt_llm/models/deepseek_v2/convert.py |
        tensorrt_llm/models/deepseek_v2/model.py |
        tensorrt_llm/models/dit/__init__.py |
        tensorrt_llm/models/dit/model.py |
        tensorrt_llm/models/eagle/__init__.py |
        tensorrt_llm/models/eagle/config.py |
        tensorrt_llm/models/eagle/model.py |
        tensorrt_llm/models/enc_dec/__init__.py |
        tensorrt_llm/models/enc_dec/model.py |
        tensorrt_llm/models/falcon/__init__.py |
        tensorrt_llm/models/falcon/config.py |
        tensorrt_llm/models/falcon/convert.py |
        tensorrt_llm/models/falcon/model.py |
        tensorrt_llm/models/gemma/__init__.py |
        tensorrt_llm/models/gemma/config.py |
        tensorrt_llm/models/gemma/convert.py |
        tensorrt_llm/models/gemma/model.py |
        tensorrt_llm/models/gemma/smoothquant.py |
        tensorrt_llm/models/gemma/utils/__init__.py |
        tensorrt_llm/models/gemma/utils/layers.py |
        tensorrt_llm/models/gemma/utils/modules.py |
        tensorrt_llm/models/gemma/utils/params.py |
        tensorrt_llm/models/gemma/utils/positional_embeddings.py |
        tensorrt_llm/models/gemma/utils/sampler.py |
        tensorrt_llm/models/gemma/utils/transformer.py |
        tensorrt_llm/models/gemma/weight.py |
        tensorrt_llm/models/generation_mixin.py |
        tensorrt_llm/models/gpt/__init__.py |
        tensorrt_llm/models/gpt/config.py |
        tensorrt_llm/models/gpt/convert.py |
        tensorrt_llm/models/gpt/model.py |
        tensorrt_llm/models/gptj/__init__.py |
        tensorrt_llm/models/gptj/config.py |
        tensorrt_llm/models/gptj/convert.py |
        tensorrt_llm/models/gptj/model.py |
        tensorrt_llm/models/gptneox/__init__.py |
        tensorrt_llm/models/gptneox/model.py |
        tensorrt_llm/models/grok/__init__.py |
        tensorrt_llm/models/grok/convert.py |
        tensorrt_llm/models/grok/model.py |
        tensorrt_llm/models/grok/weight.py |
        tensorrt_llm/models/llama/__init__.py |
        tensorrt_llm/models/llama/config.py |
        tensorrt_llm/models/llama/convert.py |
        tensorrt_llm/models/llama/model.py |
        tensorrt_llm/models/mamba/__init__.py |
        tensorrt_llm/models/mamba/config.py |
        tensorrt_llm/models/mamba/convert.py |
        tensorrt_llm/models/mamba/model.py |
        tensorrt_llm/models/medusa/__init__.py |
        tensorrt_llm/models/medusa/config.py |
        tensorrt_llm/models/medusa/model.py |
        tensorrt_llm/models/medusa/weight.py |
        tensorrt_llm/models/mllama/__init__.py |
        tensorrt_llm/models/mllama/config.py |
        tensorrt_llm/models/mllama/model.py |
        tensorrt_llm/models/mmdit_sd3/__init__.py |
        tensorrt_llm/models/mmdit_sd3/config.py |
        tensorrt_llm/models/mmdit_sd3/model.py |
        tensorrt_llm/models/model_weights_loader.py |
        tensorrt_llm/models/modeling_utils.py |
        tensorrt_llm/models/mpt/__init__.py |
        tensorrt_llm/models/mpt/model.py |
        tensorrt_llm/models/multimodal_encoders/__init__.py |
        tensorrt_llm/models/multimodal_encoders/config.py |
        tensorrt_llm/models/multimodal_encoders/model.py |
        tensorrt_llm/models/nemotron_nas/__init__.py |
        tensorrt_llm/models/nemotron_nas/config.py |
        tensorrt_llm/models/nemotron_nas/convert.py |
        tensorrt_llm/models/nemotron_nas/layer_config.py |
        tensorrt_llm/models/nemotron_nas/model.py |
        tensorrt_llm/models/opt/__init__.py |
        tensorrt_llm/models/opt/model.py |
        tensorrt_llm/models/phi/__init__.py |
        tensorrt_llm/models/phi/config.py |
        tensorrt_llm/models/phi/convert.py |
        tensorrt_llm/models/phi/model.py |
        tensorrt_llm/models/phi3/__init__.py |
        tensorrt_llm/models/phi3/config.py |
        tensorrt_llm/models/phi3/convert.py |
        tensorrt_llm/models/phi3/model.py |
        tensorrt_llm/models/phi3/split_weights.py |
        tensorrt_llm/models/qwen/__init__.py |
        tensorrt_llm/models/qwen/config.py |
        tensorrt_llm/models/qwen/convert.py |
        tensorrt_llm/models/qwen/model.py |
        tensorrt_llm/models/qwen/utils.py |
        tensorrt_llm/models/recurrentgemma/__init__.py |
        tensorrt_llm/models/recurrentgemma/model.py |
        tensorrt_llm/models/redrafter/__init__.py |
        tensorrt_llm/models/redrafter/drafter.py |
        tensorrt_llm/models/redrafter/model.py |
        tensorrt_llm/models/redrafter/redrafter_helper.py |
        tensorrt_llm/models/stdit/__init__.py |
        tensorrt_llm/models/stdit/config.py |
        tensorrt_llm/models/stdit/model.py |
        tensorrt_llm/models/unet/__init__.py |
        tensorrt_llm/models/unet/attention.py |
        tensorrt_llm/models/unet/embeddings.py |
        tensorrt_llm/models/unet/pp/__init__.py |
        tensorrt_llm/models/unet/pp/attention.py |
        tensorrt_llm/models/unet/pp/conv2d.py |
        tensorrt_llm/models/unet/pp/groupnorm.py |
        tensorrt_llm/models/unet/pp/unet_pp.py |
        tensorrt_llm/models/unet/resnet.py |
        tensorrt_llm/models/unet/unet_2d_blocks.py |
        tensorrt_llm/models/unet/unet_2d_condition.py |
        tensorrt_llm/models/unet/weights.py |
        tensorrt_llm/network.py |
        tensorrt_llm/parameter.py |
        tensorrt_llm/plugin/__init__.py |
        tensorrt_llm/plugin/plugin.py |
        tensorrt_llm/quantization/__init__.py |
        tensorrt_llm/quantization/functional.py |
        tensorrt_llm/quantization/image_processing.py |
        tensorrt_llm/quantization/layers.py |
        tensorrt_llm/quantization/mode.py |
        tensorrt_llm/quantization/quantize_by_modelopt.py |
        tensorrt_llm/quantization/quantize.py |
        tensorrt_llm/quantization/utils/__init__.py |
        tensorrt_llm/quantization/utils/fp4_utils.py |
        tensorrt_llm/quantization/utils/fp8_utils.py |
        tensorrt_llm/ray_stub.py |
        tensorrt_llm/runtime/__init__.py |
        tensorrt_llm/runtime/enc_dec_model_runner.py |
        tensorrt_llm/runtime/generation.py |
        tensorrt_llm/runtime/kv_cache_manager.py |
        tensorrt_llm/runtime/medusa_utils.py |
        tensorrt_llm/runtime/memory_pools/__init__.py |
        tensorrt_llm/runtime/memory_pools/memory_pools_allocator.py |
        tensorrt_llm/runtime/memory_pools/pool.py |
        tensorrt_llm/runtime/memory_pools/pools_kv_cache_manager.py |
        tensorrt_llm/runtime/model_runner_cpp.py |
        tensorrt_llm/runtime/model_runner.py |
        tensorrt_llm/runtime/multimodal_model_runner.py |
        tensorrt_llm/runtime/processor_wrapper/__init__.py |
        tensorrt_llm/runtime/processor_wrapper/mllama_processor_wrapper.py |
        tensorrt_llm/runtime/processor_wrapper/processor_wrapper.py |
        tensorrt_llm/runtime/redrafter_utils.py |
        tensorrt_llm/runtime/session.py |
        tensorrt_llm/scaffolding/__init__.py |
        tensorrt_llm/scaffolding/benchmark.py |
        tensorrt_llm/scaffolding/contrib/__init__.py |
        tensorrt_llm/scaffolding/contrib/AsyncGeneration/__init__.py |
        tensorrt_llm/scaffolding/contrib/AsyncGeneration/stream_generation.py |
        tensorrt_llm/scaffolding/contrib/DeepConf/__init__.py |
        tensorrt_llm/scaffolding/contrib/DeepConf/deep_conf_controller.py |
        tensorrt_llm/scaffolding/contrib/DeepConf/deep_conf_utils.py |
        tensorrt_llm/scaffolding/contrib/Dynasor/__init__.py |
        tensorrt_llm/scaffolding/contrib/Dynasor/dynasor_controller.py |
        tensorrt_llm/scaffolding/contrib/Dynasor/evaluator.py |
        tensorrt_llm/scaffolding/contrib/mcp/__init__.py |
        tensorrt_llm/scaffolding/contrib/mcp/chat_handler.py |
        tensorrt_llm/scaffolding/contrib/mcp/chat_task.py |
        tensorrt_llm/scaffolding/contrib/mcp/mcp_controller.py |
        tensorrt_llm/scaffolding/contrib/mcp/mcp_task.py |
        tensorrt_llm/scaffolding/contrib/mcp/mcp_utils.py |
        tensorrt_llm/scaffolding/contrib/mcp/mcp_worker.py |
        tensorrt_llm/scaffolding/contrib/TreeInference/__init__.py |
        tensorrt_llm/scaffolding/contrib/TreeInference/tree_controllers.py |
        tensorrt_llm/scaffolding/controller.py |
        tensorrt_llm/scaffolding/math_utils.py |
        tensorrt_llm/scaffolding/result.py |
        tensorrt_llm/scaffolding/scaffolding_llm.py |
        tensorrt_llm/scaffolding/task_collection.py |
        tensorrt_llm/scaffolding/task.py |
        tensorrt_llm/scaffolding/worker.py |
        tensorrt_llm/scheduling_params.py |
        tensorrt_llm/serialization.py |
        tensorrt_llm/serve/__init__.py |
        tensorrt_llm/serve/chat_utils.py |
        tensorrt_llm/serve/cluster_storage.py |
        tensorrt_llm/serve/disagg_auto_scaling.py |
        tensorrt_llm/serve/harmony_adapter.py |
        tensorrt_llm/serve/metadata_server.py |
        tensorrt_llm/serve/openai_disagg_server.py |
        tensorrt_llm/serve/openai_protocol.py |
        tensorrt_llm/serve/openai_server.py |
        tensorrt_llm/serve/postprocess_handlers.py |
        tensorrt_llm/serve/responses_utils.py |
        tensorrt_llm/serve/router.py |
        tensorrt_llm/serve/scripts/__init__.py |
        tensorrt_llm/serve/scripts/backend_request_func.py |
        tensorrt_llm/serve/scripts/benchmark_dataset.py |
        tensorrt_llm/serve/scripts/benchmark_serving.py |
        tensorrt_llm/serve/scripts/benchmark_utils.py |
        tensorrt_llm/serve/scripts/time_breakdown/__init__.py |
        tensorrt_llm/serve/scripts/time_breakdown/__main__.py |
        tensorrt_llm/serve/scripts/time_breakdown/time_breakdown.py |
        tensorrt_llm/serve/tool_parser/base_tool_parser.py |
        tensorrt_llm/serve/tool_parser/qwen3_tool_parser.py |
        tensorrt_llm/serve/tool_parser/utils.py |
        tensorrt_llm/tools/__init__.py |
        tensorrt_llm/tools/importlib_utils.py |
        tensorrt_llm/tools/multimodal_builder.py |
        tensorrt_llm/tools/onnx_utils.py |
        tensorrt_llm/tools/plugin_gen/__init__.py |
        tensorrt_llm/tools/plugin_gen/core.py |
        tensorrt_llm/tools/plugin_gen/plugin_gen.py |
        tensorrt_llm/tools/plugin_gen/shape_infer.py |
        tensorrt_llm/tools/plugin_gen/templates/functional.py |
        tensorrt_llm/tools/ppl.py |
        tensorrt_llm/tools/profiler/nsys_profile_tools/gputrc2graph.py |
        tensorrt_llm/version.py |
        tests/integration/defs/__init__.py |
        tests/integration/defs/accuracy/__init__.py |
        tests/integration/defs/accuracy/accuracy_core.py |
        tests/integration/defs/accuracy/scripts/collect_evaluated_accuracies.py |
        tests/integration/defs/accuracy/scripts/compute_theta_and_thresholds.py |
        tests/integration/defs/accuracy/test_cli_flow.py |
        tests/integration/defs/accuracy/test_disaggregated_serving.py |
        tests/integration/defs/accuracy/test_llm_api_autodeploy.py |
        tests/integration/defs/accuracy/test_llm_api_pytorch_ray.py |
        tests/integration/defs/accuracy/test_llm_api_pytorch.py |
        tests/integration/defs/accuracy/test_llm_api.py |
        tests/integration/defs/ci_profiler.py |
        tests/integration/defs/common.py |
        tests/integration/defs/conftest.py |
        tests/integration/defs/cpp/conftest.py |
        tests/integration/defs/cpp/cpp_common.py |
        tests/integration/defs/cpp/test_e2e.py |
        tests/integration/defs/cpp/test_multi_gpu.py |
        tests/integration/defs/cpp/test_unit_tests.py |
        tests/integration/defs/deterministic/mixtral_deterministic.py |
        tests/integration/defs/deterministic/test_mixtral_deterministic.py |
        tests/integration/defs/disaggregated/test_auto_scaling.py |
        tests/integration/defs/disaggregated/test_disaggregated_etcd.py |
        tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py |
        tests/integration/defs/disaggregated/test_disaggregated.py |
        tests/integration/defs/disaggregated/test_workers.py |
        tests/integration/defs/examples/run_llm_fp8_quant_llama_70b.py |
        tests/integration/defs/examples/run_llm_quickstart_atexit.py |
        tests/integration/defs/examples/serve/test_serve_negative.py |
        tests/integration/defs/examples/serve/test_serve.py |
        tests/integration/defs/examples/test_ad_guided_decoding.py |
        tests/integration/defs/examples/test_bert.py |
        tests/integration/defs/examples/test_bindings.py |
        tests/integration/defs/examples/test_chatglm.py |
        tests/integration/defs/examples/test_commandr.py |
        tests/integration/defs/examples/test_draft_target_model.py |
        tests/integration/defs/examples/test_eagle.py |
        tests/integration/defs/examples/test_enc_dec.py |
        tests/integration/defs/examples/test_exaone.py |
        tests/integration/defs/examples/test_gemma.py |
        tests/integration/defs/examples/test_gpt.py |
        tests/integration/defs/examples/test_gptj.py |
        tests/integration/defs/examples/test_granite.py |
        tests/integration/defs/examples/test_internlm.py |
        tests/integration/defs/examples/test_llama.py |
        tests/integration/defs/examples/test_llm_api_with_mpi.py |
        tests/integration/defs/examples/test_mamba.py |
        tests/integration/defs/examples/test_medusa.py |
        tests/integration/defs/examples/test_mistral.py |
        tests/integration/defs/examples/test_mixtral.py |
        tests/integration/defs/examples/test_multimodal.py |
        tests/integration/defs/examples/test_nemotron_nas.py |
        tests/integration/defs/examples/test_nemotron.py |
        tests/integration/defs/examples/test_ngram.py |
        tests/integration/defs/examples/test_openai.py |
        tests/integration/defs/examples/test_phi.py |
        tests/integration/defs/examples/test_qwen.py |
        tests/integration/defs/examples/test_qwen2audio.py |
        tests/integration/defs/examples/test_qwenvl.py |
        tests/integration/defs/examples/test_ray.py |
        tests/integration/defs/examples/test_recurrentgemma.py |
        tests/integration/defs/examples/test_redrafter.py |
        tests/integration/defs/examples/test_whisper.py |
        tests/integration/defs/llmapi/__init__.py |
        tests/integration/defs/llmapi/_run_llmapi_llm.py |
        tests/integration/defs/llmapi/test_llm_api_connector.py |
        tests/integration/defs/llmapi/test_llm_api_qa.py |
        tests/integration/defs/llmapi/test_llm_e2e.py |
        tests/integration/defs/llmapi/test_llm_examples.py |
        tests/integration/defs/local_venv.py |
        tests/integration/defs/perf/__init__.py |
        tests/integration/defs/perf/allowed_configs.py |
        tests/integration/defs/perf/build.py |
        tests/integration/defs/perf/create_perf_comparison_report.py |
        tests/integration/defs/perf/data_export.py |
        tests/integration/defs/perf/data.py |
        tests/integration/defs/perf/diff_tools.py |
        tests/integration/defs/perf/gpu_clock_lock.py |
        tests/integration/defs/perf/misc.py |
        tests/integration/defs/perf/open_search_db_utils.py |
        tests/integration/defs/perf/pytorch_model_config.py |
        tests/integration/defs/perf/sample_options_config.py |
        tests/integration/defs/perf/sampler_options_config.py |
        tests/integration/defs/perf/sanity_perf_check.py |
        tests/integration/defs/perf/session_data_writer.py |
        tests/integration/defs/perf/test_perf.py |
        tests/integration/defs/perf/utils.py |
        tests/integration/defs/runner_interface.py |
        tests/integration/defs/stress_test/stress_test.py |
        tests/integration/defs/sysinfo/get_sysinfo.py |
        tests/integration/defs/test_e2e.py |
        tests/integration/defs/test_fmha.py |
        tests/integration/defs/test_list_parser.py |
        tests/integration/defs/test_list_validation.py |
        tests/integration/defs/test_mlpf_results.py |
        tests/integration/defs/test_sanity.py |
        tests/integration/defs/test_unittests.py |
        tests/integration/defs/triton_server/__init__.py |
        tests/integration/defs/triton_server/build_engines.py |
        tests/integration/defs/triton_server/common.py |
        tests/integration/defs/triton_server/conftest.py |
        tests/integration/defs/triton_server/local_venv.py |
        tests/integration/defs/triton_server/rcca/bug_4323566/inflight_batcher_llm_client_with_end_id.py |
        tests/integration/defs/triton_server/runner_interface.py |
        tests/integration/defs/triton_server/test_list_parser.py |
        tests/integration/defs/triton_server/test_triton_llm.py |
        tests/integration/defs/triton_server/test_triton_memleak.py |
        tests/integration/defs/triton_server/test_triton_multi_node.py |
        tests/integration/defs/triton_server/test_triton_rcca.py |
        tests/integration/defs/triton_server/test_triton.py |
        tests/integration/defs/triton_server/trt_test_alternative.py |
        tests/integration/defs/trt_test_alternative.py |
        tests/integration/defs/utils/__init__.py |
        tests/integration/defs/utils/periodic_junit.py |
        tests/integration/defs/utils/timeout_manager.py |
        tests/microbenchmarks/all_reduce.py |
        tests/microbenchmarks/build_time_benchmark.py |
        tests/microbenchmarks/build_time_dashboard.py |
        tests/scripts/allreduce_perf/allreduce_heuristic_code_gen.py |
        tests/scripts/allreduce_perf/allreduce_perf_viz.py |
        tests/scripts/iteration_log_parser.py |
        tests/scripts/perf-sanity/parse_benchmark_results.py |
        tests/scripts/perf-sanity/run_benchmark_serve.py |
        tests/unittest/_torch/attention/sparse/test_dsa_indexer.py |
        tests/unittest/_torch/attention/sparse/test_flash_mla.py |
        tests/unittest/_torch/attention/sparse/test_rocketkv.py |
        tests/unittest/_torch/attention/sparse/test_sparse_mla_forward.py |
        tests/unittest/_torch/attention/test_attention_mla.py |
        tests/unittest/_torch/attention/test_attention_no_cache.py |
        tests/unittest/_torch/attention/test_attention.py |
        tests/unittest/_torch/attention/test_flashinfer_attention.py |
        tests/unittest/_torch/attention/test_flashinfer_star_attn.py |
        tests/unittest/_torch/attention/test_vanilla_attention.py |
        tests/unittest/_torch/compilation/test_add_norm.py |
        tests/unittest/_torch/debugger/test_debugger_addon.py |
        tests/unittest/_torch/executor/test_chunked_logits.py |
        tests/unittest/_torch/executor/test_executor_request_queue.py |
        tests/unittest/_torch/executor/test_overlap_scheduler.py |
        tests/unittest/_torch/executor/test_pytorch_model_engine.py |
        tests/unittest/_torch/executor/test_resource_manager.py |
        tests/unittest/_torch/executor/test_router_dealer_ipc.py |
        tests/unittest/_torch/helpers.py |
        tests/unittest/_torch/misc/test_autotuner.py |
        tests/unittest/_torch/misc/test_share_tensor.py |
        tests/unittest/_torch/misc/test_virtual_memory.py |
        tests/unittest/_torch/modeling/test_modeling_bert.py |
        tests/unittest/_torch/modeling/test_modeling_clip.py |
        tests/unittest/_torch/modeling/test_modeling_exaone4.py |
        tests/unittest/_torch/modeling/test_modeling_gemma3.py |
        tests/unittest/_torch/modeling/test_modeling_gpt_oss.py |
        tests/unittest/_torch/modeling/test_modeling_llama_min_latency.py |
        tests/unittest/_torch/modeling/test_modeling_llama.py |
        tests/unittest/_torch/modeling/test_modeling_mixtral.py |
        tests/unittest/_torch/modeling/test_modeling_mllama.py |
        tests/unittest/_torch/modeling/test_modeling_nemotron_h.py |
        tests/unittest/_torch/modeling/test_modeling_nemotron_nas.py |
        tests/unittest/_torch/modeling/test_modeling_nemotron.py |
        tests/unittest/_torch/modeling/test_modeling_out_of_tree.py |
        tests/unittest/_torch/modeling/test_modeling_phi3.py |
        tests/unittest/_torch/modeling/test_modeling_qwen_moe.py |
        tests/unittest/_torch/modeling/test_modeling_qwen.py |
        tests/unittest/_torch/modeling/test_modeling_qwen2_5vl.py |
        tests/unittest/_torch/modeling/test_modeling_siglip.py |
        tests/unittest/_torch/modeling/test_modeling_vila.py |
        tests/unittest/_torch/modules/test_fused_moe.py |
        tests/unittest/_torch/modules/test_group_rmn_norm.py |
        tests/unittest/_torch/modules/test_moe_host_sharer.py |
        tests/unittest/_torch/modules/test_moe_load_balancer.py |
        tests/unittest/_torch/modules/test_moe_routing.py |
        tests/unittest/_torch/modules/test_rotary_embedding.py |
        tests/unittest/_torch/modules/test_triton_linear.py |
        tests/unittest/_torch/modules/tests_lora_modules/test_lora_attention_pytorch_flow_vs_trt.py |
        tests/unittest/_torch/modules/tests_lora_modules/test_lora_plugin_vs_lora_op.py |
        tests/unittest/_torch/multi_gpu_modeling/test_deepseek.py |
        tests/unittest/_torch/multi_gpu_modeling/test_llama3.py |
        tests/unittest/_torch/multi_gpu/test_allreduce.py |
        tests/unittest/_torch/multi_gpu/test_alltoall.py |
        tests/unittest/_torch/multi_gpu/test_ar_residual_norm.py |