```mermaid
graph TB
    subgraph "User API & CLI Tools"
        CLI[CLI Tools]
        LLMAPI[LLM API]
        CLI --> LLMAPI
    end

    subgraph "Model Checkpoint"
        Checkpoint[Hugging Face Models]
        Checkpoint --> CLI
        Checkpoint --> LLMAPI
    end

    subgraph "TensorRT_Flow"
        trtllmExecutor[trtllm.Executor]
        Engine[TensorRT Engine]
        TRTGraph[TensorRT Graph]
        Plugins[TensorRT Plugins]
        cudaKernel[CUDA Kernel]
        Executor[Executor]
        LLMAPI --> trtllmExecutor
        trtllmExecutor --> |build|Engine
        trtllmExecutor --> |compile|TRTGraph
        trtllmExecutor --> |compile|Plugins
        Engine --> Executor
        Plugins --> Executor
        TRTGraph --> Executor
        Plugins --> cudaKernel
    end

    subgraph "PyTorch_Flow"
        PyExecutor[PyExecutor]
        PyEngine[PyTorch Engine]
        CustomOps[Custom Ops]
        PyTorchOps[PyTorch Ops]
        KernelLibs[Kernel Libs]
        PyScheduler[Scheduler]
        PyDecoder[Decoder]
        CUDAKernel[CUDA Kernel]
        LLMAPI --> PyExecutor
        PyExecutor --> PyEngine
        PyEngine --> CustomOps
        PyEngine --> PyTorchOps
        PyEngine --> KernelLibs
        PyEngine --> PyScheduler
        PyEngine --> PyDecoder
        KernelLibs --> CUDAKernel
        CustomOps --> CUDAKernel
    end

    subgraph "Shared_Component"
        Shared_Decoder[Decoder]
        Shared_Scheduler[Scheduler]
        Sampling[Sampling]
        BatchManager[Batch Manager]
        KVCache[KV Cache Manager]
        PyScheduler --> |Pybind|Shared_Scheduler
        PyDecoder --> |Pybind|Shared_Decoder
        Executor --> Shared_Decoder
        Shared_Decoder --> Sampling
        Executor --> Shared_Scheduler
        Shared_Scheduler --> |In-flight Batching| BatchManager
        BatchManager --> KVCache
    end

    subgraph "Output_Results"
        Tokens[Generated Tokens]
        Stats[Performance Stats]
        Metrics[Accuracy Metrics]
    end

    %% PyTorch_Flow ~~~ TensorRT_Flow

    TensorRT_Flow --> Output_Results
    PyTorch_Flow --> Output_Results

    %% Force Output_Results to be placed between PyTorch_Flow and TensorRT_Flow
    PyTorch_Flow ~~~ Output_Results

    %% Model checkpoint format
    classDef checkpoint fill:#ff1,stroke:#333,stroke-width:2px;
    class Checkpoint checkpoint;

    %% CLI tools format
    classDef cli fill:#f9f,stroke:#333,stroke-width:2px;
    class CLI cli;

    %% TRT flow format
    classDef trt fill:#bbf,stroke:#333,stroke-width:2px;
    class trtllmExecutor,TRTGraph,Plugins,Engine,Executor,cudaKernel trt;

    %% PyTorch flow format
    classDef pytorch fill:#8bf,stroke:#333,stroke-width:2px;
    class PyExecutor,PyEngine,CustomOps,PyTorchOps,KernelLibs,PyScheduler,PyDecoder,CUDAKernel pytorch;

    %% Shared component format
    classDef component fill:#fc8,stroke:#333,stroke-width:2px;
    class Shared_Decoder,Sampling,Shared_Scheduler,BatchManager,KVCache component;

    %% APIs format
    classDef api fill:#bfb,stroke:#333,stroke-width:2px;
    class LLMAPI api;

    %% Results format
    classDef result fill:#fbb,stroke:#333,stroke-width:2px;
    class Tokens,Stats,Metrics result;
```
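
Both execution flows in the diagram are entered through the same LLM API. The snippet below is a minimal sketch of that entry point in the style of the standard `tensorrt_llm.LLM` quick-start pattern; the model name and sampling values are illustrative, and which backend (PyTorch engine or TensorRT engine) the top-level `LLM` class selects depends on the installed TensorRT-LLM version and its configuration.

```python
from tensorrt_llm import LLM, SamplingParams

# Illustrative prompts and sampling settings; adjust for your model and task.
prompts = [
    "Hello, my name is",
    "The capital of France is",
]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# The Hugging Face model name is an example; any supported checkpoint works.
llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# generate() hands the requests to the executor/scheduler stack shown in the
# diagram and returns one result per prompt.
for output in llm.generate(prompts, sampling_params):
    print(f"Prompt: {output.prompt!r} -> {output.outputs[0].text!r}")
```

From the diagram's perspective, the call above exercises the shared components: requests are scheduled with in-flight batching through the Batch Manager and KV Cache Manager, decoded and sampled, and returned as generated tokens.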