Skip to content

Commit 79be434

Browse files
committed
Fix pre-commit
Signed-off-by: Jiayu Chang <jiayuc@nvidia.com>
1 parent a9fbc2c commit 79be434

File tree

5 files changed

+11
-12
lines changed

5 files changed

+11
-12
lines changed

cpp/tensorrt_llm/kernels/cuda_graph_grouped_gemm.cu

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -368,4 +368,5 @@ void cudaGraphSplitKGroupedGemm(cutlass::gemm::GemmCoord* problemSizesPtr, int p
368368
}
369369

370370
} // namespace kernels
371+
371372
TRTLLM_NAMESPACE_END

cpp/tensorrt_llm/kernels/cuda_graph_grouped_gemm.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,4 +59,5 @@ void cudaGraphSplitKGroupedGemm(cutlass::gemm::GemmCoord* problemSizesPtr, int p
5959
cutlass::gemm::GemmCoord* hostMaxProblemSizesPtr, int64_t* splitKOffsetsGpu, cudaStream_t stream);
6060

6161
} // namespace kernels
62+
6263
TRTLLM_NAMESPACE_END

cpp/tensorrt_llm/kernels/lora/loraGroupGEMMParamFillRowReorderFusion.cu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -307,8 +307,8 @@ void launchKernelWithModuleCount(int32_t* in_sizes, int32_t* out_sizes, int64_t*
307307
int const itemsPerRow = ITEMS_PER_BLOCK * gridDimX;
308308
gridDimY = std::max(gridDimY,
309309
common::ceilDiv(static_cast<int>(module_count * batch_size * max_lora_rank * dtype_element_size), itemsPerRow));
310-
gridDimY = std::max(
311-
gridDimY, common::ceilDiv(static_cast<int>(batch_size * sum_output_hidden_size * dtype_element_size), itemsPerRow));
310+
gridDimY = std::max(gridDimY,
311+
common::ceilDiv(static_cast<int>(batch_size * sum_output_hidden_size * dtype_element_size), itemsPerRow));
312312

313313
dim3 grid(gridDimX, gridDimY);
314314
dim3 block(BlockDim);

cpp/tensorrt_llm/thop/loraOp.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -419,5 +419,6 @@ TORCH_LIBRARY_IMPL(trtllm, CUDA, m)
419419
{
420420
m.impl("lora_grouped_gemm", &tensorrt_llm::torch_ext::lora_grouped_gemm);
421421
m.impl("lora_grouped_gemm_cuda_graph", &tensorrt_llm::torch_ext::lora_grouped_gemm_cuda_graph);
422-
m.impl("lora_group_gemm_param_fill_row_reorder_fusion", &tensorrt_llm::torch_ext::lora_group_gemm_param_fill_row_reorder_fusion);
422+
m.impl("lora_group_gemm_param_fill_row_reorder_fusion",
423+
&tensorrt_llm::torch_ext::lora_group_gemm_param_fill_row_reorder_fusion);
423424
}

tensorrt_llm/_torch/pyexecutor/model_engine.py

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3288,15 +3288,11 @@ def _prepare_inputs(
32883288
raise NotImplementedError(
32893289
f"Unsupported cp_type {getattr(cp_type, 'name', cp_type)}.")
32903290

3291-
return self._prepare_tp_inputs(scheduled_requests,
3292-
kv_cache_manager,
3293-
attn_metadata,
3294-
spec_metadata,
3295-
new_tensors_device,
3296-
cache_indirection_buffer,
3297-
num_accepted_tokens_device,
3298-
req_id_to_old_request, resource_manager,
3299-
maybe_graph)
3291+
return self._prepare_tp_inputs(
3292+
scheduled_requests, kv_cache_manager, attn_metadata, spec_metadata,
3293+
new_tensors_device, cache_indirection_buffer,
3294+
num_accepted_tokens_device, req_id_to_old_request, resource_manager,
3295+
maybe_graph)
33003296

33013297
@torch.inference_mode()
33023298
@with_model_extra_attrs(lambda self: self.model.extra_attrs)

0 commit comments

Comments
 (0)