Formatting fixes

amitz-nv · amitz-nv · commit e63e17d13de3 · 2026-01-28T11:58:22.000Z
Signed-off-by: amitz-nv <203509407+amitz-nv@users.noreply.github.com>
diff --git a/csrc/trtllm_fused_moe_kernel_launcher.cu b/csrc/trtllm_fused_moe_kernel_launcher.cu
@@ -305,10 +305,9 @@ class FusedMoeLauncher {
                                                 (int32_t)tile_tokens_dim, this->use_shuffled_weight,
                                                 this->weight_layout);
     } else {
-      moe_runner = std::make_unique<RunnerType>(this->mDtypeAct, this->mDtypeWeights,
-                                                args->mUseDeepSeekFp8, (int32_t)tile_tokens_dim,
-                                                this->activation_type,
-                                                this->use_shuffled_weight, this->weight_layout);
+      moe_runner = std::make_unique<RunnerType>(
+          this->mDtypeAct, this->mDtypeWeights, args->mUseDeepSeekFp8, (int32_t)tile_tokens_dim,
+          this->activation_type, this->use_shuffled_weight, this->weight_layout);
     }
 
     if (moe_tactic == -1) {
@@ -417,7 +416,8 @@ class Bf16MoeLauncher : public FusedMoeLauncher {
   void init(std::unique_ptr<tensorrt_llm::kernels::trtllmgen_moe::MoE::MoERunnerArgs>&& args,
             int64_t tile_tokens_dim, int64_t routing_method_type, bool use_shuffled_weight,
             int64_t weight_layout) {
-    constexpr ActivationType activation_type = ActivationType::Swiglu;  // not exposed in api for now
+    constexpr ActivationType activation_type =
+        ActivationType::Swiglu;  // not exposed in api for now
 
     // Do base class init and perform common checks
     FusedMoeLauncher::init_common(std::move(args), tile_tokens_dim, routing_method_type,
@@ -532,8 +532,8 @@ class Fp8PerTensorLauncher : public FusedMoeLauncher {
 
   void init(std::unique_ptr<tensorrt_llm::kernels::trtllmgen_moe::MoE::MoERunnerArgs>&& args,
             int64_t tile_tokens_dim, int64_t routing_method_type, bool use_shuffled_weight,
-            int64_t weight_layout, bool use_routing_scales_on_input_param, ActivationType activation_type) {
-
+            int64_t weight_layout, bool use_routing_scales_on_input_param,
+            ActivationType activation_type) {
     this->use_routing_scales_on_input = use_routing_scales_on_input_param;
 
     auto dtype = hidden_states.dtype();
@@ -968,8 +968,7 @@ class MxInt4BlockScaleLauncher : public FusedMoeLauncher {
     FusedMoeLauncher::init_common(
         std::move(args), tile_tokens_dim, routing_method_type,
         /*use_shuffled_weight=*/true,
-        static_cast<int64_t>(batchedGemm::gemm::MatrixLayout::BlockMajorK),
-        ActivationType::Swiglu);
+        static_cast<int64_t>(batchedGemm::gemm::MatrixLayout::BlockMajorK), ActivationType::Swiglu);
   }
 
   void check_routing() const override { FusedMoeLauncher::check_routing_common(); }
@@ -1763,7 +1762,8 @@ Array<Tensor> trtllm_fp4_block_scale_moe(
         gemm2_weights_scale, gemm2_bias, output1_scales_scalar, output1_scales_gate_scalar,
         output2_scales_scalar, topk_ids, expert_weights);
     launcher->init(std::move(args), curr_tile_N, routing_method_type, /*use_shuffled_weight=*/true,
-                   /*weight_layout=*/0, static_cast<ActivationType>(act_type), mDtypeAct, mDtypeWeights);
+                   /*weight_layout=*/0, static_cast<ActivationType>(act_type), mDtypeAct,
+                   mDtypeWeights);
 
     launchers_map[curr_tile_N] = std::move(launcher);
   }
diff --git a/csrc/trtllm_fused_moe_runner.cu b/csrc/trtllm_fused_moe_runner.cu
@@ -201,7 +201,8 @@ static inline ActType activationTypeToGatedActType(ActivationType actType) {
       return ActType::GeGlu;
     default:
       FLASHINFER_CHECK(false, "Unsupported gated activation type ",
-                       serializeActivationType(actType), " of enum ", static_cast<int64_t>(actType));
+                       serializeActivationType(actType), " of enum ",
+                       static_cast<int64_t>(actType));
   }
   return ActType::SwiGlu;
 }
@@ -214,7 +215,8 @@ static inline EltwiseActType activationTypeToEltwiseActType(ActivationType actTy
       return EltwiseActType::None;
     default:
       FLASHINFER_CHECK(false, "Unsupported eltwise activation type ",
-                       serializeActivationType(actType), " of enum ", static_cast<int64_t>(actType));
+                       serializeActivationType(actType), " of enum ",
+                       static_cast<int64_t>(actType));
   }
   return EltwiseActType::None;
 }
@@ -224,8 +226,9 @@ tensorrt_llm::kernels::TrtllmGenBatchedGemmRunnerOptions getOptions(
     ActivationType activationType, bool useShuffledMatrix,
     batchedGemm::gemm::MatrixLayout weightLayout) {
   int64_t actTypeInt = static_cast<int64_t>(activationType);
-  FLASHINFER_CHECK(0 <= actTypeInt && actTypeInt < static_cast<int64_t>(ActivationType::InvalidType),
-                   "Unknown activation type", serializeActivationType(activationType), "of enum", actTypeInt);
+  FLASHINFER_CHECK(
+      0 <= actTypeInt && actTypeInt < static_cast<int64_t>(ActivationType::InvalidType),
+      "Unknown activation type", serializeActivationType(activationType), "of enum", actTypeInt);
   bool isGatedAct = isGatedActivation(activationType);
   if (isGatedAct) {
     ActType actType = activationTypeToGatedActType(activationType);
@@ -289,12 +292,13 @@ void Runner::run(void* hiddenState, void* hiddenStateScale, void* weights, void*
   auto maxNumCtasInBatchDim =
       Routing::getMaxNumCtasInBatchDim(numTokens, topK, numExperts, mTileTokensDim);
   int32_t intermediateSizeFactor = (isGatedActivation(mActType) ? 2 : 1);
-  mRunner.run(numTokens, intermediateSizeFactor * intermediateSize, hiddenSize, {}, numTokens, numExperts,
-              maxNumCtasInBatchDim, hiddenState, hiddenStateScale, weights, weightsScale,
-              expertWeights, /* perTokensSfB */ nullptr, outputScalesScalar, outputScalesGateScalar,
-              ptrBias, ptrAlpha, ptrBeta, ptrClampLimit, output, outputScale, permutedIdxToTokenIdx,
-              ptrTotalNumPaddedTokens, ptrCtaIdxXyToBatchIdx, ptrCtaIdxXyToMnLimit,
-              ptrNumNonExitingCtas, bmm1Workspace, stream, device, configIndex, enable_pdl);
+  mRunner.run(numTokens, intermediateSizeFactor * intermediateSize, hiddenSize, {}, numTokens,
+              numExperts, maxNumCtasInBatchDim, hiddenState, hiddenStateScale, weights,
+              weightsScale, expertWeights, /* perTokensSfB */ nullptr, outputScalesScalar,
+              outputScalesGateScalar, ptrBias, ptrAlpha, ptrBeta, ptrClampLimit, output,
+              outputScale, permutedIdxToTokenIdx, ptrTotalNumPaddedTokens, ptrCtaIdxXyToBatchIdx,
+              ptrCtaIdxXyToMnLimit, ptrNumNonExitingCtas, bmm1Workspace, stream, device,
+              configIndex, enable_pdl);
 }
 
 size_t Runner::getWorkspaceSizeInBytes(int32_t topK, int32_t hiddenSize, int32_t intermediateSize,
@@ -477,8 +481,7 @@ void Runner::setOpsData(MoERunnerArgs const& args, MoEWorkspace const& workspace
   activationData.inDqSfsPtr = workspace.gemm1_output_scale;
   activationData.outDqSfsPtr = workspace.activation_output_scale;
   activationData.innerDim =
-      args.intermediate_size *
-      (isGatedActivation(args.activation_type) ? 2 : 1);
+      args.intermediate_size * (isGatedActivation(args.activation_type) ? 2 : 1);
   activationData.topK = args.top_k;
   activationData.numTokens = args.num_tokens;
   activationData.expandedIdxToPermutedIdx = workspace.expanded_idx_to_permuted_idx;