[BE][MPS] Don't pass nnz to mark_segments (pytorch#170403)

malfet · pytorchmergebot · commit ed2e92b4b6c9 · 2025-12-16T07:52:37.000Z
Fixes the following unused-parameter warning: ``` /Users/malfet/git/pytorch/pytorch/aten/src/ATen/native/sparse/mps/kernels/SparseTensorMath.metal:288:27: warning: unused parameter 'nnz' [-Wunused-parameter] constant uint& nnz [[buffer(2)]], ``` Also, rely on the language's short-circuit evaluation of `||` to make the kernel more compact. Pull Request resolved: pytorch#170403 Approved by: https://github.com/Skylion007
diff --git a/aten/src/ATen/native/sparse/mps/SparseMPSTensorMath.mm b/aten/src/ATen/native/sparse/mps/SparseMPSTensorMath.mm
@@ -1409,7 +1409,7 @@ static Tensor softmax_sparse_mps_impl(
         auto pso = lib.getPipelineStateForFunc("mark_segments");
         auto enc = stream->commandEncoder();
         [enc setComputePipelineState:pso];
-        mtl_setArgs(enc, sorted_pool_indices, mask, nnz_u);
+        mtl_setArgs(enc, sorted_pool_indices, mask);
 
         auto gridSize = MTLSizeMake(nnz, 1, 1);
         auto threadGroupSize = MTLSizeMake(std::min<uint64_t>(nnz, pso.maxTotalThreadsPerThreadgroup), 1, 1);
@@ -1522,7 +1522,7 @@ static Tensor softmax_backward_sparse_mps_impl(
         auto pso = lib.getPipelineStateForFunc("mark_segments");
         auto enc = stream->commandEncoder();
         [enc setComputePipelineState:pso];
-        mtl_setArgs(enc, sorted_pool_indices, mask, nnz_u);
+        mtl_setArgs(enc, sorted_pool_indices, mask);
         auto gridSize = MTLSizeMake(nnz, 1, 1);
         auto threadGroupSize = MTLSizeMake(std::min<uint64_t>(nnz, pso.maxTotalThreadsPerThreadgroup), 1, 1);
         [enc dispatchThreads:gridSize threadsPerThreadgroup:threadGroupSize];
@@ -1592,4 +1592,4 @@ Tensor log_softmax_backward_sparse_mps(const Tensor& grad, const Tensor& output,
 
 REGISTER_MPS_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_mps_kernel);
 REGISTER_MPS_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_mps_kernel);
-} // namespace at::native
+} // namespace at::native
diff --git a/aten/src/ATen/native/sparse/mps/kernels/SparseTensorMath.metal b/aten/src/ATen/native/sparse/mps/kernels/SparseTensorMath.metal
@@ -285,14 +285,9 @@ kernel void spmm_addmm_coo(
 kernel void mark_segments(
     device const int64_t* indices [[buffer(0)]],
     device int*           mask    [[buffer(1)]],
-    constant uint&        nnz     [[buffer(2)]],
     uint                  tid     [[thread_position_in_grid]])
 {
-    if (tid == 0) {
-        mask[0] = 1;
-    } else {
-        mask[tid] = (indices[tid] != indices[tid - 1]) ? 1 : 0;
-    }
+    mask[tid] = (tid == 0 || indices[tid] != indices[tid - 1]) ? 1 : 0;
 }
 
 kernel void compute_offsets_and_counts(