[ATen][CUDA] Add sm_121a flag for RowwiseScaledMM (pytorch#167734)

Aidyn-A · pytorchmergebot · commit 226850cc6621 · 2025-11-14T08:44:04.000Z
This PR add a sm_121a flag for row-wise scaled matmuls on DGX Spark. Pull Request resolved: pytorch#167734 Approved by: https://github.com/eqy, https://github.com/cyyever
diff --git a/cmake/Codegen.cmake b/cmake/Codegen.cmake
@@ -118,6 +118,11 @@ if(INTERN_BUILD_ATEN_OPS)
             list(APPEND _file_compile_flags "-gencode;arch=compute_120a,code=sm_120a")
           endif()
         endif()
+        if("${_arch}" STREQUAL "121a")
+          if(_existing_arch_flags MATCHES ".*compute_120.*")
+            list(APPEND _file_compile_flags "-gencode;arch=compute_121a,code=sm_121a")
+          endif()
+        endif()
       endforeach()
       list(JOIN _file_compile_flags " " _file_compile_flags)
 
@@ -126,7 +131,7 @@ if(INTERN_BUILD_ATEN_OPS)
 
     _BUILD_FOR_ADDITIONAL_ARCHS(
       "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/RowwiseScaledMM.cu"
-      "89;90a;100a;103a;120a")
+      "89;90a;100a;103a;120a;121a")
     _BUILD_FOR_ADDITIONAL_ARCHS(
       "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/ScaledGroupMM.cu"
       "90a")