[CIR][CUDA] CallConvLowering for basic types in NVPTX (#1468)

AdUhTkJm · web-flow · commit a846f295ba27 · 2025-03-17T10:36:10.000-07:00
There are some subtleties here. This is the code in OG: ```cpp // note: this is different from default ABI if (!RetTy->isScalarType()) return ABIArgInfo::getDirect(); ``` which says we should return structs directly. That behaviour is correct: it matches `nvcc` and obeys the PTX ABI as well. The `// note:` comment dates back to 2013 (see [this commit](llvm/llvm-project@f9329ff) -- it didn't provide any explanation either), so I believe it's outdated and I didn't include it in this PR.
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.h b/clang/include/clang/CIR/Dialect/IR/CIRTypes.h
@@ -186,6 +186,7 @@ class StructType
 };
 
 bool isAnyFloatingPointType(mlir::Type t);
+bool isScalarType(mlir::Type t);
 bool isFPOrFPVectorTy(mlir::Type);
 bool isIntOrIntVectorTy(mlir::Type);
 } // namespace cir
diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
@@ -841,6 +841,12 @@ bool cir::isAnyFloatingPointType(mlir::Type t) {
              cir::FP80Type>(t);
 }
 
+bool cir::isScalarType(mlir::Type ty) {
+  return isa<cir::IntType, cir::BoolType, cir::SingleType, cir::DoubleType,
+             cir::LongDoubleType, cir::FP16Type, cir::FP128Type, cir::FP80Type,
+             cir::DataMemberType, cir::PointerType>(ty);
+}
+
 //===----------------------------------------------------------------------===//
 // Floating-point and Float-point Vector type helpers
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/NVPTX.cpp
@@ -12,7 +12,9 @@
 #include "TargetInfo.h"
 #include "TargetLoweringInfo.h"
 #include "clang/CIR/ABIArgInfo.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using ABIArgInfo = cir::ABIArgInfo;
@@ -31,9 +33,10 @@ class NVPTXABIInfo : public ABIInfo {
   NVPTXABIInfo(LowerTypes &lt) : ABIInfo(lt) {}
 
 private:
-  void computeInfo(LowerFunctionInfo &fi) const override {
-    llvm_unreachable("NYI");
-  }
+  ABIArgInfo classifyReturnType(mlir::Type ty) const;
+  ABIArgInfo classifyArgumentType(mlir::Type ty) const;
+
+  void computeInfo(LowerFunctionInfo &fi) const override;
 };
 
 class NVPTXTargetLoweringInfo : public TargetLoweringInfo {
@@ -63,6 +66,48 @@ class NVPTXTargetLoweringInfo : public TargetLoweringInfo {
 
 } // namespace
 
+ABIArgInfo NVPTXABIInfo::classifyReturnType(mlir::Type ty) const {
+  if (llvm::isa<VoidType>(ty))
+    return ABIArgInfo::getIgnore();
+
+  if (getContext().getLangOpts().OpenMP)
+    llvm_unreachable("NYI");
+
+  if (!isScalarType(ty))
+    return ABIArgInfo::getDirect();
+
+  // OG treats enums as their underlying type.
+  // This has already been done for CIR.
+
+  // Integers with size < 32 must be extended to 32 bits.
+  // (See Section 3.3 of PTX ABI.)
+  return (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
+                                            : ABIArgInfo::getDirect());
+}
+
+ABIArgInfo NVPTXABIInfo::classifyArgumentType(mlir::Type ty) const {
+  if (isAggregateTypeForABI(ty))
+    llvm_unreachable("NYI");
+
+  if (auto intType = llvm::dyn_cast<IntType>(ty)) {
+    if (intType.getWidth() > 128)
+      llvm_unreachable("NYI");
+  }
+
+  return (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
+                                            : ABIArgInfo::getDirect());
+}
+
+void NVPTXABIInfo::computeInfo(LowerFunctionInfo &fi) const {
+  if (!getCXXABI().classifyReturnType(fi))
+    fi.getReturnInfo() = classifyReturnType(fi.getReturnType());
+
+  for (auto &&[count, argument] : llvm::enumerate(fi.arguments()))
+    argument.info = count < fi.getNumRequiredArgs()
+                        ? classifyArgumentType(argument.type)
+                        : ABIArgInfo::getDirect();
+}
+
 std::unique_ptr<TargetLoweringInfo>
 createNVPTXTargetLoweringInfo(LowerModule &lowerModule) {
   return std::make_unique<NVPTXTargetLoweringInfo>(lowerModule.getTypes());
diff --git a/clang/test/CIR/CallConvLowering/NVPTX/basic.cpp b/clang/test/CIR/CallConvLowering/NVPTX/basic.cpp
@@ -0,0 +1,90 @@
+// RUN: %clang_cc1 -std=c++20 -triple nvptx-nvidia-cuda -fclangir \
+// RUN:            -fclangir-call-conv-lowering -emit-cir-flat -mmlir \
+// RUN:            --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Test call conv lowering for trivial cases.
+
+// CHECK: @_Z4Voidv()
+void Void(void) {
+  // CHECK:   cir.call @_Z4Voidv() : () -> ()
+  Void();
+}
+
+// CHECK: @_Z4Boolb(%arg0: !cir.bool {cir.zeroext} loc({{.+}})) -> (!cir.bool {cir.zeroext})
+bool Bool(bool a) {
+  // CHECK:   cir.call @_Z4Boolb({{.+}}) : (!cir.bool) -> !cir.bool
+  return Bool(a);
+}
+
+// CHECK: cir.func @_Z5UCharh(%arg0: !u8i {cir.zeroext} loc({{.+}})) -> (!u8i {cir.zeroext})
+unsigned char UChar(unsigned char c) {
+  // CHECK: cir.call @_Z5UCharh(%{{.+}}) : (!u8i) -> !u8i
+  return UChar(c);
+}
+
+// CHECK: cir.func @_Z6UShortt(%arg0: !u16i {cir.zeroext} loc({{.+}})) -> (!u16i {cir.zeroext})
+unsigned short UShort(unsigned short s) {
+  // CHECK: cir.call @_Z6UShortt(%{{.+}}) : (!u16i) -> !u16i
+  return UShort(s);
+}
+
+// CHECK: cir.func @_Z4UIntj(%arg0: !u32i loc({{.+}})) -> !u32i
+unsigned int UInt(unsigned int i) {
+  // CHECK: cir.call @_Z4UIntj(%{{.+}}) : (!u32i) -> !u32i
+  return UInt(i);
+}
+
+// CHECK: cir.func @_Z5ULongm(%arg0: !u32i loc({{.+}})) -> !u32i
+unsigned long ULong(unsigned long l) {
+  // CHECK: cir.call @_Z5ULongm(%{{.+}}) : (!u32i) -> !u32i
+  return ULong(l);
+}
+
+// CHECK: cir.func @_Z9ULongLongy(%arg0: !u64i loc({{.+}})) -> !u64i
+unsigned long long ULongLong(unsigned long long l) {
+  // CHECK: cir.call @_Z9ULongLongy(%{{.+}}) : (!u64i) -> !u64i
+  return ULongLong(l);
+}
+
+// CHECK: cir.func @_Z4Chara(%arg0: !s8i {cir.signext} loc({{.+}})) -> (!s8i {cir.signext})
+char Char(signed char c) {
+  // CHECK: cir.call @_Z4Chara(%{{.+}}) : (!s8i) -> !s8i
+  return Char(c);
+}
+
+// CHECK: cir.func @_Z5Shorts(%arg0: !s16i {cir.signext} loc({{.+}})) -> (!s16i {cir.signext})
+short Short(short s) {
+  // CHECK: cir.call @_Z5Shorts(%{{.+}}) : (!s16i) -> !s16i
+  return Short(s);
+}
+
+// CHECK: cir.func @_Z3Inti(%arg0: !s32i loc({{.+}})) -> !s32i
+int Int(int i) {
+  // CHECK: cir.call @_Z3Inti(%{{.+}}) : (!s32i) -> !s32i
+  return Int(i);
+}
+
+// CHECK: cir.func @_Z4Longl(%arg0: !s32i loc({{.+}})) -> !s32i
+long Long(long l) {
+  // CHECK: cir.call @_Z4Longl(%{{.+}}) : (!s32i) -> !s32i
+  return Long(l);
+}
+
+// CHECK: cir.func @_Z8LongLongx(%arg0: !s64i loc({{.+}})) -> !s64i
+long long LongLong(long long l) {
+  // CHECK: cir.call @_Z8LongLongx(%{{.+}}) : (!s64i) -> !s64i
+  return LongLong(l);
+}
+
+
+// Check for structs.
+
+struct Struct {
+  int a, b, c, d, e;
+};
+
+// CHECK: cir.func @_Z10StructFuncv() -> !ty_Struct
+Struct StructFunc() {
+  return { 0, 1, 2, 3, 4 };
+}