Skip to content

Commit 647ac77

Browse files
authored
Merge branch 'main' into main
2 parents 44428e1 + 49d89bc commit 647ac77

25 files changed

+600
-56
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts")
707707
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts")
708708
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc", "gfx1250-insts")
709709
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc", "gfx1250-insts")
710+
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32_e5m3, "iffiIb", "nc", "fp8e5m3-insts")
711+
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32_e5m3, "ifiiIi", "nc", "fp8e5m3-insts")
710712
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
711713
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")
712714

clang/lib/CIR/CodeGen/CIRGenCall.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ struct CallArg {
137137

138138
/// A data-flow flag to make sure getRValue and/or copyInto are not
139139
/// called twice for duplicated IR emission.
140-
mutable bool isUsed;
140+
[[maybe_unused]] mutable bool isUsed;
141141

142142
public:
143143
clang::QualType ty;

clang/lib/CIR/CodeGen/CIRGenExpr.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,9 +1481,10 @@ Address CIRGenFunction::emitArrayToPointerDecay(const Expr *e) {
14811481
if (e->getType()->isVariableArrayType())
14821482
return addr;
14831483

1484-
auto pointeeTy = mlir::cast<cir::ArrayType>(lvalueAddrTy.getPointee());
1484+
[[maybe_unused]] auto pointeeTy =
1485+
mlir::cast<cir::ArrayType>(lvalueAddrTy.getPointee());
14851486

1486-
mlir::Type arrayTy = convertType(e->getType());
1487+
[[maybe_unused]] mlir::Type arrayTy = convertType(e->getType());
14871488
assert(mlir::isa<cir::ArrayType>(arrayTy) && "expected array");
14881489
assert(pointeeTy == arrayTy);
14891490

clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,15 @@ class ComplexExprEmitter : public StmtVisitor<ComplexExprEmitter, mlir::Value> {
156156
};
157157
} // namespace
158158

159+
#ifndef NDEBUG
160+
// Only used in asserts
159161
static const ComplexType *getComplexType(QualType type) {
160162
type = type.getCanonicalType();
161163
if (const ComplexType *comp = dyn_cast<ComplexType>(type))
162164
return comp;
163165
return cast<ComplexType>(cast<AtomicType>(type)->getValueType());
164166
}
167+
#endif // NDEBUG
165168

166169
LValue ComplexExprEmitter::emitBinAssignLValue(const BinaryOperator *e,
167170
mlir::Value &value) {

clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
439439
value = builder.getTrue(cgf.getLoc(e->getExprLoc()));
440440
} else if (type->isIntegerType()) {
441441
QualType promotedType;
442-
bool canPerformLossyDemotionCheck = false;
442+
[[maybe_unused]] bool canPerformLossyDemotionCheck = false;
443443
if (cgf.getContext().isPromotableIntegerType(type)) {
444444
promotedType = cgf.getContext().getPromotedIntegerType(type);
445445
assert(promotedType != type && "Shouldn't promote to the same type.");

clang/lib/CIR/CodeGen/CIRGenFunction.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,7 @@ void CIRGenFunction::emitAndUpdateRetAlloca(QualType type, mlir::Location loc,
216216
void CIRGenFunction::declare(mlir::Value addrVal, const Decl *var, QualType ty,
217217
mlir::Location loc, CharUnits alignment,
218218
bool isParam) {
219-
const auto *namedVar = dyn_cast_or_null<NamedDecl>(var);
220-
assert(namedVar && "Needs a named decl");
219+
assert(isa<NamedDecl>(var) && "Needs a named decl");
221220
assert(!cir::MissingFeatures::cgfSymbolTable());
222221

223222
auto allocaOp = cast<cir::AllocaOp>(addrVal.getDefiningOp());

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -656,8 +656,6 @@ mlir::Value CIRGenModule::getAddrOfGlobalVar(const VarDecl *d, mlir::Type ty,
656656

657657
void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd,
658658
bool isTentative) {
659-
const QualType astTy = vd->getType();
660-
661659
if (getLangOpts().OpenCL || getLangOpts().OpenMPIsTargetDevice) {
662660
errorNYI(vd->getSourceRange(), "emit OpenCL/OpenMP global variable");
663661
return;
@@ -701,7 +699,7 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd,
701699
// never attempt to emit a tentative definition if a real one
702700
// exists. A use may still exist, however, so we still may need
703701
// to do a RAUW.
704-
assert(!astTy->isIncompleteType() && "Unexpected incomplete type");
702+
assert(!vd->getType()->isIncompleteType() && "Unexpected incomplete type");
705703
init = builder.getZeroInitAttr(convertType(vd->getType()));
706704
} else {
707705
emitter.emplace(*this);

clang/lib/CIR/CodeGen/CIRGenStmt.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,15 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s,
7979
#define EXPR(Type, Base) case Stmt::Type##Class:
8080
#include "clang/AST/StmtNodes.inc"
8181
{
82-
// Remember the block we came in on.
83-
mlir::Block *incoming = builder.getInsertionBlock();
84-
assert(incoming && "expression emission must have an insertion point");
82+
assert(builder.getInsertionBlock() &&
83+
"expression emission must have an insertion point");
8584

8685
emitIgnoredExpr(cast<Expr>(s));
8786

88-
mlir::Block *outgoing = builder.getInsertionBlock();
89-
assert(outgoing && "expression emission cleared block!");
87+
// Classic codegen has a check here to see if the emitter created a new
88+
// block that isn't used (comparing the incoming and outgoing insertion
89+
// points) and deletes the outgoing block if it's not used. In CIR, we
90+
// will handle that during the cir.canonicalize pass.
9091
return mlir::success();
9192
}
9293
case Stmt::IfStmtClass:

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,60 @@ void test_prefetch(generic void *fptr, global void *gptr) {
652652
__builtin_amdgcn_global_prefetch(gptr, 8);
653653
}
654654

655+
// CHECK-LABEL: @test_cvt_pk_fp8_f32_e5m3(
656+
// CHECK-NEXT: entry:
657+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
658+
// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
659+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
660+
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4, addrspace(5)
661+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
662+
// CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
663+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
664+
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
665+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
666+
// CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
667+
// CHECK-NEXT: store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
668+
// CHECK-NEXT: store float [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
669+
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
670+
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR_ASCAST]], align 4
671+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
672+
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float [[TMP0]], float [[TMP1]], i32 [[TMP2]], i1 true)
673+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
674+
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
675+
// CHECK-NEXT: ret void
676+
//
677+
void test_cvt_pk_fp8_f32_e5m3(global int* out, int old, float a, float b)
678+
{
679+
*out = __builtin_amdgcn_cvt_pk_fp8_f32_e5m3(a, b, old, true);
680+
}
681+
682+
// CHECK-LABEL: @test_cvt_sr_fp8_f32_e5m3(
683+
// CHECK-NEXT: entry:
684+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
685+
// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
686+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
687+
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
688+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
689+
// CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
690+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
691+
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
692+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
693+
// CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
694+
// CHECK-NEXT: store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
695+
// CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
696+
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
697+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
698+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
699+
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 3)
700+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
701+
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
702+
// CHECK-NEXT: ret void
703+
//
704+
void test_cvt_sr_fp8_f32_e5m3(global int* out, int old, float a, int b)
705+
{
706+
*out = __builtin_amdgcn_cvt_sr_fp8_f32_e5m3(a, b, old, 3);
707+
}
708+
655709
// CHECK-LABEL: @test_cvt_f32_fp8_e5m3(
656710
// CHECK-NEXT: entry:
657711
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3505,6 +3505,12 @@ def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">,
35053505
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
35063506
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
35073507

3508+
// llvm.amdgcn.cvt.pk.fp8.f32.e5m3 int vdst, float srcA, float srcB, int old, imm word_sel
3509+
def int_amdgcn_cvt_pk_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32_e5m3">,
3510+
DefaultAttrsIntrinsic<[llvm_i32_ty],
3511+
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty],
3512+
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
3513+
35083514
// llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
35093515
// byte_sel selects byte to write into vdst.
35103516
def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">,
@@ -3518,6 +3524,12 @@ def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">,
35183524
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
35193525
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
35203526

3527+
// llvm.amdgcn.cvt.sr.fp8.f32.e5m3 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3]
3528+
def int_amdgcn_cvt_sr_fp8_f32_e5m3 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32_e5m3">,
3529+
DefaultAttrsIntrinsic<[llvm_i32_ty],
3530+
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3531+
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
3532+
35213533
// llvm.amdgcn.cvt.off.f32.i4 int srcA
35223534
def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">,
35233535
DefaultAttrsIntrinsic<[llvm_float_ty],

0 commit comments

Comments
 (0)