Skip to content

Commit 505eb9b

Browse files
committed
merge main into amd-staging
2 parents 65b6874 + d7215c0 commit 505eb9b

File tree

95 files changed

+1919
-732
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+1919
-732
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,8 @@ clang-format
703703

704704
libclang
705705
--------
706+
- Fixed a bug in ``clang_File_isEqual`` that sometimes led to different
707+
in-memory files to be considered as equal.
706708
- Added ``clang_visitCXXMethods``, which allows visiting the methods
707709
of a class.
708710
- Added ``clang_getFullyQualifiedName``, which provides fully qualified type names as

clang/include/clang/Basic/arm_sme.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,24 @@ let SMETargetGuard = "sme2,sme-mop4,sme-b16b16" in {
321321
defm SVBMOP4S_H : MOP4<"s", "_za16", "b", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_1>]>;
322322
}
323323

324+
////////////////////////////////////////////////////////////////////////////////
325+
// SME2 - FP8 FMOP4A, FMOP4S
326+
327+
multiclass MOP4_FP8<string za, list<ImmCheck> checks> {
328+
def _1x1 : Inst<"svmop4a" # "[_1x1]" # za # "[_{d}_{d}]", "vidd>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x1", [IsInOutZA, IsStreaming], checks>;
329+
def _1x2 : Inst<"svmop4a" # "[_1x2]" # za # "[_{d}_{d}]", "vid2>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x2", [IsInOutZA, IsStreaming], checks>;
330+
def _2x1 : Inst<"svmop4a" # "[_2x1]" # za # "[_{d}_{d}]", "vi2d>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x1", [IsInOutZA, IsStreaming], checks>;
331+
def _2x2 : Inst<"svmop4a" # "[_2x2]" # za # "[_{d}_{d}]", "vi22>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x2", [IsInOutZA, IsStreaming], checks>;
332+
}
333+
334+
let SMETargetGuard = "sme2,sme-mop4,sme-f8f32" in {
335+
defm SVMOP4A_FP8_ZA32 : MOP4_FP8<"_za32", [ImmCheck<0, ImmCheck0_3>]>;
336+
}
337+
338+
let SMETargetGuard = "sme2,sme-mop4,sme-f8f16" in {
339+
defm SVMOP4A_FP8_ZA16 : MOP4_FP8<"_za16", [ImmCheck<0, ImmCheck0_1>]>;
340+
}
341+
324342
////////////////////////////////////////////////////////////////////////////////
325343
// SME2 - SMOP4A, SMOP4S, UMOP4A, UMOP4S
326344

clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,8 @@ class OpenACCClauseCIREmitter final
204204
if (!clause.getArchitectures().empty())
205205
operation.setDeviceType(
206206
decodeDeviceType(clause.getArchitectures()[0].getIdentifierInfo()));
207-
} else if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp>) {
207+
} else if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp,
208+
DataOp>) {
208209
// Nothing to do here, these constructs don't have any IR for these, as
209210
// they just modify the other clauses IR. So setting of `lastDeviceType`
210211
// (done above) is all we need.
@@ -243,7 +244,7 @@ class OpenACCClauseCIREmitter final
243244
}
244245

245246
void VisitAsyncClause(const OpenACCAsyncClause &clause) {
246-
if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp>) {
247+
if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp, DataOp>) {
247248
if (!clause.hasIntExpr()) {
248249
operation.setAsyncOnlyAttr(
249250
handleDeviceTypeAffectedClause(operation.getAsyncOnlyAttr()));
@@ -278,7 +279,7 @@ class OpenACCClauseCIREmitter final
278279

279280
void VisitIfClause(const OpenACCIfClause &clause) {
280281
if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp, InitOp,
281-
ShutdownOp, SetOp>) {
282+
ShutdownOp, SetOp, DataOp>) {
282283
operation.getIfCondMutable().append(
283284
createCondition(clause.getConditionExpr()));
284285
} else {
@@ -291,12 +292,7 @@ class OpenACCClauseCIREmitter final
291292
}
292293

293294
void VisitDeviceNumClause(const OpenACCDeviceNumClause &clause) {
294-
if constexpr (isOneOfTypes<OpTy, InitOp, ShutdownOp>) {
295-
operation.getDeviceNumOperandMutable().append(
296-
createIntExpr(clause.getIntExpr()));
297-
} else if constexpr (isOneOfTypes<OpTy, SetOp>) {
298-
// This is only a separate case because the getter name is different in
299-
// 'set' for some reason.
295+
if constexpr (isOneOfTypes<OpTy, InitOp, ShutdownOp, SetOp>) {
300296
operation.getDeviceNumMutable().append(
301297
createIntExpr(clause.getIntExpr()));
302298
} else {

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3000,6 +3000,11 @@ void tools::addOpenMPDeviceRTL(const Driver &D,
30003000
for (const auto &LibPath : HostTC.getFilePaths())
30013001
LibraryPaths.emplace_back(LibPath);
30023002

3003+
// Check the target specific library path for the triple as well.
3004+
SmallString<128> P(D.Dir);
3005+
llvm::sys::path::append(P, "..", "lib", Triple.getTriple());
3006+
LibraryPaths.emplace_back(P);
3007+
30033008
OptSpecifier LibomptargetBCPathOpt =
30043009
Triple.isAMDGCN() ? options::OPT_libomptarget_amdgpu_bc_path_EQ
30053010
: Triple.isNVPTX() ? options::OPT_libomptarget_nvptx_bc_path_EQ

clang/test/CIR/CodeGenOpenACC/data.c

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
// RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s
22

3-
void acc_data(void) {
4-
// CHECK: cir.func @acc_data() {
3+
void acc_data(int cond) {
4+
// CHECK: cir.func @acc_data(%[[ARG:.*]]: !s32i{{.*}}) {
5+
// CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["cond", init]
6+
// CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr<!s32i>
57

68
#pragma acc data default(none)
79
{
@@ -33,5 +35,79 @@ void acc_data(void) {
3335
// CHECK-NEXT: acc.terminator
3436
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue present>}
3537

38+
#pragma acc data default(none) async
39+
{}
40+
// CHECK-NEXT: acc.data {
41+
// CHECK-NEXT: acc.terminator
42+
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>], defaultAttr = #acc<defaultvalue none>}
43+
44+
#pragma acc data default(none) async(cond)
45+
{}
46+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
47+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
48+
// CHECK-NEXT: acc.data async(%[[CONV_CAST]] : si32) {
49+
// CHECK-NEXT: acc.terminator
50+
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>}
51+
52+
#pragma acc data default(none) async device_type(nvidia, radeon) async
53+
{}
54+
// CHECK-NEXT: acc.data {
55+
// CHECK-NEXT: acc.terminator
56+
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>], defaultAttr = #acc<defaultvalue none>}
57+
58+
#pragma acc data default(none) async(3) device_type(nvidia, radeon) async(cond)
59+
{}
60+
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
61+
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
62+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
63+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
64+
// CHECK-NEXT: acc.data async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
65+
// CHECK-NEXT: acc.terminator
66+
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>}
67+
68+
#pragma acc data default(none) async device_type(nvidia, radeon) async(cond)
69+
{}
70+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
71+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
72+
// CHECK-NEXT: acc.data async(%[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
73+
// CHECK-NEXT: acc.terminator
74+
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<none>], defaultAttr = #acc<defaultvalue none>}
75+
76+
#pragma acc data default(none) async(3) device_type(nvidia, radeon) async
77+
{}
78+
// CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
79+
// CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
80+
// CHECK-NEXT: acc.data async(%[[THREE_CAST]] : si32) {
81+
// CHECK-NEXT: acc.terminator
82+
// CHECK-NEXT: } attributes {asyncOnly = [#acc.device_type<nvidia>, #acc.device_type<radeon>], defaultAttr = #acc<defaultvalue none>}
83+
84+
#pragma acc data default(none) if(cond)
85+
{}
86+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
87+
// CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
88+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
89+
// CHECK-NEXT: acc.data if(%[[CONV_CAST]]) {
90+
// CHECK-NEXT: acc.terminator
91+
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>}
92+
93+
#pragma acc data default(none) if(1)
94+
{}
95+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
96+
// CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool
97+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1
98+
// CHECK-NEXT: acc.data if(%[[CONV_CAST]]) {
99+
// CHECK-NEXT: acc.terminator
100+
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>}
101+
102+
#pragma acc data default(none) if(cond == 1)
103+
{}
104+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
105+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
106+
// CHECK-NEXT: %[[EQ_RES:.*]] = cir.cmp(eq, %[[COND_LOAD]], %[[ONE_LITERAL]]) : !s32i, !cir.bool
107+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[EQ_RES]] : !cir.bool to i1
108+
// CHECK-NEXT: acc.data if(%[[CONV_CAST]]) {
109+
// CHECK-NEXT: acc.terminator
110+
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>}
111+
36112
// CHECK-NEXT: cir.return
37113
}

0 commit comments

Comments
 (0)