Skip to content

Commit 096fe69

Browse files
author
z1_cciauto
authored
merge main into amd-staging (llvm#3147)
2 parents 15059ba + 417cd79 commit 096fe69

File tree

81 files changed

+12341
-669
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

81 files changed

+12341
-669
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,8 @@ TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
676676

677677
TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts")
678678
TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts")
679+
TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts")
680+
TARGET_BUILTIN(__builtin_amdgcn_log_bf16, "yy", "nc", "bf16-trans-insts")
679681

680682
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
681683
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")

clang/lib/Basic/Targets/SPIR.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,9 @@ class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo {
264264
PointerWidth = PointerAlign = 32;
265265
SizeType = TargetInfo::UnsignedInt;
266266
PtrDiffType = IntPtrType = TargetInfo::SignedInt;
267+
// SPIR32 has support for atomic ops if atomic extension is enabled.
268+
// Take the maximum because it's possible the Host supports wider types.
269+
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 32);
267270
resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
268271
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
269272
}
@@ -281,6 +284,9 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo {
281284
PointerWidth = PointerAlign = 64;
282285
SizeType = TargetInfo::UnsignedLong;
283286
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
287+
// SPIR64 has support for atomic ops if atomic extension is enabled.
288+
// Take the maximum because it's possible the Host supports wider types.
289+
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 64);
284290
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
285291
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
286292
}

clang/lib/CodeGen/CGLoopInfo.cpp

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -221,18 +221,6 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
221221
return createUnrollAndJamMetadata(Attrs, LoopProperties, HasUserTransforms);
222222
}
223223

224-
// Apply all loop properties to the vectorized loop.
225-
SmallVector<Metadata *, 4> FollowupLoopProperties;
226-
FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end());
227-
228-
// Don't vectorize an already vectorized loop.
229-
FollowupLoopProperties.push_back(
230-
MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized")));
231-
232-
bool FollowupHasTransforms = false;
233-
SmallVector<Metadata *, 4> Followup = createUnrollAndJamMetadata(
234-
Attrs, FollowupLoopProperties, FollowupHasTransforms);
235-
236224
SmallVector<Metadata *, 4> Args;
237225
Args.append(LoopProperties.begin(), LoopProperties.end());
238226

@@ -286,22 +274,46 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
286274
// 5) it is implied when vectorize.width is unset (0) and the user
287275
// explicitly requested fixed-width vectorization, i.e.
288276
// vectorize.scalable.enable is false.
277+
bool VectorizeEnabled = false;
289278
if (Attrs.VectorizeEnable != LoopAttributes::Unspecified ||
290279
(IsVectorPredicateEnabled && Attrs.VectorizeWidth != 1) ||
291280
Attrs.VectorizeWidth > 1 ||
292281
Attrs.VectorizeScalable == LoopAttributes::Enable ||
293282
(Attrs.VectorizeScalable == LoopAttributes::Disable &&
294283
Attrs.VectorizeWidth != 1)) {
295-
bool AttrVal = Attrs.VectorizeEnable != LoopAttributes::Disable;
284+
VectorizeEnabled = Attrs.VectorizeEnable != LoopAttributes::Disable;
296285
Args.push_back(
297286
MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
298287
ConstantAsMetadata::get(ConstantInt::get(
299-
llvm::Type::getInt1Ty(Ctx), AttrVal))}));
288+
llvm::Type::getInt1Ty(Ctx), VectorizeEnabled))}));
300289
}
301290

302-
if (FollowupHasTransforms)
303-
Args.push_back(
304-
createFollowupMetadata("llvm.loop.vectorize.followup_all", Followup));
291+
// Apply all loop properties to the vectorized loop.
292+
SmallVector<Metadata *, 4> FollowupLoopProperties;
293+
294+
// If vectorization is not explicitly enabled, the follow-up metadata will be
295+
// directly appended to the list currently being created. In that case, adding
296+
// LoopProperties to FollowupLoopProperties would result in duplication.
297+
if (VectorizeEnabled)
298+
FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end());
299+
300+
// Don't vectorize an already vectorized loop.
301+
FollowupLoopProperties.push_back(
302+
MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized")));
303+
304+
bool FollowupHasTransforms = false;
305+
SmallVector<Metadata *, 4> Followup = createUnrollAndJamMetadata(
306+
Attrs, FollowupLoopProperties, FollowupHasTransforms);
307+
308+
if (FollowupHasTransforms) {
309+
// If vectorization is explicitly enabled, we create a follow-up metadata,
310+
// otherwise directly add the contents of it to Args.
311+
if (VectorizeEnabled)
312+
Args.push_back(
313+
createFollowupMetadata("llvm.loop.vectorize.followup_all", Followup));
314+
else
315+
Args.append(Followup.begin(), Followup.end());
316+
}
305317

306318
HasUserTransforms = true;
307319
return Args;

clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
433433
case AMDGPU::BI__builtin_amdgcn_rsq:
434434
case AMDGPU::BI__builtin_amdgcn_rsqf:
435435
case AMDGPU::BI__builtin_amdgcn_rsqh:
436+
case AMDGPU::BI__builtin_amdgcn_rsq_bf16:
436437
return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
437438
case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
438439
case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
@@ -447,6 +448,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
447448
case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
448449
return EmitAMDGPUDispatchPtr(*this, E);
449450
case AMDGPU::BI__builtin_amdgcn_logf:
451+
case AMDGPU::BI__builtin_amdgcn_log_bf16:
450452
return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
451453
case AMDGPU::BI__builtin_amdgcn_exp2f:
452454
return emitBuiltinWithOneOverloadedType<1>(*this, E,

clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
// RUN: -S -ffp-exception-behavior=strict \
1212
// RUN: -o - %s | FileCheck --check-prefix=CHECK-ASM \
1313
// RUN: --check-prefix=FIXME-CHECK %s
14-
// RUN: %clang_cc1 -triple powerpcspe -ffp-exception-behavior=strict \
14+
// RUN: %clang_cc1 -triple powerpc -ffp-exception-behavior=strict \
1515
// RUN: -target-feature +vsx -fexperimental-strict-floating-point -emit-llvm \
1616
// RUN: %s -o - | FileCheck --check-prefix=CHECK-CONSTRAINED %s
1717

clang/test/CodeGen/new-pass-manager-opt-bisect.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@
77
// CHECK: BISECT: running pass (1)
88
// CHECK-NOT: BISECT: running pass (1)
99
// Make sure that legacy pass manager is running
10-
// CHECK: Instruction Selection
10+
// CHECK: -isel
1111

1212
int func(int a) { return a; }

clang/test/CodeGenCXX/pragma-loop.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,43 @@ void for_test_scalable_1(int *List, int Length) {
203203
}
204204
}
205205

206+
// Verify for loop is not performing vectorization
207+
void for_test_width_1(int *List, int Length) {
208+
#pragma clang loop vectorize_width(1) interleave_count(4) unroll(disable) distribute(disable)
209+
for (int i = 0; i < Length; i++) {
210+
// CHECK: br label {{.*}}, !llvm.loop ![[LOOP_20:.*]]
211+
List[i] = i * 2;
212+
}
213+
}
214+
215+
// Verify for loop is not performing vectorization
216+
void for_test_fixed_1(int *List, int Length) {
217+
#pragma clang loop vectorize_width(1, fixed) interleave_count(4) unroll(disable) distribute(disable)
218+
for (int i = 0; i < Length; i++) {
219+
// CHECK: br label {{.*}}, !llvm.loop ![[LOOP_21:.*]]
220+
List[i] = i * 2;
221+
}
222+
}
223+
224+
225+
// Verify unroll attributes are directly attached to the loop metadata
226+
void for_test_vectorize_disable_unroll(int *List, int Length) {
227+
#pragma clang loop vectorize(disable) unroll_count(8)
228+
for (int i = 0; i < Length; i++) {
229+
// CHECK: br label {{.*}}, !llvm.loop ![[LOOP_22:.*]]
230+
List[i] = i * 2;
231+
}
232+
}
233+
234+
// Verify unroll attributes are directly attached to the loop metadata
235+
void for_test_interleave_vectorize_disable_unroll(int *List, int Length) {
236+
#pragma clang loop vectorize(disable) interleave_count(4) unroll_count(8)
237+
for (int i = 0; i < Length; i++) {
238+
// CHECK: br label {{.*}}, !llvm.loop ![[LOOP_23:.*]]
239+
List[i] = i * 2;
240+
}
241+
}
242+
206243
// CHECK-DAG: ![[MP:[0-9]+]] = !{!"llvm.loop.mustprogress"}
207244

208245
// CHECK-DAG: ![[UNROLL_DISABLE:[0-9]+]] = !{!"llvm.loop.unroll.disable"}
@@ -270,3 +307,7 @@ void for_test_scalable_1(int *List, int Length) {
270307
// CHECK-DAG: ![[LOOP_17]] = distinct !{![[LOOP_17]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[FIXED_VEC]], ![[INTERLEAVE_4]], ![[VECTORIZE_ENABLE]]}
271308
// CHECK-DAG: ![[LOOP_18]] = distinct !{![[LOOP_18]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[SCALABLE_VEC]], ![[INTERLEAVE_4]], ![[VECTORIZE_ENABLE]]}
272309
// CHECK-DAG: ![[LOOP_19]] = distinct !{![[LOOP_19]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[WIDTH_1]], ![[SCALABLE_VEC]], ![[INTERLEAVE_4]], ![[VECTORIZE_ENABLE]]}
310+
// CHECK-DAG: ![[LOOP_20]] = distinct !{![[LOOP_20]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[WIDTH_1]], ![[FIXED_VEC]], ![[INTERLEAVE_4]]}
311+
// CHECK-DAG: ![[LOOP_21]] = distinct !{![[LOOP_21]], ![[MP]], ![[UNROLL_DISABLE]], ![[DISTRIBUTE_DISABLE]], ![[WIDTH_1]], ![[FIXED_VEC]], ![[INTERLEAVE_4]]}
312+
// CHECK-DAG: ![[LOOP_22]] = distinct !{![[LOOP_22]], ![[MP]], ![[WIDTH_1]], ![[ISVECTORIZED]], ![[UNROLL_8]]}
313+
// CHECK-DAG: ![[LOOP_23]] = distinct !{![[LOOP_23]], ![[MP]], ![[WIDTH_1]], ![[INTERLEAVE_4]], ![[ISVECTORIZED]], ![[UNROLL_8]]}

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,44 @@ void test_rcp_bf16(global __bf16* out, __bf16 a)
8080
*out = __builtin_amdgcn_rcp_bf16(a);
8181
}
8282

83+
// CHECK-LABEL: @test_rsq_bf16(
84+
// CHECK-NEXT: entry:
85+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
86+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
87+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
88+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
89+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
90+
// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
91+
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
92+
// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.rsq.bf16(bfloat [[TMP0]])
93+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
94+
// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
95+
// CHECK-NEXT: ret void
96+
//
97+
void test_rsq_bf16(global __bf16* out, __bf16 a)
98+
{
99+
*out = __builtin_amdgcn_rsq_bf16(a);
100+
}
101+
102+
// CHECK-LABEL: @test_log_bf16(
103+
// CHECK-NEXT: entry:
104+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
105+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
106+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
107+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
108+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
109+
// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
110+
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
111+
// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.log.bf16(bfloat [[TMP0]])
112+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
113+
// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
114+
// CHECK-NEXT: ret void
115+
//
116+
void test_log_bf16(global __bf16* out, __bf16 a)
117+
{
118+
*out = __builtin_amdgcn_log_bf16(a);
119+
}
120+
83121
// CHECK-LABEL: @test_cvt_f16_fp8(
84122
// CHECK-NEXT: entry:
85123
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -triple spir-unknown-unknown -verify
2+
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -triple spir64-unknown-unknown -verify
3+
4+
// expected-no-diagnostics
5+
6+
int fi1a(int *i) {
7+
int v;
8+
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
9+
return v;
10+
}
11+
12+
#ifdef __SPIR64__
13+
long fl1a(long *i) {
14+
long v;
15+
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
16+
return v;
17+
}
18+
#endif

clang/tools/libclang/libclang.map

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,8 @@ LLVM_13 {
327327
clang_getRange;
328328
clang_getRangeEnd;
329329
clang_getRangeStart;
330+
clang_getRemappings;
331+
clang_getRemappingsFromFileList;
330332
clang_getResultType;
331333
clang_getSkippedRanges;
332334
clang_getSpecializedCursorTemplate;
@@ -387,6 +389,9 @@ LLVM_13 {
387389
clang_parseTranslationUnit;
388390
clang_parseTranslationUnit2;
389391
clang_parseTranslationUnit2FullArgv;
392+
clang_remap_dispose;
393+
clang_remap_getFilenames;
394+
clang_remap_getNumFiles;
390395
clang_reparseTranslationUnit;
391396
clang_saveTranslationUnit;
392397
clang_sortCodeCompletionResults;

0 commit comments

Comments
 (0)