Skip to content

Commit 9d9179e

Browse files
committed
Merge remote-tracking branch 'origin/main' into laa-deref-assumption
2 parents 626a4fb + f6641e2 commit 9d9179e

File tree

186 files changed

+30001
-26417
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

186 files changed

+30001
-26417
lines changed

.github/workflows/build-ci-container-windows.yml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,6 @@ on:
1111
- .github/workflows/build-ci-container-windows.yml
1212
- '.github/workflows/containers/github-action-ci-windows/**'
1313
pull_request:
14-
branches:
15-
- main
1614
paths:
1715
- .github/workflows/build-ci-container-windows.yml
1816
- '.github/workflows/containers/github-action-ci-windows/**'

.github/workflows/build-ci-container.yml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,6 @@ on:
1111
- .github/workflows/build-ci-container.yml
1212
- '.github/workflows/containers/github-action-ci/**'
1313
pull_request:
14-
branches:
15-
- main
1614
paths:
1715
- .github/workflows/build-ci-container.yml
1816
- '.github/workflows/containers/github-action-ci/**'

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -684,6 +684,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
684684
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
685685
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
686686
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
687+
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
688+
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")
687689

688690
// GFX1250 WMMA builtins
689691
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x4_f32, "V8fIbV2fIbV2fIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")

clang/include/clang/Sema/Overload.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1491,8 +1491,6 @@ class Sema;
14911491
OverloadingResult
14921492
BestViableFunctionImpl(Sema &S, SourceLocation Loc,
14931493
OverloadCandidateSet::iterator &Best);
1494-
void PerfectViableFunction(Sema &S, SourceLocation Loc,
1495-
OverloadCandidateSet::iterator &Best);
14961494
};
14971495

14981496
bool isBetterOverloadCandidate(Sema &S, const OverloadCandidate &Cand1,

clang/lib/Sema/SemaOverload.cpp

Lines changed: 6 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -11354,55 +11354,18 @@ OverloadingResult OverloadCandidateSet::BestViableFunction(Sema &S,
1135411354
DeferredCandidatesCount != 0 && !ResolutionByPerfectCandidateIsDisabled;
1135511355

1135611356
if (TwoPhaseResolution) {
11357-
11358-
PerfectViableFunction(S, Loc, Best);
11359-
if (Best != end())
11360-
return ResultForBestCandidate(Best);
11357+
OverloadingResult Res = BestViableFunctionImpl(S, Loc, Best);
11358+
if (Best != end() && Best->isPerfectMatch(S.Context)) {
11359+
if (!(HasDeferredTemplateConstructors &&
11360+
isa_and_nonnull<CXXConversionDecl>(Best->Function)))
11361+
return Res;
11362+
}
1136111363
}
1136211364

1136311365
InjectNonDeducedTemplateCandidates(S);
1136411366
return BestViableFunctionImpl(S, Loc, Best);
1136511367
}
1136611368

11367-
void OverloadCandidateSet::PerfectViableFunction(
11368-
Sema &S, SourceLocation Loc, OverloadCandidateSet::iterator &Best) {
11369-
11370-
Best = end();
11371-
for (auto It = Candidates.begin(); It != Candidates.end(); ++It) {
11372-
11373-
if (!It->isPerfectMatch(S.getASTContext()))
11374-
continue;
11375-
11376-
// We found a suitable conversion function
11377-
// but if there is a template constructor in the target class
11378-
// we might prefer that instead.
11379-
if (HasDeferredTemplateConstructors &&
11380-
isa_and_nonnull<CXXConversionDecl>(It->Function)) {
11381-
Best = end();
11382-
break;
11383-
}
11384-
11385-
if (Best == end()) {
11386-
Best = It;
11387-
continue;
11388-
}
11389-
if (Best->Function && It->Function) {
11390-
FunctionDecl *D =
11391-
S.getMoreConstrainedFunction(Best->Function, It->Function);
11392-
if (D == nullptr) {
11393-
Best = end();
11394-
break;
11395-
}
11396-
if (D == It->Function)
11397-
Best = It;
11398-
continue;
11399-
}
11400-
// ambiguous
11401-
Best = end();
11402-
break;
11403-
}
11404-
}
11405-
1140611369
OverloadingResult OverloadCandidateSet::BestViableFunctionImpl(
1140711370
Sema &S, SourceLocation Loc, OverloadCandidateSet::iterator &Best) {
1140811371

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl

Lines changed: 91 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,9 @@
44

55
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
66

7+
typedef unsigned int uint;
8+
typedef unsigned short int ushort;
9+
typedef unsigned int __attribute__((ext_vector_type(2))) uint2;
710
typedef half __attribute__((ext_vector_type(2))) half2;
811

912
// CHECK-LABEL: @test_setprio_inc_wg(
@@ -42,6 +45,24 @@ void test_s_wait_tensorcnt() {
4245
__builtin_amdgcn_s_wait_tensorcnt(0);
4346
}
4447

48+
// CHECK-LABEL: @test_prng_b32(
49+
// CHECK-NEXT: entry:
50+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
51+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
52+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
53+
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
54+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
55+
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
56+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
57+
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.prng.b32(i32 [[TMP0]])
58+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
59+
// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
60+
// CHECK-NEXT: ret void
61+
//
62+
void test_prng_b32(global uint* out, uint a) {
63+
*out = __builtin_amdgcn_prng_b32(a);
64+
}
65+
4566
// CHECK-LABEL: @test_tanh_f32(
4667
// CHECK-NEXT: entry:
4768
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
@@ -349,6 +370,76 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
349370
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
350371
}
351372

373+
// CHECK-LABEL: @test_sat_pk4_i4_i8(
374+
// CHECK-NEXT: entry:
375+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
376+
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
377+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
378+
// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr
379+
// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
380+
// CHECK-NEXT: store i32 [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4
381+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4
382+
// CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.amdgcn.sat.pk4.i4.i8(i32 [[TMP0]])
383+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
384+
// CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP2]], align 2
385+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4
386+
// CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.amdgcn.sat.pk4.u4.u8(i32 [[TMP3]])
387+
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
388+
// CHECK-NEXT: store i16 [[TMP4]], ptr [[TMP5]], align 2
389+
// CHECK-NEXT: ret void
390+
//
391+
void test_sat_pk4_i4_i8(ushort *out, uint src)
392+
{
393+
*out = __builtin_amdgcn_sat_pk4_i4_i8(src);
394+
*out = __builtin_amdgcn_sat_pk4_u4_u8(src);
395+
}
396+
397+
// CHECK-LABEL: @test_permlane16_swap(
398+
// CHECK-NEXT: entry:
399+
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
400+
// CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
401+
// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
402+
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
403+
// CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
404+
// CHECK-NEXT: [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr
405+
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
406+
// CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
407+
// CHECK-NEXT: store i32 [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4
408+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
409+
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4
410+
// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 [[TMP0]], i32 [[TMP1]], i1 false, i1 false)
411+
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
412+
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
413+
// CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i64 0
414+
// CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i64 1
415+
// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
416+
// CHECK-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 8
417+
// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
418+
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4
419+
// CHECK-NEXT: [[TMP10:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 [[TMP8]], i32 [[TMP9]], i1 true, i1 false)
420+
// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { i32, i32 } [[TMP10]], 0
421+
// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { i32, i32 } [[TMP10]], 1
422+
// CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i64 0
423+
// CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP12]], i64 1
424+
// CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
425+
// CHECK-NEXT: store <2 x i32> [[TMP14]], ptr addrspace(1) [[TMP15]], align 8
426+
// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
427+
// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[SRC_ADDR_ASCAST]], align 4
428+
// CHECK-NEXT: [[TMP18:%.*]] = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 [[TMP16]], i32 [[TMP17]], i1 false, i1 true)
429+
// CHECK-NEXT: [[TMP19:%.*]] = extractvalue { i32, i32 } [[TMP18]], 0
430+
// CHECK-NEXT: [[TMP20:%.*]] = extractvalue { i32, i32 } [[TMP18]], 1
431+
// CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> poison, i32 [[TMP19]], i64 0
432+
// CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP21]], i32 [[TMP20]], i64 1
433+
// CHECK-NEXT: [[TMP23:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
434+
// CHECK-NEXT: store <2 x i32> [[TMP22]], ptr addrspace(1) [[TMP23]], align 8
435+
// CHECK-NEXT: ret void
436+
//
437+
void test_permlane16_swap(global uint2* out, uint old, uint src) {
438+
*out = __builtin_amdgcn_permlane16_swap(old, src, false, false);
439+
*out = __builtin_amdgcn_permlane16_swap(old, src, true, false);
440+
*out = __builtin_amdgcn_permlane16_swap(old, src, false, true);
441+
}
442+
352443
// CHECK-LABEL: @test_cvt_f32_fp8_e5m3(
353444
// CHECK-NEXT: entry:
354445
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)

clang/test/Sema/dllexport.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,10 @@
22
// RUN: %clang_cc1 -triple x86_64-win32 -fsyntax-only -fms-extensions -verify -std=c11 %s
33
// RUN: %clang_cc1 -triple i686-mingw32 -fsyntax-only -fms-extensions -verify -std=c11 %s
44
// RUN: %clang_cc1 -triple x86_64-mingw32 -fsyntax-only -fms-extensions -verify -std=c99 %s
5+
// RUN: %clang_cc1 -triple i686-windows-itanium -fsyntax-only -fms-extensions -verify -std=c99 %s
6+
// RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify -std=c11 %s
7+
// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fsyntax-only -fms-extensions -verify -std=c99 %s
8+
// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fsyntax-only -fms-extensions -verify -std=c11 %s
59

610
// Invalid usage.
711
__declspec(dllexport) typedef int typedef1;

clang/test/SemaCXX/overload-resolution-deferred-templates.cpp

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -283,3 +283,31 @@ void f() {
283283
}
284284

285285
#endif
286+
287+
namespace GH147374 {
288+
289+
struct String {};
290+
template <typename T> void operator+(T, String &&) = delete;
291+
292+
struct Bar {
293+
void operator+(String) const; // expected-note {{candidate function}}
294+
friend void operator+(Bar, String) {}; // expected-note {{candidate function}}
295+
};
296+
297+
struct Baz {
298+
void operator+(String); // expected-note {{candidate function}}
299+
friend void operator+(Baz, String) {}; // expected-note {{candidate function}}
300+
};
301+
302+
void test() {
303+
Bar a;
304+
String b;
305+
a + b;
306+
//expected-error@-1 {{use of overloaded operator '+' is ambiguous (with operand types 'Bar' and 'String')}}
307+
308+
Baz z;
309+
z + b;
310+
//expected-error@-1 {{use of overloaded operator '+' is ambiguous (with operand types 'Baz' and 'String')}}
311+
}
312+
313+
}

flang/lib/Lower/Bridge.cpp

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1466,8 +1466,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
14661466
assert(falseTarget && "missing conditional branch false block");
14671467
mlir::Location loc = toLocation();
14681468
mlir::Value bcc = builder->createConvert(loc, builder->getI1Type(), cond);
1469-
builder->create<mlir::cf::CondBranchOp>(loc, bcc, trueTarget, std::nullopt,
1470-
falseTarget, std::nullopt);
1469+
builder->create<mlir::cf::CondBranchOp>(loc, bcc, trueTarget,
1470+
mlir::ValueRange{}, falseTarget,
1471+
mlir::ValueRange{});
14711472
}
14721473
void genConditionalBranch(mlir::Value cond,
14731474
Fortran::lower::pft::Evaluation *trueTarget,
@@ -2556,8 +2557,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
25562557
builder->setInsertionPointToEnd(loopWrapperOp.getBody());
25572558
auto loopOp = builder->create<fir::DoConcurrentLoopOp>(
25582559
loc, nestLBs, nestUBs, nestSts, /*loopAnnotation=*/nullptr,
2559-
/*local_vars=*/std::nullopt,
2560-
/*local_syms=*/nullptr, /*reduce_vars=*/std::nullopt,
2560+
/*local_vars=*/mlir::ValueRange{},
2561+
/*local_syms=*/nullptr, /*reduce_vars=*/mlir::ValueRange{},
25612562
/*reduce_byref=*/nullptr, /*reduce_syms=*/nullptr,
25622563
/*reduce_attrs=*/nullptr);
25632564

@@ -3810,9 +3811,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
38103811
mlir::Block *selectCaseBlock = insertBlock(blockList[0]);
38113812
mlir::Block *assumedSizeBlock =
38123813
rankStarBlock ? rankStarBlock : defaultBlock;
3813-
builder->create<mlir::cf::CondBranchOp>(loc, isAssumedSize,
3814-
assumedSizeBlock, std::nullopt,
3815-
selectCaseBlock, std::nullopt);
3814+
builder->create<mlir::cf::CondBranchOp>(
3815+
loc, isAssumedSize, assumedSizeBlock, mlir::ValueRange{},
3816+
selectCaseBlock, mlir::ValueRange{});
38163817
startBlock(selectCaseBlock);
38173818
}
38183819
// Create fir.select_case for the other rank cases.

flang/lib/Lower/ConvertConstant.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -303,7 +303,7 @@ createStringLitOp(fir::FirOpBuilder &builder, mlir::Location loc,
303303
mlir::NamedAttribute sizeAttr(sizeTag, builder.getI64IntegerAttr(len));
304304
llvm::SmallVector<mlir::NamedAttribute> attrs = {dataAttr, sizeAttr};
305305
return builder.create<fir::StringLitOp>(
306-
loc, llvm::ArrayRef<mlir::Type>{type}, std::nullopt, attrs);
306+
loc, llvm::ArrayRef<mlir::Type>{type}, mlir::ValueRange{}, attrs);
307307
}
308308
}
309309

0 commit comments

Comments
 (0)