Skip to content

Commit 9dc82f1

Browse files
authored
Merge branch 'main' into remove-br-undef-transforms-2
2 parents d54c7a9 + 69b39e7 commit 9dc82f1

File tree

13 files changed

+115
-46
lines changed

13 files changed

+115
-46
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -880,7 +880,9 @@ def SVRSQRTS : SInst<"svrsqrts[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frs
880880
////////////////////////////////////////////////////////////////////////////////
881881
// Floating-point reductions
882882

883-
def SVFADDA : SInst<"svadda[_{d}]", "sPsd", "hfd", MergeNone, "aarch64_sve_fadda", [VerifyRuntimeMode]>;
883+
let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in {
884+
def SVFADDA : SInst<"svadda[_{d}]", "sPsd", "hfd", MergeNone, "aarch64_sve_fadda">;
885+
}
884886
def SVFADDV : SInst<"svaddv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_faddv", [VerifyRuntimeMode]>;
885887
def SVFMAXV : SInst<"svmaxv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxv", [VerifyRuntimeMode]>;
886888
def SVFMAXNMV : SInst<"svmaxnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxnmv", [VerifyRuntimeMode]>;

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5556,12 +5556,14 @@ void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
55565556
const auto *DO = S.getSingleClause<OMPDepobjClause>();
55575557
LValue DOLVal = EmitLValue(DO->getDepobj());
55585558
if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5559-
OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
5560-
DC->getModifier());
5561-
Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
5562-
Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5563-
*this, Dependencies, DC->getBeginLoc());
5564-
EmitStoreOfScalar(DepAddr.emitRawPointer(*this), DOLVal);
5559+
// Build list and emit dependences
5560+
OMPTaskDataTy Data;
5561+
buildDependences(S, Data);
5562+
for (auto &Dep : Data.Dependences) {
5563+
Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5564+
*this, Dep, DC->getBeginLoc());
5565+
EmitStoreOfScalar(DepAddr.emitRawPointer(*this), DOLVal);
5566+
}
55655567
return;
55665568
}
55675569
if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {

clang/lib/Headers/openmp_wrappers/complex_cmath.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,13 @@ template <class _Tp> __DEVICE__ _Tp norm(const std::complex<_Tp> &__c) {
6464
}
6565

6666
// conj
67-
68-
template <class _Tp> std::complex<_Tp> conj(const std::complex<_Tp> &__c) {
67+
#ifdef _GLIBCXX20_CONSTEXPR
68+
#define CXX20_CONSTEXPR_DEVICE __DEVICE__
69+
#else
70+
#define CXX20_CONSTEXPR_DEVICE
71+
#endif
72+
template <class _Tp>
73+
CXX20_CONSTEXPR_DEVICE std::complex<_Tp> conj(const std::complex<_Tp> &__c) {
6974
return std::complex<_Tp>(__c.real(), -__c.imag());
7075
}
7176

clang/test/OpenMP/depobj_codegen.cpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@
1717
typedef void *omp_depend_t;
1818

1919
void foo() {}
20+
void tmainc(){
21+
omp_depend_t obj;
22+
#pragma omp depobj(obj) depend(inout: omp_all_memory)
23+
{
24+
volatile omp_depend_t temp = obj;
25+
char* char_ptr = reinterpret_cast<char*>(temp);
26+
char_ptr[0] = 1;
27+
}
28+
}
2029

2130
template <class T>
2231
T tmain(T argc) {
@@ -35,10 +44,25 @@ int main(int argc, char **argv) {
3544
#pragma omp depobj(b) destroy
3645
#pragma omp depobj(b) update(mutexinoutset)
3746
#pragma omp depobj(a) depend(iterator(char *p = argv[argc]:argv[0]:-1), out: p[0])
38-
(void)tmain(a), tmain(b);
47+
(void)tmain(a), tmain(b);
48+
tmainc();
3949
return 0;
4050
}
41-
51+
// CHECK-LABEL: tmainc
52+
// CHECK: [[D_ADDR:%obj]] = alloca ptr,
53+
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
54+
// CHECK: [[DEP_ADDR_ADDR2:%.+]] = call ptr @__kmpc_alloc(i32 [[GTID]], i64 48, ptr null)
55+
// CHECK: [[SZ_DEOOBJ:%.+]] = getelementptr inbounds nuw %struct.kmp_depend_info, ptr [[DEP_ADDR_ADDR2]], i{{.+}} 0, i{{.+}} 0
56+
// CHECK: store i64 1, ptr [[SZ_DEOOBJ]], align 8
57+
// CHECK: [[DEPOBJ_BASE_ADDR:%.+]] = getelementptr %struct.kmp_depend_info, ptr [[DEP_ADDR_ADDR2]], i{{.+}} 1
58+
// CHECK: [[ADDR_ONE:%.+]] = getelementptr inbounds nuw %struct.kmp_depend_info, ptr [[DEPOBJ_BASE_ADDR]], i{{.+}} 0, i{{.+}} 0
59+
// CHECK: store i64 0, ptr [[ADDR_ONE]], align 8
60+
// CHECK: [[SZ_ADDR:%.+]] = getelementptr inbounds nuw %struct.kmp_depend_info, ptr [[DEPOBJ_BASE_ADDR]], i{{.+}} 0, i{{.+}} 1
61+
// CHECK: store i64 0, ptr [[SZ_ADDR]], align 8
62+
// CHECK: [[SZ_ADDR_NEW:%.+]] = getelementptr inbounds nuw %struct.kmp_depend_info, ptr [[DEPOBJ_BASE_ADDR]], i{{.+}} 0, i{{.+}} 2
63+
// CHECK: store {{i[0-9]+}} {{-?[0-9]+}}, ptr [[SZ_ADDR_NEW]], align 8
64+
// CHECK: [[DEP_NEW:%.+]] = getelementptr %struct.kmp_depend_info, ptr [[DEP_ADDR_ADDR2]], i{{.+}} 1
65+
// CHECK: store ptr [[DEP_NEW]], ptr [[D_ADDR]], align 8
4266
// CHECK-LABEL: @main
4367
// CHECK: [[B_ADDR:%b]] = alloca ptr,
4468
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
@@ -69,6 +93,8 @@ int main(int argc, char **argv) {
6993
// CHECK: [[NUMDEPS_ADDR:%.+]] = getelementptr inbounds nuw %struct.kmp_depend_info, ptr [[NUMDEPS_BASE]], i{{.+}} 0, i{{.+}} 0
7094
// CHECK: [[NUMDEPS:%.+]] = load i64, ptr [[NUMDEPS_ADDR]], align 8
7195
// CHECK: [[END:%.+]] = getelementptr %struct.kmp_depend_info, ptr [[B_BASE]], i64 [[NUMDEPS]]
96+
97+
7298
// CHECK: br label %[[BODY:.+]]
7399
// CHECK: [[BODY]]:
74100
// CHECK: [[EL:%.+]] = phi ptr [ [[B_BASE]], %{{.+}} ], [ [[EL_NEXT:%.+]], %[[BODY]] ]
@@ -228,6 +254,8 @@ int main(int argc, char **argv) {
228254
// CHECK: [[EL_NEXT]] = getelementptr %struct.kmp_depend_info, ptr [[EL]], i{{.+}} 1
229255
// CHECK: [[IS_DONE:%.+]] = icmp eq ptr [[EL_NEXT]], [[END]]
230256
// CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[BODY]]
257+
231258
// CHECK: [[DONE]]:
232259

260+
233261
#endif

clang/test/Sema/aarch64-incompat-sm-builtin-calls.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,16 @@ void incompat_sme_smc(svbool_t pg, void const *ptr) __arm_streaming_compatible _
2828
return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr);
2929
}
3030

31+
float incomp_sve_sm_fadda_sm(void) __arm_streaming {
32+
// expected-error@+1 {{builtin can only be called from a non-streaming function}}
33+
return svadda(svptrue_b32(), 0, svdup_f32(1));
34+
}
35+
36+
float incomp_sve_sm_fadda_smc(void) __arm_streaming_compatible {
37+
// expected-error@+1 {{builtin can only be called from a non-streaming function}}
38+
return svadda(svptrue_b32(), 0, svdup_f32(1));
39+
}
40+
3141
svuint32_t incompat_sve_sm(svbool_t pg, svuint32_t a, int16_t b) __arm_streaming {
3242
// expected-error@+1 {{builtin can only be called from a non-streaming function}}
3343
return __builtin_sve_svld1_gather_u32base_index_u32(pg, a, b);

lldb/test/API/python_api/process/cancel_attach/TestCancelAttach.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@
1414
class AttachCancelTestCase(TestBase):
1515
NO_DEBUG_INFO_TESTCASE = True
1616

17+
@skipIf(
18+
remote=True,
19+
hostoslist=["windows"],
20+
bugnumber="https://github.com/llvm/llvm-project/issues/115618",
21+
)
1722
def test_scripted_implementation(self):
1823
"""Test that cancelling a stuck "attach waitfor" works."""
1924
# First make an empty target for the attach:

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3920,6 +3920,19 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
39203920
Known.Zero.setBitsFrom(1);
39213921
break;
39223922
}
3923+
case ISD::MGATHER:
3924+
case ISD::MLOAD: {
3925+
ISD::LoadExtType ETy =
3926+
(Opcode == ISD::MGATHER)
3927+
? cast<MaskedGatherSDNode>(Op)->getExtensionType()
3928+
: cast<MaskedLoadSDNode>(Op)->getExtensionType();
3929+
if (ETy == ISD::ZEXTLOAD) {
3930+
EVT MemVT = cast<MemSDNode>(Op)->getMemoryVT();
3931+
KnownBits Known0(MemVT.getScalarSizeInBits());
3932+
return Known0.zext(BitWidth);
3933+
}
3934+
break;
3935+
}
39233936
case ISD::LOAD: {
39243937
LoadSDNode *LD = cast<LoadSDNode>(Op);
39253938
const Constant *Cst = TLI->getTargetConstantFromLoad(LD);

llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ static MCInstrInfo *createLoongArchMCInstrInfo() {
5555
static MCSubtargetInfo *
5656
createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
5757
if (CPU.empty() || CPU == "generic")
58-
CPU = TT.isArch64Bit() ? "la464" : "generic-la32";
58+
CPU = TT.isArch64Bit() ? "generic-la64" : "generic-la32";
5959
return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
6060
}
6161

llvm/lib/Target/X86/X86PfmCounters.td

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -189,13 +189,13 @@ def IceLakePfmCounters : ProcPfmCounters {
189189
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
190190
let UopsCounter = UopsIssuedPfmCounter;
191191
let IssueCounters = [
192-
PfmIssueCounter<"ICXPort0", "uops_dispatched_port:port_0">,
193-
PfmIssueCounter<"ICXPort1", "uops_dispatched_port:port_1">,
194-
PfmIssueCounter<"ICXPort23", "uops_dispatched_port:port_2_3">,
195-
PfmIssueCounter<"ICXPort49", "uops_dispatched_port:port_4_9">,
196-
PfmIssueCounter<"ICXPort5", "uops_dispatched_port:port_5">,
197-
PfmIssueCounter<"ICXPort6", "uops_dispatched_port:port_6">,
198-
PfmIssueCounter<"ICXPort78", "uops_dispatched_port:port_7_8">
192+
PfmIssueCounter<"ICXPort0", "uops_dispatched:port_0">,
193+
PfmIssueCounter<"ICXPort1", "uops_dispatched:port_1">,
194+
PfmIssueCounter<"ICXPort23", "uops_dispatched:port_2_3">,
195+
PfmIssueCounter<"ICXPort49", "uops_dispatched:port_4_9">,
196+
PfmIssueCounter<"ICXPort5", "uops_dispatched:port_5">,
197+
PfmIssueCounter<"ICXPort6", "uops_dispatched:port_6">,
198+
PfmIssueCounter<"ICXPort78", "uops_dispatched:port_7_8">
199199
];
200200
let ValidationCounters = DefaultIntelPfmValidationCounters;
201201
}
@@ -208,16 +208,16 @@ def AlderLakePfmCounters : ProcPfmCounters {
208208
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
209209
let UopsCounter = UopsIssuedPfmCounter;
210210
let IssueCounters = [
211-
PfmIssueCounter<"ADLPPort00", "uops_dispatched_port:port_0">,
212-
PfmIssueCounter<"ADLPPort01", "uops_dispatched_port:port_1">,
211+
PfmIssueCounter<"ADLPPort00", "uops_dispatched:port_0">,
212+
PfmIssueCounter<"ADLPPort01", "uops_dispatched:port_1">,
213213
// The perfmon documentation and thus libpfm seems to incorrectly label
214214
// this performance counter, as ports 2,3, and 11 are actually grouped
215215
// according to most documentation. See #113941 for additional details.
216-
PfmIssueCounter<"ADLPPort02_03_11", "uops_dispatched_port:port_2_3_10">,
217-
PfmIssueCounter<"ADLPPort04_09", "uops_dispatched_port:port_4_9">,
218-
PfmIssueCounter<"ADLPPort05_11", "uops_dispatched_port:port_5_11">,
219-
PfmIssueCounter<"ADLPPort06", "uops_dispatched_port:port_6">,
220-
PfmIssueCounter<"ADLPPort07_08", "uops_dispatched_port:port_7_8">
216+
PfmIssueCounter<"ADLPPort02_03_11", "uops_dispatched:port_2_3_10">,
217+
PfmIssueCounter<"ADLPPort04_09", "uops_dispatched:port_4_9">,
218+
PfmIssueCounter<"ADLPPort05_11", "uops_dispatched:port_5_11">,
219+
PfmIssueCounter<"ADLPPort06", "uops_dispatched:port_6">,
220+
PfmIssueCounter<"ADLPPort07_08", "uops_dispatched:port_7_8">
221221
];
222222
let ValidationCounters = DefaultIntelPfmValidationCounters;
223223
}
@@ -227,16 +227,16 @@ def SapphireRapidsPfmCounters : ProcPfmCounters {
227227
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
228228
let UopsCounter = UopsIssuedPfmCounter;
229229
let IssueCounters = [
230-
PfmIssueCounter<"SPRPort00", "uops_dispatched_port:port_0">,
231-
PfmIssueCounter<"SPRPort01", "uops_dispatched_port:port_1">,
230+
PfmIssueCounter<"SPRPort00", "uops_dispatched:port_0">,
231+
PfmIssueCounter<"SPRPort01", "uops_dispatched:port_1">,
232232
// The perfmon documentation and thus libpfm seems to incorrectly label
233233
// this performance counter, as ports 2,3, and 11 are actually grouped
234234
// according to most documentation. See #113941 for additional details.
235-
PfmIssueCounter<"SPRPort02_03_11", "uops_dispatched_port:port_2_3_10">,
236-
PfmIssueCounter<"SPRPort04_09", "uops_dispatched_port:port_4_9">,
237-
PfmIssueCounter<"SPRPort05_11", "uops_dispatched_port:port_5_11">,
238-
PfmIssueCounter<"SPRPort06", "uops_dispatched_port:port_6">,
239-
PfmIssueCounter<"SPRPort07_08", "uops_dispatched_port:port_7_8">,
235+
PfmIssueCounter<"SPRPort02_03_11", "uops_dispatched:port_2_3_10">,
236+
PfmIssueCounter<"SPRPort04_09", "uops_dispatched:port_4_9">,
237+
PfmIssueCounter<"SPRPort05_11", "uops_dispatched:port_5_11">,
238+
PfmIssueCounter<"SPRPort06", "uops_dispatched:port_6">,
239+
PfmIssueCounter<"SPRPort07_08", "uops_dispatched:port_7_8">,
240240
];
241241
let ValidationCounters = DefaultIntelPfmValidationCounters;
242242
}

llvm/test/CodeGen/AArch64/sve-hadd.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1347,10 +1347,8 @@ define void @zext_mload_avgflooru(ptr %p1, ptr %p2, <vscale x 8 x i1> %mask) {
13471347
; SVE: // %bb.0:
13481348
; SVE-NEXT: ld1b { z0.h }, p0/z, [x0]
13491349
; SVE-NEXT: ld1b { z1.h }, p0/z, [x1]
1350-
; SVE-NEXT: eor z2.d, z0.d, z1.d
1351-
; SVE-NEXT: and z0.d, z0.d, z1.d
1352-
; SVE-NEXT: lsr z1.h, z2.h, #1
13531350
; SVE-NEXT: add z0.h, z0.h, z1.h
1351+
; SVE-NEXT: lsr z0.h, z0.h, #1
13541352
; SVE-NEXT: st1h { z0.h }, p0, [x0]
13551353
; SVE-NEXT: ret
13561354
;
@@ -1377,11 +1375,11 @@ define void @zext_mload_avgceilu(ptr %p1, ptr %p2, <vscale x 8 x i1> %mask) {
13771375
; SVE-LABEL: zext_mload_avgceilu:
13781376
; SVE: // %bb.0:
13791377
; SVE-NEXT: ld1b { z0.h }, p0/z, [x0]
1380-
; SVE-NEXT: ld1b { z1.h }, p0/z, [x1]
1381-
; SVE-NEXT: eor z2.d, z0.d, z1.d
1382-
; SVE-NEXT: orr z0.d, z0.d, z1.d
1383-
; SVE-NEXT: lsr z1.h, z2.h, #1
1384-
; SVE-NEXT: sub z0.h, z0.h, z1.h
1378+
; SVE-NEXT: mov z1.h, #-1 // =0xffffffffffffffff
1379+
; SVE-NEXT: ld1b { z2.h }, p0/z, [x1]
1380+
; SVE-NEXT: eor z0.d, z0.d, z1.d
1381+
; SVE-NEXT: sub z0.h, z2.h, z0.h
1382+
; SVE-NEXT: lsr z0.h, z0.h, #1
13851383
; SVE-NEXT: st1b { z0.h }, p0, [x0]
13861384
; SVE-NEXT: ret
13871385
;

0 commit comments

Comments
 (0)