Skip to content

Commit 8fbff38

Browse files
SC llvm team
authored and committed
Merge llvm/main into amd-debug
2 parents fc7eac0 + 53d73c0 commit 8fbff38

File tree

15 files changed

+216
-83
lines changed

15 files changed

+216
-83
lines changed

clang/include/clang/Basic/CodeGenOptions.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ ENUM_CODEGENOPT(FramePointer, FramePointerKind, 2, FramePointerKind::None, Benig
5858

5959
ENUM_CODEGENOPT(ExceptionHandling, ExceptionHandlingKind, 3, ExceptionHandlingKind::None, NotCompatible)
6060

61-
CODEGENOPT(ClearASTBeforeBackend , 1, 0, Benign) ///< Free the AST before running backend code generation. Only works with -disable-free.
61+
CODEGENOPT(ClearASTBeforeBackend , 1, 0, Benign) ///< Free the AST before running backend code generation.
6262
CODEGENOPT(DisableFree , 1, 0, Benign) ///< Don't free memory.
6363
CODEGENOPT(DiscardValueNames , 1, 0, Benign) ///< Discard Value Names from the IR (LLVMContext flag)
6464
CODEGENOPT(DisableLLVMPasses , 1, 0, Benign) ///< Don't run any LLVM IR passes to get

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3331,14 +3331,8 @@ void tools::handleVectorizeSLPArgs(const ArgList &Args,
33313331

33323332
void tools::handleInterchangeLoopsArgs(const ArgList &Args,
33333333
ArgStringList &CmdArgs) {
3334-
// FIXME: instead of relying on shouldEnableVectorizerAtOLevel, we may want to
3335-
// implement a separate function to infer loop interchange from opt level.
3336-
// For now, enable loop-interchange at the same opt levels as loop-vectorize.
3337-
bool EnableInterchange = shouldEnableVectorizerAtOLevel(Args, false);
3338-
OptSpecifier InterchangeAliasOption =
3339-
EnableInterchange ? options::OPT_O_Group : options::OPT_floop_interchange;
3340-
if (Args.hasFlag(options::OPT_floop_interchange, InterchangeAliasOption,
3341-
options::OPT_fno_loop_interchange, EnableInterchange))
3334+
if (Args.hasFlag(options::OPT_floop_interchange,
3335+
options::OPT_fno_loop_interchange, false))
33423336
CmdArgs.push_back("-floop-interchange");
33433337
}
33443338

clang/test/CodeGen/builtins.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// RUN: %clang_cc1 -emit-llvm -o %t %s
22
// RUN: not grep __builtin %t
33
// RUN: %clang_cc1 -emit-llvm -triple x86_64-darwin-apple -o - %s | FileCheck %s
4+
// RUN: %clang_cc1 -emit-llvm -triple x86_64-darwin-apple -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
45

56
int printf(const char *, ...);
67

flang/test/Driver/loop-interchange.f90

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
! RUN: %flang -### -S -fno-loop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
33
! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
44
! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
5-
! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
6-
! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
7-
! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
5+
! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
6+
! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
7+
! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
88
! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
99
! CHECK-LOOP-INTERCHANGE: "-floop-interchange"
1010
! CHECK-NO-LOOP-INTERCHANGE-NOT: "-floop-interchange"

libcxx/docs/ReleaseNotes/22.rst

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ What's New in Libc++ 22.0.0?
3838
Implemented Papers
3939
------------------
4040

41-
- P2321R2: ``zip`` (`Github <https://github.com/llvm/llvm-project/issues/105169>`__) (The paper is partially implemented. ``zip_transform_view`` is implemented in this release)
41+
- P2321R2: ``zip`` (`Github <https://github.com/llvm/llvm-project/issues/105169>`__) (The paper is partially
42+
implemented. ``zip_transform_view`` is implemented in this release)
4243
- P3168R2: Give ``std::optional`` Range Support (`Github <https://github.com/llvm/llvm-project/issues/105430>`__)
4344

4445
Improvements and New Features
@@ -50,7 +51,8 @@ Improvements and New Features
5051
- The performance of ``unordered_set::operator=(const unordered_set&)`` has been improved by up to 5x.
5152
- The performance of ``map::erase`` and ``set::erase`` has been improved by up to 2x
5253
- The performance of ``find(key)`` in ``map``, ``set``, ``multimap`` and ``multiset`` has been improved by up to 2.3x
53-
- Some reallocations are now avoided in `std::filesystem::path::lexically_relative`, resulting in a performance improvement of up to 1.7x.
54+
- Some reallocations are now avoided in ``std::filesystem::path::lexically_relative``, resulting in a performance
55+
improvement of up to 1.7x.
5456
- The performance of the ``(iterator, iterator)`` constructors of ``multimap`` and ``multiset``
5557
has been improved by up to 3x
5658
- The performance of ``insert(iterator, iterator)`` of ``multimap`` and ``multiset`` has been improved by up to 2.5x
@@ -62,7 +64,11 @@ Deprecations and Removals
6264
Potentially breaking changes
6365
----------------------------
6466

65-
- The algorithm for ``multi{map,set}::find`` has been modified such that it doesn't necessarily return an iterator to the first equal element in the container. This was never guaranteed by the Standard, but libc++ previously happened to always return the first equal element, like other implementations do. Starting with this release, code relying on the first element being returned from ``find`` will be broken, and ``lower_bound`` or ``equal_range`` should be used instead.
67+
- The algorithm for ``multi{map,set}::find`` has been modified such that it doesn't necessarily return an iterator to
68+
the first equal element in the container. This was never guaranteed by the Standard, but libc++ previously happened to
69+
always return the first equal element, like other implementations do. Starting with this release, code relying on the
70+
first element being returned from ``find`` will be broken, and ``lower_bound`` or ``equal_range`` should be used
71+
instead.
6672

6773
Announcements About Future Releases
6874
-----------------------------------

lldb/unittests/Protocol/ProtocolMCPServerTest.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,6 @@ TEST_F(ProtocolServerMCPTest, ToolsCallFail) {
282282
TEST_F(ProtocolServerMCPTest, NotificationInitialized) {
283283
bool handler_called = false;
284284
std::condition_variable cv;
285-
std::mutex mutex;
286285

287286
server_up->AddNotificationHandler(
288287
"notifications/initialized",

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4399,13 +4399,8 @@ static unsigned getLoadStoreOpcodeForFP16(bool Load, const X86Subtarget &STI) {
43994399
if (STI.hasFP16())
44004400
return Load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
44014401
if (Load)
4402-
return STI.hasAVX512() ? X86::VMOVSSZrm
4403-
: STI.hasAVX() ? X86::VMOVSSrm
4404-
: X86::MOVSSrm;
4405-
else
4406-
return STI.hasAVX512() ? X86::VMOVSSZmr
4407-
: STI.hasAVX() ? X86::VMOVSSmr
4408-
: X86::MOVSSmr;
4402+
return X86::MOVSHPrm;
4403+
return X86::MOVSHPmr;
44094404
}
44104405

44114406
static unsigned getLoadStoreRegOpcode(Register Reg,
@@ -6131,6 +6126,25 @@ static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
61316126
return true;
61326127
}
61336128

6129+
/// Rewrite a MOVSHPrm (load) or MOVSHPmr (store) pseudo in place into a
/// MOVSS-family instruction by swapping the instruction descriptor on \p MIB.
///
/// \param MIB    Builder whose instruction receives the new descriptor.
/// \param MI     The pseudo being expanded; its opcode selects load vs. store.
/// \param TII    Used to look up the descriptor for the chosen opcode.
/// \param HasAVX Selects the VMOVSS* opcode instead of the MOVSS* one.
/// \returns Always true.
static bool expandMOVSHP(MachineInstrBuilder &MIB, MachineInstr &MI,
6130+
const TargetInstrInfo &TII, bool HasAVX) {
6131+
unsigned NewOpc;
6132+
if (MI.getOpcode() == X86::MOVSHPrm) {
6133+
NewOpc = HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
6134+
// Load form: the destination register is operand 0.
Register Reg = MI.getOperand(0).getReg();
6135+
// NOTE(review): presumably this catches XMM16 and above, which need the
// VMOVSSZ* opcode; it relies on the register enum ordering -- confirm.
if (Reg > X86::XMM15)
6136+
NewOpc = X86::VMOVSSZrm;
6137+
} else {
6138+
NewOpc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
6139+
// Store form: the source register is operand 5 (presumably after the
// five memory-address operands -- confirm).
Register Reg = MI.getOperand(5).getReg();
6140+
// Same register-range check as the load form above.
if (Reg > X86::XMM15)
6141+
NewOpc = X86::VMOVSSZmr;
6142+
}
6143+
6144+
// Only the descriptor changes; the operands of the instruction are untouched.
MIB->setDesc(TII.get(NewOpc));
6145+
return true;
6146+
}
6147+
61346148
bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
61356149
bool HasAVX = Subtarget.hasAVX();
61366150
MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
@@ -6203,6 +6217,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
62036217
}
62046218
return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
62056219
}
6220+
case X86::MOVSHPmr:
6221+
case X86::MOVSHPrm:
6222+
return expandMOVSHP(MIB, MI, *this, Subtarget.hasAVX());
62066223
case X86::V_SETALLONES:
62076224
return Expand2AddrUndef(MIB,
62086225
get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,18 @@ multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
267267
}
268268
}
269269

270+
// Pseudo instructions for spilling (MOVSHPmr) and reloading (MOVSHPrm) fp16 values.
271+
let isPseudo = 1, Predicates = [HasSSE2] in {
272+
let mayStore = 1 in
273+
def MOVSHPmr : I<0, Pseudo, (outs), (ins f32mem:$dst, FR16X:$src), "",
274+
[], SSEPackedSingle>,
275+
Sched<[WriteFStore]>;
276+
let mayLoad = 1 in
277+
def MOVSHPrm : I<0, Pseudo, (outs FR16X:$dst), (ins f32mem:$src), "",
278+
[], SSEPackedSingle>,
279+
Sched<[WriteFLoad]>;
280+
}
281+
270282
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
271283
SSEPackedSingle, UseSSE1>, TB, XS;
272284
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=x86_64-unknown -start-before=twoaddressinstruction -stop-after=postrapseudos -verify-machineinstrs -o - %s | FileCheck %s
3+
4+
...
5+
---
6+
name: test
7+
alignment: 16
8+
tracksRegLiveness: true
9+
debugInstrRef: true
10+
registers:
11+
liveins:
12+
- { reg: '$xmm0', virtual-reg: '%0' }
13+
frameInfo:
14+
maxAlignment: 1
15+
hasCalls: true
16+
machineFunctionInfo: {}
17+
body: |
18+
bb.0:
19+
liveins: $xmm0
20+
21+
; CHECK-LABEL: name: test
22+
; CHECK: liveins: $xmm0
23+
; CHECK-NEXT: {{ $}}
24+
; CHECK-NEXT: MOVSSmr $rsp, 1, $noreg, -4, $noreg, $xmm0 :: (store (s32) into %stack.0, align 2)
25+
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $xmm0, 12 /* clobber */, implicit-def dead early-clobber $xmm1, 12 /* clobber */, implicit-def dead early-clobber $xmm2, 12 /* clobber */, implicit-def dead early-clobber $xmm3, 12 /* clobber */, implicit-def dead early-clobber $xmm4, 12 /* clobber */, implicit-def dead early-clobber $xmm5, 12 /* clobber */, implicit-def dead early-clobber $xmm6, 12 /* clobber */, implicit-def dead early-clobber $xmm7, 12 /* clobber */, implicit-def dead early-clobber $xmm8, 12 /* clobber */, implicit-def dead early-clobber $xmm9, 12 /* clobber */, implicit-def dead early-clobber $xmm10, 12 /* clobber */, implicit-def dead early-clobber $xmm11, 12 /* clobber */, implicit-def dead early-clobber $xmm12, 12 /* clobber */, implicit-def dead early-clobber $xmm13, 12 /* clobber */, implicit-def dead early-clobber $xmm14, 12 /* clobber */, implicit-def dead early-clobber $xmm15, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
26+
; CHECK-NEXT: renamable $xmm0 = MOVSSrm $rsp, 1, $noreg, -4, $noreg :: (load (s32) from %stack.0, align 2)
27+
; CHECK-NEXT: FNOP implicit-def $fpsw, implicit killed renamable $xmm0
28+
; CHECK-NEXT: RET 0
29+
%0:fr16 = COPY killed $xmm0
30+
INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $xmm0, 12 /* clobber */, implicit-def dead early-clobber $xmm1, 12 /* clobber */, implicit-def dead early-clobber $xmm2, 12 /* clobber */, implicit-def dead early-clobber $xmm3, 12 /* clobber */, implicit-def dead early-clobber $xmm4, 12 /* clobber */, implicit-def dead early-clobber $xmm5, 12 /* clobber */, implicit-def dead early-clobber $xmm6, 12 /* clobber */, implicit-def dead early-clobber $xmm7, 12 /* clobber */, implicit-def dead early-clobber $xmm8, 12 /* clobber */, implicit-def dead early-clobber $xmm9, 12 /* clobber */, implicit-def dead early-clobber $xmm10, 12 /* clobber */, implicit-def dead early-clobber $xmm11, 12 /* clobber */, implicit-def dead early-clobber $xmm12, 12 /* clobber */, implicit-def dead early-clobber $xmm13, 12 /* clobber */, implicit-def dead early-clobber $xmm14, 12 /* clobber */, implicit-def dead early-clobber $xmm15, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
31+
FNOP implicit-def $fpsw, implicit %0:fr16
32+
RET 0
33+
34+
...
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=SSE2
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -verify-machineinstrs | FileCheck %s --check-prefixes=AVX
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512
5+
6+
define half @test(float %f, ptr %p) nounwind {
7+
; SSE2-LABEL: test:
8+
; SSE2: # %bb.0:
9+
; SSE2-NEXT: pushq %rbx
10+
; SSE2-NEXT: subq $16, %rsp
11+
; SSE2-NEXT: movq %rdi, %rbx
12+
; SSE2-NEXT: callq __truncsfhf2@PLT
13+
; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
14+
; SSE2-NEXT: callq __extendhfsf2@PLT
15+
; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
16+
; SSE2-NEXT: #APP
17+
; SSE2-NEXT: #NO_APP
18+
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
19+
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
20+
; SSE2-NEXT: movss %xmm0, (%rbx)
21+
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
22+
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
23+
; SSE2-NEXT: addq $16, %rsp
24+
; SSE2-NEXT: popq %rbx
25+
; SSE2-NEXT: retq
26+
;
27+
; AVX-LABEL: test:
28+
; AVX: # %bb.0:
29+
; AVX-NEXT: pushq %rbx
30+
; AVX-NEXT: subq $16, %rsp
31+
; AVX-NEXT: movq %rdi, %rbx
32+
; AVX-NEXT: callq __truncsfhf2@PLT
33+
; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
34+
; AVX-NEXT: callq __extendhfsf2@PLT
35+
; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
36+
; AVX-NEXT: #APP
37+
; AVX-NEXT: #NO_APP
38+
; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
39+
; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
40+
; AVX-NEXT: vmovss %xmm0, (%rbx)
41+
; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
42+
; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero
43+
; AVX-NEXT: addq $16, %rsp
44+
; AVX-NEXT: popq %rbx
45+
; AVX-NEXT: retq
46+
;
47+
; AVX512-LABEL: test:
48+
; AVX512: # %bb.0:
49+
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
50+
; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
51+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
52+
; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
53+
; AVX512-NEXT: #APP
54+
; AVX512-NEXT: #NO_APP
55+
; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
56+
; AVX512-NEXT: vmovss %xmm0, (%rdi)
57+
; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
58+
; AVX512-NEXT: retq
59+
%t = fptrunc float %f to half
60+
%t2 = fpext half %t to float
61+
tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"()
62+
store float %t2, ptr %p
63+
ret half %t
64+
}

0 commit comments

Comments
 (0)