Skip to content

Commit 4fab511

Browse files
authored
Merge branch 'main' into bazel-d8b84be-fix
2 parents c5f746d + 2ab5186 commit 4fab511

File tree

5 files changed

+144
-15
lines changed

5 files changed

+144
-15
lines changed

llvm/include/llvm/CodeGen/RegAllocEvictionAdvisor.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,8 @@ struct EvictionCost {
9191
return std::tie(BrokenHints, MaxWeight) <
9292
std::tie(O.BrokenHints, O.MaxWeight);
9393
}
94+
95+
/// Complement of operator<: returns true when this cost is not strictly
/// lower than \p O under the (BrokenHints, MaxWeight) ordering that
/// operator< compares.
bool operator>=(const EvictionCost &O) const { return !(*this < O); }
9496
};
9597

9698
/// Interface to the eviction advisor, which is responsible for making a

llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
308308
Cost.BrokenHints += BreaksHint;
309309
Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
310310
// Abort if this would be too expensive.
311-
if (!(Cost < MaxCost))
311+
if (Cost >= MaxCost)
312312
return false;
313313
if (Urgent)
314314
continue;

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 9 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -2389,22 +2389,18 @@ void RAGreedy::initializeCSRCost() {
23892389
void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
23902390
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
23912391

2392-
for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2393-
if (!Instr.isCopy())
2392+
for (const MachineOperand &Opnd : MRI->reg_nodbg_operands(Reg)) {
2393+
const MachineInstr &Instr = *Opnd.getParent();
2394+
if (!Instr.isCopy() || Opnd.isImplicit())
23942395
continue;
23952396

23962397
// Look for the other end of the copy.
2397-
Register OtherReg = Instr.getOperand(0).getReg();
2398-
unsigned OtherSubReg = Instr.getOperand(0).getSubReg();
2399-
unsigned SubReg = Instr.getOperand(1).getSubReg();
2400-
2401-
if (OtherReg == Reg) {
2402-
OtherReg = Instr.getOperand(1).getReg();
2403-
OtherSubReg = Instr.getOperand(1).getSubReg();
2404-
SubReg = Instr.getOperand(0).getSubReg();
2405-
if (OtherReg == Reg)
2406-
continue;
2407-
}
2398+
const MachineOperand &OtherOpnd = Instr.getOperand(Opnd.isDef());
2399+
Register OtherReg = OtherOpnd.getReg();
2400+
if (OtherReg == Reg)
2401+
continue;
2402+
unsigned OtherSubReg = OtherOpnd.getSubReg();
2403+
unsigned SubReg = Opnd.getSubReg();
24082404

24092405
// Get the current assignment.
24102406
MCRegister OtherPhysReg =

llvm/test/CodeGen/X86/pr159723.ll

Lines changed: 131 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,131 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16,+avx512vl| FileCheck %s
3+
4+
declare <8 x half> @test_call_8()
5+
6+
declare <16 x half> @test_call_16()
7+
8+
declare <32 x half> @test_call_32()
9+
10+
define <8 x i1> @test_cmp_v8half_ogt(<8 x half> %rhs, <8 x i1> %mask) nounwind {
11+
; CHECK-LABEL: test_cmp_v8half_ogt:
12+
; CHECK: # %bb.0:
13+
; CHECK-NEXT: subq $40, %rsp
14+
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
15+
; CHECK-NEXT: vpsllw $15, %xmm1, %xmm0
16+
; CHECK-NEXT: vpmovw2m %xmm0, %k1
17+
; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
18+
; CHECK-NEXT: callq test_call_8@PLT
19+
; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
20+
; CHECK-NEXT: vcmpltph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %k0 {%k1} # 16-byte Folded Reload
21+
; CHECK-NEXT: vpmovm2w %k0, %xmm0
22+
; CHECK-NEXT: addq $40, %rsp
23+
; CHECK-NEXT: retq
24+
%lhs = call <8 x half> @test_call_8()
25+
%comp = fcmp ogt <8 x half> %lhs, %rhs
26+
%res = and <8 x i1> %comp, %mask
27+
ret <8 x i1> %res
28+
}
29+
30+
define <8 x i1> @test_cmp_v8half_ogt_commute(<8 x half> %rhs, <8 x i1> %mask) nounwind {
31+
; CHECK-LABEL: test_cmp_v8half_ogt_commute:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: subq $40, %rsp
34+
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
35+
; CHECK-NEXT: vpsllw $15, %xmm1, %xmm0
36+
; CHECK-NEXT: vpmovw2m %xmm0, %k1
37+
; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
38+
; CHECK-NEXT: callq test_call_8@PLT
39+
; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
40+
; CHECK-NEXT: vcmpltph {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %k0 {%k1} # 16-byte Folded Reload
41+
; CHECK-NEXT: vpmovm2w %k0, %xmm0
42+
; CHECK-NEXT: addq $40, %rsp
43+
; CHECK-NEXT: retq
44+
%lhs = call <8 x half> @test_call_8()
45+
%comp = fcmp ogt <8 x half> %rhs, %lhs
46+
%res = and <8 x i1> %comp, %mask
47+
ret <8 x i1> %res
48+
}
49+
50+
51+
define <16 x i1> @test_cmp_v16half_olt(<16 x half> %rhs, <16 x i1> %mask) nounwind {
52+
; CHECK-LABEL: test_cmp_v16half_olt:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: subq $56, %rsp
55+
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
56+
; CHECK-NEXT: vpsllw $7, %xmm1, %xmm0
57+
; CHECK-NEXT: vpmovb2m %xmm0, %k1
58+
; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
59+
; CHECK-NEXT: callq test_call_16@PLT
60+
; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
61+
; CHECK-NEXT: vcmpltph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %k0 {%k1} # 32-byte Folded Reload
62+
; CHECK-NEXT: vpmovm2b %k0, %xmm0
63+
; CHECK-NEXT: addq $56, %rsp
64+
; CHECK-NEXT: vzeroupper
65+
; CHECK-NEXT: retq
66+
%lhs = call <16 x half> @test_call_16()
67+
%comp = fcmp olt <16 x half> %lhs, %rhs
68+
%res = and <16 x i1> %comp, %mask
69+
ret <16 x i1> %res
70+
}
71+
72+
define <16 x i1> @test_cmp_v16half_olt_commute(<16 x half> %rhs, <16 x i1> %mask) nounwind {
73+
; CHECK-LABEL: test_cmp_v16half_olt_commute:
74+
; CHECK: # %bb.0:
75+
; CHECK-NEXT: subq $56, %rsp
76+
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
77+
; CHECK-NEXT: vpsllw $7, %xmm1, %xmm0
78+
; CHECK-NEXT: vpmovb2m %xmm0, %k1
79+
; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
80+
; CHECK-NEXT: callq test_call_16@PLT
81+
; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
82+
; CHECK-NEXT: vcmpltph {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %k0 {%k1} # 32-byte Folded Reload
83+
; CHECK-NEXT: vpmovm2b %k0, %xmm0
84+
; CHECK-NEXT: addq $56, %rsp
85+
; CHECK-NEXT: vzeroupper
86+
; CHECK-NEXT: retq
87+
%lhs = call <16 x half> @test_call_16()
88+
%comp = fcmp olt <16 x half> %rhs, %lhs
89+
%res = and <16 x i1> %comp, %mask
90+
ret <16 x i1> %res
91+
}
92+
93+
define <32 x i1> @test_cmp_v32half_oge(<32 x half> %rhs, <32 x i1> %mask) nounwind {
94+
; CHECK-LABEL: test_cmp_v32half_oge:
95+
; CHECK: # %bb.0:
96+
; CHECK-NEXT: subq $88, %rsp
97+
; CHECK-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
98+
; CHECK-NEXT: vpsllw $7, %ymm1, %ymm0
99+
; CHECK-NEXT: vpmovb2m %ymm0, %k1
100+
; CHECK-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
101+
; CHECK-NEXT: callq test_call_32@PLT
102+
; CHECK-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 4-byte Reload
103+
; CHECK-NEXT: vcmpleph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 {%k1} # 64-byte Folded Reload
104+
; CHECK-NEXT: vpmovm2b %k0, %ymm0
105+
; CHECK-NEXT: addq $88, %rsp
106+
; CHECK-NEXT: retq
107+
%lhs = call <32 x half> @test_call_32()
108+
%comp = fcmp oge <32 x half> %lhs, %rhs
109+
%res = and <32 x i1> %comp, %mask
110+
ret <32 x i1> %res
111+
}
112+
113+
define <32 x i1> @test_cmp_v32half_oge_commute(<32 x half> %rhs, <32 x i1> %mask) nounwind {
114+
; CHECK-LABEL: test_cmp_v32half_oge_commute:
115+
; CHECK: # %bb.0:
116+
; CHECK-NEXT: subq $88, %rsp
117+
; CHECK-NEXT: vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
118+
; CHECK-NEXT: vpsllw $7, %ymm1, %ymm0
119+
; CHECK-NEXT: vpmovb2m %ymm0, %k1
120+
; CHECK-NEXT: kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
121+
; CHECK-NEXT: callq test_call_32@PLT
122+
; CHECK-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 4-byte Reload
123+
; CHECK-NEXT: vcmpleph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 {%k1} # 64-byte Folded Reload
124+
; CHECK-NEXT: vpmovm2b %k0, %ymm0
125+
; CHECK-NEXT: addq $88, %rsp
126+
; CHECK-NEXT: retq
127+
%lhs = call <32 x half> @test_call_32()
128+
%comp = fcmp oge <32 x half> %rhs, %lhs
129+
%res = and <32 x i1> %comp, %mask
130+
ret <32 x i1> %res
131+
}

mlir/lib/Dialect/Quant/Transforms/NormalizeQuantTypes.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ class NormalizedQuantTypesConverter : public TypeConverter {
8484

8585
if (isConvertibleToPerAxis(tensorType)) {
8686
auto shape = subChannelType.getScales().getType().getShape();
87-
auto quantizedDimItr =
87+
const auto *quantizedDimItr =
8888
llvm::find_if(shape, [](int64_t dim) { return dim != 1; });
8989
auto scales = llvm::to_vector(llvm::map_range(
9090
subChannelType.getScales().getValues<APFloat>(),

0 commit comments

Comments
 (0)