Skip to content

Commit d84a911

Browse files
authored
[AArch64][SVE] Avoid redundant extend of unsigned i8/i16 extracts. (llvm#165863)
Extracts of unsigned i8 or i16 elements from the bottom 128 bits of a scalable register lead to the implied zero-extend being transformed to an AND mask. The mask is redundant since UMOV already zeroes the high bits of the destination register.

For example:

```c
int foo(svuint8_t x) {
  return x[3];
}
```

Currently:

```gas
foo:
  umov    w8, v0.b[3]
  and     w0, w8, #0xff
  ret
```

Becomes:

```gas
foo:
  umov    w0, v0.b[3]
  ret
```
1 parent 57dad86 commit d84a911

File tree

2 files changed

+144
-0
lines changed

2 files changed

+144
-0
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3597,6 +3597,18 @@ let Predicates = [HasSVE_or_SME] in {
35973597

35983598
def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))),
35993599
(SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
3600+
3601+
// Extracts of "unsigned" i8 or i16 elements lead to the zero-extend being
3602+
// transformed to an AND mask. The mask is redundant since UMOV already zeroes
3603+
// the high bits of the destination register.
3604+
def : Pat<(i32 (and (vector_extract nxv16i8:$vec, VectorIndexB:$index), 0xff)),
3605+
(UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
3606+
def : Pat<(i32 (and (vector_extract nxv8i16:$vec, VectorIndexH:$index), 0xffff)),
3607+
(UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)>;
3608+
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index)))), (i64 0xff))),
3609+
(SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)), sub_32)>;
3610+
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract nxv8i16:$vec, VectorIndexH:$index)))), (i64 0xffff))),
3611+
(SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)), sub_32)>;
36003612
} // End HasNEON
36013613

36023614
// Extract first element from vector.

llvm/test/CodeGen/AArch64/sve-extract-element.ll

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,26 @@ define i8 @test_lane0_16xi8(<vscale x 16 x i8> %a) #0 {
1212
ret i8 %b
1313
}
1414

15+
define i32 @test_lane0_16xi8_zext_i32(<vscale x 16 x i8> %a) #0 {
16+
; CHECK-LABEL: test_lane0_16xi8_zext_i32:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: umov w0, v0.b[0]
19+
; CHECK-NEXT: ret
20+
%b = extractelement <vscale x 16 x i8> %a, i32 0
21+
%c = zext i8 %b to i32
22+
ret i32 %c
23+
}
24+
25+
define i64 @test_lane0_16xi8_zext_i64(<vscale x 16 x i8> %a) #0 {
26+
; CHECK-LABEL: test_lane0_16xi8_zext_i64:
27+
; CHECK: // %bb.0:
28+
; CHECK-NEXT: umov w0, v0.b[0]
29+
; CHECK-NEXT: ret
30+
%b = extractelement <vscale x 16 x i8> %a, i32 0
31+
%c = zext i8 %b to i64
32+
ret i64 %c
33+
}
34+
1535
define i8 @test_lane15_16xi8(<vscale x 16 x i8> %a) #0 {
1636
; CHECK-LABEL: test_lane15_16xi8:
1737
; CHECK: // %bb.0:
@@ -21,6 +41,26 @@ define i8 @test_lane15_16xi8(<vscale x 16 x i8> %a) #0 {
2141
ret i8 %b
2242
}
2343

44+
define i32 @test_lane15_16xi8_zext_i32(<vscale x 16 x i8> %a) #0 {
45+
; CHECK-LABEL: test_lane15_16xi8_zext_i32:
46+
; CHECK: // %bb.0:
47+
; CHECK-NEXT: umov w0, v0.b[15]
48+
; CHECK-NEXT: ret
49+
%b = extractelement <vscale x 16 x i8> %a, i32 15
50+
%c = zext i8 %b to i32
51+
ret i32 %c
52+
}
53+
54+
define i64 @test_lane15_16xi8_zext_i64(<vscale x 16 x i8> %a) #0 {
55+
; CHECK-LABEL: test_lane15_16xi8_zext_i64:
56+
; CHECK: // %bb.0:
57+
; CHECK-NEXT: umov w0, v0.b[15]
58+
; CHECK-NEXT: ret
59+
%b = extractelement <vscale x 16 x i8> %a, i32 15
60+
%c = zext i8 %b to i64
61+
ret i64 %c
62+
}
63+
2464
define i8 @test_lane16_16xi8(<vscale x 16 x i8> %a) #0 {
2565
; CHECK-LABEL: test_lane16_16xi8:
2666
; CHECK: // %bb.0:
@@ -31,6 +71,32 @@ define i8 @test_lane16_16xi8(<vscale x 16 x i8> %a) #0 {
3171
ret i8 %b
3272
}
3373

74+
; FIXME: The FMOV+AND pair could be a single UMOV from lane 0 of the MOV result.
75+
define i32 @test_lane16_16xi8_zext_i32(<vscale x 16 x i8> %a) #0 {
76+
; CHECK-LABEL: test_lane16_16xi8_zext_i32:
77+
; CHECK: // %bb.0:
78+
; CHECK-NEXT: mov z0.b, z0.b[16]
79+
; CHECK-NEXT: fmov w8, s0
80+
; CHECK-NEXT: and w0, w8, #0xff
81+
; CHECK-NEXT: ret
82+
%b = extractelement <vscale x 16 x i8> %a, i32 16
83+
%c = zext i8 %b to i32
84+
ret i32 %c
85+
}
86+
87+
; FIXME: The FMOV+AND pair could be a single UMOV from lane 0 of the MOV result.
88+
define i64 @test_lane16_16xi8_zext_i64(<vscale x 16 x i8> %a) #0 {
89+
; CHECK-LABEL: test_lane16_16xi8_zext_i64:
90+
; CHECK: // %bb.0:
91+
; CHECK-NEXT: mov z0.b, z0.b[16]
92+
; CHECK-NEXT: fmov w8, s0
93+
; CHECK-NEXT: and x0, x8, #0xff
94+
; CHECK-NEXT: ret
95+
%b = extractelement <vscale x 16 x i8> %a, i32 16
96+
%c = zext i8 %b to i64
97+
ret i64 %c
98+
}
99+
34100
define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) #0 {
35101
; CHECK-LABEL: test_lane0_8xi16:
36102
; CHECK: // %bb.0:
@@ -40,6 +106,26 @@ define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) #0 {
40106
ret i16 %b
41107
}
42108

109+
define i32 @test_lane0_8xi16_zext_i32(<vscale x 8 x i16> %a) #0 {
110+
; CHECK-LABEL: test_lane0_8xi16_zext_i32:
111+
; CHECK: // %bb.0:
112+
; CHECK-NEXT: umov w0, v0.h[0]
113+
; CHECK-NEXT: ret
114+
%b = extractelement <vscale x 8 x i16> %a, i32 0
115+
%c = zext i16 %b to i32
116+
ret i32 %c
117+
}
118+
119+
define i64 @test_lane0_8xi16_zext_i64(<vscale x 8 x i16> %a) #0 {
120+
; CHECK-LABEL: test_lane0_8xi16_zext_i64:
121+
; CHECK: // %bb.0:
122+
; CHECK-NEXT: umov w0, v0.h[0]
123+
; CHECK-NEXT: ret
124+
%b = extractelement <vscale x 8 x i16> %a, i32 0
125+
%c = zext i16 %b to i64
126+
ret i64 %c
127+
}
128+
43129
define i16 @test_lane7_8xi16(<vscale x 8 x i16> %a) #0 {
44130
; CHECK-LABEL: test_lane7_8xi16:
45131
; CHECK: // %bb.0:
@@ -49,6 +135,26 @@ define i16 @test_lane7_8xi16(<vscale x 8 x i16> %a) #0 {
49135
ret i16 %b
50136
}
51137

138+
define i32 @test_lane7_8xi16_zext_i32(<vscale x 8 x i16> %a) #0 {
139+
; CHECK-LABEL: test_lane7_8xi16_zext_i32:
140+
; CHECK: // %bb.0:
141+
; CHECK-NEXT: umov w0, v0.h[7]
142+
; CHECK-NEXT: ret
143+
%b = extractelement <vscale x 8 x i16> %a, i32 7
144+
%c = zext i16 %b to i32
145+
ret i32 %c
146+
}
147+
148+
define i64 @test_lane7_8xi16_zext_i64(<vscale x 8 x i16> %a) #0 {
149+
; CHECK-LABEL: test_lane7_8xi16_zext_i64:
150+
; CHECK: // %bb.0:
151+
; CHECK-NEXT: umov w0, v0.h[7]
152+
; CHECK-NEXT: ret
153+
%b = extractelement <vscale x 8 x i16> %a, i32 7
154+
%c = zext i16 %b to i64
155+
ret i64 %c
156+
}
157+
52158
define i16 @test_lane8_8xi16(<vscale x 8 x i16> %a) #0 {
53159
; CHECK-LABEL: test_lane8_8xi16:
54160
; CHECK: // %bb.0:
@@ -59,6 +165,32 @@ define i16 @test_lane8_8xi16(<vscale x 8 x i16> %a) #0 {
59165
ret i16 %b
60166
}
61167

168+
; FIXME: The FMOV+AND pair could be a single UMOV from lane 0 of the MOV result.
169+
define i32 @test_lane8_8xi16_zext_i32(<vscale x 8 x i16> %a) #0 {
170+
; CHECK-LABEL: test_lane8_8xi16_zext_i32:
171+
; CHECK: // %bb.0:
172+
; CHECK-NEXT: mov z0.h, z0.h[8]
173+
; CHECK-NEXT: fmov w8, s0
174+
; CHECK-NEXT: and w0, w8, #0xffff
175+
; CHECK-NEXT: ret
176+
%b = extractelement <vscale x 8 x i16> %a, i32 8
177+
%c = zext i16 %b to i32
178+
ret i32 %c
179+
}
180+
181+
; FIXME: The FMOV+AND pair could be a single UMOV from lane 0 of the MOV result.
182+
define i64 @test_lane8_8xi16_zext_i64(<vscale x 8 x i16> %a) #0 {
183+
; CHECK-LABEL: test_lane8_8xi16_zext_i64:
184+
; CHECK: // %bb.0:
185+
; CHECK-NEXT: mov z0.h, z0.h[8]
186+
; CHECK-NEXT: fmov w8, s0
187+
; CHECK-NEXT: and x0, x8, #0xffff
188+
; CHECK-NEXT: ret
189+
%b = extractelement <vscale x 8 x i16> %a, i32 8
190+
%c = zext i16 %b to i64
191+
ret i64 %c
192+
}
193+
62194
define i32 @test_lane0_4xi32(<vscale x 4 x i32> %a) #0 {
63195
; CHECK-LABEL: test_lane0_4xi32:
64196
; CHECK: // %bb.0:

0 commit comments

Comments
 (0)