Skip to content

Commit 0161143

Browse files
[Hexagon] Generate signed mask for vshuff/vdeal (#159800)
vshuff/vdeal take a mask parameter that is interpreted modulo the vector size in bytes, so the values -2 and 126 have the same effect (with a 128-byte vector), although the generated code is different. Positive values often come from code that uses getVectorShuffle() and does not look past the vector size, so the upper bits are left as zeroes. Manually written code often picks negative values because they are often small and easy to reason about in important cases, such as adjusting the result after a double-vector arithmetic operation. For consistency, switch to normalizing the mask parameters to a signed value. Signed values were chosen over unsigned because they are not sensitive to the vector length, and because the A2_tfrsi argument, which is used to create the value, is also signed.
1 parent be7444b commit 0161143

File tree

12 files changed

+240
-233
lines changed

12 files changed

+240
-233
lines changed

llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2483,8 +2483,15 @@ OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
24832483
}
24842484
++I;
24852485

2486+
// Upper bits of the vdeal/vshuff parameter that do not cover any byte in
2487+
// the vector are ignored. Technically, A2_tfrsi takes a signed value, which
2488+
// is sign-extended to 32 bit if there is no extender. The practical
2489+
// advantages are that signed values are smaller in common use cases and are
2490+
// not sensitive to the vector size.
2491+
int SS = SignExtend32(S, HwLog);
2492+
24862493
NodeTemplate Res;
2487-
Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(S, dl)});
2494+
Results.push(Hexagon::A2_tfrsi, MVT::i32, {getSignedConst32(SS, dl)});
24882495
Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
24892496
Res.Ty = PairTy;
24902497
Res.Ops = {OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1)};

llvm/test/CodeGen/Hexagon/autohvx/deal-128b.ll

Lines changed: 64 additions & 64 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/Hexagon/autohvx/deal-64b.ll

Lines changed: 32 additions & 32 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/Hexagon/autohvx/isel-shuff-single.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
; was missing).
77

88
; CHECK-LABEL: f0:
9-
; CHECK-DAG: r[[R0:[0-9]+]] = #66
9+
; CHECK-DAG: r[[R0:[0-9]+]] = #-62
1010
; CHECK-DAG: r[[R1:[0-9]+]] = #40
11-
; CHECK-DAG: r[[R2:[0-9]+]] = #85
11+
; CHECK-DAG: r[[R2:[0-9]+]] = #-43
1212
; CHECK: v1:0 = vdeal(v{{[0-9]+}},v0,r[[R0]])
1313
; CHECK: v1:0 = vshuff(v1,v0,r[[R1]])
1414
; CHECK: v1:0 = vshuff(v1,v0,r[[R2]])

llvm/test/CodeGen/Hexagon/autohvx/isel-vpackew.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ define void @f0(ptr %a0, ptr %a1, ptr %a2) #0 {
55
; CHECK-LABEL: f0:
66
; CHECK: // %bb.0: // %b0
77
; CHECK-NEXT: {
8-
; CHECK-NEXT: r7 = #124
8+
; CHECK-NEXT: r7 = #-4
99
; CHECK-NEXT: v0 = vmem(r0+#0)
1010
; CHECK-NEXT: }
1111
; CHECK-NEXT: {

llvm/test/CodeGen/Hexagon/autohvx/mulh.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 {
1010
; V60-NEXT: v1:0.w = vmpy(v1.h,v0.h)
1111
; V60-NEXT: }
1212
; V60-NEXT: {
13-
; V60-NEXT: r7 = #124
13+
; V60-NEXT: r7 = #-4
1414
; V60-NEXT: }
1515
; V60-NEXT: {
1616
; V60-NEXT: v1:0 = vshuff(v1,v0,r7)
@@ -28,7 +28,7 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 {
2828
; V65-NEXT: v1:0.w = vmpy(v1.h,v0.h)
2929
; V65-NEXT: }
3030
; V65-NEXT: {
31-
; V65-NEXT: r7 = #124
31+
; V65-NEXT: r7 = #-4
3232
; V65-NEXT: }
3333
; V65-NEXT: {
3434
; V65-NEXT: v1:0 = vshuff(v1,v0,r7)
@@ -46,7 +46,7 @@ define <64 x i16> @mulhs16(<64 x i16> %a0, <64 x i16> %a1) #0 {
4646
; V69-NEXT: v1:0.w = vmpy(v1.h,v0.h)
4747
; V69-NEXT: }
4848
; V69-NEXT: {
49-
; V69-NEXT: r7 = #124
49+
; V69-NEXT: r7 = #-4
5050
; V69-NEXT: }
5151
; V69-NEXT: {
5252
; V69-NEXT: v1:0 = vshuff(v1,v0,r7)
@@ -72,7 +72,7 @@ define <64 x i16> @mulhu16(<64 x i16> %a0, <64 x i16> %a1) #0 {
7272
; V60-NEXT: v1:0.uw = vmpy(v1.uh,v0.uh)
7373
; V60-NEXT: }
7474
; V60-NEXT: {
75-
; V60-NEXT: r7 = #124
75+
; V60-NEXT: r7 = #-4
7676
; V60-NEXT: }
7777
; V60-NEXT: {
7878
; V60-NEXT: v1:0 = vshuff(v1,v0,r7)
@@ -90,7 +90,7 @@ define <64 x i16> @mulhu16(<64 x i16> %a0, <64 x i16> %a1) #0 {
9090
; V65-NEXT: v1:0.uw = vmpy(v1.uh,v0.uh)
9191
; V65-NEXT: }
9292
; V65-NEXT: {
93-
; V65-NEXT: r7 = #124
93+
; V65-NEXT: r7 = #-4
9494
; V65-NEXT: }
9595
; V65-NEXT: {
9696
; V65-NEXT: v1:0 = vshuff(v1,v0,r7)

llvm/test/CodeGen/Hexagon/autohvx/qmul.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ define void @f2(ptr %a0, ptr %a1, ptr %a2) #0 {
7575
; CHECK-NEXT: v0 = vmem(r1+#0)
7676
; CHECK-NEXT: }
7777
; CHECK-NEXT: {
78-
; CHECK-NEXT: r7 = #124
78+
; CHECK-NEXT: r7 = #-4
7979
; CHECK-NEXT: }
8080
; CHECK-NEXT: {
8181
; CHECK-NEXT: r3 = #15

llvm/test/CodeGen/Hexagon/autohvx/shuff-128b.ll

Lines changed: 64 additions & 64 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/Hexagon/autohvx/shuff-64b.ll

Lines changed: 32 additions & 32 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/Hexagon/autohvx/shuff-combos-128b.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
; Generator: vdeal(0x37), vdeal(0x53), vshuff(0x2f), vdeal(0x4b), vdeal(0x27), vdeal(0x43), vshuff(0x1f), vdeal(0x5b), vshuff(0x7e), vshuff(0x6c), vdeal(0x5a), vdeal(0x38), vshuff(0x16), vshuff(0x44), vdeal(0x72)
44
; CHECK-LABEL: test_0000:
5-
; CHECK-DAG: [[R00:r[0-9]+]] = #66
5+
; CHECK-DAG: [[R00:r[0-9]+]] = #-62
66
; CHECK-DAG: [[R01:r[0-9]+]] = #46
77
; CHECK-DAG: [[R02:r[0-9]+]] = #1
88
; CHECK: v[[H00:[0-9]+]]:[[L00:[0-9]+]] = vshuff(v1,v0,[[R00]])
@@ -18,7 +18,7 @@ define <256 x i8> @test_0000(<256 x i8> %v0) #0 {
1818
; CHECK-LABEL: test_0001:
1919
; CHECK-DAG: [[R10:r[0-9]+]] = #24
2020
; CHECK-DAG: [[R11:r[0-9]+]] = #9
21-
; CHECK-DAG: [[R12:r[0-9]+]] = #68
21+
; CHECK-DAG: [[R12:r[0-9]+]] = #-60
2222
; CHECK-DAG: [[R13:r[0-9]+]] = #34
2323
; CHECK: v[[H10:[0-9]+]]:[[L10:[0-9]+]] = vshuff(v1,v0,[[R10]])
2424
; CHECK: v[[H11:[0-9]+]]:[[L11:[0-9]+]] = vdeal(v[[H10]],v[[L10]],[[R11]])
@@ -34,7 +34,7 @@ define <256 x i8> @test_0001(<256 x i8> %v0) #0 {
3434
; CHECK-LABEL: test_0002:
3535
; CHECK-DAG: [[R20:r[0-9]+]] = #18
3636
; CHECK-DAG: [[R21:r[0-9]+]] = #10
37-
; CHECK-DAG: [[R22:r[0-9]+]] = #68
37+
; CHECK-DAG: [[R22:r[0-9]+]] = #-60
3838
; CHECK-DAG: [[R23:r[0-9]+]] = #5
3939
; CHECK: v[[H20:[0-9]+]]:[[L20:[0-9]+]] = vshuff(v1,v0,[[R20]])
4040
; CHECK: v[[H21:[0-9]+]]:[[L21:[0-9]+]] = vdeal(v[[H20]],v[[L20]],[[R21]])
@@ -51,7 +51,7 @@ define <256 x i8> @test_0002(<256 x i8> %v0) #0 {
5151
; CHECK-DAG: [[R30:r[0-9]+]] = #21
5252
; CHECK-DAG: [[R31:r[0-9]+]] = #9
5353
; CHECK-DAG: [[R32:r[0-9]+]] = #34
54-
; CHECK-DAG: [[R33:r[0-9]+]] = #66
54+
; CHECK-DAG: [[R33:r[0-9]+]] = #-62
5555
; CHECK: v[[H30:[0-9]+]]:[[L30:[0-9]+]] = vshuff(v1,v0,[[R30]])
5656
; CHECK: v[[H31:[0-9]+]]:[[L31:[0-9]+]] = vdeal(v[[H30]],v[[L30]],[[R31]])
5757
; CHECK: v[[H32:[0-9]+]]:[[L32:[0-9]+]] = vshuff(v[[H31]],v[[L31]],[[R32]])
@@ -65,7 +65,7 @@ define <256 x i8> @test_0003(<256 x i8> %v0) #0 {
6565
; Generator: vdeal(0x63), vshuff(0x6f), vdeal(0x77), vshuff(0x75), vdeal(0x3d), vshuff(0x2d), vshuff(0x00), vshuff(0x5c), vdeal(0x04), vshuff(0x79), vshuff(0x21), vdeal(0x7b), vdeal(0x66), vshuff(0x59), vdeal(0x54)
6666
; CHECK-LABEL: test_0004:
6767
; CHECK-DAG: [[R40:r[0-9]+]] = #38
68-
; CHECK-DAG: [[R41:r[0-9]+]] = #72
68+
; CHECK-DAG: [[R41:r[0-9]+]] = #-56
6969
; CHECK-DAG: [[R42:r[0-9]+]] = #18
7070
; CHECK: v[[H40:[0-9]+]]:[[L40:[0-9]+]] = vshuff(v1,v0,[[R40]])
7171
; CHECK: v[[H41:[0-9]+]]:[[L41:[0-9]+]] = vshuff(v[[H40]],v[[L40]],[[R41]])
@@ -81,7 +81,7 @@ define <256 x i8> @test_0004(<256 x i8> %v0) #0 {
8181
; CHECK-DAG: [[R50:r[0-9]+]] = #9
8282
; CHECK-DAG: [[R51:r[0-9]+]] = #3
8383
; CHECK-DAG: [[R52:r[0-9]+]] = #48
84-
; CHECK-DAG: [[R53:r[0-9]+]] = #68
84+
; CHECK-DAG: [[R53:r[0-9]+]] = #-60
8585
; CHECK: v[[H50:[0-9]+]]:[[L50:[0-9]+]] = vshuff(v1,v0,[[R50]])
8686
; CHECK: v[[H51:[0-9]+]]:[[L51:[0-9]+]] = vdeal(v[[H50]],v[[L50]],[[R51]])
8787
; CHECK: v[[H52:[0-9]+]]:[[L52:[0-9]+]] = vdeal(v[[H51]],v[[L51]],[[R52]])
@@ -94,7 +94,7 @@ define <256 x i8> @test_0005(<256 x i8> %v0) #0 {
9494

9595
; Generator: vshuff(0x34), vshuff(0x07), vdeal(0x5d), vshuff(0x05), vshuff(0x50), vshuff(0x13), vdeal(0x31), vdeal(0x6e), vdeal(0x0f), vdeal(0x2c), vdeal(0x28), vdeal(0x76), vdeal(0x22), vdeal(0x3a), vdeal(0x51)
9696
; CHECK-LABEL: test_0006:
97-
; CHECK-DAG: [[R60:r[0-9]+]] = #85
97+
; CHECK-DAG: [[R60:r[0-9]+]] = #-43
9898
; CHECK-DAG: [[R61:r[0-9]+]] = #2
9999
; CHECK: v[[H60:[0-9]+]]:[[L60:[0-9]+]] = vdeal(v1,v0,[[R60]])
100100
; CHECK: v[[H61:[0-9]+]]:[[L61:[0-9]+]] = vshuff(v[[H60]],v[[L60]],[[R61]])
@@ -106,7 +106,7 @@ define <256 x i8> @test_0006(<256 x i8> %v0) #0 {
106106

107107
; Generator: vshuff(0x25), vshuff(0x4c), vshuff(0x72), vdeal(0x70), vshuff(0x3b), vshuff(0x26), vshuff(0x4d), vdeal(0x20), vshuff(0x7f), vdeal(0x6a), vdeal(0x78), vshuff(0x5f), vdeal(0x10), vdeal(0x71), vshuff(0x6d)
108108
; CHECK-LABEL: test_0007:
109-
; CHECK-DAG: [[R70:r[0-9]+]] = #74
109+
; CHECK-DAG: [[R70:r[0-9]+]] = #-54
110110
; CHECK-DAG: [[R71:r[0-9]+]] = #20
111111
; CHECK-DAG: [[R72:r[0-9]+]] = #34
112112
; CHECK: v[[H70:[0-9]+]]:[[L70:[0-9]+]] = vshuff(v1,v0,[[R70]])
@@ -120,7 +120,7 @@ define <256 x i8> @test_0007(<256 x i8> %v0) #0 {
120120

121121
; Generator: vshuff(0x2e), vshuff(0x40), vdeal(0x35), vdeal(0x3e), vdeal(0x06), vshuff(0x4b), vshuff(0x24), vshuff(0x09), vdeal(0x18), vshuff(0x42), vshuff(0x43), vshuff(0x41), vshuff(0x23), vdeal(0x3f), vdeal(0x39)
122122
; CHECK-LABEL: test_0008:
123-
; CHECK-DAG: [[R80:r[0-9]+]] = #73
123+
; CHECK-DAG: [[R80:r[0-9]+]] = #-55
124124
; CHECK-DAG: [[R81:r[0-9]+]] = #5
125125
; CHECK-DAG: [[R82:r[0-9]+]] = #48
126126
; CHECK-DAG: [[R83:r[0-9]+]] = #2
@@ -136,7 +136,7 @@ define <256 x i8> @test_0008(<256 x i8> %v0) #0 {
136136

137137
; Generator: vshuff(0x33), vshuff(0x5e), vshuff(0x2a), vdeal(0x2f), vdeal(0x1f), vshuff(0x14), vshuff(0x17), vshuff(0x1b), vdeal(0x1c), vdeal(0x15), vshuff(0x37), vshuff(0x3c), vdeal(0x4e), vdeal(0x7d), vshuff(0x61)
138138
; CHECK-LABEL: test_0009:
139-
; CHECK-DAG: [[R90:r[0-9]+]] = #96
139+
; CHECK-DAG: [[R90:r[0-9]+]] = #-32
140140
; CHECK-DAG: [[R91:r[0-9]+]] = #18
141141
; CHECK-DAG: [[R92:r[0-9]+]] = #5
142142
; CHECK: v[[H90:[0-9]+]]:[[L90:[0-9]+]] = vshuff(v1,v0,[[R90]])
@@ -152,7 +152,7 @@ define <256 x i8> @test_0009(<256 x i8> %v0) #0 {
152152
; CHECK-LABEL: test_000a:
153153
; CHECK-DAG: [[Ra0:r[0-9]+]] = #44
154154
; CHECK-DAG: [[Ra1:r[0-9]+]] = #6
155-
; CHECK-DAG: [[Ra2:r[0-9]+]] = #80
155+
; CHECK-DAG: [[Ra2:r[0-9]+]] = #-48
156156
; CHECK: v[[Ha0:[0-9]+]]:[[La0:[0-9]+]] = vshuff(v1,v0,[[Ra0]])
157157
; CHECK: v[[Ha1:[0-9]+]]:[[La1:[0-9]+]] = vdeal(v[[Ha0]],v[[La0]],[[Ra1]])
158158
; CHECK: v[[Ha2:[0-9]+]]:[[La2:[0-9]+]] = vshuff(v[[Ha1]],v[[La1]],[[Ra2]])
@@ -164,7 +164,7 @@ define <256 x i8> @test_000a(<256 x i8> %v0) #0 {
164164

165165
; Generator: vshuff(0x74), vshuff(0x11), vshuff(0x53), vshuff(0x66), vshuff(0x1d), vdeal(0x59), vshuff(0x63), vshuff(0x49), vdeal(0x00), vshuff(0x38), vshuff(0x45), vdeal(0x68), vshuff(0x65), vshuff(0x6e), vdeal(0x62)
166166
; CHECK-LABEL: test_000b:
167-
; CHECK-DAG: [[Rb0:r[0-9]+]] = #68
167+
; CHECK-DAG: [[Rb0:r[0-9]+]] = #-60
168168
; CHECK-DAG: [[Rb1:r[0-9]+]] = #5
169169
; CHECK-DAG: [[Rb2:r[0-9]+]] = #18
170170
; CHECK-DAG: [[Rb3:r[0-9]+]] = #40
@@ -182,7 +182,7 @@ define <256 x i8> @test_000b(<256 x i8> %v0) #0 {
182182
; CHECK-LABEL: test_000c:
183183
; CHECK-DAG: [[Rc0:r[0-9]+]] = #10
184184
; CHECK-DAG: [[Rc1:r[0-9]+]] = #3
185-
; CHECK-DAG: [[Rc2:r[0-9]+]] = #84
185+
; CHECK-DAG: [[Rc2:r[0-9]+]] = #-44
186186
; CHECK: v[[Hc0:[0-9]+]]:[[Lc0:[0-9]+]] = vshuff(v1,v0,[[Rc0]])
187187
; CHECK: v[[Hc1:[0-9]+]]:[[Lc1:[0-9]+]] = vdeal(v[[Hc0]],v[[Lc0]],[[Rc1]])
188188
; CHECK: v[[Hc2:[0-9]+]]:[[Lc2:[0-9]+]] = vshuff(v[[Hc1]],v[[Lc1]],[[Rc2]])
@@ -195,7 +195,7 @@ define <256 x i8> @test_000c(<256 x i8> %v0) #0 {
195195
; Generator: vdeal(0x58), vdeal(0x0b), vdeal(0x21), vdeal(0x7f), vshuff(0x6a), vshuff(0x78), vshuff(0x52), vshuff(0x73), vshuff(0x06), vdeal(0x2d), vdeal(0x32), vdeal(0x48), vdeal(0x75), vdeal(0x55), vshuff(0x0e)
196196
; CHECK-LABEL: test_000d:
197197
; CHECK-DAG: [[Rd0:r[0-9]+]] = #36
198-
; CHECK-DAG: [[Rd1:r[0-9]+]] = #80
198+
; CHECK-DAG: [[Rd1:r[0-9]+]] = #-48
199199
; CHECK-DAG: [[Rd2:r[0-9]+]] = #9
200200
; CHECK: v[[Hd0:[0-9]+]]:[[Ld0:[0-9]+]] = vshuff(v1,v0,[[Rd0]])
201201
; CHECK: v[[Hd1:[0-9]+]]:[[Ld1:[0-9]+]] = vshuff(v[[Hd0]],v[[Ld0]],[[Rd1]])
@@ -208,7 +208,7 @@ define <256 x i8> @test_000d(<256 x i8> %v0) #0 {
208208

209209
; Generator: vdeal(0x6f), vdeal(0x13), vdeal(0x07), vdeal(0x56), vshuff(0x2c), vdeal(0x0c), vdeal(0x33), vshuff(0x22), vdeal(0x02), vshuff(0x18), vdeal(0x4d), vshuff(0x51), vshuff(0x3e), vshuff(0x77), vshuff(0x30)
210210
; CHECK-LABEL: test_000e:
211-
; CHECK-DAG: [[Re0:r[0-9]+]] = #65
211+
; CHECK-DAG: [[Re0:r[0-9]+]] = #-63
212212
; CHECK-DAG: [[Re1:r[0-9]+]] = #24
213213
; CHECK-DAG: [[Re2:r[0-9]+]] = #36
214214
; CHECK: v[[He0:[0-9]+]]:[[Le0:[0-9]+]] = vshuff(v1,v0,[[Re0]])

0 commit comments

Comments
 (0)