Skip to content

Commit 33de419

Browse files
Partial revert "[NVPTX] Enhance mul.wide and mad.wide peepholes #150477" (#155024)
Fixes the regression reported in #150477 (comment): undo the `add (mul.wide a, b), c` -> `mad.wide a, b, c` instruction selection pattern, but preserve the `mul.wide` DAG combine.
1 parent 43a9b66 commit 33de419

File tree

5 files changed

+81
-66
lines changed

5 files changed

+81
-66
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -898,10 +898,8 @@ let Predicates = [hasOptEnabled] in {
898898
defm MAD_LO_S32 : MADInst<"lo.s32", mul, I32RT, I32RT>;
899899
defm MAD_LO_S64 : MADInst<"lo.s64", mul, I64RT, I64RT>;
900900

901-
defm MAD_WIDE_U16 : MADInst<"wide.u16", umul_wide, I32RT, I16RT>;
902-
defm MAD_WIDE_S16 : MADInst<"wide.s16", smul_wide, I32RT, I16RT>;
903-
defm MAD_WIDE_U32 : MADInst<"wide.u32", umul_wide, I64RT, I32RT>;
904-
defm MAD_WIDE_S32 : MADInst<"wide.s32", smul_wide, I64RT, I32RT>;
901+
// Generating mad.wide causes a regression:
902+
// https://github.com/llvm/llvm-project/pull/150477#issuecomment-3191367837
905903
}
906904

907905
//-----------------------------------

llvm/test/CodeGen/NVPTX/bug26185-2.ll

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ define ptx_kernel void @spam(ptr addrspace(1) noalias nocapture readonly %arg, p
1616
; CHECK: .maxntid 1, 1, 1
1717
; CHECK-NEXT: {
1818
; CHECK-NEXT: .reg .b32 %r<2>;
19-
; CHECK-NEXT: .reg .b64 %rd<8>;
19+
; CHECK-NEXT: .reg .b64 %rd<9>;
2020
; CHECK-EMPTY:
2121
; CHECK-NEXT: // %bb.0: // %bb
2222
; CHECK-NEXT: ld.param.b64 %rd1, [spam_param_0];
@@ -25,9 +25,10 @@ define ptx_kernel void @spam(ptr addrspace(1) noalias nocapture readonly %arg, p
2525
; CHECK-NEXT: add.s64 %rd4, %rd1, %rd3;
2626
; CHECK-NEXT: ld.param.b64 %rd5, [spam_param_1];
2727
; CHECK-NEXT: ld.global.nc.s16 %r1, [%rd4+16];
28-
; CHECK-NEXT: ld.global.b64 %rd6, [%rd5];
29-
; CHECK-NEXT: mad.wide.s32 %rd7, %r1, %r1, %rd6;
30-
; CHECK-NEXT: st.global.b64 [%rd5], %rd7;
28+
; CHECK-NEXT: mul.wide.s32 %rd6, %r1, %r1;
29+
; CHECK-NEXT: ld.global.b64 %rd7, [%rd5];
30+
; CHECK-NEXT: add.s64 %rd8, %rd6, %rd7;
31+
; CHECK-NEXT: st.global.b64 [%rd5], %rd8;
3132
; CHECK-NEXT: ret;
3233
bb:
3334
%tmp5 = add nsw i64 %arg3, 8

llvm/test/CodeGen/NVPTX/combine-wide.ll

Lines changed: 64 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -9,14 +9,15 @@ define i64 @t1(i32 %a, i32 %b, i64 %c) {
99
; O1-LABEL: t1(
1010
; O1: {
1111
; O1-NEXT: .reg .b32 %r<3>;
12-
; O1-NEXT: .reg .b64 %rd<3>;
12+
; O1-NEXT: .reg .b64 %rd<4>;
1313
; O1-EMPTY:
1414
; O1-NEXT: // %bb.0:
1515
; O1-NEXT: ld.param.b32 %r1, [t1_param_0];
1616
; O1-NEXT: ld.param.b32 %r2, [t1_param_1];
17-
; O1-NEXT: ld.param.b64 %rd1, [t1_param_2];
18-
; O1-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1;
19-
; O1-NEXT: st.param.b64 [func_retval0], %rd2;
17+
; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2;
18+
; O1-NEXT: ld.param.b64 %rd2, [t1_param_2];
19+
; O1-NEXT: add.s64 %rd3, %rd2, %rd1;
20+
; O1-NEXT: st.param.b64 [func_retval0], %rd3;
2021
; O1-NEXT: ret;
2122
;
2223
; O0-LABEL: t1(
@@ -44,14 +45,15 @@ define i64 @t2(i32 %a, i32 %b, i64 %c) {
4445
; O1-LABEL: t2(
4546
; O1: {
4647
; O1-NEXT: .reg .b32 %r<3>;
47-
; O1-NEXT: .reg .b64 %rd<3>;
48+
; O1-NEXT: .reg .b64 %rd<4>;
4849
; O1-EMPTY:
4950
; O1-NEXT: // %bb.0:
5051
; O1-NEXT: ld.param.b32 %r1, [t2_param_0];
5152
; O1-NEXT: ld.param.b32 %r2, [t2_param_1];
52-
; O1-NEXT: ld.param.b64 %rd1, [t2_param_2];
53-
; O1-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1;
54-
; O1-NEXT: st.param.b64 [func_retval0], %rd2;
53+
; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2;
54+
; O1-NEXT: ld.param.b64 %rd2, [t2_param_2];
55+
; O1-NEXT: add.s64 %rd3, %rd1, %rd2;
56+
; O1-NEXT: st.param.b64 [func_retval0], %rd3;
5557
; O1-NEXT: ret;
5658
;
5759
; O0-LABEL: t2(
@@ -79,13 +81,14 @@ define i64 @t3(i32 %a, i32 %b) {
7981
; O1-LABEL: t3(
8082
; O1: {
8183
; O1-NEXT: .reg .b32 %r<3>;
82-
; O1-NEXT: .reg .b64 %rd<2>;
84+
; O1-NEXT: .reg .b64 %rd<3>;
8385
; O1-EMPTY:
8486
; O1-NEXT: // %bb.0:
8587
; O1-NEXT: ld.param.b32 %r1, [t3_param_0];
8688
; O1-NEXT: ld.param.b32 %r2, [t3_param_1];
87-
; O1-NEXT: mad.wide.s32 %rd1, %r1, %r2, 1;
88-
; O1-NEXT: st.param.b64 [func_retval0], %rd1;
89+
; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2;
90+
; O1-NEXT: add.s64 %rd2, %rd1, 1;
91+
; O1-NEXT: st.param.b64 [func_retval0], %rd2;
8992
; O1-NEXT: ret;
9093
;
9194
; O0-LABEL: t3(
@@ -112,13 +115,14 @@ define i64 @t4(i32 %a, i64 %c) {
112115
; O1-LABEL: t4(
113116
; O1: {
114117
; O1-NEXT: .reg .b32 %r<2>;
115-
; O1-NEXT: .reg .b64 %rd<3>;
118+
; O1-NEXT: .reg .b64 %rd<4>;
116119
; O1-EMPTY:
117120
; O1-NEXT: // %bb.0:
118121
; O1-NEXT: ld.param.b32 %r1, [t4_param_0];
119122
; O1-NEXT: ld.param.b64 %rd1, [t4_param_1];
120-
; O1-NEXT: mad.wide.s32 %rd2, %r1, 3, %rd1;
121-
; O1-NEXT: st.param.b64 [func_retval0], %rd2;
123+
; O1-NEXT: mul.wide.s32 %rd2, %r1, 3;
124+
; O1-NEXT: add.s64 %rd3, %rd1, %rd2;
125+
; O1-NEXT: st.param.b64 [func_retval0], %rd3;
122126
; O1-NEXT: ret;
123127
;
124128
; O0-LABEL: t4(
@@ -145,12 +149,13 @@ define i64 @t4_1(i32 %a, i64 %c) {
145149
; O1-LABEL: t4_1(
146150
; O1: {
147151
; O1-NEXT: .reg .b32 %r<2>;
148-
; O1-NEXT: .reg .b64 %rd<2>;
152+
; O1-NEXT: .reg .b64 %rd<3>;
149153
; O1-EMPTY:
150154
; O1-NEXT: // %bb.0:
151155
; O1-NEXT: ld.param.b32 %r1, [t4_1_param_0];
152-
; O1-NEXT: mad.wide.s32 %rd1, %r1, 3, 5;
153-
; O1-NEXT: st.param.b64 [func_retval0], %rd1;
156+
; O1-NEXT: mul.wide.s32 %rd1, %r1, 3;
157+
; O1-NEXT: add.s64 %rd2, %rd1, 5;
158+
; O1-NEXT: st.param.b64 [func_retval0], %rd2;
154159
; O1-NEXT: ret;
155160
;
156161
; O0-LABEL: t4_1(
@@ -176,14 +181,15 @@ define i64 @t5(i32 %a, i32 %b, i64 %c) {
176181
; O1-LABEL: t5(
177182
; O1: {
178183
; O1-NEXT: .reg .b32 %r<3>;
179-
; O1-NEXT: .reg .b64 %rd<3>;
184+
; O1-NEXT: .reg .b64 %rd<4>;
180185
; O1-EMPTY:
181186
; O1-NEXT: // %bb.0:
182187
; O1-NEXT: ld.param.b32 %r1, [t5_param_0];
183188
; O1-NEXT: ld.param.b32 %r2, [t5_param_1];
184-
; O1-NEXT: ld.param.b64 %rd1, [t5_param_2];
185-
; O1-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1;
186-
; O1-NEXT: st.param.b64 [func_retval0], %rd2;
189+
; O1-NEXT: mul.wide.u32 %rd1, %r1, %r2;
190+
; O1-NEXT: ld.param.b64 %rd2, [t5_param_2];
191+
; O1-NEXT: add.s64 %rd3, %rd2, %rd1;
192+
; O1-NEXT: st.param.b64 [func_retval0], %rd3;
187193
; O1-NEXT: ret;
188194
;
189195
; O0-LABEL: t5(
@@ -211,14 +217,15 @@ define i64 @t6(i32 %a, i32 %b, i64 %c) {
211217
; O1-LABEL: t6(
212218
; O1: {
213219
; O1-NEXT: .reg .b32 %r<3>;
214-
; O1-NEXT: .reg .b64 %rd<3>;
220+
; O1-NEXT: .reg .b64 %rd<4>;
215221
; O1-EMPTY:
216222
; O1-NEXT: // %bb.0:
217223
; O1-NEXT: ld.param.b32 %r1, [t6_param_0];
218224
; O1-NEXT: ld.param.b32 %r2, [t6_param_1];
219-
; O1-NEXT: ld.param.b64 %rd1, [t6_param_2];
220-
; O1-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1;
221-
; O1-NEXT: st.param.b64 [func_retval0], %rd2;
225+
; O1-NEXT: mul.wide.u32 %rd1, %r1, %r2;
226+
; O1-NEXT: ld.param.b64 %rd2, [t6_param_2];
227+
; O1-NEXT: add.s64 %rd3, %rd1, %rd2;
228+
; O1-NEXT: st.param.b64 [func_retval0], %rd3;
222229
; O1-NEXT: ret;
223230
;
224231
; O0-LABEL: t6(
@@ -932,14 +939,15 @@ define i32 @t32(i16 %a, i16 %b, i32 %c) {
932939
; O1-LABEL: t32(
933940
; O1: {
934941
; O1-NEXT: .reg .b16 %rs<3>;
935-
; O1-NEXT: .reg .b32 %r<3>;
942+
; O1-NEXT: .reg .b32 %r<4>;
936943
; O1-EMPTY:
937944
; O1-NEXT: // %bb.0:
938945
; O1-NEXT: ld.param.b16 %rs1, [t32_param_0];
939946
; O1-NEXT: ld.param.b16 %rs2, [t32_param_1];
940-
; O1-NEXT: ld.param.b32 %r1, [t32_param_2];
941-
; O1-NEXT: mad.wide.s16 %r2, %rs1, %rs2, %r1;
942-
; O1-NEXT: st.param.b32 [func_retval0], %r2;
947+
; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2;
948+
; O1-NEXT: ld.param.b32 %r2, [t32_param_2];
949+
; O1-NEXT: add.s32 %r3, %r2, %r1;
950+
; O1-NEXT: st.param.b32 [func_retval0], %r3;
943951
; O1-NEXT: ret;
944952
;
945953
; O0-LABEL: t32(
@@ -967,14 +975,15 @@ define i32 @t33(i16 %a, i16 %b, i32 %c) {
967975
; O1-LABEL: t33(
968976
; O1: {
969977
; O1-NEXT: .reg .b16 %rs<3>;
970-
; O1-NEXT: .reg .b32 %r<3>;
978+
; O1-NEXT: .reg .b32 %r<4>;
971979
; O1-EMPTY:
972980
; O1-NEXT: // %bb.0:
973981
; O1-NEXT: ld.param.b16 %rs1, [t33_param_0];
974982
; O1-NEXT: ld.param.b16 %rs2, [t33_param_1];
975-
; O1-NEXT: ld.param.b32 %r1, [t33_param_2];
976-
; O1-NEXT: mad.wide.s16 %r2, %rs1, %rs2, %r1;
977-
; O1-NEXT: st.param.b32 [func_retval0], %r2;
983+
; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2;
984+
; O1-NEXT: ld.param.b32 %r2, [t33_param_2];
985+
; O1-NEXT: add.s32 %r3, %r2, %r1;
986+
; O1-NEXT: st.param.b32 [func_retval0], %r3;
978987
; O1-NEXT: ret;
979988
;
980989
; O0-LABEL: t33(
@@ -1002,13 +1011,14 @@ define i32 @t34(i16 %a, i16 %b) {
10021011
; O1-LABEL: t34(
10031012
; O1: {
10041013
; O1-NEXT: .reg .b16 %rs<3>;
1005-
; O1-NEXT: .reg .b32 %r<2>;
1014+
; O1-NEXT: .reg .b32 %r<3>;
10061015
; O1-EMPTY:
10071016
; O1-NEXT: // %bb.0:
10081017
; O1-NEXT: ld.param.b16 %rs1, [t34_param_0];
10091018
; O1-NEXT: ld.param.b16 %rs2, [t34_param_1];
1010-
; O1-NEXT: mad.wide.s16 %r1, %rs1, %rs2, 1;
1011-
; O1-NEXT: st.param.b32 [func_retval0], %r1;
1019+
; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2;
1020+
; O1-NEXT: add.s32 %r2, %r1, 1;
1021+
; O1-NEXT: st.param.b32 [func_retval0], %r2;
10121022
; O1-NEXT: ret;
10131023
;
10141024
; O0-LABEL: t34(
@@ -1035,13 +1045,14 @@ define i32 @t35(i16 %a, i32 %c) {
10351045
; O1-LABEL: t35(
10361046
; O1: {
10371047
; O1-NEXT: .reg .b16 %rs<2>;
1038-
; O1-NEXT: .reg .b32 %r<3>;
1048+
; O1-NEXT: .reg .b32 %r<4>;
10391049
; O1-EMPTY:
10401050
; O1-NEXT: // %bb.0:
10411051
; O1-NEXT: ld.param.b16 %rs1, [t35_param_0];
10421052
; O1-NEXT: ld.param.b32 %r1, [t35_param_1];
1043-
; O1-NEXT: mad.wide.s16 %r2, %rs1, 3, %r1;
1044-
; O1-NEXT: st.param.b32 [func_retval0], %r2;
1053+
; O1-NEXT: mul.wide.s16 %r2, %rs1, 3;
1054+
; O1-NEXT: add.s32 %r3, %r1, %r2;
1055+
; O1-NEXT: st.param.b32 [func_retval0], %r3;
10451056
; O1-NEXT: ret;
10461057
;
10471058
; O0-LABEL: t35(
@@ -1068,12 +1079,13 @@ define i32 @t36(i16 %a, i32 %c) {
10681079
; O1-LABEL: t36(
10691080
; O1: {
10701081
; O1-NEXT: .reg .b16 %rs<2>;
1071-
; O1-NEXT: .reg .b32 %r<2>;
1082+
; O1-NEXT: .reg .b32 %r<3>;
10721083
; O1-EMPTY:
10731084
; O1-NEXT: // %bb.0:
10741085
; O1-NEXT: ld.param.b16 %rs1, [t36_param_0];
1075-
; O1-NEXT: mad.wide.s16 %r1, %rs1, 3, 5;
1076-
; O1-NEXT: st.param.b32 [func_retval0], %r1;
1086+
; O1-NEXT: mul.wide.s16 %r1, %rs1, 3;
1087+
; O1-NEXT: add.s32 %r2, %r1, 5;
1088+
; O1-NEXT: st.param.b32 [func_retval0], %r2;
10771089
; O1-NEXT: ret;
10781090
;
10791091
; O0-LABEL: t36(
@@ -1099,14 +1111,15 @@ define i32 @t37(i16 %a, i16 %b, i32 %c) {
10991111
; O1-LABEL: t37(
11001112
; O1: {
11011113
; O1-NEXT: .reg .b16 %rs<3>;
1102-
; O1-NEXT: .reg .b32 %r<3>;
1114+
; O1-NEXT: .reg .b32 %r<4>;
11031115
; O1-EMPTY:
11041116
; O1-NEXT: // %bb.0:
11051117
; O1-NEXT: ld.param.b16 %rs1, [t37_param_0];
11061118
; O1-NEXT: ld.param.b16 %rs2, [t37_param_1];
1107-
; O1-NEXT: ld.param.b32 %r1, [t37_param_2];
1108-
; O1-NEXT: mad.wide.u16 %r2, %rs1, %rs2, %r1;
1109-
; O1-NEXT: st.param.b32 [func_retval0], %r2;
1119+
; O1-NEXT: mul.wide.u16 %r1, %rs1, %rs2;
1120+
; O1-NEXT: ld.param.b32 %r2, [t37_param_2];
1121+
; O1-NEXT: add.s32 %r3, %r2, %r1;
1122+
; O1-NEXT: st.param.b32 [func_retval0], %r3;
11101123
; O1-NEXT: ret;
11111124
;
11121125
; O0-LABEL: t37(
@@ -1134,14 +1147,15 @@ define i32 @t38(i16 %a, i16 %b, i32 %c) {
11341147
; O1-LABEL: t38(
11351148
; O1: {
11361149
; O1-NEXT: .reg .b16 %rs<3>;
1137-
; O1-NEXT: .reg .b32 %r<3>;
1150+
; O1-NEXT: .reg .b32 %r<4>;
11381151
; O1-EMPTY:
11391152
; O1-NEXT: // %bb.0:
11401153
; O1-NEXT: ld.param.b16 %rs1, [t38_param_0];
11411154
; O1-NEXT: ld.param.b16 %rs2, [t38_param_1];
1142-
; O1-NEXT: ld.param.b32 %r1, [t38_param_2];
1143-
; O1-NEXT: mad.wide.u16 %r2, %rs1, %rs2, %r1;
1144-
; O1-NEXT: st.param.b32 [func_retval0], %r2;
1155+
; O1-NEXT: mul.wide.u16 %r1, %rs1, %rs2;
1156+
; O1-NEXT: ld.param.b32 %r2, [t38_param_2];
1157+
; O1-NEXT: add.s32 %r3, %r1, %r2;
1158+
; O1-NEXT: st.param.b32 [func_retval0], %r3;
11451159
; O1-NEXT: ret;
11461160
;
11471161
; O0-LABEL: t38(

llvm/test/CodeGen/NVPTX/local-stack-frame.ll

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -114,14 +114,15 @@ define void @foo3(i32 %a) {
114114
; PTX64-NEXT: .reg .b64 %SP;
115115
; PTX64-NEXT: .reg .b64 %SPL;
116116
; PTX64-NEXT: .reg .b32 %r<2>;
117-
; PTX64-NEXT: .reg .b64 %rd<3>;
117+
; PTX64-NEXT: .reg .b64 %rd<4>;
118118
; PTX64-EMPTY:
119119
; PTX64-NEXT: // %bb.0:
120120
; PTX64-NEXT: mov.b64 %SPL, __local_depot2;
121121
; PTX64-NEXT: ld.param.b32 %r1, [foo3_param_0];
122122
; PTX64-NEXT: add.u64 %rd1, %SPL, 0;
123-
; PTX64-NEXT: mad.wide.s32 %rd2, %r1, 4, %rd1;
124-
; PTX64-NEXT: st.local.b32 [%rd2], %r1;
123+
; PTX64-NEXT: mul.wide.s32 %rd2, %r1, 4;
124+
; PTX64-NEXT: add.s64 %rd3, %rd1, %rd2;
125+
; PTX64-NEXT: st.local.b32 [%rd3], %r1;
125126
; PTX64-NEXT: ret;
126127
%local = alloca [3 x i32], align 4
127128
%1 = getelementptr inbounds i32, ptr %local, i32 %a

llvm/test/CodeGen/NVPTX/vector-loads.ll

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,7 @@ define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(1342177
154154
; CHECK: {
155155
; CHECK-NEXT: .reg .b16 %rs<4>;
156156
; CHECK-NEXT: .reg .b32 %r<8>;
157-
; CHECK-NEXT: .reg .b64 %rd<5>;
157+
; CHECK-NEXT: .reg .b64 %rd<6>;
158158
; CHECK-EMPTY:
159159
; CHECK-NEXT: // %bb.0:
160160
; CHECK-NEXT: ld.param.b64 %rd1, [foo_complex_param_0];
@@ -166,11 +166,12 @@ define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(1342177
166166
; CHECK-NEXT: shl.b32 %r6, %r1, 1;
167167
; CHECK-NEXT: or.b32 %r7, %r5, %r6;
168168
; CHECK-NEXT: cvt.u64.u32 %rd2, %r7;
169-
; CHECK-NEXT: mad.wide.u32 %rd3, %r3, 131072, %rd1;
170-
; CHECK-NEXT: add.s64 %rd4, %rd3, %rd2;
171-
; CHECK-NEXT: ld.v2.b8 {%rs1, %rs2}, [%rd4+128];
169+
; CHECK-NEXT: mul.wide.u32 %rd3, %r3, 131072;
170+
; CHECK-NEXT: add.s64 %rd4, %rd1, %rd3;
171+
; CHECK-NEXT: add.s64 %rd5, %rd4, %rd2;
172+
; CHECK-NEXT: ld.v2.b8 {%rs1, %rs2}, [%rd5+128];
172173
; CHECK-NEXT: max.u16 %rs3, %rs1, %rs2;
173-
; CHECK-NEXT: st.b8 [%rd4+129], %rs3;
174+
; CHECK-NEXT: st.b8 [%rd5+129], %rs3;
174175
; CHECK-NEXT: ret;
175176
%t0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1
176177
%t1 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()

0 commit comments

Comments
 (0)