Skip to content

Commit 7ca3e23

Browse files
committed
[SDAG] narrow truncated sign_extend_inreg
trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM

There are improvements on existing tests from this, and there is a pair of large regressions in D127115 for Thumb2 caused by not folding this pattern.

Differential Revision: https://reviews.llvm.org/D129890
1 parent 92a1b2a commit 7ca3e23

File tree

4 files changed

+30
-26
lines changed

4 files changed

+30
-26
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13153,6 +13153,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
1315313153
return N0.getOperand(0);
1315413154
}
1315513155

13156+
// Try to narrow a truncate-of-sext_in_reg to the destination type:
13157+
// trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
13158+
if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
13159+
N0.hasOneUse()) {
13160+
SDValue X = N0.getOperand(0);
13161+
SDValue ExtVal = N0.getOperand(1);
13162+
EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
13163+
if (ExtVT.bitsLT(VT)) {
13164+
SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
13165+
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
13166+
}
13167+
}
13168+
1315613169
// If this is anyext(trunc), don't fold it, allow ourselves to be folded.
1315713170
if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
1315813171
return SDValue();

llvm/test/CodeGen/AMDGPU/mul_int24.ll

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -181,34 +181,25 @@ define i64 @test_smul48_i64(i64 %lhs, i64 %rhs) {
181181
; SI-LABEL: test_smul48_i64:
182182
; SI: ; %bb.0:
183183
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184-
; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v0
185-
; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
186-
; SI-NEXT: v_ashr_i64 v[3:4], v[0:1], 40
187-
; SI-NEXT: v_ashr_i64 v[1:2], v[1:2], 40
188-
; SI-NEXT: v_mul_i32_i24_e32 v0, v3, v1
189-
; SI-NEXT: v_mul_hi_i32_i24_e32 v1, v3, v1
184+
; SI-NEXT: v_mul_i32_i24_e32 v3, v0, v2
185+
; SI-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
186+
; SI-NEXT: v_mov_b32_e32 v0, v3
190187
; SI-NEXT: s_setpc_b64 s[30:31]
191188
;
192189
; VI-LABEL: test_smul48_i64:
193190
; VI: ; %bb.0:
194191
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195-
; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v0
196-
; VI-NEXT: v_ashrrev_i64 v[3:4], 40, v[0:1]
197-
; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v2
198-
; VI-NEXT: v_ashrrev_i64 v[1:2], 40, v[0:1]
199-
; VI-NEXT: v_mul_i32_i24_e32 v0, v3, v1
200-
; VI-NEXT: v_mul_hi_i32_i24_e32 v1, v3, v1
192+
; VI-NEXT: v_mul_i32_i24_e32 v3, v0, v2
193+
; VI-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
194+
; VI-NEXT: v_mov_b32_e32 v0, v3
201195
; VI-NEXT: s_setpc_b64 s[30:31]
202196
;
203197
; GFX9-LABEL: test_smul48_i64:
204198
; GFX9: ; %bb.0:
205199
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206-
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v0
207-
; GFX9-NEXT: v_ashrrev_i64 v[3:4], 40, v[0:1]
208-
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v2
209-
; GFX9-NEXT: v_ashrrev_i64 v[1:2], 40, v[0:1]
210-
; GFX9-NEXT: v_mul_i32_i24_e32 v0, v3, v1
211-
; GFX9-NEXT: v_mul_hi_i32_i24_e32 v1, v3, v1
200+
; GFX9-NEXT: v_mul_i32_i24_e32 v3, v0, v2
201+
; GFX9-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
202+
; GFX9-NEXT: v_mov_b32_e32 v0, v3
212203
; GFX9-NEXT: s_setpc_b64 s[30:31]
213204
;
214205
; EG-LABEL: test_smul48_i64:

llvm/test/CodeGen/X86/pmulh.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,12 +261,14 @@ define <8 x i16> @sextinreg_mulhw_v8i16(<8 x i32> %a, <8 x i32> %b) {
261261
;
262262
; AVX512-LABEL: sextinreg_mulhw_v8i16:
263263
; AVX512: # %bb.0:
264-
; AVX512-NEXT: vpslld $24, %ymm0, %ymm0
265-
; AVX512-NEXT: vpsrad $24, %ymm0, %ymm0
266-
; AVX512-NEXT: vpslld $25, %ymm1, %ymm1
267-
; AVX512-NEXT: vpsrad $25, %ymm1, %ymm1
264+
; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
265+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
268266
; AVX512-NEXT: vpmovdw %zmm1, %ymm1
267+
; AVX512-NEXT: vpsllw $9, %xmm1, %xmm1
268+
; AVX512-NEXT: vpsraw $9, %xmm1, %xmm1
269269
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
270+
; AVX512-NEXT: vpsllw $8, %xmm0, %xmm0
271+
; AVX512-NEXT: vpsraw $8, %xmm0, %xmm0
270272
; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
271273
; AVX512-NEXT: vzeroupper
272274
; AVX512-NEXT: retq

llvm/test/CodeGen/X86/sar_fold64.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66
define i32 @shl48sar47(i64 %a) #0 {
77
; CHECK-LABEL: shl48sar47:
88
; CHECK: # %bb.0:
9-
; CHECK-NEXT: movswq %di, %rax
9+
; CHECK-NEXT: movswl %di, %eax
1010
; CHECK-NEXT: addl %eax, %eax
11-
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
1211
; CHECK-NEXT: retq
1312
%1 = shl i64 %a, 48
1413
%2 = ashr exact i64 %1, 47
@@ -32,9 +31,8 @@ define i32 @shl48sar49(i64 %a) #0 {
3231
define i32 @shl56sar55(i64 %a) #0 {
3332
; CHECK-LABEL: shl56sar55:
3433
; CHECK: # %bb.0:
35-
; CHECK-NEXT: movsbq %dil, %rax
34+
; CHECK-NEXT: movsbl %dil, %eax
3635
; CHECK-NEXT: addl %eax, %eax
37-
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
3836
; CHECK-NEXT: retq
3937
%1 = shl i64 %a, 56
4038
%2 = ashr exact i64 %1, 55

0 commit comments

Comments
 (0)