Skip to content

Commit 0d29279

Browse files
authored
[DAGCombine] Propagate nuw when evaluating sub with narrower types (#156710)
Proof: https://alive2.llvm.org/ce/z/cdbzSL Closes #156559.
1 parent 62ccea6 commit 0d29279

File tree

2 files changed

+213
-1
lines changed

2 files changed

+213
-1
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16317,7 +16317,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
1631716317
if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
1631816318
SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
1631916319
SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16320-
return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
16320+
SDNodeFlags Flags;
16321+
// Propagate nuw for sub.
16322+
if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
16323+
DAG.MaskedValueIsZero(
16324+
N0->getOperand(0),
16325+
APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
16326+
VT.getScalarSizeInBits())))
16327+
Flags.setNoUnsignedWrap(true);
16328+
return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
1632116329
}
1632216330
}
1632316331
break;

llvm/test/CodeGen/X86/shift-i128.ll

Lines changed: 204 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -938,3 +938,207 @@ define i128 @lshr_shl_mask(i128 %a0) {
938938
%2 = lshr i128 %1, 1
939939
ret i128 %2
940940
}
941+
942+
; Positive test: the i128 shift amount is `6 - %b`, a sub with a constant LHS
; that carries nuw, so the amount cannot wrap and is at most 6.
; DAGCombiner::visitTRUNCATE keeps nuw on a narrowed sub only when the LHS bits
; dropped by the truncate are known zero, which holds for the constant 6.
; The expected x86-64 output is a plain shld/shl pair with no `testb $64` /
; cmov fix-up for amounts >= 64 (contrast with the _no_nuw variant below).
define i128 @shift_i128_limited_shamt(i128 noundef %a, i32 noundef %b) nounwind {
943+
; i686-LABEL: shift_i128_limited_shamt:
944+
; i686: # %bb.0: # %start
945+
; i686-NEXT: pushl %ebp
946+
; i686-NEXT: movl %esp, %ebp
947+
; i686-NEXT: pushl %ebx
948+
; i686-NEXT: pushl %edi
949+
; i686-NEXT: pushl %esi
950+
; i686-NEXT: andl $-16, %esp
951+
; i686-NEXT: subl $16, %esp
952+
; i686-NEXT: movl 28(%ebp), %esi
953+
; i686-NEXT: movl 32(%ebp), %eax
954+
; i686-NEXT: movb $6, %dl
955+
; i686-NEXT: subb 40(%ebp), %dl
956+
; i686-NEXT: movl %edx, %ecx
957+
; i686-NEXT: shll %cl, %eax
958+
; i686-NEXT: movl %esi, %ebx
959+
; i686-NEXT: movl %esi, %edi
960+
; i686-NEXT: shrl %ebx
961+
; i686-NEXT: notb %cl
962+
; i686-NEXT: shrl %cl, %ebx
963+
; i686-NEXT: orl %eax, %ebx
964+
; i686-NEXT: movl 24(%ebp), %esi
965+
; i686-NEXT: movl %esi, %eax
966+
; i686-NEXT: movl %edx, %ecx
967+
; i686-NEXT: shll %cl, %eax
968+
; i686-NEXT: shldl %cl, %esi, %edi
969+
; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
970+
; i686-NEXT: movl 8(%ebp), %edi
971+
; i686-NEXT: movl 36(%ebp), %esi
972+
; i686-NEXT: movl 32(%ebp), %edx
973+
; i686-NEXT: shldl %cl, %edx, %esi
974+
; i686-NEXT: movl %esi, 12(%edi)
975+
; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
976+
; i686-NEXT: movl %ecx, 4(%edi)
977+
; i686-NEXT: movl %eax, (%edi)
978+
; i686-NEXT: movl %ebx, 8(%edi)
979+
; i686-NEXT: movl %edi, %eax
980+
; i686-NEXT: leal -12(%ebp), %esp
981+
; i686-NEXT: popl %esi
982+
; i686-NEXT: popl %edi
983+
; i686-NEXT: popl %ebx
984+
; i686-NEXT: popl %ebp
985+
; i686-NEXT: retl $4
986+
;
987+
; x86_64-LABEL: shift_i128_limited_shamt:
988+
; x86_64: # %bb.0: # %start
989+
; x86_64-NEXT: movq %rdi, %rax
990+
; x86_64-NEXT: movb $6, %cl
991+
; x86_64-NEXT: subb %dl, %cl
992+
; x86_64-NEXT: shldq %cl, %rdi, %rsi
993+
; x86_64-NEXT: shlq %cl, %rax
994+
; x86_64-NEXT: movq %rsi, %rdx
995+
; x86_64-NEXT: retq
996+
start:
997+
%shamt = sub nuw nsw i32 6, %b
998+
%ext = zext nneg i32 %shamt to i128
999+
%res = shl i128 %a, %ext
1000+
ret i128 %res
1001+
}
1002+
1003+
; Negative test: same shape as @shift_i128_limited_shamt, but the sub carries
; only nsw (no nuw), so there is no nuw flag to keep on the narrowed sub.
; The expected output is the generic i128 shift lowering: a `testb $64` + cmov
; select on x86-64, and a sequence that goes through stack slots on 32-bit x86.
define i128 @shift_i128_limited_shamt_no_nuw(i128 noundef %a, i32 noundef %b) nounwind {
1004+
; i686-LABEL: shift_i128_limited_shamt_no_nuw:
1005+
; i686: # %bb.0: # %start
1006+
; i686-NEXT: pushl %ebp
1007+
; i686-NEXT: movl %esp, %ebp
1008+
; i686-NEXT: pushl %ebx
1009+
; i686-NEXT: pushl %edi
1010+
; i686-NEXT: pushl %esi
1011+
; i686-NEXT: andl $-16, %esp
1012+
; i686-NEXT: subl $48, %esp
1013+
; i686-NEXT: movzbl 40(%ebp), %eax
1014+
; i686-NEXT: movl 24(%ebp), %ecx
1015+
; i686-NEXT: movl 28(%ebp), %edx
1016+
; i686-NEXT: movl 32(%ebp), %esi
1017+
; i686-NEXT: movl 36(%ebp), %edi
1018+
; i686-NEXT: movl %edi, {{[0-9]+}}(%esp)
1019+
; i686-NEXT: movl %esi, {{[0-9]+}}(%esp)
1020+
; i686-NEXT: movl %edx, {{[0-9]+}}(%esp)
1021+
; i686-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1022+
; i686-NEXT: movb $6, %cl
1023+
; i686-NEXT: subb %al, %cl
1024+
; i686-NEXT: movl %ecx, %eax
1025+
; i686-NEXT: shrb $3, %al
1026+
; i686-NEXT: andb $12, %al
1027+
; i686-NEXT: negb %al
1028+
; i686-NEXT: movsbl %al, %eax
1029+
; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
1030+
; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
1031+
; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
1032+
; i686-NEXT: movl $0, (%esp)
1033+
; i686-NEXT: movl 20(%esp,%eax), %edx
1034+
; i686-NEXT: movl 24(%esp,%eax), %ebx
1035+
; i686-NEXT: movl %ebx, %edi
1036+
; i686-NEXT: shldl %cl, %edx, %edi
1037+
; i686-NEXT: movl 16(%esp,%eax), %esi
1038+
; i686-NEXT: movl 28(%esp,%eax), %eax
1039+
; i686-NEXT: shldl %cl, %ebx, %eax
1040+
; i686-NEXT: movl 8(%ebp), %ebx
1041+
; i686-NEXT: movl %eax, 12(%ebx)
1042+
; i686-NEXT: movl %edi, 8(%ebx)
1043+
; i686-NEXT: movl %esi, %eax
1044+
; i686-NEXT: shll %cl, %eax
1045+
; i686-NEXT: shldl %cl, %esi, %edx
1046+
; i686-NEXT: movl %edx, 4(%ebx)
1047+
; i686-NEXT: movl %eax, (%ebx)
1048+
; i686-NEXT: movl %ebx, %eax
1049+
; i686-NEXT: leal -12(%ebp), %esp
1050+
; i686-NEXT: popl %esi
1051+
; i686-NEXT: popl %edi
1052+
; i686-NEXT: popl %ebx
1053+
; i686-NEXT: popl %ebp
1054+
; i686-NEXT: retl $4
1055+
;
1056+
; x86_64-LABEL: shift_i128_limited_shamt_no_nuw:
1057+
; x86_64: # %bb.0: # %start
1058+
; x86_64-NEXT: movb $6, %cl
1059+
; x86_64-NEXT: subb %dl, %cl
1060+
; x86_64-NEXT: shldq %cl, %rdi, %rsi
1061+
; x86_64-NEXT: shlq %cl, %rdi
1062+
; x86_64-NEXT: xorl %eax, %eax
1063+
; x86_64-NEXT: testb $64, %cl
1064+
; x86_64-NEXT: cmovneq %rdi, %rsi
1065+
; x86_64-NEXT: cmoveq %rdi, %rax
1066+
; x86_64-NEXT: movq %rsi, %rdx
1067+
; x86_64-NEXT: retq
1068+
start:
1069+
%shamt = sub nsw i32 6, %b
1070+
%ext = zext nneg i32 %shamt to i128
1071+
%res = shl i128 %a, %ext
1072+
ret i128 %res
1073+
}
1074+
1075+
; Negative test: the sub has nuw, but its LHS is the argument %c rather than a
; constant, so nothing is known about the LHS bits a truncate would drop.
; DAGCombiner::visitTRUNCATE requires those bits to be known zero before it
; keeps nuw on the narrowed sub, so the flag is not expected to carry over here.
; The expected output is the generic i128 shift lowering, including the
; `testb $64` + cmov select on x86-64.
define i128 @shift_i128_limited_shamt_unknown_lhs(i128 noundef %a, i32 noundef %b, i32 noundef %c) nounwind {
1076+
; i686-LABEL: shift_i128_limited_shamt_unknown_lhs:
1077+
; i686: # %bb.0: # %start
1078+
; i686-NEXT: pushl %ebp
1079+
; i686-NEXT: movl %esp, %ebp
1080+
; i686-NEXT: pushl %ebx
1081+
; i686-NEXT: pushl %edi
1082+
; i686-NEXT: pushl %esi
1083+
; i686-NEXT: andl $-16, %esp
1084+
; i686-NEXT: subl $48, %esp
1085+
; i686-NEXT: movl 24(%ebp), %eax
1086+
; i686-NEXT: movl 28(%ebp), %edx
1087+
; i686-NEXT: movl 32(%ebp), %esi
1088+
; i686-NEXT: movl 36(%ebp), %edi
1089+
; i686-NEXT: movl 44(%ebp), %ecx
1090+
; i686-NEXT: subl 40(%ebp), %ecx
1091+
; i686-NEXT: movl %edi, {{[0-9]+}}(%esp)
1092+
; i686-NEXT: movl %esi, {{[0-9]+}}(%esp)
1093+
; i686-NEXT: movl %edx, {{[0-9]+}}(%esp)
1094+
; i686-NEXT: movl %eax, {{[0-9]+}}(%esp)
1095+
; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
1096+
; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
1097+
; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
1098+
; i686-NEXT: movl $0, (%esp)
1099+
; i686-NEXT: movl %ecx, %eax
1100+
; i686-NEXT: shrb $3, %al
1101+
; i686-NEXT: andb $12, %al
1102+
; i686-NEXT: negb %al
1103+
; i686-NEXT: movsbl %al, %eax
1104+
; i686-NEXT: movl 20(%esp,%eax), %edx
1105+
; i686-NEXT: movl 24(%esp,%eax), %ebx
1106+
; i686-NEXT: movl %ebx, %edi
1107+
; i686-NEXT: shldl %cl, %edx, %edi
1108+
; i686-NEXT: movl 16(%esp,%eax), %esi
1109+
; i686-NEXT: movl 28(%esp,%eax), %eax
1110+
; i686-NEXT: shldl %cl, %ebx, %eax
1111+
; i686-NEXT: movl 8(%ebp), %ebx
1112+
; i686-NEXT: movl %eax, 12(%ebx)
1113+
; i686-NEXT: movl %edi, 8(%ebx)
1114+
; i686-NEXT: movl %esi, %eax
1115+
; i686-NEXT: shll %cl, %eax
1116+
; i686-NEXT: # kill: def $cl killed $cl killed $ecx
1117+
; i686-NEXT: shldl %cl, %esi, %edx
1118+
; i686-NEXT: movl %edx, 4(%ebx)
1119+
; i686-NEXT: movl %eax, (%ebx)
1120+
; i686-NEXT: movl %ebx, %eax
1121+
; i686-NEXT: leal -12(%ebp), %esp
1122+
; i686-NEXT: popl %esi
1123+
; i686-NEXT: popl %edi
1124+
; i686-NEXT: popl %ebx
1125+
; i686-NEXT: popl %ebp
1126+
; i686-NEXT: retl $4
1127+
;
1128+
; x86_64-LABEL: shift_i128_limited_shamt_unknown_lhs:
1129+
; x86_64: # %bb.0: # %start
1130+
; x86_64-NEXT: subl %edx, %ecx
1131+
; x86_64-NEXT: shldq %cl, %rdi, %rsi
1132+
; x86_64-NEXT: shlq %cl, %rdi
1133+
; x86_64-NEXT: xorl %eax, %eax
1134+
; x86_64-NEXT: testb $64, %cl
1135+
; x86_64-NEXT: cmovneq %rdi, %rsi
1136+
; x86_64-NEXT: cmoveq %rdi, %rax
1137+
; x86_64-NEXT: movq %rsi, %rdx
1138+
; x86_64-NEXT: retq
1139+
start:
1140+
%shamt = sub nuw nsw i32 %c, %b
1141+
%ext = zext nneg i32 %shamt to i128
1142+
%res = shl i128 %a, %ext
1143+
ret i128 %res
1144+
}

0 commit comments

Comments
 (0)