@@ -1161,6 +1161,170 @@ entry:
11611161 ret i64 %rem
11621162}
11631163
; PR137514
; Unsigned divide by 13835058055282163712 (= 3 * 2^62).
; The X64 checks expect a multiply by 0xAAAAAAAAAAAAAAAB whose high half is
; shifted right by 63; the X86 checks expect a call to __udivdi3.
define i64 @udiv_i64_magic_large_postshift(i64 %x) nounwind {
; X86-LABEL: udiv_i64_magic_large_postshift:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl $-1073741824 # imm = 0xC0000000
; X86-NEXT: pushl $0
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
;
; X64-LABEL: udiv_i64_magic_large_postshift:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: shrq $63, %rax
; X64-NEXT: retq
  %quot = udiv i64 %x, 13835058055282163712
  ret i64 %quot
}
1188+
; PR137514
; Unsigned remainder by 13835058055282163712 (= 3 * 2^62).
; The X64 checks expect the high half of a multiply by 0xAAAAAAAAAAAAAAAB to be
; shifted right by one, masked with 0x4000000000000000 and added to the input;
; the X86 checks expect a call to __umoddi3.
define i64 @urem_i64_magic_large_postshift(i64 %x) nounwind {
; X86-LABEL: urem_i64_magic_large_postshift:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl $-1073741824 # imm = 0xC0000000
; X86-NEXT: pushl $0
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __umoddi3
; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
;
; X64-LABEL: urem_i64_magic_large_postshift:
; X64: # %bb.0:
; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: mulq %rcx
; X64-NEXT: shrq %rdx
; X64-NEXT: movabsq $4611686018427387904, %rax # imm = 0x4000000000000000
; X64-NEXT: andq %rdx, %rax
; X64-NEXT: addq %rdi, %rax
; X64-NEXT: retq
  %rem = urem i64 %x, 13835058055282163712
  ret i64 %rem
}
1215+
; PR137514
; Unsigned divide by 60129542144 (= 14 * 2^32).
; The X64 checks expect the input to be shifted right by 33 before it is
; multiplied by 0x2492492492492493, with the high half of the product returned;
; the X86 checks expect a call to __udivdi3.
define i64 @udiv_i64_magic_large_preshift(i64 %x) nounwind {
; X86-LABEL: udiv_i64_magic_large_preshift:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl $14
; X86-NEXT: pushl $0
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
;
; X64-LABEL: udiv_i64_magic_large_preshift:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $33, %rax
; X64-NEXT: movabsq $2635249153387078803, %rcx # imm = 0x2492492492492493
; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: retq
  %quot = udiv i64 %x, 60129542144
  ret i64 %quot
}
1240+
; PR137514
; Unsigned remainder by 60129542144 (= 14 * 2^32).
; The X64 checks expect the input to be shifted right by 33 and multiplied by
; 0x2492492492492493; the high half of that product is then multiplied by the
; divisor and subtracted from the input. The X86 checks expect a call to
; __umoddi3.
define i64 @urem_i64_magic_large_preshift(i64 %x) nounwind {
; X86-LABEL: urem_i64_magic_large_preshift:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl $14
; X86-NEXT: pushl $0
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __umoddi3
; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
;
; X64-LABEL: urem_i64_magic_large_preshift:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $33, %rax
; X64-NEXT: movabsq $2635249153387078803, %rcx # imm = 0x2492492492492493
; X64-NEXT: mulq %rcx
; X64-NEXT: movabsq $60129542144, %rax # imm = 0xE00000000
; X64-NEXT: imulq %rdx, %rax
; X64-NEXT: subq %rax, %rdi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: retq
  %rem = urem i64 %x, 60129542144
  ret i64 %rem
}
1268+
; PR137514
; Unsigned divide by 844429225099263 (= 3 * 2^48 + 2^32 - 1).
; The X64 checks expect a multiply by 0x5554E38E5ED0FCD7 followed by the
; subtract / shift-by-one / add fixup on the high half, then a right shift by
; 49; the X86 checks expect a call to __udivdi3.
define i64 @udiv_i64_magic_is_add(i64 %x) nounwind {
; X86-LABEL: udiv_i64_magic_is_add:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl $196608 # imm = 0x30000
; X86-NEXT: pushl $-1
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __udivdi3
; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
;
; X64-LABEL: udiv_i64_magic_is_add:
; X64: # %bb.0:
; X64-NEXT: movabsq $6148789591883185367, %rcx # imm = 0x5554E38E5ED0FCD7
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: mulq %rcx
; X64-NEXT: subq %rdx, %rdi
; X64-NEXT: shrq %rdi
; X64-NEXT: leaq (%rdi,%rdx), %rax
; X64-NEXT: shrq $49, %rax
; X64-NEXT: retq
  %quot = udiv i64 %x, 844429225099263
  ret i64 %quot
}
1295+
; PR137514
; Unsigned remainder by 844429225099263 (= 3 * 2^48 + 2^32 - 1).
; The X64 checks expect a multiply by 0x5554E38E5ED0FCD7, the subtract /
; shift-by-one / add fixup on the high half and a right shift by 49; that
; value is then multiplied by the divisor and subtracted from the input. The
; X86 checks expect a call to __umoddi3.
define i64 @urem_i64_magic_is_add(i64 %x) nounwind {
; X86-LABEL: urem_i64_magic_is_add:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl $196608 # imm = 0x30000
; X86-NEXT: pushl $-1
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __umoddi3
; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
;
; X64-LABEL: urem_i64_magic_is_add:
; X64: # %bb.0:
; X64-NEXT: movabsq $6148789591883185367, %rcx # imm = 0x5554E38E5ED0FCD7
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: mulq %rcx
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: subq %rdx, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: addq %rdx, %rax
; X64-NEXT: shrq $49, %rax
; X64-NEXT: movabsq $844429225099263, %rcx # imm = 0x30000FFFFFFFF
; X64-NEXT: imulq %rax, %rcx
; X64-NEXT: subq %rcx, %rdi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: retq
  %rem = urem i64 %x, 844429225099263
  ret i64 %rem
}
1327+
11641328; Make sure we don't inline expand for optsize.
11651329define i64 @urem_i64_3_optsize (i64 %x ) nounwind optsize {
11661330; X86-LABEL: urem_i64_3_optsize:
0 commit comments