[X86] Allow rotate to be affected by modulo shift #156684
@llvm/pr-subscribers-backend-x86

Author: AZero13 (AZero13)

Changes: The rotate only uses the bottom 5 bits of the count (6 bits for 64-bit rotates), so we can apply the same shift-amount modulo transforms to it as well.

Patch is 25.20 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/156684.diff

3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 62073ec125e8f..36774b93e9a61 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5464,6 +5464,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
[[fallthrough]];
case ISD::SRA:
case ISD::SHL:
+ case ISD::ROTR:
+ case ISD::ROTL:
if (tryShiftAmountMod(Node))
return;
break;
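
The new ROTR/ROTL cases route rotates through the same tryShiftAmountMod path as the other shifts. As a standalone sanity check of the identity this relies on (plain C++20, not part of the patch; names are illustrative): because the hardware rotate only reads the count modulo the bit width, `amt ^ 31`, `31 - amt`, and `~amt` all select the same rotate amount, so a single `not` of the count suffices.

```cpp
// Standalone demonstration, not LLVM code: model a 32-bit rotate that, like
// x86 ROL, only reads the low 5 bits of its count, and check that the
// xor-31 and 31-minus-count forms collapse to a bitwise NOT of the count.
#include <bit>
#include <cassert>
#include <cstdint>

static uint32_t rotl_masked(uint32_t x, uint32_t amt) {
  return std::rotl(x, static_cast<int>(amt & 31)); // hardware-style 5-bit mask
}

int main() {
  const uint32_t x = 0x12345678u;
  for (uint32_t amt = 0; amt < 256; ++amt) {
    assert(rotl_masked(x, amt ^ 31u) == rotl_masked(x, ~amt)); // xor form
    assert(rotl_masked(x, 31u - amt) == rotl_masked(x, ~amt)); // sub form
  }
  return 0;
}
```

The tests below exercise exactly these patterns through the fshl/fshr rotate intrinsics, checking that the count is lowered to a `not` feeding rol/ror (or the shld/shrd sequence on 32-bit x86).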
diff --git a/llvm/test/CodeGen/X86/not-shift.ll b/llvm/test/CodeGen/X86/not-shift.ll
index 1d2fd19cff6eb..0173058449071 100644
--- a/llvm/test/CodeGen/X86/not-shift.ll
+++ b/llvm/test/CodeGen/X86/not-shift.ll
@@ -704,3 +704,521 @@ define i32 @invalid_add31(i32 %val, i32 %cnt) nounwind {
%result = shl i32 %val, %adjcnt
ret i32 %result
}
+
+;==============================================================================;
+; Rotate tests (via funnel shifts) matching the shift-count transforms theme
+;==============================================================================;
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+; 64-bit rotates with xor 63 (Size*N-1)
+; CHECK-LABEL: rotl64_xor63
+define i64 @rotl64_xor63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl64_xor63:
+; X86-NOBMI2: # %bb.0:
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: notl %ecx
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: jne .LBB16_1
+; X86-NOBMI2-NEXT: # %bb.2:
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: jmp .LBB16_3
+; X86-NOBMI2-NEXT: .LBB16_1:
+; X86-NOBMI2-NEXT: movl %esi, %edx
+; X86-NOBMI2-NEXT: movl %eax, %esi
+; X86-NOBMI2-NEXT: .LBB16_3:
+; X86-NOBMI2-NEXT: movl %esi, %eax
+; X86-NOBMI2-NEXT: shldl %cl, %edx, %eax
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT: shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: rotl64_xor63:
+; X86-BMI2: # %bb.0:
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: notl %ecx
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: jne .LBB16_1
+; X86-BMI2-NEXT: # %bb.2:
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: jmp .LBB16_3
+; X86-BMI2-NEXT: .LBB16_1:
+; X86-BMI2-NEXT: movl %esi, %edx
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: .LBB16_3:
+; X86-BMI2-NEXT: movl %esi, %eax
+; X86-BMI2-NEXT: shldl %cl, %edx, %eax
+; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT: shldl %cl, %esi, %edx
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: rotl64_xor63:
+; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: movq %rdi, %rax
+; X64-NOBMI2-NEXT: notl %ecx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT: rolq %cl, %rax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: rotl64_xor63:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movq %rsi, %rcx
+; X64-BMI2-NEXT: movq %rdi, %rax
+; X64-BMI2-NEXT: notl %ecx
+; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT: rolq %cl, %rax
+; X64-BMI2-NEXT: retq
+ %adj = xor i64 %cnt, 63
+ %r = call i64 @llvm.fshl.i64(i64 %val, i64 %val, i64 %adj)
+ ret i64 %r
+}
+
+; CHECK-LABEL: rotr64_xor63
+define i64 @rotr64_xor63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr64_xor63:
+; X86-NOBMI2: # %bb.0:
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: notl %ecx
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB17_1
+; X86-NOBMI2-NEXT: # %bb.2:
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: jmp .LBB17_3
+; X86-NOBMI2-NEXT: .LBB17_1:
+; X86-NOBMI2-NEXT: movl %esi, %edx
+; X86-NOBMI2-NEXT: movl %eax, %esi
+; X86-NOBMI2-NEXT: .LBB17_3:
+; X86-NOBMI2-NEXT: movl %esi, %eax
+; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT: shrdl %cl, %esi, %edx
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: rotr64_xor63:
+; X86-BMI2: # %bb.0:
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: notl %ecx
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB17_1
+; X86-BMI2-NEXT: # %bb.2:
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: jmp .LBB17_3
+; X86-BMI2-NEXT: .LBB17_1:
+; X86-BMI2-NEXT: movl %esi, %edx
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: .LBB17_3:
+; X86-BMI2-NEXT: movl %esi, %eax
+; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: rotr64_xor63:
+; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: movq %rdi, %rax
+; X64-NOBMI2-NEXT: notl %ecx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT: rorq %cl, %rax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: rotr64_xor63:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movq %rsi, %rcx
+; X64-BMI2-NEXT: movq %rdi, %rax
+; X64-BMI2-NEXT: notl %ecx
+; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT: rorq %cl, %rax
+; X64-BMI2-NEXT: retq
+ %adj = xor i64 %cnt, 63
+ %r = call i64 @llvm.fshr.i64(i64 %val, i64 %val, i64 %adj)
+ ret i64 %r
+}
+
+; 64-bit rotates with xor 127 (Size*N-1)
+; CHECK-LABEL: rotl64_xor127
+define i64 @rotl64_xor127(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl64_xor127:
+; X86-NOBMI2: # %bb.0:
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: notl %ecx
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: jne .LBB18_1
+; X86-NOBMI2-NEXT: # %bb.2:
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: jmp .LBB18_3
+; X86-NOBMI2-NEXT: .LBB18_1:
+; X86-NOBMI2-NEXT: movl %esi, %edx
+; X86-NOBMI2-NEXT: movl %eax, %esi
+; X86-NOBMI2-NEXT: .LBB18_3:
+; X86-NOBMI2-NEXT: movl %esi, %eax
+; X86-NOBMI2-NEXT: shldl %cl, %edx, %eax
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT: shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: rotl64_xor127:
+; X86-BMI2: # %bb.0:
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: notl %ecx
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: jne .LBB18_1
+; X86-BMI2-NEXT: # %bb.2:
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: jmp .LBB18_3
+; X86-BMI2-NEXT: .LBB18_1:
+; X86-BMI2-NEXT: movl %esi, %edx
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: .LBB18_3:
+; X86-BMI2-NEXT: movl %esi, %eax
+; X86-BMI2-NEXT: shldl %cl, %edx, %eax
+; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT: shldl %cl, %esi, %edx
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: rotl64_xor127:
+; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: movq %rdi, %rax
+; X64-NOBMI2-NEXT: notl %ecx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT: rolq %cl, %rax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: rotl64_xor127:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movq %rsi, %rcx
+; X64-BMI2-NEXT: movq %rdi, %rax
+; X64-BMI2-NEXT: notl %ecx
+; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT: rolq %cl, %rax
+; X64-BMI2-NEXT: retq
+ %adj = xor i64 %cnt, 127
+ %r = call i64 @llvm.fshl.i64(i64 %val, i64 %val, i64 %adj)
+ ret i64 %r
+}
+
+; CHECK-LABEL: rotr64_xor127
+define i64 @rotr64_xor127(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr64_xor127:
+; X86-NOBMI2: # %bb.0:
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: notl %ecx
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB19_1
+; X86-NOBMI2-NEXT: # %bb.2:
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: jmp .LBB19_3
+; X86-NOBMI2-NEXT: .LBB19_1:
+; X86-NOBMI2-NEXT: movl %esi, %edx
+; X86-NOBMI2-NEXT: movl %eax, %esi
+; X86-NOBMI2-NEXT: .LBB19_3:
+; X86-NOBMI2-NEXT: movl %esi, %eax
+; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT: shrdl %cl, %esi, %edx
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: rotr64_xor127:
+; X86-BMI2: # %bb.0:
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: notl %ecx
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB19_1
+; X86-BMI2-NEXT: # %bb.2:
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: jmp .LBB19_3
+; X86-BMI2-NEXT: .LBB19_1:
+; X86-BMI2-NEXT: movl %esi, %edx
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: .LBB19_3:
+; X86-BMI2-NEXT: movl %esi, %eax
+; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: rotr64_xor127:
+; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: movq %rdi, %rax
+; X64-NOBMI2-NEXT: notl %ecx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT: rorq %cl, %rax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: rotr64_xor127:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movq %rsi, %rcx
+; X64-BMI2-NEXT: movq %rdi, %rax
+; X64-BMI2-NEXT: notl %ecx
+; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT: rorq %cl, %rax
+; X64-BMI2-NEXT: retq
+ %adj = xor i64 %cnt, 127
+ %r = call i64 @llvm.fshr.i64(i64 %val, i64 %val, i64 %adj)
+ ret i64 %r
+}
+
+; 64-bit rotates with sub 63 (negation within mask)
+; CHECK-LABEL: rotl64_sub63
+define i64 @rotl64_sub63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl64_sub63:
+; X86-NOBMI2: # %bb.0:
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: movl $63, %ecx
+; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: jne .LBB20_1
+; X86-NOBMI2-NEXT: # %bb.2:
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: jmp .LBB20_3
+; X86-NOBMI2-NEXT: .LBB20_1:
+; X86-NOBMI2-NEXT: movl %esi, %edx
+; X86-NOBMI2-NEXT: movl %eax, %esi
+; X86-NOBMI2-NEXT: .LBB20_3:
+; X86-NOBMI2-NEXT: movl %esi, %eax
+; X86-NOBMI2-NEXT: shldl %cl, %edx, %eax
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT: shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: rotl64_sub63:
+; X86-BMI2: # %bb.0:
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl $63, %ecx
+; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: jne .LBB20_1
+; X86-BMI2-NEXT: # %bb.2:
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: jmp .LBB20_3
+; X86-BMI2-NEXT: .LBB20_1:
+; X86-BMI2-NEXT: movl %esi, %edx
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: .LBB20_3:
+; X86-BMI2-NEXT: movl %esi, %eax
+; X86-BMI2-NEXT: shldl %cl, %edx, %eax
+; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT: shldl %cl, %esi, %edx
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: rotl64_sub63:
+; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: movq %rdi, %rax
+; X64-NOBMI2-NEXT: notl %ecx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT: rolq %cl, %rax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: rotl64_sub63:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movq %rsi, %rcx
+; X64-BMI2-NEXT: movq %rdi, %rax
+; X64-BMI2-NEXT: notl %ecx
+; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT: rolq %cl, %rax
+; X64-BMI2-NEXT: retq
+ %adj = sub i64 63, %cnt
+ %r = call i64 @llvm.fshl.i64(i64 %val, i64 %val, i64 %adj)
+ ret i64 %r
+}
+
+; CHECK-LABEL: rotr64_sub63
+define i64 @rotr64_sub63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr64_sub63:
+; X86-NOBMI2: # %bb.0:
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl $63, %ecx
+; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB21_1
+; X86-NOBMI2-NEXT: # %bb.2:
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: jmp .LBB21_3
+; X86-NOBMI2-NEXT: .LBB21_1:
+; X86-NOBMI2-NEXT: movl %esi, %edx
+; X86-NOBMI2-NEXT: movl %eax, %esi
+; X86-NOBMI2-NEXT: .LBB21_3:
+; X86-NOBMI2-NEXT: movl %esi, %eax
+; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT: shrdl %cl, %esi, %edx
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: rotr64_sub63:
+; X86-BMI2: # %bb.0:
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl $63, %ecx
+; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB21_1
+; X86-BMI2-NEXT: # %bb.2:
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: jmp .LBB21_3
+; X86-BMI2-NEXT: .LBB21_1:
+; X86-BMI2-NEXT: movl %esi, %edx
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: .LBB21_3:
+; X86-BMI2-NEXT: movl %esi, %eax
+; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: rotr64_sub63:
+; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: movq %rdi, %rax
+; X64-NOBMI2-NEXT: notl %ecx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT: rorq %cl, %rax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: rotr64_sub63:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movq %rsi, %rcx
+; X64-BMI2-NEXT: movq %rdi, %rax
+; X64-BMI2-NEXT: notl %ecx
+; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT: rorq %cl, %rax
+; X64-BMI2-NEXT: retq
+ %adj = sub i64 63, %cnt
+ %r = call i64 @llvm.fshr.i64(i64 %val, i64 %val, i64 %adj)
+ ret i64 %r
+}
+
+; 32-bit rotates with xor 31
+; CHECK-LABEL: rotl32_xor31
+define i32 @rotl32_xor31(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl32_xor31:
+; X86-NOBMI2: # %bb.0:
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: notl %ecx
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT: roll %cl, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: rotl32_xor31:
+; X86-BMI2: # %bb.0:
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: notl %ecx
+; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT: roll %cl, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: rotl32_xor31:
+; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: movl %edi, %eax
+; X64-NOBMI2-NEXT: notl %ecx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT: roll %cl, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: rotl32_xor31:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movl %esi, %ecx
+; X64-BMI2-NEXT: movl %edi, %eax
+; X64-BMI2-NEXT: notl %ecx
+; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-BMI2-NEXT: roll %cl, %eax
+; X64-BMI2-NEXT: retq
+ %adj = xor i32 %cnt, 31
+ %r = call i32 @llvm.fshl.i32(i32 %val, i32 %val, i32 %adj)
+ ret i32 %r
+}
+
+; CHECK-LABEL: rotr32_xor31
+define i32 @rotr32_xor31(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr32_xor31:
+; X86-NOBMI2: # %bb.0:
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: notl %ecx
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT: rorl %cl, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: rotr32_xor31:
+; X86-BMI2: # %bb.0:
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: notl %ecx
+; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT: rorl %cl, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: rotr32_xor31:
+; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: movl %edi, %eax
+; X64-NOBMI2-NEXT: notl %ecx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT: rorl %cl, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: rotr32_xor31:
+; X64-BMI2: # %bb.0:
+; X64-BMI2-NEXT: movl %esi, %ecx
+; X64-BMI2-NEXT: movl %edi, %eax
+; X64-BMI2-NEXT: notl %ecx
+; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-BMI2-NEXT: rorl %cl, %eax
+; X64-BMI2-NEXT: retq
+ %adj = xor i32 %cnt, 31
+ %r = call i32 @llvm.fshr.i32(i32 %val, i32 %val, i32 %adj)
+ ret i32 %r
+}
+
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index 9f7ac748c47e1..34954c65aa299 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -1574,3 +1574,206 @@ define i16 @sh_trunc_sh(i64 %x) {
%r = lshr i16 %t, 12
ret i16 %r
}
+
+;==============================================================================;
+; Funnel shift (FSHL/FSHR) count canonicalizations
+; - Test that the same shift-amount transforms (negation, add/sub by bitwidth)
+; are applied to funnel shifts on X86.
+;==============================================================================;
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+; CHECK-LABEL: fshl32_by_negated:
+define i32 @fshl32_by_negated(i32 %x, i32 %shamt) {
+; X86-LABEL: fshl32_by_negated:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: roll %cl, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fshl32_by_negated:
+; X64: # %bb.0:
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %ecx
+; X64-NEXT: # kill: def $cl killed $cl kille...
[truncated]
Since rotates and funnel shifts explicitly define that they use a modulo shift amount as part of their ISD node, we can help everyone by doing some of this as a generic DAGCombiner fold. tryShiftAmountMod is mainly there to help lower ISD shift nodes, which don't have modulo behavior, to the x86 shift instructions, which do.
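
A hedged illustration of that distinction (standalone C++, not LLVM code): an IR-level `shl i32 %x, 32` is poison, so rewriting a count of `32 - amt` into `0 - amt` is only sound once the count is known to feed an instruction that masks it, as the x86 shifts do. Rotates, by contrast, are defined with modulo semantics, which is why a generic fold could cover them.

```cpp
// Standalone sketch, not LLVM code: model an x86-style SHL that reads only
// the low 5 bits of CL, and check that counts of "32 - amt" and "0 - amt"
// shift identically once masked; this is the property tryShiftAmountMod
// exploits to turn a mov+sub of the count into a single neg.
#include <cassert>
#include <cstdint>

static uint32_t shl_masked(uint32_t x, uint32_t amt) {
  return x << (amt & 31); // x86 SHL masks the count to 5 bits
}

int main() {
  const uint32_t x = 0x9e3779b9u;
  for (uint32_t amt = 0; amt < 256; ++amt)
    assert(shl_masked(x, 32u - amt) == shl_masked(x, 0u - amt));
  return 0;
}
```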
True, but that is probably best for another PR. Also, we call this for rotr on AArch64. I am also not sure that replacing `31 -` with a neg is best on all platforms, because I do not know whether other platforms have a reverse subtract. I know ARM does, but ARM is still better off with neg.