
Conversation

AZero13 (Contributor) commented Sep 3, 2025

The rotate instructions use only the bottom 5 bits of the count (6 bits for 64-bit rotates), so we can apply the same shift-amount modulo transform here too.
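
To make the count identity concrete, here is a minimal standalone C++ check; this is an illustration written for this writeup, not code from the patch. It verifies that, modulo the rotate width, xor-with-31, subtract-from-31, and bitwise-not of the count all agree, which is why the xor/sub can be folded into a single not:

#include <cassert>
#include <cstdint>

// Portable rotate-left; the count is reduced modulo 32, matching how x86
// rol/ror mask a 32-bit rotate count to its bottom 5 bits.
static uint32_t rotl32(uint32_t x, uint32_t c) {
  c &= 31;
  return c == 0 ? x : (x << c) | (x >> (32 - c));
}

int main() {
  const uint32_t x = 0xDEADBEEFu;
  for (uint32_t c = 0; c < 256; ++c) {
    // (c ^ 31), (31 - c), and ~c are all congruent modulo 32, so the
    // adjusted rotate amount can be materialized with a plain not.
    assert(rotl32(x, c ^ 31) == rotl32(x, ~c));
    assert(rotl32(x, 31 - c) == rotl32(x, ~c));
  }
  return 0;
}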

llvmbot (Member) commented Sep 3, 2025

@llvm/pr-subscribers-backend-x86

Author: AZero13 (AZero13)

Changes

The rotate instructions use only the bottom 5 bits of the count (6 bits for 64-bit rotates), so we can apply the same shift-amount modulo transform here too.


Patch is 25.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156684.diff

3 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelDAGToDAG.cpp (+2)
  • (modified) llvm/test/CodeGen/X86/not-shift.ll (+518)
  • (modified) llvm/test/CodeGen/X86/shift-amount-mod.ll (+203)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 62073ec125e8f..36774b93e9a61 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5464,6 +5464,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     [[fallthrough]];
   case ISD::SRA:
   case ISD::SHL:
+  case ISD::ROTR:
+  case ISD::ROTL:
     if (tryShiftAmountMod(Node))
       return;
     break;
diff --git a/llvm/test/CodeGen/X86/not-shift.ll b/llvm/test/CodeGen/X86/not-shift.ll
index 1d2fd19cff6eb..0173058449071 100644
--- a/llvm/test/CodeGen/X86/not-shift.ll
+++ b/llvm/test/CodeGen/X86/not-shift.ll
@@ -704,3 +704,521 @@ define i32 @invalid_add31(i32 %val, i32 %cnt) nounwind {
   %result = shl i32 %val, %adjcnt
   ret i32 %result
 }
+
+;==============================================================================;
+; Rotate tests (via funnel shifts) matching the shift-count transforms theme
+;==============================================================================;
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+; 64-bit rotates with xor 63 (Size*N-1)
+; CHECK-LABEL: rotl64_xor63
+define i64 @rotl64_xor63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl64_xor63:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    jne .LBB16_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB16_3
+; X86-NOBMI2-NEXT:  .LBB16_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB16_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotl64_xor63:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB16_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB16_3
+; X86-BMI2-NEXT:  .LBB16_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB16_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotl64_xor63:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rolq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotl64_xor63:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rolq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i64 %cnt, 63
+  %r = call i64 @llvm.fshl.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; CHECK-LABEL: rotr64_xor63
+define i64 @rotr64_xor63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr64_xor63:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB17_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB17_3
+; X86-NOBMI2-NEXT:  .LBB17_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB17_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotr64_xor63:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB17_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB17_3
+; X86-BMI2-NEXT:  .LBB17_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB17_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotr64_xor63:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rorq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotr64_xor63:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rorq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i64 %cnt, 63
+  %r = call i64 @llvm.fshr.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; 64-bit rotates with xor 127 (Size*N-1)
+; CHECK-LABEL: rotl64_xor127
+define i64 @rotl64_xor127(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl64_xor127:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    jne .LBB18_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB18_3
+; X86-NOBMI2-NEXT:  .LBB18_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB18_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotl64_xor127:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB18_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB18_3
+; X86-BMI2-NEXT:  .LBB18_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB18_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotl64_xor127:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rolq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotl64_xor127:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rolq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i64 %cnt, 127
+  %r = call i64 @llvm.fshl.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; CHECK-LABEL: rotr64_xor127
+define i64 @rotr64_xor127(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr64_xor127:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB19_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB19_3
+; X86-NOBMI2-NEXT:  .LBB19_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB19_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotr64_xor127:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB19_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB19_3
+; X86-BMI2-NEXT:  .LBB19_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB19_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotr64_xor127:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rorq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotr64_xor127:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rorq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i64 %cnt, 127
+  %r = call i64 @llvm.fshr.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; 64-bit rotates with sub 63 (negation within mask)
+; CHECK-LABEL: rotl64_sub63
+define i64 @rotl64_sub63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl64_sub63:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl $63, %ecx
+; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    jne .LBB20_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB20_3
+; X86-NOBMI2-NEXT:  .LBB20_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB20_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotl64_sub63:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl $63, %ecx
+; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB20_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB20_3
+; X86-BMI2-NEXT:  .LBB20_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB20_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotl64_sub63:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rolq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotl64_sub63:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rolq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = sub i64 63, %cnt
+  %r = call i64 @llvm.fshl.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; CHECK-LABEL: rotr64_sub63
+define i64 @rotr64_sub63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr64_sub63:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl $63, %ecx
+; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB21_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB21_3
+; X86-NOBMI2-NEXT:  .LBB21_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB21_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotr64_sub63:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl $63, %ecx
+; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB21_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB21_3
+; X86-BMI2-NEXT:  .LBB21_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB21_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotr64_sub63:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rorq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotr64_sub63:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rorq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = sub i64 63, %cnt
+  %r = call i64 @llvm.fshr.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; 32-bit rotates with xor 31
+; CHECK-LABEL: rotl32_xor31
+define i32 @rotl32_xor31(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl32_xor31:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    roll %cl, %eax
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotl32_xor31:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    roll %cl, %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotl32_xor31:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    roll %cl, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotl32_xor31:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl %esi, %ecx
+; X64-BMI2-NEXT:    movl %edi, %eax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI2-NEXT:    roll %cl, %eax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i32 %cnt, 31
+  %r = call i32 @llvm.fshl.i32(i32 %val, i32 %val, i32 %adj)
+  ret i32 %r
+}
+
+; CHECK-LABEL: rotr32_xor31
+define i32 @rotr32_xor31(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr32_xor31:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    rorl %cl, %eax
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotr32_xor31:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    rorl %cl, %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotr32_xor31:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    rorl %cl, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotr32_xor31:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl %esi, %ecx
+; X64-BMI2-NEXT:    movl %edi, %eax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI2-NEXT:    rorl %cl, %eax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i32 %cnt, 31
+  %r = call i32 @llvm.fshr.i32(i32 %val, i32 %val, i32 %adj)
+  ret i32 %r
+}
+
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index 9f7ac748c47e1..34954c65aa299 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -1574,3 +1574,206 @@ define i16 @sh_trunc_sh(i64 %x) {
   %r = lshr i16 %t, 12
   ret i16 %r
 }
+
+;==============================================================================;
+; Funnel shift (FSHL/FSHR) count canonicalizations
+; - Test that the same shift-amount transforms (negation, add/sub by bitwidth)
+;   are applied to funnel shifts on X86.
+;==============================================================================;
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+; CHECK-LABEL: fshl32_by_negated:
+define i32 @fshl32_by_negated(i32 %x, i32 %shamt) {
+; X86-LABEL: fshl32_by_negated:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    roll %cl, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fshl32_by_negated:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %ecx
+; X64-NEXT:    # kill: def $cl killed $cl kille...
[truncated]

AZero13 (Contributor, Author) commented Sep 3, 2025

@topperc @phoebewang

RKSimon (Collaborator) commented Sep 3, 2025

Since rotates and funnel shifts explicitly define their amounts as modulo the bit width as part of their ISD nodes, we can help everyone by doing some of this as a generic DAGCombiner fold. tryShiftAmountMod is mainly there to help lower ISD shift nodes, which don't have modulo behavior, to the x86 shift instructions, which do.
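
To sketch what such a generic fold could look like (illustrative only, not code from this PR; the function and its name are made up, though the SelectionDAG APIs used are real): because ISD::ROTL/ROTR amounts are defined modulo the bit width BW, rot(x, c ^ (BW-1)) can be rewritten as rot(x, ~c).

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Hypothetical DAGCombiner-style fold, shown only to illustrate the idea:
// rot(x, xor(c, BW-1)) -> rot(x, not(c)), valid because the rotate amount
// is interpreted modulo the bit width BW.
static SDValue foldRotateAmountXor(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::ROTL || N->getOpcode() == ISD::ROTR);
  SDValue Amt = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned BW = VT.getScalarSizeInBits();
  if (Amt.getOpcode() != ISD::XOR)
    return SDValue();
  // The xor mask must be exactly BW-1 (e.g. 31 or 63) for the identity
  // xor(c, BW-1) == not(c) (mod BW) to hold.
  ConstantSDNode *Mask = isConstOrConstSplat(Amt.getOperand(1));
  if (!Mask || Mask->getAPIntValue() != BW - 1)
    return SDValue();
  SDLoc DL(N);
  SDValue NotC = DAG.getNOT(DL, Amt.getOperand(0), Amt.getValueType());
  return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), NotC);
}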

AZero13 (Contributor, Author) commented Sep 3, 2025

> Since rotates and funnel shifts explicitly define their amounts as modulo the bit width as part of their ISD nodes, we can help everyone by doing some of this as a generic DAGCombiner fold. tryShiftAmountMod is mainly there to help lower ISD shift nodes, which don't have modulo behavior, to the x86 shift instructions, which do.

True, but that is probably best left for another PR. Also, we call this for rotr on AArch64. And I am not sure that replacing `31 - cnt` with a negate is best on all platforms, because I do not know whether other platforms have a reverse-subtract instruction. I know ARM does, but ARM is also better off with neg.

AZero13 closed this Sep 3, 2025
AZero13 deleted the materialize-0 branch September 3, 2025 22:21