
Conversation

AZero13 (Contributor) commented Sep 3, 2025

The rotate instructions use only the bottom 5 bits of the count (6 bits for 64-bit rotates), so we can apply the same shift-amount modulo transform here too.
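
To make the count identity concrete, here is a minimal standalone C++ check; this is an illustration written for this writeup, not code from the patch. It verifies that, modulo the rotate width, xor-with-31, subtract-from-31, and bitwise-not of the count all agree, which is why the xor/sub can be folded into a single not:

#include <cassert>
#include <cstdint>

// Portable rotate-left; the count is reduced modulo 32, matching how x86
// rol/ror mask a 32-bit rotate count to its bottom 5 bits.
static uint32_t rotl32(uint32_t x, uint32_t c) {
  c &= 31;
  return c == 0 ? x : (x << c) | (x >> (32 - c));
}

int main() {
  const uint32_t x = 0xDEADBEEFu;
  for (uint32_t c = 0; c < 256; ++c) {
    // (c ^ 31), (31 - c), and ~c are all congruent modulo 32, so the
    // adjusted rotate amount can be materialized with a plain not.
    assert(rotl32(x, c ^ 31) == rotl32(x, ~c));
    assert(rotl32(x, 31 - c) == rotl32(x, ~c));
  }
  return 0;
}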

llvmbot (Member) commented Sep 3, 2025

@llvm/pr-subscribers-backend-x86

Author: AZero13 (AZero13)

Changes

The rotate instructions use only the bottom 5 bits of the count (6 bits for 64-bit rotates), so we can apply the same shift-amount modulo transform here too.


Patch is 25.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156684.diff

3 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelDAGToDAG.cpp (+2)
  • (modified) llvm/test/CodeGen/X86/not-shift.ll (+518)
  • (modified) llvm/test/CodeGen/X86/shift-amount-mod.ll (+203)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 62073ec125e8f..36774b93e9a61 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5464,6 +5464,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     [[fallthrough]];
   case ISD::SRA:
   case ISD::SHL:
+  case ISD::ROTR:
+  case ISD::ROTL:
     if (tryShiftAmountMod(Node))
       return;
     break;
diff --git a/llvm/test/CodeGen/X86/not-shift.ll b/llvm/test/CodeGen/X86/not-shift.ll
index 1d2fd19cff6eb..0173058449071 100644
--- a/llvm/test/CodeGen/X86/not-shift.ll
+++ b/llvm/test/CodeGen/X86/not-shift.ll
@@ -704,3 +704,521 @@ define i32 @invalid_add31(i32 %val, i32 %cnt) nounwind {
   %result = shl i32 %val, %adjcnt
   ret i32 %result
 }
+
+;==============================================================================;
+; Rotate tests (via funnel shifts) matching the shift-count transforms theme
+;==============================================================================;
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+; 64-bit rotates with xor 63 (Size*N-1)
+; CHECK-LABEL: rotl64_xor63
+define i64 @rotl64_xor63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl64_xor63:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    jne .LBB16_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB16_3
+; X86-NOBMI2-NEXT:  .LBB16_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB16_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotl64_xor63:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB16_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB16_3
+; X86-BMI2-NEXT:  .LBB16_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB16_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotl64_xor63:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rolq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotl64_xor63:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rolq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i64 %cnt, 63
+  %r = call i64 @llvm.fshl.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; CHECK-LABEL: rotr64_xor63
+define i64 @rotr64_xor63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr64_xor63:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB17_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB17_3
+; X86-NOBMI2-NEXT:  .LBB17_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB17_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotr64_xor63:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB17_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB17_3
+; X86-BMI2-NEXT:  .LBB17_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB17_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotr64_xor63:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rorq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotr64_xor63:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rorq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i64 %cnt, 63
+  %r = call i64 @llvm.fshr.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; 64-bit rotates with xor 127 (Size*N-1)
+; CHECK-LABEL: rotl64_xor127
+define i64 @rotl64_xor127(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl64_xor127:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    jne .LBB18_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB18_3
+; X86-NOBMI2-NEXT:  .LBB18_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB18_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotl64_xor127:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB18_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB18_3
+; X86-BMI2-NEXT:  .LBB18_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB18_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotl64_xor127:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rolq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotl64_xor127:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rolq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i64 %cnt, 127
+  %r = call i64 @llvm.fshl.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; CHECK-LABEL: rotr64_xor127
+define i64 @rotr64_xor127(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr64_xor127:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB19_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB19_3
+; X86-NOBMI2-NEXT:  .LBB19_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB19_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotr64_xor127:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB19_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB19_3
+; X86-BMI2-NEXT:  .LBB19_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB19_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotr64_xor127:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rorq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotr64_xor127:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rorq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i64 %cnt, 127
+  %r = call i64 @llvm.fshr.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; 64-bit rotates with sub 63 (negation within mask)
+; CHECK-LABEL: rotl64_sub63
+define i64 @rotl64_sub63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl64_sub63:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl $63, %ecx
+; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    jne .LBB20_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB20_3
+; X86-NOBMI2-NEXT:  .LBB20_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB20_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotl64_sub63:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl $63, %ecx
+; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB20_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB20_3
+; X86-BMI2-NEXT:  .LBB20_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB20_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shldl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotl64_sub63:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rolq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotl64_sub63:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rolq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = sub i64 63, %cnt
+  %r = call i64 @llvm.fshl.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; CHECK-LABEL: rotr64_sub63
+define i64 @rotr64_sub63(i64 %val, i64 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr64_sub63:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl $63, %ecx
+; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    testb $32, %cl
+; X86-NOBMI2-NEXT:    je .LBB21_1
+; X86-NOBMI2-NEXT:  # %bb.2:
+; X86-NOBMI2-NEXT:    movl %eax, %edx
+; X86-NOBMI2-NEXT:    jmp .LBB21_3
+; X86-NOBMI2-NEXT:  .LBB21_1:
+; X86-NOBMI2-NEXT:    movl %esi, %edx
+; X86-NOBMI2-NEXT:    movl %eax, %esi
+; X86-NOBMI2-NEXT:  .LBB21_3:
+; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotr64_sub63:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl $63, %ecx
+; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB21_1
+; X86-BMI2-NEXT:  # %bb.2:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    jmp .LBB21_3
+; X86-BMI2-NEXT:  .LBB21_1:
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:  .LBB21_3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotr64_sub63:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    movq %rdi, %rax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT:    rorq %cl, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotr64_sub63:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rdi, %rax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT:    rorq %cl, %rax
+; X64-BMI2-NEXT:    retq
+  %adj = sub i64 63, %cnt
+  %r = call i64 @llvm.fshr.i64(i64 %val, i64 %val, i64 %adj)
+  ret i64 %r
+}
+
+; 32-bit rotates with xor 31
+; CHECK-LABEL: rotl32_xor31
+define i32 @rotl32_xor31(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotl32_xor31:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    roll %cl, %eax
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotl32_xor31:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    roll %cl, %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotl32_xor31:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    roll %cl, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotl32_xor31:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl %esi, %ecx
+; X64-BMI2-NEXT:    movl %edi, %eax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI2-NEXT:    roll %cl, %eax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i32 %cnt, 31
+  %r = call i32 @llvm.fshl.i32(i32 %val, i32 %val, i32 %adj)
+  ret i32 %r
+}
+
+; CHECK-LABEL: rotr32_xor31
+define i32 @rotr32_xor31(i32 %val, i32 %cnt) nounwind {
+; X86-NOBMI2-LABEL: rotr32_xor31:
+; X86-NOBMI2:       # %bb.0:
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    notl %ecx
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT:    rorl %cl, %eax
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: rotr32_xor31:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    notl %ecx
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT:    rorl %cl, %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: rotr32_xor31:
+; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %edi, %eax
+; X64-NOBMI2-NEXT:    notl %ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    rorl %cl, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: rotr32_xor31:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl %esi, %ecx
+; X64-BMI2-NEXT:    movl %edi, %eax
+; X64-BMI2-NEXT:    notl %ecx
+; X64-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI2-NEXT:    rorl %cl, %eax
+; X64-BMI2-NEXT:    retq
+  %adj = xor i32 %cnt, 31
+  %r = call i32 @llvm.fshr.i32(i32 %val, i32 %val, i32 %adj)
+  ret i32 %r
+}
+
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index 9f7ac748c47e1..34954c65aa299 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -1574,3 +1574,206 @@ define i16 @sh_trunc_sh(i64 %x) {
   %r = lshr i16 %t, 12
   ret i16 %r
 }
+
+;==============================================================================;
+; Funnel shift (FSHL/FSHR) count canonicalizations
+; - Test that the same shift-amount transforms (negation, add/sub by bitwidth)
+;   are applied to funnel shifts on X86.
+;==============================================================================;
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+; CHECK-LABEL: fshl32_by_negated:
+define i32 @fshl32_by_negated(i32 %x, i32 %shamt) {
+; X86-LABEL: fshl32_by_negated:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    roll %cl, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fshl32_by_negated:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %ecx
+; X64-NEXT:    # kill: def $cl killed $cl kille...
[truncated]

AZero13 (Contributor, Author) commented Sep 3, 2025

@topperc @phoebewang

RKSimon (Collaborator) commented Sep 3, 2025

Since rotates and funnel shifts explicitly define their amounts as modulo the bit width as part of their ISD nodes, we can help everyone by doing some of this as a generic DAGCombiner fold. tryShiftAmountMod is mainly there to help lower ISD shift nodes, which don't have modulo behavior, to the x86 shift instructions, which do.
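
To sketch what such a generic fold could look like (illustrative only, not code from this PR; the function and its name are made up, though the SelectionDAG APIs used are real): because ISD::ROTL/ROTR amounts are defined modulo the bit width BW, rot(x, c ^ (BW-1)) can be rewritten as rot(x, ~c).

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Hypothetical DAGCombiner-style fold, shown only to illustrate the idea:
// rot(x, xor(c, BW-1)) -> rot(x, not(c)), valid because the rotate amount
// is interpreted modulo the bit width BW.
static SDValue foldRotateAmountXor(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::ROTL || N->getOpcode() == ISD::ROTR);
  SDValue Amt = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned BW = VT.getScalarSizeInBits();
  if (Amt.getOpcode() != ISD::XOR)
    return SDValue();
  // The xor mask must be exactly BW-1 (e.g. 31 or 63) for the identity
  // xor(c, BW-1) == not(c) (mod BW) to hold.
  ConstantSDNode *Mask = isConstOrConstSplat(Amt.getOperand(1));
  if (!Mask || Mask->getAPIntValue() != BW - 1)
    return SDValue();
  SDLoc DL(N);
  SDValue NotC = DAG.getNOT(DL, Amt.getOperand(0), Amt.getValueType());
  return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), NotC);
}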

AZero13 (Contributor, Author) commented Sep 3, 2025

> Since rotates and funnel shifts explicitly define their amounts as modulo the bit width as part of their ISD nodes, we can help everyone by doing some of this as a generic DAGCombiner fold. tryShiftAmountMod is mainly there to help lower ISD shift nodes, which don't have modulo behavior, to the x86 shift instructions, which do.

True, but that is probably best left for another PR. Also, we call this for rotr on AArch64. And I am not sure that replacing `31 - cnt` with a negate is best on all platforms, because I do not know whether other platforms have a reverse-subtract instruction. I know ARM does, but ARM is also better off with neg.

AZero13 closed this Sep 3, 2025
AZero13 deleted the materialize-0 branch September 3, 2025 22:21