
Conversation

AZero13 (Contributor) commented Sep 11, 2025

For X86, we want to perform this fold for scalars up to the widest legal type; applying it to wider types results in bloated code.
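The fold in question rewrites a variable mask into a shift pair. A minimal LLVM IR sketch (the function name is illustrative; this is the same pattern the new test exercises):

  ; x & (-1 << y)  ==>  (x >> y) << y
  define i32 @fold_sketch(i32 %x, i32 %y) {
    %mask = shl nsw i32 -1, %y   ; variable mask with the low y bits clear
    %and  = and i32 %mask, %x    ; clears the low y bits of x
    ret i32 %and                 ; selects to shr+shl, or shrx+shlx with BMI2
  }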

llvmbot (Member) commented Sep 11, 2025

@llvm/pr-subscribers-backend-x86

Author: AZero13 (AZero13)

Changes

For X86, we want to perform this fold for scalars up to the widest legal type; applying it to wider types results in bloated code.
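The asymmetry behind the bloat: the shift-pair form needs two variable shifts, where the masked form needs one shift plus an and. A sketch of the two equivalent forms for i64 (value names illustrative):

  ; shift pair: two variable shifts
  %lo = lshr i64 %x, %y
  %r1 = shl i64 %lo, %y

  ; masked form: one variable shift plus an and
  %m  = shl i64 -1, %y
  %r2 = and i64 %m, %x

On a target where the type is wider than a register, each variable shift legalizes into a multi-instruction sequence (see the testb $32 branches in the i686 i64 test below), so the shift-pair form pays that cost twice.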


Full diff: https://github.com/llvm/llvm-project/pull/158068.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+2-5)
  • (added) llvm/test/CodeGen/X86/and-mask-variable.ll (+356)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 08ae0d52d795e..a5dc7fae4a12a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3663,11 +3663,8 @@ bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
   if (VT.isVector())
     return false;
 
-  // 64-bit shifts on 32-bit targets produce really bad bloated code.
-  if (VT == MVT::i64 && !Subtarget.is64Bit())
-    return false;
-
-  return true;
+  unsigned MaxWidth = Subtarget.is64Bit() ? 64 : 32;
+  return VT.getScalarSizeInBits() <= MaxWidth;
 }
 
 TargetLowering::ShiftLegalizationStrategy
diff --git a/llvm/test/CodeGen/X86/and-mask-variable.ll b/llvm/test/CodeGen/X86/and-mask-variable.ll
new file mode 100644
index 0000000000000..844a413391d75
--- /dev/null
+++ b/llvm/test/CodeGen/X86/and-mask-variable.ll
@@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI2
+
+define i32 @mask_pair(i32 %x, i32 %y) {
+; X86-NOBMI-LABEL: mask_pair:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: mask_pair:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: mask_pair:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: mask_pair:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: mask_pair:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl %edi, %eax
+; X64-BMI1-NEXT:    shrl %cl, %eax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shll %cl, %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: mask_pair:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    retq
+  %shl = shl nsw i32 -1, %y
+  %and = and i32 %shl, %x
+  ret i32 %and
+}
+
+define i64 @mask_pair_64(i64 %x, i64 %y) {
+; X86-NOBMI-LABEL: mask_pair_64:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl $-1, %edx
+; X86-NOBMI-NEXT:    movl $-1, %eax
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    je .LBB1_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    xorl %eax, %eax
+; X86-NOBMI-NEXT:  .LBB1_2:
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: mask_pair_64:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movl $-1, %edx
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB1_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB1_2:
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: mask_pair_64:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movl $-1, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB1_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB1_2:
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: mask_pair_64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: mask_pair_64:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movq %rdi, %rax
+; X64-BMI1-NEXT:    shrq %cl, %rax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shlq %cl, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: mask_pair_64:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    retq
+  %shl = shl nsw i64 -1, %y
+  %and = and i64 %shl, %x
+  ret i64 %and
+}
+
+define i128 @mask_pair_128(i128 %x, i128 %y) {
+; X86-NOBMI-LABEL: mask_pair_128:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 16
+; X86-NOBMI-NEXT:    subl $32, %esp
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 48
+; X86-NOBMI-NEXT:    .cfi_offset %esi, -16
+; X86-NOBMI-NEXT:    .cfi_offset %edi, -12
+; X86-NOBMI-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, (%esp)
+; X86-NOBMI-NEXT:    movl %ecx, %edx
+; X86-NOBMI-NEXT:    shrb $3, %dl
+; X86-NOBMI-NEXT:    andb $12, %dl
+; X86-NOBMI-NEXT:    negb %dl
+; X86-NOBMI-NEXT:    movsbl %dl, %ebx
+; X86-NOBMI-NEXT:    movl 24(%esp,%ebx), %edx
+; X86-NOBMI-NEXT:    movl 28(%esp,%ebx), %esi
+; X86-NOBMI-NEXT:    shldl %cl, %edx, %esi
+; X86-NOBMI-NEXT:    movl 16(%esp,%ebx), %edi
+; X86-NOBMI-NEXT:    movl 20(%esp,%ebx), %ebx
+; X86-NOBMI-NEXT:    shldl %cl, %ebx, %edx
+; X86-NOBMI-NEXT:    shldl %cl, %edi, %ebx
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI-NEXT:    shll %cl, %edi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %ebx
+; X86-NOBMI-NEXT:    movl %esi, 12(%eax)
+; X86-NOBMI-NEXT:    movl %edx, 8(%eax)
+; X86-NOBMI-NEXT:    movl %ebx, 4(%eax)
+; X86-NOBMI-NEXT:    movl %edi, (%eax)
+; X86-NOBMI-NEXT:    addl $32, %esp
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 16
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI-NEXT:    popl %ebx
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI-NEXT:    retl $4
+;
+; X86-BMI1-LABEL: mask_pair_128:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 16
+; X86-BMI1-NEXT:    subl $32, %esp
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 48
+; X86-BMI1-NEXT:    .cfi_offset %esi, -16
+; X86-BMI1-NEXT:    .cfi_offset %edi, -12
+; X86-BMI1-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $0, (%esp)
+; X86-BMI1-NEXT:    movl %ecx, %edx
+; X86-BMI1-NEXT:    shrb $3, %dl
+; X86-BMI1-NEXT:    andb $12, %dl
+; X86-BMI1-NEXT:    negb %dl
+; X86-BMI1-NEXT:    movsbl %dl, %ebx
+; X86-BMI1-NEXT:    movl 24(%esp,%ebx), %edx
+; X86-BMI1-NEXT:    movl 28(%esp,%ebx), %esi
+; X86-BMI1-NEXT:    shldl %cl, %edx, %esi
+; X86-BMI1-NEXT:    movl 16(%esp,%ebx), %edi
+; X86-BMI1-NEXT:    movl 20(%esp,%ebx), %ebx
+; X86-BMI1-NEXT:    shldl %cl, %ebx, %edx
+; X86-BMI1-NEXT:    shldl %cl, %edi, %ebx
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shll %cl, %edi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %ebx
+; X86-BMI1-NEXT:    movl %esi, 12(%eax)
+; X86-BMI1-NEXT:    movl %edx, 8(%eax)
+; X86-BMI1-NEXT:    movl %ebx, 4(%eax)
+; X86-BMI1-NEXT:    movl %edi, (%eax)
+; X86-BMI1-NEXT:    addl $32, %esp
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 16
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI1-NEXT:    retl $4
+;
+; X86-BMI2-LABEL: mask_pair_128:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT:    subl $32, %esp
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 48
+; X86-BMI2-NEXT:    .cfi_offset %esi, -16
+; X86-BMI2-NEXT:    .cfi_offset %edi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, (%esp)
+; X86-BMI2-NEXT:    movl %ecx, %edx
+; X86-BMI2-NEXT:    shrb $3, %dl
+; X86-BMI2-NEXT:    andb $12, %dl
+; X86-BMI2-NEXT:    negb %dl
+; X86-BMI2-NEXT:    movsbl %dl, %edi
+; X86-BMI2-NEXT:    movl 24(%esp,%edi), %edx
+; X86-BMI2-NEXT:    movl 28(%esp,%edi), %esi
+; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
+; X86-BMI2-NEXT:    movl 16(%esp,%edi), %ebx
+; X86-BMI2-NEXT:    movl 20(%esp,%edi), %edi
+; X86-BMI2-NEXT:    shldl %cl, %edi, %edx
+; X86-BMI2-NEXT:    shldl %cl, %ebx, %edi
+; X86-BMI2-NEXT:    shlxl %ecx, %ebx, %ecx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT:    movl %esi, 12(%eax)
+; X86-BMI2-NEXT:    movl %edx, 8(%eax)
+; X86-BMI2-NEXT:    movl %edi, 4(%eax)
+; X86-BMI2-NEXT:    movl %ecx, (%eax)
+; X86-BMI2-NEXT:    addl $32, %esp
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl $4
+;
+; X64-NOBMI-LABEL: mask_pair_128:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rdx, %rcx
+; X64-NOBMI-NEXT:    movq $-1, %rdx
+; X64-NOBMI-NEXT:    movq $-1, %r8
+; X64-NOBMI-NEXT:    shlq %cl, %r8
+; X64-NOBMI-NEXT:    xorl %eax, %eax
+; X64-NOBMI-NEXT:    testb $64, %cl
+; X64-NOBMI-NEXT:    cmovneq %r8, %rdx
+; X64-NOBMI-NEXT:    cmoveq %r8, %rax
+; X64-NOBMI-NEXT:    andq %rdi, %rax
+; X64-NOBMI-NEXT:    andq %rsi, %rdx
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: mask_pair_128:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rdx, %rcx
+; X64-BMI1-NEXT:    movq $-1, %rdx
+; X64-BMI1-NEXT:    movq $-1, %r8
+; X64-BMI1-NEXT:    shlq %cl, %r8
+; X64-BMI1-NEXT:    xorl %eax, %eax
+; X64-BMI1-NEXT:    testb $64, %cl
+; X64-BMI1-NEXT:    cmovneq %r8, %rdx
+; X64-BMI1-NEXT:    cmoveq %r8, %rax
+; X64-BMI1-NEXT:    andq %rdi, %rax
+; X64-BMI1-NEXT:    andq %rsi, %rdx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: mask_pair_128:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shlxq %rdx, %rcx, %r8
+; X64-BMI2-NEXT:    xorl %eax, %eax
+; X64-BMI2-NEXT:    testb $64, %dl
+; X64-BMI2-NEXT:    cmovneq %r8, %rcx
+; X64-BMI2-NEXT:    cmoveq %r8, %rax
+; X64-BMI2-NEXT:    andq %rdi, %rax
+; X64-BMI2-NEXT:    andq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rcx, %rdx
+; X64-BMI2-NEXT:    retq
+  %shl = shl nsw i128 -1, %y
+  %and = and i128 %shl, %x
+  ret i128 %and
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; X64: {{.*}}
+; X64-BMINOTBM: {{.*}}
+; X64-BMITBM: {{.*}}
+; X86: {{.*}}
+; X86-BMINOTBM: {{.*}}
+; X86-BMITBM: {{.*}}

AZero13 (Contributor, Author) commented Sep 11, 2025

@topperc

AZero13 (Contributor, Author) commented Oct 3, 2025

@phoebewang Thoughts?

AZero13 force-pushed the x86-shifty branch 2 times, most recently from 47a6e2d to f16e4cf on October 7, 2025 14:25
AZero13 requested a review from phoebewang on October 7, 2025 14:26
AZero13 (Contributor, Author) commented Oct 7, 2025

@phoebewang Fixed.

… to the biggest legal type

For X86, we want to do this for scalars up to the biggest legal type.
phoebewang merged commit a4a9803 into llvm:main on Oct 9, 2025
7 of 8 checks passed
AZero13 deleted the x86-shifty branch on October 9, 2025 02:04
svkeerthy pushed a commit that referenced this pull request Oct 9, 2025
… to the biggest legal type (#158068)

For X86, we want to do this for scalars up to the biggest legal type.
Having it be for types bigger results in bloated code.
clingfei pushed a commit to clingfei/llvm-project that referenced this pull request Oct 10, 2025
… to the biggest legal type (llvm#158068)

For X86, we want to do this for scalars up to the biggest legal type.
Having it be for types bigger results in bloated code.
akadutta pushed a commit to akadutta/llvm-project that referenced this pull request Oct 14, 2025
… to the biggest legal type (llvm#158068)

For X86, we want to do this for scalars up to the biggest legal type.
Having it be for types bigger results in bloated code.