Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58881,6 +58881,15 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
return Ld->getBasePtr() == St->getBasePtr();
};

// Returns true when Op has exactly one use and that user is an
// ISD::ZERO_EXTEND whose result type is i32 or i64; returns false otherwise.
auto IsFoldableZext = [](SDValue Op) {
// Only a value with a single use is considered.
if (!Op.hasOneUse())
return false;
// Inspect the user reached through the first entry of the use list.
SDNode *User = *Op->use_begin();
// Result type of the user (its value number 0).
EVT VT = User->getValueType(0);
return (User->getOpcode() == ISD::ZERO_EXTEND &&
(VT == MVT::i32 || VT == MVT::i64));
};

bool Commute = false;
switch (Op.getOpcode()) {
default: return false;
Expand All @@ -58897,8 +58906,15 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
return false;
break;
}
case ISD::ADD:
case ISD::MUL:
// When ZU is enabled, we prefer to not promote for MUL by a constant
// when there is an opportunity to fold a zext with imulzu.
if (Subtarget.hasZU() && IsFoldableZext(Op) &&
(isa<ConstantSDNode>(Op.getOperand(0)) ||
isa<ConstantSDNode>(Op.getOperand(1))))
return false;
[[fallthrough]];
case ISD::ADD:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/X86/X86InstrCompiler.td
Original file line number Diff line number Diff line change
Expand Up @@ -2184,6 +2184,18 @@ multiclass EFLAGSDefiningPats<string suffix, Predicate p> {
defm : EFLAGSDefiningPats<"", NoNDD>;
defm : EFLAGSDefiningPats<"_ND", HasNDD>;

// Selection patterns guarded by the HasZU predicate.
// Each one matches an i16 multiply by an immediate whose result is
// zero-extended to i32 or i64, selects the 16-bit IMULZU instruction
// (register source: IMULZU16rri, loaded source: IMULZU16rmi), and wraps the
// result in SUBREG_TO_REG at the sub_16bit subregister index of the wider type.
let Predicates = [HasZU] in {
// zext (mul reg/mem, imm) -> imulzu
// i32 result, register source operand.
def : Pat<(i32 (zext (i16 (mul GR16:$src1, imm:$src2)))),
(SUBREG_TO_REG (i32 0), (IMULZU16rri GR16:$src1, imm:$src2), sub_16bit)>;
// i32 result, source operand loaded from memory.
def : Pat<(i32 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))),
(SUBREG_TO_REG (i32 0), (IMULZU16rmi addr:$src1, imm:$src2), sub_16bit)>;
// i64 result, register source operand.
def : Pat<(i64 (zext (i16 (mul GR16:$src1, imm:$src2)))),
(SUBREG_TO_REG (i64 0), (IMULZU16rri GR16:$src1, imm:$src2), sub_16bit)>;
// i64 result, source operand loaded from memory.
def : Pat<(i64 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))),
(SUBREG_TO_REG (i64 0), (IMULZU16rmi addr:$src1, imm:$src2), sub_16bit)>;
}

// mul reg, imm
def : Pat<(mul GR16:$src1, imm:$src2),
(IMUL16rri GR16:$src1, imm:$src2)>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86InstrPredicates.td
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def NoEGPR : Predicate<"!Subtarget->hasEGPR()">;
// entries, so that the NDD variant can be selected first to benefit RA.
def HasNDD : Predicate<"Subtarget->hasNDD()">;
def NoNDD : Predicate<"!Subtarget->hasNDD()">;
def HasZU : Predicate<"Subtarget->hasZU()">;
def HasCF : Predicate<"Subtarget->hasCF()">;
def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;
Expand Down
226 changes: 226 additions & 0 deletions llvm/test/CodeGen/X86/apx/imulzu.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mattr=+zu | FileCheck %s --check-prefixes=CHECK,ZU
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOZU

; Test generation of 16b imulzu when -mattr=+zu is specified.
; The mulzu_* tests check for basic generation, which is limited to cases where a
; zero-extend of the result can be folded into imulzu.
; The remaining tests are modifications of selected test/CodeGen/X86/imul.ll tests with
; 16b multiplies, to check that common strength reductions in ISel are still performed
; when -mattr=+zu is in effect.
;
; FIXME: Several cases from imul.ll covering DAG combines, in particular those using LEA,
; are not ported, because X86's IsDesirableToPromoteOp has no way to accurately identify
; when promotion will permit a better sequence than an unpromoted imulzu.
; These cases should be added once that identification is implemented.

define i32 @mulzu_16_32(i16 %A) {
; ZU-LABEL: mulzu_16_32:
; ZU: # %bb.0:
; ZU-NEXT: imulzuw $1234, %di, %ax # imm = 0x4D2
; ZU-NEXT: retq
;
; NOZU-LABEL: mulzu_16_32:
; NOZU: # %bb.0:
; NOZU-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
; NOZU-NEXT: movzwl %ax, %eax
; NOZU-NEXT: retq
%mul = mul i16 %A, 1234
%r = zext i16 %mul to i32
ret i32 %r
}

define i64 @mulzu_16_64(i16 %A) {
; ZU-LABEL: mulzu_16_64:
; ZU: # %bb.0:
; ZU-NEXT: imulzuw $1234, %di, %ax # imm = 0x4D2
; ZU-NEXT: retq
;
; NOZU-LABEL: mulzu_16_64:
; NOZU: # %bb.0:
; NOZU-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
; NOZU-NEXT: movzwl %ax, %eax
; NOZU-NEXT: retq
%mul = mul i16 %A, 1234
%r = zext i16 %mul to i64
ret i64 %r
}

define i32 @mulzu_16_32_mem(ptr %P) {
; ZU-LABEL: mulzu_16_32_mem:
; ZU: # %bb.0:
; ZU-NEXT: imulzuw $1234, (%rdi), %ax # imm = 0x4D2
; ZU-NEXT: retq
;
; NOZU-LABEL: mulzu_16_32_mem:
; NOZU: # %bb.0:
; NOZU-NEXT: movzwl (%rdi), %eax
; NOZU-NEXT: imull $1234, %eax, %eax # imm = 0x4D2
; NOZU-NEXT: movzwl %ax, %eax
; NOZU-NEXT: retq
%gep = getelementptr i16, ptr %P, i64 0
%A = load i16, ptr %gep
%mul = mul i16 %A, 1234
%r = zext i16 %mul to i32
ret i32 %r
}

define i64 @mulzu_16_64_mem(ptr %P) {
; ZU-LABEL: mulzu_16_64_mem:
; ZU: # %bb.0:
; ZU-NEXT: imulzuw $1234, (%rdi), %ax # imm = 0x4D2
; ZU-NEXT: retq
;
; NOZU-LABEL: mulzu_16_64_mem:
; NOZU: # %bb.0:
; NOZU-NEXT: movzwl (%rdi), %eax
; NOZU-NEXT: imull $1234, %eax, %eax # imm = 0x4D2
; NOZU-NEXT: movzwl %ax, %eax
; NOZU-NEXT: retq
%gep = getelementptr i16, ptr %P, i64 0
%A = load i16, ptr %gep
%mul = mul i16 %A, 1234
%r = zext i16 %mul to i64
ret i64 %r
}

; The following mulzu cases check that imulzu is not
; generated in the absence of a single zext user. The ZU/NOZU
; cases should match.

define void @mulzu_16_store(i16 %A, ptr %R) {
; CHECK-LABEL: mulzu_16_store:
; CHECK: # %bb.0:
; CHECK-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
; CHECK-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq
%gep = getelementptr i16, ptr %R, i64 0
%mul = mul i16 %A, 1234
store i16 %mul, ptr %gep
ret void
}

define i32 @mulzu_16_store_32(i16 %A, ptr %R) {
; CHECK-LABEL: mulzu_16_store_32:
; CHECK: # %bb.0:
; CHECK-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
; CHECK-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: retq
%gep = getelementptr i16, ptr %R, i64 0
%mul = mul i16 %A, 1234
store i16 %mul, ptr %gep
%r = zext i16 %mul to i32
ret i32 %r
}

define i64 @mulzu_16_store_64(i16 %A, ptr %R) {
; CHECK-LABEL: mulzu_16_store_64:
; CHECK: # %bb.0:
; CHECK-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
; CHECK-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: retq
%gep = getelementptr i16, ptr %R, i64 0
%mul = mul i16 %A, 1234
store i16 %mul, ptr %gep
%r = zext i16 %mul to i64
ret i64 %r
}

define i32 @mulzu_sext_16_32(i16 %A) {
; CHECK-LABEL: mulzu_sext_16_32:
; CHECK: # %bb.0:
; CHECK-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
; CHECK-NEXT: cwtl
; CHECK-NEXT: retq
%mul = mul i16 %A, 1234
%r = sext i16 %mul to i32
ret i32 %r
}

define i64 @mulzu_sext_16_64(i16 %A) {
; CHECK-LABEL: mulzu_sext_16_64:
; CHECK: # %bb.0:
; CHECK-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
; CHECK-NEXT: movswq %ax, %rax
; CHECK-NEXT: retq
%mul = mul i16 %A, 1234
%r = sext i16 %mul to i64
ret i64 %r
}

; Tests ported from test/CodeGen/X86/imul.ll follow from this point.
; The generated code, which strength-reduces multiplies by certain
; constants, should be unaffected by enabling zu.

define i16 @mul4_16(i16 %A) {
;
; CHECK-LABEL: mul4_16:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: leal (,%rdi,4), %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%mul = mul i16 %A, 4
ret i16 %mul
}

define i16 @mul4096_16(i16 %A) {
;
; CHECK-LABEL: mul4096_16:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: shll $12, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%mul = mul i16 %A, 4096
ret i16 %mul
}

define i16 @mulmin4096_16(i16 %A) {
;
; CHECK-LABEL: mulmin4096_16:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: shll $12, %eax
; CHECK-NEXT: negl %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%mul = mul i16 %A, -4096
ret i16 %mul
}

define i16 @mul4_16_minsize(i16 %A) minsize {
;
; CHECK-LABEL: mul4_16_minsize:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: leal (,%rdi,4), %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%mul = mul i16 %A, 4
ret i16 %mul
}

define i16 @mul0_16(i16 %A) {
;
; CHECK-LABEL: mul0_16:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
%mul = mul i16 %A, 0
ret i16 %mul
}

; The literal 4294967295 (0xFFFFFFFF) is wider than i16; its low 16 bits are
; 0xFFFF, i.e. -1, which is consistent with the negate expected below.
; NOTE(review): confirm the IR parser in use still accepts an oversized
; literal here rather than rejecting it.
define i16 @mul4294967295_16(i16 %A) {
;
; CHECK-LABEL: mul4294967295_16:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: negl %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%mul = mul i16 %A, 4294967295
ret i16 %mul
}
Loading