Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58919,6 +58919,12 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
if (IsFoldableAtomicRMW(N0, Op) ||
(Commute && IsFoldableAtomicRMW(N1, Op)))
return false;
// When ZU is enabled, we prefer to not promote for MUL by a constant,
// since a 16b imulzu will not incur partial-write stalls, and may be
// able to fold away a zero-extend of the 16b result.
if (Subtarget.hasZU() && Op.getOpcode() == ISD::MUL &&
(isa<ConstantSDNode>(N0) || isa<ConstantSDNode>(N1)))
return false;
}
}

Expand Down
34 changes: 30 additions & 4 deletions llvm/lib/Target/X86/X86InstrCompiler.td
Original file line number Diff line number Diff line change
Expand Up @@ -2184,17 +2184,43 @@ multiclass EFLAGSDefiningPats<string suffix, Predicate p> {
defm : EFLAGSDefiningPats<"", NoNDD>;
defm : EFLAGSDefiningPats<"_ND", HasNDD>;

// Selection patterns that apply only when the ZU feature is available
// (HasZU). They map a 16-bit multiply by an immediate, with the other operand
// in a register or loaded from memory, onto IMULZU16rri / IMULZU16rmi.
let Predicates = [HasZU] in {
// zext (mul reg/mem, imm) -> imulzu
// The zero-extend of the 16-bit product to i32 or i64 is matched together
// with the multiply: the IMULZU16 result is wrapped in SUBREG_TO_REG at
// sub_16bit, so no separate zero-extend instruction is selected.
def : Pat<(i32 (zext (i16 (mul GR16:$src1, imm:$src2)))),
(SUBREG_TO_REG (i32 0), (IMULZU16rri GR16:$src1, imm:$src2), sub_16bit)>;
def : Pat<(i32 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))),
(SUBREG_TO_REG (i32 0), (IMULZU16rmi addr:$src1, imm:$src2), sub_16bit)>;
def : Pat<(i64 (zext (i16 (mul GR16:$src1, imm:$src2)))),
(SUBREG_TO_REG (i64 0), (IMULZU16rri GR16:$src1, imm:$src2), sub_16bit)>;
def : Pat<(i64 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))),
(SUBREG_TO_REG (i64 0), (IMULZU16rmi addr:$src1, imm:$src2), sub_16bit)>;

// (mul (reg/mem), imm) -> imulzu
// Note this pattern doesn't explicitly require the zero-upper behaviour of imulzu,
// but instead avoids the zero-extend of the reg/mem operand that would be
// required if the multiply were promoted to 32b to avoid partial-write stalls.
// The imulzu here simply doesn't incur any partial-write stalls.
// These two patterns cover the plain i16 result (no zext user), in register
// and memory-operand form respectively.
def : Pat<(mul GR16:$src1, imm:$src2),
(IMULZU16rri GR16:$src1, imm:$src2)>;
def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
(IMULZU16rmi addr:$src1, imm:$src2)>;
}

// mul reg, imm
def : Pat<(mul GR16:$src1, imm:$src2),
(IMUL16rri GR16:$src1, imm:$src2)>;
// 16-bit register multiply by an immediate, selected as IMUL16rri only when
// ZU is unavailable (NoZU). The HasZU patterns match the same
// (mul GR16, imm) input and select IMULZU16rri instead.
let Predicates = [NoZU] in {
def : Pat<(mul GR16:$src1, imm:$src2),
(IMUL16rri GR16:$src1, imm:$src2)>;
}
// 32- and 64-bit register multiplies by an immediate. Neither pattern is
// wrapped in a ZU/NoZU predicate in the visible hunk. The 64-bit form only
// matches immediates accepted by i64immSExt32 and selects IMUL64rri32.
def : Pat<(mul GR32:$src1, imm:$src2),
(IMUL32rri GR32:$src1, imm:$src2)>;
def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
(IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;

// reg = mul mem, imm
def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
(IMUL16rmi addr:$src1, imm:$src2)>;
// 16-bit multiply of a loaded value by an immediate, selected as IMUL16rmi
// only when ZU is unavailable (NoZU). The HasZU patterns match the same
// (mul (loadi16 addr), imm) input and select IMULZU16rmi instead.
let Predicates = [NoZU] in {
def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
(IMUL16rmi addr:$src1, imm:$src2)>;
}
// 32-bit multiply of a loaded value by an immediate -> IMUL32rmi. Not wrapped
// in a ZU/NoZU predicate in the visible hunk.
def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
(IMUL32rmi addr:$src1, imm:$src2)>;
def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86InstrPredicates.td
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def NoEGPR : Predicate<"!Subtarget->hasEGPR()">;
// entries, so that the NDD variant can be selected first to benefit RA.
def HasNDD : Predicate<"Subtarget->hasNDD()">;
def NoNDD : Predicate<"!Subtarget->hasNDD()">;
// ZU feature predicates. HasZU tests Subtarget->hasZU() and NoZU tests its
// negation, so a pattern guarded by one is never available together with a
// pattern guarded by the other.
def HasZU : Predicate<"Subtarget->hasZU()">;
def NoZU : Predicate<"!Subtarget->hasZU()">;
def HasCF : Predicate<"Subtarget->hasCF()">;
def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;
Expand Down
238 changes: 238 additions & 0 deletions llvm/test/CodeGen/X86/apx/imulzu.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mattr=+zu | FileCheck %s --check-prefix=ZU
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s --check-prefix=NOZU

; Test generation of 16b imulzu when -mattr=+zu is specified.
; The mulzu_* tests check for basic generation, which will fold away a zero-extend of the
; result if present.
; The following tests are modifications of selected test/CodeGen/X86/imul.ll tests with
; 16b multiplies, to check that common strength reductions in ISel are still performed
; when -mattr=+zu is in effect.
;
; FIXME: several cases from imul.ll covering DAG combines, in particular those using LEA,
; are not ported, because X86's IsDesirableToPromoteOp has no way to accurately identify
; when promotion will permit a better sequence than an unpromoted imulzu.
; Those cases should be added here once IsDesirableToPromoteOp can make that distinction.

; i16 register operand multiplied by 1234, result zero-extended to i32.
; With -mattr=+zu the expected code is a single imulzuw with no separate
; zero-extend; without it the expected code is a 32-bit imull followed by movzwl.
define i32 @mulzu_16_32(i16 %A) {
; ZU-LABEL: mulzu_16_32:
; ZU: # %bb.0:
; ZU-NEXT: imulzuw $1234, %di, %ax # imm = 0x4D2
; ZU-NEXT: retq
;
; NOZU-LABEL: mulzu_16_32:
; NOZU: # %bb.0:
; NOZU-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
; NOZU-NEXT: movzwl %ax, %eax
; NOZU-NEXT: retq
%mul = mul i16 %A, 1234
%r = zext i16 %mul to i32
ret i32 %r
}

; i16 register operand multiplied by 1234, result zero-extended to i64.
; With -mattr=+zu the expected code is a single imulzuw with no separate
; zero-extend; without it the expected code is a 32-bit imull followed by movzwl.
define i64 @mulzu_16_64(i16 %A) {
; ZU-LABEL: mulzu_16_64:
; ZU: # %bb.0:
; ZU-NEXT: imulzuw $1234, %di, %ax # imm = 0x4D2
; ZU-NEXT: retq
;
; NOZU-LABEL: mulzu_16_64:
; NOZU: # %bb.0:
; NOZU-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
; NOZU-NEXT: movzwl %ax, %eax
; NOZU-NEXT: retq
%mul = mul i16 %A, 1234
%r = zext i16 %mul to i64
ret i64 %r
}

; i16 operand loaded from memory, multiplied by 1234, result zero-extended to
; i32. With -mattr=+zu the load is expected to fold into the memory operand of
; imulzuw; without it the expected code is a movzwl load, a 32-bit imull and a
; second movzwl.
define i32 @mulzu_16_32_mem(ptr %P) {
; ZU-LABEL: mulzu_16_32_mem:
; ZU: # %bb.0:
; ZU-NEXT: imulzuw $1234, (%rdi), %ax # imm = 0x4D2
; ZU-NEXT: retq
;
; NOZU-LABEL: mulzu_16_32_mem:
; NOZU: # %bb.0:
; NOZU-NEXT: movzwl (%rdi), %eax
; NOZU-NEXT: imull $1234, %eax, %eax # imm = 0x4D2
; NOZU-NEXT: movzwl %ax, %eax
; NOZU-NEXT: retq
%gep = getelementptr i16, ptr %P, i64 0
%A = load i16, ptr %gep
%mul = mul i16 %A, 1234
%r = zext i16 %mul to i32
ret i32 %r
}

; i16 operand loaded from memory, multiplied by 1234, result zero-extended to
; i64. With -mattr=+zu the load is expected to fold into the memory operand of
; imulzuw; without it the expected code is a movzwl load, a 32-bit imull and a
; second movzwl.
define i64 @mulzu_16_64_mem(ptr %P) {
; ZU-LABEL: mulzu_16_64_mem:
; ZU: # %bb.0:
; ZU-NEXT: imulzuw $1234, (%rdi), %ax # imm = 0x4D2
; ZU-NEXT: retq
;
; NOZU-LABEL: mulzu_16_64_mem:
; NOZU: # %bb.0:
; NOZU-NEXT: movzwl (%rdi), %eax
; NOZU-NEXT: imull $1234, %eax, %eax # imm = 0x4D2
; NOZU-NEXT: movzwl %ax, %eax
; NOZU-NEXT: retq
%gep = getelementptr i16, ptr %P, i64 0
%A = load i16, ptr %gep
%mul = mul i16 %A, 1234
%r = zext i16 %mul to i64
ret i64 %r
}

; i16 register operand multiplied by 1234 and stored as i16, with no
; zero-extend of the result. With -mattr=+zu the multiply is still expected to
; be a 16-bit imulzuw; without it the expected multiply is a 32-bit imull.
; Both variants store the low 16 bits with movw.
define void @mulzu_16_store(i16 %A, ptr %R) {
; ZU-LABEL: mulzu_16_store:
; ZU: # %bb.0:
; ZU-NEXT: imulzuw $1234, %di, %ax # imm = 0x4D2
; ZU-NEXT: movw %ax, (%rsi)
; ZU-NEXT: retq
;
; NOZU-LABEL: mulzu_16_store:
; NOZU: # %bb.0:
; NOZU-NEXT: imull $1234, %edi, %eax # imm = 0x4D2
; NOZU-NEXT: movw %ax, (%rsi)
; NOZU-NEXT: retq
%gep = getelementptr i16, ptr %R, i64 0
%mul = mul i16 %A, 1234
store i16 %mul, ptr %gep
ret void
}

; i16 operand loaded from %P, multiplied by 1234 and stored as i16 to %R.
; With -mattr=+zu the load is expected to fold into imulzuw; without it the
; expected code loads with movzwl and multiplies with a 32-bit imull.
; Both variants store the low 16 bits with movw.
define void @mulzu_16_store_mem(ptr %P, ptr %R) {
; ZU-LABEL: mulzu_16_store_mem:
; ZU: # %bb.0:
; ZU-NEXT: imulzuw $1234, (%rdi), %ax # imm = 0x4D2
; ZU-NEXT: movw %ax, (%rsi)
; ZU-NEXT: retq
;
; NOZU-LABEL: mulzu_16_store_mem:
; NOZU: # %bb.0:
; NOZU-NEXT: movzwl (%rdi), %eax
; NOZU-NEXT: imull $1234, %eax, %eax # imm = 0x4D2
; NOZU-NEXT: movw %ax, (%rsi)
; NOZU-NEXT: retq
%gep = getelementptr i16, ptr %P, i64 0
%gep1 = getelementptr i16, ptr %R, i64 0
%A = load i16, ptr %gep
%mul = mul i16 %A, 1234
store i16 %mul, ptr %gep1
ret void
}

; Tests ported from test/CodeGen/X86/imul.ll follow from this point.

; i16 multiply by 4. Both run lines expect the same scaled leal and no
; multiply instruction, i.e. enabling +zu must not block this strength
; reduction.
define i16 @mul4_16(i16 %A) {
;
; ZU-LABEL: mul4_16:
; ZU: # %bb.0:
; ZU-NEXT: # kill: def $edi killed $edi def $rdi
; ZU-NEXT: leal (,%rdi,4), %eax
; ZU-NEXT: # kill: def $ax killed $ax killed $eax
; ZU-NEXT: retq
;
; NOZU-LABEL: mul4_16:
; NOZU: # %bb.0:
; NOZU-NEXT: # kill: def $edi killed $edi def $rdi
; NOZU-NEXT: leal (,%rdi,4), %eax
; NOZU-NEXT: # kill: def $ax killed $ax killed $eax
; NOZU-NEXT: retq
%mul = mul i16 %A, 4
ret i16 %mul
}

; i16 multiply by 4096 (a power of two). Both run lines expect a 32-bit
; shift left by 12 and no multiply instruction.
define i16 @mul4096_16(i16 %A) {
;
; ZU-LABEL: mul4096_16:
; ZU: # %bb.0:
; ZU-NEXT: movl %edi, %eax
; ZU-NEXT: shll $12, %eax
; ZU-NEXT: # kill: def $ax killed $ax killed $eax
; ZU-NEXT: retq
;
; NOZU-LABEL: mul4096_16:
; NOZU: # %bb.0:
; NOZU-NEXT: movl %edi, %eax
; NOZU-NEXT: shll $12, %eax
; NOZU-NEXT: # kill: def $ax killed $ax killed $eax
; NOZU-NEXT: retq
%mul = mul i16 %A, 4096
ret i16 %mul
}

; i16 multiply by -4096. Both run lines expect a shift left by 12 followed by
; a negate, and no multiply instruction.
define i16 @mulmin4096_16(i16 %A) {
;
; ZU-LABEL: mulmin4096_16:
; ZU: # %bb.0:
; ZU-NEXT: movl %edi, %eax
; ZU-NEXT: shll $12, %eax
; ZU-NEXT: negl %eax
; ZU-NEXT: # kill: def $ax killed $ax killed $eax
; ZU-NEXT: retq
;
; NOZU-LABEL: mulmin4096_16:
; NOZU: # %bb.0:
; NOZU-NEXT: movl %edi, %eax
; NOZU-NEXT: shll $12, %eax
; NOZU-NEXT: negl %eax
; NOZU-NEXT: # kill: def $ax killed $ax killed $eax
; NOZU-NEXT: retq
%mul = mul i16 %A, -4096
ret i16 %mul
}

; Same multiply by 4 as mul4_16, but on a function carrying the minsize
; attribute. Both run lines still expect the scaled leal and no multiply.
define i16 @mul4_16_minsize(i16 %A) minsize {
;
; ZU-LABEL: mul4_16_minsize:
; ZU: # %bb.0:
; ZU-NEXT: # kill: def $edi killed $edi def $rdi
; ZU-NEXT: leal (,%rdi,4), %eax
; ZU-NEXT: # kill: def $ax killed $ax killed $eax
; ZU-NEXT: retq
;
; NOZU-LABEL: mul4_16_minsize:
; NOZU: # %bb.0:
; NOZU-NEXT: # kill: def $edi killed $edi def $rdi
; NOZU-NEXT: leal (,%rdi,4), %eax
; NOZU-NEXT: # kill: def $ax killed $ax killed $eax
; NOZU-NEXT: retq
%mul = mul i16 %A, 4
ret i16 %mul
}

; i16 multiply by 0. Both run lines expect the result register to be zeroed
; with xorl and no multiply instruction.
define i16 @mul0_16(i16 %A) {
;
; ZU-LABEL: mul0_16:
; ZU: # %bb.0:
; ZU-NEXT: xorl %eax, %eax
; ZU-NEXT: retq
;
; NOZU-LABEL: mul0_16:
; NOZU: # %bb.0:
; NOZU-NEXT: xorl %eax, %eax
; NOZU-NEXT: retq
%mul = mul i16 %A, 0
ret i16 %mul
}

; i16 multiply by the literal 4294967295. Both run lines expect a plain negate
; and no multiply instruction, which is the code for multiplying by -1.
; NOTE(review): 4294967295 does not fit in i16; its low 16 bits are 0xFFFF,
; which matches the expected negate. The name and constant look carried over
; from a 32-bit test - confirm the IR parser accepts the out-of-range literal.
define i16 @mul4294967295_16(i16 %A) {
;
; ZU-LABEL: mul4294967295_16:
; ZU: # %bb.0:
; ZU-NEXT: movl %edi, %eax
; ZU-NEXT: negl %eax
; ZU-NEXT: # kill: def $ax killed $ax killed $eax
; ZU-NEXT: retq
;
; NOZU-LABEL: mul4294967295_16:
; NOZU: # %bb.0:
; NOZU-NEXT: movl %edi, %eax
; NOZU-NEXT: negl %eax
; NOZU-NEXT: # kill: def $ax killed $ax killed $eax
; NOZU-NEXT: retq
%mul = mul i16 %A, 4294967295
ret i16 %mul
}
Loading