Merged

31 commits
fc7a509  Narrow 64 bit math to 32 bit if profitable (Shoreshen, Mar 10, 2025)
0fe9dbc  add tests (Shoreshen, Mar 10, 2025)
9df0718  fix mul, remove sub (Shoreshen, Mar 10, 2025)
a5084d2  fix lit.cfg.py (Shoreshen, Mar 10, 2025)
2e2d190  fix test (Shoreshen, Mar 10, 2025)
2063614  fix variable name (Shoreshen, Mar 11, 2025)
af47303  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 11, 2025)
0ac2f9e  fix comments (Shoreshen, Mar 11, 2025)
f7d0769  fix comments (Shoreshen, Mar 11, 2025)
f54c570  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 12, 2025)
68ef90b  move from aggressive-instcombine to codegenprepare (Shoreshen, Mar 12, 2025)
ad9c30d  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 13, 2025)
f4fb6d0  move to amdgpu-codegenprepare (Shoreshen, Mar 13, 2025)
c7fbcd1  fix comments (Shoreshen, Mar 13, 2025)
bc8d2a2  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 13, 2025)
29b30c9  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 17, 2025)
b03ea21  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 18, 2025)
aef04fa  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 19, 2025)
e40fbf2  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 20, 2025)
f946445  fix comments (Shoreshen, Mar 20, 2025)
ab4b6ce  fix lit (Shoreshen, Mar 20, 2025)
4159ffb  fix format (Shoreshen, Mar 20, 2025)
9d4736c  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 21, 2025)
4c53694  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 24, 2025)
4501fcf  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 25, 2025)
d44ee75  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 26, 2025)
9bfea1d  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 27, 2025)
f7357db  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 28, 2025)
279009c  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 28, 2025)
c55754c  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Mar 31, 2025)
fa00e4d  Merge branch 'main' into narrow-math-for-and-operand (Shoreshen, Apr 1, 2025)
@@ -1224,6 +1224,69 @@ static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
return false;
}

static bool isSafeToNarrow(unsigned Opc, uint64_t Num1, uint64_t Num2) {
  if (Num1 > 0xffffffff || Num2 > 0xffffffff) {
    // If `Num > 0xffffffff`, then `%and = and i64 %a, Num` may have bits set
    // in its high 32 bits, so truncating it to i32 could lose information.
    return false;
  }
  switch (Opc) {
  // If `%and = and i64 %a, Num` with `Num <= 0xffffffff`, then `%and` is a
  // non-negative value bounded by `Num`. Add and mul are both monotonically
  // increasing over non-negative operands, so `%a1 <= Num1` and `%a2 <= Num2`
  // imply `(%a1 op %a2) <= (Num1 op Num2)`. Hence if `(Num1 op Num2)` fits in
  // 32 bits, the narrowed operation cannot overflow.
  case Instruction::Add:
    return (Num1 + Num2) <= 0xffffffff;
  case Instruction::Mul:
    // Num1 and Num2 both fit in 32 bits, so the product cannot wrap uint64_t.
    return (Num1 * Num2) <= 0xffffffff;
  }

  return false;
}
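
For a concrete instance of the check, using the mask values from the tests below (an illustration, not part of the diff):

; Safe: 2147483647 + 2147483647 = 4294967294, which fits in 32 bits.
%m0 = and i64 %a, 2147483647
%m1 = and i64 %b, 2147483647
%add = add i64 %m0, %m1        ; high 32 bits are provably zero

; Not safe: 2147483648 + 2147483648 = 4294967296, which sets bit 32.
%n0 = and i64 %a, 2147483648
%n1 = and i64 %b, 2147483648
%sum = add i64 %n0, %n1        ; truncating to i32 could drop the carry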

static bool tryNarrowMathIfNoOverflow(Instruction &I,
                                      TargetTransformInfo &TTI) {
  unsigned Opc = I.getOpcode();
  if (Opc != Instruction::Add && Opc != Instruction::Mul)
    return false;

  LLVMContext &Ctx = I.getContext();
  Type *I64Type = Type::getInt64Ty(Ctx);
  Type *I32Type = Type::getInt32Ty(Ctx);

  if (I.getType() != I64Type || !TTI.isTruncateFree(I64Type, I32Type))
    return false;

  // Narrowing pays off only if the 32-bit op plus the zext back to i64 is
  // cheaper than the 64-bit op; the truncates are free (checked above).
  InstructionCost CostOp64 =
      TTI.getArithmeticInstrCost(Opc, I64Type, TTI::TCK_RecipThroughput);
  InstructionCost CostOp32 =
      TTI.getArithmeticInstrCost(Opc, I32Type, TTI::TCK_RecipThroughput);
  InstructionCost CostZext64 = TTI.getCastInstrCost(
      Instruction::ZExt, I64Type, I32Type, TTI.getCastContextHint(&I),
      TTI::TCK_RecipThroughput);
  if ((CostOp64 - CostOp32) <= CostZext64)
    return false;

  uint64_t AndConst0, AndConst1;
  if ((match(I.getOperand(0), m_And(m_Value(), m_ConstantInt(AndConst0))) ||
       match(I.getOperand(0), m_And(m_ConstantInt(AndConst0), m_Value()))) &&
      (match(I.getOperand(1), m_And(m_Value(), m_ConstantInt(AndConst1))) ||
       match(I.getOperand(1), m_And(m_ConstantInt(AndConst1), m_Value()))) &&
      isSafeToNarrow(Opc, AndConst0, AndConst1)) {
    IRBuilder<> Builder(&I);
    Value *Trunc0 = Builder.CreateTrunc(I.getOperand(0), I32Type);
    Value *Trunc1 = Builder.CreateTrunc(I.getOperand(1), I32Type);
    // Recreate the original opcode (add or mul) at the narrow width, then
    // widen the result back to i64.
    Value *Arith32 = Builder.CreateBinOp(
        static_cast<Instruction::BinaryOps>(Opc), Trunc0, Trunc1);
    Value *Zext64 = Builder.CreateZExt(Arith32, I64Type);
    I.replaceAllUsesWith(Zext64);
    I.eraseFromParent();
    return true;
  }
  return false;
}
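
For illustration (not part of the diff), the rewrite above turns the pattern from the tests below

%zext0 = and i64 %a, 2147483647
%zext1 = and i64 %b, 2147483647
%add = add i64 %zext0, %zext1

into

%t0 = trunc i64 %zext0 to i32
%t1 = trunc i64 %zext1 to i32
%a32 = add i32 %t0, %t1
%res = zext i32 %a32 to i64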

/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
@@ -1256,6 +1319,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
// needs to be called at the end of this sequence; otherwise we may
// introduce bugs.
MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
MadeChange |= tryNarrowMathIfNoOverflow(I, TTI);
}
}

113 changes: 113 additions & 0 deletions llvm/test/Transforms/AggressiveInstCombine/narrow_math_for_and.ll
@@ -0,0 +1,113 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=aggressive-instcombine < %s | FileCheck %s


define i64 @narrow_add(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @narrow_add(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483647
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: ret i64 [[TMP4]]
;
%zext0 = and i64 %a, 2147483647
%zext1 = and i64 %b, 2147483647
%add = add i64 %zext0, %zext1
ret i64 %add
}

define i64 @narrow_add_1(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @narrow_add_1(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483648
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: ret i64 [[TMP4]]
;
%zext0 = and i64 %a, 2147483647
%zext1 = and i64 %b, 2147483648
%add = add i64 %zext0, %zext1
ret i64 %add
}

define i64 @narrow_mul(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @narrow_mul(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 0
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: ret i64 [[TMP4]]
;
%zext0 = and i64 %a, 2147483647
%zext1 = and i64 %b, 0
%mul = mul i64 %zext0, %zext1
ret i64 %mul
}

define i64 @narrow_mul_1(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @narrow_mul_1(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483647
; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ZEXT0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ZEXT1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: ret i64 [[TMP4]]
;
%zext0 = and i64 %a, 2147483647
%zext1 = and i64 %b, 2
%mul = mul i64 %zext0, %zext1
ret i64 %mul
}

define i64 @no_narrow_add(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @no_narrow_add(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483648
; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2147483648
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ZEXT0]], [[ZEXT1]]
; CHECK-NEXT: ret i64 [[ADD]]
;
%zext0 = and i64 %a, 2147483648
%zext1 = and i64 %b, 2147483648
%add = add i64 %zext0, %zext1
ret i64 %add
}

define i64 @no_narrow_add_1(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @no_narrow_add_1(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 4294967295
; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 1
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ZEXT0]], [[ZEXT1]]
; CHECK-NEXT: ret i64 [[ADD]]
;
%zext0 = and i64 %a, 4294967295
%zext1 = and i64 %b, 1
%add = add i64 %zext0, %zext1
ret i64 %add
}

define i64 @no_narrow_mul(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: define i64 @no_narrow_mul(
; CHECK-SAME: i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ZEXT0:%.*]] = and i64 [[A]], 2147483648
; CHECK-NEXT: [[ZEXT1:%.*]] = and i64 [[B]], 2
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[ZEXT0]], [[ZEXT1]]
; CHECK-NEXT: ret i64 [[MUL]]
;
%zext0 = and i64 %a, 2147483648
%zext1 = and i64 %b, 2
%mul = mul i64 %zext0, %zext1
ret i64 %mul
}
Contributor
Test vector cases
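
A vector variant might look like the sketch below (hypothetical, not part of this PR). Note that the transform as written matches only scalar i64, so a <2 x i64> input would currently be left unchanged:

define <2 x i64> @narrow_add_vec(<2 x i64> noundef %a, <2 x i64> noundef %b) {
  %zext0 = and <2 x i64> %a, <i64 2147483647, i64 2147483647>
  %zext1 = and <2 x i64> %b, <i64 2147483647, i64 2147483647>
  %add = add <2 x i64> %zext0, %zext1
  ret <2 x i64> %add
}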