Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions llvm/lib/Transforms/Scalar/LoopFuse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ STATISTIC(OnlySecondCandidateIsGuarded,
"The second candidate is guarded while the first one is not");
// Counters reported under -stats: preheader instructions moved during fusion,
// and fusion candidates accepted by the dependence-analysis direction checks.
STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
STATISTIC(NumSunkInsts, "Number of sunk preheader instructions.");
STATISTIC(NumDA, "DA checks passed");

enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
Expand Down Expand Up @@ -1371,6 +1372,47 @@ struct LoopFuser {
<< "\n");
}
#endif
unsigned Levels = DepResult->getLevels();
unsigned SameSDLevels = DepResult->getSameSDLevels();
unsigned CurLoopLevel = FC0.L->getLoopDepth();

// Check if DA is missing info regarding the current loop level
if (CurLoopLevel > Levels + SameSDLevels)
return false;

// Iterating over the outer levels.
for (unsigned Level = 1; Level <= std::min(CurLoopLevel - 1, Levels);
++Level) {
unsigned Direction = DepResult->getDirection(Level, false);

// Check if the direction vector does not include equality. If an outer
// loop has a non-equal direction, the outer indices are different and it
// is safe to fuse.
if (!(Direction & Dependence::DVEntry::EQ)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be more readable if it is changed to the following?
if (Direction != Dependence::DVEntry::EQ)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We want to consider directions that include equality not only the equality itself. For example if the direction is >=, it needs to return false as well.

LLVM_DEBUG(dbgs() << "Safe to fuse due to non-equal acceses in the "
"outer loops\n");
NumDA++;
return true;
}
}

assert(CurLoopLevel > Levels && "Fusion candidates are not separated");

unsigned CurDir = DepResult->getDirection(CurLoopLevel, true);

// Check if the direction vector does not include the greater-than direction.
// In that case, the dependency is not a backward loop-carried one and it is
// legal to fuse. For example, here we have a forward dependency:
// for (int i = 0; i < n; i++)
// A[i] = ...;
// for (int i = 0; i < n; i++)
// ... = A[i-1];
if (!(CurDir & Dependence::DVEntry::GT)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be more readable if it is changed to the following?
if (CurDir != Dependence::DVEntry::GT)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to above.

LLVM_DEBUG(dbgs() << "Safe to fuse with no backward loop-carried "
"dependency\n");
NumDA++;
return true;
}

if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
LLVM_DEBUG(
Expand Down
182 changes: 182 additions & 0 deletions llvm/test/Transforms/LoopFusion/da_separate_loops.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
; REQUIRES: asserts

; RUN: opt -passes=loop-fusion -da-disable-delinearization-checks -disable-output -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; STAT: 2 loop-fusion - DA checks passed

; The two inner loops have no dependency and are allowed to be fused because
; their accesses differ in the subscripts of the outer loops.

; C Code
;
;; for (long int i = 0; i < n; i++) {
;; for (long int j = 0; j < n; j++) {
;; for (long int k = 0; k < n; k++)
;; A[i][j][k] = i;
;; for (long int k = 0; k < n; k++)
;; temp = A[i + 3][j + 2][k + 1];
;; }
;; }

; Three-level loop nest: an outer i-loop (for.cond1.preheader), a middle j-loop
; (for.cond4.preheader), and two sibling innermost k-loops (for.body6 and
; for.body12). Every loop is entered only if %n > 0 and iterates until its
; incremented induction variable equals %n.
define void @nonequal_outer_access(i64 %n, ptr %A) nounwind uwtable ssp {
entry:
; Guard for the outer i-loop.
%cmp10 = icmp sgt i64 %n, 0
br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26

for.cond1.preheader.preheader: ; preds = %entry
br label %for.cond1.preheader

; Header of the i-loop; also guards entry to the j-loop.
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
%i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
%cmp26 = icmp sgt i64 %n, 0
br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24

for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
br label %for.cond4.preheader

; Header of the j-loop; also guards entry to the first k-loop.
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
%j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
%cmp51 = icmp sgt i64 %n, 0
br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit

for.body6.preheader: ; preds = %for.cond4.preheader
br label %for.body6

; First k-loop: stores the value of i to A[i][j][k], indexing %A as an array
; of [100 x [100 x i64]] elements.
for.body6: ; preds = %for.body6.preheader, %for.body6
%k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
store i64 %i.011, ptr %arrayidx8, align 8
%inc = add nsw i64 %k.02, 1
%exitcond13 = icmp ne i64 %inc, %n
br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit

for.cond10.loopexit.loopexit: ; preds = %for.body6
br label %for.cond10.loopexit

; Guard for the second k-loop.
for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
%cmp113 = icmp sgt i64 %n, 0
br i1 %cmp113, label %for.body12.preheader, label %for.inc21

for.body12.preheader: ; preds = %for.cond10.loopexit
br label %for.body12

; Second k-loop: loads A[i + 3][j + 2][k + 1]; the loaded value %0 is not used.
for.body12: ; preds = %for.body12.preheader, %for.body12
%k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
%add = add nsw i64 %k9.05, 1
%add13 = add nsw i64 %j.07, 2
%add14 = add nsw i64 %i.011, 3
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %add14, i64 %add13, i64 %add
%0 = load i64, ptr %arrayidx17, align 8
%inc19 = add nsw i64 %k9.05, 1
%exitcond = icmp ne i64 %inc19, %n
br i1 %exitcond, label %for.body12, label %for.inc21.loopexit

for.inc21.loopexit: ; preds = %for.body12
br label %for.inc21

; Latch of the j-loop.
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
%inc22 = add nsw i64 %j.07, 1
%exitcond14 = icmp ne i64 %inc22, %n
br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit

for.inc24.loopexit: ; preds = %for.inc21
br label %for.inc24

; Latch of the i-loop.
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
%inc25 = add nsw i64 %i.011, 1
%exitcond15 = icmp ne i64 %inc25, %n
br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit

for.end26.loopexit: ; preds = %for.inc24
br label %for.end26

for.end26: ; preds = %for.end26.loopexit, %entry
ret void
}

; The two inner loops have a forward loop-carried dependency, allowing them
; to be fused.

; C Code
;
;; for (long int i = 0; i < n; i++) {
;; for (long int j = 0; j < n; j++) {
;; for (long int k = 0; k < n; k++)
;; A[i][j][k] = i;
;; for (long int k = 0; k < n; k++)
;; temp = A[i][j][k - 1];
;; }
;; }

; Three-level loop nest: an outer i-loop (for.cond1.preheader), a middle j-loop
; (for.cond4.preheader), and two sibling innermost k-loops (for.body6 and
; for.body12). Every loop is entered only if %n > 0 and iterates until its
; incremented induction variable equals %n.
define void @forward_dep(i64 %n, ptr %A) nounwind uwtable ssp {
entry:
; Guard for the outer i-loop.
%cmp10 = icmp sgt i64 %n, 0
br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26

for.cond1.preheader.preheader: ; preds = %entry
br label %for.cond1.preheader

; Header of the i-loop; also guards entry to the j-loop.
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
%i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
%cmp26 = icmp sgt i64 %n, 0
br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24

for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
br label %for.cond4.preheader

; Header of the j-loop; also guards entry to the first k-loop.
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
%j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
%cmp51 = icmp sgt i64 %n, 0
br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit

for.body6.preheader: ; preds = %for.cond4.preheader
br label %for.body6

; First k-loop: stores the value of i to A[i][j][k], indexing %A as an array
; of [100 x [100 x i64]] elements.
for.body6: ; preds = %for.body6.preheader, %for.body6
%k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
store i64 %i.011, ptr %arrayidx8, align 8
%inc = add nsw i64 %k.02, 1
%exitcond13 = icmp ne i64 %inc, %n
br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit

for.cond10.loopexit.loopexit: ; preds = %for.body6
br label %for.cond10.loopexit

; Guard for the second k-loop.
for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
%cmp113 = icmp sgt i64 %n, 0
br i1 %cmp113, label %for.body12.preheader, label %for.inc21

for.body12.preheader: ; preds = %for.cond10.loopexit
br label %for.body12

; Second k-loop: loads A[i][j][k - 1], i.e. the element the first k-loop wrote
; one k-iteration earlier; the loaded value %0 is not used.
for.body12: ; preds = %for.body12.preheader, %for.body12
%k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
%add = add nsw i64 %k9.05, -1
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %add
%0 = load i64, ptr %arrayidx17, align 8
%inc19 = add nsw i64 %k9.05, 1
%exitcond = icmp ne i64 %inc19, %n
br i1 %exitcond, label %for.body12, label %for.inc21.loopexit

for.inc21.loopexit: ; preds = %for.body12
br label %for.inc21

; Latch of the j-loop.
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
%inc22 = add nsw i64 %j.07, 1
%exitcond14 = icmp ne i64 %inc22, %n
br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit

for.inc24.loopexit: ; preds = %for.inc21
br label %for.inc24

; Latch of the i-loop.
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
%inc25 = add nsw i64 %i.011, 1
%exitcond15 = icmp ne i64 %inc25, %n
br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit

for.end26.loopexit: ; preds = %for.inc24
br label %for.end26

for.end26: ; preds = %for.end26.loopexit, %entry
ret void
}
45 changes: 29 additions & 16 deletions llvm/test/Transforms/LoopFusion/simple.ll
Original file line number Diff line number Diff line change
Expand Up @@ -298,42 +298,55 @@ bb23: ; preds = %bb17, %bb
ret void
}

; The following IR is a representation of the provided code below. With PR
; #146383, loop fusion is able to utilize the information from dependence
; analysis, enabling the loops in the function to be fused.
;
; void forward_dep(int *arg) {
; for (int i = 0; i < 100; i++) {
; int tmp = i - 3;
; int val = tmp * (i + 3) % i;
; arg[i] = val;
; }
;
; for (int j = 0; j < 100; j++) {
; int val = arg[j - 3];
; arg[j] = val * 3;
; }
; }
;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function was not fused previously, and now with your patch it is fused. Can you add some comment here to describe it like why it can be fused now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment added.

define void @forward_dep(ptr noalias %arg) {
; CHECK-LABEL: @forward_dep(
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB7:%.*]]
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: br label %[[BB7:.*]]
; CHECK: bb7:
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB14:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB14]] ]
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP15:%.*]], %[[BB25:.*]] ]
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], %[[BB25]] ]
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB25]] ], [ 0, %[[BB]] ]
; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[DOT013]], -3
; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 3
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[INDVARS_IV22]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = srem i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[INDVARS_IV22]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV22]]
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP13]], align 4
; CHECK-NEXT: br label [[BB14]]
; CHECK-NEXT: br label %[[BB14:.*]]
; CHECK: bb14:
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
; CHECK-NEXT: br i1 [[EXITCOND4]], label [[BB7]], label [[BB19_PREHEADER:%.*]]
; CHECK: bb19.preheader:
; CHECK-NEXT: br label [[BB19:%.*]]
; CHECK: bb19:
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25:%.*]] ], [ 0, [[BB19_PREHEADER]] ]
; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[INDVARS_IV1]], -3
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = mul nsw i32 [[TMP22]], 3
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV1]]
; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4
; CHECK-NEXT: br label [[BB25]]
; CHECK-NEXT: br label %[[BB25]]
; CHECK: bb25:
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB19]], label [[BB26:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[BB7]], label %[[BB26:.*]]
; CHECK: bb26:
; CHECK-NEXT: ret void
;
Expand Down