-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[LoopFusion] Detecting legal dependencies for fusion using DA info #146383
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -100,6 +100,7 @@ STATISTIC(OnlySecondCandidateIsGuarded, | |
"The second candidate is guarded while the first one is not"); | ||
STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions."); | ||
STATISTIC(NumSunkInsts, "Number of hoisted preheader instructions."); | ||
STATISTIC(NumDA, "DA checks passed"); | ||
|
||
enum FusionDependenceAnalysisChoice { | ||
FUSION_DEPENDENCE_ANALYSIS_SCEV, | ||
|
@@ -1371,6 +1372,47 @@ struct LoopFuser { | |
<< "\n"); | ||
} | ||
#endif | ||
unsigned Levels = DepResult->getLevels(); | ||
unsigned SameSDLevels = DepResult->getSameSDLevels(); | ||
unsigned CurLoopLevel = FC0.L->getLoopDepth(); | ||
|
||
// Check if DA is missing info regarding the current loop level | ||
if (CurLoopLevel > Levels + SameSDLevels) | ||
return false; | ||
|
||
// Iterating over the outer levels. | ||
for (unsigned Level = 1; Level <= std::min(CurLoopLevel - 1, Levels); | ||
++Level) { | ||
unsigned Direction = DepResult->getDirection(Level, false); | ||
|
||
// Check if the direction vector does not include equality. If an outer | ||
// loop has a non-equal direction, outer indices are different and it | ||
// is safe to fuse. | ||
if (!(Direction & Dependence::DVEntry::EQ)) { | ||
LLVM_DEBUG(dbgs() << "Safe to fuse due to non-equal acceses in the " | ||
"outer loops\n"); | ||
NumDA++; | ||
return true; | ||
} | ||
} | ||
|
||
assert(CurLoopLevel > Levels && "Fusion candidates are not separated"); | ||
|
||
unsigned CurDir = DepResult->getDirection(CurLoopLevel, true); | ||
|
||
// Check if the direction vector does not include greater direction. In | ||
// that case, the dependency is not backward loop-carried, so it is legal | ||
// to fuse. For example, here we have a forward dependency: | ||
// for (int i = 0; i < n; i++) | ||
// A[i] = ...; | ||
// for (int i = 0; i < n; i++) | ||
// ... = A[i-1]; | ||
if (!(CurDir & Dependence::DVEntry::GT)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be more readable if it is changed to the following? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to above. |
||
LLVM_DEBUG(dbgs() << "Safe to fuse with no backward loop-carried " | ||
"dependency\n"); | ||
NumDA++; | ||
return true; | ||
} | ||
|
||
if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor()) | ||
LLVM_DEBUG( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
; REQUIRES: asserts | ||
|
||
; RUN: opt -passes=loop-fusion -da-disable-delinearization-checks -disable-output -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s | ||
; STAT: 2 loop-fusion - DA checks passed | ||
|
||
; The two inner loops have no dependency and are allowed to be fused because | ||
; their accesses differ in the subscripts indexed by the outer loops. | ||
|
||
; C Code | ||
; | ||
;; for (long int i = 0; i < n; i++) { | ||
;; for (long int j = 0; j < n; j++) { | ||
;; for (long int k = 0; k < n; k++) | ||
;; A[i][j][k] = i; | ||
;; for (long int k = 0; k < n; k++) | ||
;; temp = A[i + 3][j + 2][k + 1]; | ||
;; } | ||
;; } | ||
|
||
define void @nonequal_outer_access(i64 %n, ptr %A) nounwind uwtable ssp { | ||
entry: | ||
%cmp10 = icmp sgt i64 %n, 0 | ||
br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26 | ||
|
||
for.cond1.preheader.preheader: ; preds = %entry | ||
br label %for.cond1.preheader | ||
|
||
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24 | ||
%i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ] | ||
%cmp26 = icmp sgt i64 %n, 0 | ||
br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24 | ||
|
||
for.cond4.preheader.preheader: ; preds = %for.cond1.preheader | ||
br label %for.cond4.preheader | ||
|
||
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21 | ||
%j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ] | ||
%cmp51 = icmp sgt i64 %n, 0 | ||
br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit | ||
|
||
for.body6.preheader: ; preds = %for.cond4.preheader | ||
br label %for.body6 | ||
|
||
for.body6: ; preds = %for.body6.preheader, %for.body6 | ||
%k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ] | ||
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02 | ||
store i64 %i.011, ptr %arrayidx8, align 8 | ||
%inc = add nsw i64 %k.02, 1 | ||
%exitcond13 = icmp ne i64 %inc, %n | ||
br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit | ||
|
||
for.cond10.loopexit.loopexit: ; preds = %for.body6 | ||
br label %for.cond10.loopexit | ||
|
||
for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader | ||
%cmp113 = icmp sgt i64 %n, 0 | ||
br i1 %cmp113, label %for.body12.preheader, label %for.inc21 | ||
|
||
for.body12.preheader: ; preds = %for.cond10.loopexit | ||
br label %for.body12 | ||
|
||
for.body12: ; preds = %for.body12.preheader, %for.body12 | ||
%k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ] | ||
%add = add nsw i64 %k9.05, 1 | ||
%add13 = add nsw i64 %j.07, 2 | ||
%add14 = add nsw i64 %i.011, 3 | ||
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %add14, i64 %add13, i64 %add | ||
%0 = load i64, ptr %arrayidx17, align 8 | ||
%inc19 = add nsw i64 %k9.05, 1 | ||
%exitcond = icmp ne i64 %inc19, %n | ||
br i1 %exitcond, label %for.body12, label %for.inc21.loopexit | ||
|
||
for.inc21.loopexit: ; preds = %for.body12 | ||
br label %for.inc21 | ||
|
||
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit | ||
%inc22 = add nsw i64 %j.07, 1 | ||
%exitcond14 = icmp ne i64 %inc22, %n | ||
br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit | ||
|
||
for.inc24.loopexit: ; preds = %for.inc21 | ||
br label %for.inc24 | ||
|
||
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader | ||
%inc25 = add nsw i64 %i.011, 1 | ||
%exitcond15 = icmp ne i64 %inc25, %n | ||
br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit | ||
|
||
for.end26.loopexit: ; preds = %for.inc24 | ||
br label %for.end26 | ||
|
||
for.end26: ; preds = %for.end26.loopexit, %entry | ||
ret void | ||
} | ||
|
||
; The two inner loops have a forward loop-carried dependency, allowing them | ||
; to be fused. | ||
|
||
; C Code | ||
; | ||
;; for (long int i = 0; i < n; i++) { | ||
;; for (long int j = 0; j < n; j++) { | ||
;; for (long int k = 0; k < n; k++) | ||
;; A[i][j][k] = i; | ||
;; for (long int k = 0; k < n; k++) | ||
;; temp = A[i][j][k - 1]; | ||
;; } | ||
;; } | ||
|
||
define void @forward_dep(i64 %n, ptr %A) nounwind uwtable ssp { | ||
entry: | ||
%cmp10 = icmp sgt i64 %n, 0 | ||
br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26 | ||
|
||
for.cond1.preheader.preheader: ; preds = %entry | ||
br label %for.cond1.preheader | ||
|
||
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24 | ||
%i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ] | ||
%cmp26 = icmp sgt i64 %n, 0 | ||
br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24 | ||
|
||
for.cond4.preheader.preheader: ; preds = %for.cond1.preheader | ||
br label %for.cond4.preheader | ||
|
||
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21 | ||
%j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ] | ||
%cmp51 = icmp sgt i64 %n, 0 | ||
br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit | ||
|
||
for.body6.preheader: ; preds = %for.cond4.preheader | ||
br label %for.body6 | ||
|
||
for.body6: ; preds = %for.body6.preheader, %for.body6 | ||
%k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ] | ||
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02 | ||
store i64 %i.011, ptr %arrayidx8, align 8 | ||
%inc = add nsw i64 %k.02, 1 | ||
%exitcond13 = icmp ne i64 %inc, %n | ||
br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit | ||
|
||
for.cond10.loopexit.loopexit: ; preds = %for.body6 | ||
br label %for.cond10.loopexit | ||
|
||
for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader | ||
%cmp113 = icmp sgt i64 %n, 0 | ||
br i1 %cmp113, label %for.body12.preheader, label %for.inc21 | ||
|
||
for.body12.preheader: ; preds = %for.cond10.loopexit | ||
br label %for.body12 | ||
|
||
for.body12: ; preds = %for.body12.preheader, %for.body12 | ||
%k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ] | ||
%add = add nsw i64 %k9.05, -1 | ||
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %add | ||
%0 = load i64, ptr %arrayidx17, align 8 | ||
%inc19 = add nsw i64 %k9.05, 1 | ||
%exitcond = icmp ne i64 %inc19, %n | ||
br i1 %exitcond, label %for.body12, label %for.inc21.loopexit | ||
|
||
for.inc21.loopexit: ; preds = %for.body12 | ||
br label %for.inc21 | ||
|
||
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit | ||
%inc22 = add nsw i64 %j.07, 1 | ||
%exitcond14 = icmp ne i64 %inc22, %n | ||
br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit | ||
|
||
for.inc24.loopexit: ; preds = %for.inc21 | ||
br label %for.inc24 | ||
|
||
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader | ||
%inc25 = add nsw i64 %i.011, 1 | ||
%exitcond15 = icmp ne i64 %inc25, %n | ||
br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit | ||
|
||
for.end26.loopexit: ; preds = %for.inc24 | ||
br label %for.end26 | ||
|
||
for.end26: ; preds = %for.end26.loopexit, %entry | ||
ret void | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -298,42 +298,55 @@ bb23: ; preds = %bb17, %bb | |
ret void | ||
} | ||
|
||
; The following IR corresponds to the C code shown below. With PR | ||
; #146383, loop fusion is able to utilize the information from dependence | ||
; analysis, enabling the loops in the function to be fused. | ||
; | ||
; void forward_dep(int *arg) { | ||
; for (int i = 0; i < 100; i++) { | ||
; int tmp = i - 3; | ||
; int val = tmp * (i + 3) % i; | ||
; arg[i] = val; | ||
; } | ||
; | ||
; for (int j = 0; j < 100; j++) { | ||
; int val = arg[j - 3]; | ||
; arg[j] = val * 3; | ||
; } | ||
; } | ||
; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function was not fused previously, and now with your patch it is fused. Can you add some comment here to describe it like why it can be fused now? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comment added. |
||
define void @forward_dep(ptr noalias %arg) { | ||
; CHECK-LABEL: @forward_dep( | ||
; CHECK-NEXT: bb: | ||
; CHECK-NEXT: br label [[BB7:%.*]] | ||
; CHECK-NEXT: [[BB:.*]]: | ||
amehsan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
; CHECK-NEXT: br label %[[BB7:.*]] | ||
; CHECK: bb7: | ||
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB14:%.*]] ] | ||
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB14]] ] | ||
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP15:%.*]], %[[BB25:.*]] ] | ||
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], %[[BB25]] ] | ||
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB25]] ], [ 0, %[[BB]] ] | ||
; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[DOT013]], -3 | ||
; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 3 | ||
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32 | ||
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP]], [[TMP9]] | ||
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[INDVARS_IV22]] to i32 | ||
; CHECK-NEXT: [[TMP12:%.*]] = srem i32 [[TMP10]], [[TMP11]] | ||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[INDVARS_IV22]] | ||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV22]] | ||
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP13]], align 4 | ||
; CHECK-NEXT: br label [[BB14]] | ||
; CHECK-NEXT: br label %[[BB14:.*]] | ||
; CHECK: bb14: | ||
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1 | ||
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1 | ||
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100 | ||
; CHECK-NEXT: br i1 [[EXITCOND4]], label [[BB7]], label [[BB19_PREHEADER:%.*]] | ||
; CHECK: bb19.preheader: | ||
; CHECK-NEXT: br label [[BB19:%.*]] | ||
; CHECK: bb19: | ||
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25:%.*]] ], [ 0, [[BB19_PREHEADER]] ] | ||
; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[INDVARS_IV1]], -3 | ||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[TMP20]] | ||
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 | ||
; CHECK-NEXT: [[TMP23:%.*]] = mul nsw i32 [[TMP22]], 3 | ||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV1]] | ||
; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 | ||
; CHECK-NEXT: br label [[BB25]] | ||
; CHECK-NEXT: br label %[[BB25]] | ||
; CHECK: bb25: | ||
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1 | ||
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1 | ||
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100 | ||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1 | ||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100 | ||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB19]], label [[BB26:%.*]] | ||
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[BB7]], label %[[BB26:.*]] | ||
; CHECK: bb26: | ||
; CHECK-NEXT: ret void | ||
; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be more readable if it is changed to the following?
if (Direction != Dependence::DVEntry::EQ)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We want to consider directions that include equality, not only equality itself. For example, if the direction is `>=`, it needs to return false as well.