Skip to content

Commit 70311b4

Browse files
authored
[unroll-and-jam] Document dependency patterns in dependencies.ll (NFC) (#156577)
Add detailed comments explaining each function's memory access patterns and why they should/shouldn't be unroll-and-jammed: - fore_aft_*: Dependencies between fore block and aft block - fore_sub_*: Dependencies between fore block and sub block - sub_aft_*: Dependencies between sub block and aft block - sub_sub_*: Dependencies within sub block - *_less: Backward dependency (i-1) - safe for fore/aft, fore/sub, sub/aft; unsafe for sub/sub due to jamming conflicts - *_eq: Same iteration dependency (i+0) - safe due to preserved execution order - *_more: Forward dependency (i+1) - unsafe in every case, including sub/sub, because unroll-and-jam reorders writes from different unrolled iterations to the same element
1 parent b83e458 commit 70311b4

File tree

1 file changed

+106
-0
lines changed

1 file changed

+106
-0
lines changed

llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
88
; CHECK: %j.1 = phi
99
; CHECK: %j.2 = phi
1010
; CHECK: %j.3 = phi
11+
;
12+
; fore_aft_less SHOULD be unroll-and-jammed (count=4) as it's safe.
13+
; Memory accesses:
14+
; - Fore block: A[i] = 1 (write in outer loop before inner)
15+
; - Aft block: A[i-1] = sum (write in outer loop after inner)
16+
; No dependency conflict: The fore block's write to A[i] and the aft block's write to A[i-1]
17+
; access different array elements, so unrolling the outer loop and jamming the
18+
; inner loop is safe. The backward dependency (i-1) doesn't create conflicts
19+
; between different unrolled iterations.
1120
define void @fore_aft_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
1221
entry:
1322
%cmp = icmp sgt i32 %N, 0
@@ -48,6 +57,15 @@ cleanup:
4857
; CHECK: %j.1 = phi
4958
; CHECK: %j.2 = phi
5059
; CHECK: %j.3 = phi
60+
;
61+
; fore_aft_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
62+
; Memory accesses:
63+
; - Fore block: A[i] = 1 (write in outer loop before inner)
64+
; - Aft block: A[i] = sum (write in outer loop after inner)
65+
; Dependency conflict: Both fore and aft blocks write to A[i], creating a
66+
; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
67+
; because the aft block write always happens after the fore block write in
68+
; the same iteration, preserving the original execution order.
5169
define void @fore_aft_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
5270
entry:
5371
%cmp = icmp sgt i32 %N, 0
@@ -86,6 +104,15 @@ cleanup:
86104
; CHECK-LABEL: fore_aft_more
87105
; CHECK: %j = phi
88106
; CHECK-NOT: %j.1 = phi
107+
;
108+
; fore_aft_more should NOT be unroll-and-jammed due to a dependency violation.
109+
; Memory accesses:
110+
; - Fore block: A[i] = 1 (write in outer loop before inner)
111+
; - Aft block: A[i+1] = sum (write in outer loop after inner)
112+
; Dependency conflict: The fore block writes A[i] and aft block writes A[i+1].
113+
; When unroll-and-jamming, iteration i's aft block writes A[i+1] which conflicts
114+
; with iteration i+1's fore block write to A[i+1], creating a write-after-write
115+
; hazard whose order jamming would reverse, violating the original sequential semantics.
89116
define void @fore_aft_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
90117
entry:
91118
%cmp = icmp sgt i32 %N, 0
@@ -126,6 +153,14 @@ cleanup:
126153
; CHECK: %j.1 = phi
127154
; CHECK: %j.2 = phi
128155
; CHECK: %j.3 = phi
156+
;
157+
; fore_sub_less SHOULD be unroll-and-jammed (count=4) as it's safe.
158+
; Memory accesses:
159+
; - Fore block: A[i] = 1 (write in outer loop before inner)
160+
; - Sub block: A[i-1] = sum (write inside inner loop)
161+
; No dependency conflict: The fore block writes A[i] and sub block writes A[i-1].
162+
; These access different array elements, so unroll-and-jam is safe. The backward
163+
; dependency pattern doesn't create conflicts between unrolled iterations.
129164
define void @fore_sub_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
130165
entry:
131166
%cmp = icmp sgt i32 %N, 0
@@ -166,6 +201,15 @@ cleanup:
166201
; CHECK: %j.1 = phi
167202
; CHECK: %j.2 = phi
168203
; CHECK: %j.3 = phi
204+
;
205+
; fore_sub_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
206+
; Memory accesses:
207+
; - Fore block: A[i] = 1 (write in outer loop before inner)
208+
; - Sub block: A[i] = sum (write inside inner loop)
209+
; Dependency conflict: Both fore and sub blocks write to A[i], creating a
210+
; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
211+
; because the execution order is preserved: fore block executes first, then
212+
; the entire inner loop (sub block) executes, maintaining the original semantics.
169213
define void @fore_sub_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
170214
entry:
171215
%cmp = icmp sgt i32 %N, 0
@@ -204,6 +248,15 @@ cleanup:
204248
; CHECK-LABEL: fore_sub_more
205249
; CHECK: %j = phi
206250
; CHECK-NOT: %j.1 = phi
251+
;
252+
; fore_sub_more should NOT be unroll-and-jammed due to a dependency violation.
253+
; Memory accesses:
254+
; - Fore block: A[i] = 1 (write in outer loop before inner)
255+
; - Sub block: A[i+1] = sum (write inside inner loop)
256+
; Dependency conflict: The fore block writes A[i] and sub block writes A[i+1].
257+
; When unroll-and-jamming, iteration i's fore block writes A[i] but iteration i's
258+
; sub block writes A[i+1]. This conflicts with iteration i+1's fore block write
259+
; to A[i+1], creating a write-after-write hazard whose order jamming would reverse.
207260
define void @fore_sub_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
208261
entry:
209262
%cmp = icmp sgt i32 %N, 0
@@ -244,6 +297,14 @@ cleanup:
244297
; CHECK: %j.1 = phi
245298
; CHECK: %j.2 = phi
246299
; CHECK: %j.3 = phi
300+
;
301+
; sub_aft_less SHOULD be unroll-and-jammed (count=4) as it's safe.
302+
; Memory accesses:
303+
; - Sub block: A[i] = 1 (write inside inner loop)
304+
; - Aft block: A[i-1] = sum (write in outer loop after inner)
305+
; No dependency conflict: The sub block writes A[i] and aft block writes A[i-1].
306+
; These access different array elements, so unroll-and-jam is safe. The backward
307+
; dependency pattern doesn't create conflicts between unrolled iterations.
247308
define void @sub_aft_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
248309
entry:
249310
%cmp = icmp sgt i32 %N, 0
@@ -284,6 +345,15 @@ cleanup:
284345
; CHECK: %j.1 = phi
285346
; CHECK: %j.2 = phi
286347
; CHECK: %j.3 = phi
348+
;
349+
; sub_aft_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
350+
; Memory accesses:
351+
; - Sub block: A[i] = 1 (write inside inner loop)
352+
; - Aft block: A[i] = sum (write in outer loop after inner)
353+
; Dependency conflict: Both sub and aft blocks write to A[i], creating a
354+
; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
355+
; because the execution order is preserved: the entire inner loop (sub block)
356+
; executes first, then the aft block executes, maintaining original semantics.
287357
define void @sub_aft_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
288358
entry:
289359
%cmp = icmp sgt i32 %N, 0
@@ -322,6 +392,15 @@ cleanup:
322392
; CHECK-LABEL: sub_aft_more
323393
; CHECK: %j = phi
324394
; CHECK-NOT: %j.1 = phi
395+
;
396+
; sub_aft_more should NOT be unroll-and-jammed due to a dependency violation.
397+
; Memory accesses:
398+
; - Sub block: A[i] = 1 (write inside inner loop)
399+
; - Aft block: A[i+1] = sum (write in outer loop after inner)
400+
; Dependency conflict: The sub block writes A[i] and aft block writes A[i+1].
401+
; When unroll-and-jamming, iteration i's aft block writes A[i+1] which conflicts
402+
; with iteration i+1's sub block write to A[i+1], creating a write-after-write
403+
; hazard whose order jamming would reverse, violating the original sequential semantics.
325404
define void @sub_aft_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
326405
entry:
327406
%cmp = icmp sgt i32 %N, 0
@@ -360,6 +439,15 @@ cleanup:
360439
; CHECK-LABEL: sub_sub_less
361440
; CHECK: %j = phi
362441
; CHECK-NOT: %j.1 = phi
442+
;
443+
; sub_sub_less should NOT be unroll-and-jammed due to a dependency violation.
444+
; Memory accesses:
445+
; - Sub block: A[i] = 1 (write inside inner loop)
446+
; - Sub block: A[i-1] = sum (write inside inner loop)
447+
; Dependency conflict: Both writes are in the sub block (inner loop), accessing
448+
; A[i] and A[i-1]. When unroll-and-jamming, the inner loop is jammed, meaning
449+
; iterations of the inner loop from different outer iterations execute together.
450+
; This creates a backward dependency whose order jamming is not guaranteed to preserve.
363451
define void @sub_sub_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
364452
entry:
365453
%cmp = icmp sgt i32 %N, 0
@@ -400,6 +488,15 @@ cleanup:
400488
; CHECK: %j.1 = phi
401489
; CHECK: %j.2 = phi
402490
; CHECK: %j.3 = phi
491+
;
492+
; sub_sub_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
493+
; Memory accesses:
494+
; - Sub block: A[i] = 1 (write inside inner loop)
495+
; - Sub block: A[i] = sum (write inside inner loop)
496+
; Dependency conflict: Both writes are to A[i] within the sub block, creating a
497+
; write-after-write (WAW) dependency. However, this is safe for unroll-and-jam
498+
; because both writes are in the same basic block and maintain their relative
499+
; order: A[i] = 1 always executes before A[i] = sum in each iteration.
403500
define void @sub_sub_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
404501
entry:
405502
%cmp = icmp sgt i32 %N, 0
@@ -438,6 +535,15 @@ cleanup:
438535
; CHECK-LABEL: sub_sub_more
439536
; CHECK: %j = phi
440537
; CHECK-NOT: %j.1 = phi
538+
;
539+
; sub_sub_more should NOT be unroll-and-jammed due to a dependency violation.
540+
; Memory accesses:
541+
; - Sub block: A[i] = 1 (write inside inner loop)
542+
; - Sub block: A[i+1] = sum (write inside inner loop)
543+
; Dependency conflict: Both writes are in the sub block, accessing A[i] and A[i+1].
544+
; When unroll-and-jamming, iteration i's sub block writes A[i+1] which conflicts
545+
; with iteration i+1's sub block write to A[i+1]. This creates a forward
546+
; dependency whose write-after-write order jamming is not guaranteed to preserve.
441547
define void @sub_sub_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
442548
entry:
443549
%cmp = icmp sgt i32 %N, 0

0 commit comments

Comments
 (0)