@@ -8,6 +8,15 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
88; CHECK: %j.1 = phi
99; CHECK: %j.2 = phi
1010; CHECK: %j.3 = phi
11+ ;
12+ ; fore_aft_less SHOULD be unroll-and-jammed (count=4) as it's safe.
13+ ; Memory accesses:
14+ ; - Fore block: A[i] = 1 (write in outer loop before inner)
15+ ; - Aft block: A[i-1] = sum (write in outer loop after inner)
16+ ; No dependency violation: iteration i-1's fore block and iteration i's aft
17+ ; block both write A[i-1], and the fore write comes first in the original
18+ ; order. Within the unrolled body all fore blocks still run before all aft
19+ ; blocks, so the order of those two writes (a WAW dependency) is preserved.
1120define void @fore_aft_less (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
1221entry:
1322 %cmp = icmp sgt i32 %N , 0
@@ -48,6 +57,15 @@ cleanup:
4857; CHECK: %j.1 = phi
4958; CHECK: %j.2 = phi
5059; CHECK: %j.3 = phi
60+ ;
61+ ; fore_aft_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
62+ ; Memory accesses:
63+ ; - Fore block: A[i] = 1 (write in outer loop before inner)
64+ ; - Aft block: A[i] = sum (write in outer loop after inner)
65+ ; Dependency: both the fore and aft blocks write A[i], a write-after-write
66+ ; (WAW) dependency within a single outer iteration. This is safe for
67+ ; unroll-and-jam because each iteration's fore block still executes before
68+ ; its own aft block, preserving the original order of the two writes.
5169define void @fore_aft_eq (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
5270entry:
5371 %cmp = icmp sgt i32 %N , 0
@@ -86,6 +104,15 @@ cleanup:
86104; CHECK-LABEL: fore_aft_more
87105; CHECK: %j = phi
88106; CHECK-NOT: %j.1 = phi
107+ ;
108+ ; fore_aft_more should NOT be unroll-and-jammed due to a dependency violation.
109+ ; Memory accesses:
110+ ; - Fore block: A[i] = 1 (write in outer loop before inner)
111+ ; - Aft block: A[i+1] = sum (write in outer loop after inner)
112+ ; Dependency conflict: iteration i's aft block writes A[i+1], and iteration
113+ ; i+1's fore block then writes A[i+1] as well (a WAW dependency). Unroll-and-jam
114+ ; would hoist iteration i+1's fore block above iteration i's aft block,
115+ ; reversing those writes and possibly changing the value left in A[i+1].
89116define void @fore_aft_more (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
90117entry:
91118 %cmp = icmp sgt i32 %N , 0
@@ -126,6 +153,14 @@ cleanup:
126153; CHECK: %j.1 = phi
127154; CHECK: %j.2 = phi
128155; CHECK: %j.3 = phi
156+ ;
157+ ; fore_sub_less SHOULD be unroll-and-jammed (count=4) as it's safe.
158+ ; Memory accesses:
159+ ; - Fore block: A[i] = 1 (write in outer loop before inner)
160+ ; - Sub block: A[i-1] = sum (write inside inner loop)
161+ ; No dependency violation: iteration i-1's fore block writes A[i-1] before
162+ ; iteration i's sub block does (a WAW dependency). Unroll-and-jam keeps all fore
163+ ; blocks ahead of the jammed inner loop, so that order is preserved.
129164define void @fore_sub_less (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
130165entry:
131166 %cmp = icmp sgt i32 %N , 0
@@ -166,6 +201,15 @@ cleanup:
166201; CHECK: %j.1 = phi
167202; CHECK: %j.2 = phi
168203; CHECK: %j.3 = phi
204+ ;
205+ ; fore_sub_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
206+ ; Memory accesses:
207+ ; - Fore block: A[i] = 1 (write in outer loop before inner)
208+ ; - Sub block: A[i] = sum (write inside inner loop)
209+ ; Dependency: both the fore and sub blocks write A[i], a write-after-write
210+ ; (WAW) dependency within a single outer iteration. This is safe for
211+ ; unroll-and-jam because each iteration's fore block still executes before any
212+ ; of its inner-loop (sub block) iterations, preserving the original order.
169213define void @fore_sub_eq (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
170214entry:
171215 %cmp = icmp sgt i32 %N , 0
@@ -204,6 +248,15 @@ cleanup:
204248; CHECK-LABEL: fore_sub_more
205249; CHECK: %j = phi
206250; CHECK-NOT: %j.1 = phi
251+ ;
252+ ; fore_sub_more should NOT be unroll-and-jammed due to a dependency violation.
253+ ; Memory accesses:
254+ ; - Fore block: A[i] = 1 (write in outer loop before inner)
255+ ; - Sub block: A[i+1] = sum (write inside inner loop)
256+ ; Dependency conflict: iteration i's sub block writes A[i+1], and iteration
257+ ; i+1's fore block then writes A[i+1] as well (a WAW dependency). Unroll-and-jam
258+ ; would hoist iteration i+1's fore block above iteration i's inner loop,
259+ ; reversing those writes and possibly changing the value left in A[i+1].
207260define void @fore_sub_more (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
208261entry:
209262 %cmp = icmp sgt i32 %N , 0
@@ -244,6 +297,14 @@ cleanup:
244297; CHECK: %j.1 = phi
245298; CHECK: %j.2 = phi
246299; CHECK: %j.3 = phi
300+ ;
301+ ; sub_aft_less SHOULD be unroll-and-jammed (count=4) as it's safe.
302+ ; Memory accesses:
303+ ; - Sub block: A[i] = 1 (write inside inner loop)
304+ ; - Aft block: A[i-1] = sum (write in outer loop after inner)
305+ ; No dependency violation: iteration i-1's sub block writes A[i-1] before
306+ ; iteration i's aft block does (a WAW dependency). Unroll-and-jam keeps the
307+ ; jammed inner loop ahead of all aft blocks, so that order is preserved.
247308define void @sub_aft_less (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
248309entry:
249310 %cmp = icmp sgt i32 %N , 0
@@ -284,6 +345,15 @@ cleanup:
284345; CHECK: %j.1 = phi
285346; CHECK: %j.2 = phi
286347; CHECK: %j.3 = phi
348+ ;
349+ ; sub_aft_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
350+ ; Memory accesses:
351+ ; - Sub block: A[i] = 1 (write inside inner loop)
352+ ; - Aft block: A[i] = sum (write in outer loop after inner)
353+ ; Dependency: both the sub and aft blocks write A[i], a write-after-write
354+ ; (WAW) dependency within a single outer iteration. This is safe for
355+ ; unroll-and-jam because each iteration's inner-loop (sub block) iterations
356+ ; still all execute before its aft block, preserving the original order.
287357define void @sub_aft_eq (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
288358entry:
289359 %cmp = icmp sgt i32 %N , 0
@@ -322,6 +392,15 @@ cleanup:
322392; CHECK-LABEL: sub_aft_more
323393; CHECK: %j = phi
324394; CHECK-NOT: %j.1 = phi
395+ ;
396+ ; sub_aft_more should NOT be unroll-and-jammed due to a dependency violation.
397+ ; Memory accesses:
398+ ; - Sub block: A[i] = 1 (write inside inner loop)
399+ ; - Aft block: A[i+1] = sum (write in outer loop after inner)
400+ ; Dependency conflict: iteration i's aft block writes A[i+1], and iteration
401+ ; i+1's sub block then writes A[i+1] as well (a WAW dependency). Unroll-and-jam
402+ ; would run iteration i+1's inner loop, jammed with iteration i's, before
403+ ; iteration i's aft block, reversing those writes to A[i+1].
325404define void @sub_aft_more (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
326405entry:
327406 %cmp = icmp sgt i32 %N , 0
@@ -360,6 +439,15 @@ cleanup:
360439; CHECK-LABEL: sub_sub_less
361440; CHECK: %j = phi
362441; CHECK-NOT: %j.1 = phi
442+ ;
443+ ; sub_sub_less should NOT be unroll-and-jammed due to a dependency violation.
444+ ; Memory accesses:
445+ ; - Sub block: A[i] = 1 (write inside inner loop)
446+ ; - Sub block: A[i-1] = sum (write inside inner loop)
447+ ; Dependency conflict: both writes are in the sub block (inner loop). Iteration
448+ ; i-1 writes A[i-1] = 1 and iteration i writes A[i-1] = sum. Originally all of
449+ ; iteration i-1's inner-loop writes precede iteration i's; once jammed, the two
450+ ; inner loops interleave, so some of those WAW-dependent writes are reordered.
363451define void @sub_sub_less (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
364452entry:
365453 %cmp = icmp sgt i32 %N , 0
@@ -400,6 +488,15 @@ cleanup:
400488; CHECK: %j.1 = phi
401489; CHECK: %j.2 = phi
402490; CHECK: %j.3 = phi
491+ ;
492+ ; sub_sub_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
493+ ; Memory accesses:
494+ ; - Sub block: A[i] = 1 (write inside inner loop)
495+ ; - Sub block: A[i] = sum (write inside inner loop)
496+ ; Dependency: both writes are to A[i] within the sub block, a write-after-write
497+ ; (WAW) dependency. This is safe for unroll-and-jam because each outer
498+ ; iteration only writes its own element A[i]: jamming interleaves writes to
499+ ; different elements and never reorders two writes to the same one.
403500define void @sub_sub_eq (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
404501entry:
405502 %cmp = icmp sgt i32 %N , 0
@@ -438,6 +535,15 @@ cleanup:
438535; CHECK-LABEL: sub_sub_more
439536; CHECK: %j = phi
440537; CHECK-NOT: %j.1 = phi
538+ ;
539+ ; sub_sub_more should NOT be unroll-and-jammed due to a dependency violation.
540+ ; Memory accesses:
541+ ; - Sub block: A[i] = 1 (write inside inner loop)
542+ ; - Sub block: A[i+1] = sum (write inside inner loop)
543+ ; Dependency conflict: both writes are in the sub block (inner loop). Iteration
544+ ; i writes A[i+1] = sum and iteration i+1 writes A[i+1] = 1. Originally all of
545+ ; iteration i's inner-loop writes precede iteration i+1's; once jammed, the two
546+ ; inner loops interleave, so some of those WAW-dependent writes are reordered.
441547define void @sub_sub_more (ptr noalias nocapture %A , i32 %N , ptr noalias nocapture readonly %B ) {
442548entry:
443549 %cmp = icmp sgt i32 %N , 0
0 commit comments