|
| 1 | +; RUN: opt -S -mtriple=amdgcn-- -passes=loop-unroll -debug-only=AMDGPUtti < %s 2>&1 | FileCheck %s |
| 2 | + |
| 3 | +; For @dependent_sub_fullunroll, the threshold bonus should apply |
| 4 | +; CHECK: due to subloop's trip count becoming runtime-independent after unrolling |
| 5 | + |
| 6 | +; For @dependent_sub_no_fullunroll, the threshold bonus should not apply |
| 7 | +; CHECK-NOT: due to subloop's trip count becoming runtime-independent after unrolling |
| 8 | + |
| 9 | +; Check that the outer loop of a doubly nested loop is fully unrolled when the |
| 10 | +; inner loop's trip count depends exclusively on constants and the outer IV. |
| 11 | +; The full unroll is expected thanks to a threshold bonus in AMDGPU's TTI. |
| 12 | + |
| 13 | +; CHECK-LABEL: @dependent_sub_fullunroll |
| 14 | +; CHECK: inner.header_latch_exiting.7 |
| 15 | +; CHECK: outer.latch_exiting.7 |
| 16 | + |
| 17 | +define void @dependent_sub_fullunroll(ptr noundef %mem) { |
| 18 | +entry: |
| 19 | + br label %outer.header |
| 20 | + |
| 21 | +outer.header: ; preds = %entry, %outer.latch_exiting |
| 22 | + %outer.iv = phi i32 [ 0, %entry ], [ %outer.iv_next, %outer.latch_exiting ] ; outer IV: 0 on entry, then the value computed in the outer latch |
| 23 | + br label %inner.header_latch_exiting |
| 24 | + |
| 25 | +inner.header_latch_exiting: ; preds = %outer.header, %inner.header_latch_exiting |
| 26 | + %inner.iv = phi i32 [ %outer.iv, %outer.header ], [ %inner.iv_next, %inner.header_latch_exiting ] ; inner IV starts at the current outer IV, not at 0 |
| 27 | + %inner.iv_next = add nuw nsw i32 %inner.iv, 1 |
| 28 | + %outer.iv.ext = zext nneg i32 %outer.iv to i64 |
| 29 | + %idx_part = mul nuw nsw i64 %outer.iv.ext, 16 ; outer IV scaled by a row stride of 16 |
| 30 | + %inner.iv.ext = zext nneg i32 %inner.iv to i64 |
| 31 | + %idx = add nuw nsw i64 %idx_part, %inner.iv.ext ; idx = outer.iv * 16 + inner.iv |
| 32 | + %addr = getelementptr inbounds i8, ptr %mem, i64 %idx ; i8 element type, so %idx is a byte offset from %mem |
| 33 | + store i32 0, ptr %addr |
| 34 | + %inner.cond = icmp ult i32 %inner.iv_next, 8 ; constant bound: the inner trip count is fixed once %outer.iv is known |
| 35 | + br i1 %inner.cond, label %inner.header_latch_exiting, label %outer.latch_exiting, !llvm.loop !1 ; !1 is the loop metadata defined at the end of this file |
| 36 | + |
| 37 | +outer.latch_exiting: ; preds = %inner.header_latch_exiting |
| 38 | + %outer.iv_next = add nuw nsw i32 %outer.iv, 1 |
| 39 | + %outer.cond = icmp ult i32 %outer.iv_next, 8 ; outer loop repeats while the next IV is below 8, i.e. %outer.iv takes 0..7 |
| 40 | + br i1 %outer.cond, label %outer.header, label %end, !llvm.loop !1 ; outer loop uses the same loop metadata node as the inner loop |
| 41 | + |
| 42 | +end: ; preds = %outer.latch_exiting |
| 43 | + ret void |
| 44 | +} |
| 45 | + |
| 46 | +; Check that the outer loop of the same loop nest as dependent_sub_fullunroll |
| 47 | +; is not fully unrolled when the inner loop's final IV value depends on a |
| 48 | +; function argument instead of a combination of the outer IV and constants. |
| 49 | + |
| 50 | +; CHECK-LABEL: @dependent_sub_no_fullunroll |
| 51 | +; CHECK-NOT: outer.latch_exiting.7 |
| 52 | +; CHECK-NOT: outer.latch_exiting.7 |
| 53 | + |
| 54 | +define void @dependent_sub_no_fullunroll(ptr noundef %mem, i32 noundef %inner.ub) { |
| 55 | +entry: |
| 56 | + br label %outer.header |
| 57 | + |
| 58 | +outer.header: ; preds = %entry, %outer.latch_exiting |
| 59 | + %outer.iv = phi i32 [ 0, %entry ], [ %outer.iv_next, %outer.latch_exiting ] ; outer IV: 0 on entry, then the value computed in the outer latch |
| 60 | + br label %inner.header_latch_exiting |
| 61 | + |
| 62 | +inner.header_latch_exiting: ; preds = %outer.header, %inner.header_latch_exiting |
| 63 | + %inner.iv = phi i32 [ %outer.iv, %outer.header ], [ %inner.iv_next, %inner.header_latch_exiting ] ; inner IV starts at the current outer IV, not at 0 |
| 64 | + %inner.iv_next = add nuw nsw i32 %inner.iv, 1 |
| 65 | + %outer.iv.ext = zext nneg i32 %outer.iv to i64 |
| 66 | + %idx_part = mul nuw nsw i64 %outer.iv.ext, 16 ; outer IV scaled by a row stride of 16 |
| 67 | + %inner.iv.ext = zext nneg i32 %inner.iv to i64 |
| 68 | + %idx = add nuw nsw i64 %idx_part, %inner.iv.ext ; idx = outer.iv * 16 + inner.iv |
| 69 | + %addr = getelementptr inbounds i8, ptr %mem, i64 %idx ; i8 element type, so %idx is a byte offset from %mem |
| 70 | + store i32 0, ptr %addr |
| 71 | + %inner.cond = icmp ult i32 %inner.iv_next, %inner.ub ; bound is a function argument, so the inner trip count stays unknown even for a fixed %outer.iv |
| 72 | + br i1 %inner.cond, label %inner.header_latch_exiting, label %outer.latch_exiting, !llvm.loop !1 ; !1 is the loop metadata defined at the end of this file |
| 73 | + |
| 74 | +outer.latch_exiting: ; preds = %inner.header_latch_exiting |
| 75 | + %outer.iv_next = add nuw nsw i32 %outer.iv, 1 |
| 76 | + %outer.cond = icmp ult i32 %outer.iv_next, 8 ; outer loop repeats while the next IV is below 8, i.e. %outer.iv takes 0..7 |
| 77 | + br i1 %outer.cond, label %outer.header, label %end, !llvm.loop !1 ; outer loop uses the same loop metadata node as the inner loop |
| 78 | + |
| 79 | +end: ; preds = %outer.latch_exiting |
| 80 | + ret void |
| 81 | +} |
| 82 | + |
| 83 | +!1 = !{!1, !2} |
| 84 | +!2 = !{!"amdgpu.loop.unroll.threshold", i32 100} |
0 commit comments