|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck %s

; Check that the copy from s[2:3] to v[0:1] occurs inside the loop, not after it.
; (Temporal divergence: %i is produced uniformly inside the divergent loop but
; used after it, so the value must be copied to VGPRs while the loop's exec
; mask still covers the lanes that defined it.)

define i64 @test_temporal_divergence(i32 %arg) #0 {
; CHECK-LABEL: test_temporal_divergence:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_add_u32_e32 v0, 1, v0
; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: .LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_add_u32_e32 v0, -1, v0
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_mov_b64 s[2:3], s[4:5]
; CHECK-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; CHECK-NEXT: s_mov_b64 s[4:5], 1
; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_cbranch_execnz .LBB0_1
; CHECK-NEXT: ; %bb.2: ; %end
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  br label %loop

loop:
  ; %i is 0 on the first iteration (from entry), 1 on every back-edge trip.
  %i = phi i64 [ 1, %loop ], [ 0, %entry ]
  %count = phi i32 [ %inc, %loop ], [ 0, %entry ]
  %inc = add i32 %count, 1
  ; Divergent exit condition: %arg may differ per lane.
  %cond = icmp eq i32 %count, %arg
  br i1 %cond, label %end, label %loop

end:
  ret i64 %i
}