|
| 1 | +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| 2 | +# RUN: llc -mtriple=nvptx64 -mcpu=sm_20 -run-pass=early-machinelicm %s -o - | FileCheck %s |
| 3 | + |
| 4 | +# This test checks that the early-machineLICM pass successfully creates a new |
| 5 | +# loop preheader by splitting the critical edge and hoisting the loop invariant |
| 6 | +# value `%8` to the preheader. |
| 7 | +# Since the critical edge successor is a loop header, the splitting does not |
| 8 | +# break the structured CFG, which is a requirement for the NVPTX target. |
| 9 | + |
| 10 | +--- |
| 11 | +name: test_hoist |
| 12 | +tracksRegLiveness: true |
| 13 | +registers: |
| 14 | + - { id: 0, class: b64, preferred-register: '', flags: [ ] } |
| 15 | + - { id: 1, class: b32, preferred-register: '', flags: [ ] } |
| 16 | + - { id: 2, class: b32, preferred-register: '', flags: [ ] } |
| 17 | + - { id: 3, class: b32, preferred-register: '', flags: [ ] } |
| 18 | + - { id: 4, class: b32, preferred-register: '', flags: [ ] } |
| 19 | + - { id: 5, class: b32, preferred-register: '', flags: [ ] } |
| 20 | + - { id: 6, class: b64, preferred-register: '', flags: [ ] } |
| 21 | + - { id: 7, class: b1, preferred-register: '', flags: [ ] } |
| 22 | + - { id: 8, class: b32, preferred-register: '', flags: [ ] } |
| 23 | + - { id: 9, class: b1, preferred-register: '', flags: [ ] } |
| 24 | +body: | |
| 25 | + ; CHECK-LABEL: name: test_hoist |
| 26 | + ; CHECK: bb.0.entry: |
| 27 | + ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.3(0x50000000) |
| 28 | + ; CHECK-NEXT: {{ $}} |
| 29 | + ; CHECK-NEXT: [[LD_i32_:%[0-9]+]]:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101) |
| 30 | + ; CHECK-NEXT: [[LD_i64_:%[0-9]+]]:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101) |
| 31 | + ; CHECK-NEXT: [[ADD64ri:%[0-9]+]]:b64 = nuw ADD64ri killed [[LD_i64_]], 2 |
| 32 | + ; CHECK-NEXT: [[LD_i32_1:%[0-9]+]]:b32 = LD_i32 0, 0, 1, 3, 32, [[ADD64ri]], 0 |
| 33 | + ; CHECK-NEXT: [[SETP_i32ri:%[0-9]+]]:b1 = SETP_i32ri [[LD_i32_]], 0, 0 |
| 34 | + ; CHECK-NEXT: CBranch killed [[SETP_i32ri]], %bb.2 |
| 35 | + ; CHECK-NEXT: {{ $}} |
| 36 | + ; CHECK-NEXT: bb.3: |
| 37 | + ; CHECK-NEXT: successors: %bb.1(0x80000000) |
| 38 | + ; CHECK-NEXT: {{ $}} |
| 39 | + ; CHECK-NEXT: [[ADD32ri:%[0-9]+]]:b32 = ADD32ri [[LD_i32_]], -1 |
| 40 | + ; CHECK-NEXT: {{ $}} |
| 41 | + ; CHECK-NEXT: bb.1: |
| 42 | + ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) |
| 43 | + ; CHECK-NEXT: {{ $}} |
| 44 | + ; CHECK-NEXT: [[PHI:%[0-9]+]]:b32 = PHI [[LD_i32_1]], %bb.3, %3, %bb.1 |
| 45 | + ; CHECK-NEXT: [[SREM32rr:%[0-9]+]]:b32 = SREM32rr [[PHI]], [[ADD32ri]] |
| 46 | + ; CHECK-NEXT: [[SETP_i32ri1:%[0-9]+]]:b1 = SETP_i32ri [[SREM32rr]], 0, 1 |
| 47 | + ; CHECK-NEXT: CBranch killed [[SETP_i32ri1]], %bb.1 |
| 48 | + ; CHECK-NEXT: GOTO %bb.2 |
| 49 | + ; CHECK-NEXT: {{ $}} |
| 50 | + ; CHECK-NEXT: bb.2: |
| 51 | + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:b32 = PHI [[LD_i32_1]], %bb.0, [[SREM32rr]], %bb.1 |
| 52 | + ; CHECK-NEXT: ST_i32 [[PHI1]], 0, 0, 1, 32, [[ADD64ri]], 0 |
| 53 | + ; CHECK-NEXT: Return |
| 54 | + bb.0.entry: |
| 55 | + successors: %bb.2(0x30000000), %bb.1(0x50000000) |
| 56 | +
|
| 57 | + %5:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101) |
| 58 | + %6:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101) |
| 59 | + %0:b64 = nuw ADD64ri killed %6, 2 |
| 60 | + %1:b32 = LD_i32 0, 0, 1, 3, 32, %0, 0 |
| 61 | + %7:b1 = SETP_i32ri %5, 0, 0 |
| 62 | + CBranch killed %7, %bb.2 |
| 63 | + GOTO %bb.1 |
| 64 | +
|
| 65 | +
|
| 66 | + bb.1: |
| 67 | + successors: %bb.2(0x04000000), %bb.1(0x7c000000) |
| 68 | +
|
| 69 | + %2:b32 = PHI %1, %bb.0, %3, %bb.1 |
| 70 | + %8:b32 = ADD32ri %5, -1 |
| 71 | + %3:b32 = SREM32rr %2, %8 |
| 72 | + %9:b1 = SETP_i32ri %3, 0, 1 |
| 73 | + CBranch killed %9, %bb.1 |
| 74 | + GOTO %bb.2 |
| 75 | +
|
| 76 | + bb.2: |
| 77 | + %4:b32 = PHI %1, %bb.0, %3, %bb.1 |
| 78 | + ST_i32 %4, 0, 0, 1, 32, %0, 0 |
| 79 | + Return |
| 80 | +... |
0 commit comments