|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| 2 | +; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -O3 -S | \ |
| 3 | +; RUN: FileCheck %s --check-prefix=CHECK-O3 |
| 4 | + ; @kernel__0 builds getelementptr chains off %pin (addrspace(3)) and %pout (addrspace(5)) in the entry block, then loops copying four half values from the former to the latter; the autogenerated assertions pin the IR expected after -O3. NOTE(review): on amdgcn these address spaces are presumably LDS and private memory -- confirm against the AMDGPU backend docs. |
| 5 | +define void @kernel__0(ptr addrspace(5) noalias %pout, ptr addrspace(3) noalias %pin, i32 %num, i32 %ofst0, i32 %ofst1, i32 %ofst2, i32 %ofst3, i32 %ofst4) { |
| 6 | +; CHECK-O3-LABEL: @kernel__0( |
| 7 | +; CHECK-O3-NEXT: entry: |
| 8 | +; CHECK-O3-NEXT: [[SREM:%.*]] = srem i32 [[NUM:%.*]], 1024 |
| 9 | +; CHECK-O3-NEXT: [[ADD1:%.*]] = add nsw i32 [[SREM]], [[OFST0:%.*]] |
| 10 | +; CHECK-O3-NEXT: [[ADD2:%.*]] = add nsw i32 [[OFST1:%.*]], [[SREM]] |
| 11 | +; CHECK-O3-NEXT: [[ADD3:%.*]] = add nsw i32 [[OFST2:%.*]], [[SREM]] |
| 12 | +; CHECK-O3-NEXT: [[ADD4:%.*]] = add nsw i32 [[OFST3:%.*]], [[SREM]] |
| 13 | +; CHECK-O3-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[PIN:%.*]], i32 222 |
| 14 | +; CHECK-O3-NEXT: [[GEP1:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP]], i32 [[ADD1]] |
| 15 | +; CHECK-O3-NEXT: [[GEP2:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP]], i32 [[ADD2]] |
| 16 | +; CHECK-O3-NEXT: [[GEP3:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[PIN]], i32 444 |
| 17 | +; CHECK-O3-NEXT: [[GEP4:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP3]], i32 [[ADD1]] |
| 18 | +; CHECK-O3-NEXT: [[GEP5:%.*]] = getelementptr inbounds half, ptr addrspace(3) [[GEP3]], i32 [[ADD2]] |
| 19 | +; CHECK-O3-NEXT: [[GEP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[GEP4]], i32 666 |
| 20 | +; CHECK-O3-NEXT: [[GEP7:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[GEP5]], i32 -888 |
| 21 | +; CHECK-O3-NEXT: [[MUL:%.*]] = mul nsw i32 [[SREM]], [[OFST0]] |
| 22 | +; CHECK-O3-NEXT: [[GEP8:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[POUT:%.*]], i32 1110 |
| 23 | +; CHECK-O3-NEXT: [[GEP9:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP8]], i32 [[MUL]] |
| 24 | +; CHECK-O3-NEXT: [[GEP10:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP9]], i32 [[ADD3]] |
| 25 | +; CHECK-O3-NEXT: [[GEP11:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP9]], i32 [[ADD4]] |
| 26 | +; CHECK-O3-NEXT: [[GEP12:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[POUT]], i32 1332 |
| 27 | +; CHECK-O3-NEXT: [[GEP13:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP12]], i32 [[MUL]] |
| 28 | +; CHECK-O3-NEXT: [[GEP14:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP13]], i32 [[ADD3]] |
| 29 | +; CHECK-O3-NEXT: [[GEP15:%.*]] = getelementptr inbounds half, ptr addrspace(5) [[GEP13]], i32 [[ADD4]] |
| 30 | +; CHECK-O3-NEXT: [[GEP16:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[GEP14]], i32 1554 |
| 31 | +; CHECK-O3-NEXT: [[GEP17:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[GEP15]], i32 -1776 |
| 32 | +; CHECK-O3-NEXT: [[GEP18:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[PIN]], i32 [[OFST4:%.*]] |
| 33 | +; CHECK-O3-NEXT: [[GEP20:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[POUT]], i32 [[OFST4]] |
| 34 | +; CHECK-O3-NEXT: br label [[LOOP:%.*]] |
| 35 | +; CHECK-O3: loop: |
| 36 | +; CHECK-O3-NEXT: [[LOAD:%.*]] = load half, ptr addrspace(3) [[GEP1]], align 2 |
| 37 | +; CHECK-O3-NEXT: [[LOAD1:%.*]] = load half, ptr addrspace(3) [[GEP2]], align 2 |
| 38 | +; CHECK-O3-NEXT: [[LOAD2:%.*]] = load half, ptr addrspace(3) [[GEP6]], align 2 |
| 39 | +; CHECK-O3-NEXT: [[LOAD3:%.*]] = load half, ptr addrspace(3) [[GEP7]], align 2 |
| 40 | +; CHECK-O3-NEXT: store half [[LOAD]], ptr addrspace(5) [[GEP10]], align 2 |
| 41 | +; CHECK-O3-NEXT: store half [[LOAD1]], ptr addrspace(5) [[GEP11]], align 2 |
| 42 | +; CHECK-O3-NEXT: store half [[LOAD2]], ptr addrspace(5) [[GEP16]], align 2 |
| 43 | +; CHECK-O3-NEXT: store half [[LOAD3]], ptr addrspace(5) [[GEP17]], align 2 |
| 44 | +; CHECK-O3-NEXT: [[GEP19:%.*]] = load i32, ptr addrspace(3) [[GEP18]], align 4 |
| 45 | +; CHECK-O3-NEXT: [[GEP21:%.*]] = load i32, ptr addrspace(5) [[GEP20]], align 4 |
| 46 | +; CHECK-O3-NEXT: store i32 [[GEP21]], ptr addrspace(3) [[PIN]], align 4 |
| 47 | +; CHECK-O3-NEXT: [[ICMP:%.*]] = icmp eq i32 [[GEP19]], [[GEP21]] |
| 48 | +; CHECK-O3-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT:%.*]] |
| 49 | +; CHECK-O3: exit: |
| 50 | +; CHECK-O3-NEXT: ret void |
| 51 | +; |
| 52 | +entry: |
| 53 | + %srem = srem i32 %num, 1024 |
| 54 | + %add = add nsw i32 %srem, 3 ; NOTE(review): %add has no users in this function and is absent from the expected output |
| 55 | + %add1 = add nsw i32 %srem, %ofst0 |
| 56 | + %add2 = add nsw i32 %srem, %ofst1 |
| 57 | + %add3 = add nsw i32 %srem, %ofst2 |
| 58 | + %add4 = add nsw i32 %srem, %ofst3 |
| 59 | + ; Source pointers off %pin: bases at constant offsets of 111 and 222 halves, each indexed by %add1 and %add2; %gep6/%gep7 add a further +333/-444 halves. The expected output shows the constant offsets as i8 byte offsets (e.g. 111 halves -> 222 bytes). |
| 60 | + %gep = getelementptr inbounds half, ptr addrspace(3) %pin, i32 111 |
| 61 | + %gep1 = getelementptr inbounds half, ptr addrspace(3) %gep, i32 %add1 |
| 62 | + %gep2 = getelementptr inbounds half, ptr addrspace(3) %gep, i32 %add2 |
| 63 | + %gep3 = getelementptr inbounds half, ptr addrspace(3) %pin, i32 222 |
| 64 | + %gep4 = getelementptr inbounds half, ptr addrspace(3) %gep3, i32 %add1 |
| 65 | + %gep5 = getelementptr inbounds half, ptr addrspace(3) %gep3, i32 %add2 |
| 66 | + %gep6 = getelementptr inbounds half, ptr addrspace(3) %gep4, i32 333 |
| 67 | + %gep7 = getelementptr inbounds half, ptr addrspace(3) %gep5, i32 -444 |
| 68 | + ; %mul is an additional variable index applied to both %pout bases below. |
| 69 | + %mul = mul nsw i32 %srem, %ofst0 |
| 70 | + ; Destination pointers off %pout: bases at 555 and 666 halves, then the %mul index, then %add3/%add4; %gep16/%gep17 add a further +777/-888 halves. |
| 71 | + %gep8 = getelementptr inbounds half, ptr addrspace(5) %pout, i32 555 |
| 72 | + %gep9 = getelementptr inbounds half, ptr addrspace(5) %gep8, i32 %mul |
| 73 | + %gep10 = getelementptr inbounds half, ptr addrspace(5) %gep9, i32 %add3 |
| 74 | + %gep11 = getelementptr inbounds half, ptr addrspace(5) %gep9, i32 %add4 |
| 75 | + %gep12 = getelementptr inbounds half, ptr addrspace(5) %pout, i32 666 |
| 76 | + %gep13 = getelementptr inbounds half, ptr addrspace(5) %gep12, i32 %mul |
| 77 | + %gep14 = getelementptr inbounds half, ptr addrspace(5) %gep13, i32 %add3 |
| 78 | + %gep15 = getelementptr inbounds half, ptr addrspace(5) %gep13, i32 %add4 |
| 79 | + %gep16 = getelementptr inbounds half, ptr addrspace(5) %gep14, i32 777 |
| 80 | + %gep17 = getelementptr inbounds half, ptr addrspace(5) %gep15, i32 -888 |
| 81 | + |
| 82 | + br label %loop |
| 83 | + ; Self-loop: the back-edge is taken while the two i32 values loaded at the end of the body compare equal. |
| 84 | +loop: ; loop |
| 85 | + %load = load half, ptr addrspace(3) %gep1, align 2 |
| 86 | + %load1 = load half, ptr addrspace(3) %gep2, align 2 |
| 87 | + %load2 = load half, ptr addrspace(3) %gep6, align 2 |
| 88 | + %load3 = load half, ptr addrspace(3) %gep7, align 2 |
| 89 | + ; Store each loaded half through the corresponding %pout-derived pointer. |
| 90 | + store half %load, ptr addrspace(5) %gep10, align 2 |
| 91 | + store half %load1, ptr addrspace(5) %gep11, align 2 |
| 92 | + store half %load2, ptr addrspace(5) %gep16, align 2 |
| 93 | + store half %load3, ptr addrspace(5) %gep17, align 2 |
| 94 | + ; %gep18/%gep20 depend only on function arguments; the expected output has them in the entry block. Note that %gep19 and %gep21 are i32 loads despite their gep-style names. |
| 95 | + %gep18 = getelementptr inbounds i32, ptr addrspace(3) %pin, i32 %ofst4 |
| 96 | + %gep19 = load i32, ptr addrspace(3) %gep18, align 4 |
| 97 | + %gep20 = getelementptr inbounds i32, ptr addrspace(5) %pout, i32 %ofst4 |
| 98 | + %gep21 = load i32, ptr addrspace(5) %gep20, align 4 |
| 99 | + store i32 %gep21, ptr addrspace(3) %pin, align 4 ; writes the value loaded via %pout back through %pin itself |
| 100 | + %icmp = icmp eq i32 %gep19, %gep21 |
| 101 | + br i1 %icmp, label %loop, label %exit |
| 102 | + |
| 103 | +exit: |
| 104 | + ret void |
| 105 | +} |
0 commit comments