|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| 2 | +; RUN: opt -passes=debugify < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck %s |
| 3 | +; llc reads opt's debugified module from the pipe (no second input redirect); the .loc and DEBUG_VALUE assertions below depend on that synthetic debug info. |
| 4 | +@lds = addrspace(3) global [512 x float] poison, align 4 ; 512-float addrspace(3) array used by both kernels below |
| 5 | +; Kernel: loads one float from %in[workitem.id.x] and stores it to @lds[id] and @lds[id + 8]; %C is never used in the body. |
| 6 | +define amdgpu_kernel void @simple_write2_one_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 { |
| 7 | +; CHECK-LABEL: simple_write2_one_val_f32: |
| 8 | +; CHECK: .Lfunc_begin0: |
| 9 | +; CHECK-NEXT: .cfi_sections .debug_frame |
| 10 | +; CHECK-NEXT: .cfi_startproc |
| 11 | +; CHECK-NEXT: ; %bb.0: |
| 12 | +; CHECK-NEXT: .file 1 "/" "<stdin>" |
| 13 | +; CHECK-NEXT: .loc 1 1 1 prologue_end ; <stdin>:1:1 |
| 14 | +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8 |
| 15 | +; CHECK-NEXT: .Ltmp0: |
| 16 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:1 <- $vgpr0 |
| 17 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:5 <- [DW_OP_plus_uconst 8, DW_OP_stack_value] $vgpr0 |
| 18 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:3 <- undef |
| 19 | +; CHECK-NEXT: .loc 1 2 1 ; <stdin>:2:1 |
| 20 | +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 21 | +; CHECK-NEXT: .Ltmp1: |
| 22 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:4 <- $vgpr0 |
| 23 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:2 <- undef |
| 24 | +; CHECK-NEXT: .loc 1 3 1 ; <stdin>:3:1 |
| 25 | +; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| 26 | +; CHECK-NEXT: global_load_dword v1, v0, s[0:1] |
| 27 | +; CHECK-NEXT: .Ltmp2: |
| 28 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:6 <- [DW_OP_plus_uconst 32, DW_OP_stack_value] $vgpr0 |
| 29 | +; CHECK-NEXT: .loc 1 0 0 is_stmt 0 ; <stdin>:0 |
| 30 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 31 | +; CHECK-NEXT: ds_write2_b32 v0, v1, v1 offset1:8 |
| 32 | +; CHECK-NEXT: .loc 1 9 1 is_stmt 1 ; <stdin>:9:1 |
| 33 | +; CHECK-NEXT: s_endpgm |
| 34 | +; CHECK-NEXT: .Ltmp3: |
| 35 | + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 ; index used for every access below |
| 36 | + %in.gep = getelementptr float, ptr addrspace(1) %in, i32 %x.i |
| 37 | + %val = load float, ptr addrspace(1) %in.gep, align 4 ; the single value that is stored twice |
| 38 | + %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i |
| 39 | + store float %val, ptr addrspace(3) %arrayidx0, align 4 ; first store, element %x.i of @lds |
| 40 | + %add.x = add nsw i32 %x.i, 8 ; second element is 8 floats past the first |
| 41 | + %arrayidx1 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %add.x |
| 42 | + store float %val, ptr addrspace(3) %arrayidx1, align 4 ; second store; the expected asm above has one ds_write2_b32 (offset1:8) for both stores, under a line-0 .loc |
| 43 | + ret void |
| 44 | +} |
| 45 | +; Kernel: loads @lds[workitem.id.x] and @lds[id + 8], adds them, and stores the sum to %out[id]. |
| 46 | +define amdgpu_kernel void @simple_read2_f32(ptr addrspace(1) %out) #0 { |
| 47 | +; CHECK-LABEL: simple_read2_f32: |
| 48 | +; CHECK: .Lfunc_begin1: |
| 49 | +; CHECK-NEXT: .cfi_startproc |
| 50 | +; CHECK-NEXT: ; %bb.0: |
| 51 | +; CHECK-NEXT: .loc 1 11 1 prologue_end ; <stdin>:11:1 |
| 52 | +; CHECK-NEXT: v_lshlrev_b32_e32 v2, 2, v0 |
| 53 | +; CHECK-NEXT: .Ltmp4: |
| 54 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:8 <- $vgpr2 |
| 55 | +; CHECK-NEXT: .loc 1 0 0 is_stmt 0 ; <stdin>:0 |
| 56 | +; CHECK-NEXT: ds_read2_b32 v[0:1], v2 offset1:8 |
| 57 | +; CHECK-NEXT: .Ltmp5: |
| 58 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:9 <- undef |
| 59 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:11 <- [DW_OP_plus_uconst 32, DW_OP_stack_value] $vgpr2 |
| 60 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:12 <- undef |
| 61 | +; CHECK-NEXT: .loc 1 10 1 is_stmt 1 ; <stdin>:10:1 |
| 62 | +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 |
| 63 | +; CHECK-NEXT: .Ltmp6: |
| 64 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:7 <- undef |
| 65 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:10 <- [DW_OP_plus_uconst 8, DW_OP_stack_value] undef |
| 66 | +; CHECK-NEXT: .loc 1 16 1 ; <stdin>:16:1 |
| 67 | +; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| 68 | +; CHECK-NEXT: v_add_f32_e32 v0, v0, v1 |
| 69 | +; CHECK-NEXT: .Ltmp7: |
| 70 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:13 <- $vgpr0 |
| 71 | +; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:14 <- undef |
| 72 | +; CHECK-NEXT: .loc 1 18 1 ; <stdin>:18:1 |
| 73 | +; CHECK-NEXT: global_store_dword v2, v0, s[0:1] |
| 74 | +; CHECK-NEXT: .loc 1 19 1 ; <stdin>:19:1 |
| 75 | +; CHECK-NEXT: s_endpgm |
| 76 | +; CHECK-NEXT: .Ltmp8: |
| 77 | + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 ; index used for every access below |
| 78 | + %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i |
| 79 | + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 ; first load, element %x.i of @lds |
| 80 | + %add.x = add nsw i32 %x.i, 8 ; second element is 8 floats past the first |
| 81 | + %arrayidx1 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %add.x |
| 82 | + %val1 = load float, ptr addrspace(3) %arrayidx1, align 4 ; second load; the expected asm above has one ds_read2_b32 (offset1:8) for both loads, under a line-0 .loc |
| 83 | + %sum = fadd float %val0, %val1 |
| 84 | + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i32 %x.i |
| 85 | + store float %sum, ptr addrspace(1) %out.gep, align 4 ; result goes to %out at the same index |
| 86 | + ret void |
| 87 | +} |
| 88 | + |
| 89 | +attributes #0 = { nounwind } ; applied to both kernels; NOTE(review): the workitem.id.x call sites reference #1, which is not defined in the visible text - confirm that is intended |
0 commit comments