Skip to content

Commit 0c5c1ce

Browse files
committed
Add test
1 parent 3e34979 commit 0c5c1ce

File tree

1 file changed

+89
-0
lines changed

1 file changed

+89
-0
lines changed
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; The debugified module must reach llc through the pipe: redirecting the
; original file into llc's stdin would override the pipe, discard opt's
; output, and leave none of the .loc / DEBUG_VALUE expectations matchable.
; RUN: opt -passes=debugify < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck %s
3+
4+
; Scratch array shared by both kernels in this file: 512 floats in
; addrspace(3), poison-initialized, 4-byte aligned. Each kernel indexes it
; with the workitem id and with the workitem id plus 8.
@lds = addrspace(3) global [512 x float] poison, align 4
5+
6+
; Kernel under test: each workitem loads one float from %in[workitem.id.x]
; and stores that same value to @lds[x] and to @lds[x + 8].
; The expected assembly below shows both stores emitted as one
; ds_write2_b32 with offset1:8, together with the line-table and
; DEBUG_VALUE entries that come from the synthetic debug info.
; %C is declared but never referenced by the body.
define amdgpu_kernel void @simple_write2_one_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
7+
; CHECK-LABEL: simple_write2_one_val_f32:
8+
; CHECK: .Lfunc_begin0:
9+
; CHECK-NEXT: .cfi_sections .debug_frame
10+
; CHECK-NEXT: .cfi_startproc
11+
; CHECK-NEXT: ; %bb.0:
12+
; CHECK-NEXT: .file 1 "/" "<stdin>"
13+
; CHECK-NEXT: .loc 1 1 1 prologue_end ; <stdin>:1:1
14+
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8
15+
; CHECK-NEXT: .Ltmp0:
16+
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:1 <- $vgpr0
17+
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:5 <- [DW_OP_plus_uconst 8, DW_OP_stack_value] $vgpr0
18+
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:3 <- undef
19+
; CHECK-NEXT: .loc 1 2 1 ; <stdin>:2:1
20+
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
21+
; CHECK-NEXT: .Ltmp1:
22+
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:4 <- $vgpr0
23+
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:2 <- undef
24+
; CHECK-NEXT: .loc 1 3 1 ; <stdin>:3:1
25+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
26+
; CHECK-NEXT: global_load_dword v1, v0, s[0:1]
27+
; CHECK-NEXT: .Ltmp2:
28+
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:6 <- [DW_OP_plus_uconst 32, DW_OP_stack_value] $vgpr0
29+
; CHECK-NEXT: .loc 1 0 0 is_stmt 0 ; <stdin>:0
30+
; CHECK-NEXT: s_waitcnt vmcnt(0)
31+
; CHECK-NEXT: ds_write2_b32 v0, v1, v1 offset1:8
32+
; CHECK-NEXT: .loc 1 9 1 is_stmt 1 ; <stdin>:9:1
33+
; CHECK-NEXT: s_endpgm
34+
; CHECK-NEXT: .Ltmp3:
35+
; Per-workitem index used for both the global load and the two stores.
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
36+
%in.gep = getelementptr float, ptr addrspace(1) %in, i32 %x.i
37+
%val = load float, ptr addrspace(1) %in.gep, align 4
38+
; First store: @lds[x].
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
39+
store float %val, ptr addrspace(3) %arrayidx0, align 4
40+
; Second store of the same value, 8 elements further on: @lds[x + 8].
%add.x = add nsw i32 %x.i, 8
41+
%arrayidx1 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %add.x
42+
store float %val, ptr addrspace(3) %arrayidx1, align 4
43+
ret void
44+
}
45+
46+
; Kernel under test: each workitem loads @lds[x] and @lds[x + 8], adds the
; two floats, and stores the sum to %out[x], where x is workitem.id.x.
; The expected assembly below shows both loads emitted as one ds_read2_b32
; with offset1:8, together with the line-table and DEBUG_VALUE entries that
; come from the synthetic debug info.
define amdgpu_kernel void @simple_read2_f32(ptr addrspace(1) %out) #0 {
47+
; CHECK-LABEL: simple_read2_f32:
48+
; CHECK: .Lfunc_begin1:
49+
; CHECK-NEXT: .cfi_startproc
50+
; CHECK-NEXT: ; %bb.0:
51+
; CHECK-NEXT: .loc 1 11 1 prologue_end ; <stdin>:11:1
52+
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 2, v0
53+
; CHECK-NEXT: .Ltmp4:
54+
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:8 <- $vgpr2
55+
; CHECK-NEXT: .loc 1 0 0 is_stmt 0 ; <stdin>:0
56+
; CHECK-NEXT: ds_read2_b32 v[0:1], v2 offset1:8
57+
; CHECK-NEXT: .Ltmp5:
58+
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:9 <- undef
59+
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:11 <- [DW_OP_plus_uconst 32, DW_OP_stack_value] $vgpr2
60+
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:12 <- undef
61+
; CHECK-NEXT: .loc 1 10 1 is_stmt 1 ; <stdin>:10:1
62+
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
63+
; CHECK-NEXT: .Ltmp6:
64+
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:7 <- undef
65+
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:10 <- [DW_OP_plus_uconst 8, DW_OP_stack_value] undef
66+
; CHECK-NEXT: .loc 1 16 1 ; <stdin>:16:1
67+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
68+
; CHECK-NEXT: v_add_f32_e32 v0, v0, v1
69+
; CHECK-NEXT: .Ltmp7:
70+
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:13 <- $vgpr0
71+
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:14 <- undef
72+
; CHECK-NEXT: .loc 1 18 1 ; <stdin>:18:1
73+
; CHECK-NEXT: global_store_dword v2, v0, s[0:1]
74+
; CHECK-NEXT: .loc 1 19 1 ; <stdin>:19:1
75+
; CHECK-NEXT: s_endpgm
76+
; CHECK-NEXT: .Ltmp8:
77+
; Per-workitem index used for both @lds loads and the output store.
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
78+
; First operand: @lds[x].
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
79+
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
80+
; Second operand, 8 elements further on: @lds[x + 8].
%add.x = add nsw i32 %x.i, 8
81+
%arrayidx1 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %add.x
82+
%val1 = load float, ptr addrspace(3) %arrayidx1, align 4
83+
%sum = fadd float %val0, %val1
84+
; Write the sum to %out[x].
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i32 %x.i
85+
store float %sum, ptr addrspace(1) %out.gep, align 4
86+
ret void
87+
}
88+
89+
; Attribute group #0 is attached to both kernel definitions in this file.
; NOTE(review): the workitem.id.x call sites reference group #1, but no
; `attributes #1` definition is visible in this file — confirm whether it
; should be defined here or the `#1` references dropped.
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)