Skip to content

Commit a9541c7

Browse files
[AMDGPU] precision error observed
1 parent 6568062 commit a9541c7

File tree

1 file changed

+265
-0
lines changed

1 file changed

+265
-0
lines changed
Lines changed: 265 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,265 @@
1+
; Element type used throughout the kernel below: two `half` fields (field 0 and
; field 1). The kernel copies it with 4-byte memcpy calls.
; NOTE(review): presumably field 0 / field 1 are the real / imaginary parts -- confirm.
%struct.rocfft_complex = type { half, half }
2+
3+
; COMDAT (selection kind `any`) that the kernel definition below is placed in.
$_Z32real_post_process_kernel_inplaceI14rocfft_complexIDF16_ELb1EEvmmmPT_mPKS2_ = comdat any
4+
5+
; Function Attrs: convergent inlinehint mustprogress nounwind
6+
; Reproducer function for this commit.
;
; Arguments (as used by the body):
;   %0, %1, %2  i64 element indices; each is added to %4 before indexing %3.
;   %3          pointer to %struct.rocfft_complex elements; read and written.
;   %4          i64 base offset added to %0 / %1 / %2.
;   %5          pointer to %struct.rocfft_complex elements; only read (block %102).
; Returns void on both paths.
;
; Control flow: entry -> %40 -> (%0 == 0 ? %53 : %102).
;
; NOTE(review): the mangled name looks like
;   real_post_process_kernel_inplace<rocfft_complex<_Float16>, true>(
;       unsigned long, unsigned long, unsigned long, T*, unsigned long, const T*)
; -- confirm with a demangler.
;
; Observations about this text:
;   * Unnamed value numbers %39 and %103-%106 are skipped.
;   * Every half-precision arithmetic instruction carries the `contract` flag.
;   * Slots %13 and %14 get lifetime.start in %40 but no lifetime.end on either path.
define weak_odr hidden void @_Z32real_post_process_kernel_inplaceI14rocfft_complexIDF16_ELb1EEvmmmPT_mPKS2_(i64 noundef %0, i64 noundef %1, i64 noundef %2, ptr noundef %3, i64 noundef %4, ptr noundef %5) #2 comdat {
; addrspace(5) stack slots: %7-%12 receive the six arguments (in order),
; %13-%21 are temporaries used by the blocks below.
7+
%7 = alloca i64, align 8, addrspace(5)
8+
%8 = alloca i64, align 8, addrspace(5)
9+
%9 = alloca i64, align 8, addrspace(5)
10+
%10 = alloca ptr, align 8, addrspace(5)
11+
%11 = alloca i64, align 8, addrspace(5)
12+
%12 = alloca ptr, align 8, addrspace(5)
13+
%13 = alloca %struct.rocfft_complex, align 2, addrspace(5)
14+
%14 = alloca %struct.rocfft_complex, align 2, addrspace(5)
15+
%15 = alloca %struct.rocfft_complex, align 2, addrspace(5)
16+
%16 = alloca double, align 8, addrspace(5)
17+
%17 = alloca %struct.rocfft_complex, align 2, addrspace(5)
18+
%18 = alloca %struct.rocfft_complex, align 2, addrspace(5)
19+
%19 = alloca double, align 8, addrspace(5)
20+
%20 = alloca %struct.rocfft_complex, align 2, addrspace(5)
21+
%21 = alloca %struct.rocfft_complex, align 2, addrspace(5)
; Generic-address-space views of the slots: %22-%36 are casts of %7-%21, in order.
22+
%22 = addrspacecast ptr addrspace(5) %7 to ptr
23+
%23 = addrspacecast ptr addrspace(5) %8 to ptr
24+
%24 = addrspacecast ptr addrspace(5) %9 to ptr
25+
%25 = addrspacecast ptr addrspace(5) %10 to ptr
26+
%26 = addrspacecast ptr addrspace(5) %11 to ptr
27+
%27 = addrspacecast ptr addrspace(5) %12 to ptr
28+
%28 = addrspacecast ptr addrspace(5) %13 to ptr
29+
%29 = addrspacecast ptr addrspace(5) %14 to ptr
30+
%30 = addrspacecast ptr addrspace(5) %15 to ptr
31+
%31 = addrspacecast ptr addrspace(5) %16 to ptr
32+
%32 = addrspacecast ptr addrspace(5) %17 to ptr
33+
%33 = addrspacecast ptr addrspace(5) %18 to ptr
34+
%34 = addrspacecast ptr addrspace(5) %19 to ptr
35+
%35 = addrspacecast ptr addrspace(5) %20 to ptr
36+
%36 = addrspacecast ptr addrspace(5) %21 to ptr
; Spill the incoming arguments: %0->%22, %1->%23, %2->%24, %3->%25, %4->%26, %5->%27.
37+
store i64 %0, ptr %22, align 8, !tbaa !6
38+
store i64 %1, ptr %23, align 8, !tbaa !6
39+
store i64 %2, ptr %24, align 8, !tbaa !6
40+
store ptr %3, ptr %25, align 8, !tbaa !10
41+
store i64 %4, ptr %26, align 8, !tbaa !6
42+
store ptr %5, ptr %27, align 8, !tbaa !10
; %37 and %38 have no later use in this function.
43+
%37 = load i64, ptr %22, align 8, !tbaa !6
44+
%38 = load i64, ptr %24, align 8, !tbaa !6
45+
br label %40
46+
47+
40: ; preds = %6
; %28 (slot %13) = 4-byte copy of element %3[%4 + %0].
48+
call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %13) #4
49+
%41 = load ptr, ptr %25, align 8, !tbaa !10
50+
%42 = load i64, ptr %26, align 8, !tbaa !6
51+
%43 = load i64, ptr %22, align 8, !tbaa !6
52+
%44 = add i64 %42, %43
53+
%45 = getelementptr inbounds %struct.rocfft_complex, ptr %41, i64 %44
54+
call void @llvm.memcpy.p0.p0.i64(ptr align 2 %28, ptr align 2 %45, i64 4, i1 false), !tbaa.struct !12
; %29 (slot %14) = 4-byte copy of element %3[%4 + %1]; %29 is not read again below.
55+
call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %14) #4
56+
%46 = load ptr, ptr %25, align 8, !tbaa !10
57+
%47 = load i64, ptr %26, align 8, !tbaa !6
58+
%48 = load i64, ptr %23, align 8, !tbaa !6
59+
%49 = add i64 %47, %48
60+
%50 = getelementptr inbounds %struct.rocfft_complex, ptr %46, i64 %49
61+
call void @llvm.memcpy.p0.p0.i64(ptr align 2 %29, ptr align 2 %50, i64 4, i1 false), !tbaa.struct !12
; Dispatch on the spilled %0: zero -> %53, non-zero -> %102.
62+
%51 = load i64, ptr %22, align 8, !tbaa !6
63+
%52 = icmp eq i64 %51, 0
64+
br i1 %52, label %53, label %102
65+
66+
53: ; preds = %40
; Path for %0 == 0. Below, a = %28 field 0 and b = %28 field 1.
; %3[%4 + %0].field0 = a + b
67+
%54 = getelementptr inbounds %struct.rocfft_complex, ptr %28, i32 0, i32 0
68+
%55 = load half, ptr %54, align 2, !tbaa !15
69+
%56 = getelementptr inbounds %struct.rocfft_complex, ptr %28, i32 0, i32 1
70+
%57 = load half, ptr %56, align 2, !tbaa !17
71+
%58 = fadd contract half %55, %57
72+
%59 = load ptr, ptr %25, align 8, !tbaa !10
73+
%60 = load i64, ptr %26, align 8, !tbaa !6
74+
%61 = load i64, ptr %22, align 8, !tbaa !6
75+
%62 = add i64 %60, %61
76+
%63 = getelementptr inbounds %struct.rocfft_complex, ptr %59, i64 %62
77+
%64 = getelementptr inbounds %struct.rocfft_complex, ptr %63, i32 0, i32 0
78+
store half %58, ptr %64, align 2, !tbaa !15
; %3[%4 + %0].field1 = 0
79+
%65 = load ptr, ptr %25, align 8, !tbaa !10
80+
%66 = load i64, ptr %26, align 8, !tbaa !6
81+
%67 = load i64, ptr %22, align 8, !tbaa !6
82+
%68 = add i64 %66, %67
83+
%69 = getelementptr inbounds %struct.rocfft_complex, ptr %65, i64 %68
84+
%70 = getelementptr inbounds %struct.rocfft_complex, ptr %69, i32 0, i32 1
85+
store half 0xH0000, ptr %70, align 2, !tbaa !17
; %3[%4 + %1].field0 = a - b (a and b are reloaded from %28)
86+
%71 = getelementptr inbounds %struct.rocfft_complex, ptr %28, i32 0, i32 0
87+
%72 = load half, ptr %71, align 2, !tbaa !15
88+
%73 = getelementptr inbounds %struct.rocfft_complex, ptr %28, i32 0, i32 1
89+
%74 = load half, ptr %73, align 2, !tbaa !17
90+
%75 = fsub contract half %72, %74
91+
%76 = load ptr, ptr %25, align 8, !tbaa !10
92+
%77 = load i64, ptr %26, align 8, !tbaa !6
93+
%78 = load i64, ptr %23, align 8, !tbaa !6
94+
%79 = add i64 %77, %78
95+
%80 = getelementptr inbounds %struct.rocfft_complex, ptr %76, i64 %79
96+
%81 = getelementptr inbounds %struct.rocfft_complex, ptr %80, i32 0, i32 0
97+
store half %75, ptr %81, align 2, !tbaa !15
; %3[%4 + %1].field1 = 0
98+
%82 = load ptr, ptr %25, align 8, !tbaa !10
99+
%83 = load i64, ptr %26, align 8, !tbaa !6
100+
%84 = load i64, ptr %23, align 8, !tbaa !6
101+
%85 = add i64 %83, %84
102+
%86 = getelementptr inbounds %struct.rocfft_complex, ptr %82, i64 %85
103+
%87 = getelementptr inbounds %struct.rocfft_complex, ptr %86, i32 0, i32 1
104+
store half 0xH0000, ptr %87, align 2, !tbaa !17
; Negate field 1 of %3[%4 + %2] in place (load, fneg, store to the same address).
105+
%88 = load ptr, ptr %25, align 8, !tbaa !10
106+
%89 = load i64, ptr %26, align 8, !tbaa !6
107+
%90 = load i64, ptr %24, align 8, !tbaa !6
108+
%91 = add i64 %89, %90
109+
%92 = getelementptr inbounds %struct.rocfft_complex, ptr %88, i64 %91
110+
%93 = getelementptr inbounds %struct.rocfft_complex, ptr %92, i32 0, i32 1
111+
%94 = load half, ptr %93, align 2, !tbaa !17
112+
%95 = fneg contract half %94
113+
%96 = load ptr, ptr %25, align 8, !tbaa !10
114+
%97 = load i64, ptr %26, align 8, !tbaa !6
115+
%98 = load i64, ptr %24, align 8, !tbaa !6
116+
%99 = add i64 %97, %98
117+
%100 = getelementptr inbounds %struct.rocfft_complex, ptr %96, i64 %99
118+
%101 = getelementptr inbounds %struct.rocfft_complex, ptr %100, i32 0, i32 1
119+
store half %95, ptr %101, align 2, !tbaa !17
120+
ret void
121+
122+
102: ; preds = %40
; Path for %0 != 0.
; First temporary group: the double 0.5 written to %31 (slot %16) is never loaded;
; the two i32 zero stores clear all 4 bytes of %32 (slot %17) and of %30 (slot %15).
123+
call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %15) #4
124+
call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %16) #4
125+
store double 5.000000e-01, ptr %31, align 8, !tbaa !18
126+
call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %17) #4
127+
store i32 0, ptr %32, align 2
128+
store i32 0, ptr %30, align 2
129+
call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %17) #4
130+
call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %16) #4
; Second temporary group, same shape: 0.5 to %34 (slot %19) is never loaded;
; %35 (slot %20) and %33 (slot %18) are cleared with i32 zero stores.
131+
call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %18) #4
132+
call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %19) #4
133+
store double 5.000000e-01, ptr %34, align 8, !tbaa !18
134+
call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %20) #4
135+
store i32 0, ptr %35, align 2
136+
store i32 0, ptr %33, align 2
137+
call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %20) #4
138+
call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %19) #4
; %36 (slot %21) = 4-byte copy of element %5[%0].
139+
call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %21) #4
140+
%107 = load ptr, ptr %27, align 8, !tbaa !10
141+
%108 = load i64, ptr %22, align 8, !tbaa !6
142+
%109 = getelementptr inbounds %struct.rocfft_complex, ptr %107, i64 %108
143+
call void @llvm.memcpy.p0.p0.i64(ptr align 2 %36, ptr align 2 %109, i64 4, i1 false), !tbaa.struct !12
; Notation for the four results below: p = %30, q = %33, w = %36; ".0"/".1" are
; struct fields 0/1. The only stores to p and q in this block are the i32 0 stores above.
; %3[%4 + %0].field0 = (p.0 + q.0*w.1) + p.1*w.0
144+
%110 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 0
145+
%111 = load half, ptr %110, align 2, !tbaa !15
146+
%112 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 0
147+
%113 = load half, ptr %112, align 2, !tbaa !15
148+
%114 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 1
149+
%115 = load half, ptr %114, align 2, !tbaa !17
150+
%116 = fmul contract half %113, %115
151+
%117 = fadd contract half %111, %116
152+
%118 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 1
153+
%119 = load half, ptr %118, align 2, !tbaa !17
154+
%120 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 0
155+
%121 = load half, ptr %120, align 2, !tbaa !15
156+
%122 = fmul contract half %119, %121
157+
%123 = fadd contract half %117, %122
158+
%124 = load ptr, ptr %25, align 8, !tbaa !10
159+
%125 = load i64, ptr %26, align 8, !tbaa !6
160+
%126 = load i64, ptr %22, align 8, !tbaa !6
161+
%127 = add i64 %125, %126
162+
%128 = getelementptr inbounds %struct.rocfft_complex, ptr %124, i64 %127
163+
%129 = getelementptr inbounds %struct.rocfft_complex, ptr %128, i32 0, i32 0
164+
store half %123, ptr %129, align 2, !tbaa !15
; %3[%4 + %0].field1 = (q.1 + p.1*w.1) - q.0*w.0
165+
%130 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 1
166+
%131 = load half, ptr %130, align 2, !tbaa !17
167+
%132 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 1
168+
%133 = load half, ptr %132, align 2, !tbaa !17
169+
%134 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 1
170+
%135 = load half, ptr %134, align 2, !tbaa !17
171+
%136 = fmul contract half %133, %135
172+
%137 = fadd contract half %131, %136
173+
%138 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 0
174+
%139 = load half, ptr %138, align 2, !tbaa !15
175+
%140 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 0
176+
%141 = load half, ptr %140, align 2, !tbaa !15
177+
%142 = fmul contract half %139, %141
178+
%143 = fsub contract half %137, %142
179+
%144 = load ptr, ptr %25, align 8, !tbaa !10
180+
%145 = load i64, ptr %26, align 8, !tbaa !6
181+
%146 = load i64, ptr %22, align 8, !tbaa !6
182+
%147 = add i64 %145, %146
183+
%148 = getelementptr inbounds %struct.rocfft_complex, ptr %144, i64 %147
184+
%149 = getelementptr inbounds %struct.rocfft_complex, ptr %148, i32 0, i32 1
185+
store half %143, ptr %149, align 2, !tbaa !17
; %3[%4 + %1].field0 = (p.0 - q.0*w.1) - p.1*w.0
186+
%150 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 0
187+
%151 = load half, ptr %150, align 2, !tbaa !15
188+
%152 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 0
189+
%153 = load half, ptr %152, align 2, !tbaa !15
190+
%154 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 1
191+
%155 = load half, ptr %154, align 2, !tbaa !17
192+
%156 = fmul contract half %153, %155
193+
%157 = fsub contract half %151, %156
194+
%158 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 1
195+
%159 = load half, ptr %158, align 2, !tbaa !17
196+
%160 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 0
197+
%161 = load half, ptr %160, align 2, !tbaa !15
198+
%162 = fmul contract half %159, %161
199+
%163 = fsub contract half %157, %162
200+
%164 = load ptr, ptr %25, align 8, !tbaa !10
201+
%165 = load i64, ptr %26, align 8, !tbaa !6
202+
%166 = load i64, ptr %23, align 8, !tbaa !6
203+
%167 = add i64 %165, %166
204+
%168 = getelementptr inbounds %struct.rocfft_complex, ptr %164, i64 %167
205+
%169 = getelementptr inbounds %struct.rocfft_complex, ptr %168, i32 0, i32 0
206+
store half %163, ptr %169, align 2, !tbaa !15
; %3[%4 + %1].field1 = (-q.1 + p.1*w.1) - q.0*w.0
207+
%170 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 1
208+
%171 = load half, ptr %170, align 2, !tbaa !17
209+
%172 = fneg contract half %171
210+
%173 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 1
211+
%174 = load half, ptr %173, align 2, !tbaa !17
212+
%175 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 1
213+
%176 = load half, ptr %175, align 2, !tbaa !17
214+
%177 = fmul contract half %174, %176
215+
%178 = fadd contract half %172, %177
216+
%179 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 0
217+
%180 = load half, ptr %179, align 2, !tbaa !15
218+
%181 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 0
219+
%182 = load half, ptr %181, align 2, !tbaa !15
220+
%183 = fmul contract half %180, %182
221+
%184 = fsub contract half %178, %183
222+
%185 = load ptr, ptr %25, align 8, !tbaa !10
223+
%186 = load i64, ptr %26, align 8, !tbaa !6
224+
%187 = load i64, ptr %23, align 8, !tbaa !6
225+
%188 = add i64 %186, %187
226+
%189 = getelementptr inbounds %struct.rocfft_complex, ptr %185, i64 %188
227+
%190 = getelementptr inbounds %struct.rocfft_complex, ptr %189, i32 0, i32 1
228+
store half %184, ptr %190, align 2, !tbaa !17
; End the lifetimes of the three temporaries still live on this path (%21, %18, %15).
229+
call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %21) #4
230+
call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %18) #4
231+
call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %15) #4
232+
ret void
233+
}
234+
235+
; Attribute groups. In this text, #2 is attached to the kernel definition and #4
; to the llvm.lifetime.* call sites; #0, #1, #3 and #5 have no visible user here.
; NOTE(review): #0/#1 look like they belonged to intrinsic declarations that are
; not part of this text -- confirm against the full module.
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
236+
attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
237+
; #2 pins the target CPU to gfx90a and lists the enabled target features.
attributes #2 = { convergent inlinehint mustprogress nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+cumode,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+sramecc,+wavefrontsize64,-xnack" }
238+
; #3 is the same as #2 except that it lacks `inlinehint`.
attributes #3 = { convergent mustprogress nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+cumode,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+sramecc,+wavefrontsize64,-xnack" }
239+
attributes #4 = { nounwind }
240+
attributes #5 = { convergent nounwind }
241+
242+
; Named metadata: module flags (!0-!3), producer string (!4, listed 11 times in
; !llvm.ident) and the {2, 0} version pair (!5, listed 10 times).
!llvm.module.flags = !{!0, !1, !2, !3}
243+
!llvm.ident = !{!4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4}
244+
!opencl.ocl.version = !{!5, !5, !5, !5, !5, !5, !5, !5, !5, !5}
245+
246+
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
247+
!1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
248+
!2 = !{i32 1, !"wchar_size", i32 4}
249+
!3 = !{i32 8, !"PIC Level", i32 2}
250+
!4 = !{!"clang version 19.0.0git (ssh://[email protected]:29418/lightning/ec/llvm-project a2421f3d00e8e99003ddde4ce19939737b57d043)"}
251+
!5 = !{i32 2, i32 0}
; TBAA descriptors used by the !tbaa / !tbaa.struct attachments in the kernel.
; Root is !9; !8 ("omnipotent char") is the parent of every scalar type node.
; !6: access tag for "long" (attached to the i64 loads/stores).
252+
!6 = !{!7, !7, i64 0}
253+
!7 = !{!"long", !8, i64 0}
254+
!8 = !{!"omnipotent char", !9, i64 0}
255+
!9 = !{!"Simple C++ TBAA"}
; !10: access tag for "any pointer" (attached to the ptr loads/stores).
256+
!10 = !{!11, !11, i64 0}
257+
!11 = !{!"any pointer", !8, i64 0}
; !12: tbaa.struct layout for the 4-byte memcpy calls: two 2-byte fields at
; offsets 0 and 2, each tagged !13 (_Float16).
258+
!12 = !{i64 0, i64 2, !13, i64 2, i64 2, !13}
259+
!13 = !{!14, !14, i64 0}
260+
!14 = !{!"_Float16", !8, i64 0}
; !15 / !17: access tags for the _Float16 members of struct node !16 at byte
; offsets 0 and 2 (used for field 0 and field 1 accesses respectively).
261+
!15 = !{!16, !14, i64 0}
262+
!16 = !{!"_ZTS14rocfft_complexIDF16_E", !14, i64 0, !14, i64 2}
263+
!17 = !{!16, !14, i64 2}
; !18: access tag for "double" (attached to the two 0.5 stores in block %102).
264+
!18 = !{!19, !19, i64 0}
265+
!19 = !{!"double", !8, i64 0}

0 commit comments

Comments
 (0)