|
; Element type used by the function in this file: two consecutive `half` fields
; (4 bytes in total; the TBAA node for this struct places them at offsets 0 and 2).
; NOTE(review): presumably field 0 is the real part and field 1 the imaginary
; part, going by the type name only -- confirm against the C++ source.
| 1 | +%struct.rocfft_complex = type { half, half } |
| 2 | + |
; COMDAT group for the weak_odr template instantiation defined in this file.
| 3 | +$_Z32real_post_process_kernel_inplaceI14rocfft_complexIDF16_ELb1EEvmmmPT_mPKS2_ = comdat any |
| 4 | + |
; The mangled name Itanium-demangles to (roughly):
;   void real_post_process_kernel_inplace<rocfft_complex<_Float16>, true>(
;       unsigned long, unsigned long, unsigned long, T*, unsigned long, T const*)
;
; Arguments as they are used in the visible IR:
;   %0, %1, %2  element indices; each is added to %4 before indexing %3
;   %3          pointer to %struct.rocfft_complex elements, read and written in place
;   %4          offset added to %0 / %1 / %2 for every access through %3
;   %5          pointer to %struct.rocfft_complex elements, read at index %0 only
;
; Control flow: the function branches on %0 == 0. Block %53 handles %0 == 0,
; block %102 handles every other value. Both paths end in `ret void`.
;
; NOTE(review): SSA numbers %39 and %103-%106 do not appear, and the intrinsic
; declarations are not part of the visible text, so this looks like reduced
; compiler output rather than the complete original function -- confirm.
| 5 | +; Function Attrs: convergent inlinehint mustprogress nounwind |
| 6 | +define weak_odr hidden void @_Z32real_post_process_kernel_inplaceI14rocfft_complexIDF16_ELb1EEvmmmPT_mPKS2_(i64 noundef %0, i64 noundef %1, i64 noundef %2, ptr noundef %3, i64 noundef %4, ptr noundef %5) #2 comdat { |
; Stack slots, all allocated in addrspace(5):
;   %7..%12   one slot per argument %0..%5
;   %13, %14  copies of the two elements loaded from %3 in block %40
;   %15, %18  zero-filled struct temporaries used as operands in block %102
;   %16, %19  double slots that receive 0.5 in block %102
;   %17, %20  zero-filled struct temporaries, not read by the visible code
;   %21       copy of the element loaded from %5 in block %102
| 7 | + %7 = alloca i64, align 8, addrspace(5) |
| 8 | + %8 = alloca i64, align 8, addrspace(5) |
| 9 | + %9 = alloca i64, align 8, addrspace(5) |
| 10 | + %10 = alloca ptr, align 8, addrspace(5) |
| 11 | + %11 = alloca i64, align 8, addrspace(5) |
| 12 | + %12 = alloca ptr, align 8, addrspace(5) |
| 13 | + %13 = alloca %struct.rocfft_complex, align 2, addrspace(5) |
| 14 | + %14 = alloca %struct.rocfft_complex, align 2, addrspace(5) |
| 15 | + %15 = alloca %struct.rocfft_complex, align 2, addrspace(5) |
| 16 | + %16 = alloca double, align 8, addrspace(5) |
| 17 | + %17 = alloca %struct.rocfft_complex, align 2, addrspace(5) |
| 18 | + %18 = alloca %struct.rocfft_complex, align 2, addrspace(5) |
| 19 | + %19 = alloca double, align 8, addrspace(5) |
| 20 | + %20 = alloca %struct.rocfft_complex, align 2, addrspace(5) |
| 21 | + %21 = alloca %struct.rocfft_complex, align 2, addrspace(5) |
; Each addrspace(5) slot is cast to a generic pointer; %22..%36 correspond to
; %7..%21 in order, and all later loads/stores go through these casts.
| 22 | + %22 = addrspacecast ptr addrspace(5) %7 to ptr |
| 23 | + %23 = addrspacecast ptr addrspace(5) %8 to ptr |
| 24 | + %24 = addrspacecast ptr addrspace(5) %9 to ptr |
| 25 | + %25 = addrspacecast ptr addrspace(5) %10 to ptr |
| 26 | + %26 = addrspacecast ptr addrspace(5) %11 to ptr |
| 27 | + %27 = addrspacecast ptr addrspace(5) %12 to ptr |
| 28 | + %28 = addrspacecast ptr addrspace(5) %13 to ptr |
| 29 | + %29 = addrspacecast ptr addrspace(5) %14 to ptr |
| 30 | + %30 = addrspacecast ptr addrspace(5) %15 to ptr |
| 31 | + %31 = addrspacecast ptr addrspace(5) %16 to ptr |
| 32 | + %32 = addrspacecast ptr addrspace(5) %17 to ptr |
| 33 | + %33 = addrspacecast ptr addrspace(5) %18 to ptr |
| 34 | + %34 = addrspacecast ptr addrspace(5) %19 to ptr |
| 35 | + %35 = addrspacecast ptr addrspace(5) %20 to ptr |
| 36 | + %36 = addrspacecast ptr addrspace(5) %21 to ptr |
; Spill the six arguments: %22<-%0, %23<-%1, %24<-%2, %25<-%3, %26<-%4, %27<-%5.
| 37 | + store i64 %0, ptr %22, align 8, !tbaa !6 |
| 38 | + store i64 %1, ptr %23, align 8, !tbaa !6 |
| 39 | + store i64 %2, ptr %24, align 8, !tbaa !6 |
| 40 | + store ptr %3, ptr %25, align 8, !tbaa !10 |
| 41 | + store i64 %4, ptr %26, align 8, !tbaa !6 |
| 42 | + store ptr %5, ptr %27, align 8, !tbaa !10 |
; %37 and %38 reload %0 and %2 but have no users in the visible IR.
| 43 | + %37 = load i64, ptr %22, align 8, !tbaa !6 |
| 44 | + %38 = load i64, ptr %24, align 8, !tbaa !6 |
| 45 | + br label %40 |
| 46 | + |
; Block %40: copy the 4-byte elements %3[%4 + %0] and %3[%4 + %1] into the
; local slots behind %28 and %29, then branch on %0 == 0.
| 47 | +40: ; preds = %6 |
| 48 | + call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %13) #4 |
| 49 | + %41 = load ptr, ptr %25, align 8, !tbaa !10 |
| 50 | + %42 = load i64, ptr %26, align 8, !tbaa !6 |
| 51 | + %43 = load i64, ptr %22, align 8, !tbaa !6 |
| 52 | + %44 = add i64 %42, %43 |
| 53 | + %45 = getelementptr inbounds %struct.rocfft_complex, ptr %41, i64 %44 |
| 54 | + call void @llvm.memcpy.p0.p0.i64(ptr align 2 %28, ptr align 2 %45, i64 4, i1 false), !tbaa.struct !12 |
| 55 | + call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %14) #4 |
| 56 | + %46 = load ptr, ptr %25, align 8, !tbaa !10 |
| 57 | + %47 = load i64, ptr %26, align 8, !tbaa !6 |
| 58 | + %48 = load i64, ptr %23, align 8, !tbaa !6 |
| 59 | + %49 = add i64 %47, %48 |
| 60 | + %50 = getelementptr inbounds %struct.rocfft_complex, ptr %46, i64 %49 |
| 61 | + call void @llvm.memcpy.p0.p0.i64(ptr align 2 %29, ptr align 2 %50, i64 4, i1 false), !tbaa.struct !12 |
| 62 | + %51 = load i64, ptr %22, align 8, !tbaa !6 |
| 63 | + %52 = icmp eq i64 %51, 0 |
| 64 | + br i1 %52, label %53, label %102 |
| 65 | + |
; Block %53 (%0 == 0). With a = the copy behind %28 (element %3[%4 + %0]):
;   %3[%4 + %0].field0 = a.field0 + a.field1
;   %3[%4 + %0].field1 = 0
;   %3[%4 + %1].field0 = a.field0 - a.field1
;   %3[%4 + %1].field1 = 0
;   %3[%4 + %2].field1 = -%3[%4 + %2].field1   (read-modify-write in place)
; The copy behind %29 is not read on this path, and the function returns
; without lifetime.end calls for %13 / %14.
| 66 | +53: ; preds = %40 |
| 67 | + %54 = getelementptr inbounds %struct.rocfft_complex, ptr %28, i32 0, i32 0 |
| 68 | + %55 = load half, ptr %54, align 2, !tbaa !15 |
| 69 | + %56 = getelementptr inbounds %struct.rocfft_complex, ptr %28, i32 0, i32 1 |
| 70 | + %57 = load half, ptr %56, align 2, !tbaa !17 |
| 71 | + %58 = fadd contract half %55, %57 |
| 72 | + %59 = load ptr, ptr %25, align 8, !tbaa !10 |
| 73 | + %60 = load i64, ptr %26, align 8, !tbaa !6 |
| 74 | + %61 = load i64, ptr %22, align 8, !tbaa !6 |
| 75 | + %62 = add i64 %60, %61 |
| 76 | + %63 = getelementptr inbounds %struct.rocfft_complex, ptr %59, i64 %62 |
| 77 | + %64 = getelementptr inbounds %struct.rocfft_complex, ptr %63, i32 0, i32 0 |
| 78 | + store half %58, ptr %64, align 2, !tbaa !15 |
| 79 | + %65 = load ptr, ptr %25, align 8, !tbaa !10 |
| 80 | + %66 = load i64, ptr %26, align 8, !tbaa !6 |
| 81 | + %67 = load i64, ptr %22, align 8, !tbaa !6 |
| 82 | + %68 = add i64 %66, %67 |
| 83 | + %69 = getelementptr inbounds %struct.rocfft_complex, ptr %65, i64 %68 |
| 84 | + %70 = getelementptr inbounds %struct.rocfft_complex, ptr %69, i32 0, i32 1 |
| 85 | + store half 0xH0000, ptr %70, align 2, !tbaa !17 |
| 86 | + %71 = getelementptr inbounds %struct.rocfft_complex, ptr %28, i32 0, i32 0 |
| 87 | + %72 = load half, ptr %71, align 2, !tbaa !15 |
| 88 | + %73 = getelementptr inbounds %struct.rocfft_complex, ptr %28, i32 0, i32 1 |
| 89 | + %74 = load half, ptr %73, align 2, !tbaa !17 |
| 90 | + %75 = fsub contract half %72, %74 |
| 91 | + %76 = load ptr, ptr %25, align 8, !tbaa !10 |
| 92 | + %77 = load i64, ptr %26, align 8, !tbaa !6 |
| 93 | + %78 = load i64, ptr %23, align 8, !tbaa !6 |
| 94 | + %79 = add i64 %77, %78 |
| 95 | + %80 = getelementptr inbounds %struct.rocfft_complex, ptr %76, i64 %79 |
| 96 | + %81 = getelementptr inbounds %struct.rocfft_complex, ptr %80, i32 0, i32 0 |
| 97 | + store half %75, ptr %81, align 2, !tbaa !15 |
| 98 | + %82 = load ptr, ptr %25, align 8, !tbaa !10 |
| 99 | + %83 = load i64, ptr %26, align 8, !tbaa !6 |
| 100 | + %84 = load i64, ptr %23, align 8, !tbaa !6 |
| 101 | + %85 = add i64 %83, %84 |
| 102 | + %86 = getelementptr inbounds %struct.rocfft_complex, ptr %82, i64 %85 |
| 103 | + %87 = getelementptr inbounds %struct.rocfft_complex, ptr %86, i32 0, i32 1 |
| 104 | + store half 0xH0000, ptr %87, align 2, !tbaa !17 |
| 105 | + %88 = load ptr, ptr %25, align 8, !tbaa !10 |
| 106 | + %89 = load i64, ptr %26, align 8, !tbaa !6 |
| 107 | + %90 = load i64, ptr %24, align 8, !tbaa !6 |
| 108 | + %91 = add i64 %89, %90 |
| 109 | + %92 = getelementptr inbounds %struct.rocfft_complex, ptr %88, i64 %91 |
| 110 | + %93 = getelementptr inbounds %struct.rocfft_complex, ptr %92, i32 0, i32 1 |
| 111 | + %94 = load half, ptr %93, align 2, !tbaa !17 |
| 112 | + %95 = fneg contract half %94 |
| 113 | + %96 = load ptr, ptr %25, align 8, !tbaa !10 |
| 114 | + %97 = load i64, ptr %26, align 8, !tbaa !6 |
| 115 | + %98 = load i64, ptr %24, align 8, !tbaa !6 |
| 116 | + %99 = add i64 %97, %98 |
| 117 | + %100 = getelementptr inbounds %struct.rocfft_complex, ptr %96, i64 %99 |
| 118 | + %101 = getelementptr inbounds %struct.rocfft_complex, ptr %100, i32 0, i32 1 |
| 119 | + store half %95, ptr %101, align 2, !tbaa !17 |
| 120 | + ret void |
| 121 | + |
; Block %102 (%0 != 0). Setup as visible here:
;   - 0.5 is stored to the double slots behind %31 and %34; neither is read back.
;   - a single 4-byte `store i32 0` zero-fills each struct temporary behind
;     %32, %30, %35 and %33 (both half fields at once).
;   - t = the element %5[%0], copied into the slot behind %36.
; With p = the temporary behind %30 and q = the temporary behind %33:
;   %3[%4 + %0].field0 =  p.field0 + q.field0 * t.field1 + p.field1 * t.field0
;   %3[%4 + %0].field1 =  q.field1 + p.field1 * t.field1 - q.field0 * t.field0
;   %3[%4 + %1].field0 =  p.field0 - q.field0 * t.field1 - p.field1 * t.field0
;   %3[%4 + %1].field1 = -q.field1 + p.field1 * t.field1 - q.field0 * t.field0
; NOTE(review): p and q are only ever zero-filled in the visible IR and the
; copies behind %28 / %29 are not read on this path; the instructions that
; presumably combined them with the 0.5 factor (the missing %103-%106) are not
; present -- confirm against the unreduced IR.
| 122 | +102: ; preds = %40 |
| 123 | + call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %15) #4 |
| 124 | + call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %16) #4 |
| 125 | + store double 5.000000e-01, ptr %31, align 8, !tbaa !18 |
| 126 | + call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %17) #4 |
| 127 | + store i32 0, ptr %32, align 2 |
| 128 | + store i32 0, ptr %30, align 2 |
| 129 | + call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %17) #4 |
| 130 | + call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %16) #4 |
| 131 | + call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %18) #4 |
| 132 | + call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %19) #4 |
| 133 | + store double 5.000000e-01, ptr %34, align 8, !tbaa !18 |
| 134 | + call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %20) #4 |
| 135 | + store i32 0, ptr %35, align 2 |
| 136 | + store i32 0, ptr %33, align 2 |
| 137 | + call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %20) #4 |
| 138 | + call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %19) #4 |
| 139 | + call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %21) #4 |
| 140 | + %107 = load ptr, ptr %27, align 8, !tbaa !10 |
| 141 | + %108 = load i64, ptr %22, align 8, !tbaa !6 |
| 142 | + %109 = getelementptr inbounds %struct.rocfft_complex, ptr %107, i64 %108 |
| 143 | + call void @llvm.memcpy.p0.p0.i64(ptr align 2 %36, ptr align 2 %109, i64 4, i1 false), !tbaa.struct !12 |
; First result: field 0 of %3[%4 + %0].
| 144 | + %110 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 0 |
| 145 | + %111 = load half, ptr %110, align 2, !tbaa !15 |
| 146 | + %112 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 0 |
| 147 | + %113 = load half, ptr %112, align 2, !tbaa !15 |
| 148 | + %114 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 1 |
| 149 | + %115 = load half, ptr %114, align 2, !tbaa !17 |
| 150 | + %116 = fmul contract half %113, %115 |
| 151 | + %117 = fadd contract half %111, %116 |
| 152 | + %118 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 1 |
| 153 | + %119 = load half, ptr %118, align 2, !tbaa !17 |
| 154 | + %120 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 0 |
| 155 | + %121 = load half, ptr %120, align 2, !tbaa !15 |
| 156 | + %122 = fmul contract half %119, %121 |
| 157 | + %123 = fadd contract half %117, %122 |
| 158 | + %124 = load ptr, ptr %25, align 8, !tbaa !10 |
| 159 | + %125 = load i64, ptr %26, align 8, !tbaa !6 |
| 160 | + %126 = load i64, ptr %22, align 8, !tbaa !6 |
| 161 | + %127 = add i64 %125, %126 |
| 162 | + %128 = getelementptr inbounds %struct.rocfft_complex, ptr %124, i64 %127 |
| 163 | + %129 = getelementptr inbounds %struct.rocfft_complex, ptr %128, i32 0, i32 0 |
| 164 | + store half %123, ptr %129, align 2, !tbaa !15 |
; Second result: field 1 of %3[%4 + %0].
| 165 | + %130 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 1 |
| 166 | + %131 = load half, ptr %130, align 2, !tbaa !17 |
| 167 | + %132 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 1 |
| 168 | + %133 = load half, ptr %132, align 2, !tbaa !17 |
| 169 | + %134 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 1 |
| 170 | + %135 = load half, ptr %134, align 2, !tbaa !17 |
| 171 | + %136 = fmul contract half %133, %135 |
| 172 | + %137 = fadd contract half %131, %136 |
| 173 | + %138 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 0 |
| 174 | + %139 = load half, ptr %138, align 2, !tbaa !15 |
| 175 | + %140 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 0 |
| 176 | + %141 = load half, ptr %140, align 2, !tbaa !15 |
| 177 | + %142 = fmul contract half %139, %141 |
| 178 | + %143 = fsub contract half %137, %142 |
| 179 | + %144 = load ptr, ptr %25, align 8, !tbaa !10 |
| 180 | + %145 = load i64, ptr %26, align 8, !tbaa !6 |
| 181 | + %146 = load i64, ptr %22, align 8, !tbaa !6 |
| 182 | + %147 = add i64 %145, %146 |
| 183 | + %148 = getelementptr inbounds %struct.rocfft_complex, ptr %144, i64 %147 |
| 184 | + %149 = getelementptr inbounds %struct.rocfft_complex, ptr %148, i32 0, i32 1 |
| 185 | + store half %143, ptr %149, align 2, !tbaa !17 |
; Third result: field 0 of %3[%4 + %1].
| 186 | + %150 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 0 |
| 187 | + %151 = load half, ptr %150, align 2, !tbaa !15 |
| 188 | + %152 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 0 |
| 189 | + %153 = load half, ptr %152, align 2, !tbaa !15 |
| 190 | + %154 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 1 |
| 191 | + %155 = load half, ptr %154, align 2, !tbaa !17 |
| 192 | + %156 = fmul contract half %153, %155 |
| 193 | + %157 = fsub contract half %151, %156 |
| 194 | + %158 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 1 |
| 195 | + %159 = load half, ptr %158, align 2, !tbaa !17 |
| 196 | + %160 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 0 |
| 197 | + %161 = load half, ptr %160, align 2, !tbaa !15 |
| 198 | + %162 = fmul contract half %159, %161 |
| 199 | + %163 = fsub contract half %157, %162 |
| 200 | + %164 = load ptr, ptr %25, align 8, !tbaa !10 |
| 201 | + %165 = load i64, ptr %26, align 8, !tbaa !6 |
| 202 | + %166 = load i64, ptr %23, align 8, !tbaa !6 |
| 203 | + %167 = add i64 %165, %166 |
| 204 | + %168 = getelementptr inbounds %struct.rocfft_complex, ptr %164, i64 %167 |
| 205 | + %169 = getelementptr inbounds %struct.rocfft_complex, ptr %168, i32 0, i32 0 |
| 206 | + store half %163, ptr %169, align 2, !tbaa !15 |
; Fourth result: field 1 of %3[%4 + %1].
| 207 | + %170 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 1 |
| 208 | + %171 = load half, ptr %170, align 2, !tbaa !17 |
| 209 | + %172 = fneg contract half %171 |
| 210 | + %173 = getelementptr inbounds %struct.rocfft_complex, ptr %30, i32 0, i32 1 |
| 211 | + %174 = load half, ptr %173, align 2, !tbaa !17 |
| 212 | + %175 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 1 |
| 213 | + %176 = load half, ptr %175, align 2, !tbaa !17 |
| 214 | + %177 = fmul contract half %174, %176 |
| 215 | + %178 = fadd contract half %172, %177 |
| 216 | + %179 = getelementptr inbounds %struct.rocfft_complex, ptr %33, i32 0, i32 0 |
| 217 | + %180 = load half, ptr %179, align 2, !tbaa !15 |
| 218 | + %181 = getelementptr inbounds %struct.rocfft_complex, ptr %36, i32 0, i32 0 |
| 219 | + %182 = load half, ptr %181, align 2, !tbaa !15 |
| 220 | + %183 = fmul contract half %180, %182 |
| 221 | + %184 = fsub contract half %178, %183 |
| 222 | + %185 = load ptr, ptr %25, align 8, !tbaa !10 |
| 223 | + %186 = load i64, ptr %26, align 8, !tbaa !6 |
| 224 | + %187 = load i64, ptr %23, align 8, !tbaa !6 |
| 225 | + %188 = add i64 %186, %187 |
| 226 | + %189 = getelementptr inbounds %struct.rocfft_complex, ptr %185, i64 %188 |
| 227 | + %190 = getelementptr inbounds %struct.rocfft_complex, ptr %189, i32 0, i32 1 |
| 228 | + store half %184, ptr %190, align 2, !tbaa !17 |
; End the lifetimes of the temporaries opened in this block (%21, %18, %15).
| 229 | + call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %21) #4 |
| 230 | + call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %18) #4 |
| 231 | + call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %15) #4 |
| 232 | + ret void |
| 233 | +} |
| 234 | + |
; Attribute groups. In the visible text only #2 (on the function definition)
; and #4 (on the llvm.lifetime.* call sites) are referenced.
; NOTE(review): #0, #1, #3 and #5 are presumably used by declarations or call
; sites that are not part of this excerpt -- confirm before removing any.
| 235 | +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } |
| 236 | +attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } |
; #2 and #3 carry the same target-cpu (gfx90a) and target-features strings;
; they differ only in that #2 additionally has `inlinehint`.
| 237 | +attributes #2 = { convergent inlinehint mustprogress nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+cumode,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+sramecc,+wavefrontsize64,-xnack" } |
| 238 | +attributes #3 = { convergent mustprogress nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+cumode,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+sramecc,+wavefrontsize64,-xnack" } |
| 239 | +attributes #4 = { nounwind } |
| 240 | +attributes #5 = { convergent nounwind } |
| 241 | + |
; Named module-level metadata: module flags (!0-!3), producer ident (!4,
; listed eleven times) and OpenCL version (!5, listed ten times).
| 242 | +!llvm.module.flags = !{!0, !1, !2, !3} |
| 243 | +!llvm.ident = !{!4, !4, !4, !4, !4, !4, !4, !4, !4, !4, !4} |
| 244 | +!opencl.ocl.version = !{!5, !5, !5, !5, !5, !5, !5, !5, !5, !5} |
| 245 | + |
| 246 | +!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} |
| 247 | +!1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} |
| 248 | +!2 = !{i32 1, !"wchar_size", i32 4} |
| 249 | +!3 = !{i32 8, !"PIC Level", i32 2} |
; NOTE(review): the "[email protected]" text inside the ident string below looks
; like e-mail obfuscation introduced when this text was scraped from a web
; page, not the compiler's original output -- confirm against the real file.
| 250 | +!4 = !{! "clang version 19.0.0git (ssh://[email protected]:29418/lightning/ec/llvm-project a2421f3d00e8e99003ddde4ce19939737b57d043)"} |
| 251 | +!5 = !{i32 2, i32 0} |
; TBAA metadata. Root is !9 ("Simple C++ TBAA") with !8 ("omnipotent char")
; directly beneath it. Access tags used by the function:
;   !6   "long"         (the i64 argument slots)
;   !10  "any pointer"  (the pointer argument slots)
;   !15  _Float16 member of !16 at offset 0
;   !17  _Float16 member of !16 at offset 2
;   !18  "double"       (the 0.5 stores)
;   !12  tbaa.struct layout for the 4-byte memcpys: two 2-byte _Float16 fields
| 252 | +!6 = !{!7, !7, i64 0} |
| 253 | +!7 = !{!"long", !8, i64 0} |
| 254 | +!8 = !{!"omnipotent char", !9, i64 0} |
| 255 | +!9 = !{!"Simple C++ TBAA"} |
| 256 | +!10 = !{!11, !11, i64 0} |
| 257 | +!11 = !{!"any pointer", !8, i64 0} |
| 258 | +!12 = !{i64 0, i64 2, !13, i64 2, i64 2, !13} |
| 259 | +!13 = !{!14, !14, i64 0} |
| 260 | +!14 = !{!"_Float16", !8, i64 0} |
| 261 | +!15 = !{!16, !14, i64 0} |
| 262 | +!16 = !{!"_ZTS14rocfft_complexIDF16_E", !14, i64 0, !14, i64 2} |
| 263 | +!17 = !{!16, !14, i64 2} |
| 264 | +!18 = !{!19, !19, i64 0} |
| 265 | +!19 = !{!"double", !8, i64 0} |
0 commit comments