@@ -19,161 +19,8 @@ function split_kwargs_runtime(kwargs, wanted::Vector{Symbol})
1919 return extracted, remaining
2020end
2121
"""
    code_agx([io], f, types; kwargs...)
    code_agx(io, f, types, kernel::Bool=true; kwargs...)

Print the AGX code generated for the method matching the given generic function `f` and
type signature `types` to `io`, which defaults to `stdout`.

# Arguments
- `io`: stream the code is printed to.
- `f`, `types`: the function and argument types identifying the method to compile.
- `kernel`: forwarded to the compiler configuration; AGX code can only be generated
  when it is `true`.

# Keywords
Keywords listed in `COMPILER_KWARGS` are forwarded to `compiler_config`. Any other
keywords are split off and not used by this method.

See also: [`@device_code_agx`](@ref)
"""
function code_agx(io::IO, @nospecialize(func::Base.Callable), @nospecialize(types),
                  kernel::Bool=true; kwargs...)
    # only the compiler-related keywords are consumed; the remainder is discarded
    compiler_kwargs, _ = split_kwargs_runtime(kwargs, COMPILER_KWARGS)
    source = methodinstance(typeof(func), Base.to_tuple_type(types))
    config = compiler_config(device(); kernel, compiler_kwargs...)
    job = CompilerJob(source, config)
    return code_agx(io, job)
end
38-
# Print the AGX disassembly for an already constructed compiler `job` to `io`.
#
# The kernel is compiled and linked, added to a binary archive, and the archive is
# serialized to a temporary directory. Every function extracted from that archive whose
# code is not all zeros is written to its own file and disassembled, with a blank line
# separating consecutive functions. Throws an error if `job` is not a kernel job.
@autoreleasepool function code_agx(io::IO, job::MetalCompilerJob)
    if !job.config.kernel
        error("Can only generate AGX code for kernel functions")
    end

    # compile the kernel
    compiled = compile(job)
    pipeline, fun = link(job, compiled; return_function=true)
    # XXX: can we re-use this pipeline?

    # register it with a pipeline descriptor
    pipeline_desc = MTLComputePipelineDescriptor()
    pipeline_desc.computeFunction = fun

    # create a binary archive
    bin_desc = MTLBinaryArchiveDescriptor()
    bin = MTLBinaryArchive(device(), bin_desc)
    add_functions!(bin, pipeline_desc)

    mktempdir() do dir
        # serialize the archive to a file
        binary = joinpath(dir, "kernel.macho")
        write(binary, bin)

        # number of functions printed so far; a `Ref` so that the closure below can
        # update it without reassigning a captured variable
        printed = Ref(0)
        extract_gpu_code(binary) do name, code
            # skip all-zero functions
            all(iszero, code) && return

            printed[] += 1
            file = joinpath(dir, "function$(printed[]).bin")
            write(file, code)

            # disassemble the function, separating it from the previous one
            printed[] == 1 || println(io)
            println(io, "$name:")
            print(io, disassemble(file))
        end
    end
end
83-
# Machine type identifiers; `extract_gpu_code` compares the `cputype` field of a Mach-O
# header against `AppleGPU` to find the GPU slice of a universal binary.
@enum GPUMachineType::UInt32 AppleGPU=0x1000013 AMDGPU=0x1000014 IntelGPU=0x1000015 AIR64=0x1000017
90-
"""
    extract_gpu_code(f, binary)

Read the universal Mach-O file at path `binary` and call `f(name, code)` for every symbol
of the native GPU binary embedded in it.

The Apple GPU architecture is selected from the universal binary; its `__TEXT,__compute`
section is parsed as another Mach-O binary, and its `__TEXT,__metallib` section as a Metal
library. Symbols of the native binary are visited in order of increasing symbol value.
For each one, `code` holds the bytes of the `__TEXT,__text` section from that symbol up to
the next symbol (or the end of the section for the last symbol), and `name` is
`"<kernel>.<symbol>"`, where `<kernel>` is the name of the Metal library's function if it
contains exactly one, and `"unknown_kernel"` otherwise.

Throws an error if `binary` is not a universal binary, or if the GPU architecture or any
of the expected sections cannot be found. Returns `nothing`.
"""
function extract_gpu_code(f, binary)
    # the object handles read from the stream on demand, so do all the work while the
    # file is open and make sure it is closed afterwards, even on error
    open(binary) do io
        fat_handle = readmeta(io)
        fat_handle isa FatMachOHandle ||
            error("Expected a universal binary, got a $(typeof(fat_handle))")

        # the universal binary contains several architectures; extract the GPU one
        arch = findfirst(fat_handle) do slice
            slice.header isa MachO.MachOHeader64 &&
                GPUMachineType(slice.header.cputype) == AppleGPU
        end
        arch === nothing && error("Could not find GPU architecture in universal binary")

        # the GPU binary contains several sections...
        ## ... extract the compute section, which is another Mach-O binary
        compute_section = findfirst(Sections(fat_handle[arch]), "__TEXT,__compute")
        compute_section === nothing &&
            error("Could not find __compute section in GPU binary")
        compute_binary = read(compute_section)
        native_handle = only(readmeta(IOBuffer(compute_binary)))
        ## ... extract the metallib section, which is a Metal library
        metallib_section = findfirst(Sections(fat_handle[arch]), "__TEXT,__metallib")
        metallib_section === nothing &&
            error("Could not find __metallib section in GPU binary")
        metallib_binary = read(metallib_section)
        metallib = read(IOBuffer(metallib_binary), MetalLib)
        # TODO: use this to implement a do-block device_code_air like CUDA.jl?

        # identify the kernel name
        kernel_name = "unknown_kernel"
        # XXX: does it happen that these metallibs contain multiple functions?
        if length(metallib.functions) == 1
            kernel_name = metallib.functions[1].name
        end
        # XXX: we used to be able to identify the kernel by looking at symbols in
        #      the fat binary, one of which (named `<kernel>_begin`) aliased with the
        #      start of the compute section. these symbols have disappeared on macOS 15.

        # within the native GPU binary, isolate the section containing code
        section = findfirst(Sections(native_handle), "__TEXT,__text")
        isnothing(section) && error("Could not find __TEXT,__text section")

        # get all symbols, and sort them by address
        symbols = sort(collect(Symbols(native_handle)), by=symbol_value)

        # extract relevant functions: each one spans from its own symbol ...
        code = read(section)
        for (i, sym) in enumerate(symbols)
            start = symbol_value(sym)
            stop = if i < length(symbols)
                # ... up until the next symbol
                symbol_value(symbols[i + 1])
            else
                # ... up until the end of the section
                section_size(section)
            end
            f("$kernel_name.$(symbol_name(sym))", code[(start + 1):stop])
        end
        return
    end
    return
end
162-
# Run the `disassemble.py` script from the `applegpu` artifact on the file at `path`
# using the interpreter returned by `python()`, and return the script's standard output
# as a `String`.
function disassemble(path)
    output = IOBuffer()
    artifact_dir = only(readdir(artifact"applegpu"; join=true))
    script = joinpath(artifact_dir, "disassemble.py")
    cmd = `$(python()) $script $path`
    run(pipeline(cmd, stdout=output))
    return String(take!(output))
end
169-
# Convenience method without an `io` argument: prints to `stdout`.
function code_agx(@nospecialize(func::Base.Callable), @nospecialize(types); kwargs...)
    return code_agx(stdout, func, types; kwargs...)
end

# `code_native` is an alias for `code_agx`.
const code_native = code_agx
174-
17522# forward the rest to GPUCompiler with an appropriate CompilerJob
176- for method in (:code_typed , :code_warntype , :code_llvm )
23+ for method in (:code_typed , :code_warntype , :code_llvm , :code_native )
17724 # only code_typed doesn't take a io argument
17825 args = method === :code_typed ? (:job ,) : (:io , :job )
17926
@@ -191,37 +38,19 @@ for method in (:code_typed, :code_warntype, :code_llvm)
19138 end
19239end
19340
194-
19541#
19642# @device_code_* functions
19743#
19844
19945export @device_code_lowered , @device_code_typed , @device_code_warntype ,
200- @device_code_llvm , @device_code_native , @device_code_agx , @device_code
201-
"""
    @device_code_agx [io::IO=stdout, ...] ex

Evaluate the expression `ex`, and for every Metal kernel that gets compiled print the
output of [`Metal.code_agx`](@ref) to `io`. See [`Metal.code_agx`](@ref) for the other
supported keywords.
"""
macro device_code_agx(ex...)
    # hook handed to GPUCompiler: prints a `; <job>` header followed by a blank line,
    # then the AGX code for that job
    function print_agx(job::MetalCompilerJob; io::IO=stdout, kwargs...)
        println(io, "; $job")
        println(io)
        return code_agx(io, job; kwargs...)
    end
    return GPUCompiler.emit_hooked_compilation(print_agx, ex...)
end

# `@device_code_native` is an alias for `@device_code_agx`.
const var"@device_code_native" = var"@device_code_agx"
46+ @device_code_llvm , @device_code_metal , @device_code
21947
# forward to GPUCompiler; each pair is (macro name bound here, GPUCompiler macro it refers to)
for (ours, theirs) in (
        ("@device_code_lowered", "@device_code_lowered"),
        ("@device_code_typed", "@device_code_typed"),
        ("@device_code_warntype", "@device_code_warntype"),
        ("@device_code_llvm", "@device_code_llvm"),
        ("@device_code_metal", "@device_code_native"),
        ("@device_code", "@device_code"),
    )
    @eval $(Symbol(ours)) = $(getfield(GPUCompiler, Symbol(theirs)))
end
22655
22756
0 commit comments