Skip to content

Commit db0dc92

Browse files
committed
Add: Metal draft
1 parent e1ac216 commit db0dc92

File tree

5 files changed

+422
-77
lines changed

5 files changed

+422
-77
lines changed

.vscode/settings.json

Lines changed: 79 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,112 +1,115 @@
11
{
2+
"cSpell.words": [
3+
"ashvardanian",
4+
"blas",
5+
"CCCL",
6+
"constexpr",
7+
"cublas",
8+
"CUDA",
9+
"Kahan",
10+
"METALLIB",
11+
"openmp",
12+
"shfl",
13+
"SPIR",
14+
"STREQUAL",
15+
"threadgroup",
16+
"Vardanian",
17+
"wmma"
18+
],
219
"files.associations": {
20+
"*.cl": "cpp",
21+
"*.ipp": "cpp",
22+
"*.metal": "cpp",
23+
"*.tcc": "cpp",
24+
"__debug": "cpp",
25+
"__nullptr": "cpp",
26+
"algorithm": "cpp",
27+
"any": "cpp",
328
"array": "cpp",
429
"atomic": "cpp",
30+
"barrier": "cpp",
531
"bit": "cpp",
6-
"*.tcc": "cpp",
7-
"*.cl": "cpp",
32+
"bitset": "cpp",
833
"cctype": "cpp",
34+
"cfenv": "cpp",
35+
"charconv": "cpp",
36+
"chrono": "cpp",
37+
"cinttypes": "cpp",
938
"clocale": "cpp",
1039
"cmath": "cpp",
40+
"codecvt": "cpp",
1141
"compare": "cpp",
42+
"complex": "cpp",
1243
"concepts": "cpp",
44+
"condition_variable": "cpp",
45+
"csetjmp": "cpp",
46+
"csignal": "cpp",
1347
"cstdarg": "cpp",
1448
"cstddef": "cpp",
1549
"cstdint": "cpp",
1650
"cstdio": "cpp",
1751
"cstdlib": "cpp",
52+
"cstring": "cpp",
53+
"ctime": "cpp",
54+
"cuchar": "cpp",
1855
"cwchar": "cpp",
1956
"cwctype": "cpp",
20-
"map": "cpp",
21-
"unordered_map": "cpp",
22-
"vector": "cpp",
57+
"deque": "cpp",
2358
"exception": "cpp",
24-
"algorithm": "cpp",
25-
"functional": "cpp",
26-
"iterator": "cpp",
27-
"memory": "cpp",
28-
"memory_resource": "cpp",
29-
"numeric": "cpp",
30-
"optional": "cpp",
31-
"random": "cpp",
32-
"string": "cpp",
33-
"string_view": "cpp",
34-
"system_error": "cpp",
35-
"tuple": "cpp",
36-
"type_traits": "cpp",
37-
"utility": "cpp",
59+
"expected": "cpp",
60+
"forward_list": "cpp",
3861
"fstream": "cpp",
62+
"functional": "cpp",
63+
"future": "cpp",
64+
"hash_map": "cpp",
65+
"hash_set": "cpp",
3966
"initializer_list": "cpp",
67+
"iomanip": "cpp",
4068
"iosfwd": "cpp",
4169
"iostream": "cpp",
4270
"istream": "cpp",
71+
"iterator": "cpp",
72+
"latch": "cpp",
4373
"limits": "cpp",
74+
"list": "cpp",
75+
"locale": "cpp",
76+
"map": "cpp",
77+
"memory": "cpp",
78+
"memory_resource": "cpp",
79+
"mutex": "cpp",
4480
"new": "cpp",
4581
"numbers": "cpp",
82+
"numeric": "cpp",
83+
"optional": "cpp",
4684
"ostream": "cpp",
85+
"propagate_const": "cpp",
86+
"random": "cpp",
4787
"ranges": "cpp",
48-
"sstream": "cpp",
49-
"stdexcept": "cpp",
50-
"streambuf": "cpp",
51-
"cinttypes": "cpp",
52-
"typeinfo": "cpp",
53-
"deque": "cpp",
88+
"ratio": "cpp",
5489
"regex": "cpp",
55-
"forward_list": "cpp",
56-
"list": "cpp",
57-
"valarray": "cpp",
58-
"cstring": "cpp",
59-
"ctime": "cpp",
60-
"any": "cpp",
61-
"bitset": "cpp",
62-
"chrono": "cpp",
63-
"codecvt": "cpp",
64-
"complex": "cpp",
65-
"condition_variable": "cpp",
90+
"scoped_allocator": "cpp",
91+
"semaphore": "cpp",
6692
"set": "cpp",
67-
"unordered_set": "cpp",
68-
"ratio": "cpp",
69-
"future": "cpp",
70-
"iomanip": "cpp",
71-
"mutex": "cpp",
7293
"shared_mutex": "cpp",
94+
"span": "cpp",
95+
"sstream": "cpp",
96+
"stdexcept": "cpp",
7397
"stop_token": "cpp",
98+
"streambuf": "cpp",
99+
"string": "cpp",
100+
"string_view": "cpp",
101+
"strstream": "cpp",
102+
"system_error": "cpp",
74103
"thread": "cpp",
104+
"tuple": "cpp",
105+
"type_traits": "cpp",
75106
"typeindex": "cpp",
107+
"typeinfo": "cpp",
108+
"unordered_map": "cpp",
109+
"unordered_set": "cpp",
110+
"utility": "cpp",
111+
"valarray": "cpp",
76112
"variant": "cpp",
77-
"csetjmp": "cpp",
78-
"csignal": "cpp",
79-
"strstream": "cpp",
80-
"scoped_allocator": "cpp",
81-
"cfenv": "cpp",
82-
"hash_map": "cpp",
83-
"*.ipp": "cpp",
84-
"__debug": "cpp",
85-
"barrier": "cpp",
86-
"charconv": "cpp",
87-
"propagate_const": "cpp",
88-
"semaphore": "cpp",
89-
"span": "cpp",
90-
"expected": "cpp",
91-
"locale": "cpp",
92-
"__nullptr": "cpp",
93-
"cuchar": "cpp",
94-
"hash_set": "cpp",
95-
"latch": "cpp"
96-
},
97-
"cSpell.words": [
98-
"ashvardanian",
99-
"blas",
100-
"CCCL",
101-
"constexpr",
102-
"cublas",
103-
"CUDA",
104-
"Kahan",
105-
"openmp",
106-
"shfl",
107-
"SPIR",
108-
"STREQUAL",
109-
"Vardanian",
110-
"wmma"
111-
]
113+
"vector": "cpp"
114+
}
112115
}

CMakeLists.txt

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,50 @@ if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" OR CMAKE_CUDA_COMPILER_ID STREQUAL "
100100
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda")
101101
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_86,code=sm_86")
102102

103+
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND 0)
    # Draft of the Metal backend for Apple toolchains. The trailing `AND 0`
    # keeps this whole branch disabled; remove it to enable the Metal build.

    # NOTE(review): the shader is looked up with an `.msl` extension - confirm
    # that this matches the shader file actually present in the source tree.
    set(METAL_SRC "${CMAKE_CURRENT_SOURCE_DIR}/reduce_metal.msl")
    set(METAL_AIR "${CMAKE_CURRENT_BINARY_DIR}/reduce_metal.air")
    set(METALLIB_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/reduce_metal.metallib")

    # Two-step shader build: source -> intermediate `.air` -> `.metallib`.
    # The intermediate file gets an explicit build-tree path and is declared
    # as a byproduct, so generators can track it and `clean` removes it.
    add_custom_command(
        OUTPUT "${METALLIB_OUTPUT}"
        BYPRODUCTS "${METAL_AIR}"
        COMMAND xcrun -sdk macosx metal -c "${METAL_SRC}" -o "${METAL_AIR}"
        COMMAND xcrun -sdk macosx metallib "${METAL_AIR}" -o "${METALLIB_OUTPUT}"
        DEPENDS "${METAL_SRC}"
        COMMENT "Compiling Metal shader: ${METAL_SRC} -> ${METALLIB_OUTPUT}"
        VERBATIM
    )

    # Create a pseudo target to build the metallib
    add_custom_target(MetalLibBuild ALL
        DEPENDS "${METALLIB_OUTPUT}"
    )

    # Compile the benchmark driver as Objective-C++ with ARC enabled.
    # Only the translation unit needs the language override: headers are not
    # compiled on their own, and the OBJCXX language already selects
    # Objective-C++ mode, so no explicit `-x objective-c++` flag is passed.
    enable_language(OBJCXX)
    set_source_files_properties(
        reduce_bench.cpp
        PROPERTIES
            LANGUAGE OBJCXX
            COMPILE_OPTIONS "-fobjc-arc"
    )

    # Assumes the `reduce_bench` target is declared before this point - TODO confirm.
    target_link_libraries(reduce_bench
        PRIVATE
            "-framework Metal"
            "-framework Foundation"
    )

    # Make sure reduce_bench depends on MetalLibBuild so the .metallib is built first
    add_dependencies(reduce_bench MetalLibBuild)

    # Copy the `metallib` to the same folder as reduce_bench
    add_custom_command(
        TARGET reduce_bench
        POST_BUILD
        COMMAND "${CMAKE_COMMAND}" -E copy_if_different
            "${METALLIB_OUTPUT}"
            "$<TARGET_FILE_DIR:reduce_bench>"
        VERBATIM
    )
146+
103147
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
104148
message("-- Detected Clang Compiler")
105149
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")

reduce_bench.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
* @brief Benchmarking parallel reductions
55
* @author Ash Vardanian
66
*/
7-
#include <cstdlib> // Accessing environment variables
7+
#include <cstdlib> // `std::getenv`
88
#include <new> // `std::launder`
99

1010
#include <benchmark/benchmark.h>
@@ -20,6 +20,10 @@
2020
#include "reduce_cuda.cuh"
2121
#endif
2222

23+
#if defined(__APPLE__) && 0 // TODO: Fix compilation
24+
#include "reduce_metal.h"
25+
#endif
26+
2327
using namespace ashvardanian::reduce;
2428

2529
namespace bm = benchmark;
@@ -164,6 +168,10 @@ int main(int argc, char **argv) {
164168
}
165169
#endif
166170

171+
#if defined(__APPLE__) && 0
172+
bm::RegisterBenchmark("metal<f32>", &make<metal_t>)->MinTime(10)->UseRealTime();
173+
#endif
174+
167175
bm::Initialize(&argc, argv);
168176
if (bm::ReportUnrecognizedArguments(argc, argv))
169177
return 1;

0 commit comments

Comments
 (0)