diff --git a/sycl-jit/jit-compiler/CMakeLists.txt b/sycl-jit/jit-compiler/CMakeLists.txt
index d0f63e27cda63..9deae68e1f969 100644
--- a/sycl-jit/jit-compiler/CMakeLists.txt
+++ b/sycl-jit/jit-compiler/CMakeLists.txt
@@ -1,4 +1,10 @@
 
+# Exclusion list passed to utils/generate.py via --blacklist. It is a single
+# cache entry on purpose: a plain set() of the same name would shadow the cache
+# value and make -DSYCL_JIT_RESOURCE_BLACKLIST=<file> ineffective.
+set(SYCL_JIT_RESOURCE_BLACKLIST
+  "${CMAKE_CURRENT_SOURCE_DIR}/utils/blacklist.txt"
+  CACHE FILEPATH "Path to a blacklist file for sycl-jit resources.")
 set(SYCL_JIT_RESOURCE_CPP "${CMAKE_CURRENT_BINARY_DIR}/resource.cpp")
 set(SYCL_JIT_RESOURCE_OBJ "${CMAKE_CURRENT_BINARY_DIR}/resource.cpp.o")
 
@@ -10,8 +16,8 @@ set(SYCL_JIT_VIRTUAL_TOOLCHAIN_ROOT "/sycl-jit-toolchain/")
 endif()
 
 set(SYCL_JIT_RESOURCE_DEPS
-  sycl-headers # include/sycl
-  clang # lib/clang/N/include
+  sycl-headers            # include/sycl
+  opencl-resource-headers # lib/clang/N/include
   ${CMAKE_CURRENT_SOURCE_DIR}/utils/generate.py)
 
 if ("libclc" IN_LIST LLVM_ENABLE_PROJECTS)
@@ -23,11 +29,38 @@ if ("libdevice" IN_LIST LLVM_ENABLE_PROJECTS)
   list(APPEND SYCL_JIT_RESOURCE_DEPS libsycldevice) # lib/*.bc
 endif()
 
+set(GENERATE_PY ${CMAKE_CURRENT_SOURCE_DIR}/utils/generate.py)
+# File that generate.py receives as --manifest-output, written alongside resource.cpp.
+set(SYCL_JIT_CAPTURE_MANIFEST "${CMAKE_CURRENT_BINARY_DIR}/capture_manifest.txt")
+
+set(GENERATE_PY_BASE_COMMAND ${Python3_EXECUTABLE} ${GENERATE_PY}
+  --toolchain-dir ${CMAKE_BINARY_DIR}
+  --output ${SYCL_JIT_RESOURCE_CPP}
+  --prefix ${SYCL_JIT_VIRTUAL_TOOLCHAIN_ROOT}
+  --manifest-output ${SYCL_JIT_CAPTURE_MANIFEST}
+)
+set(GENERATE_PY_DEPENDS ${SYCL_JIT_RESOURCE_DEPS})
+
+# An empty SYCL_JIT_RESOURCE_BLACKLIST disables filtering. A non-empty path that
+# does not exist is a configuration error: silently skipping it would bundle
+# files that were meant to be excluded.
+if(NOT "${SYCL_JIT_RESOURCE_BLACKLIST}" STREQUAL "")
+  if(NOT EXISTS "${SYCL_JIT_RESOURCE_BLACKLIST}")
+    message(FATAL_ERROR "sycl-jit: resource blacklist file not found: ${SYCL_JIT_RESOURCE_BLACKLIST}")
+  endif()
+  message(STATUS "sycl-jit: Using resource blacklist file: ${SYCL_JIT_RESOURCE_BLACKLIST}")
+  list(APPEND GENERATE_PY_BASE_COMMAND --blacklist ${SYCL_JIT_RESOURCE_BLACKLIST})
+  # Make the build depend on the blacklist file, so it reruns if the file changes
+  list(APPEND GENERATE_PY_DEPENDS ${SYCL_JIT_RESOURCE_BLACKLIST})
+endif()
+
 add_custom_command(
-  OUTPUT ${SYCL_JIT_RESOURCE_CPP}
-  COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/utils/generate.py --toolchain-dir ${CMAKE_BINARY_DIR} --output ${SYCL_JIT_RESOURCE_CPP} --prefix ${SYCL_JIT_VIRTUAL_TOOLCHAIN_ROOT}
-  DEPENDS
-    ${SYCL_JIT_RESOURCE_DEPS}
+  OUTPUT ${SYCL_JIT_RESOURCE_CPP}
+  BYPRODUCTS ${SYCL_JIT_CAPTURE_MANIFEST}
+  COMMAND ${GENERATE_PY_BASE_COMMAND}
+  DEPENDS ${GENERATE_PY_DEPENDS}
+  COMMENT "Generating resource.cpp..."
+  VERBATIM
 )
 
 # We use C23/C++26's `#embed` to implement this resource creation, and "current"
diff --git a/sycl-jit/jit-compiler/utils/blacklist.txt b/sycl-jit/jit-compiler/utils/blacklist.txt
new file mode 100644
index 0000000000000..0901a64af9800
--- /dev/null
+++ b/sycl-jit/jit-compiler/utils/blacklist.txt
@@ -0,0 +1,12 @@
+# paths in this file will NOT be bundled into sycl-jit
+# use it to keep unneeded headers out of resource.cpp
+# and to avoid IP issues.
+lib/libsycl-msan-cpu.bc
+lib/libsycl-asan-pvc.bc
+lib/libsycl-msan.bc
+lib/libsycl-asan-cpu.bc
+lib/libsycl-msan-pvc.bc
+lib/libsycl-asan-dg2.bc
+lib/libsycl-tsan.bc
+lib/libsycl-tsan-pvc.bc
+lib/libsycl-tsan-cpu.bc
diff --git a/sycl-jit/jit-compiler/utils/generate.py b/sycl-jit/jit-compiler/utils/generate.py
index 7fca23e869716..c172f65bf6533 100644
--- a/sycl-jit/jit-compiler/utils/generate.py
+++ b/sycl-jit/jit-compiler/utils/generate.py
@@ -1,66 +1,150 @@
 import os
 import argparse
+import sys
+import fnmatch
 import glob
 
 
 def main():
     parser = argparse.ArgumentParser(
-        description="Generate SYCL Headers Resource C++ file"
+        description="Generate SYCL Headers Resource C++ file."
+ ) + parser.add_argument( + "-o", "--output", type=str, required=True, help="Output C++ file" ) - parser.add_argument("-o", "--output", type=str, required=True, help="Output file") parser.add_argument( "-i", "--toolchain-dir", type=str, required=True, - help="Path to toolchain root directory", + help="Path to toolchain root directory.", + ) + parser.add_argument( + "--prefix", type=str, required=True, help="Prefix for virtual file locations" + ) + parser.add_argument( + "-m", + "--manifest-input", + type=str, + help="Build from this whitelist manifest (read-only).", ) parser.add_argument( - "--prefix", type=str, required=True, help="Prefix for file locations" + "--manifest-output", + type=str, + help="Glob for files and write them to this capture manifest.", + ) + parser.add_argument( + "--blacklist", + type=str, + help="Path to a file containing glob patterns of resources to exclude.", ) + args = parser.parse_args() - # abspath also strips trailing "/" + if args.manifest_input and args.manifest_output: + print( + "Error: --manifest-input and --manifest-output are mutually exclusive.", + file=sys.stderr, + ) + sys.exit(1) + + blacklist_patterns = set() + if args.blacklist: + print(f"Loading blacklist from: {args.blacklist}") + with open(args.blacklist, "r") as f: + for line in f: + pattern = line.strip() + if pattern and not pattern.startswith("#"): + blacklist_patterns.add(pattern) + toolchain_dir = os.path.abspath(args.toolchain_dir) - with open(args.output, "w") as out: + manifest_to_write = ( + open(args.manifest_output, "w") + if args.manifest_output + else open(os.devnull, "w") + ) + + with manifest_to_write as manifest_out, open(args.output, "w") as out: + if args.manifest_output: + preamble = f"""# This manifest was auto-geneerated by the sycl-jit build process + # It contains the list of all candidate resource files found when globbing. + # + # If any of these files should NOT be included in the final library + # (e.g. 
for IP reasons), add their relative path to the blacklist file at: + # {args.blacklist} + """ + manifest_out.write(preamble + "\n") + out.write( """ #include - namespace jit_compiler::resource { const resource_file ToolchainFiles[] = {""" ) - def process_file(file_path): + def generate_cpp_for_file(absolute_path): + relative_path = os.path.relpath(absolute_path, toolchain_dir) + portable_relative_path = relative_path.replace(os.sep, "/") + + for pattern in blacklist_patterns: + # Compare the pattern against the portable relative path + if fnmatch.fnmatch(portable_relative_path, pattern): + print(f" -> Skipping blacklisted file: {portable_relative_path}") + return None + out.write( f""" -{{ - {{"{args.prefix}{os.path.relpath(file_path, toolchain_dir).replace(os.sep, "/")}"}} , - []() {{ - static const char data[] = {{ - #embed "{file_path}" if_empty(0) - , 0}}; - return resource_string_view{{data}}; - }}() -}},""" + {{ + {{"{args.prefix}{portable_relative_path}"}} , + []() {{ + static const char data[] = {{ + #embed "{absolute_path}" if_empty(0) + , 0}}; + return resource_string_view{{data}}; + }}() + }},""" + ) + return portable_relative_path + + if args.manifest_input: + print( + f"Reading resource list from whitelist manifest: {args.manifest_input}" ) + with open(args.manifest_input, "r") as manifest_file: + for line in manifest_file: + relative_path = line.strip() + if relative_path: + absolute_path = os.path.join(toolchain_dir, relative_path) + generate_cpp_for_file(absolute_path) + else: + if args.manifest_output: + print( + f"Globbing for resources and writing capture manifest to: {args.manifest_output}" + ) + else: + print("Globbing for resources (no capture manifest output)...") + + def process_and_log_file(absolute_path): + relative_path = generate_cpp_for_file(absolute_path) + if relative_path: + manifest_out.write(relative_path + "\n") + + def process_dir(dir): + for root, _, files in os.walk(dir): + for file in files: + 
process_and_log_file(os.path.join(root, file)) + + process_dir(os.path.join(args.toolchain_dir, "include/")) + process_dir(os.path.join(args.toolchain_dir, "lib/clang/")) + process_dir(os.path.join(args.toolchain_dir, "lib/clc/")) + + print("Recursively searching for .bc files in lib/...") + lib_dir = os.path.join(args.toolchain_dir, "lib") + search_pattern = os.path.join(lib_dir, "**", "*.bc") - def process_dir(dir): - for root, _, files in os.walk(dir): - for file in files: - file_path = os.path.join(root, file) - process_file(file_path) - - process_dir(os.path.join(args.toolchain_dir, "include/")) - process_dir(os.path.join(args.toolchain_dir, "lib/clang/")) - process_dir(os.path.join(args.toolchain_dir, "lib/clc/")) - - for file in glob.iglob( - "*.bc", root_dir=os.path.join(args.toolchain_dir, "lib") - ): - file_path = os.path.join(args.toolchain_dir, "lib", file) - process_file(file_path) + for file_path in glob.glob(search_pattern, recursive=True): + process_and_log_file(file_path) out.write( f"""