SPIR-V Runner: Integrate argument parsing library (#2759)

kballeda · web-flow · commit 680965c7b68a · 2024-12-05T10:34:25.000-05:00
This PR integrates LLVM based argument parsing library into SPIR-V Runner and closes #2568 ``` /intel-xpu-backend-for-triton/utils/SPIRVRunner$ ./build/SPIRVRunner -o tensor_2 Running on device: Intel(R) Data Center GPU Max 1100 Read 3772 byte kernel. Loaded kernel with 0 registers and 0 register spills. Tensor output: [98432], Float (393728 bytes) Output Tensor Path: intel-xpu-backend-for-triton/utils/SPIRVRunner/cpp_outs.pt intel-xpu-backend-for-triton/utils/SPIRVRunner$ ./build/SPIRVRunner -o tensor_2 -p Running on device: Intel(R) Data Center GPU Max 1100 Read 3772 byte kernel. Loaded kernel with 0 registers and 0 register spills. Tensor output: [98432], Float (393728 bytes) Kernel execution time: 0.0096 ms Output Tensor Path: intel-xpu-backend-for-triton/utils/SPIRVRunner/cpp_outs.pt intel-xpu-backend-for-triton/utils/SPIRVRunner$ ./build/SPIRVRunner --help USAGE: SPIRVRunner [options] OPTIONS: Color Options: --color - Use colors in output (default=autodetect) General options: -o <filename> - Specify Output Tensor Name -p - Enable Profiling ```
diff --git a/utils/SPIRVRunner/CMakeLists.txt b/utils/SPIRVRunner/CMakeLists.txt
@@ -16,6 +16,13 @@ list(APPEND CMAKE_PREFIX_PATH "${ONEAPI_ROOT}/tbb/latest/lib/cmake/tbb/")
 
 find_package(Torch REQUIRED)
 
+# Include LLVM Support Library for CLI parsing
+find_package(LLVM REQUIRED CONFIG)
+include_directories(${LLVM_INCLUDE_DIRS})
+add_definitions(${LLVM_DEFINITIONS})
+add_library(llvm_parser OBJECT llvm_parser.cpp)
+target_compile_options(llvm_parser PRIVATE -fno-rtti)
+
 include(ExternalProject)
 ExternalProject_Add(
     json
@@ -34,14 +41,13 @@ set(COMPILE_FLAGS "-fsycl -Wall -fpreview-breaking-changes")
 set(LINK_FLAGS "-fsycl -lze_loader")
 
 set(SYCL_FUNCTIONS_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/intel/backend/include")
-
 set(TARGET_NAME SPIRVRunner)
-add_executable(${TARGET_NAME} ${TARGET_NAME}.cpp)
+add_executable(${TARGET_NAME} ${TARGET_NAME}.cpp $<TARGET_OBJECTS:llvm_parser>)
 target_include_directories(${TARGET_NAME} PRIVATE
-    "${ONEAPI_ROOT}/compiler/latest/include" ${SYCL_FUNCTIONS_INCLUDE_DIR} ${JSON_INCLUDE_DIR})
+	"${ONEAPI_ROOT}/compiler/latest/include" ${SYCL_FUNCTIONS_INCLUDE_DIR} ${JSON_INCLUDE_DIR})
 set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS}")
 set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
 add_dependencies(${TARGET_NAME} json)
-
-target_link_libraries(${TARGET_NAME} "${TORCH_LIBRARIES}")
+llvm_map_components_to_libnames(LLVM_LIBS Support)
+target_link_libraries(${TARGET_NAME} PRIVATE "${TORCH_LIBRARIES}" "${LLVM_LIBS}")
 set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD 17)
diff --git a/utils/SPIRVRunner/README.md b/utils/SPIRVRunner/README.md
@@ -10,10 +10,17 @@ find .venv -name TorchConfig.cmake
 ```
 in the top level Triton directory.
 
+`SPIRVRunner` depends on the LLVM Support library for argument parsing. To make it available, run the following in the top level Triton directory:
+```
+scripts/compile-triton.sh --llvm
+```
+
+SPIR-V Runner build steps:
+
 ```
 mkdir build
 cd build
-CMAKE_PREFIX_PATH=/abs/path/to/TorchConfig.cmake/FromAbove/ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
+CMAKE_PREFIX_PATH=/abs/path/to/TorchConfig.cmake/FromAbove/ LLVM_DIR=/abs/path/to/packages/llvm cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
 make -j
 ```
 
@@ -38,7 +45,17 @@ Following input data is generated,
 ## Running
 
 Help:
-`./build/SPIRVRunner` < Output Tensor Name >
+
+```
+USAGE: SPIRVRunner [options]
+
+General options:
+
+  -o <string> - <Specify Output Tensor Name>
+
+  -p          - Enable kernel time profiling
+ ```
+
 
 Note: `Output Tensor Name`  is essentially a chosen tensor that needs to be copied back to the CPU and written to disk. Additionally, the name must match the tensor's name (tensor_) and number as specified in the JSON file. Please refer args_data.json file.
 
@@ -47,18 +64,17 @@ Note: `Output Tensor Name`  is essentially a chosen tensor that needs to be copi
 `SPIRVRunner` is configured to run the `add_kernel.spv` SPIRV binary with inputs `tensor_0.pt` and `tensor_1.pt` and output `tensor_2.pt`. `add_kernel.spv` was generated from the `01-vector-add.py` tutorial.
 
 SPIRVRunner Usage:
-`./build/SPIRVRunner tensor_2`
+`./build/SPIRVRunner -o tensor_2 -p`
 
 Expected output follows:
 
 ```
 Running on device: Intel(R) Data Center GPU Max 1100
 Read 3772 byte kernel.
-create kernel:add_kernel
 Loaded kernel with 0 registers and 0 register spills.
 Tensor output: [98432], Float (393728 bytes)
-Kernel return output: 1.37129
-[ CPUFloatType{} ]
+Kernel execution time: 0.0096 ms
+Output Tensor Path: /abs/path/utils/SPIRVRunner/cpp_outs.pt
 ```
 
 The GPU hardware, shape and data type of each Tensor (along with number of bytes), and kernel information are printed. The shape and data type of the output Tensor is currently printed, along with the the first cell in the output. Ensuring the value of the first cell is non-zero allows for a quick sanity check. The output Tensor is written to a file `cpp_outs.pt` which is a Tensor in PyTorch format. Typically, we will create a quick Python script to read the input Tensor, run the same computations in PyTorch, and then compare the PyTorch result with the loaded `cpp_outs.pt` Tensor using the PyTorch testing API.
diff --git a/utils/SPIRVRunner/SPIRVRunner.cpp b/utils/SPIRVRunner/SPIRVRunner.cpp
@@ -2,19 +2,18 @@
 #include <sycl/sycl.hpp>
 #include <torch/torch.h>
 
+#include "llvm_parser.h"
+#include "sycl_functions.h"
 #include <algorithm>
 #include <cstdlib>
 #include <filesystem>
 #include <fstream>
 #include <ios>
 #include <iostream>
+#include <nlohmann/json.hpp>
 #include <regex>
 #include <string>
 #include <vector>
-
-#include "sycl_functions.h"
-#include <nlohmann/json.hpp>
-
 using json = nlohmann::json;
 using ordered_json = nlohmann::ordered_json;
 
@@ -411,38 +410,14 @@ at::Tensor launchKernel(sycl::queue stream, sycl::kernel kernel,
   return triton_args.host_outbuffer;
 }
 
-bool check_option_amoung_argv(int argc, char **argv, std::string option) {
-  bool res = false;
-  if (argc > 2) {
-    // optional parameters can be in any order
-    for (int i = 2; i < argc; i++) {
-      if (argv[i] == option) {
-        res = true;
-        break;
-      }
-    }
-  }
-  return res;
-}
-
 int main(int argc, char **argv) {
   try {
-    std::string enable_profiling = "--enable-profiling";
-    if (argc < 2) {
-      std::cout << "Help: " << std::endl;
-      std::cout << "<Executable> <Output Tensor Name>" << std::endl;
-      std::cout << "./build/SPIRVRunner tensor_2" << std::endl;
-      std::cout << "To get kernel time, use:" << std::endl;
-      std::cout << "./build/SPIRVRunner tensor_2 " << enable_profiling
-                << std::endl;
-      throw std::runtime_error("Input arguments are missing \n");
-    }
+    command_line_parser cli(argc, argv);
+    auto cliopts = cli.parse();
 
     // initialize sycl runtime
-    bool get_kernel_time =
-        check_option_amoung_argv(argc, argv, enable_profiling);
     sycl::queue q;
-    if (get_kernel_time) {
+    if (cliopts.get_kernel_time) {
       sycl::property_list prop_list{sycl::property::queue::enable_profiling()};
       q = sycl::queue(sycl::gpu_selector_v, exception_handler, prop_list);
     } else {
@@ -455,7 +430,7 @@ int main(int argc, char **argv) {
     initDevices(&q);
 
     // Parse the JSON file and create argument dictionary
-    KernelArguments tritonArgDict(argv[1]);
+    KernelArguments tritonArgDict(cliopts.output_tensor);
 
     // read spirv
     auto spirv = read_spirv(tritonArgDict.spv_name);
@@ -469,7 +444,8 @@ int main(int argc, char **argv) {
     std::cout << "Loaded kernel with " << n_regs << " registers and "
               << n_spills << " register spills." << std::endl;
 
-    auto output = launchKernel(q, kernel, tritonArgDict, get_kernel_time);
+    auto output =
+        launchKernel(q, kernel, tritonArgDict, cliopts.get_kernel_time);
 
     auto output_tensor = tritonArgDict.spirv_dump_dir + "/cpp_outs.pt";
     write_tensor(output_tensor, output);
diff --git a/utils/SPIRVRunner/llvm_parser.cpp b/utils/SPIRVRunner/llvm_parser.cpp
@@ -0,0 +1,20 @@
+#include "llvm_parser.h"
+
+command_line_parser::command_line_parser(int argc, char **argv)
+    : argc(argc), argv(argv) {} // Only stores the arguments; see parse().
+
+// Parse the stored argv with llvm::cl and return the selected options.
+command_line_parser::options command_line_parser::parse() {
+  options opts;
+  // static: each cl::opt registers its address with LLVM's global option
+  // registry on construction, so it must outlive this function call.
+  static llvm::cl::opt<std::string> output_tensor(
+      "o", llvm::cl::desc("<Specify Output Tensor Name>"), llvm::cl::Required);
+  static llvm::cl::opt<bool> enable_profiling(
+      "p", llvm::cl::desc("Enable kernel time profiling"),
+      llvm::cl::init(opts.get_kernel_time));
+  llvm::cl::ParseCommandLineOptions(argc, argv, "SPIRVRunner\n");
+  opts.output_tensor = output_tensor;
+  opts.get_kernel_time = enable_profiling;
+  return opts;
+}
diff --git a/utils/SPIRVRunner/llvm_parser.h b/utils/SPIRVRunner/llvm_parser.h
@@ -0,0 +1,20 @@
+#ifndef LLVM_PARSER_H
+#define LLVM_PARSER_H
+
+#include "llvm/Support/CommandLine.h"
+
+class command_line_parser { // Thin wrapper over LLVM's CommandLine parser.
+public:
+  struct options {
+    std::string output_tensor;    // value of the required -o option
+    bool get_kernel_time = false; // true when -p is passed
+  };
+
+  command_line_parser(int argc, char **argv); // stores argc/argv, no parsing
+  options parse(); // runs llvm::cl::ParseCommandLineOptions on stored args
+
+private:
+  int argc;
+  char **argv;
+};
+#endif