intel
diff --git a/‎third_party/intel/backend/driver.py‎
Lines changed: 62 additions & 3 deletions b/‎third_party/intel/backend/driver.py‎
Lines changed: 62 additions & 3 deletions
diff --git a/‎utils/SPIRVRunner/CMakeLists.txt‎
Lines changed: 14 additions & 2 deletions b/‎utils/SPIRVRunner/CMakeLists.txt‎
Lines changed: 14 additions & 2 deletions
diff --git a/‎utils/SPIRVRunner/README.md‎
Lines changed: 25 additions & 9 deletions b/‎utils/SPIRVRunner/README.md‎
Lines changed: 25 additions & 9 deletions
@@ -435,19 +435,78 @@ def format_of(ty):
     return src
 
 
+def serialize_kernel_metadata(arg, args_dict):
+    """Record the launch metadata carried by ``arg`` in ``args_dict``.
+
+    ``args_dict`` is mutated in place; nothing is returned. The SPIR-V file
+    name is derived from the kernel name as ``<name>.spv``.
+    """
+    args_dict.update({
+        'num_warps': arg.num_warps,
+        'threads_per_warp': arg.threads_per_warp,
+        'shared_memory': arg.shared,
+        'kernel_name': arg.name,
+        'spv_name': f"{arg.name}.spv",
+    })
+
+
+def serialize_args(args, constants, signature):
+    """Dump the arguments of one kernel launch for the SPIR-V Runner.
+
+    Writes into the directory named by ``TRITON_XPU_DUMP_SPIRV_KERNEL_ARGS``:
+    one ``tensor_<i>.pt`` per ``torch.Tensor`` argument (copied to CPU first)
+    and ``args_data.json`` holding the grid, kernel metadata and argument
+    list. Files left by a previous call are overwritten.
+
+    Args:
+        args: Positional launcher arguments. ``args[0:3]`` are the grid
+            dimensions, ``args[3]`` is skipped, and everything from
+            ``args[4]`` on is scanned for kernel metadata, tensors and scalars.
+        constants: Container of kernel-argument indices; a scalar whose index
+            is in it is counted but not written to the argument list.
+        signature: Mapping from kernel-argument index to the type string
+            stored as ``ctype``.
+
+    Does nothing when the environment variable is unset or empty.
+    """
+    import json
+    import numbers
+    import torch
+
+    dir_path = os.getenv('TRITON_XPU_DUMP_SPIRV_KERNEL_ARGS')
+    if not dir_path:
+        # os.path.exists(None) would raise TypeError; there is nowhere to dump.
+        return
+    if not os.path.exists(dir_path):
+        # exist_ok tolerates another process creating the directory meanwhile.
+        os.makedirs(dir_path, exist_ok=True)
+        print(f"Path to directory consisting of SPIR-V Runner data: {dir_path}")
+
+    args_dict = {"gridX": args[0], "gridY": args[1], "gridZ": args[2]}
+    args_dict['argument_list'] = []
+
+    tensor_idx = 0  # numbers the tensor_<i>.pt files
+    scalar_idx = 0  # numbers the scalarArg_<i> entries
+    karg_idx = 0  # position of the argument in `signature` / `constants`
+    for arg in args[4:]:
+        if type(arg).__name__ == "KernelMetadata":
+            # Matched by class name, so the class itself need not be imported.
+            serialize_kernel_metadata(arg, args_dict)
+        elif isinstance(arg, torch.Tensor):
+            tensor_name = f"tensor_{tensor_idx}"
+            with open(os.path.join(dir_path, f"{tensor_name}.pt"), 'wb') as f:
+                torch.save(arg.cpu(), f)
+            args_dict['argument_list'].append({
+                "name": tensor_name,
+                "type": "tensor",
+                "dtype": str(arg.dtype),
+                "ctype": signature[karg_idx],
+            })
+            karg_idx += 1
+            tensor_idx += 1
+        elif isinstance(arg, numbers.Number):
+            if karg_idx not in constants:
+                args_dict['argument_list'].append({
+                    "name": f"scalarArg_{scalar_idx}",
+                    "type": "scalar",
+                    "value": arg,
+                    "ctype": signature[karg_idx],
+                })
+            # NOTE(review): the scalar counter also advances for constants, so
+            # scalarArg_<i> names can skip numbers - kept as in the original.
+            karg_idx += 1
+            scalar_idx += 1
+
+    # Dump argument info as a JSON file
+    json_path = os.path.join(dir_path, 'args_data.json')
+    with open(json_path, 'w') as json_file:
+        json.dump(args_dict, json_file, indent=4)
+
+
 class XPULauncher(object):
 
     def __init__(self, src, metadata):
         ids = {"ids_of_const_exprs": src.fn.constexprs if hasattr(src, "fn") else tuple()}
         constants = src.constants if hasattr(src, "constants") else dict()
         cst_key = lambda i: src.fn.arg_names.index(i) if isinstance(i, str) else i
-        constants = {cst_key(key): value for key, value in constants.items()}
-        signature = {cst_key(key): value for key, value in src.signature.items()}
-        src = make_launcher(constants, signature, ids)
+        # Both maps are keyed by argument index (names are translated through cst_key).
+        # They are stored on the instance so __call__ can pass them to serialize_args.
+        self.constants = {cst_key(key): value for key, value in constants.items()}
+        self.signature = {cst_key(key): value for key, value in src.signature.items()}
+        src = make_launcher(self.constants, self.signature, ids)
         mod = compile_module_from_src(src, "__triton_launcher")
         self.launch = mod.launch
 
     def __call__(self, *args, **kwargs):
+        # Serialize KernelArguments for SPIR-V Runner
+        # The dump runs on every call, but only while the environment variable
+        # holds a non-empty value; the kernel is launched either way.
+        serialize_kernel_args = os.getenv('TRITON_XPU_DUMP_SPIRV_KERNEL_ARGS', None)
+        if serialize_kernel_args:
+            serialize_args(args, self.constants, self.signature)
         self.launch(*args, **kwargs)
 
 
 
@@ -1,13 +1,24 @@
 cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
 project(reproducer)
-
 set(CMAKE_CXX_COMPILER icpx)
 set(BUILD_SHARED_LIBS OFF)
 
 list(APPEND CMAKE_PREFIX_PATH "/opt/intel/oneapi/tbb/latest/lib/cmake/tbb/")
 
 find_package(Torch REQUIRED)
 
+# Fetch nlohmann/json at build time. The configure, build and install steps
+# are empty, so the `json` target only downloads the pinned tag.
+include(ExternalProject)
+ExternalProject_Add(
+    json
+    GIT_REPOSITORY https://github.com/nlohmann/json.git
+    GIT_TAG v3.11.2
+    PREFIX ${CMAKE_BINARY_DIR}/nlohmann_json
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ""
+    INSTALL_COMMAND ""
+)
+# With PREFIX set, ExternalProject checks the sources out to <PREFIX>/src/<name>.
+set(JSON_INCLUDE_DIR ${CMAKE_BINARY_DIR}/nlohmann_json/src/json/include/)
+
 # Add preview-breaking-changes for ABI compatibility with SYCL library linked by PyTorch: https://github.com/pytorch/pytorch/commit/92bebb46fa9fd60523d8aeb7b5f1a3f488c4cd93
 set(COMPILE_FLAGS "-fsycl -Wall -fpreview-breaking-changes")
 set(LINK_FLAGS "-fsycl -lze_loader")
@@ -16,9 +27,10 @@ set(SYCL_FUNCTIONS_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/in
 
 set(TARGET_NAME SPIRVRunner)
 add_executable(${TARGET_NAME} ${TARGET_NAME}.cpp)
-target_include_directories(${TARGET_NAME} PRIVATE "/opt/intel/oneapi/compiler/latest/include" ${SYCL_FUNCTIONS_INCLUDE_DIR})
+target_include_directories(${TARGET_NAME} PRIVATE "/opt/intel/oneapi/compiler/latest/include" ${SYCL_FUNCTIONS_INCLUDE_DIR} ${JSON_INCLUDE_DIR})
 set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS}")
 set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
+# The JSON headers only exist after the `json` download step has run, so it must come first.
+add_dependencies(${TARGET_NAME} json)
 
 target_link_libraries(${TARGET_NAME} "${TORCH_LIBRARIES}")
 set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD 17)
@@ -17,28 +17,44 @@ CMAKE_PREFIX_PATH=/abs/path/to/TorchConfig.cmake/FromAbove/ cmake -DCMAKE_BUILD_
 make -j
 ```
 
-## Configuring
+## Configuration
 
-`SPIRVRunner` is configured to run the `add_kernel.spv` SPIRV binary with inputs `x.py` and `y.py`. `add_kernel.spv` was generated from the `01-vector-add.py` tutorial.
+### Generate Data
 
-Kernels of different shapes require modifying parameters manually in the `SPIRVRunner`. Two places require modification:
+In order to use this utility, the Triton application must be run with the following environment variable set.
+Provide the path to the directory where the serialized JSON, tensors and SPIR-V binary are stored. It is recommended to clear the Triton cache.
+
+```
+export TRITON_XPU_DUMP_SPIRV_KERNEL_ARGS=< Absolute path to SPV Dumps >
+```
+
+The following input data is generated:
+
+1. args_data.json - (Kernel Arguments / Grid Configuration)
+2. tensors  (Tensors used by the kernel (.pt))
+3. SPIR-V binary (.spv)
 
-1. `launchKernel`: Add input Tensors to the function signature, add arguments as variables within the function. Arguments can be pulled from the `args` variable to `XPULauncher.__call__` method in `driver.py`. Arguments should be passed to the `sycl_kernel_launch` function. Note that we currently rely on `sycl::memcpy` to move the PyTorch Tensor to XPU. In later versions of PyTorch we should be able to delegate this responsibility to `PyTorch`, and pass the raw XPU `data_ptr()` from `PyTorch` to the kernel.
-2. `sycl_kernel_launch`: Place all `arg*` parameters into the `params` array and add an appropriate call to `set_scalar_arg` for each param, which tells `SYCL` what the arguments are for the kernel we are going to launch.
 
 ## Running
 
-Once the `SPIRVRunner` has been appropriately configured for the kernel and inputs, run the binary with no arguments:
+Help:
+`./build/SPIRVRunner` < Output Tensor Name >
+
+Note: `Output Tensor Name` is the tensor that needs to be copied back to the CPU and written to disk. The name must match the tensor's name (`tensor_` followed by its number) as specified in the JSON file. Please refer to the `args_data.json` file.
+
+### Demo (01-vector-add.py)
+
+`SPIRVRunner` is configured to run the `add_kernel.spv` SPIRV binary with inputs `tensor_0.pt` and `tensor_1.pt` and output `tensor_2.pt`. `add_kernel.spv` was generated from the `01-vector-add.py` tutorial.
 
-`./build/SPIRVRunner`
+SPIRVRunner Usage:
+`./build/SPIRVRunner tensor_2`
 
 Expected output follows:
 
 ```
 Running on device: Intel(R) Data Center GPU Max 1100
-Tensor a: [98432], Float (393728 bytes)
-Tensor b: [98432], Float (393728 bytes)
 Read 3772 byte kernel.
+create kernel:add_kernel
 Loaded kernel with 0 registers and 0 register spills.
 Tensor output: [98432], Float (393728 bytes)
 Kernel return output: 1.37129