Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/imex/Transforms/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@
#include <memory>

namespace imex {
/// Forward declaration of the option bundle accepted by the
/// createInsertGPUAllocsPass(const InsertGPUAllocsOptions &) overload below.
/// The full definition is not visible in this header (presumably generated
/// from Passes.td -- TODO confirm).
struct InsertGPUAllocsOptions;
//===----------------------------------------------------------------------===//
// Passes
//===----------------------------------------------------------------------===//
/// Returns a newly created pass instance; the implementation is not visible
/// from this header.
std::unique_ptr<mlir::Pass> createSerializeSPIRVPass();
/// Creates the insert-gpu-allocs pass for the client API named by
/// \p clientAPI, which defaults to "vulkan" when omitted.
/// NOTE(review): the `client-api` pass option declared in Passes.td defaults
/// to "opencl", so the two entry points have different defaults -- confirm
/// this is intended.
std::unique_ptr<mlir::Pass>
createInsertGPUAllocsPass(const char *clientAPI = "vulkan");
/// Creates the insert-gpu-allocs pass configured from an
/// InsertGPUAllocsOptions value instead of a bare client-API string.
std::unique_ptr<mlir::Pass>
createInsertGPUAllocsPass(const InsertGPUAllocsOptions &);
/// Returns a newly created pass instance; the implementation is not visible
/// from this header.
std::unique_ptr<mlir::Pass> createSetSPIRVCapabilitiesPass();
/// Returns a newly created pass instance configured with \p clientAPI, which
/// defaults to "vulkan" when omitted.
std::unique_ptr<mlir::Pass>
createSetSPIRVAbiAttributePass(const char *clientAPI = "vulkan");
Expand Down
6 changes: 5 additions & 1 deletion include/imex/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@ def InsertGPUAllocs : Pass<"insert-gpu-allocs", "::mlir::func::FuncOp"> {
Option<"clientAPI", "client-api", "std::string", /*default=*/"\"opencl\"",
"The client API to use for inserting gpu allocs">,
Option<"inRegions", "in-regions", "bool", "false",
"Add gpu allocs only for memref.AllocOps within GPU regions">
"Add gpu allocs only for memref.AllocOps within GPU regions">,
Option<"isUsmArgs", "is-usm-args", "bool", "false",
"Whether to use USM(unified shared memory) func args, in which the "
"host and device could access the same buffer and there is no need "
"to add memcpy explicitly">
];
}

Expand Down
30 changes: 21 additions & 9 deletions lib/Transforms/InsertGPUAllocs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ class InsertGPUAllocsPass final
/// Default construction: the pass targets the "vulkan" client API.
explicit InsertGPUAllocsPass() : m_clientAPI("vulkan") {}
/// Construct the pass for an explicitly named client API.
explicit InsertGPUAllocsPass(const mlir::StringRef &clientAPI)
    : m_clientAPI(clientAPI) {}
/// Construct the pass from an options struct.
///
/// The options are forwarded to the generated base class, which populates
/// the `clientAPI` pass option read below. `m_clientAPI` starts out as
/// "vulkan" -- the same default the no-argument constructor uses -- so it is
/// never left default-initialized, and is switched to "opencl" only when the
/// option asks for it.
explicit InsertGPUAllocsPass(const imex::InsertGPUAllocsOptions &options)
    : InsertGPUAllocsBase<InsertGPUAllocsPass>(options),
      m_clientAPI("vulkan") {
  if (clientAPI == "opencl") {
    m_clientAPI = "opencl";
  }
}

mlir::LogicalResult
initializeOptions(mlir::StringRef options,
Expand Down Expand Up @@ -540,15 +546,17 @@ class InsertGPUAllocsPass final
// This is the case where the inputs are passed as arguments to the
// function. Unless the `is-usm-args` option is set, this code adds the IR
// for memory allocation on the device with gpu.alloc and inserts a
// memref.copy from host to device.
for (const auto &it : gpuBufferParams) {
auto param = block.getArgument(it.first);
if (isGpuAddrSpace(param))
continue;
auto access = getAccessType(param);
access.hostRead = true;
access.hostWrite = true;
builder.setInsertionPointToStart(&block);
add_gpu_alloc(builder, param, access, term);
if (!isUsmArgs.getValue()) {
for (const auto &it : gpuBufferParams) {
auto param = block.getArgument(it.first);
if (isGpuAddrSpace(param))
continue;
auto access = getAccessType(param);
access.hostRead = true;
access.hostWrite = true;
builder.setInsertionPointToStart(&block);
add_gpu_alloc(builder, param, access, term);
}
}

// CallOp Case: This is the case where the memref producer is coming
Expand Down Expand Up @@ -580,4 +588,8 @@ namespace imex {
/// Builds an InsertGPUAllocsPass for the client API named by \p clientAPI
/// and hands ownership of it to the caller.
std::unique_ptr<mlir::Pass> createInsertGPUAllocsPass(const char *clientAPI) {
  std::unique_ptr<mlir::Pass> pass =
      std::make_unique<InsertGPUAllocsPass>(clientAPI);
  return pass;
}
/// Builds an InsertGPUAllocsPass configured from \p option and hands
/// ownership of it to the caller.
std::unique_ptr<mlir::Pass>
createInsertGPUAllocsPass(const InsertGPUAllocsOptions &option) {
  std::unique_ptr<mlir::Pass> pass =
      std::make_unique<InsertGPUAllocsPass>(option);
  return pass;
}
} // namespace imex
54 changes: 54 additions & 0 deletions test/Transforms/InsertGpuAllocs/skip-gpu-alloc-for-usm-args.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// RUN: imex-opt --insert-gpu-allocs='client-api=opencl is-usm-args=1' %s | FileCheck %s --check-prefix=OPENCL
// RUN: imex-opt --insert-gpu-allocs='client-api=vulkan is-usm-args=1' %s | FileCheck %s --check-prefix=VULKAN

// OPENCL-LABEL: func.func @addt
// OPENCL-SAME: %[[arg0:.+]]: memref<2x5xf32>, %[[arg1:.+]]: memref<2x5xf32>, %[[out_buff:.+]]: memref<2x5xf32>
// VULKAN-LABEL: func.func @addt
// VULKAN-SAME: %[[arg0:.+]]: memref<2x5xf32>, %[[arg1:.+]]: memref<2x5xf32>, %[[out_buff:.+]]: memref<2x5xf32>
// Element-wise add kernel over three memref function arguments plus one
// locally allocated temporary. Both RUN lines pass is-usm-args=1, so the
// checks below expect no allocation or copy to be inserted for the
// arguments, while the local temporary is still handled per client API.
func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>, %out_buff: memref<2x5xf32>) -> memref<2x5xf32> {
  %c0 = arith.constant 0 : index
  %c2 = arith.constant 2 : index
  %c1 = arith.constant 1 : index
  %c5 = arith.constant 5 : index
  // OPENCL-NOT: %[[MEMREF0:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
  // OPENCL-NOT: %[[MEMREF1:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
  // OPENCL-NOT: memref.copy
  // OPENCL-NOT: %[[MEMREF2:.*]] = gpu.alloc host_shared () : memref<2x5xf32>
  // OPENCL-NOT: memref.copy

  // VULKAN-NOT: %[[MEMREF0:.*]] = memref.alloc() : memref<2x5xf32>
  // VULKAN-NOT: %[[MEMREF1:.*]] = memref.alloc() : memref<2x5xf32>
  // VULKAN-NOT: memref.copy
  // VULKAN-NOT: %[[MEMREF2:.*]] = memref.alloc() : memref<2x5xf32>
  // VULKAN-NOT: memref.copy

  %tmp_buff = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32>
  // FileCheck only treats text inside double braces (or a double-bracket
  // variable definition) as a regex, so a bare trailing ".*" would be matched
  // literally and the negative check could never fire. The pattern below is
  // therefore plain text.
  // OPENCL-NOT: memref.alloc()
  // OPENCL: %[[MEMREF3:.*]] = gpu.alloc () : memref<2x5xf32>
  // VULKAN: %[[MEMREF3:.*]] = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32>

  %c1_0 = arith.constant 1 : index
  %1 = affine.apply affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>(%c2)[%c0, %c1]
  %2 = affine.apply affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>(%c5)[%c0, %c1]
  gpu.launch blocks(%arg2, %arg3, %arg4) in (%arg8 = %1, %arg9 = %2, %arg10 = %c1_0) threads(%arg5, %arg6, %arg7) in (%arg11 = %c1_0, %arg12 = %c1_0, %arg13 = %c1_0) {
    %3 = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>(%arg2)[%c1, %c0]
    %4 = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>(%arg3)[%c1, %c0]
    %5 = memref.load %arg0[%3, %4] : memref<2x5xf32>
    %6 = memref.load %arg1[%3, %4] : memref<2x5xf32>
    %7 = arith.addf %5, %6 : f32
    memref.store %7, %tmp_buff[%3, %4] : memref<2x5xf32>

    %8 = memref.load %tmp_buff[%3, %4] : memref<2x5xf32>
    %9 = arith.addf %8, %5 : f32
    memref.store %9, %out_buff[%3, %4] : memref<2x5xf32>

    gpu.terminator
  } {SCFToGPU_visited}

  // OPENCL-NOT: memref.dealloc %[[MEMREF3]] : memref<2x5xf32>
  // OPENCL: gpu.dealloc %[[MEMREF3]] : memref<2x5xf32>
  // VULKAN: memref.dealloc %[[MEMREF3]] : memref<2x5xf32>
  memref.dealloc %tmp_buff : memref<2x5xf32>

  return %out_buff : memref<2x5xf32>
}
Loading