Skip to content

Commit 385e488

Browse files
benvanik authored and giacs-epic committed
Adding fill/update/copy HAL ops. (iree-org#19026)
These ops use the newer style of 64-bit flags. TODOs were added to hal.imports.mlir for future cleanup to existing ops whenever we want to bump the version. Signed-off-by: Giacomo Serafini <[email protected]>
1 parent 39304b9 commit 385e488

File tree

10 files changed

+605
-2
lines changed

10 files changed

+605
-2
lines changed

compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertDeviceOps.cpp

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,50 @@ class DeviceQueryI64OpConversion
115115
mutable IREE::VM::ImportOp importOp;
116116
};
117117

118+
class DeviceQueueFillOpConversion
119+
: public OpConversionPattern<IREE::HAL::DeviceQueueFillOp> {
120+
public:
121+
DeviceQueueFillOpConversion(MLIRContext *context, SymbolTable &importSymbols,
122+
TypeConverter &typeConverter,
123+
StringRef importName)
124+
: OpConversionPattern(context) {
125+
importOp = importSymbols.lookup<IREE::VM::ImportOp>(importName);
126+
assert(importOp);
127+
}
128+
129+
LogicalResult
130+
matchAndRewrite(IREE::HAL::DeviceQueueFillOp op, OpAdaptor adaptor,
131+
ConversionPatternRewriter &rewriter) const override {
132+
auto importType = importOp.getFunctionType();
133+
auto i64Type = rewriter.getI64Type();
134+
auto patternLength = rewriter.create<IREE::VM::ConstI32Op>(
135+
op.getLoc(),
136+
llvm::divideCeil(op.getPattern().getType().getIntOrFloatBitWidth(), 8));
137+
auto flags =
138+
rewriter.create<IREE::VM::ConstI64Op>(op.getLoc(), op.getFlags());
139+
std::array<Value, 10> callOperands = {
140+
adaptor.getDevice(),
141+
castToImportType(adaptor.getQueueAffinity(), i64Type, rewriter),
142+
adaptor.getWaitFence(),
143+
adaptor.getSignalFence(),
144+
adaptor.getTargetBuffer(),
145+
castToImportType(adaptor.getTargetOffset(), i64Type, rewriter),
146+
castToImportType(adaptor.getLength(), i64Type, rewriter),
147+
castToImportType(adaptor.getPattern(), i64Type, rewriter),
148+
patternLength,
149+
flags,
150+
};
151+
auto callOp = rewriter.replaceOpWithNewOp<IREE::VM::CallOp>(
152+
op, SymbolRefAttr::get(importOp), importType.getResults(),
153+
callOperands);
154+
copyImportAttrs(importOp, callOp);
155+
return success();
156+
}
157+
158+
private:
159+
mutable IREE::VM::ImportOp importOp;
160+
};
161+
118162
class DeviceQueueExecuteIndirectOpConversion
119163
: public OpConversionPattern<IREE::HAL::DeviceQueueExecuteIndirectOp> {
120164
public:
@@ -185,6 +229,12 @@ void populateHALDeviceToVMPatterns(MLIRContext *context,
185229
context, importSymbols, typeConverter, "hal.device.queue.alloca");
186230
patterns.insert<VMImportOpConversion<IREE::HAL::DeviceQueueDeallocaOp>>(
187231
context, importSymbols, typeConverter, "hal.device.queue.dealloca");
232+
patterns.insert<DeviceQueueFillOpConversion>(
233+
context, importSymbols, typeConverter, "hal.device.queue.fill");
234+
patterns.insert<VMImportOpConversion<IREE::HAL::DeviceQueueUpdateOp>>(
235+
context, importSymbols, typeConverter, "hal.device.queue.update");
236+
patterns.insert<VMImportOpConversion<IREE::HAL::DeviceQueueCopyOp>>(
237+
context, importSymbols, typeConverter, "hal.device.queue.copy");
188238
patterns.insert<VMImportOpConversion<IREE::HAL::DeviceQueueReadOp>>(
189239
context, importSymbols, typeConverter, "hal.device.queue.read");
190240
patterns.insert<VMImportOpConversion<IREE::HAL::DeviceQueueWriteOp>>(

compiler/src/iree/compiler/Dialect/HAL/Conversion/HALToVM/test/device_ops.mlir

Lines changed: 142 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -141,6 +141,148 @@ util.func public @device_queue_dealloca(
141141

142142
// -----
143143

144+
// Fill with an i8 pattern: the pattern arrives as an i32 argument after
// conversion, is extended to i64 with vm.ext.i32.i64.s, and the pattern
// length constant passed to the import is 1.
// CHECK-LABEL: @device_queue_fill_i8
util.func public @device_queue_fill_i8(
  // CHECK-SAME: (%[[DEV:.+]]: !vm.ref<!hal.device>, %[[QUEUE_AFFINITY:.+]]: i64,
  %dev: !hal.device, %queue_affinity: i64,
  // CHECK-SAME: %[[WAIT:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL:.+]]: !vm.ref<!hal.fence>,
  %wait: !hal.fence, %signal: !hal.fence,
  // CHECK-SAME: %[[PATTERN_ARG:.+]]: i32,
  %pattern: i8,
  // CHECK-SAME: %[[TARGET:.+]]: !vm.ref<!hal.buffer>)
  %target: !hal.buffer) {
  // CHECK-DAG: %[[TARGET_OFS:.+]] = vm.const.i64 200
  %target_ofs = arith.constant 200 : index
  // CHECK-DAG: %[[FILL_LENGTH:.+]] = vm.const.i64 300
  %fill_length = arith.constant 300 : index
  // CHECK-DAG: %[[PATTERN_BYTES:.+]] = vm.const.i32 1
  // CHECK-DAG: %[[FLAG_BITS:.+]] = vm.const.i64.zero
  // CHECK-DAG: %[[PATTERN_EXT:.+]] = vm.ext.i32.i64.s %[[PATTERN_ARG]]
  // CHECK: vm.call @hal.device.queue.fill(
  // CHECK-SAME: %[[DEV]], %[[QUEUE_AFFINITY]],
  // CHECK-SAME: %[[WAIT]], %[[SIGNAL]],
  // CHECK-SAME: %[[TARGET]], %[[TARGET_OFS]],
  // CHECK-SAME: %[[FILL_LENGTH]],
  // CHECK-SAME: %[[PATTERN_EXT]], %[[PATTERN_BYTES]],
  // CHECK-SAME: %[[FLAG_BITS]])
  hal.device.queue.fill<%dev : !hal.device>
      affinity(%queue_affinity)
      wait(%wait) signal(%signal)
      target(%target : !hal.buffer)[%target_ofs]
      length(%fill_length)
      pattern(%pattern : i8)
      flags(0)
  util.return
}
177+
178+
// -----
179+
180+
// Fill with an i32 pattern: the pattern is extended to i64 with
// vm.ext.i32.i64.s and the pattern length constant passed to the import is 4.
// CHECK-LABEL: @device_queue_fill_i32
util.func public @device_queue_fill_i32(
  // CHECK-SAME: (%[[DEV:.+]]: !vm.ref<!hal.device>, %[[QUEUE_AFFINITY:.+]]: i64,
  %dev: !hal.device, %queue_affinity: i64,
  // CHECK-SAME: %[[WAIT:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL:.+]]: !vm.ref<!hal.fence>,
  %wait: !hal.fence, %signal: !hal.fence,
  // CHECK-SAME: %[[PATTERN_ARG:.+]]: i32,
  %pattern: i32,
  // CHECK-SAME: %[[TARGET:.+]]: !vm.ref<!hal.buffer>)
  %target: !hal.buffer) {
  // CHECK-DAG: %[[TARGET_OFS:.+]] = vm.const.i64 200
  %target_ofs = arith.constant 200 : index
  // CHECK-DAG: %[[FILL_LENGTH:.+]] = vm.const.i64 300
  %fill_length = arith.constant 300 : index
  // CHECK-DAG: %[[PATTERN_BYTES:.+]] = vm.const.i32 4
  // CHECK-DAG: %[[FLAG_BITS:.+]] = vm.const.i64.zero
  // CHECK-DAG: %[[PATTERN_EXT:.+]] = vm.ext.i32.i64.s %[[PATTERN_ARG]]
  // CHECK: vm.call @hal.device.queue.fill(
  // CHECK-SAME: %[[DEV]], %[[QUEUE_AFFINITY]],
  // CHECK-SAME: %[[WAIT]], %[[SIGNAL]],
  // CHECK-SAME: %[[TARGET]], %[[TARGET_OFS]],
  // CHECK-SAME: %[[FILL_LENGTH]],
  // CHECK-SAME: %[[PATTERN_EXT]], %[[PATTERN_BYTES]],
  // CHECK-SAME: %[[FLAG_BITS]])
  hal.device.queue.fill<%dev : !hal.device>
      affinity(%queue_affinity)
      wait(%wait) signal(%signal)
      target(%target : !hal.buffer)[%target_ofs]
      length(%fill_length)
      pattern(%pattern : i32)
      flags(0)
  util.return
}
213+
214+
// -----
215+
216+
// Update from a !util.buffer source (a !vm.buffer after conversion) into a
// HAL buffer: offsets and length become i64 constants and flags(0) becomes a
// zero i64 constant on the vm.call.
// CHECK-LABEL: @device_queue_update
util.func public @device_queue_update(
  // CHECK-SAME: (%[[DEV:.+]]: !vm.ref<!hal.device>, %[[QUEUE_AFFINITY:.+]]: i64,
  %dev: !hal.device, %queue_affinity: i64,
  // CHECK-SAME: %[[WAIT:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL:.+]]: !vm.ref<!hal.fence>,
  %wait: !hal.fence, %signal: !hal.fence,
  // CHECK-SAME: %[[SOURCE:.+]]: !vm.buffer,
  %source: !util.buffer,
  // CHECK-SAME: %[[TARGET:.+]]: !vm.ref<!hal.buffer>)
  %target: !hal.buffer) {
  // CHECK-DAG: %[[SOURCE_OFS:.+]] = vm.const.i64 100
  %source_ofs = arith.constant 100 : index
  // CHECK-DAG: %[[TARGET_OFS:.+]] = vm.const.i64 200
  %target_ofs = arith.constant 200 : index
  // CHECK-DAG: %[[UPDATE_LENGTH:.+]] = vm.const.i64 300
  %update_length = arith.constant 300 : index
  // CHECK-DAG: %[[FLAG_BITS:.+]] = vm.const.i64.zero
  // CHECK: vm.call @hal.device.queue.update(
  // CHECK-SAME: %[[DEV]], %[[QUEUE_AFFINITY]],
  // CHECK-SAME: %[[WAIT]], %[[SIGNAL]],
  // CHECK-SAME: %[[SOURCE]], %[[SOURCE_OFS]],
  // CHECK-SAME: %[[TARGET]], %[[TARGET_OFS]],
  // CHECK-SAME: %[[UPDATE_LENGTH]], %[[FLAG_BITS]])
  hal.device.queue.update<%dev : !hal.device>
      affinity(%queue_affinity)
      wait(%wait) signal(%signal)
      source(%source : !util.buffer)[%source_ofs]
      target(%target : !hal.buffer)[%target_ofs]
      length(%update_length)
      flags(0)
  util.return
}
248+
249+
// -----
250+
251+
// Copy between two HAL buffers: offsets and length become i64 constants and
// flags(0) becomes a zero i64 constant on the vm.call.
// CHECK-LABEL: @device_queue_copy
util.func public @device_queue_copy(
  // CHECK-SAME: (%[[DEV:.+]]: !vm.ref<!hal.device>, %[[QUEUE_AFFINITY:.+]]: i64,
  %dev: !hal.device, %queue_affinity: i64,
  // CHECK-SAME: %[[WAIT:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL:.+]]: !vm.ref<!hal.fence>,
  %wait: !hal.fence, %signal: !hal.fence,
  // CHECK-SAME: %[[SOURCE:.+]]: !vm.ref<!hal.buffer>,
  %source: !hal.buffer,
  // CHECK-SAME: %[[TARGET:.+]]: !vm.ref<!hal.buffer>)
  %target: !hal.buffer) {
  // CHECK-DAG: %[[SOURCE_OFS:.+]] = vm.const.i64 100
  %source_ofs = arith.constant 100 : index
  // CHECK-DAG: %[[TARGET_OFS:.+]] = vm.const.i64 200
  %target_ofs = arith.constant 200 : index
  // CHECK-DAG: %[[COPY_LENGTH:.+]] = vm.const.i64 300
  %copy_length = arith.constant 300 : index
  // CHECK-DAG: %[[FLAG_BITS:.+]] = vm.const.i64.zero
  // CHECK: vm.call @hal.device.queue.copy(
  // CHECK-SAME: %[[DEV]], %[[QUEUE_AFFINITY]],
  // CHECK-SAME: %[[WAIT]], %[[SIGNAL]],
  // CHECK-SAME: %[[SOURCE]], %[[SOURCE_OFS]],
  // CHECK-SAME: %[[TARGET]], %[[TARGET_OFS]],
  // CHECK-SAME: %[[COPY_LENGTH]], %[[FLAG_BITS]])
  hal.device.queue.copy<%dev : !hal.device>
      affinity(%queue_affinity)
      wait(%wait) signal(%signal)
      source(%source : !hal.buffer)[%source_ofs]
      target(%target : !hal.buffer)[%target_ofs]
      length(%copy_length)
      flags(0)
  util.return
}
283+
284+
// -----
285+
144286
// CHECK-LABEL: @device_queue_read
145287
util.func public @device_queue_read(
146288
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64,

compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1319,6 +1319,18 @@ LogicalResult DeviceQueueDeallocaOp::verify() {
13191319
return verifyDeviceQueueFences(*this, getWaitFence(), getSignalFence());
13201320
}
13211321

1322+
// Verifies the fill op by delegating to verifyDeviceQueueFences with this
// op's wait and signal fences.
LogicalResult DeviceQueueFillOp::verify() {
  auto waitFence = getWaitFence();
  auto signalFence = getSignalFence();
  return verifyDeviceQueueFences(*this, waitFence, signalFence);
}
1325+
1326+
// Verifies the update op by delegating to verifyDeviceQueueFences with this
// op's wait and signal fences.
LogicalResult DeviceQueueUpdateOp::verify() {
  auto waitFence = getWaitFence();
  auto signalFence = getSignalFence();
  return verifyDeviceQueueFences(*this, waitFence, signalFence);
}
1329+
1330+
// Verifies the copy op by delegating to verifyDeviceQueueFences with this
// op's wait and signal fences.
LogicalResult DeviceQueueCopyOp::verify() {
  auto waitFence = getWaitFence();
  auto signalFence = getSignalFence();
  return verifyDeviceQueueFences(*this, waitFence, signalFence);
}
1333+
13221334
// Verifies the read op by delegating to verifyDeviceQueueFences with this
// op's wait and signal fences.
LogicalResult DeviceQueueReadOp::verify() {
  auto waitFence = getWaitFence();
  auto signalFence = getSignalFence();
  return verifyDeviceQueueFences(*this, waitFence, signalFence);
}

compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td

Lines changed: 132 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1851,6 +1851,138 @@ def HAL_DeviceQueueDeallocaOp : HAL_Op<"device.queue.dealloca"> {
18511851
let hasVerifier = 1;
18521852
}
18531853

1854+
// Definition of hal.device.queue.fill.
//
// Operands: the device and queue affinity, the wait and signal fences, the
// target buffer with its offset and length, and the fill pattern value.
// `flags` is a 64-bit integer attribute. The op produces no results.
def HAL_DeviceQueueFillOp : HAL_Op<"device.queue.fill"> {
  let summary = [{fills a buffer with a repeating pattern}];
  let description = [{
    The target buffer must be visible to the device queue performing the fill.
    In most cases the queue affinity should be set to where the target buffer
    will be consumed so that it has a chance of being cached.

    Note that individual queue transfer operations have a high overhead and they
    should be batched with other operations in command buffers.
  }];

  let arguments = (ins
    HAL_Device:$device,
    HAL_DeviceQueueAffinity:$queue_affinity,
    HAL_Fence:$wait_fence,
    HAL_Fence:$signal_fence,
    HAL_Buffer:$target_buffer,
    HAL_DeviceSize:$target_offset,
    HAL_DeviceSize:$length,
    HAL_FillPatternType:$pattern,
    I64Attr:$flags
  );
  let results = (outs);

  let assemblyFormat = [{
    `<` $device `:` type($device) `>`
    `affinity` `(` $queue_affinity `)`
    `wait` `(` $wait_fence `)`
    `signal` `(` $signal_fence `)`
    `target` `(` $target_buffer `:` type($target_buffer) `)`
    `` `[` $target_offset `]`
    `length` `(` $length `)`
    `pattern` `(` $pattern `:` type($pattern) `)`
    `flags` `(` $flags `)`
    attr-dict-with-keyword
  }];

  // Requires a hand-written DeviceQueueFillOp::verify() in C++.
  let hasVerifier = 1;
}
1893+
1894+
// Definition of hal.device.queue.update.
//
// Operands: the device and queue affinity, the wait and signal fences, a
// util buffer source with its offset, and a HAL buffer target with its
// offset, plus the length. `flags` is a 64-bit integer attribute. The op
// produces no results.
def HAL_DeviceQueueUpdateOp : HAL_Op<"device.queue.update"> {
  let summary = [{updates a buffer with the contents of a host buffer}];
  let description = [{
    The provided host source buffer will be captured and need not remain live or
    unchanged while the operation is queued. The target buffer must be visible
    to the device queue performing the update. In most cases the queue affinity
    should be set to where the target buffer will be consumed so that it has a
    chance of being cached.

    Some implementations may have limits on the size of the update or may
    perform poorly if the size is larger than an implementation-defined limit.
    Updates should be kept as small and infrequent as possible.

    Note that individual queue transfer operations have a high overhead and they
    should be batched with other operations in command buffers.
  }];

  let arguments = (ins
    HAL_Device:$device,
    HAL_DeviceQueueAffinity:$queue_affinity,
    HAL_Fence:$wait_fence,
    HAL_Fence:$signal_fence,
    Util_BufferType:$source_buffer,
    HAL_DeviceSize:$source_offset,
    HAL_Buffer:$target_buffer,
    HAL_DeviceSize:$target_offset,
    HAL_DeviceSize:$length,
    I64Attr:$flags
  );
  let results = (outs);

  let assemblyFormat = [{
    `<` $device `:` type($device) `>`
    `affinity` `(` $queue_affinity `)`
    `wait` `(` $wait_fence `)`
    `signal` `(` $signal_fence `)`
    `source` `(` $source_buffer `:` type($source_buffer) `)`
    `` `[` $source_offset `]`
    `target` `(` $target_buffer `:` type($target_buffer) `)`
    `` `[` $target_offset `]`
    `length` `(` $length `)`
    `flags` `(` $flags `)`
    attr-dict-with-keyword
  }];

  // Requires a hand-written DeviceQueueUpdateOp::verify() in C++.
  let hasVerifier = 1;
}
1941+
1942+
// Definition of hal.device.queue.copy.
//
// Operands: the device and queue affinity, the wait and signal fences, and
// HAL buffer source and target, each with its own offset, plus the length.
// `flags` is a 64-bit integer attribute. The op produces no results.
def HAL_DeviceQueueCopyOp : HAL_Op<"device.queue.copy"> {
  let summary = [{copies one device-visible buffer to another}];
  let description = [{
    The source buffer and target buffer must both be visible to the device
    queue performing the copy. In most cases the queue affinity should be set to
    where the target buffer will be consumed so that it has a chance of being
    cached. The source buffer must have transfer-source usage and the target
    buffer must have transfer-target usage.

    Note that individual queue transfer operations have a high overhead and they
    should be batched with other operations in command buffers.
  }];

  let arguments = (ins
    HAL_Device:$device,
    HAL_DeviceQueueAffinity:$queue_affinity,
    HAL_Fence:$wait_fence,
    HAL_Fence:$signal_fence,
    HAL_Buffer:$source_buffer,
    HAL_DeviceSize:$source_offset,
    HAL_Buffer:$target_buffer,
    HAL_DeviceSize:$target_offset,
    HAL_DeviceSize:$length,
    I64Attr:$flags
  );
  let results = (outs);

  let assemblyFormat = [{
    `<` $device `:` type($device) `>`
    `affinity` `(` $queue_affinity `)`
    `wait` `(` $wait_fence `)`
    `signal` `(` $signal_fence `)`
    `source` `(` $source_buffer `:` type($source_buffer) `)`
    `` `[` $source_offset `]`
    `target` `(` $target_buffer `:` type($target_buffer) `)`
    `` `[` $target_offset `]`
    `length` `(` $length `)`
    `flags` `(` $flags `)`
    attr-dict-with-keyword
  }];

  // Requires a hand-written DeviceQueueCopyOp::verify() in C++.
  let hasVerifier = 1;
}
1985+
18541986
def HAL_DeviceQueueReadOp : HAL_Op<"device.queue.read"> {
18551987
let summary = [{reads a segment from a file into a device buffer}];
18561988
let description = [{

0 commit comments

Comments
 (0)