Skip to content

Commit 929d8d7

Browse files
committed
Implement feedback
1 parent 0e8d74e commit 929d8d7

File tree

6 files changed

+35
-38
lines changed

6 files changed

+35
-38
lines changed

Deeploy/Targets/PULPOpen/DMA/MchanDma.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,12 @@ class MchanChannelFuture(Future):
1717

1818
_allocTemplate = NodeTemplate("${name} = mchan_channel_alloc();")
1919

20-
_waitTemplate = NodeTemplate("mchan_channel_wait(${name});\nmchan_channel_free(${name});")
20+
_waitTemplate = NodeTemplate("""
21+
if (${name} <= MCHAN_TRANSFER_ID_MAX) {
22+
mchan_channel_wait(${name});
23+
mchan_channel_free(${name});
24+
}
25+
""")
2126

2227

2328
class MchanDma(AsyncDma):

Deeploy/TilingExtension/AsyncDma.py

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,12 @@ def transferOpRepr(self, externalBuffer: VariableBuffer, localBuffer: VariableBu
111111
return {"loc": localBuffer.name, "ext": externalBuffer.name, "future": future.name}
112112

113113
def transfer(self, ctxt: NetworkContext, externalBuffer: VariableBuffer, localBuffer: VariableBuffer,
114-
shape: Tuple[int, ...], strideExt: Tuple[int, ...], strideLoc: Tuple[int,
115-
...], direction: DmaDirection,
116-
future: Future) -> Tuple[List[CodeSnippet], List[CodeSnippet], List[CodeSnippet]]:
114+
shape: Tuple[int, ...], strideExt: Tuple[int, ...], strideLoc: Tuple[int, ...],
115+
direction: DmaDirection, future: Future) -> List[CodeSnippet]:
117116
self.checkTransfer(ctxt, externalBuffer, localBuffer, shape, strideExt, strideLoc, direction)
118117
opRepr = self.transferOpRepr(externalBuffer, localBuffer, shape, strideExt, strideLoc, direction, future)
119118
template = self._transferTemplates[len(shape)]
120-
return [future.alloc()], [CodeSnippet(template, opRepr)], []
119+
return [CodeSnippet(template, opRepr)]
121120

122121

123122
class EmptyFuture(Future):
@@ -148,18 +147,14 @@ def transferOpRepr(self, externalBuffer: VariableBuffer, localBuffer: VariableBu
148147
return self.dma.transferOpRepr(externalBuffer, localBuffer, shape, strideExt, strideLoc, direction, future)
149148

150149
def transfer(self, ctxt: NetworkContext, externalBuffer: VariableBuffer, localBuffer: VariableBuffer,
151-
shape: Tuple[int, ...], strideExt: Tuple[int, ...], strideLoc: Tuple[int,
152-
...], direction: DmaDirection,
153-
future: Future) -> Tuple[List[CodeSnippet], List[CodeSnippet], List[CodeSnippet]]:
150+
shape: Tuple[int, ...], strideExt: Tuple[int, ...], strideLoc: Tuple[int, ...],
151+
direction: DmaDirection, future: Future) -> List[CodeSnippet]:
154152
callStack = []
155-
alloc_code, dma_code, deinit_code = self.dma.transfer(ctxt, externalBuffer, localBuffer, shape, strideExt,
156-
strideLoc, direction, future)
157-
callStack.extend(alloc_code)
153+
dma_code = self.dma.transfer(ctxt, externalBuffer, localBuffer, shape, strideExt, strideLoc, direction, future)
154+
callStack.append(future.alloc())
158155
callStack.extend(dma_code)
159156
callStack.append(future.wait())
160-
callStack.extend(deinit_code)
161-
162-
return [], callStack, []
157+
return callStack
163158

164159

165160
class AnydimAsyncDmaTransferAdapter:
@@ -220,7 +215,7 @@ def transfer(self,
220215
strideLoc: Tuple[int, ...],
221216
direction: DmaDirection,
222217
future: Future,
223-
strideExtPad: int = 0) -> Tuple[List[CodeSnippet], List[CodeSnippet], List[CodeSnippet]]:
218+
strideExtPad: int = 0) -> List[CodeSnippet]:
224219
transferRank = len(shape)
225220
kernelRank = self.nearestSupportedTransferRank(transferRank)
226221

@@ -256,13 +251,12 @@ def transfer(self,
256251
"offset": "ext_offset"
257252
}))
258253

259-
alloc_code, dma_code, deinit_code = self.dma.transfer(ctxt, externalBufferOffseted, localBufferOffseted,
260-
shape[-kernelRank:], strideExt[-kernelRank:],
261-
strideLoc[-kernelRank:], direction, future)
254+
dma_code = self.dma.transfer(ctxt, externalBufferOffseted, localBufferOffseted, shape[-kernelRank:],
255+
strideExt[-kernelRank:], strideLoc[-kernelRank:], direction, future)
262256

263257
callStack.extend(dma_code)
264258
callStack.append(CodeSnippet(self.NestedForLoopCloseTemplate(nestedLoopDepth), {}))
265-
return alloc_code, callStack, deinit_code
259+
return callStack
266260
elif kernelRank == transferRank:
267261
return self.dma.transfer(ctxt, externalBuffer, localBuffer, shape, strideExt, strideLoc, direction, future)
268262
else:

Deeploy/TilingExtension/CodeTransformationPasses/DoubleBufferingTilingCodeGeneration.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ class DoubleBufferingTilingCodeGeneration(TilingCodeGeneration):
2828
}
2929
""")
3030

31+
# LMACAN: The parentheses around ${tileIdxVar} are required so that the modulo is applied
32+
# to the whole tile-index expression. Without them, a tileIdxVar such as
33+
# "TILING_I + 1" expands to "TILING_I + 1 % bufferCount", where only the 1 is reduced.
3134
_switchOpen = NodeTemplate("switch((${tileIdxVar}) % ${bufferCount}) {")
3235
_caseOpen = NodeTemplate("case ${case}:")
3336
_caseClose = NodeTemplate("break;")
@@ -148,9 +151,7 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
148151
stridesFromShape(externalBufferShape),
149152
stridesFromShape(rectangles[0].dims), "ExternalToLocal",
150153
_future, math.prod(externalBufferShape))
151-
152-
initialDmaTransferCalls = [item for tup in initialDmaTransferCalls for item in tup]
153-
setupStatements.append(CodeSnippet(self._lineComment, {"comment": "Transfer initial input tile"}))
154+
setupStatements.append(_future.alloc())
154155
setupStatements.extend(initialDmaTransferCalls)
155156

156157
# 4.1) Choose buffers for current tile (inputs and outputs)

Deeploy/TilingExtension/CodeTransformationPasses/TilingCodeGeneration.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,17 +130,16 @@ def _generateDmaTransferCalls(self, ctxt: NetworkContext, tensorName: str, trans
130130
stridesFromShape(transfers[0].dims), direction, future,
131131
math.prod(externalBuffer.shape,))
132132

133-
initSnippets = [item for tup in initSnippets for item in tup]
134-
133+
# Add allocation snippets
134+
initSnippets = [future.alloc()] + initSnippets
135135
templates = [snippet.template for snippet in initSnippets]
136136
opReprUpdates = [[] for _ in range(len(initSnippets))]
137137

138138
for rect in transfers:
139139
snippets = anydimAdapter.transfer(ctxt, externalBuffer, localBuffer, rect.dims,
140140
stridesFromShape(externalBuffer.shape), stridesFromShape(rect.dims),
141141
direction, future, math.prod(externalBuffer.shape))
142-
143-
snippets = [item for tup in snippets for item in tup]
142+
snippets = [future.alloc()] + snippets
144143
for i, snippet in enumerate(snippets):
145144
opReprUpdates[i].append(snippet.operatorRepresentation)
146145

TargetLibraries/PULPOpen/inc/mchan_v6.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#ifndef __MCHAN_V6_H__
88
#define __MCHAN_V6_H__
99

10+
#include "assert.h"
11+
1012
// Requires to have MCHAN_BASE_ADDR, MCHAN_EVENT defined outside of header
1113
#ifndef MCHAN_BASE_ADDR
1214
#error "[mchan_v6.h] MCHAN_BASE_ADDR not defined!"
@@ -69,20 +71,17 @@ static void mchan_transfer_2d_ext_strided(uint32_t cmd, void *loc, void *ext,
6971
static uint32_t mchan_channel_alloc() { return *cmd_ptr; }
7072

7173
static void mchan_channel_free(uint32_t channel_id) {
72-
if (channel_id > MCHAN_TRANSFER_ID_MAX)
73-
return;
74+
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
7475
*status_ptr = 1 << channel_id;
7576
}
7677

7778
static uint32_t mchan_channel_is_busy(uint32_t channel_id) {
78-
if (channel_id > MCHAN_TRANSFER_ID_MAX)
79-
return 0;
79+
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
8080
return *status_ptr & (1 << channel_id);
8181
}
8282

8383
static void mchan_channel_wait(uint32_t channel_id) {
84-
if (channel_id > MCHAN_TRANSFER_ID_MAX)
85-
return;
84+
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
8685
#if defined(MCHAN_EVENT)
8786
while (mchan_channel_is_busy(channel_id))
8887
eu_evt_maskWaitAndClr(1 << MCHAN_EVENT_BIT);

TargetLibraries/PULPOpen/inc/mchan_v7.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#ifndef __MCHAN_V7_H__
88
#define __MCHAN_V7_H__
99

10+
#include "assert.h"
11+
1012
// Requires to have MCHAN_BASE_ADDR, MCHAN_EVENT defined outside of header
1113
#ifndef MCHAN_BASE_ADDR
1214
#error "[mchan_v7.h] MCHAN_BASE_ADDR not defined!"
@@ -95,20 +97,17 @@ static void mchan_transfer_2d_loc_strided_ext_strided(
9597
static uint32_t mchan_channel_alloc() { return *cmd_ptr; }
9698

9799
static void mchan_channel_free(uint32_t channel_id) {
98-
if (channel_id > MCHAN_TRANSFER_ID_MAX)
99-
return;
100+
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
100101
*status_ptr = 1 << channel_id;
101102
}
102103

103104
static uint32_t mchan_channel_is_busy(uint32_t channel_id) {
104-
if (channel_id > MCHAN_TRANSFER_ID_MAX)
105-
return 0;
105+
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
106106
return *status_ptr & (1 << channel_id);
107107
}
108108

109109
static void mchan_channel_wait(uint32_t channel_id) {
110-
if (channel_id > MCHAN_TRANSFER_ID_MAX)
111-
return;
110+
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
112111
#if defined(MCHAN_EVENT)
113112
while (mchan_channel_is_busy(channel_id))
114113
eu_evt_maskWaitAndClr(1 << MCHAN_EVENT_BIT);

0 commit comments

Comments
 (0)