Skip to content

Commit 1682123

Browse files
committed
Implement PR feedback
1 parent 088be20 commit 1682123

File tree

6 files changed

+50
-29
lines changed

6 files changed

+50
-29
lines changed

Deeploy/Targets/PULPOpen/DMA/MchanDma.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class MchanChannelFuture(Future):
1818
_allocTemplate = NodeTemplate("${name} = mchan_channel_alloc();")
1919

2020
_waitTemplate = NodeTemplate("""
21-
if (${name} <= MCHAN_TRANSFER_ID_MAX) {
21+
if (${name} <= MCHAN_CHANNEL_ID_MAX) {
2222
mchan_channel_wait(${name});
2323
mchan_channel_free(${name});
2424
}

Deeploy/TilingExtension/AsyncDma.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,20 @@ def getFuture(self, tensorName: str, direction: DmaDirection) -> Future:
5151

5252
class PerTensorWaitingStrategy(AsyncDmaWaitingStrategy):
5353

54+
def __init__(self, FutureCls: Type[Future]) -> None:
55+
super().__init__(FutureCls)
56+
# map (tensorName, direction) -> Future instance so the same Future
57+
# object is returned for repeated requests for the same tensor/direction
58+
self._futures: Dict[Tuple[str, DmaDirection], Future] = {}
59+
5460
def getFuture(self, tensorName: str, direction: DmaDirection) -> Future:
55-
_ = direction
56-
return self.FutureCls(tensorName)
61+
key = (tensorName, direction)
62+
if key not in self._futures:
63+
# include direction in the future name to avoid accidental name
64+
# collisions between directions for the same tensor
65+
future_name = f"{tensorName}_{direction}"
66+
self._futures[key] = self.FutureCls(future_name)
67+
return self._futures[key]
5768

5869

5970
class DirectionWaitingStrategy(AsyncDmaWaitingStrategy):

Deeploy/TilingExtension/CodeTransformationPasses/DoubleBufferingTilingCodeGeneration.py

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from Deeploy.AbstractDataTypes import VoidType
99
from Deeploy.DeeployTypes import CodeSnippet, ExecutionBlock, NetworkContext, NodeTemplate, OperatorRepresentation, \
1010
VariableBuffer, _ReferenceBuffer
11-
from Deeploy.TilingExtension.AsyncDma import AnydimAsyncDmaTransferAdapter, AsyncDma, EmptyFuture, Future
11+
from Deeploy.TilingExtension.AsyncDma import AnydimAsyncDmaTransferAdapter, AsyncDma, Future
1212
from Deeploy.TilingExtension.CodeTransformationPasses.TilingCodeGeneration import TilingCodeGeneration
1313
from Deeploy.TilingExtension.CodeTransformationPasses.TilingHoistingMixIn import dictOfArrays
1414
from Deeploy.TilingExtension.CodeTransformationPasses.TilingPrototypes import ProfilingPrototypeMixIn, \
@@ -140,18 +140,15 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
140140
nextLocalBufferReference = self._hoistReference(ctxt, f"{tensorName}_next", l1BuffersReferences[1])
141141

142142
future = self.dma.getFuture(tensorName, "ExternalToLocal")
143-
# Extract the future that is not already in the set of ingress futures
144-
_future = set([future]) - ingressFutures
145-
_future = _future.pop() if len(_future) > 0 else EmptyFuture("")
146-
ingressFutures.add(future)
147143

148144
# 2) Load initial input tiles
149145
anydimAdapter = AnydimAsyncDmaTransferAdapter(self.dma)
150146
initialDmaTransferCalls = anydimAdapter.transfer(ctxt, externalBufferRef, localBuffer, rectangles[0].dims,
151147
stridesFromShape(externalBufferShape),
152148
stridesFromShape(rectangles[0].dims), "ExternalToLocal",
153-
_future, math.prod(externalBufferShape))
154-
setupStatements.append(_future.alloc())
149+
future, math.prod(externalBufferShape))
150+
if future not in ingressFutures:
151+
setupStatements.append(future.alloc())
155152
setupStatements.extend(initialDmaTransferCalls)
156153

157154
# 4.1) Choose buffers for current tile (inputs and outputs)
@@ -161,7 +158,9 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
161158

162159
# 4.2.1) Wait for current input tile
163160
ingressDMAStatements.append(CodeSnippet(self._lineComment, {"comment": "Wait for current input tile"}))
164-
ingressDMAStatements.append(_future.wait())
161+
162+
if future not in ingressFutures:
163+
ingressDMAStatements.append(future.wait())
165164

166165
# 4.2.2) if there is a next tile:
167166
ingressDMAStatements.append(
@@ -175,10 +174,15 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
175174

176175
# 4.2.4) Start transfer for next input tile
177176
ingressDMAStatements.append(CodeSnippet(self._lineComment, {"comment": "Transfer next input tile"}))
177+
178+
# Allocate the future for the next transfer
179+
if future not in ingressFutures:
180+
ingressDMAStatements.append(future.alloc())
181+
178182
ingressDMAStatements.extend(
179183
self._generateDmaTransferCalls(ctxt, tensorName, rectangles, "TILING_I+1", nextLocalBufferReference,
180-
externalBufferRef, "ExternalToLocal", _future))
181-
# 4.2.5) Update external reference for next tile
184+
externalBufferRef, "ExternalToLocal", future))
185+
# 4.2.5) Update external reference for next tile
182186
referenceUpdate = self._generateExternalReferenceUpdate(ctxt, tensorName, rectangles, "TILING_I+1",
183187
externalBufferRef)
184188
if referenceUpdate is not None:
@@ -195,6 +199,9 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
195199
# Close the "if there is a next tile" block
196200
ingressDMAStatements.append(CodeSnippet(self._moveTileInCheckCloseStatement, {}))
197201

202+
# Add future to the set to prevent double wait/allocation
203+
ingressFutures.add(future)
204+
198205
# 4.4) Output Data Transfers
199206
# -----------------------------------
200207
for tensorName, rectangles in dictOfArrays(tilingSchedule.outputLoadSchedule).items():
@@ -227,19 +234,21 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
227234

228235
# 4.4.1) Wait for previous output tile
229236
future = self.dma.getFuture(tensorName, "LocalToExternal")
230-
# Extract the future that is not already in the set of ingress futures
231-
_future = set([future]) - egressFutures
232-
_future = _future.pop() if len(_future) > 0 else EmptyFuture("")
233-
egressFutures.add(future)
234237

235238
egressDMAStatements.append(CodeSnippet(self._lineComment, {"comment": "Wait for previous output tile"}))
236-
egressDMAStatements.append(_future.wait())
239+
240+
if future not in egressFutures:
241+
egressDMAStatements.append(future.wait())
237242

238243
# 4.4.2) Start transfer for current output tile
239244
dmaTransferCalls = self._generateDmaTransferCalls(ctxt, tensorName, rectangles, "TILING_I", localBuffer,
240-
externalBufferRef, "LocalToExternal", _future)
245+
externalBufferRef, "LocalToExternal", future)
241246

242247
egressDMAStatements.append(CodeSnippet(self._lineComment, {"comment": "Transfer current output tile"}))
248+
# Allocate the future for the current output tile transfer
249+
if future not in egressFutures:
250+
egressDMAStatements.append(future.alloc())
251+
243252
egressDMAStatements.extend(dmaTransferCalls)
244253

245254
# 4.4.3) Update output reference for next tile
@@ -248,6 +257,9 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
248257
if referenceUpdate is not None:
249258
egressDMAStatements.append(referenceUpdate)
250259

260+
# Add future to the set to prevent double wait/allocation
261+
egressFutures.add(future)
262+
251263
# 4.2.
252264
openLoopStatements += self._switch(buffer_choices, "TILING_I")
253265

Deeploy/TilingExtension/CodeTransformationPasses/TilingCodeGeneration.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,15 +131,13 @@ def _generateDmaTransferCalls(self, ctxt: NetworkContext, tensorName: str, trans
131131
math.prod(externalBuffer.shape,))
132132

133133
# Add allocation snippets
134-
initSnippets = [future.alloc()] + initSnippets
135134
templates = [snippet.template for snippet in initSnippets]
136135
opReprUpdates = [[] for _ in range(len(initSnippets))]
137136

138137
for rect in transfers:
139138
snippets = anydimAdapter.transfer(ctxt, externalBuffer, localBuffer, rect.dims,
140139
stridesFromShape(externalBuffer.shape), stridesFromShape(rect.dims),
141140
direction, future, math.prod(externalBuffer.shape))
142-
snippets = [future.alloc()] + snippets
143141
for i, snippet in enumerate(snippets):
144142
opReprUpdates[i].append(snippet.operatorRepresentation)
145143

TargetLibraries/PULPOpen/inc/mchan_v6.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
#include "pmsis.h"
3737

3838
#define MCHAN_TRANSFER_LEN_SIZE (16)
39-
#define MCHAN_TRANSFER_ID_MAX (15)
39+
#define MCHAN_CHANNEL_ID_MAX (15)
4040

4141
#define MCHAN_CMD_FLAG_DIRECTION_LOC2EXT (0 << (MCHAN_TRANSFER_LEN_SIZE + 0))
4242
#define MCHAN_CMD_FLAG_DIRECTION_EXT2LOC (1 << (MCHAN_TRANSFER_LEN_SIZE + 0))
@@ -71,17 +71,17 @@ static void mchan_transfer_2d_ext_strided(uint32_t cmd, void *loc, void *ext,
7171
static uint32_t mchan_channel_alloc() { return *cmd_ptr; }
7272

7373
static void mchan_channel_free(uint32_t channel_id) {
74-
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
74+
assert(channel_id <= MCHAN_CHANNEL_ID_MAX);
7575
*status_ptr = 1 << channel_id;
7676
}
7777

7878
static uint32_t mchan_channel_is_busy(uint32_t channel_id) {
79-
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
79+
assert(channel_id <= MCHAN_CHANNEL_ID_MAX);
8080
return *status_ptr & (1 << channel_id);
8181
}
8282

8383
static void mchan_channel_wait(uint32_t channel_id) {
84-
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
84+
assert(channel_id <= MCHAN_CHANNEL_ID_MAX);
8585
#if defined(MCHAN_EVENT)
8686
while (mchan_channel_is_busy(channel_id))
8787
eu_evt_maskWaitAndClr(1 << MCHAN_EVENT_BIT);

TargetLibraries/PULPOpen/inc/mchan_v7.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
#include "pmsis.h"
3737

3838
#define MCHAN_TRANSFER_LEN_SIZE (17)
39-
#define MCHAN_TRANSFER_ID_MAX (15)
39+
#define MCHAN_CHANNEL_ID_MAX (15)
4040

4141
#define MCHAN_CMD_FLAG_DIRECTION_LOC2EXT (0 << (MCHAN_TRANSFER_LEN_SIZE + 0))
4242
#define MCHAN_CMD_FLAG_DIRECTION_EXT2LOC (1 << (MCHAN_TRANSFER_LEN_SIZE + 0))
@@ -97,17 +97,17 @@ static void mchan_transfer_2d_loc_strided_ext_strided(
9797
static uint32_t mchan_channel_alloc() { return *cmd_ptr; }
9898

9999
static void mchan_channel_free(uint32_t channel_id) {
100-
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
100+
assert(channel_id <= MCHAN_CHANNEL_ID_MAX);
101101
*status_ptr = 1 << channel_id;
102102
}
103103

104104
static uint32_t mchan_channel_is_busy(uint32_t channel_id) {
105-
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
105+
assert(channel_id <= MCHAN_CHANNEL_ID_MAX);
106106
return *status_ptr & (1 << channel_id);
107107
}
108108

109109
static void mchan_channel_wait(uint32_t channel_id) {
110-
assert(channel_id <= MCHAN_TRANSFER_ID_MAX);
110+
assert(channel_id <= MCHAN_CHANNEL_ID_MAX);
111111
#if defined(MCHAN_EVENT)
112112
while (mchan_channel_is_busy(channel_id))
113113
eu_evt_maskWaitAndClr(1 << MCHAN_EVENT_BIT);

0 commit comments

Comments
 (0)