5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
## Unreleased (Planned Release Target: v0.2.1)

### List of Pull Requests
- Add tile transfer annotation [#127](https://github.com/pulp-platform/Deeploy/pull/127)
- Refactor Logging for Improved Debugging [#115](https://github.com/pulp-platform/Deeploy/pull/115)
- Add reuse-tool as an SPDX license header linter [#113](https://github.com/pulp-platform/Deeploy/pull/113)
- Bug fixes, API Cleanup and Reduce Compiler Warning on PULP [#112](https://github.com/pulp-platform/Deeploy/pull/112)
@@ -46,6 +47,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Buffer utilities: `checkNumLevels` validation and `sizeInBytes` method
- Per–memory-level usage tracking and worst-case reporting in `NetworkContext`
- Memory/I/O summaries and input/output logging in deployers
- Transfer annotation of tiled execution blocks

### Changed
- Replaced platform-specific tags (`*-amd64`, `*-arm64`) with direct digest references in `Noelware/docker-manifest-action`.
@@ -73,6 +75,8 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Changed types and added correct casts to fix many compiler warnings in the PULP target library
- Use [reuse-tool](https://github.com/fsfe/reuse-tool) in pre-commit, CI, and Makefile for SPDX license header linting
- Deployer workflow now uses `prepare(...)` instead of `generateFunction(...)`.
- Refactored `computeTileHyperRectangles`
- `wrapTilingSolution` now uses the transfer annotation

### Fixed
- Prevent node duplication for graphs generated via GraphSurgeon
@@ -83,6 +87,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Corrected method usage in `importDeeployState` to call `NetworkContext.importNetworkContext` instead of the incorrect method name
- Correctly return `signProp` from `setupDeployer` instead of hardcoding the value to `False` in `testMVP.py`
- Fixed `Unsqueeze` Op. when using ONNX opset 13 or higher (from attribute to input)
- Fixed compiler warning by casting the external pointer in `L3Dma` to `uint32_t`

### Removed
- Delete outdated and unused `.gitlab-ci.yml` file
1 change: 1 addition & 0 deletions Deeploy/DeeployTypes.py
@@ -1458,6 +1458,7 @@ def __init__(self, operatorCodeSnippet: Optional[CodeSnippet] = None):
) #: Sequence[CodeSnippet]: ordered list of code snippets that need to be generated to implement the associated operator

self.patternMemoryConstraint: Optional = None #: Optional[PatternMemoryConstraint]: Tiling information of the operator which is annotated in the midend
self.transfers: Optional = None #: Optional[Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]]]: Tiling transfers

def addLeft(self, template: NodeTemplate, operatorRepresentation: OperatorRepresentation):
"""Adds a code snippet that is generated BEFORE any of the other code snippets in this ExecutionBlock
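The new `transfers` field's nesting is easy to misread from the type comment alone. Below is a minimal sketch of the annotation's shape, using stand-in dataclasses and made-up tensor and memory-level names rather than Deeploy's actual classes:

```python
# Minimal sketch of the ExecutionBlock.transfers annotation, assuming the
# nesting documented in the type hint; names and shapes are hypothetical.
from dataclasses import dataclass
from typing import Dict, List, Tuple


@dataclass
class HyperRectangle:
    offset: Tuple[int, ...]  # offset of the tile within its parent tile
    dims: Tuple[int, ...]  # extent of the tile in each dimension


@dataclass
class AbsoluteHyperRectangle:
    rectangle: HyperRectangle
    absoluteOffset: Tuple[int, ...]  # offset relative to the full tensor


# tensor name -> memory level -> list of tile batches; each inner list holds
# the tiles produced by splitting one tile from the level above.
transfers: Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]] = {
    "input_0": {
        "L1": [[
            AbsoluteHyperRectangle(HyperRectangle((0, 0), (8, 16)), (0, 0)),
            AbsoluteHyperRectangle(HyperRectangle((8, 0), (8, 16)), (8, 0)),
        ]],
    },
}
```

The code-generation passes below iterate exactly this structure: they pick one memory level per tensor and walk its batches.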
2 changes: 1 addition & 1 deletion Deeploy/Targets/PULPOpen/DMA/L3Dma.py
@@ -22,7 +22,7 @@ class L3Dma(AsyncDma):
_transferTemplates = {
2:
NodeTemplate(
"pi_cl_ram_copy_2d(get_ram_ptr(), ${ext}, ${loc}, ${transfer_size}, ${stride}, ${length}, ${ext2loc}, &${future});"
"pi_cl_ram_copy_2d(get_ram_ptr(), (uint32_t)${ext}, ${loc}, ${transfer_size}, ${stride}, ${length}, ${ext2loc}, &${future});"
)
}
_waitingStrategy = PerTensorWaitingStrategy(L3DmaFuture)
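The only change in this template is the `(uint32_t)` cast on `${ext}`, matching the changelog's compiler-warning fix. To see the rendered call, the sketch below substitutes dummy values with Python's `string.Template`, which shares the `${...}` placeholder syntax; Deeploy's actual `NodeTemplate` rendering may differ:

```python
# Render the patched snippet with stdlib string.Template as a stand-in for
# Deeploy's NodeTemplate; all substituted values are dummies.
from string import Template

snippet = Template("pi_cl_ram_copy_2d(get_ram_ptr(), (uint32_t)${ext}, ${loc}, "
                   "${transfer_size}, ${stride}, ${length}, ${ext2loc}, &${future});")

print(snippet.substitute(ext="ext_buf", loc="loc_buf", transfer_size=256,
                         stride=64, length=32, ext2loc=1, future="dma_future"))
# pi_cl_ram_copy_2d(get_ram_ptr(), (uint32_t)ext_buf, loc_buf, 256, 64, 32, 1, &dma_future);
```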
@@ -5,7 +5,7 @@
import copy
import math
from abc import abstractmethod
from typing import List, Optional, Tuple, TypeVar
from typing import Dict, List, Optional, Tuple, TypeVar

import numpy as np

@@ -19,8 +19,10 @@
from Deeploy.TilingExtension.CodeTransformationPasses.TilingHoistingMixIn import TilingHoistingMixIn
from Deeploy.TilingExtension.CodeTransformationPasses.TilingPrototypes import PrototypeTilingMixIn
from Deeploy.TilingExtension.MemoryConstraints import NodeMemoryConstraint, TensorMemoryConstraint
from Deeploy.TilingExtension.TilingCodegen import HyperRectangle, TilingSchedule, VariableReplacementScheme, \
calculateFlatOffset, minimizeRectangle, minimizeVariableReplacement, padOffset, padShape, stridesFromShape
from Deeploy.TilingExtension.TileConstraint import TileConstraint
from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, HyperRectangle, TilingSchedule, \
VariableReplacementScheme, calculateFlatOffset, minimizeRectangle, minimizeVariableReplacement, padOffset, \
padShape, stridesFromShape

T = TypeVar('T')

@@ -241,8 +243,18 @@ def apply(self,
assert isinstance(buffer, VariableBuffer)
unraveledOpRepr[key] = ctxt.unravelReference(buffer).name

variableReplacement, tilingSchedules = template.tileConstraint.wrapTilingSolution(
nodeMemoryConstraint, self.localMemory, ctxt, unraveledOpRepr)
tileConstr: TileConstraint = template.tileConstraint
transfers: Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]] = baseExecutionBlock.transfers
targetMemoryTransfers = {
tensorName: memTransfers.get(self.localMemory, None) for tensorName, memTransfers in transfers.items()
}

if any(v is None for v in targetMemoryTransfers.values()):
return ctxt, executionBlock

variableReplacement, tilingSchedules = tileConstr.wrapTilingSolution(nodeMemoryConstraint, self.localMemory,
ctxt, unraveledOpRepr,
targetMemoryTransfers)

minimalVariableReplacement, newOpRepr = minimizeVariableReplacement(variableReplacement, operatorRepresentation)

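This hunk, like the matching ones in `TilingVariableReplacement.py` below, reduces the var-keyed, level-keyed annotation to the tiles for a single memory level and returns early when any tensor has no tiles at that level. A condensed sketch of that selection, with hypothetical tensor and level names:

```python
# Condensed sketch of the per-memory-level selection these passes perform;
# "L1"/"L2" and the tensor names are hypothetical.
from typing import Dict, Optional


def selectLevel(transfers: Dict[str, Dict[str, list]],
                targetMemLevel: str) -> Optional[Dict[str, list]]:
    # Keep only the tile batches describing transfers into targetMemLevel.
    selected = {name: perLevel.get(targetMemLevel) for name, perLevel in transfers.items()}
    # If any tensor was never tiled into this level, the pass has nothing to do.
    if any(batches is None for batches in selected.values()):
        return None
    return selected


result = selectLevel({"input_0": {"L1": [[]]}, "output_0": {"L2": [[]]}}, "L1")
assert result is None  # "output_0" has no L1 tiles, so the pass would bail out
```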
Deeploy/TilingExtension/CodeTransformationPasses/TilingVariableReplacement.py
@@ -4,7 +4,7 @@

import copy
import itertools
from typing import List, Tuple
from typing import Dict, List, Tuple

from Deeploy.AbstractDataTypes import Struct
from Deeploy.CommonExtensions.CodeTransformationPasses.Closure import ClosureExecutionBlock
@@ -15,8 +15,10 @@
_ReferenceBuffer
from Deeploy.TilingExtension.CodeTransformationPasses.TilingHoistingMixIn import TilingHoistingMixIn
from Deeploy.TilingExtension.MemoryConstraints import NodeMemoryConstraint
from Deeploy.TilingExtension.TileConstraint import TileConstraint
from Deeploy.TilingExtension.TilerExtension import Tiler
from Deeploy.TilingExtension.TilingCodegen import TilingSchedule, VariableReplacementScheme, minimizeVariableReplacement
from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, TilingSchedule, VariableReplacementScheme, \
minimizeVariableReplacement


class TilingVariableReplacement(CodeTransformationPass, IntrospectiveCodeTransformationMixIn, TilingHoistingMixIn):
@@ -133,8 +135,18 @@ def apply(self,
for key, value in operatorRepresentation.items()
}

variableReplacement, tilingSchedules = template.tileConstraint.wrapTilingSolution(
nodeMemoryConstraint, self.targetMemLevel, ctxt, unraveledOpRepr)
tileConstr: TileConstraint = template.tileConstraint
transfers: Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]] = baseExecutionBlock.transfers
targetMemoryTransfers = {
tensorName: memTransfers.get(self.targetMemLevel, None) for tensorName, memTransfers in transfers.items()
}

if any(v is None for v in targetMemoryTransfers.values()):
return ctxt, executionBlock

variableReplacement, tilingSchedules = tileConstr.wrapTilingSolution(nodeMemoryConstraint, self.targetMemLevel,
ctxt, unraveledOpRepr,
targetMemoryTransfers)
Comment on lines +138 to +149 (Contributor):

⚠️ Potential issue | 🔴 Critical

Add a None-guard for `baseExecutionBlock.transfers` before accessing `.items()`.

The type annotation declares `transfers` as a `Dict`, but if `baseExecutionBlock.transfers` is `None`, line 141 will raise an `AttributeError` when calling `.items()`.

Apply this diff to add a None check:

 tileConstr: TileConstraint = template.tileConstraint
-transfers: Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]] = baseExecutionBlock.transfers
+transfers = baseExecutionBlock.transfers
+if transfers is None:
+    return ctxt, executionBlock
+
 targetMemoryTransfers = {
     tensorName: memTransfers.get(self.targetMemLevel, None) for tensorName, memTransfers in transfers.items()
 }


minimalVariableReplacement, newOpRepr = minimizeVariableReplacement(variableReplacement, operatorRepresentation)
operatorRepresentation.update(newOpRepr)
@@ -233,8 +245,17 @@ def apply(self,
for key, value in operatorRepresentation.items()
}

variableReplacement, _ = template.tileConstraint.wrapTilingSolution(nodeMemoryConstraint, self.targetMemLevel,
ctxt, unraveledOpRepr)
tileConstr: TileConstraint = template.tileConstraint
transfers: Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]] = baseExecutionBlock.transfers
targetMemoryTransfers = {
tensorName: memTransfers.get(self.targetMemLevel, None) for tensorName, memTransfers in transfers.items()
}

if any(v is None for v in targetMemoryTransfers.values()):
return ctxt, executionBlock

variableReplacement, _ = tileConstr.wrapTilingSolution(nodeMemoryConstraint, self.targetMemLevel, ctxt,
unraveledOpRepr, targetMemoryTransfers)

minimalVariableReplacement, newOpRepr = minimizeVariableReplacement(variableReplacement, operatorRepresentation)
operatorRepresentation.update(newOpRepr)
83 changes: 8 additions & 75 deletions Deeploy/TilingExtension/TileConstraint.py
@@ -2,18 +2,15 @@
#
# SPDX-License-Identifier: Apache-2.0

import copy
from abc import abstractmethod
from typing import Dict, List, Optional, Tuple, Union

import numpy as np
from ortools.constraint_solver.pywrapcp import IntVar

from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation
from Deeploy.TilingExtension.MemoryConstraints import MemoryConstraint, NodeMemoryConstraint, TensorMemoryConstraint
from Deeploy.TilingExtension.MemoryConstraints import NodeMemoryConstraint
from Deeploy.TilingExtension.TilerModel import TilerModel
from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, HyperRectangle, MemoryTransfer, \
TilingSchedule, VariableReplacementScheme, computeTileHyperRectangles
from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, TilingSchedule, VariableReplacementScheme


class TileConstraint():
@@ -91,81 +88,17 @@ def sanitizeTilingSchedule(tilingSchedule: TilingSchedule) -> TilingSchedule:

@classmethod
def wrapTilingSolution(
cls, tilingSolution: NodeMemoryConstraint, targetMemLevel: str, ctxt: NetworkContext,
operatorRepresentation: OperatorRepresentation) -> Tuple[VariableReplacementScheme, List[TilingSchedule]]:

def getMemoryTransfer(tensorConstraint: TensorMemoryConstraint, sourceCube: HyperRectangle,
sourceMemoryLevel: str, targetMemoryLevel: str) -> MemoryTransfer:

size = np.prod(sourceCube.dims)
sourceConstraint = MemoryConstraint(sourceMemoryLevel, size)
sourceConstraint.shape = sourceCube.dims

destConstraint = copy.copy(tensorConstraint.memoryConstraints[targetMemoryLevel])

if any(dim1 > dim2 for dim1, dim2 in zip(destConstraint.shape, sourceConstraint.shape)):
destConstraint.shape = sourceConstraint.shape

return MemoryTransfer(sourceConstraint, destConstraint)

def _offsetAdd(offsetA: Tuple[int, ...], offsetB: Tuple[int, ...]) -> Tuple[int, ...]:
return tuple(dimA + dimB for dimA, dimB in zip(offsetA, offsetB))

def getCubeTransfers(tensorConstraint: TensorMemoryConstraint, sourceCubes: List[AbsoluteHyperRectangle],
sourceMemoryLevel: str,
targetMemoryLevel: str) -> Tuple[List[AbsoluteHyperRectangle], List[int]]:
solution = []
solutionLengths = []

for sourceCube in sourceCubes:
memTransfer = getMemoryTransfer(tensorConstraint, sourceCube.rectangle, sourceMemoryLevel,
targetMemoryLevel)
solutionCubes = computeTileHyperRectangles(memTransfer)
solutionAbsoluteCubes = [
AbsoluteHyperRectangle(rectangle = cube,
absoluteOffset = _offsetAdd(sourceCube.absoluteOffset, cube.offset))
for cube in solutionCubes
]
solution += solutionAbsoluteCubes
solutionLengths.append(len(solutionAbsoluteCubes))

return solution, solutionLengths

cls, tilingSolution: NodeMemoryConstraint, targetMemLevel: str, ctxt: NetworkContext,
operatorRepresentation: OperatorRepresentation,
transfers: Dict[str,
List[List[AbsoluteHyperRectangle]]]) -> Tuple[VariableReplacementScheme, List[TilingSchedule]]:
assert len(tilingSolution.outputTensorMemoryConstraints) == 1, "Expected node to have only one output!"
Comment on lines 90 to 95 (Contributor):

⚠️ Potential issue | 🟠 Major

Validate the `transfers` structure early; clarify the expected shape.

`wrapTilingSolution` indexes `transfers[outVar]`, implying `transfers` must be var-keyed (not memory-level keyed). Add a guard and a clear error to prevent silent `KeyError`s.

-        assert len(tilingSolution.outputTensorMemoryConstraints) == 1, "Expected node to have only one output!"
-        outVar, _ = next(iter(tilingSolution.outputTensorMemoryConstraints.items()))
+        assert len(tilingSolution.outputTensorMemoryConstraints) == 1, "Expected node to have only one output!"
+        outVar, _ = next(iter(tilingSolution.outputTensorMemoryConstraints.items()))
+        if outVar not in transfers:
+            raise KeyError(f"Missing transfers for output '{outVar}'. Expected var-keyed mapping.")

As per coding guidelines

Committable suggestion skipped: line range outside the PR's diff.



outVar, outTensorConstraint = next(iter(tilingSolution.outputTensorMemoryConstraints.items()))
memoryPath = list(outTensorConstraint.memoryConstraints.keys())

assert targetMemLevel in memoryPath, \
f"Target memory level {targetMemLevel} does not exist in the memory path {memoryPath}"

targetIdx = memoryPath.index(targetMemLevel)

if targetIdx == 0:
# SCHEREMO: Watch out - this happens if inputs are in L(N+1) but outputs only in L(N)
targetIdx = 1

fullShape = ctxt.lookup(outVar).shape
initialOffset = (0,) * len(fullShape)
outputCubes = [
AbsoluteHyperRectangle(rectangle = HyperRectangle(offset = initialOffset, dims = tuple(fullShape)),
absoluteOffset = initialOffset)
]

for source, target in zip(memoryPath[:targetIdx], memoryPath[1:targetIdx + 1]):
outputCubes, solutionLengths = getCubeTransfers(outTensorConstraint, outputCubes, source, target)

arrayOfCubes = []
_idx = 0
for idxLen in solutionLengths:
arrayOfCubes += [outputCubes[_idx:_idx + idxLen]]
_idx += idxLen
outVar, _ = next(iter(tilingSolution.outputTensorMemoryConstraints.items()))

varReplacements = []
tilingSchedules = []

for _outputCubes in arrayOfCubes:

for _outputCubes in transfers[outVar]:
varReplacement, tilingSchedule = cls.serializeTilingSolution(tilingSolution, _outputCubes, targetMemLevel,
ctxt, operatorRepresentation)
sanitizedTilingSchedule = cls.sanitizeTilingSchedule(tilingSchedule)
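After this refactor, `wrapTilingSolution` no longer walks the memory path itself; it only iterates the precomputed batches for the node's single output tensor. The sketch below mimics that consumption path, including the guard the review comment above suggests; the abstract `serializeTilingSolution` step is stubbed out as a string for illustration:

```python
# Sketch of the refactored consumption path: iterate precomputed tile batches
# keyed by the output tensor's name. The serialize step is a placeholder for
# the abstract serializeTilingSolution/sanitizeTilingSchedule pipeline.
from typing import Dict, List


def wrapTilingSolutionSketch(outVar: str,
                             transfers: Dict[str, List[List[object]]]) -> List[str]:
    if outVar not in transfers:  # the early validation the reviewer asks for
        raise KeyError(f"Missing transfers for output '{outVar}'; expected a var-keyed mapping.")
    schedules = []
    for outputCubes in transfers[outVar]:  # one batch of output tiles per outer tile
        schedules.append(f"schedule over {len(outputCubes)} tile(s)")
    return schedules


print(wrapTilingSolutionSketch("output_0", {"output_0": [[object()], [object(), object()]]}))
# ['schedule over 1 tile(s)', 'schedule over 2 tile(s)']
```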
30 changes: 30 additions & 0 deletions Deeploy/TilingExtension/TilerExtension.py
@@ -36,6 +36,7 @@
from Deeploy.TilingExtension.MemoryScheduler import MemoryBlock, MemoryScheduler
from Deeploy.TilingExtension.TileConstraint import TileConstraint
from Deeploy.TilingExtension.TilerModel import TilerModel
from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, HyperRectangle, computeTileHyperRectangles

TilingSolution = List[PatternMemoryConstraints]
MemoryMap = Dict[str, List[List[MemoryBlock]]]
@@ -940,6 +941,34 @@ def testMemoryMapCorrectness(self, memoryMap: Dict[str, List[List[MemoryBlock]]]
assert stepIdx in range(lifetime[0], lifetime[-1] +
1), f"Invalid memory map! Buffer {tensor.name} is not alive at step {stepIdx}!"

def getTransfers(self, tensorMc: TensorMemoryConstraint) -> Dict[str, List[List[AbsoluteHyperRectangle]]]:
transfers: Dict[str, List[List[AbsoluteHyperRectangle]]] = {}
mcs = list(tensorMc.memoryConstraints.items())
for (externalMemory, externalMc), (localMemory, localMc) in zip(mcs[:-1], mcs[1:]):
# TODO: Should we also use externalMemory as a key in the transfers?
if externalMemory not in transfers:
assert externalMc.shape is not None
shape = externalMc.shape
zeroOffset = (0,) * len(shape)
externalAbsoluteRectangles = [AbsoluteHyperRectangle(HyperRectangle(zeroOffset, shape), zeroOffset)]
else:
# Flatten
externalAbsoluteRectangles = [rect for _list in transfers[externalMemory] for rect in _list]

transfers[localMemory] = [[
AbsoluteHyperRectangle(rect, tuple(a + b
for a, b in zip(extAbsRect.absoluteOffset, rect.offset)))
for rect in computeTileHyperRectangles(extAbsRect.rectangle.dims, localMc.shape)
]
for extAbsRect in externalAbsoluteRectangles]
return transfers

def getIoTransfers(self,
patternMc: PatternMemoryConstraints) -> Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]]:
assert len(patternMc.nodeConstraints) == 1, "Only layerwise supported for now!"
tMcs = patternMc.nodeConstraints[0].tensorMemoryConstraints
return {name: self.getTransfers(mc) for name, mc in tMcs.items()}

Comment on lines +966 to +971 (Contributor):

🛠️ Refactor suggestion | 🟠 Major

Layer-wise assertion will break multi-node patterns at runtime.

`tile()` calls `getIoTransfers()` for every pattern, but `getIoTransfers()` asserts a single step. This will raise on schedules where a pattern has multiple nodes. Either support multi-step or avoid asserting here.

Proposed minimal fix: default to the last step (layer-wise remains unchanged), enabling non-layer-wise schedules.

-    def getIoTransfers(self,
-                       patternMc: PatternMemoryConstraints) -> Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]]:
-        assert len(patternMc.nodeConstraints) == 1, "Only layerwise supported for now!"
-        tMcs = patternMc.nodeConstraints[0].tensorMemoryConstraints
-        return {name: self.getTransfers(mc) for name, mc in tMcs.items()}
+    def getIoTransfers(self,
+                       patternMc: PatternMemoryConstraints) -> Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]]:
+        # Prefer layer-wise; if not, use the last step to represent the pattern's effective IO.
+        step_idx = -1
+        tMcs = patternMc.nodeConstraints[step_idx].tensorMemoryConstraints
+        return {name: self.getTransfers(mc) for name, mc in tMcs.items()}

Based on learnings



class TilerDeployerWrapper(NetworkDeployerWrapper):

@@ -996,6 +1025,7 @@ def tile(self, tilingSolution: Optional[TilingSolution] = None, memoryMap: Optio
# SCHEREMO: Annotate execution block with solution
for layer, pattern in zip(self.layerBinding.values(), tilingSolution):
layer.mapper.binder.executionBlock.patternMemoryConstraint = pattern
layer.mapper.binder.executionBlock.transfers = self.tiler.getIoTransfers(pattern)

# SCHEREMO: Code generation STUB

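The chained computation in `getTransfers` is the core of the new annotation: each level's tiles are computed against the previous level's tiles, and absolute offsets accumulate along the way. The self-contained sketch below reproduces one L2-to-L1 step; `computeTileHyperRectanglesSketch` is a guess at the real helper's grid-tiling behavior (the actual implementation may clamp or pad differently), and all shapes and level names are invented:

```python
# Worked example of one step of getTransfers' chained tiling. The grid tiler
# below approximates computeTileHyperRectangles(outerDims, tileShape); the
# real helper's edge handling may differ.
import itertools
from dataclasses import dataclass
from typing import List, Tuple


@dataclass
class HyperRectangle:
    offset: Tuple[int, ...]
    dims: Tuple[int, ...]


@dataclass
class AbsoluteHyperRectangle:
    rectangle: HyperRectangle
    absoluteOffset: Tuple[int, ...]


def computeTileHyperRectanglesSketch(outerDims: Tuple[int, ...],
                                     tileShape: Tuple[int, ...]) -> List[HyperRectangle]:
    # Cover outerDims with a grid of tiles, clamping edge tiles to the boundary.
    ranges = [range(0, dim, tile) for dim, tile in zip(outerDims, tileShape)]
    return [
        HyperRectangle(offset, tuple(min(t, d - o) for t, d, o in zip(tileShape, outerDims, offset)))
        for offset in itertools.product(*ranges)
    ]


# One tensor of shape (4, 8) fully resident in L2, tiled into (4, 4) tiles in L1:
l2_rect = AbsoluteHyperRectangle(HyperRectangle((0, 0), (4, 8)), (0, 0))
l1_batch = [
    AbsoluteHyperRectangle(rect, tuple(a + b for a, b in zip(l2_rect.absoluteOffset, rect.offset)))
    for rect in computeTileHyperRectanglesSketch(l2_rect.rectangle.dims, (4, 4))
]
for tile in l1_batch:
    print(tile.rectangle.offset, tile.rectangle.dims, tile.absoluteOffset)
# (0, 0) (4, 4) (0, 0)
# (0, 4) (4, 4) (0, 4)
```

Under the L1 key, `getTransfers` would record this batch as a single-element list of lists; a transfer into a further memory level would tile each of these two rectangles again, accumulating the absolute offsets.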