Skip to content
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
## Unreleased (Planned Release Target: v0.2.1)

### List of Pull Requests
- Add tile transfer annotation [#127](https://github.com/pulp-platform/Deeploy/pull/127)
- Refactor Logging for Improved Debugging [#115](https://github.com/pulp-platform/Deeploy/pull/115)
- Add reuse-tool as an SPDX license header linter [#113](https://github.com/pulp-platform/Deeploy/pull/113)
- Bug fixes, API Cleanup and Reduce Compiler Warning on PULP [#112](https://github.com/pulp-platform/Deeploy/pull/112)
Expand Down Expand Up @@ -46,6 +47,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Buffer utilities: `checkNumLevels` validation and `sizeInBytes` method
- Per–memory-level usage tracking and worst-case reporting in `NetworkContext`
- Memory/I/O summaries and input/output logging in deployers
- Added transfer annotation of tiled execution blocks

### Changed
- Replaced platform-specific tags (`*-amd64`, `*-arm64`) with direct digest references in `Noelware/docker-manifest-action`.
Expand Down Expand Up @@ -73,6 +75,8 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Changed types and added correct casts to fix many compiler warnings in the PULP target library
- Use [reuse-tool](https://github.com/fsfe/reuse-tool) in pre-commit, CI, and Makefile for SPDX license header linting
- Deployer workflow now uses `prepare(...)` instead of `generateFunction(...)`.
- Refactored `computeTileHyperRectangles`
- `wrapTilingSolution` now uses the transfer annotation

### Fixed
- Prevent node duplication for graphs generated via GraphSurgeon
Expand All @@ -83,6 +87,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
- Corrected method usage in `importDeeployState` to call `NetworkContext.importNetworkContext` instead of the incorrect method name
- Correctly return `signProp` from `setupDeployer` instead of hardcoding the value to `False` in `testMVP.py`
- Fixed `Unsqueeze` Op. when using ONNX opset 13 or higher (from attribute to input)
- Fixed compiler warning by casting the external pointer in L3Dma to uint32_t

### Removed
- Delete outdated and unused `.gitlab-ci.yml` file
Expand Down
1 change: 1 addition & 0 deletions Deeploy/DeeployTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1458,6 +1458,7 @@ def __init__(self, operatorCodeSnippet: Optional[CodeSnippet] = None):
) #: Sequence[CodeSnippet]: ordered list of code snippets that need to be generated to implemented the associated operator

self.patternMemoryConstraint: Optional = None #: Optional[PatternMemoryConstraint]: Tiling information of the operator which is annotated in the midend
self.transfers: Optional = None #: Optional[Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]]]: Tiling transfers

def addLeft(self, template: NodeTemplate, operatorRepresentation: OperatorRepresentation):
"""Adds a code snippet that is generated BEFORE any of the other code snippets in this ExecutionBlock
Expand Down
2 changes: 1 addition & 1 deletion Deeploy/Targets/PULPOpen/DMA/L3Dma.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class L3Dma(AsyncDma):
_transferTemplates = {
2:
NodeTemplate(
"pi_cl_ram_copy_2d(get_ram_ptr(), ${ext}, ${loc}, ${transfer_size}, ${stride}, ${length}, ${ext2loc}, &${future});"
"pi_cl_ram_copy_2d(get_ram_ptr(), (uint32_t)${ext}, ${loc}, ${transfer_size}, ${stride}, ${length}, ${ext2loc}, &${future});"
)
}
_waitingStrategy = PerTensorWaitingStrategy(L3DmaFuture)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from Deeploy.TilingExtension.CodeTransformationPasses.TilingHoistingMixIn import TilingHoistingMixIn
from Deeploy.TilingExtension.CodeTransformationPasses.TilingPrototypes import PrototypeTilingMixIn
from Deeploy.TilingExtension.MemoryConstraints import NodeMemoryConstraint, TensorMemoryConstraint
from Deeploy.TilingExtension.TileConstraint import TileConstraint
from Deeploy.TilingExtension.TilingCodegen import HyperRectangle, TilingSchedule, VariableReplacementScheme, \
calculateFlatOffset, minimizeRectangle, minimizeVariableReplacement, padOffset, padShape, stridesFromShape

Expand Down Expand Up @@ -241,8 +242,13 @@ def apply(self,
assert isinstance(buffer, VariableBuffer)
unraveledOpRepr[key] = ctxt.unravelReference(buffer).name

variableReplacement, tilingSchedules = template.tileConstraint.wrapTilingSolution(
nodeMemoryConstraint, self.localMemory, ctxt, unraveledOpRepr)
tileConstraint: TileConstraint = template.tileConstraint
transfers = {
tensorName: memTransfers[self.localMemory]
for tensorName, memTransfers in baseExecutionBlock.transfers.items()
}
variableReplacement, tilingSchedules = tileConstraint.wrapTilingSolution(nodeMemoryConstraint, self.localMemory,
ctxt, unraveledOpRepr, transfers)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Guard transfers before use; helpful error if level missing.

Avoids hard KeyErrors when transfers aren’t annotated or the level is unavailable.

-        tileConstraint: TileConstraint = template.tileConstraint
-        transfers = {
-            tensorName: memTransfers[self.localMemory]
-            for tensorName, memTransfers in baseExecutionBlock.transfers.items()
-        }
+        tileConstraint: TileConstraint = template.tileConstraint
+        assert baseExecutionBlock.transfers is not None, \
+            "Missing transfers on execution block; ensure Tiler annotated transfers before codegen."
+        transfers = {}
+        for tensorName, memTransfers in baseExecutionBlock.transfers.items():
+            if self.localMemory not in memTransfers:
+                raise KeyError(f"No transfers for memory level '{self.localMemory}' on tensor '{tensorName}'.")
+            transfers[tensorName] = memTransfers[self.localMemory]
🤖 Prompt for AI Agents
In Deeploy/TilingExtension/CodeTransformationPasses/TilingCodeGeneration.py
around lines 245 to 252, the comprehension unconditionally indexes
memTransfers[self.localMemory] which can raise KeyError when transfers aren’t
annotated or the requested memory level is missing; change this to guard access
(e.g., check self.localMemory in memTransfers or use
memTransfers.get(self.localMemory)) and build transfers only for tensors that
actually have that level, and if any expected tensor is missing the level raise
a clear error including the tensorName and the missing level (or optionally
log/collect missing entries and include them in the exception) before calling
tileConstraint.wrapTilingSolution so the failure is descriptive instead of a raw
KeyError.

minimalVariableReplacement, newOpRepr = minimizeVariableReplacement(variableReplacement, operatorRepresentation)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
_ReferenceBuffer
from Deeploy.TilingExtension.CodeTransformationPasses.TilingHoistingMixIn import TilingHoistingMixIn
from Deeploy.TilingExtension.MemoryConstraints import NodeMemoryConstraint
from Deeploy.TilingExtension.TileConstraint import TileConstraint
from Deeploy.TilingExtension.TilerExtension import Tiler
from Deeploy.TilingExtension.TilingCodegen import TilingSchedule, VariableReplacementScheme, minimizeVariableReplacement

Expand Down Expand Up @@ -133,8 +134,13 @@ def apply(self,
for key, value in operatorRepresentation.items()
}

variableReplacement, tilingSchedules = template.tileConstraint.wrapTilingSolution(
nodeMemoryConstraint, self.targetMemLevel, ctxt, unraveledOpRepr)
tileConstr: TileConstraint = template.tileConstraint
transfers = {
tensorName: memTransfers[self.targetMemLevel]
for tensorName, memTransfers in baseExecutionBlock.transfers.items()
}
variableReplacement, tilingSchedules = tileConstr.wrapTilingSolution(nodeMemoryConstraint, self.targetMemLevel,
ctxt, unraveledOpRepr, transfers)

minimalVariableReplacement, newOpRepr = minimizeVariableReplacement(variableReplacement, operatorRepresentation)
operatorRepresentation.update(newOpRepr)
Expand Down Expand Up @@ -233,8 +239,13 @@ def apply(self,
for key, value in operatorRepresentation.items()
}

variableReplacement, _ = template.tileConstraint.wrapTilingSolution(nodeMemoryConstraint, self.targetMemLevel,
ctxt, unraveledOpRepr)
tileConstr: TileConstraint = template.tileConstraint
transfers = {
tensorName: memTransfers[self.targetMemLevel]
for tensorName, memTransfers in baseExecutionBlock.transfers.items()
}
variableReplacement, _ = tileConstr.wrapTilingSolution(nodeMemoryConstraint, self.targetMemLevel, ctxt,
unraveledOpRepr, transfers)

minimalVariableReplacement, newOpRepr = minimizeVariableReplacement(variableReplacement, operatorRepresentation)
operatorRepresentation.update(newOpRepr)
Expand Down
83 changes: 8 additions & 75 deletions Deeploy/TilingExtension/TileConstraint.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,15 @@
#
# SPDX-License-Identifier: Apache-2.0

import copy
from abc import abstractmethod
from typing import Dict, List, Optional, Tuple, Union

import numpy as np
from ortools.constraint_solver.pywrapcp import IntVar

from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation
from Deeploy.TilingExtension.MemoryConstraints import MemoryConstraint, NodeMemoryConstraint, TensorMemoryConstraint
from Deeploy.TilingExtension.MemoryConstraints import NodeMemoryConstraint
from Deeploy.TilingExtension.TilerModel import TilerModel
from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, HyperRectangle, MemoryTransfer, \
TilingSchedule, VariableReplacementScheme, computeTileHyperRectangles
from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, TilingSchedule, VariableReplacementScheme


class TileConstraint():
Expand Down Expand Up @@ -91,81 +88,17 @@ def sanitizeTilingSchedule(tilingSchedule: TilingSchedule) -> TilingSchedule:

@classmethod
def wrapTilingSolution(
cls, tilingSolution: NodeMemoryConstraint, targetMemLevel: str, ctxt: NetworkContext,
operatorRepresentation: OperatorRepresentation) -> Tuple[VariableReplacementScheme, List[TilingSchedule]]:

def getMemoryTransfer(tensorConstraint: TensorMemoryConstraint, sourceCube: HyperRectangle,
sourceMemoryLevel: str, targetMemoryLevel: str) -> MemoryTransfer:

size = np.prod(sourceCube.dims)
sourceConstraint = MemoryConstraint(sourceMemoryLevel, size)
sourceConstraint.shape = sourceCube.dims

destConstraint = copy.copy(tensorConstraint.memoryConstraints[targetMemoryLevel])

if any(dim1 > dim2 for dim1, dim2 in zip(destConstraint.shape, sourceConstraint.shape)):
destConstraint.shape = sourceConstraint.shape

return MemoryTransfer(sourceConstraint, destConstraint)

def _offsetAdd(offsetA: Tuple[int, ...], offsetB: Tuple[int, ...]) -> Tuple[int, ...]:
return tuple(dimA + dimB for dimA, dimB in zip(offsetA, offsetB))

def getCubeTransfers(tensorConstraint: TensorMemoryConstraint, sourceCubes: List[AbsoluteHyperRectangle],
sourceMemoryLevel: str,
targetMemoryLevel: str) -> Tuple[List[AbsoluteHyperRectangle], List[int]]:
solution = []
solutionLengths = []

for sourceCube in sourceCubes:
memTransfer = getMemoryTransfer(tensorConstraint, sourceCube.rectangle, sourceMemoryLevel,
targetMemoryLevel)
solutionCubes = computeTileHyperRectangles(memTransfer)
solutionAbsoluteCubes = [
AbsoluteHyperRectangle(rectangle = cube,
absoluteOffset = _offsetAdd(sourceCube.absoluteOffset, cube.offset))
for cube in solutionCubes
]
solution += solutionAbsoluteCubes
solutionLengths.append(len(solutionAbsoluteCubes))

return solution, solutionLengths

cls, tilingSolution: NodeMemoryConstraint, targetMemLevel: str, ctxt: NetworkContext,
operatorRepresentation: OperatorRepresentation,
transfers: Dict[str,
List[List[AbsoluteHyperRectangle]]]) -> Tuple[VariableReplacementScheme, List[TilingSchedule]]:
assert len(tilingSolution.outputTensorMemoryConstraints) == 1, "Expected node to have only one output!"
Comment on lines 90 to 95
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Validate transfers structure early; clarify expected shape.

wrapTilingSolution indexes transfers[outVar], implying transfers must be var-keyed (not memory-level keyed). Add a guard and a clear error to prevent silent KeyErrors.

-        assert len(tilingSolution.outputTensorMemoryConstraints) == 1, "Expected node to have only one output!"
-        outVar, _ = next(iter(tilingSolution.outputTensorMemoryConstraints.items()))
+        assert len(tilingSolution.outputTensorMemoryConstraints) == 1, "Expected node to have only one output!"
+        outVar, _ = next(iter(tilingSolution.outputTensorMemoryConstraints.items()))
+        if outVar not in transfers:
+            raise KeyError(f"Missing transfers for output '{outVar}'. Expected var-keyed mapping.")

As per coding guidelines

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In Deeploy/TilingExtension/TileConstraint.py around lines 90 to 95,
wrapTilingSolution assumes transfers is keyed by output variable names and
shaped as Dict[str, List[List[AbsoluteHyperRectangle]]] which can cause a silent
KeyError or confusing failures; add an explicit guard that checks transfers is a
dict, that the expected output variable key (e.g., the single output variable
from tilingSolution.outputTensorMemoryConstraints) exists in transfers, and that
its value is a list of lists (and optionally non-empty) of
AbsoluteHyperRectangle; if any check fails, raise a clear ValueError or
AssertionError with a message describing the expected shape ("transfers must be
a dict keyed by output var name mapping to List[List[AbsoluteHyperRectangle]]")
and include the missing key name so callers get an immediate, informative error.


outVar, outTensorConstraint = next(iter(tilingSolution.outputTensorMemoryConstraints.items()))
memoryPath = list(outTensorConstraint.memoryConstraints.keys())

assert targetMemLevel in memoryPath, \
f"Target memory level {targetMemLevel} does not exist in the memory path {memoryPath}"

targetIdx = memoryPath.index(targetMemLevel)

if targetIdx == 0:
# SCHEREMO: Watch out - this happens if inputs are in L(N+1) but outputs only in L(N)
targetIdx = 1

fullShape = ctxt.lookup(outVar).shape
initialOffset = (0,) * len(fullShape)
outputCubes = [
AbsoluteHyperRectangle(rectangle = HyperRectangle(offset = initialOffset, dims = tuple(fullShape)),
absoluteOffset = initialOffset)
]

for source, target in zip(memoryPath[:targetIdx], memoryPath[1:targetIdx + 1]):
outputCubes, solutionLengths = getCubeTransfers(outTensorConstraint, outputCubes, source, target)

arrayOfCubes = []
_idx = 0
for idxLen in solutionLengths:
arrayOfCubes += [outputCubes[_idx:_idx + idxLen]]
_idx += idxLen
outVar, _ = next(iter(tilingSolution.outputTensorMemoryConstraints.items()))

varReplacements = []
tilingSchedules = []

for _outputCubes in arrayOfCubes:

for _outputCubes in transfers[outVar]:
varReplacement, tilingSchedule = cls.serializeTilingSolution(tilingSolution, _outputCubes, targetMemLevel,
ctxt, operatorRepresentation)
sanitizedTilingSchedule = cls.sanitizeTilingSchedule(tilingSchedule)
Expand Down
30 changes: 30 additions & 0 deletions Deeploy/TilingExtension/TilerExtension.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from Deeploy.TilingExtension.MemoryScheduler import MemoryBlock, MemoryScheduler
from Deeploy.TilingExtension.TileConstraint import TileConstraint
from Deeploy.TilingExtension.TilerModel import TilerModel
from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, HyperRectangle, computeTileHyperRectangles

TilingSolution = List[PatternMemoryConstraints]
MemoryMap = Dict[str, List[List[MemoryBlock]]]
Expand Down Expand Up @@ -940,6 +941,34 @@ def testMemoryMapCorrectness(self, memoryMap: Dict[str, List[List[MemoryBlock]]]
assert stepIdx in range(lifetime[0], lifetime[-1] +
1), f"Invalid memory map! Buffer {tensor.name} is not alive at step {stepIdx}!"

def getTransfers(self, tensorMc: TensorMemoryConstraint) -> Dict[str, List[List[AbsoluteHyperRectangle]]]:
    """Compute the per-memory-level tile transfer rectangles for one tensor.

    Walks the tensor's memory-constraint path pairwise (external level ->
    local level) and tiles each external rectangle into local-shaped tiles
    via ``computeTileHyperRectangles``, recording the result under the
    *local* memory level's name.

    Returns a dict mapping each local memory level to a list with one entry
    per external rectangle, each entry being the list of absolute tile
    rectangles produced for that external rectangle.
    """
    transfers: Dict[str, List[List[AbsoluteHyperRectangle]]] = {}
    # Iterate consecutive (external, local) level pairs along the memory path.
    # NOTE(review): assumes memoryConstraints preserves insertion order from
    # external-most to local-most level — confirm against how it is built.
    mcs = list(tensorMc.memoryConstraints.items())
    for (externalMemory, externalMc), (localMemory, localMc) in zip(mcs[:-1], mcs[1:]):
        # TODO: Should we also use externalMemory as a key in the transfers?
        if externalMemory not in transfers:
            # First hop: the whole external buffer is a single rectangle
            # of the external constraint's shape at absolute offset zero.
            assert externalMc.shape is not None
            shape = externalMc.shape
            zeroOffset = (0,) * len(shape)
            externalAbsoluteRectangles = [AbsoluteHyperRectangle(HyperRectangle(zeroOffset, shape), zeroOffset)]
        else:
            # Later hop: flatten all tiles produced for the previous level
            # into a single list of external rectangles to re-tile.
            externalAbsoluteRectangles = [rect for _list in transfers[externalMemory] for rect in _list]

        # Tile each external rectangle into local-shaped tiles. Each tile's
        # absolute offset is the enclosing external rectangle's absolute
        # offset plus the tile's offset within that rectangle.
        transfers[localMemory] = [[
            AbsoluteHyperRectangle(rect, tuple(a + b
                                               for a, b in zip(extAbsRect.absoluteOffset, rect.offset)))
            for rect in computeTileHyperRectangles(extAbsRect.rectangle.dims, localMc.shape)
        ]
                                  for extAbsRect in externalAbsoluteRectangles]
    return transfers

def getIoTransfers(self,
                   patternMc: PatternMemoryConstraints) -> Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]]:
    """Collect the transfer rectangles for every tensor of a pattern's node.

    Only single-node (layer-wise) patterns are supported; the per-tensor
    transfers are computed by ``getTransfers`` and keyed by tensor name.
    """
    assert len(patternMc.nodeConstraints) == 1, "Only layerwise supported for now!"
    ioTransfers: Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]] = {}
    for tensorName, tensorMc in patternMc.nodeConstraints[0].tensorMemoryConstraints.items():
        ioTransfers[tensorName] = self.getTransfers(tensorMc)
    return ioTransfers

Comment on lines +966 to +971
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

Layer-wise assertion will break multi-node patterns at runtime.

tile() calls getIoTransfers() for every pattern, but getIoTransfers() asserts a single step. This will raise on schedules where a pattern has multiple nodes. Either support multi-step or avoid asserting here.

Proposed minimal fix: default to the last step (layer-wise remains unchanged), enabling non-layer-wise schedules.

-    def getIoTransfers(self,
-                       patternMc: PatternMemoryConstraints) -> Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]]:
-        assert len(patternMc.nodeConstraints) == 1, "Only layerwise supported for now!"
-        tMcs = patternMc.nodeConstraints[0].tensorMemoryConstraints
-        return {name: self.getTransfers(mc) for name, mc in tMcs.items()}
+    def getIoTransfers(self,
+                       patternMc: PatternMemoryConstraints) -> Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]]:
+        # Prefer layer-wise; if not, use the last step to represent the pattern's effective IO.
+        step_idx = -1
+        tMcs = patternMc.nodeConstraints[step_idx].tensorMemoryConstraints
+        return {name: self.getTransfers(mc) for name, mc in tMcs.items()}

Based on learnings

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def getIoTransfers(self,
patternMc: PatternMemoryConstraints) -> Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]]:
assert len(patternMc.nodeConstraints) == 1, "Only layerwise supported for now!"
tMcs = patternMc.nodeConstraints[0].tensorMemoryConstraints
return {name: self.getTransfers(mc) for name, mc in tMcs.items()}
def getIoTransfers(self,
patternMc: PatternMemoryConstraints) -> Dict[str, Dict[str, List[List[AbsoluteHyperRectangle]]]]:
# Prefer layer-wise; if not, use the last step to represent the pattern's effective IO.
step_idx = -1
tMcs = patternMc.nodeConstraints[step_idx].tensorMemoryConstraints
return {name: self.getTransfers(mc) for name, mc in tMcs.items()}
🤖 Prompt for AI Agents
In Deeploy/TilingExtension/TilerExtension.py around lines 966 to 971, the code
currently asserts that patternMc.nodeConstraints has length 1 which will raise
for multi-node patterns; remove the assert and instead select the last node
constraint when multiple steps exist (e.g., node_mc =
patternMc.nodeConstraints[-1]) then use node_mc.tensorMemoryConstraints to build
the transfers so layer-wise behavior remains for single-step but multi-step
patterns default to the last step; ensure variable names and types match the
existing code and return the same dict shape.


class TilerDeployerWrapper(NetworkDeployerWrapper):

Expand Down Expand Up @@ -996,6 +1025,7 @@ def tile(self, tilingSolution: Optional[TilingSolution] = None, memoryMap: Optio
# SCHEREMO: Annotate execution block with solution
for layer, pattern in zip(self.layerBinding.values(), tilingSolution):
layer.mapper.binder.executionBlock.patternMemoryConstraint = pattern
layer.mapper.binder.executionBlock.transfers = self.tiler.getIoTransfers(pattern)

# SCHEREMO: Code generation STUB

Expand Down
33 changes: 10 additions & 23 deletions Deeploy/TilingExtension/TilingCodegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,6 @@

from Deeploy.AbstractDataTypes import Pointer
from Deeploy.DeeployTypes import OperatorRepresentation, VariableBuffer
from Deeploy.TilingExtension.MemoryConstraints import MemoryConstraint


@dataclass
class MemoryTransfer():
source: MemoryConstraint
destination: MemoryConstraint


@dataclass
Expand Down Expand Up @@ -242,18 +235,12 @@ def calculateFlatOffsetInBytes(tile: HyperRectangle, referenceBuffer: VariableBu
(referenceBuffer._type.referencedType.typeWidth // 8))


def computeTileHyperRectangles(memoryTransfer: MemoryTransfer) -> List[HyperRectangle]:
assert memoryTransfer.source.shape is not None, "Source transfer shape cannot be undefined!"
assert memoryTransfer.destination.shape is not None, "Destination transfer shape cannot be undefined!"

assert len(memoryTransfer.source.shape) == len(memoryTransfer.destination.shape), \
f"Source and target of memory transfer {memoryTransfer} don't have the same number of dimensions!"

largeShape = memoryTransfer.source.shape
smallShape = memoryTransfer.destination.shape
def computeTileHyperRectangles(externalShape: Tuple[int, ...], localShape: Tuple[int, ...]) -> List[HyperRectangle]:
assert len(externalShape) == len(localShape), \
f"External and local memory shapes don't have the same number of dimensions! External {externalShape} vs. Local {localShape}"

for dimIdx, (dimSizeSmall, dimSizeLarge) in enumerate(zip(smallShape, largeShape)):
assert dimSizeSmall <= dimSizeLarge, f"smallShape[{dimIdx}] should not be bigger then largeShape[{dimIdx}]. ({dimSizeSmall} > {dimSizeLarge})"
# LMACAN: The local shape dimensions are of the local buffer so if the external tile is smaller, that's fine
localShape = tuple(min(ext, loc) for ext, loc in zip(externalShape, localShape))

def nextTileIndex(tileIndexEnd: List[int]) -> Generator[List[int]]:
tileCount = np.prod(tileIndexEnd)
Expand All @@ -270,18 +257,18 @@ def nextTileIndex(tileIndexEnd: List[int]) -> Generator[List[int]]:
tileHyperRectangles = []

tileIndexEnd = [
int(np.ceil(dimSizeLarge / dimSizeSmall)) for dimSizeLarge, dimSizeSmall in zip(largeShape, smallShape)
int(np.ceil(dimSizeLarge / dimSizeSmall)) for dimSizeLarge, dimSizeSmall in zip(externalShape, localShape)
]
for tileIndex in nextTileIndex(tileIndexEnd):
tileOffset = tuple(dimIdx * dimSizeSmall for dimIdx, dimSizeSmall in zip(tileIndex, smallShape))
for dimIdx, (dimOffset, dimSizeLarge) in enumerate(zip(tileOffset, largeShape)):
tileOffset = tuple(dimIdx * dimSizeSmall for dimIdx, dimSizeSmall in zip(tileIndex, localShape))
for dimIdx, (dimOffset, dimSizeLarge) in enumerate(zip(tileOffset, externalShape)):
assert dimOffset >= 0, f"tileOffset[{dimIdx}] shoud not be smaller then zero ({dimOffset} < 0)"
assert dimOffset < dimSizeLarge, f"tileOffset[{dimIdx}] should not be bigger or equal then largeShape[{dimIdx}] ({dimOffset} >= {dimSizeLarge})"

tileSize = tuple(
min(dimSizeSmall, dimSizeLarge - dimOffset)
for dimSizeSmall, dimSizeLarge, dimOffset in zip(smallShape, largeShape, tileOffset))
for dimIdx, (dimSize, dimSizeSmall) in enumerate(zip(tileSize, smallShape)):
for dimSizeSmall, dimSizeLarge, dimOffset in zip(localShape, externalShape, tileOffset))
for dimIdx, (dimSize, dimSizeSmall) in enumerate(zip(tileSize, localShape)):
assert dimSize > 0, f"tileOffset[{dimIdx}] shoud not be smaller or equal then zero ({dimSize} <= 0)"
assert dimSize <= dimSizeSmall, f"tileSize[{dimIdx}] should not be bigger then smallShape[{dimIdx}] ({dimSize} > {dimSizeSmall})"

Expand Down
Loading