Skip to content

Commit 6c91310

Browse files
committed
Fix transA and transB being treated as ints
1 parent 3c18467 commit 6c91310

File tree

9 files changed

+50
-43
lines changed

9 files changed

+50
-43
lines changed

Deeploy/Targets/Generic/Parsers.py

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,27 +1605,40 @@ def parseNodeCtxt(self,
16051605
node.inputs.append(zeroTensor)
16061606
self.operatorRepresentation['C'] = f'{node.name}_C_Tensor'
16071607

1608+
buffA = ctxt.lookup(node.inputs[0].name)
1609+
assert isinstance(buffA, VariableBuffer)
1610+
buffB = ctxt.lookup(node.inputs[1].name)
1611+
assert isinstance(buffB, VariableBuffer)
1612+
buffOut = ctxt.lookup(node.outputs[0].name)
1613+
assert isinstance(buffOut, VariableBuffer)
1614+
16081615
# Store the input and output shapes in the operator representation
1609-
self.operatorRepresentation['size'] = np.prod(ctxt.lookup(node.inputs[0].name).shape)
1610-
self.operatorRepresentation['A_shape'] = ctxt.lookup(node.inputs[0].name).shape
1611-
self.operatorRepresentation['B_shape'] = ctxt.lookup(node.inputs[1].name).shape
1612-
self.operatorRepresentation['data_out_shape'] = ctxt.lookup(node.outputs[0].name).shape
1616+
self.operatorRepresentation['size'] = np.prod(buffA.shape)
1617+
self.operatorRepresentation['A_shape'] = buffA.shape
1618+
self.operatorRepresentation['B_shape'] = buffB.shape
1619+
self.operatorRepresentation['data_out_shape'] = buffOut.shape
1620+
1621+
if self.operatorRepresentation['transA']:
1622+
N_A, M = buffA.shape[-2:]
1623+
else:
1624+
M, N_A = buffA.shape[-2:]
1625+
1626+
if self.operatorRepresentation['transB']:
1627+
O, N_B = buffB.shape[-2:]
1628+
else:
1629+
N_B, O = buffB.shape[-2:]
16131630

16141631
# Store the matrix dimensions in the operator representation
1615-
self.operatorRepresentation['M'] = ctxt.lookup(
1616-
node.inputs[0].name).shape[(-2 + self.operatorRepresentation['transA'])]
1617-
self.operatorRepresentation['N'] = ctxt.lookup(
1618-
node.inputs[0].name).shape[(-1 - self.operatorRepresentation['transA'])]
1619-
self.operatorRepresentation['O'] = ctxt.lookup(
1620-
node.inputs[1].name).shape[(-1 - self.operatorRepresentation['transB'])]
1632+
self.operatorRepresentation['M'] = M
1633+
self.operatorRepresentation['N'] = N_A
1634+
self.operatorRepresentation['O'] = O
16211635

16221636
# SCHEREMO: Assert that reduction dimension is the same on both matrices
1623-
ret = ret and (self.operatorRepresentation['N'] == ctxt.lookup(
1624-
node.inputs[1].name).shape[-2 + self.operatorRepresentation['transB']])
1637+
ret = ret and N_A == N_B
16251638

16261639
# Check if the batch dimensions are compatible
1627-
self.operatorRepresentation['batch_A'] = np.prod(ctxt.lookup(node.inputs[0].name).shape[:-2])
1628-
self.operatorRepresentation['batch_B'] = np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2])
1640+
self.operatorRepresentation['batch_A'] = np.prod(buffA.shape[:-2])
1641+
self.operatorRepresentation['batch_B'] = np.prod(buffB.shape[:-2])
16291642

16301643
self.operatorRepresentation['batch'] = max(self.operatorRepresentation['batch_A'],
16311644
self.operatorRepresentation['batch_B'])
@@ -1637,10 +1650,10 @@ def parseNodeCtxt(self,
16371650
), "Incompatible dimensions for input matrices. Broadcasting not yet supported for dimensions larger than 1 on one of the inputs, or equal dimensions between the 2."
16381651

16391652
# Create flags for same dimension between each input matrix and the final batch dimension
1640-
self.operatorRepresentation['A_batched'] = (self.operatorRepresentation['batch'] == np.prod(
1641-
ctxt.lookup(node.inputs[0].name).shape[:-2]))
1653+
self.operatorRepresentation['A_batched'] = (
1654+
self.operatorRepresentation['batch'] == self.operatorRepresentation['batch_A'])
16421655
self.operatorRepresentation['W_batched'] = self.operatorRepresentation['B_batched'] = (
1643-
self.operatorRepresentation['batch'] == np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2]))
1656+
self.operatorRepresentation['batch'] == self.operatorRepresentation['batch_B'])
16441657

16451658
return ctxt, ret
16461659

Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
${M},
2222
${N},
2323
${O},
24-
${transA},
25-
${transB}
24+
${int(transA)},
25+
${int(transB)}
2626
);
2727
2828
% if A_batched:

Deeploy/Targets/Generic/Templates/GemmTemplate.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ def alignToContext(self, ctxt: NetworkContext,
5656
${O},
5757
${alpha},
5858
${beta},
59-
${transA},
60-
${transB},
59+
${int(transA)},
60+
${int(transB)},
6161
${A_offset},
6262
${B_offset},
6363
${C_offset},

Deeploy/Targets/Generic/TypeCheckers.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -185,10 +185,8 @@ def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[
185185

186186
def _inferNumLevels(self, inputs: List[VariableBuffer],
187187
operatorRepresentation: OperatorRepresentation) -> List[int]:
188-
return [
189-
2**((self.input_types[0].referencedType.typeWidth) * 2) *
190-
inputs[0].shape[-1 - operatorRepresentation['transA']]
191-
]
188+
O = inputs[0].shape[-1] if not operatorRepresentation['transA'] else inputs[0].shape[-2]
189+
return [2**((self.input_types[0].referencedType.typeWidth) * 2) * O]
192190

193191
def _inferSignedness(self, inputs: List[VariableBuffer],
194192
operatorRepresentation: OperatorRepresentation) -> List[bool]:

Deeploy/Targets/MemPool/Templates/GemmTemplate.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext,
127127
${O},
128128
${alpha},
129129
${beta},
130-
${transA},
131-
${transB},
130+
${int(transA)},
131+
${int(transB)},
132132
${A_offset},
133133
${B_offset},
134134
${C_offset},

Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext,
145145
${O},
146146
${alpha},
147147
${beta},
148-
${transA},
149-
${transB},
148+
${int(transA)},
149+
${int(transB)},
150150
${mul},
151151
${add},
152152
${log2Dstring},
@@ -170,8 +170,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext,
170170
${O},
171171
${alpha},
172172
${beta},
173-
${transA},
174-
${transB},
173+
${int(transA)},
174+
${int(transB)},
175175
${mul},
176176
${add},
177177
${log2Dstring},

Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
${M},
2121
${N},
2222
${O},
23-
${transA},
24-
${transB}
23+
${int(transA)},
24+
${int(transB)}
2525
);
2626
2727
ref_${data_out}_${A} += ${M} * ${N};

Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw
3232
tensorsShapeLen = len(bufferA.shape)
3333

3434
AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name,
35-
dimIdx = (tensorsShapeLen - 2) + parseDict['transA'])
35+
dimIdx = (tensorsShapeLen - 2) + int(parseDict['transA']))
3636
ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name,
37-
dimIdx = (tensorsShapeLen - 1) - parseDict['transA'])
37+
dimIdx = (tensorsShapeLen - 1) - int(parseDict['transA']))
3838
BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name,
39-
dimIdx = (tensorsShapeLen - 2) + parseDict['transB'])
39+
dimIdx = (tensorsShapeLen - 2) + int(parseDict['transB']))
4040
BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name,
41-
dimIdx = (tensorsShapeLen - 1) - parseDict['transB'])
41+
dimIdx = (tensorsShapeLen - 1) - int(parseDict['transB']))
4242
outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 2))
4343
outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 1))
4444

Deeploy/Targets/Snitch/Parsers.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@ def parseNode(self, node: gs.Node) -> bool:
1818
if not ret:
1919
return False
2020

21-
if not all([
22-
self.operatorRepresentation['transA'] == 0,
23-
]):
21+
if self.operatorRepresentation['transA']:
2422
return False
2523

2624
return True
@@ -50,9 +48,7 @@ def parseNode(self, node: gs.Node) -> bool:
5048
if not ret:
5149
return False
5250

53-
if not all([
54-
self.operatorRepresentation['transA'] == 0,
55-
]):
51+
if self.operatorRepresentation['transA']:
5652
return False
5753

5854
return True

0 commit comments

Comments (0)