|
22 | 22 |
|
23 | 23 |
|
24 | 24 | class NeurekaMemoryLayout: |
25 | | - _WEIGHT_BANDWIDTH = 256 |
| 25 | + _WEIGHT_BANDWIDTH_1x1 = 256 |
| 26 | + _WEIGHT_BANDWIDTH_3x3 = 288 |
26 | 27 | _CIN_SUBTILE_1x1 = 32 |
27 | 28 | _CIN_SUBTILE_3x3 = 32 |
28 | 29 |
|
@@ -77,27 +78,29 @@ def weightEncode( |
77 | 78 | weight = weight.reshape(-1, height * width * cinSubtile) |
78 | 79 | # Pad only the last dimension to weight bandwidth size |
79 | 80 | # (-1, Weight Bandwidth) |
| 81 | + print("DEBUG", weight.shape) |
80 | 82 | weight = np.pad( |
81 | 83 | weight, |
82 | | - ((0, 0), (0, NeurekaMemoryLayout._WEIGHT_BANDWIDTH - weight.shape[-1])), |
| 84 | + ((0, 0), (0, NeurekaMemoryLayout._WEIGHT_BANDWIDTH_3x3 - weight.shape[-1])), |
83 | 85 | "constant", |
84 | 86 | constant_values=0, |
85 | 87 | ) |
| 88 | + weightBandwidthBytes = int(np.ceil(NeurekaMemoryLayout._WEIGHT_BANDWIDTH_3x3 / 8)) |
86 | 89 | elif height == 1 and width == 1: |
87 | 90 | # (cout * cinMajor, Bits * cinSubtile) |
88 | 91 | weight = weight.reshape(-1, bits * cinSubtile) |
89 | 92 | # Pad only the last dimension to weight bandwidth size |
90 | 93 | # (-1, Weight Bandwidth) |
91 | 94 | weight = np.pad( |
92 | 95 | weight, |
93 | | - ((0, 0), (0, NeurekaMemoryLayout._WEIGHT_BANDWIDTH - weight.shape[-1])), |
| 96 | + ((0, 0), (0, NeurekaMemoryLayout._WEIGHT_BANDWIDTH_1x1 - weight.shape[-1])), |
94 | 97 | "constant", |
95 | 98 | constant_values=0, |
96 | 99 | ) |
| 100 | + weightBandwidthBytes = int(np.ceil(NeurekaMemoryLayout._WEIGHT_BANDWIDTH_1x1 / 8)) |
97 | 101 |
|
98 | 102 | # Prepare for packing |
99 | 103 | # (-1, Weight Bandwidth Bytes, 8) |
100 | | - weightBandwidthBytes = int(np.ceil(NeurekaMemoryLayout._WEIGHT_BANDWIDTH / 8)) |
101 | 104 | weight = np.stack(np.split(weight, weightBandwidthBytes, axis=-1), axis=-2) |
102 | 105 |
|
103 | 106 | # Pack bits |
|
0 commit comments