Skip to content

Commit 67113b3

Browse files
fix neureka bandwidth in 3x3 mode
1 parent 1e19f46 commit 67113b3

File tree

3 files changed

+13
-9
lines changed

3 files changed

+13
-9
lines changed

neureka/hal/neureka_task.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,14 +166,16 @@ void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in,
166 166
.d2 = h_out_stride};
167 167
task->data.cfg.output_stride = output_stride;
168 168

169-
task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES;
170 169
if (task->kernel_shape == 1) { // 1x1
170+
task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1;
171 171
task->data.cfg.weights_stride.d1 =
172-
NEUREKA_WEIGHT_BANDWIDTH_BYTES * num_k_in;
172+
NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 * num_k_in;
173 173
} else if (!task->depthwise) { // 3x3
174+
task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3;
174 175
task->data.cfg.weights_stride.d1 =
175-
NEUREKA_WEIGHT_BANDWIDTH_BYTES * task->qw * num_k_in;
176+
NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 * task->qw * num_k_in;
176 177
} else { // 3x3 depthwise
178+
task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3;
177 179
task->data.cfg.weights_stride.d1 = 0;
178 180
}
179 181
task->data.cfg.weights_stride.d2 = 0;

neureka/hal/neureka_task_defs.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
23 23

24 24
/* ARCHITECTURE */
25 25

26-
#define NNX_NEUREKA_PE_H (6)
27-
#define NNX_NEUREKA_PE_W (6)
26+
#define NNX_NEUREKA_PE_H (4)
27+
#define NNX_NEUREKA_PE_W (4)
28+
#define NNX_NEUREKA_BANDWIDTH_1x1 (256)
29+
#define NNX_NEUREKA_BANDWIDTH_3x3 (288)
28 30

29 31
#define NEUREKA_SUBTILE_INPUT_HEIGHT_1x1 (NNX_NEUREKA_PE_H)
30 32
#define NEUREKA_SUBTILE_INPUT_WIDTH_1x1 (NNX_NEUREKA_PE_W)
@@ -34,12 +36,13 @@
34 36
#define NEUREKA_SUBTILE_INPUT_WIDTH_3x3 (NNX_NEUREKA_PE_W+2)
35 37
#define NEUREKA_SUBTILE_INPUT_CHANNEL_3x3 (32)
36 38

37-
#define NEUREKA_SUBTILE_OUTPUT_HEIGHT (4)
38-
#define NEUREKA_SUBTILE_OUTPUT_WIDTH (4)
39+
#define NEUREKA_SUBTILE_OUTPUT_HEIGHT (NNX_NEUREKA_PE_H)
40+
#define NEUREKA_SUBTILE_OUTPUT_WIDTH (NNX_NEUREKA_PE_W)
39 41
#define NEUREKA_SUBTILE_OUTPUT_CHANNEL (32)
40 42

41 43
#define NEUREKA_OUTPUT_BANDWIDTH_BYTES (32)
42-
#define NEUREKA_WEIGHT_BANDWIDTH_BYTES (32)
44+
#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 (NNX_NEUREKA_BANDWIDTH_1x1/8)
45+
#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 (NNX_NEUREKA_BANDWIDTH_3x3/8)
43 46

44 47
/* TASK REGISTERS */
45 48

test/NeurekaMemoryLayout.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ def weightEncode(
78 78
weight = weight.reshape(-1, height * width * cinSubtile)
79 79
# Pad only the last dimension to weight bandwidth size
80 80
# (-1, Weight Bandwidth)
81-
print("DEBUG", weight.shape)
82 81
weight = np.pad(
83 82
weight,
84 83
((0, 0), (0, NeurekaMemoryLayout._WEIGHT_BANDWIDTH_3x3 - weight.shape[-1])),

0 commit comments

Comments
 (0)