Skip to content

Commit 28318a0

Browse files
committed
[memory strides] update check functions
[Change] - Add support for memory strides to the check functions. - Check that the memory strides provided are compatible with the given tensor shape. - Check that the memory strides are compatible with the kernel. - Check that the innermost memory stride equals 1. - Check that the output tensor has enough memory allocated, taking into consideration the memory strides provided in the output tensor. - Setting all memory strides to zero is supported, meaning that each kernel calculates its own memory strides such that all data is contiguous. Signed-off-by: R. Hilkens <[email protected]>
1 parent 1ae4883 commit 28318a0

File tree

3 files changed

+279
-86
lines changed

3 files changed

+279
-86
lines changed

lib/src/kernels/common/mli_krn_common.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,11 @@ static void __attribute__ ((always_inline)) basic_rnn_cell_prepare_and_run_fx(
150150
const MLI_PTR (io_T) in_ptr = (const MLI_PTR (io_T)) in->data;
151151
MLI_PTR (io_T) state_ptr = (MLI_PTR (io_T)) prev_out->data;
152152

153-
mli_tensor dense_out;
153+
mli_tensor dense_out = { 0 };
154154
dense_out.data = (cfg->mode != RNN_BATCH_TO_LAST) ? out->data : cfg->ir_tsr->data;
155155
dense_out.capacity = (cfg->mode != RNN_BATCH_TO_LAST) ? out->capacity : cfg->ir_tsr->capacity;
156156
dense_out.shape[0] = out_elements;
157+
dense_out.mem_stride[0] = 0;
157158
dense_out.rank = 1;
158159
dense_out.el_type = in->el_type;
159160
// 1sign and 3 integer bits for typical rnn nonlinearity (TANH/SIGM) is enough
@@ -269,6 +270,8 @@ static void __attribute__ ((always_inline)) lstm_cell_prepare_and_run_fx(
269270
ir_tensor->rank = bias->rank;
270271
ir_tensor->shape[0] = bias->shape[0];
271272
ir_tensor->shape[1] = bias->shape[1];
273+
ir_tensor->mem_stride[0] = 0;
274+
ir_tensor->mem_stride[1] = 0;
272275
ir_tensor->el_type = in->el_type;
273276
// 1sign and 3 integer bits for TANH/SIGM input is enough
274277
ir_tensor->el_params.fx.frac_bits = (sizeof(io_T) * 8) - 1 - 3;
@@ -278,9 +281,9 @@ static void __attribute__ ((always_inline)) lstm_cell_prepare_and_run_fx(
278281
const int in_to_state_dif = in->el_params.fx.frac_bits - prev_out->el_params.fx.frac_bits;
279282
const int dense_out_shift = mli_prv_calc_shift(prev_out, weights, ir_tensor);
280283

281-
// Paricular subtensors of intermediate tensor
282-
mli_tensor in_gate, forget_gate, out_gate; // Various gates to controll info flow
283-
mli_tensor g_tsr; // Information tensors
284+
// Paricular subtensors of intermediate tensor (mli_tensor.mem_stride[] should be zero and cannot be left uninitialized)
285+
mli_tensor in_gate = { 0 }, forget_gate = { 0 }, out_gate = { 0 }; // Various gates to controll info flow
286+
mli_tensor g_tsr = { 0 }; // Information tensors
284287

285288
// Init subtensors
286289
mli_point_to_subtsr_cfg iterator = {.start_coord = {0}, .coord_num=1, .first_out_dim_size=1};

lib/src/private/mli_prv_tensor.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,10 @@ static inline mli_status __attribute__ ((always_inline)) mli_prv_copy_tensor_for
198198
if (check != MLI_STATUS_OK)
199199
return check;
200200

201-
for (int idx = 0; idx < src->rank; idx++)
201+
for (int idx = 0; idx < src->rank; idx++) {
202202
dst->shape[idx] = src->shape[idx];
203+
dst->mem_stride[idx] = src->mem_stride[idx];
204+
}
203205

204206
dst->rank = src->rank;
205207
dst->el_type = src->el_type;

0 commit comments

Comments
 (0)