diff --git a/code/blocks/blocks.c b/code/blocks/blocks.c
new file mode 100644
index 00000000..9bb9d388
--- /dev/null
+++ b/code/blocks/blocks.c
@@ -0,0 +1,151 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2021 Zoltán Vörös
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/misc.h"
+
+#include "ndarray.h"
+#include "../ulab_tools.h"
+#include "blocks.h"
+
+#if ULAB_HAS_BLOCKS
+
+const mp_obj_type_t blocks_transformer_type = { // type object named 'transformer'; only the name slot is filled in here
+    { &mp_type_type },
+    .name = MP_QSTR_transformer,
+};
+
+size_t *blocks_coords_from_pointer(void *p1, ndarray_obj_t *ndarray) {
+    // Calculates the coordinates in the original tensor from the position of the pointer
+    // The original view is assumed to be dense, i.e., the strides can be computed from the shape
+    // This is a utility function, and is not exposed to the python interpreter
+    blocks_block_obj_t *block = ndarray->block;
+    size_t diff = (uint8_t *)p1 - (uint8_t *)block->origin;
+    size_t stride = ndarray->itemsize;
+    // the result is aligned to the right, like the shape; the zeroed entries of unused axes stay 0
+    size_t *coords = m_new0(size_t, ULAB_MAX_DIMS);
+    // first, calculate the stride of the first axis: the itemsize times the lengths of all trailing axes
+    for(uint8_t i = 0; i < block->ndim - 1; i++) {
+        stride *= block->shape[ULAB_MAX_DIMS - i - 1];
+    }
+    for(uint8_t i = block->ndim; i > 0; i--) {
+        coords[ULAB_MAX_DIMS - i] = diff / stride;
+        diff -= coords[ULAB_MAX_DIMS - i] * stride; // what is left is the byte offset within this axis
+        stride /= (i > 1 ? block->shape[ULAB_MAX_DIMS - i + 1] : 1); // the last axis has no successor to step down to
+    }
+    return coords;
+}
+
+void blocks_block_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
+    // prints block(shape=(...), dtype=...) with the shape and dtype of the ndarray that the block points to
+    (void)kind;
+    blocks_block_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    ndarray_obj_t *ndarray = (ndarray_obj_t *)self->ndarray;
+    mp_print_str(print, "block(shape=(");
+    for(uint8_t i = 0; i < ndarray->ndim; i++) { // with ndim == 0, shape is not read, and "()" is printed
+        mp_printf(print, "%ld", ndarray->shape[ULAB_MAX_DIMS - ndarray->ndim + i]);
+        // items are separated by ", "; a single item keeps its trailing comma, as in python tuples
+        mp_print_str(print, ndarray->ndim == 1 ? "," : (i + 1 < ndarray->ndim ? ", " : ""));
+    }
+    mp_print_str(print, "), dtype=");
+    ndarray_print_dtype(print, ndarray);
+    mp_print_str(print, ")");
+}
+
+const mp_obj_type_t blocks_block_type = { // type of the block objects that blocks_new_ndarray attaches to its ndarrays
+    { &mp_type_type },
+    .name = MP_QSTR_block,
+    .print = blocks_block_print, // ndarray_print hands the printing over to this slot, when the ndarray's flags are set
+};
+
+mp_obj_t blocks_new_ndarray(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    // Creates an ndarray header of the given shape and dtype (no full-size data buffer), attaches a new block
+    // object to it, and takes the reader function and the array pointer from the transformer keyword argument
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_shape, MP_ARG_OBJ | MP_ARG_REQUIRED, { .u_rom_obj = mp_const_none } },
+        { MP_QSTR_transformer, MP_ARG_KW_ONLY | MP_ARG_OBJ | MP_ARG_REQUIRED, { .u_rom_obj = mp_const_none } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_obj = MP_ROM_INT(NDARRAY_FLOAT) } },
+    };
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    if(!MP_OBJ_IS_TYPE(args[0].u_obj, &mp_type_tuple)) {
+        mp_raise_TypeError(translate("shape must be a tuple"));
+    }
+    mp_obj_tuple_t *shape_tuple = MP_OBJ_TO_PTR(args[0].u_obj);
+    if(shape_tuple->len > ULAB_MAX_DIMS) {
+        mp_raise_ValueError(translate("too many dimensions"));
+    }
+
+    uint8_t _dtype;
+    #if ULAB_HAS_DTYPE_OBJECT
+    if(MP_OBJ_IS_TYPE(args[2].u_obj, &ulab_dtype_type)) { // the dtype is args[2]; args[1] holds the transformer
+        dtype_obj_t *dtype = MP_OBJ_TO_PTR(args[2].u_obj);
+        _dtype = dtype->dtype.type;
+    } else { // otherwise, the dtype is read as an integer
+        _dtype = mp_obj_get_int(args[2].u_obj);
+    }
+    #else
+    _dtype = mp_obj_get_int(args[2].u_obj);
+    #endif
+
+    size_t *shape = m_new0(size_t, ULAB_MAX_DIMS);
+    size_t len = 0;
+    for(uint8_t i=0; i < shape_tuple->len; i++) {
+        shape[ULAB_MAX_DIMS - i - 1] = mp_obj_get_int(shape_tuple->items[shape_tuple->len - i - 1]);
+        // len tracks the longest axis, so that the subarray reserved below can hold the data along any single axis
+        if(shape[ULAB_MAX_DIMS - i - 1] > len) {
+            len = shape[ULAB_MAX_DIMS - i - 1];
+        }
+    }
+    ndarray_obj_t *ndarray = ndarray_new_ndarray_header(shape_tuple->len, shape, NULL, _dtype);
+    ndarray->flags = BLOCK_IS_READ_ONLY;
+    blocks_block_obj_t *block = m_new_obj(blocks_block_obj_t);
+    block->base.type = &blocks_block_type;
+    block->ndim = ndarray->ndim;
+    // store a pointer to the ndarray
+    block->ndarray = ndarray;
+
+    // NOTE(review): args[1] is used as a blocks_transformer_obj_t without a type check -- confirm that callers guarantee this
+    blocks_transformer_obj_t *transformer = MP_OBJ_TO_PTR(args[1].u_obj);
+    block->arrfunc = transformer->arrfunc;
+    ndarray->array = transformer->array;
+
+    uint8_t *barray = m_new(uint8_t, ndarray->itemsize * len);
+    block->subarray = barray;
+    // store the original array dimensions; block->shape should never be touched
+    memcpy(&(block->shape), &(ndarray->shape), sizeof(size_t) * ULAB_MAX_DIMS);
+    // store the original address of the array, which was taken from the transformer above; block->origin should never be touched
+    block->origin = ndarray->array;
+    ndarray->block = block;
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_KW(blocks_new_ndarray_obj, 0, blocks_new_ndarray); // the required arguments are enforced by mp_arg_parse_all in the function
+
+static const mp_rom_map_elem_t ulab_blocks_globals_table[] = { // the names of the blocks module; blocks_transformer_type is not listed
+    { MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR_blocks) },
+    { MP_OBJ_NEW_QSTR(MP_QSTR_ndarray), (mp_obj_t)&blocks_new_ndarray_obj },
+    { MP_OBJ_NEW_QSTR(MP_QSTR_block), (mp_obj_t)&blocks_block_type },
+};
+
+static MP_DEFINE_CONST_DICT(mp_module_ulab_blocks_globals, ulab_blocks_globals_table);
+
+mp_obj_module_t ulab_blocks_module = { // module object, declared extern in blocks.h
+    .base = { &mp_type_module },
+    .globals = (mp_obj_dict_t*)&mp_module_ulab_blocks_globals,
+};
+
+#endif
diff --git a/code/blocks/blocks.h b/code/blocks/blocks.h
new file mode 100644
index 00000000..0f25276a
--- /dev/null
+++ b/code/blocks/blocks.h
@@ -0,0 +1,35 @@
+
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2020-2021 Zoltán Vörös
+*/
+
+#ifndef _BLOCKS_
+#define _BLOCKS_
+
+#include "ulab.h"
+#include "ndarray.h"
+
+#define BLOCK_NO_FLAG               0x00
+#define BLOCK_IS_READ_ONLY          0x01
+#define BLOCK_IS_READ_WRITE         0x02
+
+typedef struct _blocks_transformer_obj_t {
+    mp_obj_base_t base;
+    void *arrfunc; // reader function; ndarray_iternext calls it as void (*)(ndarray_obj_t *, void *, int32_t *, size_t)
+    void *array; // blocks_new_ndarray stores this pointer in ndarray->array
+} blocks_transformer_obj_t;
+
+extern const mp_obj_type_t blocks_transformer_type;
+
+extern const mp_obj_type_t blocks_block_type;
+extern mp_obj_module_t ulab_blocks_module;
+
+size_t *blocks_coords_from_pointer(void *, ndarray_obj_t *);
+
+#endif
diff --git a/code/micropython.mk b/code/micropython.mk
index f376ae49..e7ed7bdc 100644
--- a/code/micropython.mk
+++ b/code/micropython.mk
@@ -1,7 +1,6 @@
 
 USERMODULES_DIR := $(USERMOD_DIR)
 
-# Add all C files to SRC_USERMOD.
 SRC_USERMOD += $(USERMODULES_DIR)/scipy/optimize/optimize.c
 SRC_USERMOD += $(USERMODULES_DIR)/scipy/signal/signal.c
 SRC_USERMOD += $(USERMODULES_DIR)/scipy/special/special.c
@@ -22,6 +21,7 @@ SRC_USERMOD += $(USERMODULES_DIR)/numpy/stats/stats.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/transform/transform.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/vector/vector.c
 SRC_USERMOD += $(USERMODULES_DIR)/user/user.c
+SRC_USERMOD += $(USERMODULES_DIR)/blocks/blocks.c
 
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/numpy.c
 SRC_USERMOD += $(USERMODULES_DIR)/scipy/scipy.c
diff --git a/code/ndarray.c b/code/ndarray.c
index ff769ff6..15353b84 100644
--- a/code/ndarray.c
+++ b/code/ndarray.c
@@ -24,6 +24,7 @@
 #include "ulab_tools.h"
 #include "ndarray.h"
 #include "ndarray_operators.h"
+#include "blocks/blocks.h"
 
 mp_uint_t ndarray_print_threshold = NDARRAY_PRINT_THRESHOLD;
 mp_uint_t ndarray_print_edgeitems = NDARRAY_PRINT_EDGEITEMS;
@@ -241,13 +242,13 @@ void ndarray_fill_array_iterable(mp_float_t *array, mp_obj_t iterable) {
     }
 }
 
-#if ULAB_HAS_FUNCTION_ITERATOR
 size_t *ndarray_new_coords(uint8_t ndim) {
     size_t *coords = m_new(size_t, ndim);
     memset(coords, 0, ndim*sizeof(size_t));
     return coords;
 }
 
+#if ULAB_HAS_FUNCTION_ITERATOR
 void ndarray_rewind_array(uint8_t ndim, uint8_t *array, size_t *shape, int32_t *strides, size_t *coords) {
     // resets the data pointer of a single array, whenever an axis is full
     // since we always iterate over the very last axis, we have to keep track of
@@ -315,29 +316,34 @@ void fill_array_iterable(mp_float_t *array, mp_obj_t iterable) {
     }
 }
 
+void ndarray_print_dtype(const mp_print_t *print, ndarray_obj_t *ndarray) {
+    // writes the bare name of the dtype (no quotes, no brackets); unknown type codes print nothing
+    const char *name = ndarray->boolean ? "bool" : "";
+    if(!ndarray->boolean) { // the boolean flag takes precedence over the type code
+        switch(ndarray->dtype.type) {
+            case NDARRAY_UINT8: name = "uint8"; break;
+            case NDARRAY_INT8: name = "int8"; break;
+            case NDARRAY_UINT16: name = "uint16"; break;
+            case NDARRAY_INT16: name = "int16"; break;
+            #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+            case NDARRAY_FLOAT: name = "float32"; break;
+            #else
+            case NDARRAY_FLOAT: name = "float64"; break;
+            #endif
+            default: break;
+        }
+    }
+    mp_print_str(print, name);
+}
+
 #if NDARRAY_HAS_DTYPE
 #if ULAB_HAS_DTYPE_OBJECT
 void ndarray_dtype_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
     (void)kind;
     dtype_obj_t *self = MP_OBJ_TO_PTR(self_in);
     mp_print_str(print, "dtype('");
-    if(self->dtype == NDARRAY_BOOLEAN) {
-        mp_print_str(print, "bool')");
-    } else if(self->dtype == NDARRAY_UINT8) {
-        mp_print_str(print, "uint8')");
-    } else if(self->dtype == NDARRAY_INT8) {
-        mp_print_str(print, "int8')");
-    } else if(self->dtype == NDARRAY_UINT16) {
-        mp_print_str(print, "uint16')");
-    } else if(self->dtype == NDARRAY_INT16) {
-        mp_print_str(print, "int16')");
-    } else {
-        #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
-        mp_print_str(print, "float32')");
-        #else
-        mp_print_str(print, "float64')");
-        #endif
-    }
+    ndarray_print_dtype(print, self); // NOTE(review): self is a dtype_obj_t *, but ndarray_print_dtype is declared with ndarray_obj_t * and reads ->boolean -- confirm
+    mp_print_str(print, "')");
 }
 
 mp_obj_t ndarray_dtype_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) {
@@ -358,7 +364,7 @@ mp_obj_t ndarray_dtype_make_new(const mp_obj_type_t *type, size_t n_args, size_t
     if(MP_OBJ_IS_TYPE(args[0], &ulab_ndarray_type)) {
         // return the dtype of the array
         ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[0]);
-        dtype->dtype = ndarray->dtype;
+        dtype->dtype.type = ndarray->dtype.type;
     } else {
         uint8_t _dtype;
         if(MP_OBJ_IS_INT(_args[0].u_obj)) {
@@ -384,7 +390,7 @@ mp_obj_t ndarray_dtype_make_new(const mp_obj_type_t *type, size_t n_args, size_t
                 mp_raise_TypeError(translate("data type not understood"));
             }
         }
-        dtype->dtype = _dtype;
+        dtype->dtype.type = _dtype;
     }
     return dtype;
 }
@@ -393,7 +399,7 @@ mp_obj_t ndarray_dtype(mp_obj_t self_in) {
     ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
     dtype_obj_t *dtype = m_new_obj(dtype_obj_t);
     dtype->base.type = &ulab_dtype_type;
-    dtype->dtype = self->dtype;
+    dtype->dtype.type = self->dtype.type;
     return dtype;
 }
 
@@ -403,7 +409,7 @@ mp_obj_t ndarray_dtype(mp_obj_t self_in) {
     uint8_t dtype;
     if(MP_OBJ_IS_TYPE(self_in, &ulab_ndarray_type)) {
         ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
-        dtype = self->dtype;
+        dtype = self->dtype.type;
     } else { // we assume here that the input is a single character
         GET_STR_DATA_LEN(self_in, _dtype, len);
         if((len != 1) || ((*_dtype != NDARRAY_BOOL) && (*_dtype != NDARRAY_UINT8)
@@ -451,7 +457,7 @@ MP_DEFINE_CONST_FUN_OBJ_0(ndarray_get_printoptions_obj, ndarray_get_printoptions
 mp_obj_t ndarray_get_item(ndarray_obj_t *ndarray, void *array) {
     // returns a proper micropython object from an array
     if(!ndarray->boolean) {
-        return mp_binary_get_val_array(ndarray->dtype, array, 0);
+        return mp_binary_get_val_array(ndarray->dtype.type, array, 0);
     } else {
         if(*(uint8_t *)array) {
             return mp_const_true;
@@ -501,6 +507,13 @@ static void ndarray_print_bracket(const mp_print_t *print, const size_t conditio
 void ndarray_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
     (void)kind;
     ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
+    #if ULAB_HAS_BLOCKS
+    if(self->flags) {
+        const mp_obj_type_t *type = mp_obj_get_type(self->block);
+        type->print((mp_print_t *)print, self->block, kind);
+        return;
+    }
+    #endif
     uint8_t *array = (uint8_t *)self->array;
     mp_print_str(print, "array(");
     if(self->len == 0) {
@@ -545,23 +558,9 @@ void ndarray_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t ki
     } while(i < self->shape[ULAB_MAX_DIMS-4]);
     ndarray_print_bracket(print, 0, self->shape[ULAB_MAX_DIMS-4], "]");
     #endif
-    if(self->boolean) {
-        mp_print_str(print, ", dtype=bool)");
-    } else if(self->dtype == NDARRAY_UINT8) {
-        mp_print_str(print, ", dtype=uint8)");
-    } else if(self->dtype == NDARRAY_INT8) {
-        mp_print_str(print, ", dtype=int8)");
-    } else if(self->dtype == NDARRAY_UINT16) {
-        mp_print_str(print, ", dtype=uint16)");
-    } else if(self->dtype == NDARRAY_INT16) {
-        mp_print_str(print, ", dtype=int16)");
-    } else {
-        #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
-        mp_print_str(print, ", dtype=float32)");
-        #else
-        mp_print_str(print, ", dtype=float64)");
-        #endif
-    }
+    mp_print_str(print, ", dtype=");
+    ndarray_print_dtype(print, self);
+    mp_print_str(print, ")");
 }
 
 void ndarray_assign_elements(ndarray_obj_t *ndarray, mp_obj_t iterable, uint8_t dtype, size_t *idx) {
@@ -595,19 +594,18 @@ bool ndarray_is_dense(ndarray_obj_t *ndarray) {
     return stride == ndarray->strides[ULAB_MAX_DIMS-ndarray->ndim] ? true : false;
 }
 
-
-ndarray_obj_t *ndarray_new_ndarray(uint8_t ndim, size_t *shape, int32_t *strides, uint8_t dtype) {
-    // Creates the base ndarray with shape, and initialises the values to straight 0s
+ndarray_obj_t *ndarray_new_ndarray_header(uint8_t ndim, size_t *shape, int32_t *strides, uint8_t dtype) {
+    // creates an empty ndarray, i.e., one with header, but without data
     ndarray_obj_t *ndarray = m_new_obj(ndarray_obj_t);
     ndarray->base.type = &ulab_ndarray_type;
-    ndarray->dtype = dtype == NDARRAY_BOOL ? NDARRAY_UINT8 : dtype;
+    ndarray->dtype.type = dtype == NDARRAY_BOOL ? NDARRAY_UINT8 : dtype;
     ndarray->boolean = dtype == NDARRAY_BOOL ? NDARRAY_BOOLEAN : NDARRAY_NUMERIC;
     ndarray->ndim = ndim;
     ndarray->len = ndim == 0 ? 0 : 1;
-    ndarray->itemsize = mp_binary_get_size('@', ndarray->dtype, NULL);
+    ndarray->itemsize = mp_binary_get_size('@', ndarray->dtype.type, NULL);
     int32_t *_strides;
     if(strides == NULL) {
-        _strides = strides_from_shape(shape, ndarray->dtype);
+        _strides = strides_from_shape(shape, ndarray->dtype.type);
     } else {
         _strides = strides;
     }
@@ -617,6 +615,16 @@ ndarray_obj_t *ndarray_new_ndarray(uint8_t ndim, size_t *shape, int32_t *strides
         ndarray->len *= shape[i-1];
     }
 
+    #if ULAB_HAS_BLOCKS
+    // indicate that the array doesn't need special treatment in the readout function
+    ndarray->flags = BLOCK_NO_FLAG;
+    #endif
+    return ndarray;
+}
+
+ndarray_obj_t *ndarray_new_ndarray(uint8_t ndim, size_t *shape, int32_t *strides, uint8_t dtype) {
+    // Creates the base ndarray with shape, and initialises the values to straight 0s
+    ndarray_obj_t *ndarray = ndarray_new_ndarray_header(ndim, shape, strides, dtype);
     // if the length is 0, still allocate a single item, so that contractions can be handled
     size_t len = ndarray->itemsize * MAX(1, ndarray->len);
     uint8_t *array = m_new(byte, len);
@@ -704,7 +712,7 @@ ndarray_obj_t *ndarray_new_view(ndarray_obj_t *source, uint8_t ndim, size_t *sha
     ndarray_obj_t *ndarray = m_new_obj(ndarray_obj_t);
     ndarray->base.type = &ulab_ndarray_type;
     ndarray->boolean = source->boolean;
-    ndarray->dtype = source->dtype;
+    ndarray->dtype.type = source->dtype.type;
     ndarray->ndim = ndim;
     ndarray->itemsize = source->itemsize;
     ndarray->len = ndim == 0 ? 0 : 1;
@@ -716,6 +724,14 @@ ndarray_obj_t *ndarray_new_view(ndarray_obj_t *source, uint8_t ndim, size_t *sha
     uint8_t *pointer = (uint8_t *)source->array;
     pointer += offset;
     ndarray->array = pointer;
+    #if ULAB_HAS_BLOCKS
+    ndarray->flags = source->flags;
+    if(source->flags) {
+        // copy the block, only if ndarray has a block object
+        ndarray->block = source->block;
+        ndarray->block->ndarray = ndarray;
+    }
+    #endif
     return ndarray;
 }
 
@@ -725,9 +741,9 @@ ndarray_obj_t *ndarray_copy_view(ndarray_obj_t *source) {
     // In order to make it dtype-agnostic, we copy the memory content
     // instead of reading out the values
 
-    int32_t *strides = strides_from_shape(source->shape, source->dtype);
+    int32_t *strides = strides_from_shape(source->shape, source->dtype.type);
 
-    uint8_t dtype = source->dtype;
+    uint8_t dtype = source->dtype.type;
     if(source->boolean) {
         dtype = NDARRAY_BOOLEAN;
     }
@@ -755,7 +771,7 @@ mp_obj_t ndarray_byteswap(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_
     } else {
         ndarray = ndarray_new_view(self, self->ndim, self->shape, self->strides, 0);
     }
-    if((self->dtype == NDARRAY_BOOL) || (self->dtype == NDARRAY_UINT8) || (self->dtype == NDARRAY_INT8)) {
+    if((self->dtype.type == NDARRAY_BOOL) || (self->dtype.type == NDARRAY_UINT8) || (self->dtype.type == NDARRAY_INT8)) {
         return MP_OBJ_FROM_PTR(ndarray);
     } else {
         uint8_t *array = (uint8_t *)ndarray->array;
@@ -773,7 +789,7 @@ mp_obj_t ndarray_byteswap(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_
                 #endif
                     size_t l = 0;
                     do {
-                        if(self->dtype == NDARRAY_FLOAT) {
+                        if(self->dtype.type == NDARRAY_FLOAT) {
                             #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
                             SWAP(uint8_t, array[0], array[3]);
                             SWAP(uint8_t, array[1], array[2]);
@@ -845,7 +861,7 @@ STATIC uint8_t ndarray_init_helper(size_t n_args, const mp_obj_t *pos_args, mp_m
     #if ULAB_HAS_DTYPE_OBJECT
     if(MP_OBJ_IS_TYPE(args[1].u_obj, &ulab_dtype_type)) {
         dtype_obj_t *dtype = MP_OBJ_TO_PTR(args[1].u_obj);
-        _dtype = dtype->dtype;
+        _dtype = dtype->dtype.type;
     } else { // this must be an integer defined as a class constant (ulba.uint8 etc.)
         _dtype = mp_obj_get_int(args[1].u_obj);
     }
@@ -860,7 +876,7 @@ STATIC mp_obj_t ndarray_make_new_core(const mp_obj_type_t *type, size_t n_args,
 
     if(MP_OBJ_IS_TYPE(args[0], &ulab_ndarray_type)) {
         ndarray_obj_t *source = MP_OBJ_TO_PTR(args[0]);
-        if(dtype == source->dtype) {
+        if(dtype == source->dtype.type) {
             return ndarray_copy_view(source);
         }
         ndarray_obj_t *target = ndarray_new_dense_ndarray(source->ndim, source->shape, dtype);
@@ -882,12 +898,12 @@ STATIC mp_obj_t ndarray_make_new_core(const mp_obj_type_t *type, size_t n_args,
                     do {
                         mp_obj_t item;
                         // floats must be treated separately, because they can't directly be converted to integer types
-                        if((source->dtype == NDARRAY_FLOAT) && (dtype != NDARRAY_FLOAT)) {
+                        if((source->dtype.type == NDARRAY_FLOAT) && (dtype != NDARRAY_FLOAT)) {
                             // floats must be treated separately, because they can't directly be converted to integer types
-                            mp_float_t f = ndarray_get_float_value(sarray, source->dtype);
+                            mp_float_t f = ndarray_get_float_value(sarray, source->dtype.type);
                             item = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(floor)(f));
                         } else {
-                            item = mp_binary_get_val_array(source->dtype, sarray, 0);
+                            item = mp_binary_get_val_array(source->dtype.type, sarray, 0);
                         }
                         mp_binary_set_val_array(dtype, tarray, 0, item);
                         tarray += target->itemsize;
@@ -915,7 +931,7 @@ STATIC mp_obj_t ndarray_make_new_core(const mp_obj_type_t *type, size_t n_args,
         return MP_OBJ_FROM_PTR(target);
     }
 
-    // We have to figure out, whether the elements of the iterable are iterables themself
+    // We have to figure out, whether the elements of the iterable are iterables themselves
     uint8_t ndim = 0;
     size_t shape[ULAB_MAX_DIMS];
     mp_obj_iter_buf_t iter_buf[ULAB_MAX_DIMS];
@@ -961,7 +977,7 @@ STATIC mp_obj_t ndarray_make_new_core(const mp_obj_type_t *type, size_t n_args,
             do {
             #endif
                 iterable[ULAB_MAX_DIMS - 1] = mp_getiter(item, &iter_buf[ULAB_MAX_DIMS - 1]);
-                ndarray_assign_elements(self, iterable[ULAB_MAX_DIMS - 1], self->dtype, &idx);
+                ndarray_assign_elements(self, iterable[ULAB_MAX_DIMS - 1], self->dtype.type, &idx);
             #if ULAB_MAX_DIMS > 1
                 item = ndim > 1 ? mp_iternext(iterable[ULAB_MAX_DIMS - 2]) : MP_OBJ_STOP_ITERATION;
             } while(item != MP_OBJ_STOP_ITERATION);
@@ -1167,64 +1183,64 @@ void ndarray_assign_view(ndarray_obj_t *view, ndarray_obj_t *values) {
         lstrides[i] /= view->itemsize;
     }
 
-    if(view->dtype == NDARRAY_UINT8) {
-        if(values->dtype == NDARRAY_UINT8) {
+    if(view->dtype.type == NDARRAY_UINT8) {
+        if(values->dtype.type == NDARRAY_UINT8) {
             ASSIGNMENT_LOOP(view, uint8_t, uint8_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_INT8) {
+        } else if(values->dtype.type == NDARRAY_INT8) {
             ASSIGNMENT_LOOP(view, uint8_t, int8_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_UINT16) {
+        } else if(values->dtype.type == NDARRAY_UINT16) {
             ASSIGNMENT_LOOP(view, uint8_t, uint16_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_INT16) {
+        } else if(values->dtype.type == NDARRAY_INT16) {
             ASSIGNMENT_LOOP(view, uint8_t, int16_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_FLOAT) {
+        } else if(values->dtype.type == NDARRAY_FLOAT) {
             ASSIGNMENT_LOOP(view, uint8_t, mp_float_t, lstrides, rarray, rstrides);
         }
-    } else if(view->dtype == NDARRAY_INT8) {
-        if(values->dtype == NDARRAY_UINT8) {
+    } else if(view->dtype.type == NDARRAY_INT8) {
+        if(values->dtype.type == NDARRAY_UINT8) {
             ASSIGNMENT_LOOP(view, int8_t, uint8_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_INT8) {
+        } else if(values->dtype.type == NDARRAY_INT8) {
             ASSIGNMENT_LOOP(view, int8_t, int8_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_UINT16) {
+        } else if(values->dtype.type == NDARRAY_UINT16) {
             ASSIGNMENT_LOOP(view, int8_t, uint16_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_INT16) {
+        } else if(values->dtype.type == NDARRAY_INT16) {
             ASSIGNMENT_LOOP(view, int8_t, int16_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_FLOAT) {
+        } else if(values->dtype.type == NDARRAY_FLOAT) {
             ASSIGNMENT_LOOP(view, int8_t, mp_float_t, lstrides, rarray, rstrides);
         }
-    } else if(view->dtype == NDARRAY_UINT16) {
-        if(values->dtype == NDARRAY_UINT8) {
+    } else if(view->dtype.type == NDARRAY_UINT16) {
+        if(values->dtype.type == NDARRAY_UINT8) {
             ASSIGNMENT_LOOP(view, uint16_t, uint8_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_INT8) {
+        } else if(values->dtype.type == NDARRAY_INT8) {
             ASSIGNMENT_LOOP(view, uint16_t, int8_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_UINT16) {
+        } else if(values->dtype.type == NDARRAY_UINT16) {
             ASSIGNMENT_LOOP(view, uint16_t, uint16_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_INT16) {
+        } else if(values->dtype.type == NDARRAY_INT16) {
             ASSIGNMENT_LOOP(view, uint16_t, int16_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_FLOAT) {
+        } else if(values->dtype.type == NDARRAY_FLOAT) {
             ASSIGNMENT_LOOP(view, uint16_t, mp_float_t, lstrides, rarray, rstrides);
         }
-    } else if(view->dtype == NDARRAY_INT16) {
-        if(values->dtype == NDARRAY_UINT8) {
+    } else if(view->dtype.type == NDARRAY_INT16) {
+        if(values->dtype.type == NDARRAY_UINT8) {
             ASSIGNMENT_LOOP(view, int16_t, uint8_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_INT8) {
+        } else if(values->dtype.type == NDARRAY_INT8) {
             ASSIGNMENT_LOOP(view, int16_t, int8_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_UINT16) {
+        } else if(values->dtype.type == NDARRAY_UINT16) {
             ASSIGNMENT_LOOP(view, int16_t, uint16_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_INT16) {
+        } else if(values->dtype.type == NDARRAY_INT16) {
             ASSIGNMENT_LOOP(view, int16_t, int16_t,  lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_FLOAT) {
+        } else if(values->dtype.type == NDARRAY_FLOAT) {
             ASSIGNMENT_LOOP(view, int16_t, mp_float_t,  lstrides, rarray, rstrides);
         }
     } else { // the dtype must be an mp_float_t now
-        if(values->dtype == NDARRAY_UINT8) {
+        if(values->dtype.type == NDARRAY_UINT8) {
             ASSIGNMENT_LOOP(view, mp_float_t, uint8_t, lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_INT8) {
+        } else if(values->dtype.type == NDARRAY_INT8) {
             ASSIGNMENT_LOOP(view, mp_float_t, int8_t,  lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_UINT16) {
+        } else if(values->dtype.type == NDARRAY_UINT16) {
             ASSIGNMENT_LOOP(view, mp_float_t, uint16_t,  lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_INT16) {
+        } else if(values->dtype.type == NDARRAY_INT16) {
             ASSIGNMENT_LOOP(view, mp_float_t, int16_t,  lstrides, rarray, rstrides);
-        } else if(values->dtype == NDARRAY_FLOAT) {
+        } else if(values->dtype.type == NDARRAY_FLOAT) {
             ASSIGNMENT_LOOP(view, mp_float_t, mp_float_t,  lstrides, rarray, rstrides);
         }
     }
@@ -1242,7 +1258,7 @@ static mp_obj_t ndarray_from_boolean_index(ndarray_obj_t *ndarray, ndarray_obj_t
         count += *iarray;
         iarray += index->strides[ULAB_MAX_DIMS - 1];
     }
-    ndarray_obj_t *results = ndarray_new_linear_array(count, ndarray->dtype);
+    ndarray_obj_t *results = ndarray_new_linear_array(count, ndarray->dtype.type);
     uint8_t *rarray = (uint8_t *)results->array;
     uint8_t *array = (uint8_t *)ndarray->array;
     // re-wind the index array
@@ -1281,64 +1297,64 @@ static mp_obj_t ndarray_assign_from_boolean_index(ndarray_obj_t *ndarray, ndarra
         // there is a single value
         vstride = 0;
     }
-    if(ndarray->dtype == NDARRAY_UINT8) {
-        if(values->dtype == NDARRAY_UINT8) {
+    if(ndarray->dtype.type == NDARRAY_UINT8) {
+        if(values->dtype.type == NDARRAY_UINT8) {
             BOOLEAN_ASSIGNMENT_LOOP(uint8_t, uint8_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_INT8) {
+        } else if(values->dtype.type == NDARRAY_INT8) {
             BOOLEAN_ASSIGNMENT_LOOP(uint8_t, int8_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_UINT16) {
+        } else if(values->dtype.type == NDARRAY_UINT16) {
             BOOLEAN_ASSIGNMENT_LOOP(uint8_t, uint16_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_INT16) {
+        } else if(values->dtype.type == NDARRAY_INT16) {
             BOOLEAN_ASSIGNMENT_LOOP(uint8_t, int16_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_FLOAT) {
+        } else if(values->dtype.type == NDARRAY_FLOAT) {
             BOOLEAN_ASSIGNMENT_LOOP(uint8_t, mp_float_t, ndarray, iarray, istride, varray, vstride);
         }
-    } else if(ndarray->dtype == NDARRAY_INT8) {
-        if(values->dtype == NDARRAY_UINT8) {
+    } else if(ndarray->dtype.type == NDARRAY_INT8) {
+        if(values->dtype.type == NDARRAY_UINT8) {
             BOOLEAN_ASSIGNMENT_LOOP(int8_t, uint8_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_INT8) {
+        } else if(values->dtype.type == NDARRAY_INT8) {
             BOOLEAN_ASSIGNMENT_LOOP(int8_t, int8_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_UINT16) {
+        } else if(values->dtype.type == NDARRAY_UINT16) {
             BOOLEAN_ASSIGNMENT_LOOP(int8_t, uint16_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_INT16) {
+        } else if(values->dtype.type == NDARRAY_INT16) {
             BOOLEAN_ASSIGNMENT_LOOP(int8_t, int16_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_FLOAT) {
+        } else if(values->dtype.type == NDARRAY_FLOAT) {
             BOOLEAN_ASSIGNMENT_LOOP(int8_t, mp_float_t, ndarray, iarray, istride, varray, vstride);
         }
-    } else if(ndarray->dtype == NDARRAY_UINT16) {
-        if(values->dtype == NDARRAY_UINT8) {
+    } else if(ndarray->dtype.type == NDARRAY_UINT16) {
+        if(values->dtype.type == NDARRAY_UINT8) {
             BOOLEAN_ASSIGNMENT_LOOP(uint16_t, uint8_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_INT8) {
+        } else if(values->dtype.type == NDARRAY_INT8) {
             BOOLEAN_ASSIGNMENT_LOOP(uint16_t, int8_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_UINT16) {
+        } else if(values->dtype.type == NDARRAY_UINT16) {
             BOOLEAN_ASSIGNMENT_LOOP(uint16_t, uint16_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_INT16) {
+        } else if(values->dtype.type == NDARRAY_INT16) {
             BOOLEAN_ASSIGNMENT_LOOP(uint16_t, int16_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_FLOAT) {
+        } else if(values->dtype.type == NDARRAY_FLOAT) {
             BOOLEAN_ASSIGNMENT_LOOP(uint16_t, mp_float_t, ndarray, iarray, istride, varray, vstride);
         }
-    } else if(ndarray->dtype == NDARRAY_INT16) {
-        if(values->dtype == NDARRAY_UINT8) {
+    } else if(ndarray->dtype.type == NDARRAY_INT16) {
+        if(values->dtype.type == NDARRAY_UINT8) {
             BOOLEAN_ASSIGNMENT_LOOP(int16_t, uint8_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_INT8) {
+        } else if(values->dtype.type == NDARRAY_INT8) {
             BOOLEAN_ASSIGNMENT_LOOP(int16_t, int8_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_UINT16) {
+        } else if(values->dtype.type == NDARRAY_UINT16) {
             BOOLEAN_ASSIGNMENT_LOOP(int16_t, uint16_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_INT16) {
+        } else if(values->dtype.type == NDARRAY_INT16) {
             BOOLEAN_ASSIGNMENT_LOOP(int16_t, int16_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_FLOAT) {
+        } else if(values->dtype.type == NDARRAY_FLOAT) {
             BOOLEAN_ASSIGNMENT_LOOP(int16_t, mp_float_t, ndarray, iarray, istride, varray, vstride);
         }
     } else {
-        if(values->dtype == NDARRAY_UINT8) {
+        if(values->dtype.type == NDARRAY_UINT8) {
             BOOLEAN_ASSIGNMENT_LOOP(mp_float_t, uint8_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_INT8) {
+        } else if(values->dtype.type == NDARRAY_INT8) {
             BOOLEAN_ASSIGNMENT_LOOP(mp_float_t, int8_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_UINT16) {
+        } else if(values->dtype.type == NDARRAY_UINT16) {
             BOOLEAN_ASSIGNMENT_LOOP(mp_float_t, uint16_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_INT16) {
+        } else if(values->dtype.type == NDARRAY_INT16) {
             BOOLEAN_ASSIGNMENT_LOOP(mp_float_t, int16_t, ndarray, iarray, istride, varray, vstride);
-        } else if(values->dtype == NDARRAY_FLOAT) {
+        } else if(values->dtype.type == NDARRAY_FLOAT) {
             BOOLEAN_ASSIGNMENT_LOOP(mp_float_t, mp_float_t, ndarray, iarray, istride, varray, vstride);
         }
     }
@@ -1373,7 +1389,7 @@ static mp_obj_t ndarray_get_slice(ndarray_obj_t *ndarray, mp_obj_t index, ndarra
         if(values == NULL) { // return value(s)
             // if the view has been reduced to nothing, return a single value
             if(view->ndim == 0) {
-                return mp_binary_get_val_array(view->dtype, view->array, 0);
+                return mp_binary_get_val_array(view->dtype.type, view->array, 0);
             } else {
                 return MP_OBJ_FROM_PTR(view);
             }
@@ -1390,6 +1406,11 @@ mp_obj_t ndarray_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
     if (value == MP_OBJ_SENTINEL) { // return value(s)
         return ndarray_get_slice(self, index, NULL);
     } else { // assignment to slices; the value must be an ndarray, or a scalar
+        #if ULAB_HAS_BLOCKS
+        if(self->flags) { // a non-zero flags field marks a block; assignment is rejected before the value is converted
+            mp_raise_ValueError(translate("blocks cannot be assigned to"));
+        }
+        #endif
         ndarray_obj_t *values = ndarray_from_mp_obj(value);
         return ndarray_get_slice(self, index, values);
     }
@@ -1427,7 +1448,18 @@ mp_obj_t ndarray_iternext(mp_obj_t self_in) {
         if(ndarray->ndim == 1) { // we have a linear array
             array += self->cur * ndarray->strides[ULAB_MAX_DIMS - 1];
             self->cur++;
+            #if ULAB_HAS_BLOCKS
+            if(ndarray->flags) { // non-zero flags: the item is produced through the block's arrfunc
+                void (*arrfunc)(ndarray_obj_t *, void *, int32_t *, size_t) = ndarray->block->arrfunc; // arrfunc is stored as a void * in blocks_block_obj_t
+                int32_t increment; // handed to arrfunc by address; its value is not read again in this branch
+                arrfunc(ndarray, array, &increment, 1); // NOTE(review): presumably fills block->subarray with one item -- confirm against the arrfunc implementations
+                return ndarray_get_item(ndarray, ndarray->block->subarray); // the item is read from block->subarray, not from array
+            } else {
+                return ndarray_get_item(ndarray, array);
+            }
+            #else
             return ndarray_get_item(ndarray, array);
+            #endif
         } else { // we have a tensor, return the reduced view
             size_t offset = self->cur * ndarray->strides[ULAB_MAX_DIMS - ndarray->ndim];
             self->cur++;
@@ -1465,7 +1497,7 @@ mp_obj_t ndarray_flatten(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_a
     }
 
     uint8_t *sarray = (uint8_t *)self->array;
-    ndarray_obj_t *ndarray = ndarray_new_linear_array(self->len, self->dtype);
+    ndarray_obj_t *ndarray = ndarray_new_linear_array(self->len, self->dtype.type);
     uint8_t *array = (uint8_t *)ndarray->array;
 
     if(memcmp(order, "C", 1) == 0) { // C-type ordering
@@ -1704,14 +1736,14 @@ mp_obj_t ndarray_binary_op(mp_binary_op_t _op, mp_obj_t lobj, mp_obj_t robj) {
             case MP_BINARY_OP_SUBTRACT:
                 // here we don't have to list those cases that result in an int16,
                 // because dtype is initialised with that NDARRAY_INT16
-                if(lhs->dtype == rhs->dtype) {
-                    dtype = rhs->dtype;
-                } else if((lhs->dtype == NDARRAY_FLOAT) || (rhs->dtype == NDARRAY_FLOAT)) {
+                if(lhs->dtype.type == rhs->dtype.type) {
+                    dtype = rhs->dtype.type;
+                } else if((lhs->dtype.type == NDARRAY_FLOAT) || (rhs->dtype.type == NDARRAY_FLOAT)) {
                     dtype = NDARRAY_FLOAT;
-                } else if(((lhs->dtype == NDARRAY_UINT8) && (rhs->dtype == NDARRAY_UINT16)) ||
-                            ((lhs->dtype == NDARRAY_INT8) && (rhs->dtype == NDARRAY_UINT16)) ||
-                            ((rhs->dtype == NDARRAY_UINT8) && (lhs->dtype == NDARRAY_UINT16)) ||
-                            ((rhs->dtype == NDARRAY_INT8) && (lhs->dtype == NDARRAY_UINT16))) {
+                } else if(((lhs->dtype.type == NDARRAY_UINT8) && (rhs->dtype.type == NDARRAY_UINT16)) ||
+                            ((lhs->dtype.type == NDARRAY_INT8) && (rhs->dtype.type == NDARRAY_UINT16)) ||
+                            ((rhs->dtype.type == NDARRAY_UINT8) && (lhs->dtype.type == NDARRAY_UINT16)) ||
+                            ((rhs->dtype.type == NDARRAY_INT8) && (lhs->dtype.type == NDARRAY_UINT16))) {
                     dtype = NDARRAY_UINT16;
                 }
                 return MP_OBJ_FROM_PTR(ndarray_new_linear_array(0, dtype));
@@ -1844,12 +1876,12 @@ mp_obj_t ndarray_unary_op(mp_unary_op_t op, mp_obj_t self_in) {
         case MP_UNARY_OP_ABS:
             ndarray = ndarray_copy_view(self);
             // if Boolean, NDARRAY_UINT8, or NDARRAY_UINT16, there is nothing to do
-            if(self->dtype == NDARRAY_INT8) {
+            if(self->dtype.type == NDARRAY_INT8) {
                 int8_t *array = (int8_t *)ndarray->array;
                 for(size_t i=0; i < self->len; i++, array++) {
                     if(*array < 0) *array = -(*array);
                 }
-            } else if(self->dtype == NDARRAY_INT16) {
+            } else if(self->dtype.type == NDARRAY_INT16) {
                 int16_t *array = (int16_t *)ndarray->array;
                 for(size_t i=0; i < self->len; i++, array++) {
                     if(*array < 0) *array = -(*array);
@@ -1865,7 +1897,7 @@ mp_obj_t ndarray_unary_op(mp_unary_op_t op, mp_obj_t self_in) {
         #endif
         #if NDARRAY_HAS_UNARY_OP_INVERT
         case MP_UNARY_OP_INVERT:
-            if(self->dtype == NDARRAY_FLOAT) {
+            if(self->dtype.type == NDARRAY_FLOAT) {
                 mp_raise_ValueError(translate("operation is not supported for given type"));
             }
             // we can invert the content byte by byte, no need to distinguish between different dtypes
@@ -1874,7 +1906,7 @@ mp_obj_t ndarray_unary_op(mp_unary_op_t op, mp_obj_t self_in) {
             if(ndarray->boolean) {
                 for(size_t i=0; i < ndarray->len; i++, array++) *array = *array ^ 0x01;
             } else {
-                uint8_t itemsize = mp_binary_get_size('@', self->dtype, NULL);
+                uint8_t itemsize = mp_binary_get_size('@', self->dtype.type, NULL);
                 for(size_t i=0; i < ndarray->len*itemsize; i++, array++) *array ^= 0xFF;
             }
             return MP_OBJ_FROM_PTR(ndarray);
@@ -1888,16 +1920,16 @@ mp_obj_t ndarray_unary_op(mp_unary_op_t op, mp_obj_t self_in) {
         #if NDARRAY_HAS_UNARY_OP_NEGATIVE
         case MP_UNARY_OP_NEGATIVE:
             ndarray = ndarray_copy_view(self); // from this point, this is a dense copy
-            if(self->dtype == NDARRAY_UINT8) {
+            if(self->dtype.type == NDARRAY_UINT8) {
                 uint8_t *array = (uint8_t *)ndarray->array;
                 for(size_t i=0; i < self->len; i++, array++) *array = -(*array);
-            } else if(self->dtype == NDARRAY_INT8) {
+            } else if(self->dtype.type == NDARRAY_INT8) {
                 int8_t *array = (int8_t *)ndarray->array;
                 for(size_t i=0; i < self->len; i++, array++) *array = -(*array);
-            } else if(self->dtype == NDARRAY_UINT16) {
+            } else if(self->dtype.type == NDARRAY_UINT16) {
                 uint16_t *array = (uint16_t *)ndarray->array;
                 for(size_t i=0; i < self->len; i++, array++) *array = -(*array);
-            } else if(self->dtype == NDARRAY_INT16) {
+            } else if(self->dtype.type == NDARRAY_INT16) {
                 int16_t *array = (int16_t *)ndarray->array;
                 for(size_t i=0; i < self->len; i++, array++) *array = -(*array);
             } else {
@@ -1954,7 +1986,7 @@ mp_obj_t ndarray_reshape(mp_obj_t oin, mp_obj_t _shape) {
 
     mp_obj_tuple_t *shape = MP_OBJ_TO_PTR(_shape);
     if(shape->len > ULAB_MAX_DIMS) {
-        mp_raise_ValueError(translate("maximum number of dimensions is 4"));
+        mp_raise_ValueError(translate("too many dimensions"));
     }
     size_t *new_shape = m_new(size_t, ULAB_MAX_DIMS);
     memset(new_shape, 0, sizeof(size_t)*ULAB_MAX_DIMS);
@@ -1969,10 +2001,10 @@ mp_obj_t ndarray_reshape(mp_obj_t oin, mp_obj_t _shape) {
     ndarray_obj_t *ndarray;
     if(ndarray_is_dense(source)) {
         // TODO: check if this is what numpy does
-        int32_t *new_strides = strides_from_shape(new_shape, source->dtype);
+        int32_t *new_strides = strides_from_shape(new_shape, source->dtype.type);
         ndarray = ndarray_new_view(source, shape->len, new_shape, new_strides, 0);
     } else {
-        ndarray = ndarray_new_ndarray_from_tuple(shape, source->dtype);
+        ndarray = ndarray_new_ndarray_from_tuple(shape, source->dtype.type);
         ndarray_copy_array(source, ndarray);
     }
     return MP_OBJ_FROM_PTR(ndarray);
@@ -1988,7 +2020,13 @@ mp_obj_t ndarray_info(mp_obj_t obj_in) {
     if(!MP_OBJ_IS_TYPE(ndarray, &ulab_ndarray_type)) {
         mp_raise_TypeError(translate("function is defined for ndarrays only"));
     }
+    #if ULAB_HAS_BLOCKS
+    // a blocks-enabled build still handles plain ndarrays (flags == 0),
+    // so the class line has to be printed in both cases
+    mp_printf(MP_PYTHON_PRINTER, "class: %s\n", ndarray->flags ? "block" : "ndarray");
+    #else
     mp_printf(MP_PYTHON_PRINTER, "class: ndarray\n");
+    #endif
     mp_printf(MP_PYTHON_PRINTER, "shape: (");
     if(ndarray->ndim == 1) {
         mp_printf(MP_PYTHON_PRINTER, "%d,", ndarray->shape[ULAB_MAX_DIMS-1]);
@@ -2008,19 +2046,10 @@ mp_obj_t ndarray_info(mp_obj_t obj_in) {
     mp_printf(MP_PYTHON_PRINTER, "itemsize: %d\n", ndarray->itemsize);
     mp_printf(MP_PYTHON_PRINTER, "data pointer: 0x%p\n", ndarray->array);
     mp_printf(MP_PYTHON_PRINTER, "type: ");
-    if(ndarray->boolean) {
-        mp_printf(MP_PYTHON_PRINTER, "bool\n");
-    } else if(ndarray->dtype == NDARRAY_UINT8) {
-        mp_printf(MP_PYTHON_PRINTER, "uint8\n");
-    } else if(ndarray->dtype == NDARRAY_INT8) {
-        mp_printf(MP_PYTHON_PRINTER, "int8\n");
-    } else if(ndarray->dtype == NDARRAY_UINT16) {
-        mp_printf(MP_PYTHON_PRINTER, "uint16\n");
-    } else if(ndarray->dtype == NDARRAY_INT16) {
-        mp_printf(MP_PYTHON_PRINTER, "int16\n");
-    } else if(ndarray->dtype == NDARRAY_FLOAT) {
-        mp_printf(MP_PYTHON_PRINTER, "float\n");
-    }
+    ndarray_print_dtype(MP_PYTHON_PRINTER, ndarray); // NOTE(review): each removed branch printed a trailing "\n"; confirm ndarray_print_dtype does too, otherwise builds without blocks lose the final newline
+    #if ULAB_HAS_BLOCKS
+    mp_printf(MP_PYTHON_PRINTER, "\nflags: %d\n", ndarray->flags); // the leading "\n" ends the "type: " line started above
+    #endif
     return mp_const_none;
 }
 
@@ -2030,11 +2059,17 @@ MP_DEFINE_CONST_FUN_OBJ_1(ndarray_info_obj, ndarray_info);
 // (the get_buffer protocol returns 0 for success, 1 for failure)
 mp_int_t ndarray_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_uint_t flags) {
     ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);
     if(!ndarray_is_dense(self)) {
         return 1;
     }
+    #if ULAB_HAS_BLOCKS
+    // an array with a non-zero flags field does not expose its buffer either
+    if(self->flags) {
+        return 1;
+    }
+    #endif
     bufinfo->len = self->itemsize * self->len;
     bufinfo->buf = self->array;
-    bufinfo->typecode = self->dtype;
+    bufinfo->typecode = self->dtype.type;
     return 0;
 }
diff --git a/code/ndarray.h b/code/ndarray.h
index b659a19c..0d4e067d 100644
--- a/code/ndarray.h
+++ b/code/ndarray.h
@@ -63,9 +63,23 @@ enum NDARRAY_TYPE {
     NDARRAY_FLOAT = FLOAT_TYPECODE,
 };
 
+typedef struct _dtype_dtype {
+    uint8_t type; // one of the NDARRAY_* typecodes; callers compare dtype.type against NDARRAY_UINT8 etc.
+} dtype_dtype;
+
+typedef struct _blocks_block_obj_t {
+    mp_obj_base_t base;
+    uint8_t ndim; // bounds the loops that walk the trailing entries of shape
+    void *ndarray; // cast to ndarray_obj_t * by its users; ndarray_obj_t is only declared further down
+    void *arrfunc; // cast to void (*)(ndarray_obj_t *, void *, int32_t *, size_t) before it is called
+    uint8_t *subarray; // buffer passed to ndarray_get_item after arrfunc has been called
+    size_t shape[ULAB_MAX_DIMS]; // indexed from the end: shape[ULAB_MAX_DIMS - 1] is the last axis
+    void *origin; // base address from which byte offsets into the block are measured
+} blocks_block_obj_t;
+
 typedef struct _ndarray_obj_t {
     mp_obj_base_t base;
-    uint8_t dtype;
+    dtype_dtype dtype;
     uint8_t itemsize;
     uint8_t boolean;
     uint8_t ndim;
@@ -73,6 +87,10 @@ typedef struct _ndarray_obj_t {
     size_t shape[ULAB_MAX_DIMS];
     int32_t strides[ULAB_MAX_DIMS];
     void *array;
+    #if ULAB_HAS_BLOCKS
+    uint8_t flags; // a non-zero value is tested as "this array is a block" (iteration, assignment, buffer access)
+    blocks_block_obj_t *block; // block descriptor; the iterator reads block->arrfunc and block->subarray when flags is non-zero
+    #endif
 } ndarray_obj_t;
 
 #if ULAB_HAS_DTYPE_OBJECT
@@ -80,11 +98,9 @@ extern const mp_obj_type_t ulab_dtype_type;
 
 typedef struct _dtype_obj_t {
     mp_obj_base_t base;
-    uint8_t dtype;
+    dtype_dtype dtype;
 } dtype_obj_t;
 
-void ndarray_dtype_print(const mp_print_t *, mp_obj_t , mp_print_kind_t );
-
 #ifdef CIRCUITPY
 mp_obj_t ndarray_dtype_make_new(const mp_obj_type_t *type, size_t n_args, const mp_obj_t *args, mp_map_t *kw_args);
 #else
@@ -92,6 +108,8 @@ mp_obj_t ndarray_dtype_make_new(const mp_obj_type_t *, size_t , size_t , const m
 #endif /* CIRCUITPY */
 #endif /* ULAB_HAS_DTYPE_OBJECT */
 
+void ndarray_print_dtype(const mp_print_t *, ndarray_obj_t *); // called by ndarray_info in place of the removed per-dtype name printing
+
 mp_obj_t ndarray_new_ndarray_iterator(mp_obj_t , mp_obj_iter_buf_t *);
 
 mp_float_t ndarray_get_float_value(void *, uint8_t );
@@ -114,6 +132,7 @@ void ndarray_assign_elements(ndarray_obj_t *, mp_obj_t , uint8_t , size_t *);
 size_t *ndarray_contract_shape(ndarray_obj_t *, uint8_t );
 int32_t *ndarray_contract_strides(ndarray_obj_t *, uint8_t );
 
+ndarray_obj_t *ndarray_new_ndarray_header(uint8_t , size_t *, int32_t *, uint8_t );
 ndarray_obj_t *ndarray_new_dense_ndarray(uint8_t , size_t *, uint8_t );
 ndarray_obj_t *ndarray_new_ndarray_from_tuple(mp_obj_tuple_t *, uint8_t );
 ndarray_obj_t *ndarray_new_ndarray(uint8_t , size_t *, int32_t *, uint8_t );
diff --git a/code/ndarray_operators.c b/code/ndarray_operators.c
index 465140b6..011f821e 100644
--- a/code/ndarray_operators.c
+++ b/code/ndarray_operators.c
@@ -47,50 +47,50 @@ mp_obj_t ndarray_binary_equality(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
 
     #if NDARRAY_HAS_BINARY_OP_EQUAL
     if(op == MP_BINARY_OP_EQUAL) {
-        if(lhs->dtype == NDARRAY_UINT8) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        if(lhs->dtype.type == NDARRAY_UINT8) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, ==);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, uint8_t, int8_t, larray, lstrides, rarray, rstrides, ==);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, ==);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, uint8_t, int16_t, larray, lstrides, rarray, rstrides, ==);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, ==);
             }
-        } else if(lhs->dtype == NDARRAY_INT8) {
-            if(rhs->dtype == NDARRAY_INT8) {
+        } else if(lhs->dtype.type == NDARRAY_INT8) {
+            if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, int8_t, int8_t, larray, lstrides, rarray, rstrides, ==);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, int8_t, uint16_t, larray, lstrides, rarray, rstrides, ==);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, int8_t, int16_t, larray, lstrides, rarray, rstrides, ==);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, ==);
             } else {
                 return ndarray_binary_op(op, rhs, lhs);
             }
-        } else if(lhs->dtype == NDARRAY_UINT16) {
-            if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(lhs->dtype.type == NDARRAY_UINT16) {
+            if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, ==);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, uint16_t, int16_t, larray, lstrides, rarray, rstrides, ==);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, ==);
             } else {
                 return ndarray_binary_op(op, rhs, lhs);
             }
-        } else if(lhs->dtype == NDARRAY_INT16) {
-            if(rhs->dtype == NDARRAY_INT16) {
+        } else if(lhs->dtype.type == NDARRAY_INT16) {
+            if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, int16_t, int16_t, larray, lstrides, rarray, rstrides, ==);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, ==);
             } else {
                 return ndarray_binary_op(op, rhs, lhs);
             }
-        } else if(lhs->dtype == NDARRAY_FLOAT) {
-            if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(lhs->dtype.type == NDARRAY_FLOAT) {
+            if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, ==);
             } else {
                 return ndarray_binary_op(op, rhs, lhs);
@@ -101,50 +101,50 @@ mp_obj_t ndarray_binary_equality(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
 
     #if NDARRAY_HAS_BINARY_OP_NOT_EQUAL
     if(op == MP_BINARY_OP_NOT_EQUAL) {
-        if(lhs->dtype == NDARRAY_UINT8) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        if(lhs->dtype.type == NDARRAY_UINT8) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, !=);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, uint8_t, int8_t, larray, lstrides, rarray, rstrides, !=);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, !=);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, uint8_t, int16_t, larray, lstrides, rarray, rstrides, !=);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, !=);
             }
-        } else if(lhs->dtype == NDARRAY_INT8) {
-            if(rhs->dtype == NDARRAY_INT8) {
+        } else if(lhs->dtype.type == NDARRAY_INT8) {
+            if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, int8_t, int8_t, larray, lstrides, rarray, rstrides, !=);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, int8_t, uint16_t, larray, lstrides, rarray, rstrides, !=);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, int8_t, int16_t, larray, lstrides, rarray, rstrides, !=);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, !=);
             } else {
                 return ndarray_binary_op(op, rhs, lhs);
             }
-        } else if(lhs->dtype == NDARRAY_UINT16) {
-            if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(lhs->dtype.type == NDARRAY_UINT16) {
+            if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, !=);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, uint16_t, int16_t, larray, lstrides, rarray, rstrides, !=);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, !=);
             } else {
                 return ndarray_binary_op(op, rhs, lhs);
             }
-        } else if(lhs->dtype == NDARRAY_INT16) {
-            if(rhs->dtype == NDARRAY_INT16) {
+        } else if(lhs->dtype.type == NDARRAY_INT16) {
+            if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, int16_t, int16_t, larray, lstrides, rarray, rstrides, !=);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, !=);
             } else {
                 return ndarray_binary_op(op, rhs, lhs);
             }
-        } else if(lhs->dtype == NDARRAY_FLOAT) {
-            if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(lhs->dtype.type == NDARRAY_FLOAT) {
+            if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, !=);
             } else {
                 return ndarray_binary_op(op, rhs, lhs);
@@ -165,64 +165,64 @@ mp_obj_t ndarray_binary_add(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
     uint8_t *larray = (uint8_t *)lhs->array;
     uint8_t *rarray = (uint8_t *)rhs->array;
 
-    if(lhs->dtype == NDARRAY_UINT8) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    if(lhs->dtype.type == NDARRAY_UINT8) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
             BINARY_LOOP(results, uint16_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, +);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, +);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
             BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, +);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, +);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, +);
         }
-    } else if(lhs->dtype == NDARRAY_INT8) {
-        if(rhs->dtype == NDARRAY_INT8) {
+    } else if(lhs->dtype.type == NDARRAY_INT8) {
+        if(rhs->dtype.type == NDARRAY_INT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
             BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, +);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, +);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, +);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, +);
         } else {
             return ndarray_binary_op(MP_BINARY_OP_ADD, rhs, lhs);
         }
-    } else if(lhs->dtype == NDARRAY_UINT16) {
-        if(rhs->dtype == NDARRAY_UINT16) {
+    } else if(lhs->dtype.type == NDARRAY_UINT16) {
+        if(rhs->dtype.type == NDARRAY_UINT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
             BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, +);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, +);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, +);
         } else {
             return ndarray_binary_op(MP_BINARY_OP_ADD, rhs, lhs);
         }
-    } else if(lhs->dtype == NDARRAY_INT16) {
-        if(rhs->dtype == NDARRAY_INT16) {
+    } else if(lhs->dtype.type == NDARRAY_INT16) {
+        if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, +);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, +);
         } else {
             return ndarray_binary_op(MP_BINARY_OP_ADD, rhs, lhs);
         }
-    } else if(lhs->dtype == NDARRAY_FLOAT) {
-        if(rhs->dtype == NDARRAY_FLOAT) {
+    } else if(lhs->dtype.type == NDARRAY_FLOAT) {
+        if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, +);
         } else {
@@ -242,64 +242,64 @@ mp_obj_t ndarray_binary_multiply(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
     uint8_t *larray = (uint8_t *)lhs->array;
     uint8_t *rarray = (uint8_t *)rhs->array;
 
-    if(lhs->dtype == NDARRAY_UINT8) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    if(lhs->dtype.type == NDARRAY_UINT8) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
             BINARY_LOOP(results, uint16_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, *);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, *);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
             BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, *);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, *);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, *);
         }
-    } else if(lhs->dtype == NDARRAY_INT8) {
-        if(rhs->dtype == NDARRAY_INT8) {
+    } else if(lhs->dtype.type == NDARRAY_INT8) {
+        if(rhs->dtype.type == NDARRAY_INT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
             BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, *);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, *);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, *);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, *);
         } else {
             return ndarray_binary_op(MP_BINARY_OP_MULTIPLY, rhs, lhs);
         }
-    } else if(lhs->dtype == NDARRAY_UINT16) {
-        if(rhs->dtype == NDARRAY_UINT16) {
+    } else if(lhs->dtype.type == NDARRAY_UINT16) {
+        if(rhs->dtype.type == NDARRAY_UINT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
             BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, *);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, *);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, *);
         } else {
             return ndarray_binary_op(MP_BINARY_OP_MULTIPLY, rhs, lhs);
         }
-    } else if(lhs->dtype == NDARRAY_INT16) {
-        if(rhs->dtype == NDARRAY_INT16) {
+    } else if(lhs->dtype.type == NDARRAY_INT16) {
+        if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, *);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, *);
         } else {
             return ndarray_binary_op(MP_BINARY_OP_MULTIPLY, rhs, lhs);
         }
-    } else if(lhs->dtype == NDARRAY_FLOAT) {
-        if(rhs->dtype == NDARRAY_FLOAT) {
+    } else if(lhs->dtype.type == NDARRAY_FLOAT) {
+        if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, *);
         } else {
@@ -323,64 +323,64 @@ mp_obj_t ndarray_binary_more(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
 
     #if NDARRAY_HAS_BINARY_OP_MORE | NDARRAY_HAS_BINARY_OP_LESS
     if(op == MP_BINARY_OP_MORE) {
-        if(lhs->dtype == NDARRAY_UINT8) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        if(lhs->dtype.type == NDARRAY_UINT8) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, uint8_t, int8_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, uint8_t, int16_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, >);
             }
-        } else if(lhs->dtype == NDARRAY_INT8) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        } else if(lhs->dtype.type == NDARRAY_INT8) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, int8_t, uint8_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, int8_t, int8_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, int8_t, uint16_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, int8_t, int16_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, >);
             }
-        } else if(lhs->dtype == NDARRAY_UINT16) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        } else if(lhs->dtype.type == NDARRAY_UINT16) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, uint16_t, int8_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, uint16_t, int16_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, >);
             }
-        } else if(lhs->dtype == NDARRAY_INT16) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        } else if(lhs->dtype.type == NDARRAY_INT16) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, int16_t, uint8_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, int16_t, int8_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, int16_t, uint16_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, int16_t, int16_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
-                EQUALITY_LOOP(results, array, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, >);
+                EQUALITY_LOOP(results, array, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, >);
             }
-        } else if(lhs->dtype == NDARRAY_FLOAT) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        } else if(lhs->dtype.type == NDARRAY_FLOAT) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, mp_float_t, int8_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, mp_float_t, int16_t, larray, lstrides, rarray, rstrides, >);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, >);
             }
         }
@@ -388,64 +388,64 @@ mp_obj_t ndarray_binary_more(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
     #endif /* NDARRAY_HAS_BINARY_OP_MORE | NDARRAY_HAS_BINARY_OP_LESS*/
     #if NDARRAY_HAS_BINARY_OP_MORE_EQUAL | NDARRAY_HAS_BINARY_OP_LESS_EQUAL
     if(op == MP_BINARY_OP_MORE_EQUAL) {
-        if(lhs->dtype == NDARRAY_UINT8) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        if(lhs->dtype.type == NDARRAY_UINT8) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, uint8_t, int8_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, uint8_t, int16_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, >=);
             }
-        } else if(lhs->dtype == NDARRAY_INT8) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        } else if(lhs->dtype.type == NDARRAY_INT8) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, int8_t, uint8_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, int8_t, int8_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, int8_t, uint16_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, int8_t, int16_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, >=);
             }
-        } else if(lhs->dtype == NDARRAY_UINT16) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        } else if(lhs->dtype.type == NDARRAY_UINT16) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, uint16_t, int8_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, uint16_t, int16_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, >=);
             }
-        } else if(lhs->dtype == NDARRAY_INT16) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        } else if(lhs->dtype.type == NDARRAY_INT16) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, int16_t, uint8_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, int16_t, int8_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, int16_t, uint16_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, int16_t, int16_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
-                EQUALITY_LOOP(results, array, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, >=);
+                EQUALITY_LOOP(results, array, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, >=);
             }
-        } else if(lhs->dtype == NDARRAY_FLOAT) {
-            if(rhs->dtype == NDARRAY_UINT8) {
+        } else if(lhs->dtype.type == NDARRAY_FLOAT) {
+            if(rhs->dtype.type == NDARRAY_UINT8) {
                 EQUALITY_LOOP(results, array, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_INT8) {
+            } else if(rhs->dtype.type == NDARRAY_INT8) {
                 EQUALITY_LOOP(results, array, mp_float_t, int8_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_UINT16) {
+            } else if(rhs->dtype.type == NDARRAY_UINT16) {
                 EQUALITY_LOOP(results, array, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_INT16) {
+            } else if(rhs->dtype.type == NDARRAY_INT16) {
                 EQUALITY_LOOP(results, array, mp_float_t, int16_t, larray, lstrides, rarray, rstrides, >=);
-            } else if(rhs->dtype == NDARRAY_FLOAT) {
+            } else if(rhs->dtype.type == NDARRAY_FLOAT) {
                 EQUALITY_LOOP(results, array, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, >=);
             }
         }
@@ -464,88 +464,88 @@ mp_obj_t ndarray_binary_subtract(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
     uint8_t *larray = (uint8_t *)lhs->array;
     uint8_t *rarray = (uint8_t *)rhs->array;
 
-    if(lhs->dtype == NDARRAY_UINT8) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    if(lhs->dtype.type == NDARRAY_UINT8) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
             BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
             BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, -);
         }
-    } else if(lhs->dtype == NDARRAY_INT8) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_INT8) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
             BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, -);
         }
-    } else if(lhs->dtype == NDARRAY_UINT16) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_UINT16) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
             BINARY_LOOP(results, uint16_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
             BINARY_LOOP(results, uint16_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
             BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, -);
         }
-    } else if(lhs->dtype == NDARRAY_INT16) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_INT16) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int16_t, int8_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
             BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
-            BINARY_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, -);
+            BINARY_LOOP(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, -);
         }
-    } else if(lhs->dtype == NDARRAY_FLOAT) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_FLOAT) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, mp_float_t, int8_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, mp_float_t, int16_t, larray, lstrides, rarray, rstrides, -);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
             BINARY_LOOP(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, -);
         }
@@ -564,8 +564,8 @@ mp_obj_t ndarray_binary_true_divide(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
     uint8_t *rarray = (uint8_t *)rhs->array;
 
     #if NDARRAY_BINARY_USES_FUN_POINTER
-    mp_float_t (*get_lhs)(void *) = ndarray_get_float_function(lhs->dtype);
-    mp_float_t (*get_rhs)(void *) = ndarray_get_float_function(rhs->dtype);
+    mp_float_t (*get_lhs)(void *) = ndarray_get_float_function(lhs->dtype.type);
+    mp_float_t (*get_rhs)(void *) = ndarray_get_float_function(rhs->dtype.type);
 
     uint8_t *array = (uint8_t *)results->array;
     void (*set_result)(void *, mp_float_t ) = ndarray_set_float_function(NDARRAY_FLOAT);
@@ -574,64 +574,64 @@ mp_obj_t ndarray_binary_true_divide(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
     FUNC_POINTER_LOOP(results, array, get_lhs, get_rhs, larray, lstrides, rarray, rstrides, lvalue/rvalue);
 
     #else
-    if(lhs->dtype == NDARRAY_UINT8) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    if(lhs->dtype.type == NDARRAY_UINT8) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             BINARY_LOOP(results, mp_float_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             BINARY_LOOP(results, mp_float_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             BINARY_LOOP(results, mp_float_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             BINARY_LOOP(results, mp_float_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             BINARY_LOOP(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, /);
         }
-    } else if(lhs->dtype == NDARRAY_INT8) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_INT8) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             BINARY_LOOP(results, mp_float_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             BINARY_LOOP(results, mp_float_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             BINARY_LOOP(results, mp_float_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             BINARY_LOOP(results, mp_float_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             BINARY_LOOP(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, /);
         }
-    } else if(lhs->dtype == NDARRAY_UINT16) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_UINT16) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             BINARY_LOOP(results, mp_float_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             BINARY_LOOP(results, mp_float_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             BINARY_LOOP(results, mp_float_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             BINARY_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, /);
         }
-    } else if(lhs->dtype == NDARRAY_INT16) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_INT16) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             BINARY_LOOP(results, mp_float_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             BINARY_LOOP(results, mp_float_t, int16_t, int8_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             BINARY_LOOP(results, mp_float_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             BINARY_LOOP(results, mp_float_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
-            BINARY_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, /);
+            BINARY_LOOP(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, /);
         }
-    } else if(lhs->dtype == NDARRAY_FLOAT) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_FLOAT) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             BINARY_LOOP(results, mp_float_t, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             BINARY_LOOP(results, mp_float_t, mp_float_t, int8_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             BINARY_LOOP(results, mp_float_t, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             BINARY_LOOP(results, mp_float_t, mp_float_t, int16_t, larray, lstrides, rarray, rstrides, /);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             BINARY_LOOP(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, /);
         }
     }
@@ -652,8 +652,8 @@ mp_obj_t ndarray_binary_power(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
     uint8_t *rarray = (uint8_t *)rhs->array;
 
     #if NDARRAY_BINARY_USES_FUN_POINTER
-    mp_float_t (*get_lhs)(void *) = ndarray_get_float_function(lhs->dtype);
-    mp_float_t (*get_rhs)(void *) = ndarray_get_float_function(rhs->dtype);
+    mp_float_t (*get_lhs)(void *) = ndarray_get_float_function(lhs->dtype.type);
+    mp_float_t (*get_rhs)(void *) = ndarray_get_float_function(rhs->dtype.type);
 
     uint8_t *array = (uint8_t *)results->array;
     void (*set_result)(void *, mp_float_t ) = ndarray_set_float_function(NDARRAY_FLOAT);
@@ -662,64 +662,64 @@ mp_obj_t ndarray_binary_power(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
     FUNC_POINTER_LOOP(results, array, get_lhs, get_rhs, larray, lstrides, rarray, rstrides, MICROPY_FLOAT_C_FUN(pow)(lvalue, rvalue));
 
     #else
-    if(lhs->dtype == NDARRAY_UINT8) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    if(lhs->dtype.type == NDARRAY_UINT8) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             POWER_LOOP(results, mp_float_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             POWER_LOOP(results, mp_float_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             POWER_LOOP(results, mp_float_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             POWER_LOOP(results, mp_float_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             POWER_LOOP(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides);
         }
-    } else if(lhs->dtype == NDARRAY_INT8) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_INT8) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             POWER_LOOP(results, mp_float_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             POWER_LOOP(results, mp_float_t, int8_t, int8_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             POWER_LOOP(results, mp_float_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             POWER_LOOP(results, mp_float_t, int8_t, int16_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             POWER_LOOP(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides);
         }
-    } else if(lhs->dtype == NDARRAY_UINT16) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_UINT16) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             POWER_LOOP(results, mp_float_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             POWER_LOOP(results, mp_float_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             POWER_LOOP(results, mp_float_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             POWER_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             POWER_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides);
         }
-    } else if(lhs->dtype == NDARRAY_INT16) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_INT16) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             POWER_LOOP(results, mp_float_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             POWER_LOOP(results, mp_float_t, int16_t, int8_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             POWER_LOOP(results, mp_float_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             POWER_LOOP(results, mp_float_t, int16_t, int16_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
-            POWER_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides);
+            POWER_LOOP(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides);
         }
-    } else if(lhs->dtype == NDARRAY_FLOAT) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_FLOAT) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             POWER_LOOP(results, mp_float_t, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             POWER_LOOP(results, mp_float_t, mp_float_t, int8_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             POWER_LOOP(results, mp_float_t, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             POWER_LOOP(results, mp_float_t, mp_float_t, int16_t, larray, lstrides, rarray, rstrides);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             POWER_LOOP(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides);
         }
     }
@@ -732,7 +732,7 @@ mp_obj_t ndarray_binary_power(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
 #if NDARRAY_HAS_INPLACE_ADD || NDARRAY_HAS_INPLACE_MULTIPLY || NDARRAY_HAS_INPLACE_SUBTRACT
 mp_obj_t ndarray_inplace_ams(ndarray_obj_t *lhs, ndarray_obj_t *rhs, int32_t *rstrides, uint8_t optype) {
 
-    if((lhs->dtype != NDARRAY_FLOAT) && (rhs->dtype == NDARRAY_FLOAT)) {
+    if((lhs->dtype.type != NDARRAY_FLOAT) && (rhs->dtype.type == NDARRAY_FLOAT)) {
         mp_raise_TypeError(translate("cannot cast output with casting rule"));
     }
     uint8_t *larray = (uint8_t *)lhs->array;
@@ -761,21 +761,21 @@ mp_obj_t ndarray_inplace_ams(ndarray_obj_t *lhs, ndarray_obj_t *rhs, int32_t *rs
 #if NDARRAY_HAS_INPLACE_TRUE_DIVIDE
 mp_obj_t ndarray_inplace_divide(ndarray_obj_t *lhs, ndarray_obj_t *rhs, int32_t *rstrides) {
 
-    if((lhs->dtype != NDARRAY_FLOAT)) {
+    if((lhs->dtype.type != NDARRAY_FLOAT)) {
         mp_raise_TypeError(translate("results cannot be cast to specified type"));
     }
     uint8_t *larray = (uint8_t *)lhs->array;
     uint8_t *rarray = (uint8_t *)rhs->array;
 
-    if(rhs->dtype == NDARRAY_UINT8) {
+    if(rhs->dtype.type == NDARRAY_UINT8) {
         INPLACE_LOOP(lhs, mp_float_t, uint8_t, larray, rarray, rstrides, /=);
-    } else if(rhs->dtype == NDARRAY_INT8) {
+    } else if(rhs->dtype.type == NDARRAY_INT8) {
         INPLACE_LOOP(lhs, mp_float_t, int8_t, larray, rarray, rstrides, /=);
-    } else if(rhs->dtype == NDARRAY_UINT16) {
+    } else if(rhs->dtype.type == NDARRAY_UINT16) {
         INPLACE_LOOP(lhs, mp_float_t, uint16_t, larray, rarray, rstrides, /=);
-    } else if(rhs->dtype == NDARRAY_INT16) {
+    } else if(rhs->dtype.type == NDARRAY_INT16) {
         INPLACE_LOOP(lhs, mp_float_t, int16_t, larray, rarray, rstrides, /=);
-    } else if(lhs->dtype == NDARRAY_FLOAT) {
+    } else if(rhs->dtype.type == NDARRAY_FLOAT) { // lhs is already known to be float (guard above); the loop is selected by the rhs type
         INPLACE_LOOP(lhs, mp_float_t, mp_float_t, larray, rarray, rstrides, /=);
     }
     return MP_OBJ_FROM_PTR(lhs);
@@ -785,21 +785,21 @@ mp_obj_t ndarray_inplace_divide(ndarray_obj_t *lhs, ndarray_obj_t *rhs, int32_t
 #if NDARRAY_HAS_INPLACE_POWER
 mp_obj_t ndarray_inplace_power(ndarray_obj_t *lhs, ndarray_obj_t *rhs, int32_t *rstrides) {
 
-    if((lhs->dtype != NDARRAY_FLOAT)) {
+    if((lhs->dtype.type != NDARRAY_FLOAT)) {
         mp_raise_TypeError(translate("results cannot be cast to specified type"));
     }
     uint8_t *larray = (uint8_t *)lhs->array;
     uint8_t *rarray = (uint8_t *)rhs->array;
 
-    if(rhs->dtype == NDARRAY_UINT8) {
+    if(rhs->dtype.type == NDARRAY_UINT8) {
         INPLACE_POWER(lhs, mp_float_t, uint8_t, larray, rarray, rstrides);
-    } else if(rhs->dtype == NDARRAY_INT8) {
+    } else if(rhs->dtype.type == NDARRAY_INT8) {
         INPLACE_POWER(lhs, mp_float_t, int8_t, larray, rarray, rstrides);
-    } else if(rhs->dtype == NDARRAY_UINT16) {
+    } else if(rhs->dtype.type == NDARRAY_UINT16) {
         INPLACE_POWER(lhs, mp_float_t, uint16_t, larray, rarray, rstrides);
-    } else if(rhs->dtype == NDARRAY_INT16) {
+    } else if(rhs->dtype.type == NDARRAY_INT16) {
         INPLACE_POWER(lhs, mp_float_t, int16_t, larray, rarray, rstrides);
-    } else if(lhs->dtype == NDARRAY_FLOAT) {
+    } else if(rhs->dtype.type == NDARRAY_FLOAT) { // lhs is already known to be float (guard above); the loop is selected by the rhs type
         INPLACE_POWER(lhs, mp_float_t, mp_float_t, larray, rarray, rstrides);
     }
     return MP_OBJ_FROM_PTR(lhs);
diff --git a/code/ndarray_operators.h b/code/ndarray_operators.h
index 7849e030..3ca419b2 100644
--- a/code/ndarray_operators.h
+++ b/code/ndarray_operators.h
@@ -24,54 +24,54 @@ mp_obj_t ndarray_inplace_divide(ndarray_obj_t *, ndarray_obj_t *, int32_t *);
 
 #define UNWRAP_INPLACE_OPERATOR(lhs, larray, rarray, rstrides, OPERATOR)\
 ({\
-    if((lhs)->dtype == NDARRAY_UINT8) {\
-        if((rhs)->dtype == NDARRAY_UINT8) {\
+    if((lhs)->dtype.type == NDARRAY_UINT8) {\
+        if((rhs)->dtype.type == NDARRAY_UINT8) {\
             INPLACE_LOOP((lhs), uint8_t, uint8_t, (larray), (rarray), (rstrides), OPERATOR);\
-        } else if(rhs->dtype == NDARRAY_INT8) {\
+        } else if(rhs->dtype.type == NDARRAY_INT8) {\
             INPLACE_LOOP((lhs), uint8_t, int8_t, (larray), (rarray), (rstrides), OPERATOR);\
-        } else if(rhs->dtype == NDARRAY_UINT16) {\
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {\
             INPLACE_LOOP((lhs), uint8_t, uint16_t, (larray), (rarray), (rstrides), OPERATOR);\
         } else {\
             INPLACE_LOOP((lhs), uint8_t, int16_t, (larray), (rarray), (rstrides), OPERATOR);\
         }\
-    } else if(lhs->dtype == NDARRAY_INT8) {\
-        if(rhs->dtype == NDARRAY_UINT8) {\
+    } else if(lhs->dtype.type == NDARRAY_INT8) {\
+        if(rhs->dtype.type == NDARRAY_UINT8) {\
             INPLACE_LOOP((lhs), int8_t, uint8_t, (larray), (rarray), (rstrides), OPERATOR);\
-        } else if(rhs->dtype == NDARRAY_INT8) {\
+        } else if(rhs->dtype.type == NDARRAY_INT8) {\
             INPLACE_LOOP((lhs), int8_t, int8_t, (larray), (rarray), (rstrides), OPERATOR);\
-        } else if(rhs->dtype == NDARRAY_UINT16) {\
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {\
             INPLACE_LOOP((lhs), int8_t, uint16_t, (larray), (rarray), (rstrides), OPERATOR);\
         } else {\
             INPLACE_LOOP((lhs), int8_t, int16_t, (larray), (rarray), (rstrides), OPERATOR);\
         }\
-    } else if(lhs->dtype == NDARRAY_UINT16) {\
-        if(rhs->dtype == NDARRAY_UINT8) {\
+    } else if(lhs->dtype.type == NDARRAY_UINT16) {\
+        if(rhs->dtype.type == NDARRAY_UINT8) {\
             INPLACE_LOOP((lhs), uint16_t, uint8_t, (larray), (rarray), (rstrides), OPERATOR);\
-        } else if(rhs->dtype == NDARRAY_INT8) {\
+        } else if(rhs->dtype.type == NDARRAY_INT8) {\
             INPLACE_LOOP((lhs), uint16_t, int8_t, (larray), (rarray), (rstrides), OPERATOR);\
-        } else if(rhs->dtype == NDARRAY_UINT16) {\
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {\
             INPLACE_LOOP((lhs), uint16_t, uint16_t, (larray), (rarray), (rstrides), OPERATOR);\
         } else {\
             INPLACE_LOOP((lhs), uint16_t, int16_t, (larray), (rarray), (rstrides), OPERATOR);\
         }\
-    } else if(lhs->dtype == NDARRAY_INT16) {\
-        if(rhs->dtype == NDARRAY_UINT8) {\
+    } else if(lhs->dtype.type == NDARRAY_INT16) {\
+        if(rhs->dtype.type == NDARRAY_UINT8) {\
             INPLACE_LOOP((lhs), int16_t, uint8_t, (larray), (rarray), (rstrides), OPERATOR);\
-        } else if(rhs->dtype == NDARRAY_INT8) {\
+        } else if(rhs->dtype.type == NDARRAY_INT8) {\
             INPLACE_LOOP((lhs), int16_t, int8_t, (larray), (rarray), (rstrides), OPERATOR);\
-        } else if(rhs->dtype == NDARRAY_UINT16) {\
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {\
             INPLACE_LOOP((lhs), int16_t, uint16_t, (larray), (rarray), (rstrides), OPERATOR);\
         } else {\
             INPLACE_LOOP((lhs), int16_t, int16_t, (larray), (rarray), (rstrides), OPERATOR);\
         }\
-    } else if(lhs->dtype == NDARRAY_FLOAT) {\
-        if(rhs->dtype == NDARRAY_UINT8) {\
+    } else if(lhs->dtype.type == NDARRAY_FLOAT) {\
+        if(rhs->dtype.type == NDARRAY_UINT8) {\
             INPLACE_LOOP((lhs), mp_float_t, uint8_t, (larray), (rarray), (rstrides), OPERATOR);\
-        } else if(rhs->dtype == NDARRAY_INT8) {\
+        } else if(rhs->dtype.type == NDARRAY_INT8) {\
             INPLACE_LOOP((lhs), mp_float_t, int8_t, (larray), (rarray), (rstrides), OPERATOR);\
-        } else if(rhs->dtype == NDARRAY_UINT16) {\
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {\
             INPLACE_LOOP((lhs), mp_float_t, uint16_t, (larray), (rarray), (rstrides), OPERATOR);\
-        } else if(rhs->dtype == NDARRAY_INT16) {\
+        } else if(rhs->dtype.type == NDARRAY_INT16) {\
             INPLACE_LOOP((lhs), mp_float_t, int16_t, (larray), (rarray), (rstrides), OPERATOR);\
         } else {\
             INPLACE_LOOP((lhs), mp_float_t, mp_float_t, (larray), (rarray), (rstrides), OPERATOR);\
diff --git a/code/numpy/approx/approx.c b/code/numpy/approx/approx.c
index 171e0f4f..c02e0a63 100644
--- a/code/numpy/approx/approx.c
+++ b/code/numpy/approx/approx.c
@@ -69,20 +69,20 @@ STATIC mp_obj_t approx_interp(size_t n_args, const mp_obj_t *pos_args, mp_map_t
     mp_float_t left_value, right_value;
     uint8_t *xparray = (uint8_t *)xp->array;
 
-    mp_float_t xp_left = ndarray_get_float_value(xparray, xp->dtype);
+    mp_float_t xp_left = ndarray_get_float_value(xparray, xp->dtype.type);
     xparray += (xp->len-1) * xp->strides[ULAB_MAX_DIMS - 1];
-    mp_float_t xp_right = ndarray_get_float_value(xparray, xp->dtype);
+    mp_float_t xp_right = ndarray_get_float_value(xparray, xp->dtype.type);
 
     uint8_t *fparray = (uint8_t *)fp->array;
 
     if(args[3].u_obj == mp_const_none) {
-        left_value = ndarray_get_float_value(fparray, fp->dtype);
+        left_value = ndarray_get_float_value(fparray, fp->dtype.type);
     } else {
         left_value = mp_obj_get_float(args[3].u_obj);
     }
     if(args[4].u_obj == mp_const_none) {
         fparray += (fp->len-1) * fp->strides[ULAB_MAX_DIMS - 1];
-        right_value = ndarray_get_float_value(fparray, fp->dtype);
+        right_value = ndarray_get_float_value(fparray, fp->dtype.type);
     } else {
         right_value = mp_obj_get_float(args[4].u_obj);
     }
@@ -95,7 +95,7 @@ STATIC mp_obj_t approx_interp(size_t n_args, const mp_obj_t *pos_args, mp_map_t
     uint8_t *temp;
 
     for(size_t i=0; i < x->len; i++, yarray++) {
-        mp_float_t x_value = ndarray_get_float_value(xarray, x->dtype);
+        mp_float_t x_value = ndarray_get_float_value(xarray, x->dtype.type);
         xarray += x->strides[ULAB_MAX_DIMS - 1];
         if(x_value < xp_left) {
             *yarray = left_value;
@@ -108,7 +108,7 @@ STATIC mp_obj_t approx_interp(size_t n_args, const mp_obj_t *pos_args, mp_map_t
             while(right_index - left_index > 1) {
                 middle_index = left_index + (right_index - left_index) / 2;
                 temp = xparray + middle_index * xp->strides[ULAB_MAX_DIMS - 1];
-                mp_float_t xp_middle = ndarray_get_float_value(temp, xp->dtype);
+                mp_float_t xp_middle = ndarray_get_float_value(temp, xp->dtype.type);
                 if(x_value <= xp_middle) {
                     right_index = middle_index;
                 } else {
@@ -116,16 +116,16 @@ STATIC mp_obj_t approx_interp(size_t n_args, const mp_obj_t *pos_args, mp_map_t
                 }
             }
             temp = xparray + left_index * xp->strides[ULAB_MAX_DIMS - 1];
-            xp_left_ = ndarray_get_float_value(temp, xp->dtype);
+            xp_left_ = ndarray_get_float_value(temp, xp->dtype.type);
 
             temp = xparray + right_index * xp->strides[ULAB_MAX_DIMS - 1];
-            xp_right_ = ndarray_get_float_value(temp, xp->dtype);
+            xp_right_ = ndarray_get_float_value(temp, xp->dtype.type);
 
             temp = fparray + left_index * fp->strides[ULAB_MAX_DIMS - 1];
-            fp_left = ndarray_get_float_value(temp, fp->dtype);
+            fp_left = ndarray_get_float_value(temp, fp->dtype.type);
 
             temp = fparray + right_index * fp->strides[ULAB_MAX_DIMS - 1];
-            fp_right = ndarray_get_float_value(temp, fp->dtype);
+            fp_right = ndarray_get_float_value(temp, fp->dtype.type);
 
             *yarray = fp_left + (x_value - xp_left_) * (fp_right - fp_left) / (xp_right_ - xp_left_);
         }
@@ -167,7 +167,7 @@ STATIC mp_obj_t approx_trapz(size_t n_args, const mp_obj_t *pos_args, mp_map_t *
         mp_raise_ValueError(translate("trapz is defined for 1D arrays"));
     }
 
-    mp_float_t (*funcy)(void *) = ndarray_get_float_function(y->dtype);
+    mp_float_t (*funcy)(void *) = ndarray_get_float_function(y->dtype.type);
     uint8_t *yarray = (uint8_t *)y->array;
 
     size_t count = 1;
@@ -179,7 +179,7 @@ STATIC mp_obj_t approx_trapz(size_t n_args, const mp_obj_t *pos_args, mp_map_t *
             mp_raise_ValueError(translate("trapz is defined for 1D arrays of equal length"));
         }
 
-        mp_float_t (*funcx)(void *) = ndarray_get_float_function(x->dtype);
+        mp_float_t (*funcx)(void *) = ndarray_get_float_function(x->dtype.type);
         uint8_t *xarray = (uint8_t *)x->array;
         mp_float_t x1, x2;
 
@@ -206,7 +206,7 @@ STATIC mp_obj_t approx_trapz(size_t n_args, const mp_obj_t *pos_args, mp_map_t *
         yarray += y->strides[ULAB_MAX_DIMS - 1];
 
         for(size_t i=1; i < y->len; i++) {
-            y2 = ndarray_get_float_index(y->array, y->dtype, i);
+            y2 = ndarray_get_float_index(y->array, y->dtype.type, i);
             mp_float_t value = (y2 + y1);
             m = mean + (value - mean) / (mp_float_t)count;
             mean = m;
diff --git a/code/numpy/compare/compare.c b/code/numpy/compare/compare.c
index dd22a9d6..e799e8c1 100644
--- a/code/numpy/compare/compare.c
+++ b/code/numpy/compare/compare.c
@@ -54,64 +54,64 @@ static mp_obj_t compare_function(mp_obj_t x1, mp_obj_t x2, uint8_t op) {
     // uint16 + int16 => float
     // The parameters of RUN_COMPARE_LOOP are
     // typecode of result, type_out, type_left, type_right, lhs operand, rhs operand, operator
-    if(lhs->dtype == NDARRAY_UINT8) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    if(lhs->dtype.type == NDARRAY_UINT8) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             RUN_COMPARE_LOOP(NDARRAY_UINT8, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             RUN_COMPARE_LOOP(NDARRAY_UINT16, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
         }
-    } else if(lhs->dtype == NDARRAY_INT8) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_INT8) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             RUN_COMPARE_LOOP(NDARRAY_INT8, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
         }
-    } else if(lhs->dtype == NDARRAY_UINT16) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_UINT16) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             RUN_COMPARE_LOOP(NDARRAY_UINT16, uint16_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             RUN_COMPARE_LOOP(NDARRAY_UINT16, uint16_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             RUN_COMPARE_LOOP(NDARRAY_UINT16, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
-            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op); // type_left must match the uint16 lhs
         }
-    } else if(lhs->dtype == NDARRAY_INT16) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_INT16) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int16_t, int8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             RUN_COMPARE_LOOP(NDARRAY_INT16, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
-            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
+            RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op); // type_left must match the int16 lhs
         }
-    } else if(lhs->dtype == NDARRAY_FLOAT) {
-        if(rhs->dtype == NDARRAY_UINT8) {
+    } else if(lhs->dtype.type == NDARRAY_FLOAT) {
+        if(rhs->dtype.type == NDARRAY_UINT8) {
             RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_INT8) {
+        } else if(rhs->dtype.type == NDARRAY_INT8) {
             RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, int8_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_UINT16) {
+        } else if(rhs->dtype.type == NDARRAY_UINT16) {
             RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_INT16) {
+        } else if(rhs->dtype.type == NDARRAY_INT16) {
             RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, int16_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
-        } else if(rhs->dtype == NDARRAY_FLOAT) {
+        } else if(rhs->dtype.type == NDARRAY_FLOAT) {
             RUN_COMPARE_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides, ndim, shape, op);
         }
     }
@@ -199,7 +199,7 @@ static mp_obj_t compare_isinf_isfinite(mp_obj_t _x, uint8_t mask) {
         ndarray_obj_t *results = ndarray_new_dense_ndarray(x->ndim, x->shape, NDARRAY_BOOL);
         // At this point, results is all False
         uint8_t *rarray = (uint8_t *)results->array;
-        if(x->dtype != NDARRAY_FLOAT) {
+        if(x->dtype.type != NDARRAY_FLOAT) {
             // int types can never be infinite...
             if(!mask) {
                 // ...so flip all values in the array, if the function was called from isfinite
@@ -281,7 +281,7 @@ mp_obj_t compare_maximum(mp_obj_t x1, mp_obj_t x2) {
     mp_obj_t result = compare_function(x1, x2, COMPARE_MAXIMUM);
     if((MP_OBJ_IS_INT(x1) || mp_obj_is_float(x1)) && (MP_OBJ_IS_INT(x2) || mp_obj_is_float(x2))) {
         ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(result);
-        return mp_binary_get_val_array(ndarray->dtype, ndarray->array, 0);
+        return mp_binary_get_val_array(ndarray->dtype.type, ndarray->array, 0);
     }
     return result;
 }
@@ -296,7 +296,7 @@ mp_obj_t compare_minimum(mp_obj_t x1, mp_obj_t x2) {
     mp_obj_t result = compare_function(x1, x2, COMPARE_MINIMUM);
     if((MP_OBJ_IS_INT(x1) || mp_obj_is_float(x1)) && (MP_OBJ_IS_INT(x2) || mp_obj_is_float(x2))) {
         ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(result);
-        return mp_binary_get_val_array(ndarray->dtype, ndarray->array, 0);
+        return mp_binary_get_val_array(ndarray->dtype.type, ndarray->array, 0);
     }
     return result;
 }
diff --git a/code/numpy/fft/fft_tools.c b/code/numpy/fft/fft_tools.c
index e527f22d..39d43112 100644
--- a/code/numpy/fft/fft_tools.c
+++ b/code/numpy/fft/fft_tools.c
@@ -108,7 +108,7 @@ mp_obj_t fft_fft_ifft_spectrogram(size_t n_args, mp_obj_t arg_re, mp_obj_t arg_i
     mp_float_t *data_re = (mp_float_t *)out_re->array;
 
     uint8_t *array = (uint8_t *)re->array;
-    mp_float_t (*func)(void *) = ndarray_get_float_function(re->dtype);
+    mp_float_t (*func)(void *) = ndarray_get_float_function(re->dtype.type);
 
     for(size_t i=0; i < len; i++) {
         *data_re++ = func(array);
@@ -129,7 +129,7 @@ mp_obj_t fft_fft_ifft_spectrogram(size_t n_args, mp_obj_t arg_re, mp_obj_t arg_i
             mp_raise_ValueError(translate("real and imaginary parts must be of equal length"));
         }
         array = (uint8_t *)im->array;
-        func = ndarray_get_float_function(im->dtype);
+        func = ndarray_get_float_function(im->dtype.type);
         for(size_t i=0; i < len; i++) {
            *data_im++ = func(array);
            array += im->strides[ULAB_MAX_DIMS - 1];
diff --git a/code/numpy/filter/filter.c b/code/numpy/filter/filter.c
index 280efd0e..e90d3e38 100644
--- a/code/numpy/filter/filter.c
+++ b/code/numpy/filter/filter.c
@@ -69,8 +69,8 @@ mp_obj_t filter_convolve(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_a
         for(int32_t n=bot_n; n < top_n; n++) {
             int32_t idx_c = (len_c - n - 1) * cs;
             int32_t idx_a = (n + k) * as;
-            mp_float_t ai = ndarray_get_float_index(aarray, a->dtype, idx_a);
-            mp_float_t ci = ndarray_get_float_index(carray, c->dtype, idx_c);
+            mp_float_t ai = ndarray_get_float_index(aarray, a->dtype.type, idx_a);
+            mp_float_t ci = ndarray_get_float_index(carray, c->dtype.type, idx_c);
             accum += ai * ci;
         }
         *outptr++ = accum;
diff --git a/code/numpy/linalg/linalg.c b/code/numpy/linalg/linalg.c
index e62a0922..c7ccccda 100644
--- a/code/numpy/linalg/linalg.c
+++ b/code/numpy/linalg/linalg.c
@@ -46,7 +46,7 @@ static mp_obj_t linalg_cholesky(mp_obj_t oin) {
 
     size_t N = ndarray->shape[ULAB_MAX_DIMS - 1];
     uint8_t *array = (uint8_t *)ndarray->array;
-    mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype);
+    mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype.type);
 
     for(size_t m=0; m < N; m++) { // rows
         for(size_t n=0; n < N; n++) { // columns
@@ -112,7 +112,7 @@ static mp_obj_t linalg_det(mp_obj_t oin) {
     mp_float_t *tmp = m_new(mp_float_t, N * N);
     for(size_t m=0; m < N; m++) { // rows
         for(size_t n=0; n < N; n++) { // columns
-            *tmp++ = ndarray_get_float_value(array, ndarray->dtype);
+            *tmp++ = ndarray_get_float_value(array, ndarray->dtype.type);
             array += ndarray->strides[ULAB_MAX_DIMS - 1];
         }
         array -= ndarray->strides[ULAB_MAX_DIMS - 1] * N;
@@ -184,7 +184,7 @@ static mp_obj_t linalg_eig(mp_obj_t oin) {
     mp_float_t *array = m_new(mp_float_t, S*S);
     for(size_t i=0; i < S; i++) { // rows
         for(size_t j=0; j < S; j++) { // columns
-            *array++ = ndarray_get_float_value(iarray, in->dtype);
+            *array++ = ndarray_get_float_value(iarray, in->dtype.type);
             iarray += in->strides[ULAB_MAX_DIMS - 1];
         }
         iarray -= in->strides[ULAB_MAX_DIMS - 1] * S;
@@ -245,7 +245,7 @@ static mp_obj_t linalg_inv(mp_obj_t o_in) {
     ndarray_obj_t *inverted = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, N, N), NDARRAY_FLOAT);
     mp_float_t *iarray = (mp_float_t *)inverted->array;
 
-    mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype);
+    mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype.type);
 
     for(size_t i=0; i < N; i++) { // rows
         for(size_t j=0; j < N; j++) { // columns
@@ -304,7 +304,7 @@ static mp_obj_t linalg_norm(size_t n_args, const mp_obj_t *pos_args, mp_map_t *k
         ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(x);
         uint8_t *array = (uint8_t *)ndarray->array;
         // always get a float, so that we don't have to resolve the dtype later
-        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype);
+        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype.type);
         shape_strides _shape_strides = tools_reduce_axes(ndarray, axis);
         ndarray_obj_t *results = ndarray_new_dense_ndarray(_shape_strides.ndim, _shape_strides.shape, NDARRAY_FLOAT);
         mp_float_t *rarray = (mp_float_t *)results->array;
diff --git a/code/numpy/numerical/numerical.c b/code/numpy/numerical/numerical.c
index a8227d87..c44aea56 100644
--- a/code/numpy/numerical/numerical.c
+++ b/code/numpy/numerical/numerical.c
@@ -23,6 +23,7 @@
 #include "../../ulab.h"
 #include "../../ulab_tools.h"
 #include "numerical.h"
+#include "blocks/blocks.h"
 
 enum NUMERICAL_FUNCTION_TYPE {
     NUMERICAL_ALL,
@@ -70,7 +71,7 @@ static mp_obj_t numerical_all_any(mp_obj_t oin, mp_obj_t axis, uint8_t optype) {
         ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(oin);
         uint8_t *array = (uint8_t *)ndarray->array;
         // always get a float, so that we don't have to resolve the dtype later
-        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype);
+        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype.type);
         ndarray_obj_t *results = NULL;
         uint8_t *rarray = NULL;
         shape_strides _shape_strides = tools_reduce_axes(ndarray, axis);
@@ -198,7 +199,16 @@ static mp_obj_t numerical_sum_mean_std_ndarray(ndarray_obj_t *ndarray, mp_obj_t
             // if there are too many degrees of freedom, there is no point in calculating anything
             return mp_obj_new_float(MICROPY_FLOAT_CONST(0.0));
         }
-        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype);
+        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype.type);
+        #if ULAB_HAS_BLOCKS
+        void (*arrfunc)(ndarray_obj_t *, void *, int32_t *, size_t) = NULL;
+        if(ndarray->flags) {
+            arrfunc = ndarray->block->arrfunc;
+        }
+        #endif
+        int32_t increment = _shape_strides.strides[ULAB_MAX_DIMS - 1];
+        uint8_t *barray = array;
+
         mp_float_t M = 0.0, m = 0.0, S = 0.0, s = 0.0;
         size_t count = 0;
 
@@ -215,20 +225,26 @@ static mp_obj_t numerical_sum_mean_std_ndarray(ndarray_obj_t *ndarray, mp_obj_t
                 do {
                 #endif
                     size_t l = 0;
+                    barray = array; // re-anchor the read pointer at the current row; array itself is no longer advanced by the inner loop
+                    #if ULAB_HAS_BLOCKS
+                    if(ndarray->flags) {
+                        arrfunc(ndarray, array, &increment, _shape_strides.shape[ULAB_MAX_DIMS - 1]);
+                        barray = ndarray->block->subarray;
+                    }
+                    #endif
                     do {
                         count++;
-                        mp_float_t value = func(array);
+                        mp_float_t value = func(barray);
                         m = M + (value - M) / (mp_float_t)count;
                         if(optype == NUMERICAL_STD) {
                             s = S + (value - M) * (value - m);
                             S = s;
                         }
                         M = m;
-                        array += _shape_strides.strides[ULAB_MAX_DIMS - 1];
+                        barray += increment;
                         l++;
                     } while(l < _shape_strides.shape[ULAB_MAX_DIMS - 1]);
                 #if ULAB_MAX_DIMS > 1
-                    array -= _shape_strides.strides[ULAB_MAX_DIMS - 1] * _shape_strides.shape[ULAB_MAX_DIMS - 1];
                     array += _shape_strides.strides[ULAB_MAX_DIMS - 2];
                     k++;
                 } while(k < _shape_strides.shape[ULAB_MAX_DIMS - 2]);
@@ -247,7 +263,7 @@ static mp_obj_t numerical_sum_mean_std_ndarray(ndarray_obj_t *ndarray, mp_obj_t
         #endif
         if(optype == NUMERICAL_SUM) {
             // numpy returns an integer for integer input types
-            if(ndarray->dtype == NDARRAY_FLOAT) {
+            if(ndarray->dtype.type == NDARRAY_FLOAT) {
                 return mp_obj_new_float(M * ndarray->len);
             } else {
                 return mp_obj_new_int((int32_t)(M * ndarray->len));
@@ -262,23 +278,24 @@ static mp_obj_t numerical_sum_mean_std_ndarray(ndarray_obj_t *ndarray, mp_obj_t
         ndarray_obj_t *results = NULL;
         uint8_t *rarray = NULL;
         mp_float_t *farray = NULL;
+
         if(optype == NUMERICAL_SUM) {
-            results = ndarray_new_dense_ndarray(_shape_strides.ndim, _shape_strides.shape, ndarray->dtype);
+            results = ndarray_new_dense_ndarray(_shape_strides.ndim, _shape_strides.shape, ndarray->dtype.type);
             rarray = (uint8_t *)results->array;
             // TODO: numpy promotes the output to the highest integer type
-            if(ndarray->dtype == NDARRAY_UINT8) {
-                RUN_SUM(uint8_t, array, results, rarray, _shape_strides);
-            } else if(ndarray->dtype == NDARRAY_INT8) {
-                RUN_SUM(int8_t, array, results, rarray, _shape_strides);
-            } else if(ndarray->dtype == NDARRAY_UINT16) {
-                RUN_SUM(uint16_t, array, results, rarray, _shape_strides);
-            } else if(ndarray->dtype == NDARRAY_INT16) {
-                RUN_SUM(int16_t, array, results, rarray, _shape_strides);
+            if(ndarray->dtype.type == NDARRAY_UINT8) {
+                RUN_SUM(uint8_t, ndarray, array, results, rarray, _shape_strides);
+            } else if(ndarray->dtype.type == NDARRAY_INT8) {
+                RUN_SUM(int8_t, ndarray, array, results, rarray, _shape_strides);
+            } else if(ndarray->dtype.type == NDARRAY_UINT16) {
+                RUN_SUM(uint16_t, ndarray, array, results, rarray, _shape_strides);
+            } else if(ndarray->dtype.type == NDARRAY_INT16) {
+                RUN_SUM(int16_t, ndarray, array, results, rarray, _shape_strides);
             } else {
                 // for floats, the sum might be inaccurate with the naive summation
                 // call mean, and multiply with the number of samples
                 farray = (mp_float_t *)results->array;
-                RUN_MEAN_STD(mp_float_t, array, farray, _shape_strides, 0.0, 0);
+                RUN_MEAN_STD(mp_float_t, ndarray, array, farray, _shape_strides, 0.0, 0);
                 mp_float_t norm = (mp_float_t)_shape_strides.shape[0];
                 // re-wind the array here
                 farray = (mp_float_t *)results->array;
@@ -295,20 +312,20 @@ static mp_obj_t numerical_sum_mean_std_ndarray(ndarray_obj_t *ndarray, mp_obj_t
                 return MP_OBJ_FROM_PTR(results);
             }
             mp_float_t div = optype == NUMERICAL_STD ? (mp_float_t)(_shape_strides.shape[0] - ddof) : 0.0;
-            if(ndarray->dtype == NDARRAY_UINT8) {
-                RUN_MEAN_STD(uint8_t, array, farray, _shape_strides, div, isStd);
-            } else if(ndarray->dtype == NDARRAY_INT8) {
-                RUN_MEAN_STD(int8_t, array, farray, _shape_strides, div, isStd);
-            } else if(ndarray->dtype == NDARRAY_UINT16) {
-                RUN_MEAN_STD(uint16_t, array, farray, _shape_strides, div, isStd);
-            } else if(ndarray->dtype == NDARRAY_INT16) {
-                RUN_MEAN_STD(int16_t, array, farray, _shape_strides, div, isStd);
+            if(ndarray->dtype.type == NDARRAY_UINT8) {
+                RUN_MEAN_STD(uint8_t, ndarray, array, farray, _shape_strides, div, isStd);
+            } else if(ndarray->dtype.type == NDARRAY_INT8) {
+                RUN_MEAN_STD(int8_t, ndarray, array, farray, _shape_strides, div, isStd);
+            } else if(ndarray->dtype.type == NDARRAY_UINT16) {
+                RUN_MEAN_STD(uint16_t, ndarray, array, farray, _shape_strides, div, isStd);
+            } else if(ndarray->dtype.type == NDARRAY_INT16) {
+                RUN_MEAN_STD(int16_t, ndarray, array, farray, _shape_strides, div, isStd);
             } else {
-                RUN_MEAN_STD(mp_float_t, array, farray, _shape_strides, div, isStd);
+                RUN_MEAN_STD(mp_float_t, ndarray, array, farray, _shape_strides, div, isStd);
             }
         }
         if(results->ndim == 0) { // return a scalar here
-            return mp_binary_get_val_array(results->dtype, results->array, 0);
+            return mp_binary_get_val_array(results->dtype.type, results->array, 0);
         }
         return MP_OBJ_FROM_PTR(results);
     }
@@ -359,7 +376,7 @@ static mp_obj_t numerical_argmin_argmax_ndarray(ndarray_obj_t *ndarray, mp_obj_t
 
     if(axis == mp_const_none) {
         // work with the flattened array
-        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype);
+        mp_float_t (*func)(void *) = ndarray_get_float_function(ndarray->dtype.type);
         uint8_t *array = (uint8_t *)ndarray->array;
         mp_float_t best_value = func(array);
         mp_float_t value;
@@ -417,7 +434,7 @@ static mp_obj_t numerical_argmin_argmax_ndarray(ndarray_obj_t *ndarray, mp_obj_t
         if((optype == NUMERICAL_ARGMIN) || (optype == NUMERICAL_ARGMAX)) {
             return mp_obj_new_int(best_index);
         } else {
-            if(ndarray->dtype == NDARRAY_FLOAT) {
+            if(ndarray->dtype.type == NDARRAY_FLOAT) {
                 return mp_obj_new_float(best_value);
             } else {
                 return MP_OBJ_NEW_SMALL_INT((int32_t)best_value);
@@ -443,24 +460,24 @@ static mp_obj_t numerical_argmin_argmax_ndarray(ndarray_obj_t *ndarray, mp_obj_t
         if((optype == NUMERICAL_ARGMIN) || (optype == NUMERICAL_ARGMAX)) {
             results = ndarray_new_dense_ndarray(MAX(1, ndarray->ndim-1), shape, NDARRAY_INT16);
         } else {
-            results = ndarray_new_dense_ndarray(MAX(1, ndarray->ndim-1), shape, ndarray->dtype);
+            results = ndarray_new_dense_ndarray(MAX(1, ndarray->ndim-1), shape, ndarray->dtype.type);
         }
 
         uint8_t *rarray = (uint8_t *)results->array;
 
-        if(ndarray->dtype == NDARRAY_UINT8) {
+        if(ndarray->dtype.type == NDARRAY_UINT8) {
             RUN_ARGMIN(ndarray, uint8_t, array, results, rarray, shape, strides, index, optype);
-        } else if(ndarray->dtype == NDARRAY_INT8) {
+        } else if(ndarray->dtype.type == NDARRAY_INT8) {
             RUN_ARGMIN(ndarray, int8_t, array, results, rarray, shape, strides, index, optype);
-        } else if(ndarray->dtype == NDARRAY_UINT16) {
+        } else if(ndarray->dtype.type == NDARRAY_UINT16) {
             RUN_ARGMIN(ndarray, uint16_t, array, results, rarray, shape, strides, index, optype);
-        } else if(ndarray->dtype == NDARRAY_INT16) {
+        } else if(ndarray->dtype.type == NDARRAY_INT16) {
             RUN_ARGMIN(ndarray, int16_t, array, results, rarray, shape, strides, index, optype);
         } else {
             RUN_ARGMIN(ndarray, mp_float_t, array, results, rarray, shape, strides, index, optype);
         }
         if(results->len == 1) {
-            return mp_binary_get_val_array(results->dtype, results->array, 0);
+            return mp_binary_get_val_array(results->dtype.type, results->array, 0);
         }
         return MP_OBJ_FROM_PTR(results);
     }
@@ -561,9 +578,9 @@ static mp_obj_t numerical_sort_helper(mp_obj_t oin, mp_obj_t axis, uint8_t inpla
     int32_t increment = ndarray->strides[ax] / ndarray->itemsize;
 
     uint8_t *array = (uint8_t *)ndarray->array;
-    if((ndarray->dtype == NDARRAY_UINT8) || (ndarray->dtype == NDARRAY_INT8)) {
+    if((ndarray->dtype.type == NDARRAY_UINT8) || (ndarray->dtype.type == NDARRAY_INT8)) {
         HEAPSORT(ndarray, uint8_t, array, shape, strides, ax, increment, ndarray->shape[ax]);
-    } else if((ndarray->dtype == NDARRAY_INT16) || (ndarray->dtype == NDARRAY_INT16)) {
+    } else if((ndarray->dtype.type == NDARRAY_UINT16) || (ndarray->dtype.type == NDARRAY_INT16)) { /* both 16-bit dtypes: the test used to name INT16 twice, which sent UINT16 data to the mp_float_t branch */
         HEAPSORT(ndarray, uint16_t, array, shape, strides, ax, increment, ndarray->shape[ax]);
     } else {
         HEAPSORT(ndarray, mp_float_t, array, shape, strides, ax, increment, ndarray->shape[ax]);
@@ -710,9 +727,9 @@ mp_obj_t numerical_argsort(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
     // reset the array
     iarray = indices->array;
 
-    if((ndarray->dtype == NDARRAY_UINT8) || (ndarray->dtype == NDARRAY_INT8)) {
+    if((ndarray->dtype.type == NDARRAY_UINT8) || (ndarray->dtype.type == NDARRAY_INT8)) {
         HEAP_ARGSORT(ndarray, uint8_t, array, shape, strides, ax, increment, ndarray->shape[ax], iarray, istrides, iincrement);
-    } else if((ndarray->dtype == NDARRAY_UINT16) || (ndarray->dtype == NDARRAY_INT16)) {
+    } else if((ndarray->dtype.type == NDARRAY_UINT16) || (ndarray->dtype.type == NDARRAY_INT16)) {
         HEAP_ARGSORT(ndarray, uint16_t, array, shape, strides, ax, increment, ndarray->shape[ax], iarray, istrides, iincrement);
     } else {
         HEAP_ARGSORT(ndarray, mp_float_t, array, shape, strides, ax, increment, ndarray->shape[ax], iarray, istrides, iincrement);
@@ -740,12 +757,12 @@ static mp_obj_t numerical_cross(mp_obj_t _a, mp_obj_t _b) {
     }
 
     mp_float_t *results = m_new(mp_float_t, 3);
-    results[0] = ndarray_get_float_index(a->array, a->dtype, 1) * ndarray_get_float_index(b->array, b->dtype, 2);
-    results[0] -= ndarray_get_float_index(a->array, a->dtype, 2) * ndarray_get_float_index(b->array, b->dtype, 1);
-    results[1] = -ndarray_get_float_index(a->array, a->dtype, 0) * ndarray_get_float_index(b->array, b->dtype, 2);
-    results[1] += ndarray_get_float_index(a->array, a->dtype, 2) * ndarray_get_float_index(b->array, b->dtype, 0);
-    results[2] = ndarray_get_float_index(a->array, a->dtype, 0) * ndarray_get_float_index(b->array, b->dtype, 1);
-    results[2] -= ndarray_get_float_index(a->array, a->dtype, 1) * ndarray_get_float_index(b->array, b->dtype, 0);
+    results[0] = ndarray_get_float_index(a->array, a->dtype.type, 1) * ndarray_get_float_index(b->array, b->dtype.type, 2);
+    results[0] -= ndarray_get_float_index(a->array, a->dtype.type, 2) * ndarray_get_float_index(b->array, b->dtype.type, 1);
+    results[1] = -ndarray_get_float_index(a->array, a->dtype.type, 0) * ndarray_get_float_index(b->array, b->dtype.type, 2);
+    results[1] += ndarray_get_float_index(a->array, a->dtype.type, 2) * ndarray_get_float_index(b->array, b->dtype.type, 0);
+    results[2] = ndarray_get_float_index(a->array, a->dtype.type, 0) * ndarray_get_float_index(b->array, b->dtype.type, 1);
+    results[2] -= ndarray_get_float_index(a->array, a->dtype.type, 1) * ndarray_get_float_index(b->array, b->dtype.type, 0);
 
     /* The upcasting happens here with the rules
 
@@ -762,17 +779,17 @@ static mp_obj_t numerical_cross(mp_obj_t _a, mp_obj_t _b) {
     */
 
     uint8_t dtype = NDARRAY_FLOAT;
-    if(a->dtype == b->dtype) {
-        dtype = a->dtype;
-    } else if(((a->dtype == NDARRAY_UINT8) && (b->dtype == NDARRAY_INT8)) || ((a->dtype == NDARRAY_INT8) && (b->dtype == NDARRAY_UINT8))) {
+    if(a->dtype.type == b->dtype.type) {
+        dtype = a->dtype.type;
+    } else if(((a->dtype.type == NDARRAY_UINT8) && (b->dtype.type == NDARRAY_INT8)) || ((a->dtype.type == NDARRAY_INT8) && (b->dtype.type == NDARRAY_UINT8))) {
         dtype = NDARRAY_INT16;
-    } else if(((a->dtype == NDARRAY_UINT8) && (b->dtype == NDARRAY_INT16)) || ((a->dtype == NDARRAY_INT16) && (b->dtype == NDARRAY_UINT8))) {
+    } else if(((a->dtype.type == NDARRAY_UINT8) && (b->dtype.type == NDARRAY_INT16)) || ((a->dtype.type == NDARRAY_INT16) && (b->dtype.type == NDARRAY_UINT8))) {
         dtype = NDARRAY_INT16;
-    } else if(((a->dtype == NDARRAY_UINT8) && (b->dtype == NDARRAY_UINT16)) || ((a->dtype == NDARRAY_UINT16) && (b->dtype == NDARRAY_UINT8))) {
+    } else if(((a->dtype.type == NDARRAY_UINT8) && (b->dtype.type == NDARRAY_UINT16)) || ((a->dtype.type == NDARRAY_UINT16) && (b->dtype.type == NDARRAY_UINT8))) {
         dtype = NDARRAY_UINT16;
-    } else if(((a->dtype == NDARRAY_INT8) && (b->dtype == NDARRAY_INT16)) || ((a->dtype == NDARRAY_INT16) && (b->dtype == NDARRAY_INT8))) {
+    } else if(((a->dtype.type == NDARRAY_INT8) && (b->dtype.type == NDARRAY_INT16)) || ((a->dtype.type == NDARRAY_INT16) && (b->dtype.type == NDARRAY_INT8))) {
         dtype = NDARRAY_INT16;
-    } else if(((a->dtype == NDARRAY_INT8) && (b->dtype == NDARRAY_UINT16)) || ((a->dtype == NDARRAY_UINT16) && (b->dtype == NDARRAY_INT8))) {
+    } else if(((a->dtype.type == NDARRAY_INT8) && (b->dtype.type == NDARRAY_UINT16)) || ((a->dtype.type == NDARRAY_UINT16) && (b->dtype.type == NDARRAY_INT8))) {
         dtype = NDARRAY_UINT16;
     }
 
@@ -854,7 +871,7 @@ mp_obj_t numerical_diff(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_ar
         }
     }
     uint8_t *array = (uint8_t *)ndarray->array;
-    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim, shape, ndarray->dtype);
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim, shape, ndarray->dtype.type);
     uint8_t *rarray = (uint8_t *)results->array;
 
     memset(shape, 0, sizeof(size_t)*ULAB_MAX_DIMS);
@@ -862,13 +879,13 @@ mp_obj_t numerical_diff(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_ar
     memset(strides, 0, sizeof(int32_t)*ULAB_MAX_DIMS);
     numerical_reduce_axes(ndarray, ax, shape, strides);
 
-    if(ndarray->dtype == NDARRAY_UINT8) {
+    if(ndarray->dtype.type == NDARRAY_UINT8) {
         RUN_DIFF(ndarray, uint8_t, array, results, rarray, shape, strides, index, stencil, N);
-    } else if(ndarray->dtype == NDARRAY_INT8) {
+    } else if(ndarray->dtype.type == NDARRAY_INT8) {
         RUN_DIFF(ndarray, int8_t, array, results, rarray, shape, strides, index, stencil, N);
-    }  else if(ndarray->dtype == NDARRAY_UINT16) {
+    }  else if(ndarray->dtype.type == NDARRAY_UINT16) {
         RUN_DIFF(ndarray, uint16_t, array, results, rarray, shape, strides, index, stencil, N);
-    } else if(ndarray->dtype == NDARRAY_INT16) {
+    } else if(ndarray->dtype.type == NDARRAY_INT16) {
         RUN_DIFF(ndarray, int16_t, array, results, rarray, shape, strides, index, stencil, N);
     } else {
         RUN_DIFF(ndarray, mp_float_t, array, results, rarray, shape, strides, index, stencil, N);
@@ -905,7 +922,7 @@ mp_obj_t numerical_flip(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_ar
     ndarray_obj_t *results = NULL;
     ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[0].u_obj);
     if(args[1].u_obj == mp_const_none) { // flip the flattened array
-        results = ndarray_new_linear_array(ndarray->len, ndarray->dtype);
+        results = ndarray_new_linear_array(ndarray->len, ndarray->dtype.type);
         ndarray_copy_array(ndarray, results);
         uint8_t *rarray = (uint8_t *)results->array;
         rarray += (results->len - 1) * results->itemsize;
@@ -981,10 +998,10 @@ mp_obj_t numerical_median(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_
         uint8_t *array = (uint8_t *)ndarray->array;
         size_t len = ndarray->len;
         array += (len >> 1) * ndarray->itemsize;
-        mp_float_t median = ndarray_get_float_value(array, ndarray->dtype);
+        mp_float_t median = ndarray_get_float_value(array, ndarray->dtype.type);
         if(!(len & 0x01)) { // len is an even number
             array -= ndarray->itemsize;
-            median += ndarray_get_float_value(array, ndarray->dtype);
+            median += ndarray_get_float_value(array, ndarray->dtype.type);
             median *= MICROPY_FLOAT_CONST(0.5);
         }
         return mp_obj_new_float(median);
@@ -1017,10 +1034,10 @@ mp_obj_t numerical_median(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_
                 size_t k = 0;
                 do {
                     array += ndarray->strides[ax] * (len >> 1);
-                    mp_float_t median = ndarray_get_float_value(array, ndarray->dtype);
+                    mp_float_t median = ndarray_get_float_value(array, ndarray->dtype.type);
                     if(!(len & 0x01)) { // len is an even number
                         array -= ndarray->strides[ax];
-                        median += ndarray_get_float_value(array, ndarray->dtype);
+                        median += ndarray_get_float_value(array, ndarray->dtype.type);
                         median *= MICROPY_FLOAT_CONST(0.5);
                         array += ndarray->strides[ax];
                     }
@@ -1087,7 +1104,7 @@ mp_obj_t numerical_roll(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_ar
     }
     ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(args[0].u_obj);
     uint8_t *array = ndarray->array;
-    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim, ndarray->shape, ndarray->dtype);
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim, ndarray->shape, ndarray->dtype.type);
 
     int32_t shift = mp_obj_get_int(args[1].u_obj);
     int32_t _shift = shift < 0 ? -shift : shift;
diff --git a/code/numpy/numerical/numerical.h b/code/numpy/numerical/numerical.h
index ef7b95d7..295e68f7 100644
--- a/code/numpy/numerical/numerical.h
+++ b/code/numpy/numerical/numerical.h
@@ -46,7 +46,8 @@
     (rarray) += (results)->itemsize;\
 })
 
-#define RUN_SUM1(type, array, results, rarray, ss)\
+#if !(ULAB_HAS_BLOCKS)
+#define RUN_SUM1(type, ndarray, array, results, rarray, ss)\
 ({\
     type sum = 0;\
     for(size_t i=0; i < (ss).shape[0]; i++) {\
@@ -56,50 +57,150 @@
     memcpy((rarray), &sum, (results)->itemsize);\
     (rarray) += (results)->itemsize;\
 })
-
-// The mean could be calculated by simply dividing the sum by
-// the number of elements, but that method is numerically unstable
-#define RUN_MEAN1(type, array, rarray, ss)\
+#else
+#define RUN_SUM1(type, ndarray, array, results, rarray, ss) /* ULAB_HAS_BLOCKS variant: adds up (ss).shape[0] items read as `type` and stores the sum at rarray */\
 ({\
-    mp_float_t M = 0.0;\
+    type sum = 0; /* the accumulator has the element type itself, there is no promotion to a wider type */\
+    uint8_t *barray = (array); /* read cursor; stays on array unless the flags test below redirects it */\
+    int32_t increment = (ss).strides[0]; /* step applied to the read cursor after each item */\
+    if((ndarray)->flags) { /* non-zero flags: hand over to the function stored in the block */\
+        void (*arrfunc)(ndarray_obj_t *, void *, int32_t *, size_t) = (ndarray)->block->arrfunc;\
+        arrfunc((ndarray), (array), &increment, (ss).shape[0]); /* increment is passed by address, so arrfunc may change the step; presumably it fills block->subarray - arrfunc is defined elsewhere */\
+        barray = (ndarray)->block->subarray; /* from here on the items are read from the block's subarray */\
+    }\
     for(size_t i=0; i < (ss).shape[0]; i++) {\
-        mp_float_t value = (mp_float_t)(*(type *)(array));\
-        M = M + (value - M) / (mp_float_t)(i+1);\
-        (array) += (ss).strides[0];\
+        sum += *((type *)(barray)); /* array itself is not touched inside the loop */\
+        barray += increment;\
     }\
-    *(rarray)++ = M;\
+    (array) += (ss).shape[0] * (ss).strides[0]; /* move array forward by the full run, whichever buffer was read; RUN_SUM2 subtracts the same amount again */\
+    memcpy((rarray), &sum, (results)->itemsize); /* copy itemsize bytes of the sum into the result buffer */\
+    (rarray) += (results)->itemsize; /* advance to the next result slot */\
 })
+#endif
+
+#define RUN_SUM2(type, ndarray, array, results, rarray, ss) do { /* repeats RUN_SUM1, stepping array by slot ULAB_MAX_DIMS - 1 of (ss).strides between runs */\
+    size_t l = 0; /* number of RUN_SUM1 runs done so far */\
+    do { /* do/while: the body runs at least once, even if the shape entry is 0 */\
+        RUN_SUM1(type, (ndarray), (array), (results), (rarray), (ss));\
+        (array) -= (ss).strides[0] * (ss).shape[0]; /* take back the shape[0] * strides[0] that RUN_SUM1 added to array */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 1]; /* move to the start of the next run */\
+        l++;\
+    } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+} while(0)
+
+#define RUN_SUM3(type, ndarray, array, results, rarray, ss) do { /* repeats RUN_SUM2, stepping array by slot ULAB_MAX_DIMS - 2 of (ss).strides between passes */\
+    size_t k = 0; /* number of RUN_SUM2 passes done so far */\
+    do { /* do/while: the body runs at least once, even if the shape entry is 0 */\
+        RUN_SUM2(type, (ndarray), (array), (results), (rarray), (ss));\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1]; /* take back the steps RUN_SUM2 added to array */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 2]; /* move to the start of the next pass */\
+        k++;\
+    } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+#define RUN_SUM4(type, ndarray, array, results, rarray, ss) do { /* repeats RUN_SUM3, stepping array by slot ULAB_MAX_DIMS - 3 of (ss).strides between passes */\
+    size_t j = 0; /* number of RUN_SUM3 passes done so far */\
+    do { /* do/while: the body runs at least once, even if the shape entry is 0 */\
+        RUN_SUM3(type, (ndarray), (array), (results), (rarray), (ss));\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS - 2]; /* take back the steps RUN_SUM3 added to array */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 3]; /* move to the start of the next pass */\
+        j++;\
+    } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
 
 // Instead of the straightforward implementation of the definition,
 // we take the numerically stable Welford algorithm here
 // https://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/
-#define RUN_STD1(type, array, rarray, ss, div)\
+
+#if !(ULAB_HAS_BLOCKS)
+#define RUN_MEAN_STD1(type, ndarray, array, rarray, ss, div, isStd) /* variant without blocks: one running mean/std pass over (ss).shape[0] items; ndarray is not referenced here */\
 ({\
     mp_float_t M = 0.0, m = 0.0, S = 0.0;\
     for(size_t i=0; i < (ss).shape[0]; i++) {\
-        mp_float_t value = (mp_float_t)(*(type *)(array));\
+        mp_float_t value = (mp_float_t)(*(type *)((array))); /* read one item as `type` and convert it to mp_float_t */\
         m = M + (value - M) / (mp_float_t)(i+1);\
-        S = S + (value - M) * (value - m);\
+        if(isStd) { /* the squared-deviation sum is only accumulated when isStd is set */\
+            S += (value - M) * (value - m); /* uses the mean before (M) and after (m) this item */\
+        }\
         M = m;\
         (array) += (ss).strides[0];\
     }\
-    *(rarray)++ = MICROPY_FLOAT_C_FUN(sqrt)(S / (div));\
+    *(rarray)++ = isStd ? MICROPY_FLOAT_C_FUN(sqrt)(S / (div)) : M; /* isStd set: sqrt(S / div); otherwise the running mean M */\
 })
-
-#define RUN_MEAN_STD1(type, array, rarray, ss, div, isStd)\
+#else
+#define RUN_MEAN_STD1(type, ndarray, array, rarray, ss, div, isStd) /* ULAB_HAS_BLOCKS variant: one running mean/std pass over (ss).shape[0] items */\
 ({\
     mp_float_t M = 0.0, m = 0.0, S = 0.0;\
+    uint8_t *barray = (array); /* read cursor; stays on array unless the flags test below redirects it */\
+    int32_t increment = (ss).strides[0]; /* step applied to the read cursor after each item */\
+    if((ndarray)->flags) { /* non-zero flags: hand over to the function stored in the block */\
+        void (*arrfunc)(ndarray_obj_t *, void *, int32_t *, size_t) = (ndarray)->block->arrfunc;\
+        arrfunc((ndarray), (array), &increment, (ss).shape[0]); /* increment is passed by address, so arrfunc may change the step; presumably it fills block->subarray - arrfunc is defined elsewhere */\
+        barray = (ndarray)->block->subarray; /* from here on the items are read from the block's subarray */\
+    }\
     for(size_t i=0; i < (ss).shape[0]; i++) {\
-        mp_float_t value = (mp_float_t)(*(type *)(array));\
+        mp_float_t value = (mp_float_t)(*(type *)(barray)); /* read one item as `type` through the cursor and convert it to mp_float_t */\
         m = M + (value - M) / (mp_float_t)(i+1);\
         if(isStd) {\
             S += (value - M) * (value - m);\
         }\
         M = m;\
-        (array) += (ss).strides[0];\
+        barray += increment; /* only the cursor moves inside the loop */\
     }\
+    (array) += (ss).shape[0] * (ss).strides[0]; /* move array forward by the full run, whichever buffer was read; RUN_MEAN_STD2 subtracts the same amount again */\
     *(rarray)++ = isStd ? MICROPY_FLOAT_C_FUN(sqrt)(S / (div)) : M;\
 })
+#endif
+
+#define RUN_MEAN_STD2(type, ndarray, array, rarray, ss, div, isStd) do { /* repeats RUN_MEAN_STD1, stepping array by slot ULAB_MAX_DIMS - 1 of (ss).strides between runs */\
+    size_t l = 0; /* number of RUN_MEAN_STD1 runs done so far */\
+    do { /* do/while: the body runs at least once, even if the shape entry is 0 */\
+        RUN_MEAN_STD1(type, (ndarray), (array), (rarray), (ss), (div), (isStd));\
+        (array) -= (ss).strides[0] * (ss).shape[0]; /* take back the shape[0] * strides[0] that RUN_MEAN_STD1 added to array */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 1]; /* move to the start of the next run */\
+        l++;\
+    } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
+} while(0)
+
+#define RUN_MEAN_STD3(type, ndarray, array, rarray, ss, div, isStd) do { /* repeats RUN_MEAN_STD2, stepping array by slot ULAB_MAX_DIMS - 2 of (ss).strides between passes */\
+    size_t k = 0; /* number of RUN_MEAN_STD2 passes done so far */\
+    do { /* do/while: the body runs at least once, even if the shape entry is 0 */\
+        RUN_MEAN_STD2(type, (ndarray), (array), (rarray), (ss), (div), (isStd));\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1]; /* take back the steps RUN_MEAN_STD2 added to array */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 2]; /* move to the start of the next pass */\
+        k++;\
+    } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+
+#define RUN_MEAN_STD4(type, ndarray, array, rarray, ss, div, isStd) do { /* repeats RUN_MEAN_STD3, stepping array by slot ULAB_MAX_DIMS - 3 of (ss).strides between passes */\
+    size_t j = 0; /* number of RUN_MEAN_STD3 passes done so far */\
+    do { /* do/while: the body runs at least once, even if the shape entry is 0 */\
+        RUN_MEAN_STD3(type, (ndarray), (array), (rarray), (ss), (div), (isStd));\
+        (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS - 2]; /* take back the steps RUN_MEAN_STD3 added to array */\
+        (array) += (ss).strides[ULAB_MAX_DIMS - 3]; /* move to the start of the next pass */\
+        j++;\
+    } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+
+#if ULAB_MAX_DIMS == 1 /* RUN_SUM and RUN_MEAN_STD are aliases for the numbered macro that matches ULAB_MAX_DIMS */
+#define RUN_SUM RUN_SUM1
+#define RUN_MEAN_STD RUN_MEAN_STD1
+#endif /* ULAB_MAX_DIMS == 1 */
+
+#if ULAB_MAX_DIMS == 2
+#define RUN_SUM RUN_SUM2
+#define RUN_MEAN_STD RUN_MEAN_STD2
+#endif /* ULAB_MAX_DIMS == 2 */
+
+#if ULAB_MAX_DIMS == 3
+#define RUN_SUM RUN_SUM3
+#define RUN_MEAN_STD RUN_MEAN_STD3
+#endif /* ULAB_MAX_DIMS == 3 */
+
+#if ULAB_MAX_DIMS == 4
+#define RUN_SUM RUN_SUM4
+#define RUN_MEAN_STD RUN_MEAN_STD4
+#endif /* ULAB_MAX_DIMS == 4; no alias is defined for any other value */
 
 #define RUN_DIFF1(ndarray, type, array, results, rarray, index, stencil, N)\
 ({\
@@ -188,22 +289,6 @@
 })
 
 #if ULAB_MAX_DIMS == 1
-#define RUN_SUM(type, array, results, rarray, ss) do {\
-    RUN_SUM1(type, (array), (results), (rarray), (ss));\
-} while(0)
-
-#define RUN_MEAN(type, array, rarray, ss) do {\
-    RUN_MEAN1(type, (array), (rarray), (ss));\
-} while(0)
-
-#define RUN_STD(type, array, rarray, ss, div) do {\
-    RUN_STD1(type, (array), (results), (rarray), (ss), (div));\
-} while(0)
-
-#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\
-    RUN_MEAN_STD1(type, (array), (results), (rarray), (ss), (div), (isStd));\
-} while(0)
-
 #define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\
     RUN_ARGMIN1((ndarray), type, (array), (results), (rarray), (index), (op));\
 } while(0)
@@ -223,46 +308,6 @@
 #endif
 
 #if ULAB_MAX_DIMS == 2
-#define RUN_SUM(type, array, results, rarray, ss) do {\
-    size_t l = 0;\
-    do {\
-        RUN_SUM1(type, (array), (results), (rarray), (ss));\
-        (array) -= (ss).strides[0] * (ss).shape[0];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-        l++;\
-    } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-} while(0)
-
-#define RUN_MEAN(type, array, rarray, ss) do {\
-    size_t l = 0;\
-    do {\
-        RUN_MEAN1(type, (array), (rarray), (ss));\
-        (array) -= (ss).strides[0] * (ss).shape[0];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-        l++;\
-    } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-} while(0)
-
-#define RUN_STD(type, array, rarray, ss, div) do {\
-    size_t l = 0;\
-    do {\
-        RUN_STD1(type, (array), (rarray), (ss), (div));\
-        (array) -= (ss).strides[0] * (ss).shape[0];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-        l++;\
-    } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-} while(0)
-
-#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\
-    size_t l = 0;\
-    do {\
-        RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\
-        (array) -= (ss).strides[0] * (ss).shape[0];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-        l++;\
-    } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-} while(0)
-
 
 #define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\
     size_t l = 0;\
@@ -308,69 +353,7 @@
 #endif
 
 #if ULAB_MAX_DIMS == 3
-#define RUN_SUM(type, array, results, rarray, ss) do {\
-    size_t k = 0;\
-    do {\
-        size_t l = 0;\
-        do {\
-            RUN_SUM1(type, (array), (results), (rarray), (ss));\
-            (array) -= (ss).strides[0] * (ss).shape[0];\
-            (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-            l++;\
-        } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-        (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
-        k++;\
-    } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
-} while(0)
 
-#define RUN_MEAN(type, array, rarray, ss) do {\
-    size_t k = 0;\
-    do {\
-        size_t l = 0;\
-        do {\
-            RUN_MEAN1(type, (array), (rarray), (ss));\
-            (array) -= (ss).strides[0] * (ss).shape[0];\
-            (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-            l++;\
-        } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-        (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
-        k++;\
-    } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
-} while(0)
-
-#define RUN_STD(type, array, rarray, ss, div) do {\
-    size_t k = 0;\
-    do {\
-        size_t l = 0;\
-        do {\
-            RUN_STD1(type, (array), (rarray), (ss), (div));\
-            (array) -= (ss).strides[0] * (ss).shape[0];\
-            (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-            l++;\
-        } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-        (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
-        k++;\
-    } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
-} while(0)
-
-#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\
-    size_t k = 0;\
-    do {\
-        size_t l = 0;\
-        do {\
-            RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\
-            (array) -= (ss).strides[0] * (ss).shape[0];\
-            (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-            l++;\
-        } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-        (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
-        k++;\
-    } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
-} while(0)
 
 #define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\
     size_t k = 0;\
@@ -444,93 +427,6 @@
 #endif
 
 #if ULAB_MAX_DIMS == 4
-#define RUN_SUM(type, array, results, rarray, shape, strides, index) do {\
-    size_t j = 0;\
-    do {\
-        size_t k = 0;\
-        do {\
-            size_t l = 0;\
-            do {\
-                RUN_SUM1(type, (array), (results), (rarray), (ss));\
-                (array) -= (ss).strides[0] * (ss).shape[0];\
-                (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-                l++;\
-            } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-            (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
-            (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
-            k++;\
-        } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
-        (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS - 2];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 3];\
-        j++;\
-    } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
-} while(0)
-
-#define RUN_MEAN(type, array, rarray, ss) do {\
-    size_t j = 0;\
-    do {\
-        size_t k = 0;\
-        do {\
-            size_t l = 0;\
-            do {\
-                RUN_MEAN1(type, (array), (rarray), (ss));\
-                (array) -= (ss).strides[0] * (ss).shape[0];\
-                (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-                l++;\
-            } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-            (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
-            (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
-            k++;\
-        } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
-        (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS - 2];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 3];\
-        j++;\
-    } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
-} while(0)
-
-#define RUN_STD(type, array, rarray, ss, div) do {\
-    size_t j = 0;\
-    do {\
-        size_t k = 0;\
-        do {\
-            size_t l = 0;\
-            do {\
-                RUN_STD1(type, (array), (rarray), (ss), (div));\
-                (array) -= (ss).strides[0] * (ss).shape[0];\
-                (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-                l++;\
-            } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-            (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
-            (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
-            k++;\
-        } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
-        (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS - 2];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 3];\
-        j++;\
-    } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
-} while(0)
-
-#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\
-    size_t j = 0;\
-    do {\
-        size_t k = 0;\
-        do {\
-            size_t l = 0;\
-            do {\
-                RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\
-                (array) -= (ss).strides[0] * (ss).shape[0];\
-                (array) += (ss).strides[ULAB_MAX_DIMS - 1];\
-                l++;\
-            } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\
-            (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS - 1];\
-            (array) += (ss).strides[ULAB_MAX_DIMS - 2];\
-            k++;\
-        } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\
-        (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS - 2];\
-        (array) += (ss).strides[ULAB_MAX_DIMS - 3];\
-        j++;\
-    } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\
-} while(0)
 
 #define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\
     size_t j = 0;\
diff --git a/code/numpy/poly/poly.c b/code/numpy/poly/poly.c
index 6c1ed816..cb7a1989 100644
--- a/code/numpy/poly/poly.c
+++ b/code/numpy/poly/poly.c
@@ -162,7 +162,7 @@ mp_obj_t poly_polyval(mp_obj_t o_p, mp_obj_t o_x) {
         ndarray = ndarray_new_dense_ndarray(source->ndim, source->shape, NDARRAY_FLOAT);
         mp_float_t *array = (mp_float_t *)ndarray->array;
         
-        mp_float_t (*func)(void *) = ndarray_get_float_function(source->dtype);
+        mp_float_t (*func)(void *) = ndarray_get_float_function(source->dtype.type);
 
         // TODO: these loops are really nothing, but the re-impplementation of 
         // ITERATE_VECTOR from vectorise.c. We could pass a function pointer here
diff --git a/code/numpy/stats/stats.c b/code/numpy/stats/stats.c
index 8022ebe3..1d136562 100644
--- a/code/numpy/stats/stats.c
+++ b/code/numpy/stats/stats.c
@@ -39,9 +39,9 @@ static mp_obj_t stats_trace(mp_obj_t oin) {
     mp_float_t trace = 0.0;
     for(size_t i=0; i < ndarray->shape[ULAB_MAX_DIMS - 1]; i++) {
         int32_t pos = i * (ndarray->strides[ULAB_MAX_DIMS - 1] + ndarray->strides[ULAB_MAX_DIMS - 2]);
-        trace += ndarray_get_float_index(ndarray->array, ndarray->dtype, pos/ndarray->itemsize);
+        trace += ndarray_get_float_index(ndarray->array, ndarray->dtype.type, pos/ndarray->itemsize);
     }
-    if(ndarray->dtype == NDARRAY_FLOAT) {
+    if(ndarray->dtype.type == NDARRAY_FLOAT) {
         return mp_obj_new_float(trace);
     }
     return mp_obj_new_int_from_float(trace);
diff --git a/code/numpy/transform/transform.c b/code/numpy/transform/transform.c
index 610b6173..1d68bfa3 100644
--- a/code/numpy/transform/transform.c
+++ b/code/numpy/transform/transform.c
@@ -42,8 +42,8 @@ mp_obj_t transform_dot(mp_obj_t _m1, mp_obj_t _m2) {
     uint8_t *array1 = (uint8_t *)m1->array;
     uint8_t *array2 = (uint8_t *)m2->array;
 
-    mp_float_t (*func1)(void *) = ndarray_get_float_function(m1->dtype);
-    mp_float_t (*func2)(void *) = ndarray_get_float_function(m2->dtype);
+    mp_float_t (*func1)(void *) = ndarray_get_float_function(m1->dtype.type);
+    mp_float_t (*func2)(void *) = ndarray_get_float_function(m2->dtype.type);
 
     #if ULAB_MAX_DIMS > 1
     if ((m1->ndim == 1) && (m2->ndim == 1)) {
diff --git a/code/numpy/vector/vector.c b/code/numpy/vector/vector.c
index a92edcca..48ddadde 100644
--- a/code/numpy/vector/vector.c
+++ b/code/numpy/vector/vector.c
@@ -46,7 +46,7 @@ static mp_obj_t vectorise_generic_vector(mp_obj_t o_in, mp_float_t (*f)(mp_float
         
         #if ULAB_VECTORISE_USES_FUN_POINTER
         
-            mp_float_t (*func)(void *) = ndarray_get_float_function(source->dtype);
+            mp_float_t (*func)(void *) = ndarray_get_float_function(source->dtype.type);
             
             #if ULAB_MAX_DIMS > 3
             size_t i = 0;
@@ -86,13 +86,13 @@ static mp_obj_t vectorise_generic_vector(mp_obj_t o_in, mp_float_t (*f)(mp_float
             } while(i < source->shape[ULAB_MAX_DIMS - 4]);
             #endif /* ULAB_MAX_DIMS > 3 */
         #else
-        if(source->dtype == NDARRAY_UINT8) {
+        if(source->dtype.type == NDARRAY_UINT8) {
             ITERATE_VECTOR(uint8_t, array, source, sarray);
-        } else if(source->dtype == NDARRAY_INT8) {
+        } else if(source->dtype.type == NDARRAY_INT8) {
             ITERATE_VECTOR(int8_t, array, source, sarray);
-        } else if(source->dtype == NDARRAY_UINT16) {
+        } else if(source->dtype.type == NDARRAY_UINT16) {
             ITERATE_VECTOR(uint16_t, array, source, sarray);
-        } else if(source->dtype == NDARRAY_INT16) {
+        } else if(source->dtype.type == NDARRAY_INT16) {
             ITERATE_VECTOR(int16_t, array, source, sarray);
         } else {
             ITERATE_VECTOR(mp_float_t, array, source, sarray);
@@ -183,7 +183,7 @@ mp_obj_t vectorise_around(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_
     mp_float_t *narray = (mp_float_t *)ndarray->array;
     uint8_t *sarray = (uint8_t *)source->array;
 
-    mp_float_t (*func)(void *) = ndarray_get_float_function(source->dtype);
+    mp_float_t (*func)(void *) = ndarray_get_float_function(source->dtype.type);
 
     #if ULAB_MAX_DIMS > 3
     size_t i = 0;
@@ -267,8 +267,8 @@ mp_obj_t vectorise_arctan2(mp_obj_t y, mp_obj_t x) {
     ndarray_obj_t *results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
     mp_float_t *rarray = (mp_float_t *)results->array;
 
-    mp_float_t (*funcx)(void *) = ndarray_get_float_function(ndarray_x->dtype);
-    mp_float_t (*funcy)(void *) = ndarray_get_float_function(ndarray_y->dtype);
+    mp_float_t (*funcx)(void *) = ndarray_get_float_function(ndarray_x->dtype.type);
+    mp_float_t (*funcy)(void *) = ndarray_get_float_function(ndarray_y->dtype.type);
 
     #if ULAB_MAX_DIMS > 3
     size_t i = 0;
@@ -556,7 +556,7 @@ static mp_obj_t vectorise_vectorized_function_call(mp_obj_t self_in, size_t n_ar
         ndarray_obj_t *source = MP_OBJ_TO_PTR(args[0]);
         ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(source->ndim, source->shape, self->otypes);
         for(size_t i=0; i < source->len; i++) {
-            avalue[0] = mp_binary_get_val_array(source->dtype, source->array, i);
+            avalue[0] = mp_binary_get_val_array(source->dtype.type, source->array, i);
             fvalue = self->type->call(self->fun, 1, 0, avalue);
             mp_binary_set_val_array(self->otypes, ndarray->array, i, fvalue);
         }
diff --git a/code/scipy/signal/signal.c b/code/scipy/signal/signal.c
index 09e92d79..eda26351 100644
--- a/code/scipy/signal/signal.c
+++ b/code/scipy/signal/signal.c
@@ -79,7 +79,7 @@ mp_obj_t signal_sosfilt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_ar
         #endif
         uint8_t *iarray = (uint8_t *)inarray->array;
         for(size_t i=0; i < lenx; i++) {
-            *yarray++ = ndarray_get_float_value(iarray, inarray->dtype);
+            *yarray++ = ndarray_get_float_value(iarray, inarray->dtype.type);
             iarray += inarray->strides[ULAB_MAX_DIMS - 1];
         }
         yarray -= lenx;
@@ -103,7 +103,7 @@ mp_obj_t signal_sosfilt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_ar
             if((zi->shape[ULAB_MAX_DIMS - 1] != lensos) || (zi->shape[ULAB_MAX_DIMS - 1] != 2)) {
                 mp_raise_ValueError(translate("zi must be of shape (n_section, 2)"));
             }
-            if(zi->dtype != NDARRAY_FLOAT) {
+            if(zi->dtype.type != NDARRAY_FLOAT) {
                 mp_raise_ValueError(translate("zi must be of float type"));
             }
             // TODO: this won't work with sparse arrays
diff --git a/code/ulab.c b/code/ulab.c
index f29283b4..64aa113f 100644
--- a/code/ulab.c
+++ b/code/ulab.c
@@ -26,6 +26,7 @@
 
 #include "numpy/numpy.h"
 #include "scipy/scipy.h"
+#include "blocks/blocks.h"
 #include "numpy/fft/fft.h"
 #include "numpy/linalg/linalg.h"
 // TODO: we should get rid of this; array.sort depends on it
@@ -128,10 +129,13 @@ STATIC const mp_map_elem_t ulab_globals_table[] = {
         { MP_OBJ_NEW_QSTR(MP_QSTR_dtype), (mp_obj_t)&ndarray_dtype_obj },
         #endif /* NDARRAY_HAS_DTYPE */
     #endif /* ULAB_HAS_DTYPE_OBJECT */
-        { MP_ROM_QSTR(MP_QSTR_numpy), MP_ROM_PTR(&ulab_numpy_module) },
+    { MP_ROM_QSTR(MP_QSTR_numpy), MP_ROM_PTR(&ulab_numpy_module) },
     #if ULAB_HAS_SCIPY
         { MP_ROM_QSTR(MP_QSTR_scipy), MP_ROM_PTR(&ulab_scipy_module) },
     #endif
+    #if ULAB_HAS_BLOCKS
+        { MP_ROM_QSTR(MP_QSTR_blocks), MP_ROM_PTR(&ulab_blocks_module) },
+    #endif
     #if ULAB_HAS_USER_MODULE
         { MP_ROM_QSTR(MP_QSTR_user), MP_ROM_PTR(&ulab_user_module) },
     #endif
diff --git a/code/ulab.h b/code/ulab.h
index 92dcca78..ae2c1b0a 100644
--- a/code/ulab.h
+++ b/code/ulab.h
@@ -38,17 +38,28 @@
 #define ULAB_HAS_SCIPY                      (1)
 #endif
 
+// Determines, whether the blocks module is defined in ulab. When ULAB_HAS_BLOCKS is
+// defined 1, a function pointer can be attached to the dtype object
+// Such function pointers are useful for custom data types
+#ifndef ULAB_HAS_BLOCKS
+#define ULAB_HAS_BLOCKS                      (1)
+#endif
+
 // The maximum number of dimensions the firmware should be able to support
 // Possible values lie between 1, and 4, inclusive
-#define ULAB_MAX_DIMS                       2
+#ifndef ULAB_MAX_DIMS
+#define ULAB_MAX_DIMS                       (2)
+#endif
 
 // By setting this constant to 1, iteration over array dimensions will be implemented
 // as a function (ndarray_rewind_array), instead of writing out the loops in macros
 // This reduces firmware size at the expense of speed
+#ifndef ULAB_HAS_FUNCTION_ITERATOR
 #define ULAB_HAS_FUNCTION_ITERATOR          (0)
+#endif
 
 // If NDARRAY_IS_ITERABLE is 1, the ndarray object defines its own iterator function
-// This option saves approx. 250 bytes of flash space
+// Setting this option to 0 saves approx. 250 bytes of flash space
 #ifndef NDARRAY_IS_ITERABLE
 #define NDARRAY_IS_ITERABLE                 (1)
 #endif
@@ -59,12 +70,12 @@
 #endif
 
 // The default threshold for pretty printing. These variables can be overwritten
-// at run-time via the set_printoptions() function
+// at run-time via the set_printoptions() function, if ULAB_HAS_PRINTOPTIONS is 1
+#define NDARRAY_PRINT_THRESHOLD             (10)
+#define NDARRAY_PRINT_EDGEITEMS             (3)
 #ifndef ULAB_HAS_PRINTOPTIONS
 #define ULAB_HAS_PRINTOPTIONS               (1)
 #endif
-#define NDARRAY_PRINT_THRESHOLD             10
-#define NDARRAY_PRINT_EDGEITEMS             3
 
 // determines, whether the dtype is an object, or simply a character
 // the object implementation is numpythonic, but requires more space
@@ -72,13 +83,15 @@
 #define ULAB_HAS_DTYPE_OBJECT               (0)
 #endif
 
-// the ndarray binary operators
+// The ndarray binary operators
+// If, e.g., only FFTs are required, massive savings of
+// flash space can be had by disabling the binary operators
 #ifndef NDARRAY_HAS_BINARY_OPS
 #define NDARRAY_HAS_BINARY_OPS              (1)
 #endif
 
 // Firmware size can be reduced at the expense of speed by using function
-// pointers in iterations. For each operator, he function pointer saves around
+// pointers in iterations. For each operator, the function pointer saves around
 // 2 kB in the two-dimensional case, and around 4 kB in the four-dimensional case.
 
 #ifndef NDARRAY_BINARY_USES_FUN_POINTER
@@ -179,7 +192,7 @@
 #endif
 
 
-// determines, which ndarray methods are available
+// Constants in the following section determine, which ndarray methods are available
 #ifndef NDARRAY_HAS_BYTESWAP
 #define NDARRAY_HAS_BYTESWAP            (1)
 #endif
@@ -612,7 +625,7 @@
 // user-defined module; source of the module and
 // its sub-modules should be placed in code/user/
 #ifndef ULAB_HAS_USER_MODULE
-#define ULAB_HAS_USER_MODULE                (0)
+#define ULAB_HAS_USER_MODULE                (1)
 #endif
 
 #endif
diff --git a/code/ulab_create.c b/code/ulab_create.c
index c3d4c2db..71161024 100644
--- a/code/ulab_create.c
+++ b/code/ulab_create.c
@@ -187,7 +187,7 @@ mp_obj_t create_concatenate(size_t n_args, const mp_obj_t *pos_args, mp_map_t *k
 
     // first check, whether the arrays are compatible
     ndarray_obj_t *_ndarray = MP_OBJ_TO_PTR(ndarrays->items[0]);
-    uint8_t dtype = _ndarray->dtype;
+    uint8_t dtype = _ndarray->dtype.type;
     uint8_t ndim = _ndarray->ndim;
     if(axis < 0) {
         axis += ndim;
@@ -204,7 +204,7 @@ mp_obj_t create_concatenate(size_t n_args, const mp_obj_t *pos_args, mp_map_t *k
     for(uint8_t i=1; i < ndarrays->len; i++) {
         _ndarray = MP_OBJ_TO_PTR(ndarrays->items[i]);
         // check, whether the arrays are compatible
-        if((dtype != _ndarray->dtype) || (ndim != _ndarray->ndim)) {
+        if((dtype != _ndarray->dtype.type) || (ndim != _ndarray->ndim)) {
             mp_raise_ValueError(translate("input arrays are not compatible"));
         }
         for(uint8_t j=0; j < ULAB_MAX_DIMS; j++) {
@@ -306,7 +306,7 @@ mp_obj_t create_diag(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args)
     }
     ndarray_obj_t *source = MP_OBJ_TO_PTR(args[0].u_obj);
     if(source->ndim == 1) { // return a rank-2 tensor with the prescribed diagonal
-        ndarray_obj_t *target = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, source->len, source->len), source->dtype);
+        ndarray_obj_t *target = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, source->len, source->len), source->dtype.type);
         uint8_t *sarray = (uint8_t *)source->array;
         uint8_t *tarray = (uint8_t *)target->array;
         for(size_t i=0; i < source->len; i++) {
@@ -338,7 +338,7 @@ mp_obj_t create_diag(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args)
         mp_raise_ValueError(translate("offset is too large"));
     }
 
-    ndarray_obj_t *target = ndarray_new_linear_array(len, source->dtype);
+    ndarray_obj_t *target = ndarray_new_linear_array(len, source->dtype.type);
     uint8_t *tarray = (uint8_t *)target->array;
 
     for(size_t i=0; i < len; i++) {
@@ -382,8 +382,8 @@ mp_obj_t create_eye(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args)
     }
     ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(2, ndarray_shape_vector(0, 0, n, m), dtype);
     if(dtype == NDARRAY_BOOL) {
-       dtype = NDARRAY_UINT8;
-   }
+        dtype = NDARRAY_UINT8;
+    }
     mp_obj_t one = mp_obj_new_int(1);
     size_t i = 0;
     if((args[2].u_int >= 0)) {
@@ -682,7 +682,7 @@ mp_obj_t create_frombuffer(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
         }
         ndarray_obj_t *ndarray = m_new_obj(ndarray_obj_t);
         ndarray->base.type = &ulab_ndarray_type;
-        ndarray->dtype = dtype == NDARRAY_BOOL ? NDARRAY_UINT8 : dtype;
+        ndarray->dtype.type = dtype == NDARRAY_BOOL ? NDARRAY_UINT8 : dtype;
         ndarray->boolean = dtype == NDARRAY_BOOL ? NDARRAY_BOOLEAN : NDARRAY_NUMERIC;
         ndarray->ndim = 1;
         ndarray->len = len;
diff --git a/code/ulab_tools.c b/code/ulab_tools.c
index 9663d3d5..b3a1499f 100644
--- a/code/ulab_tools.c
+++ b/code/ulab_tools.c
@@ -10,6 +10,7 @@
 
 
 #include <string.h>
+#include <math.h>
 #include "py/runtime.h"
 
 #include "ulab.h"
diff --git a/code/user/user.c b/code/user/user.c
index fa5e660e..6b6136ee 100644
--- a/code/user/user.c
+++ b/code/user/user.c
@@ -15,6 +15,9 @@
 #include "py/obj.h"
 #include "py/runtime.h"
 #include "py/misc.h"
+
+#include "../ulab_tools.h"
+#include "../blocks/blocks.h"
 #include "user.h"
 
 #if ULAB_HAS_USER_MODULE
@@ -39,29 +42,29 @@ static mp_obj_t user_square(mp_obj_t arg) {
 
     // if the input is a dense array, create `results` with the same number of
     // dimensions, shape, and dtype
-    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim, ndarray->shape, ndarray->dtype);
+    ndarray_obj_t *results = ndarray_new_dense_ndarray(ndarray->ndim, ndarray->shape, ndarray->dtype.type);
 
     // since in a dense array the iteration over the elements is trivial, we
     // can cast the data arrays ndarray->array and results->array to the actual type
-    if(ndarray->dtype == NDARRAY_UINT8) {
+    if(ndarray->dtype.type == NDARRAY_UINT8) {
         uint8_t *array = (uint8_t *)ndarray->array;
         uint8_t *rarray = (uint8_t *)results->array;
         for(size_t i=0; i < ndarray->len; i++, array++) {
             *rarray++ = (*array) * (*array);
         }
-    } else if(ndarray->dtype == NDARRAY_INT8) {
+    } else if(ndarray->dtype.type == NDARRAY_INT8) {
         int8_t *array = (int8_t *)ndarray->array;
         int8_t *rarray = (int8_t *)results->array;
         for(size_t i=0; i < ndarray->len; i++, array++) {
             *rarray++ = (*array) * (*array);
         }
-    } else if(ndarray->dtype == NDARRAY_UINT16) {
+    } else if(ndarray->dtype.type == NDARRAY_UINT16) {
         uint16_t *array = (uint16_t *)ndarray->array;
         uint16_t *rarray = (uint16_t *)results->array;
         for(size_t i=0; i < ndarray->len; i++, array++) {
             *rarray++ = (*array) * (*array);
         }
-    } else if(ndarray->dtype == NDARRAY_INT16) {
+    } else if(ndarray->dtype.type == NDARRAY_INT16) {
         int16_t *array = (int16_t *)ndarray->array;
         int16_t *rarray = (int16_t *)results->array;
         for(size_t i=0; i < ndarray->len; i++, array++) {
@@ -80,9 +83,52 @@ static mp_obj_t user_square(mp_obj_t arg) {
 
 MP_DEFINE_CONST_FUN_OBJ_1(user_square_obj, user_square);
 
+extern const mp_obj_type_t imreader_type;
+// Example transformer read-out: writes `count` dummy bytes into block->subarray and reports the stride
+void imreader_imreader(ndarray_obj_t *ndarray, void *array, int32_t *strides, size_t count) {
+    blocks_block_obj_t *block = (blocks_block_obj_t *)ndarray->block;
+    uint8_t *barray = (uint8_t *)block->subarray;
+    // Translate the position of `array` into coordinates of the original reference frame, i.e.,
+    // the one used when the object was created; coords is allocated by the callee (m_new)
+    size_t *coords = blocks_coords_from_pointer(array, ndarray);
+    uint8_t x = (uint8_t)coords[ULAB_MAX_DIMS - 2] * (uint8_t)block->shape[ULAB_MAX_DIMS - 2]; // factors and product are truncated to 8 bits
+    for(size_t i = 0; i < count; i++) { // NOTE(review): assumes subarray holds at least `count` bytes; confirm in the caller
+        // fill up the array with dummy data: (x + i)^2, truncated to 8 bits by the cast
+        *barray++ = (uint8_t)((x + i) * (x + i));
+    }
+    // The subarray is a forward propagating dense array, so set the strides to the itemsize
+    *strides = ndarray->itemsize;
+}
+// Constructor of `imreader`: returns a blocks transformer object whose read-out function is imreader_imreader
+mp_obj_t imreader_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) {
+    (void)type; // unused: the returned object is always given blocks_transformer_type
+    mp_arg_check_num(n_args, n_kw, 0, 1, true); // zero or one positional argument, keywords allowed
+    mp_map_t kw_args;
+    mp_map_init_fixed_table(&kw_args, n_kw, args + n_args); // keyword arguments start right after the positional ones
+
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_, MP_ARG_OBJ, { .u_obj = mp_const_none } }, // single optional object, defaults to None
+    };
+    mp_arg_val_t _args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args, args, &kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, _args); // the parsed value is not read below
+
+    blocks_transformer_obj_t *transformer = m_new_obj(blocks_transformer_obj_t);
+    transformer->base.type = &blocks_transformer_type;
+    transformer->arrfunc = imreader_imreader; // function that supplies the data on read-out
+    transformer->array = NULL; // no array is attached at construction
+    return MP_OBJ_FROM_PTR(transformer);
+}
+// Type object bound to the name `imreader` in the user module; instantiation goes through imreader_make_new
+const mp_obj_type_t imreader_type = {
+    { &mp_type_type },
+    .name = MP_QSTR_imreader,
+    .make_new = imreader_make_new,
+};
+
 static const mp_rom_map_elem_t ulab_user_globals_table[] = {
     { MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR_user) },
     { MP_OBJ_NEW_QSTR(MP_QSTR_square), (mp_obj_t)&user_square_obj },
+    { MP_OBJ_NEW_QSTR(MP_QSTR_imreader), (mp_obj_t)&imreader_type },
 };
 
 static MP_DEFINE_CONST_DICT(mp_module_ulab_user_globals, ulab_user_globals_table);
diff --git a/docs/ulab-programming.ipynb b/docs/ulab-programming.ipynb
index 0776981f..99cdc41f 100644
--- a/docs/ulab-programming.ipynb
+++ b/docs/ulab-programming.ipynb
@@ -40,7 +40,7 @@
     "\n",
     "## Code organisation\n",
     "\n",
-    "As mentioned earlier, the `python` functions are organised into sub-modules at the C level. The C sub-modules can be found in `./ulab/code/`."
+    "As mentioned earlier, functions are organised into sub-modules at the C, as well as at the `python` level. At the moment, `ulab` has four such sub-modules, namely, `numpy`, `scipy`, `blocks`, and `user`, and the source of the corresponding C sub-modules can be found in `./ulab/code/numpy`, `./ulab/code/scipy`, `./ulab/code/blocks`, and `./ulab/code/user`, respectively."
    ]
   },
   {
@@ -81,7 +81,7 @@
    "source": [
     "### Memory layout\n",
     "\n",
-    "The values of an `ndarray` are stored in a contiguous segment in the RAM. The `ndarray` can be dense, meaning that all numbers in the linear memory segment belong to a linar combination of coordinates, and it can also be sparse, i.e., some elements of the linear storage space will be skipped, when the elements of the tensor are traversed. \n",
+    "The values of an `ndarray` are stored in a contiguous segment in the RAM. The `ndarray` can be dense, meaning that all numbers in the linear memory segment belong to a linear combination of coordinates, and it can also be sparse, i.e., some elements of the linear storage space will be skipped, when the elements of the tensor are traversed. \n",
     "\n",
     "In the RAM, the position of the item $M(n_1, n_2, ..., n_{k-1}, n_k)$ in a dense tensor of rank $k$ is given by the linear combination \n",
     "\n",
@@ -98,11 +98,11 @@
     "\n",
     "When creating a *view*, we simply re-calculate the `strides`, and re-set the `*array` pointer.\n",
     "\n",
-    "## Iterating over elements of a tensor\n",
+    "## Iterating over the elements of a tensor\n",
     "\n",
     "The `shape` and `strides` members of the array tell us how we have to move our pointer, when we want to read out the numbers. For technical reasons that will become clear later, the numbers in `shape` and in `strides` are aligned to the right, and begin on the right hand side, i.e., if the number of possible dimensions is `ULAB_MAX_DIMS`, then `shape[ULAB_MAX_DIMS-1]` is the length of the last axis, `shape[ULAB_MAX_DIMS-2]` is the length of the last but one axis, and so on. If the number of actual dimensions, `ndim < ULAB_MAX_DIMS`, the first `ULAB_MAX_DIMS - ndim` entries in `shape` and `strides` will be equal to zero, but they could, in fact, be assigned any value, because these will never be accessed in an operation.\n",
     "\n",
-    "With this definition of the strides, the linear combination in $P(n_1, n_2, ..., n_{k-1}, n_k)$ is a one-to-one mapping from the space of tensor coordinates, $(n_1, n_2, ..., n_{k-1}, n_k)$, and the coordinate in the linear array, $n_1s_1 + n_2s_2 + ... + n_{k-1}s_{k-1} + n_ks_k$, i.e., no two distinct sets of coordinates will result in the same position in the linear array. \n",
+    "With this definition of the strides, the linear combination in $P(n_1, n_2, ..., n_{k-1}, n_k)$ is a one-to-one mapping from the space of tensor coordinates, $(n_1, n_2, ..., n_{k-1}, n_k)$ to the coordinate in the linear array, $n_1s_1 + n_2s_2 + ... + n_{k-1}s_{k-1} + n_ks_k$, i.e., no two distinct sets of coordinates will result in the same position in the linear array. \n",
     "\n",
     "Since the `strides` are given in terms of bytes, when we iterate over an array, the void data pointer is usually cast to `uint8_t`, and the values are converted using the proper data type stored in `ndarray->dtype`. However, there might be cases, when it makes perfect sense to cast `*array` to a different type, in which case the `strides` have to be re-scaled by the value of `ndarray->itemsize`.\n",
     "\n",
@@ -123,39 +123,39 @@
     "                    *(array)++ = f(*((type *)(sarray)));\n",
     "                    (sarray) += (source)->strides[ULAB_MAX_DIMS - 1];\n",
     "                    l++;\n",
-    "                } while(l < (source)->shape[ULAB_MAX_DIMS-1]);\n",
-    "                (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS-1];\n",
+    "                } while(l < (source)->shape[ULAB_MAX_DIMS - 1]);\n",
+    "                (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS - 1];\n",
     "                (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];\n",
     "                k++;\n",
-    "            } while(k < (source)->shape[ULAB_MAX_DIMS-2]);\n",
-    "            (sarray) -= (source)->strides[ULAB_MAX_DIMS - 2] * (source)->shape[ULAB_MAX_DIMS-2];\n",
+    "            } while(k < (source)->shape[ULAB_MAX_DIMS - 2]);\n",
+    "            (sarray) -= (source)->strides[ULAB_MAX_DIMS - 2] * (source)->shape[ULAB_MAX_DIMS - 2];\n",
     "            (sarray) += (source)->strides[ULAB_MAX_DIMS - 3];\n",
     "            j++;\n",
-    "        } while(j < (source)->shape[ULAB_MAX_DIMS-3]);\n",
-    "        (sarray) -= (source)->strides[ULAB_MAX_DIMS - 3] * (source)->shape[ULAB_MAX_DIMS-3];\n",
+    "        } while(j < (source)->shape[ULAB_MAX_DIMS - 3]);\n",
+    "        (sarray) -= (source)->strides[ULAB_MAX_DIMS - 3] * (source)->shape[ULAB_MAX_DIMS - 3];\n",
     "        (sarray) += (source)->strides[ULAB_MAX_DIMS - 4];\n",
     "        i++;\n",
-    "    } while(i < (source)->shape[ULAB_MAX_DIMS-4]);\n",
+    "    } while(i < (source)->shape[ULAB_MAX_DIMS - 4]);\n",
     "} while(0)\n",
     "```\n",
     "\n",
     "We start with the innermost loop, the one recursing `l`. `array` is already of type `mp_float_t`, while the source array, `sarray`, has been cast to `uint8_t` in the calling function. The numbers contained in `sarray` have to be read out in the proper type dictated by `ndarray->dtype`. This is what happens in the statement `*((type *)(sarray))`, and this number is then fed into the function `f`. Vectorised mathematical functions produce *dense* arrays, and for this reason, we can simply advance the `array` pointer. \n",
     "\n",
-    "The advancing of the `sarray` pointer is a bit more involving: first, in the innermost loop, we simply move forward by the amount given by the last stride, which is `(source)->strides[ULAB_MAX_DIMS - 1]`, because the `shape` and the `strides` are aligned to the right. We move the pointer as many times as given by `(source)->shape[ULAB_MAX_DIMS-1]`, which is the length of the very last axis. Hence the the structure of the loop\n",
+    "The advancing of the `sarray` pointer is a bit more involved: first, in the innermost loop, we simply move forward by the amount given by the last stride, which is `(source)->strides[ULAB_MAX_DIMS - 1]`, because the `shape` and the `strides` are aligned to the right. We move the pointer as many times as given by `(source)->shape[ULAB_MAX_DIMS - 1]`, which is the length of the very last axis. Hence the structure of the loop\n",
     "\n",
     "```c\n",
     "    size_t l = 0;\n",
     "    do {\n",
     "        ...\n",
     "        l++;\n",
-    "    } while(l < (source)->shape[ULAB_MAX_DIMS-1]);\n",
+    "    } while(l < (source)->shape[ULAB_MAX_DIMS - 1]);\n",
     "\n",
     "```\n",
-    "Once we have exhausted the last axis, we have to re-wind the pointer, and advance it by an amount given by the last but one stride. Keep in mind that in the the innermost loop we moved our pointer `(source)->shape[ULAB_MAX_DIMS-1]` times by `(source)->strides[ULAB_MAX_DIMS - 1]`, i.e., we re-wind it by moving it backwards by `(source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS-1]`. In the next step, we move forward by `(source)->strides[ULAB_MAX_DIMS - 2]`, which is the last but one stride. \n",
+    "Once we have exhausted the last axis, we have to re-wind the pointer, and advance it by an amount given by the last but one stride. Keep in mind that in the innermost loop we moved our pointer `(source)->shape[ULAB_MAX_DIMS - 1]` times by `(source)->strides[ULAB_MAX_DIMS - 1]`, i.e., we re-wind it by moving it backwards by `(source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS - 1]`. In the next step, we move forward by `(source)->strides[ULAB_MAX_DIMS - 2]`, which is the last but one stride. \n",
     "\n",
     "\n",
     "```c\n",
-    "    (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS-1];\n",
+    "    (sarray) -= (source)->strides[ULAB_MAX_DIMS - 1] * (source)->shape[ULAB_MAX_DIMS - 1];\n",
     "    (sarray) += (source)->strides[ULAB_MAX_DIMS - 2];\n",
     "\n",
     "```\n",
@@ -199,7 +199,7 @@
     "\n",
     "```c\n",
     "    size_t *coords = ndarray_new_coords(results->ndim);\n",
-    "    for(size_t i=0; i < results->len/results->shape[ULAB_MAX_DIMS -1]; i++) {\n",
+    "    for(size_t i=0; i < results->len/results->shape[ULAB_MAX_DIMS - 1]; i++) {\n",
     "        size_t l = 0;\n",
     "        do {\n",
     "            ...\n",
@@ -441,6 +441,12 @@
    "source": [
     "## Extending ulab\n",
     "\n",
+    "`ulab` offers two ways of extending it: one is via the `user` module, where you can implement your own functions and methods. Since the `user` module is not part of `ulab` _per se_, these functions do not have to conform to `numpy` or `scipy` conventions. \n",
+    "\n",
+    "The other possibility is extending `ulab` in the sense that you define your own data container and supply a single read-out function that will be called, whenever `ulab` needs access to the data. This method does not add new functions to `ulab`: you use the available functions, but you can specify, how the data are piped into them. The rationale for this will be spelt out later, when we discuss the `blocks` module. \n",
+    "\n",
+    "### Including the user module\n",
+    "\n",
     "The `user` module is disabled by default, as can be seen from the last couple of lines of [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h)\n",
     "\n",
     "```c\n",
@@ -461,7 +467,8 @@
     "```\n",
     "which should just return 5.0. Even if `numpy`-compatibility is required (i.e., if most functions are bound at the top level to `ulab` directly), having to `import` the module has a great advantage. Namely, only the [user.h](https://github.com/v923z/micropython-ulab/blob/master/code/user/user.h) and [user.c](https://github.com/v923z/micropython-ulab/blob/master/code/user/user.c) files have to be modified, thus it should be relatively straightforward to update your local copy from [github](https://github.com/v923z/micropython-ulab/blob/master/). \n",
     "\n",
-    "Now, let us see, how we can add a more meaningful function. "
+    "\n",
+    "Now, let us see, how we can add a more meaningful function."
    ]
   },
   {
@@ -606,7 +613,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Boilerplate\n",
+    "## user module boilerplate\n",
     "\n",
     "In the next section, we will construct a function that generates the element-wise square of a dense array, otherwise, raises a `TypeError` exception. Dense arrays can easily be iterated over, since we do not have to care about the `shape` and the `strides`. If the array is sparse, the section [Iterating over elements of a tensor](#Iterating-over-elements-of-a-tensor) should contain hints as to how the iteration can be implemented.\n",
     "\n",
@@ -730,6 +737,111 @@
     "2. The definition of a function object by calling MP_DEFINE_CONST_FUN_OBJ_N()\n",
     "3. Binding this function object to the namespace in the `ulab_user_globals_table[]`"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Working with the `blocks` module\n",
+    "\n",
+    "Version 3.0 of `ulab` introduced the `blocks` sub-module for extensions. You can enable it by setting the `ULAB_HAS_BLOCKS` constant in [ulab.h](https://github.com/v923z/micropython-ulab/blob/master/code/ulab.h)\n",
+    "\n",
+    "```c\n",
+    "#ifndef ULAB_HAS_BLOCKS\n",
+    "#define ULAB_HAS_BLOCKS                      (1)\n",
+    "#endif\n",
+    "```\n",
+    "\n",
+    "and in `python`, you would use it as in \n",
+    "\n",
+    "```python\n",
+    "from ulab import numpy as np\n",
+    "from ulab import blocks\n",
+    "\n",
+    "from mymodule import func\n",
+    "\n",
+    "b = blocks.block(shape=(10, 10), transformer=func(), dtype=np.uint8)\n",
+    "print(np.std(b, axis=0))\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We have seen how you can write your own `numpy`-compatible functions in the `user` sub-module. What `blocks` enables you to do is the opposite in a sense: you can define your own data containers, write a simple _transformer_ function, and via the transformer, have access to all `ulab` functions.\n",
+    "\n",
+    "But first, what is the point of such an exercise?\n",
+    "\n",
+    "The first use of the `blocks` extension is what we have already stated: standard `numpy`-compatible numerical computations can be done on arbitrary data containers without having to change the core, and without having to compromise `numpy`-compatibility. In other words, if the data can be converted in some way to one of the five native `dtype`s, `ulab` will be able to deal with it. \n",
+    "\n",
+    "A trivial example is an image, which is encoded in JPEG, PNG or some other format. The image still has a tensorial structure, i.e., it has _x_, _y_, and possibly colour and _alpha_ axes, but the image as such cannot just be fed into an `ndarray`. This is where the transformer function plays its role: in `ulab`, all calculations are carried out along axes, so, if there is a function that can convert the image data axis-wise, and pass it on to the computation routine, then the calculation is, in effect, done on the image itself. \n",
+    "\n",
+    "\n",
+    "However, `blocks` is capable of much more. It allows lazy loading: data are fetched when they are needed in the computation loops, but otherwise, they do not even have to reside in the main RAM. This means that you can work with data sets that are so big that they do not even fit into the RAM of your microcontroller, and you can off-load everything to an SPI RAM that cannot be mapped into the address space of the microcontroller. Data can also come directly from a peripheral device, or can be pixels of a display. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### The inner workings of the `blocks` module"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A standard `ndarray` has already been discussed at length. The gist is that it consists of a pointer to the data, and a small header telling the interpreter, how the pointer has to move in computation loops, and how the bytes at the pointer position are to be interpreted. Most of the magic happens in the header, and many operations, e.g., slicing, iterations, reshaping etc. do not even change the underlying data, they simply re-write the header.\n",
+    "\n",
+    "With the help of the `blocks` module, instead of attaching a pointer to actual data, one can attach a pointer to a function that supplies the data, when needed. So, the header of the `ndarray` now looks like \n",
+    "\n",
+    "```c\n",
+    "typedef struct _blocks_block_obj_t {\n",
+    "    mp_obj_base_t base;\n",
+    "    void *ndarray;\n",
+    "    void *arrfunc;\n",
+    "    uint8_t *subarray;\n",
+    "    size_t shape[ULAB_MAX_DIMS];\n",
+    "    void *origin;\n",
+    "} blocks_block_obj_t;\n",
+    "\n",
+    "typedef struct _ndarray_obj_t {\n",
+    "    mp_obj_base_t base;\n",
+    "    dtype_dtype dtype;\n",
+    "    uint8_t itemsize;\n",
+    "    uint8_t boolean;\n",
+    "    uint8_t ndim;\n",
+    "    size_t len;\n",
+    "    size_t shape[ULAB_MAX_DIMS];\n",
+    "    int32_t strides[ULAB_MAX_DIMS];\n",
+    "    void *array;\n",
+    "    #if ULAB_HAS_BLOCKS\n",
+    "    uint8_t flags;\n",
+    "    blocks_block_obj_t *block;\n",
+    "    #endif\n",
+    "} ndarray_obj_t;\n",
+    "\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `arrfunc` function fills the values into the block's own buffer, `subarray` (a `uint8_t` pointer), and this is the array that will be used in the actual iteration: \n",
+    "\n",
+    "```c\n",
+    "barray = (ndarray)->block->subarray;\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {