Skip to content

Commit c284728

Browse files
committed
bit_transpose: Support from 2 to 7 strands, not just 8
1 parent 9cf7d73 commit c284728

File tree

4 files changed

+99
-17
lines changed

4 files changed

+99
-17
lines changed

locale/circuitpython.pot

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,7 +1093,8 @@ msgid "Initialization failed due to lack of memory"
10931093
msgstr ""
10941094

10951095
#: shared-bindings/_bit_transpose/__init__.c
1096-
msgid "Input buffer must be a multiple of 8 bytes"
1096+
#, c-format
1097+
msgid "Input buffer length (%d) must be a multiple of the strand count (%d)"
10971098
msgstr ""
10981099

10991100
#: ports/atmel-samd/common-hal/pulseio/PulseIn.c
@@ -1664,7 +1665,8 @@ msgid "Out of sockets"
16641665
msgstr ""
16651666

16661667
#: shared-bindings/_bit_transpose/__init__.c
1667-
msgid "Output buffer must be at least as big as input buffer"
1668+
#, c-format
1669+
msgid "Output buffer must be at least %d bytes"
16681670
msgstr ""
16691671

16701672
#: shared-bindings/audiobusio/PDMIn.c
@@ -3473,6 +3475,10 @@ msgstr ""
34733475
msgid "not enough arguments for format string"
34743476
msgstr ""
34753477

3478+
#: shared-bindings/_bit_transpose/__init__.c
3479+
msgid "num_strands must be from 2 to 8 (inclusive)"
3480+
msgstr ""
3481+
34763482
#: extmod/ulab/code/ulab_create.c
34773483
msgid "number of points must be at least 2"
34783484
msgstr ""

shared-bindings/_bit_transpose/__init__.c

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,10 @@
4343
//| ...
4444
//|
4545
STATIC mp_obj_t bit_transpose(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
46-
enum { ARG_input, ARG_output };
46+
enum { ARG_input, ARG_num_strands, ARG_output };
4747
static const mp_arg_t allowed_args[] = {
4848
{ MP_QSTR_input, MP_ARG_OBJ | MP_ARG_REQUIRED, {} },
49+
{ MP_QSTR_num_strands, MP_ARG_INT | MP_ARG_KW_ONLY, { .u_int = 8 } },
4950
{ MP_QSTR_output, MP_ARG_OBJ | MP_ARG_KW_ONLY, { .u_obj = mp_const_none } },
5051
};
5152
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
@@ -55,21 +56,28 @@ STATIC mp_obj_t bit_transpose(size_t n_args, const mp_obj_t *pos_args, mp_map_t
5556
mp_buffer_info_t output_bufinfo;
5657

5758
mp_get_buffer_raise(args[ARG_input].u_obj, &input_bufinfo, MP_BUFFER_READ);
58-
int n = input_bufinfo.len;
59-
if (n % 8 != 0) {
60-
mp_raise_ValueError(translate("Input buffer must be a multiple of 8 bytes"));
59+
int num_strands = args[ARG_num_strands].u_int;
60+
61+
if (num_strands < 2 || num_strands > 8) {
62+
mp_raise_ValueError(translate("num_strands must be from 2 to 8 (inclusive)"));
63+
}
64+
65+
int inlen = input_bufinfo.len;
66+
if (inlen % num_strands != 0) {
67+
mp_raise_ValueError_varg(translate("Input buffer length (%d) must be a multiple of the strand count (%d)"), inlen, num_strands);
6168
}
6269
mp_obj_t output = args[ARG_output].u_obj;
6370

71+
int outlen = 8 * (inlen / num_strands);
6472
if (!output || output == mp_const_none) {
65-
output = mp_obj_new_bytearray_of_zeros(n);
73+
output = mp_obj_new_bytearray_of_zeros(outlen);
6674
}
6775
mp_get_buffer_raise(output, &output_bufinfo, MP_BUFFER_WRITE);
68-
int m = output_bufinfo.len;
69-
if (m < n) {
70-
mp_raise_ValueError(translate("Output buffer must be at least as big as input buffer"));
76+
int avail = output_bufinfo.len;
77+
if (avail < outlen) {
78+
mp_raise_ValueError_varg(translate("Output buffer must be at least %d bytes"), outlen);
7179
}
72-
common_hal_bit_transpose_bit_transpose(output_bufinfo.buf, input_bufinfo.buf, input_bufinfo.len);
80+
common_hal_bit_transpose_bit_transpose(output_bufinfo.buf, input_bufinfo.buf, inlen, num_strands);
7381
return output;
7482
}
7583
STATIC MP_DEFINE_CONST_FUN_OBJ_KW(bit_transpose_bit_transpose_obj, 1, bit_transpose);

shared-bindings/_bit_transpose/__init__.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@
2929
#include <stdint.h>
3030
#include <stdlib.h>
3131

32-
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t n);
32+
// Bit-transpose `src` into `result`.
//
// src:         `num_strands` rows stored back to back, each row being
//              inlen / num_strands bytes long.
// inlen:       total number of bytes in `src`.
// num_strands: number of rows; 8 takes a dedicated path, any other value is
//              handed to the variable-strand path.
// result:      receives two 32-bit words (8 bytes) per input column, i.e.
//              8 * (inlen / num_strands) bytes in total.
// NOTE(review): the implementation stores through a uint32_t pointer, so
// `result` presumably has to be 4-byte aligned - confirm against callers.
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t inlen, size_t num_strands);

shared-module/_bit_transpose/__init__.c

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@
3030
#include <stdlib.h>
3131
#include <string.h>
3232

33+
// FALLTHROUGH marks an intentional fall-through between switch cases.
//
// The statement attribute is only used when the compiler reports that it
// supports it: __GNUC__ alone is also defined by compilers (older GCC, older
// clang in C mode) that do not know `fallthrough` and would warn about an
// unknown attribute. Everywhere else the macro expands to a harmless no-op.
#if defined(__has_attribute)
#if __has_attribute(fallthrough)
#define FALLTHROUGH __attribute__((fallthrough))
#endif
#endif
#ifndef FALLTHROUGH
#define FALLTHROUGH ((void)0) /* FALLTHROUGH */
#endif
38+
3339
// adapted from "Hacker's Delight" - Figure 7-2 Transposing an 8x8-bit matrix
3440
// basic idea is:
3541
// > First, treat the 8x8-bit matrix as 16 2x2-bit matrices, and transpose each
@@ -40,7 +46,57 @@
4046
// > illustrated below.
4147
// We want a different definition of bit/byte order, deal with strides differently, etc.
4248
// so the code is heavily re-worked compared to the original.
43-
static void transpose8(uint32_t *result, const uint8_t *src, int src_stride) {
49+
static void transpose_var(uint32_t *result, const uint8_t *src, int src_stride, int num_strands) {
50+
uint32_t x = 0, y = 0, t;
51+
52+
src += (num_strands-1) * src_stride;
53+
54+
switch(num_strands) {
55+
case 7:
56+
x |= *src << 16;
57+
src -= src_stride;
58+
FALLTHROUGH;
59+
case 6:
60+
x |= *src << 8;
61+
src -= src_stride;
62+
FALLTHROUGH;
63+
case 5:
64+
x |= *src;
65+
src -= src_stride;
66+
FALLTHROUGH;
67+
case 4:
68+
y |= *src << 24;
69+
src -= src_stride;
70+
FALLTHROUGH;
71+
case 3:
72+
y |= *src << 16;
73+
src -= src_stride;
74+
FALLTHROUGH;
75+
case 2:
76+
y |= *src << 8;
77+
src -= src_stride;
78+
y |= *src;
79+
}
80+
81+
t = (x ^ (x >> 7)) & 0x00AA00AA; x = x ^ t ^ (t << 7);
82+
t = (y ^ (y >> 7)) & 0x00AA00AA; y = y ^ t ^ (t << 7);
83+
84+
t = (x ^ (x >>14)) & 0x0000CCCC; x = x ^ t ^ (t <<14);
85+
t = (y ^ (y >>14)) & 0x0000CCCC; y = y ^ t ^ (t <<14);
86+
87+
t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
88+
y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
89+
x = t;
90+
91+
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
92+
x = __builtin_bswap32(x);
93+
y = __builtin_bswap32(y);
94+
#endif
95+
result[0] = x;
96+
result[1] = y;
97+
}
98+
99+
static void transpose_8(uint32_t *result, const uint8_t *src, int src_stride) {
44100
uint32_t x, y, t;
45101

46102
y = *src; src += src_stride;
@@ -70,14 +126,26 @@ static void transpose8(uint32_t *result, const uint8_t *src, int src_stride) {
70126
result[1] = y;
71127
}
72128

73-
static void bit_transpose(uint32_t *result, const uint8_t *src, size_t src_stride, size_t n) {
129+
// Run the 8-strand transpose over `n` consecutive byte-columns.
//
// Column `col` starts at src[col] (rows are `src_stride` bytes apart) and its
// two output words are stored at result[2 * col] and result[2 * col + 1].
static void bit_transpose_8(uint32_t *result, const uint8_t *src, size_t src_stride, size_t n) {
    for (size_t col = 0; col < n; col++) {
        transpose_8(result + 2 * col, src + col, src_stride);
    }
}
80136

81-
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t n) {
82-
bit_transpose((uint32_t*)(void*)result, src, n/8, n/8);
137+
// Run the variable-strand transpose over `n` consecutive byte-columns.
//
// Column `col` starts at src[col] (rows are `src_stride` bytes apart) and its
// two output words are stored at result[2 * col] and result[2 * col + 1].
// `num_strands` is passed through unchanged to transpose_var.
static void bit_transpose_var(uint32_t *result, const uint8_t *src, size_t src_stride, size_t n, int num_strands) {
    for (size_t col = 0; col < n; col++) {
        transpose_var(result + 2 * col, src + col, src_stride, num_strands);
    }
}
144+
145+
// Bit-transpose `src` (num_strands rows of inlen / num_strands bytes each,
// stored back to back) into `result`.
//
// Exactly 8 strands use the dedicated 8-row routine; every other count goes
// through the variable-strand routine. Either way two 32-bit words are written
// per input column, so `result` must hold 8 * (inlen / num_strands) bytes.
// NOTE(review): `result` is written through a uint32_t pointer, so it is
// presumably expected to be 4-byte aligned - confirm against callers.
void common_hal_bit_transpose_bit_transpose(uint8_t *result, const uint8_t *src, size_t inlen, size_t num_strands) {
    uint32_t *words = (uint32_t *)(void *)result;

    if (num_strands != 8) {
        // The row length doubles as the number of columns to process.
        size_t row_len = inlen / num_strands;
        bit_transpose_var(words, src, row_len, row_len, num_strands);
        return;
    }

    size_t row_len = inlen / 8;
    bit_transpose_8(words, src, row_len, row_len);
}

0 commit comments

Comments
 (0)