Skip to content

Commit fbc21dc

Browse files
committed
Implement minimal bigint binary pattern matching
Implement everything needed to run the JIT compiler (which uses bigint pattern matching) on AtomVM. Some parts of bigint handling are not yet implemented, such as the =:= binary pattern matching operation. Unsigned 64-bit pattern matching is also not yet fixed. Signed-off-by: Davide Bettio <[email protected]>
1 parent 2e60e29 commit fbc21dc

File tree

5 files changed

+209
-60
lines changed

5 files changed

+209
-60
lines changed

src/libAtomVM/bitstring.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,3 +406,20 @@ bool bitstring_extract_f64(
406406
return false;
407407
}
408408
}
409+
410+
intn_from_integer_options_t bitstring_flags_to_intn_opts(enum BitstringFlags bf)
411+
{
412+
intn_from_integer_options_t converted = IntnUnsignedBigEndian;
413+
if (bf & LittleEndianInteger) {
414+
converted |= IntnLittleEndian;
415+
}
416+
if (bf & SignedInteger) {
417+
converted |= IntnSigned;
418+
}
419+
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
420+
if (bf & NativeEndianInteger) {
421+
converted |= IntnLittleEndian;
422+
}
423+
#endif
424+
return converted;
425+
}

src/libAtomVM/bitstring.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#ifndef _BITSTRING_H_
2323
#define _BITSTRING_H_
2424

25+
#include "intn.h"
2526
#include "term.h"
2627
#include "unicode.h"
2728

@@ -528,6 +529,8 @@ bool bitstring_extract_f32(
528529
bool bitstring_extract_f64(
529530
term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst);
530531

532+
intn_from_integer_options_t bitstring_flags_to_intn_opts(enum BitstringFlags bf);
533+
531534
#ifdef __cplusplus
532535
}
533536
#endif

src/libAtomVM/jit.c

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,19 +1242,64 @@ static term jit_term_alloc_bin_match_state(Context *ctx, term src, int slots)
12421242
return term_alloc_bin_match_state(src, slots, &ctx->heap);
12431243
}
12441244

1245-
static term jit_bitstring_extract_integer(Context *ctx, JITState *jit_state, term *bin_ptr, size_t offset, int n, int bs_flags)
1245+
static term extract_bigint(Context *ctx, JITState *jit_state, const uint8_t *bytes,
1246+
size_t bytes_size, intn_from_integer_options_t opts)
12461247
{
1247-
TRACE("jit_bitstring_extract_integer: bin_ptr=%p offset=%d n=%d bs_flags=%d\n", (void *) bin_ptr, (int) offset, n, bs_flags);
1248-
union maybe_unsigned_int64 value;
1249-
bool status = bitstring_extract_integer(((term) bin_ptr) | TERM_PRIMARY_BOXED, offset, n, bs_flags, &value);
1250-
if (UNLIKELY(!status)) {
1248+
intn_integer_sign_t sign;
1249+
intn_digit_t bigint[INTN_MAX_RES_LEN];
1250+
int count = intn_from_integer_bytes(bytes, bytes_size, opts, bigint, &sign);
1251+
// count will be always >= 0, caller ensures that bits <= INTN_MAX_UNSIGNED_BITS_SIZE
1252+
1253+
size_t intn_data_size;
1254+
size_t rounded_res_len;
1255+
term_bigint_size_requirements(count, &intn_data_size, &rounded_res_len);
1256+
1257+
Heap heap;
1258+
if (UNLIKELY(memory_init_heap(&heap, BOXED_BIGINT_HEAP_SIZE(intn_data_size)) != MEMORY_GC_OK)) {
1259+
set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM);
12511260
return FALSE_ATOM;
12521261
}
1253-
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
1254-
if (UNLIKELY(term_is_invalid_term(t))) {
1255-
set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM);
1262+
1263+
term bigint_term
1264+
= term_create_uninitialized_bigint(intn_data_size, (term_integer_sign_t) sign, &heap);
1265+
term_initialize_bigint(bigint_term, bigint, count, rounded_res_len);
1266+
1267+
memory_heap_append_heap(&ctx->heap, &heap);
1268+
1269+
return bigint_term;
1270+
}
1271+
1272+
static term jit_bitstring_extract_integer(
1273+
Context *ctx, JITState *jit_state, term *bin_ptr, size_t offset, int n, int bs_flags)
1274+
{
1275+
TRACE("jit_bitstring_extract_integer: bin_ptr=%p offset=%d n=%d bs_flags=%d\n",
1276+
(void *) bin_ptr, (int) offset, n, bs_flags);
1277+
if (n <= 64) {
1278+
union maybe_unsigned_int64 value;
1279+
bool status = bitstring_extract_integer(
1280+
((term) bin_ptr) | TERM_PRIMARY_BOXED, offset, n, bs_flags, &value);
1281+
if (UNLIKELY(!status)) {
1282+
return FALSE_ATOM;
1283+
}
1284+
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
1285+
if (UNLIKELY(term_is_invalid_term(t))) {
1286+
set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM);
1287+
}
1288+
return t;
1289+
} else if ((offset % 8 == 0) && (n % 8 == 0) && (n <= INTN_MAX_UNSIGNED_BITS_SIZE)) {
1290+
term bs_bin = ((term) bin_ptr) | TERM_PRIMARY_BOXED;
1291+
unsigned long capacity = term_binary_size(bs_bin);
1292+
if (8 * capacity - offset < (unsigned long) n) {
1293+
return FALSE_ATOM;
1294+
}
1295+
size_t byte_offset = offset / 8;
1296+
const uint8_t *int_bytes = (const uint8_t *) term_binary_data(bs_bin);
1297+
1298+
return extract_bigint(
1299+
ctx, jit_state, int_bytes + byte_offset, n / 8, bitstring_flags_to_intn_opts(bs_flags));
1300+
} else {
1301+
return FALSE_ATOM;
12561302
}
1257-
return t;
12581303
}
12591304

12601305
static term jit_bitstring_extract_float(Context *ctx, term *bin_ptr, size_t offset, int n, int bs_flags)

src/libAtomVM/opcodesswitch.h

Lines changed: 96 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,6 +1814,41 @@ static bool maybe_call_native(Context *ctx, atom_index_t module_name, atom_index
18141814
#endif
18151815

18161816
#ifndef AVM_NO_EMU
1817+
/**
 * Decode an integer stored as raw bytes into a boxed big integer term.
 *
 * The bytes are converted to intn digits with intn_from_integer_bytes(); the
 * resulting term is built on a temporary heap that is then appended to the
 * heap of ctx.
 *
 * @param ctx context that receives the new term (and the error registers on failure)
 * @param bytes first byte of the encoded integer
 * @param bytes_size number of bytes to decode
 * @param opts signedness / endianness options forwarded to intn_from_integer_bytes()
 * @return the new term, or term_invalid_term() after setting x[0] = ERROR_ATOM
 *         and x[1] to OVERFLOW_ATOM (conversion failed) or OUT_OF_MEMORY_ATOM
 *         (temporary heap could not be initialized)
 */
static term extract_nbits_integer(Context *ctx, const uint8_t *bytes, size_t bytes_size, intn_from_integer_options_t opts)
{
    intn_digit_t digits[INTN_MAX_RES_LEN];
    intn_integer_sign_t sign;
    size_t data_size;
    size_t padded_len;
    Heap tmp_heap;
    term failure_reason;

    int digits_len = intn_from_integer_bytes(bytes, bytes_size, opts, digits, &sign);
    if (UNLIKELY(digits_len < 0)) {
        // Probably unreachable: the compiler seems to emit such values as an
        // external term (SMALL_BIG_EXT) instead of going through this path.
        failure_reason = OVERFLOW_ATOM;
        goto raise_error;
    }

    term_intn_to_term_size(digits_len, &data_size, &padded_len);

    if (UNLIKELY(memory_init_heap(&tmp_heap, BOXED_INTN_SIZE(data_size)) != MEMORY_GC_OK)) {
        failure_reason = OUT_OF_MEMORY_ATOM;
        goto raise_error;
    }

    term result = term_create_uninitialized_intn(data_size, (term_integer_sign_t) sign, &tmp_heap);
    intn_digit_t *result_digits = (void *) term_intn_data(result);
    intn_copy(digits, digits_len, result_digits, padded_len);

    // hand the temporary heap over to the context
    memory_heap_append_heap(&ctx->heap, &tmp_heap);

    return result;

raise_error:
    ctx->x[0] = ERROR_ATOM;
    ctx->x[1] = failure_reason;
    return term_invalid_term();
}
1851+
18171852
static size_t decode_nbits_integer(Context *ctx, const uint8_t *encoded, term *out_term)
18181853
{
18191854
const uint8_t *new_encoded = encoded;
@@ -1826,42 +1861,9 @@ static bool maybe_call_native(Context *ctx, atom_index_t module_name, atom_index
18261861
len += 9;
18271862

18281863
if (out_term) {
1829-
intn_integer_sign_t sign;
1830-
intn_digit_t bigint[INTN_MAX_RES_LEN];
1831-
int count = intn_from_integer_bytes(new_encoded, len, IntnSigned, bigint, &sign);
1832-
if (UNLIKELY(count < 0)) {
1833-
// this is likely unreachable, compiler seem to generate an external term
1834-
// and to encode this as SMALL_BIG_EXT, so I don't think this code is executed
1835-
ctx->x[0] = ERROR_ATOM;
1836-
ctx->x[1] = OVERFLOW_ATOM;
1837-
*out_term = term_invalid_term();
1838-
goto return_size;
1839-
}
1840-
1841-
size_t intn_data_size;
1842-
size_t rounded_res_len;
1843-
term_intn_to_term_size(count, &intn_data_size, &rounded_res_len);
1844-
1845-
Heap heap;
1846-
if (UNLIKELY(
1847-
memory_init_heap(&heap, BOXED_INTN_SIZE(intn_data_size)) != MEMORY_GC_OK)) {
1848-
ctx->x[0] = ERROR_ATOM;
1849-
ctx->x[1] = OUT_OF_MEMORY_ATOM;
1850-
*out_term = term_invalid_term();
1851-
goto return_size;
1852-
}
1853-
1854-
term bigint_term
1855-
= term_create_uninitialized_intn(intn_data_size, (term_integer_sign_t) sign, &heap);
1856-
intn_digit_t *dest_buf = (void *) term_intn_data(bigint_term);
1857-
intn_copy(bigint, count, dest_buf, rounded_res_len);
1858-
1859-
memory_heap_append_heap(&ctx->heap, &heap);
1860-
1861-
*out_term = bigint_term;
1864+
*out_term = extract_nbits_integer(ctx, new_encoded, len, IntnSigned);
18621865
}
18631866

1864-
return_size:
18651867
return (new_encoded - encoded) + len;
18661868
}
18671869
#endif
@@ -5299,25 +5301,44 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
52995301
union maybe_unsigned_int64 value;
53005302
term bs_bin = term_get_match_state_binary(src);
53015303
avm_int_t bs_offset = term_get_match_state_offset(src);
5302-
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
5303-
if (UNLIKELY(!status)) {
5304-
TRACE("bs_get_integer2: error extracting integer.\n");
5305-
JUMP_TO_ADDRESS(mod->labels[fail]);
5306-
} else {
5307-
term_set_match_state_offset(src, bs_offset + increment);
5304+
term t;
5305+
if (increment <= 64) {
5306+
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
5307+
if (UNLIKELY(!status)) {
5308+
TRACE("bs_get_integer2: error extracting integer.\n");
5309+
JUMP_TO_ADDRESS(mod->labels[fail]);
5310+
} else {
5311+
term_set_match_state_offset(src, bs_offset + increment);
53085312

5309-
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
5310-
if (UNLIKELY(term_is_invalid_term(t))) {
5313+
t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
5314+
if (UNLIKELY(term_is_invalid_term(t))) {
5315+
HANDLE_ERROR();
5316+
}
5317+
}
5318+
} else if ((bs_offset % 8 == 0) && (increment % 8 == 0) && (increment <= INTN_MAX_UNSIGNED_BITS_SIZE)) {
5319+
unsigned long capacity = term_binary_size(bs_bin);
5320+
if (8 * capacity - bs_offset < (unsigned long) increment) {
5321+
JUMP_TO_ADDRESS(mod->labels[fail]);
5322+
}
5323+
size_t byte_offset = bs_offset / 8;
5324+
const uint8_t *int_bytes = (const uint8_t *) term_binary_data(bs_bin);
5325+
5326+
t = extract_nbits_integer(ctx, int_bytes + byte_offset, increment / 8,
5327+
bitstring_flags_to_intn_opts(flags_value));
5328+
term_set_match_state_offset(src, bs_offset + increment);
5329+
if (term_is_invalid_term(t)) {
53115330
HANDLE_ERROR();
53125331
}
5332+
} else {
5333+
JUMP_TO_ADDRESS(mod->labels[fail]);
5334+
}
53135335
#endif
53145336

53155337
DEST_REGISTER(dreg);
53165338
DECODE_DEST_REGISTER(dreg, pc);
53175339

53185340
#ifdef IMPL_EXECUTE_LOOP
5319-
WRITE_REGISTER(dreg, t);
5320-
}
5341+
WRITE_REGISTER(dreg, t);
53215342
#endif
53225343
break;
53235344
}
@@ -7274,15 +7295,35 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
72747295
avm_int_t size_val = term_to_int(size);
72757296
avm_int_t increment = size_val * unit;
72767297
union maybe_unsigned_int64 value;
7277-
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
7278-
if (UNLIKELY(!status)) {
7279-
TRACE("bs_match/3: error extracting integer.\n");
7298+
term t;
7299+
if (increment <= 64) {
7300+
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
7301+
if (UNLIKELY(!status)) {
7302+
TRACE("bs_match/3: error extracting integer.\n");
7303+
goto bs_match_jump_to_fail;
7304+
}
7305+
//FIXME: handling of 64-bit unsigned integers is not reliable
7306+
t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
7307+
if (UNLIKELY(term_is_invalid_term(t))) {
7308+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
7309+
}
7310+
} else if ((bs_offset % 8 == 0) && (increment % 8 == 0) && (increment <= INTN_MAX_UNSIGNED_BITS_SIZE)) {
7311+
unsigned long capacity = term_binary_size(bs_bin);
7312+
if (8 * capacity - bs_offset < (unsigned long) increment) {
7313+
goto bs_match_jump_to_fail;
7314+
}
7315+
size_t byte_offset = bs_offset / 8;
7316+
const uint8_t *int_bytes
7317+
= (const uint8_t *) term_binary_data(bs_bin);
7318+
7319+
t = extract_nbits_integer(ctx, int_bytes + byte_offset,
7320+
increment / 8, bitstring_flags_to_intn_opts(flags_value));
7321+
if (term_is_invalid_term(t)) {
7322+
HANDLE_ERROR();
7323+
}
7324+
} else {
72807325
goto bs_match_jump_to_fail;
72817326
}
7282-
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
7283-
if (UNLIKELY(term_is_invalid_term(t))) {
7284-
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
7285-
}
72867327
#endif
72877328
DEST_REGISTER(dreg);
72887329
DECODE_DEST_REGISTER(dreg, pc);
@@ -7389,6 +7430,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
73897430
DECODE_LITERAL(pattern_value, pc);
73907431
j++;
73917432
#ifdef IMPL_EXECUTE_LOOP
7433+
if (size > 64) {
7434+
// TODO: implement support for big integers also here
7435+
RAISE_ERROR(BADARG_ATOM);
7436+
}
73927437
union maybe_unsigned_int64 matched_value;
73937438
bool status = bitstring_extract_integer(bs_bin, bs_offset, size, 0, &matched_value);
73947439
if (UNLIKELY(!status)) {

tests/erlang_tests/bigint.erl

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ start() ->
7272
test_is_number() +
7373
test_gt_lt_guards() +
7474
to_external_term() +
75+
test_pattern_match() +
7576
test_band() +
7677
test_bxor() +
7778
test_bor() +
@@ -2138,6 +2139,44 @@ to_external_term() ->
21382139

21392140
0.
21402141

2142+
%% Exercises binary pattern matching of integer segments wider than 64 bits
%% (72, 80, 120, 128 and 256 bits; signed/unsigned, little/big endian).
%% Each extracted integer is checked through its base-16 representation.
%% The input binaries go through ?MODULE:id/1, as in the other tests of this
%% module. Returns 0 on success; any mismatch crashes with badmatch.
test_pattern_match() ->
    <<Int72:72/integer-little-signed>> = ?MODULE:id(<<23, 4, 222, 66, 172, 197, 113, 183, 80>>),
    <<"50B771C5AC42DE0417">> = erlang:integer_to_binary(?MODULE:id(Int72), 16),
    <<Int80:80/integer-little-signed>> = ?MODULE:id(
        <<165, 63, 196, 58, 33, 96, 209, 59, 244, 213>>
    ),
    <<"-2A0BC42E9FDEC53BC05B">> = erlang:integer_to_binary(?MODULE:id(Int80), 16),
    <<Int120:120/unsigned-big-integer>> = ?MODULE:id(
        <<0, 242, 138, 221, 68, 111, 58, 120, 145, 135, 164, 56, 164, 12, 205>>
    ),
    <<"F28ADD446F3A789187A438A40CCD">> = erlang:integer_to_binary(?MODULE:id(Int120), 16),
    <<Int256:256/unsigned-big-integer>> = ?MODULE:id(
        <<202, 196, 64, 150, 63, 238, 50, 47, 214, 81, 247, 55, 151, 242, 169, 106, 162, 211, 73,
            155, 211, 85, 164, 237, 153, 138, 191, 77, 87, 183, 204, 111>>
    ),
    <<"CAC440963FEE322FD651F73797F2A96AA2D3499BD355A4ED998ABF4D57B7CC6F">> = erlang:integer_to_binary(
        ?MODULE:id(Int256), 16
    ),

    %% A big integer segment in the middle of a pattern, followed by a binary tail.
    <<"foo", Int128:128/unsigned-little-integer, Bar/binary>> = ?MODULE:id(
        <<102, 111, 111, 183, 226, 155, 102, 249, 246, 168, 101, 53, 36, 21, 10, 133, 223, 231, 10,
            98, 97, 114>>
    ),
    <<"AE7DF850A15243565A8F6F9669BE2B7">> = erlang:integer_to_binary(?MODULE:id(Int128), 16),
    <<"bar">> = ?MODULE:id(Bar),

    %% Only 15 bytes follow "foo", so a 128-bit segment must not match.
    %% The tail uses a fresh variable on purpose: reusing the already bound Bar
    %% would also reject the clause whenever the tail differs from <<"bar">>,
    %% hiding a faulty extraction that reads past the end of the binary.
    ok =
        case
            ?MODULE:id(
                <<102, 111, 111, 183, 226, 155, 102, 249, 246, 168, 101, 53, 36, 21, 10, 133, 223,
                    231>>
            )
        of
            <<"foo", _I128:128/unsigned-little-integer, _Rest/binary>> -> error;
            _ -> ok
        end,
    0.
2179+
21412180
test_band() ->
21422181
MaxPatternBin = <<"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF">>,
21432182
MaxPattern = erlang:binary_to_integer(?MODULE:id(MaxPatternBin), 16),

0 commit comments

Comments
 (0)