Skip to content

Commit 950c9ae

Browse files
committed
Implement minimal bigint binary pattern matching
Implement everything needed to run the JIT compiler (which uses bigint pattern matching) on AtomVM. Some bigint handling parts are not yet implemented, such as the =:= binary pattern matching operation. Also, unsigned 64-bit pattern matching is not yet fixed. Signed-off-by: Davide Bettio <[email protected]>
1 parent 310c0f6 commit 950c9ae

File tree

5 files changed

+208
-59
lines changed

5 files changed

+208
-59
lines changed

src/libAtomVM/bitstring.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,3 +406,20 @@ bool bitstring_extract_f64(
406406
return false;
407407
}
408408
}
409+
410+
// Translates a set of bitstring segment flags into intn_from_integer_options_t
// option bits.
//
// bf: bitwise combination of enum BitstringFlags values.
// Returns IntnUnsignedBigEndian with IntnLittleEndian and/or IntnSigned OR-ed in,
// depending on which flags are set in bf.
intn_from_integer_options_t bitstring_flags_to_intn_opts(enum BitstringFlags bf)
411+
{
412+
// Starting point when no flag is set: unsigned, big-endian.
intn_from_integer_options_t converted = IntnUnsignedBigEndian;
413+
if (bf & LittleEndianInteger) {
414+
converted |= IntnLittleEndian;
415+
}
416+
if (bf & SignedInteger) {
417+
converted |= IntnSigned;
418+
}
419+
// Only on little-endian builds does the native-endian flag select little-endian;
// on other builds the flag is ignored here and the big-endian default is kept.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
420+
if (bf & NativeEndianInteger) {
421+
converted |= IntnLittleEndian;
422+
}
423+
#endif
424+
return converted;
425+
}

src/libAtomVM/bitstring.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#ifndef _BITSTRING_H_
2323
#define _BITSTRING_H_
2424

25+
#include "intn.h"
2526
#include "term.h"
2627
#include "unicode.h"
2728

@@ -528,6 +529,8 @@ bool bitstring_extract_f32(
528529
bool bitstring_extract_f64(
529530
term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst);
530531

532+
intn_from_integer_options_t bitstring_flags_to_intn_opts(enum BitstringFlags bf);
533+
531534
#ifdef __cplusplus
532535
}
533536
#endif

src/libAtomVM/jit.c

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,19 +1244,64 @@ static term jit_term_alloc_bin_match_state(Context *ctx, term src, int slots)
12441244
return term_alloc_bin_match_state(src, slots, &ctx->heap);
12451245
}
12461246

1247-
static term jit_bitstring_extract_integer(Context *ctx, JITState *jit_state, term *bin_ptr, size_t offset, int n, int bs_flags)
1247+
static term extract_bigint(Context *ctx, JITState *jit_state, const uint8_t *bytes,
1248+
size_t bytes_size, intn_from_integer_options_t opts)
12481249
{
1249-
TRACE("jit_bitstring_extract_integer: bin_ptr=%p offset=%d n=%d bs_flags=%d\n", (void *) bin_ptr, (int) offset, n, bs_flags);
1250-
union maybe_unsigned_int64 value;
1251-
bool status = bitstring_extract_integer(((term) bin_ptr) | TERM_PRIMARY_BOXED, offset, n, bs_flags, &value);
1252-
if (UNLIKELY(!status)) {
1250+
intn_integer_sign_t sign;
1251+
intn_digit_t bigint[INTN_MAX_RES_LEN];
1252+
int count = intn_from_integer_bytes(bytes, bytes_size, opts, bigint, &sign);
1253+
// count will be always >= 0, caller ensures that bits <= INTN_MAX_UNSIGNED_BITS_SIZE
1254+
1255+
size_t intn_data_size;
1256+
size_t rounded_res_len;
1257+
term_bigint_size_requirements(count, &intn_data_size, &rounded_res_len);
1258+
1259+
Heap heap;
1260+
if (UNLIKELY(memory_init_heap(&heap, BOXED_BIGINT_HEAP_SIZE(intn_data_size)) != MEMORY_GC_OK)) {
1261+
set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM);
12531262
return FALSE_ATOM;
12541263
}
1255-
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
1256-
if (UNLIKELY(term_is_invalid_term(t))) {
1257-
set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM);
1264+
1265+
term bigint_term
1266+
= term_create_uninitialized_bigint(intn_data_size, (term_integer_sign_t) sign, &heap);
1267+
term_initialize_bigint(bigint_term, bigint, count, rounded_res_len);
1268+
1269+
memory_heap_append_heap(&ctx->heap, &heap);
1270+
1271+
return bigint_term;
1272+
}
1273+
1274+
static term jit_bitstring_extract_integer(
1275+
Context *ctx, JITState *jit_state, term *bin_ptr, size_t offset, int n, int bs_flags)
1276+
{
1277+
TRACE("jit_bitstring_extract_integer: bin_ptr=%p offset=%d n=%d bs_flags=%d\n",
1278+
(void *) bin_ptr, (int) offset, n, bs_flags);
1279+
if (n <= 64) {
1280+
union maybe_unsigned_int64 value;
1281+
bool status = bitstring_extract_integer(
1282+
((term) bin_ptr) | TERM_PRIMARY_BOXED, offset, n, bs_flags, &value);
1283+
if (UNLIKELY(!status)) {
1284+
return FALSE_ATOM;
1285+
}
1286+
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
1287+
if (UNLIKELY(term_is_invalid_term(t))) {
1288+
set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM);
1289+
}
1290+
return t;
1291+
} else if ((offset % 8 == 0) && (n % 8 == 0) && (n <= INTN_MAX_UNSIGNED_BITS_SIZE)) {
1292+
term bs_bin = ((term) bin_ptr) | TERM_PRIMARY_BOXED;
1293+
unsigned long capacity = term_binary_size(bs_bin);
1294+
if (8 * capacity - offset < (unsigned long) n) {
1295+
return FALSE_ATOM;
1296+
}
1297+
size_t byte_offset = offset / 8;
1298+
const uint8_t *int_bytes = (const uint8_t *) term_binary_data(bs_bin);
1299+
1300+
return extract_bigint(
1301+
ctx, jit_state, int_bytes + byte_offset, n / 8, bitstring_flags_to_intn_opts(bs_flags));
1302+
} else {
1303+
return FALSE_ATOM;
12581304
}
1259-
return t;
12601305
}
12611306

12621307
static term jit_bitstring_extract_float(Context *ctx, term *bin_ptr, size_t offset, int n, int bs_flags)

src/libAtomVM/opcodesswitch.h

Lines changed: 95 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,6 +1814,40 @@ static bool maybe_call_native(Context *ctx, atom_index_t module_name, atom_index
18141814
#endif
18151815

18161816
#ifndef AVM_NO_EMU
1817+
// Builds a bigint term from a raw byte sequence.
//
// ctx: context whose heap receives the new term and whose x[0]/x[1] registers
//      are set on failure.
// bytes, bytes_size: the integer bytes to decode and their length in bytes.
// opts: signedness/endianness options forwarded to intn_from_integer_bytes.
//
// Returns the bigint term on success. On failure returns term_invalid_term() with
// x[0] = ERROR_ATOM and x[1] = OVERFLOW_ATOM (conversion reported a negative count)
// or x[1] = OUT_OF_MEMORY_ATOM (heap fragment could not be initialized).
static term extract_nbits_integer(Context *ctx, const uint8_t *bytes, size_t bytes_size, intn_from_integer_options_t opts)
1818+
{
1819+
intn_integer_sign_t sign;
1820+
intn_digit_t bigint[INTN_MAX_RES_LEN];
1821+
int count = intn_from_integer_bytes(bytes, bytes_size, opts, bigint, &sign);
1822+
if (UNLIKELY(count < 0)) {
1823+
// This is likely unreachable: the compiler seems to generate an external term
1824+
// and to encode it as SMALL_BIG_EXT, so this code is probably never executed.
// NOTE(review): that remark was written for compact-term literals; confirm it
// still holds for the binary pattern matching callers of this function.
1825+
ctx->x[0] = ERROR_ATOM;
1826+
ctx->x[1] = OVERFLOW_ATOM;
1827+
return term_invalid_term();
1828+
}
1829+
1830+
size_t intn_data_size;
1831+
size_t rounded_res_len;
1832+
term_bigint_size_requirements(count, &intn_data_size, &rounded_res_len);
1833+
1834+
// The term is built in its own heap, which is appended to ctx->heap below.
Heap heap;
1835+
if (UNLIKELY(
1836+
memory_init_heap(&heap, BOXED_BIGINT_HEAP_SIZE(intn_data_size)) != MEMORY_GC_OK)) {
1837+
ctx->x[0] = ERROR_ATOM;
1838+
ctx->x[1] = OUT_OF_MEMORY_ATOM;
1839+
return term_invalid_term();
1840+
}
1841+
1842+
term bigint_term
1843+
= term_create_uninitialized_bigint(intn_data_size, (term_integer_sign_t) sign, &heap);
1844+
term_initialize_bigint(bigint_term, bigint, count, rounded_res_len);
1845+
1846+
memory_heap_append_heap(&ctx->heap, &heap);
1847+
1848+
return bigint_term;
1849+
}
1850+
18171851
static size_t decode_nbits_integer(Context *ctx, const uint8_t *encoded, term *out_term)
18181852
{
18191853
const uint8_t *new_encoded = encoded;
@@ -1826,41 +1860,9 @@ static bool maybe_call_native(Context *ctx, atom_index_t module_name, atom_index
18261860
len += 9;
18271861

18281862
if (out_term) {
1829-
intn_integer_sign_t sign;
1830-
intn_digit_t bigint[INTN_MAX_RES_LEN];
1831-
int count = intn_from_integer_bytes(new_encoded, len, IntnSigned, bigint, &sign);
1832-
if (UNLIKELY(count < 0)) {
1833-
// this is likely unreachable, compiler seem to generate an external term
1834-
// and to encode this as SMALL_BIG_EXT, so I don't think this code is executed
1835-
ctx->x[0] = ERROR_ATOM;
1836-
ctx->x[1] = OVERFLOW_ATOM;
1837-
*out_term = term_invalid_term();
1838-
goto return_size;
1839-
}
1840-
1841-
size_t intn_data_size;
1842-
size_t rounded_res_len;
1843-
term_bigint_size_requirements(count, &intn_data_size, &rounded_res_len);
1844-
1845-
Heap heap;
1846-
if (UNLIKELY(
1847-
memory_init_heap(&heap, BOXED_BIGINT_HEAP_SIZE(intn_data_size)) != MEMORY_GC_OK)) {
1848-
ctx->x[0] = ERROR_ATOM;
1849-
ctx->x[1] = OUT_OF_MEMORY_ATOM;
1850-
*out_term = term_invalid_term();
1851-
goto return_size;
1852-
}
1853-
1854-
term bigint_term
1855-
= term_create_uninitialized_bigint(intn_data_size, (term_integer_sign_t) sign, &heap);
1856-
term_initialize_bigint(bigint_term, bigint, count, rounded_res_len);
1857-
1858-
memory_heap_append_heap(&ctx->heap, &heap);
1859-
1860-
*out_term = bigint_term;
1863+
*out_term = extract_nbits_integer(ctx, new_encoded, len, IntnSigned);
18611864
}
18621865

1863-
return_size:
18641866
return (new_encoded - encoded) + len;
18651867
}
18661868
#endif
@@ -5298,25 +5300,44 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
52985300
union maybe_unsigned_int64 value;
52995301
term bs_bin = term_get_match_state_binary(src);
53005302
avm_int_t bs_offset = term_get_match_state_offset(src);
5301-
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
5302-
if (UNLIKELY(!status)) {
5303-
TRACE("bs_get_integer2: error extracting integer.\n");
5304-
JUMP_TO_ADDRESS(mod->labels[fail]);
5305-
} else {
5306-
term_set_match_state_offset(src, bs_offset + increment);
5303+
term t;
5304+
if (increment <= 64) {
5305+
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
5306+
if (UNLIKELY(!status)) {
5307+
TRACE("bs_get_integer2: error extracting integer.\n");
5308+
JUMP_TO_ADDRESS(mod->labels[fail]);
5309+
} else {
5310+
term_set_match_state_offset(src, bs_offset + increment);
53075311

5308-
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
5309-
if (UNLIKELY(term_is_invalid_term(t))) {
5312+
t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
5313+
if (UNLIKELY(term_is_invalid_term(t))) {
5314+
HANDLE_ERROR();
5315+
}
5316+
}
5317+
} else if ((bs_offset % 8 == 0) && (increment % 8 == 0) && (increment <= INTN_MAX_UNSIGNED_BITS_SIZE)) {
5318+
unsigned long capacity = term_binary_size(bs_bin);
5319+
if (8 * capacity - bs_offset < (unsigned long) increment) {
5320+
JUMP_TO_ADDRESS(mod->labels[fail]);
5321+
}
5322+
size_t byte_offset = bs_offset / 8;
5323+
const uint8_t *int_bytes = (const uint8_t *) term_binary_data(bs_bin);
5324+
5325+
t = extract_nbits_integer(ctx, int_bytes + byte_offset, increment / 8,
5326+
bitstring_flags_to_intn_opts(flags_value));
5327+
term_set_match_state_offset(src, bs_offset + increment);
5328+
if (term_is_invalid_term(t)) {
53105329
HANDLE_ERROR();
53115330
}
5331+
} else {
5332+
JUMP_TO_ADDRESS(mod->labels[fail]);
5333+
}
53125334
#endif
53135335

53145336
DEST_REGISTER(dreg);
53155337
DECODE_DEST_REGISTER(dreg, pc);
53165338

53175339
#ifdef IMPL_EXECUTE_LOOP
5318-
WRITE_REGISTER(dreg, t);
5319-
}
5340+
WRITE_REGISTER(dreg, t);
53205341
#endif
53215342
break;
53225343
}
@@ -7273,15 +7294,35 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
72737294
avm_int_t size_val = term_to_int(size);
72747295
avm_int_t increment = size_val * unit;
72757296
union maybe_unsigned_int64 value;
7276-
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
7277-
if (UNLIKELY(!status)) {
7278-
TRACE("bs_match/3: error extracting integer.\n");
7297+
term t;
7298+
if (increment <= 64) {
7299+
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
7300+
if (UNLIKELY(!status)) {
7301+
TRACE("bs_match/3: error extracting integer.\n");
7302+
goto bs_match_jump_to_fail;
7303+
}
7304+
//FIXME: handling of 64-bit unsigned integers is not reliable
7305+
t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
7306+
if (UNLIKELY(term_is_invalid_term(t))) {
7307+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
7308+
}
7309+
} else if ((bs_offset % 8 == 0) && (increment % 8 == 0) && (increment <= INTN_MAX_UNSIGNED_BITS_SIZE)) {
7310+
unsigned long capacity = term_binary_size(bs_bin);
7311+
if (8 * capacity - bs_offset < (unsigned long) increment) {
7312+
goto bs_match_jump_to_fail;
7313+
}
7314+
size_t byte_offset = bs_offset / 8;
7315+
const uint8_t *int_bytes
7316+
= (const uint8_t *) term_binary_data(bs_bin);
7317+
7318+
t = extract_nbits_integer(ctx, int_bytes + byte_offset,
7319+
increment / 8, bitstring_flags_to_intn_opts(flags_value));
7320+
if (term_is_invalid_term(t)) {
7321+
HANDLE_ERROR();
7322+
}
7323+
} else {
72797324
goto bs_match_jump_to_fail;
72807325
}
7281-
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
7282-
if (UNLIKELY(term_is_invalid_term(t))) {
7283-
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
7284-
}
72857326
#endif
72867327
DEST_REGISTER(dreg);
72877328
DECODE_DEST_REGISTER(dreg, pc);
@@ -7388,6 +7429,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
73887429
DECODE_LITERAL(pattern_value, pc);
73897430
j++;
73907431
#ifdef IMPL_EXECUTE_LOOP
7432+
if (size > 64) {
7433+
// TODO: implement support for big integers also here
7434+
RAISE_ERROR(BADARG_ATOM);
7435+
}
73917436
union maybe_unsigned_int64 matched_value;
73927437
bool status = bitstring_extract_integer(bs_bin, bs_offset, size, 0, &matched_value);
73937438
if (UNLIKELY(!status)) {

tests/erlang_tests/bigint.erl

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ start() ->
7272
test_is_number() +
7373
test_gt_lt_guards() +
7474
to_external_term() +
75+
test_pattern_match() +
7576
test_band() +
7677
test_bxor() +
7778
test_bor() +
@@ -2138,6 +2139,44 @@ to_external_term() ->
21382139

21392140
0.
21402141

2142+
%% Checks binary pattern matching of integer segments wider than 64 bits
%% (72, 80, 120, 128 and 256 bits) with different signedness and endianness.
%% Each extracted value is verified through its base-16 representation.
%% Returns 0 on success; any mismatch raises a badmatch error.
test_pattern_match() ->
2143+
%% 72-bit little-endian signed: most significant byte is 80 (16#50), so positive.
<<Int72:72/integer-little-signed>> = ?MODULE:id(<<23, 4, 222, 66, 172, 197, 113, 183, 80>>),
2144+
<<"50B771C5AC42DE0417">> = erlang:integer_to_binary(?MODULE:id(Int72), 16),
2145+
%% 80-bit little-endian signed: most significant byte is 213 (16#D5), so negative.
<<Int80:80/integer-little-signed>> = ?MODULE:id(
2146+
<<165, 63, 196, 58, 33, 96, 209, 59, 244, 213>>
2147+
),
2148+
<<"-2A0BC42E9FDEC53BC05B">> = erlang:integer_to_binary(?MODULE:id(Int80), 16),
2149+
%% 120-bit big-endian unsigned with a leading zero byte.
<<Int120:120/unsigned-big-integer>> = ?MODULE:id(
2150+
<<0, 242, 138, 221, 68, 111, 58, 120, 145, 135, 164, 56, 164, 12, 205>>
2151+
),
2152+
<<"F28ADD446F3A789187A438A40CCD">> = erlang:integer_to_binary(?MODULE:id(Int120), 16),
2153+
%% 256-bit big-endian unsigned.
<<Int256:256/unsigned-big-integer>> = ?MODULE:id(
2154+
<<202, 196, 64, 150, 63, 238, 50, 47, 214, 81, 247, 55, 151, 242, 169, 106, 162, 211, 73,
2155+
155, 211, 85, 164, 237, 153, 138, 191, 77, 87, 183, 204, 111>>
2156+
),
2157+
<<"CAC440963FEE322FD651F73797F2A96AA2D3499BD355A4ED998ABF4D57B7CC6F">> = erlang:integer_to_binary(
2158+
?MODULE:id(Int256), 16
2159+
),
2160+
2161+
%% 128-bit little-endian unsigned placed between a "foo" prefix and a "bar" tail,
%% so the segment starts at a non-zero byte offset.
<<"foo", Int128:128/unsigned-little-integer, Bar/binary>> = ?MODULE:id(
2162+
<<102, 111, 111, 183, 226, 155, 102, 249, 246, 168, 101, 53, 36, 21, 10, 133, 223, 231, 10,
2163+
98, 97, 114>>
2164+
),
2165+
<<"AE7DF850A15243565A8F6F9669BE2B7">> = erlang:integer_to_binary(?MODULE:id(Int128), 16),
2166+
<<"bar">> = ?MODULE:id(Bar),
2167+
2168+
%% Negative case: only 15 bytes follow the "foo" prefix, fewer than the 16 bytes
%% a 128-bit segment needs, so the first clause must not match.
ok =
2169+
case
2170+
?MODULE:id(
2171+
<<102, 111, 111, 183, 226, 155, 102, 249, 246, 168, 101, 53, 36, 21, 10, 133, 223,
2172+
231>>
2173+
)
2174+
of
2175+
%% NOTE(review): Bar is already bound above, so this clause compares the tail
%% against that value instead of binding a fresh variable - confirm intended.
<<"foo", _I128:128/unsigned-little-integer, Bar/binary>> -> error;
2176+
_ -> ok
2177+
end,
2178+
0.
2179+
21412180
test_band() ->
21422181
MaxPatternBin = <<"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF">>,
21432182
MaxPattern = erlang:binary_to_integer(?MODULE:id(MaxPatternBin), 16),

0 commit comments

Comments
 (0)