Skip to content

Commit f7676a8

Browse files
committed
Merge pull request #1938 from bettio/minimal-bigint-pattern-matching
Minimal bigint pattern matching. Add everything needed to allow the JIT compiler (with big integers support) to run on AtomVM. Continuation of #1933. These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license). SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents b5e4511 + 950c9ae commit f7676a8

File tree

6 files changed

+210
-59
lines changed

6 files changed

+210
-59
lines changed

src/libAtomVM/bitstring.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,3 +406,20 @@ bool bitstring_extract_f64(
406406
return false;
407407
}
408408
}
409+
410+
intn_from_integer_options_t bitstring_flags_to_intn_opts(enum BitstringFlags bf)
411+
{
412+
intn_from_integer_options_t converted = IntnUnsignedBigEndian;
413+
if (bf & LittleEndianInteger) {
414+
converted |= IntnLittleEndian;
415+
}
416+
if (bf & SignedInteger) {
417+
converted |= IntnSigned;
418+
}
419+
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
420+
if (bf & NativeEndianInteger) {
421+
converted |= IntnLittleEndian;
422+
}
423+
#endif
424+
return converted;
425+
}

src/libAtomVM/bitstring.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#ifndef _BITSTRING_H_
2323
#define _BITSTRING_H_
2424

25+
#include "intn.h"
2526
#include "term.h"
2627
#include "unicode.h"
2728

@@ -528,6 +529,8 @@ bool bitstring_extract_f32(
528529
bool bitstring_extract_f64(
529530
term src_bin, size_t offset, avm_int_t n, enum BitstringFlags bs_flags, avm_float_t *dst);
530531

532+
intn_from_integer_options_t bitstring_flags_to_intn_opts(enum BitstringFlags bf);
533+
531534
#ifdef __cplusplus
532535
}
533536
#endif

src/libAtomVM/intn.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,6 +1182,8 @@ int intn_from_integer_bytes(const uint8_t in[], size_t in_size, intn_from_intege
11821182
sign = IntNNegativeInteger;
11831183
}
11841184
*out_sign = sign;
1185+
} else if (out_sign) {
1186+
*out_sign = IntNPositiveInteger;
11851187
}
11861188

11871189
memset(out, filler, INTN_MAX_RES_LEN * sizeof(intn_digit_t));

src/libAtomVM/jit.c

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,19 +1244,64 @@ static term jit_term_alloc_bin_match_state(Context *ctx, term src, int slots)
12441244
return term_alloc_bin_match_state(src, slots, &ctx->heap);
12451245
}
12461246

1247-
static term jit_bitstring_extract_integer(Context *ctx, JITState *jit_state, term *bin_ptr, size_t offset, int n, int bs_flags)
1247+
static term extract_bigint(Context *ctx, JITState *jit_state, const uint8_t *bytes,
1248+
size_t bytes_size, intn_from_integer_options_t opts)
12481249
{
1249-
TRACE("jit_bitstring_extract_integer: bin_ptr=%p offset=%d n=%d bs_flags=%d\n", (void *) bin_ptr, (int) offset, n, bs_flags);
1250-
union maybe_unsigned_int64 value;
1251-
bool status = bitstring_extract_integer(((term) bin_ptr) | TERM_PRIMARY_BOXED, offset, n, bs_flags, &value);
1252-
if (UNLIKELY(!status)) {
1250+
intn_integer_sign_t sign;
1251+
intn_digit_t bigint[INTN_MAX_RES_LEN];
1252+
int count = intn_from_integer_bytes(bytes, bytes_size, opts, bigint, &sign);
1253+
// count will be always >= 0, caller ensures that bits <= INTN_MAX_UNSIGNED_BITS_SIZE
1254+
1255+
size_t intn_data_size;
1256+
size_t rounded_res_len;
1257+
term_bigint_size_requirements(count, &intn_data_size, &rounded_res_len);
1258+
1259+
Heap heap;
1260+
if (UNLIKELY(memory_init_heap(&heap, BOXED_BIGINT_HEAP_SIZE(intn_data_size)) != MEMORY_GC_OK)) {
1261+
set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM);
12531262
return FALSE_ATOM;
12541263
}
1255-
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
1256-
if (UNLIKELY(term_is_invalid_term(t))) {
1257-
set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM);
1264+
1265+
term bigint_term
1266+
= term_create_uninitialized_bigint(intn_data_size, (term_integer_sign_t) sign, &heap);
1267+
term_initialize_bigint(bigint_term, bigint, count, rounded_res_len);
1268+
1269+
memory_heap_append_heap(&ctx->heap, &heap);
1270+
1271+
return bigint_term;
1272+
}
1273+
1274+
static term jit_bitstring_extract_integer(
1275+
Context *ctx, JITState *jit_state, term *bin_ptr, size_t offset, int n, int bs_flags)
1276+
{
1277+
TRACE("jit_bitstring_extract_integer: bin_ptr=%p offset=%d n=%d bs_flags=%d\n",
1278+
(void *) bin_ptr, (int) offset, n, bs_flags);
1279+
if (n <= 64) {
1280+
union maybe_unsigned_int64 value;
1281+
bool status = bitstring_extract_integer(
1282+
((term) bin_ptr) | TERM_PRIMARY_BOXED, offset, n, bs_flags, &value);
1283+
if (UNLIKELY(!status)) {
1284+
return FALSE_ATOM;
1285+
}
1286+
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
1287+
if (UNLIKELY(term_is_invalid_term(t))) {
1288+
set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM);
1289+
}
1290+
return t;
1291+
} else if ((offset % 8 == 0) && (n % 8 == 0) && (n <= INTN_MAX_UNSIGNED_BITS_SIZE)) {
1292+
term bs_bin = ((term) bin_ptr) | TERM_PRIMARY_BOXED;
1293+
unsigned long capacity = term_binary_size(bs_bin);
1294+
if (8 * capacity - offset < (unsigned long) n) {
1295+
return FALSE_ATOM;
1296+
}
1297+
size_t byte_offset = offset / 8;
1298+
const uint8_t *int_bytes = (const uint8_t *) term_binary_data(bs_bin);
1299+
1300+
return extract_bigint(
1301+
ctx, jit_state, int_bytes + byte_offset, n / 8, bitstring_flags_to_intn_opts(bs_flags));
1302+
} else {
1303+
return FALSE_ATOM;
12581304
}
1259-
return t;
12601305
}
12611306

12621307
static term jit_bitstring_extract_float(Context *ctx, term *bin_ptr, size_t offset, int n, int bs_flags)

src/libAtomVM/opcodesswitch.h

Lines changed: 95 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,6 +1814,40 @@ static bool maybe_call_native(Context *ctx, atom_index_t module_name, atom_index
18141814
#endif
18151815

18161816
#ifndef AVM_NO_EMU
1817+
static term extract_nbits_integer(Context *ctx, const uint8_t *bytes, size_t bytes_size, intn_from_integer_options_t opts)
1818+
{
1819+
intn_integer_sign_t sign;
1820+
intn_digit_t bigint[INTN_MAX_RES_LEN];
1821+
int count = intn_from_integer_bytes(bytes, bytes_size, opts, bigint, &sign);
1822+
if (UNLIKELY(count < 0)) {
1823+
// this is likely unreachable, compiler seem to generate an external term
1824+
// and to encode this as SMALL_BIG_EXT, so I don't think this code is executed
1825+
ctx->x[0] = ERROR_ATOM;
1826+
ctx->x[1] = OVERFLOW_ATOM;
1827+
return term_invalid_term();
1828+
}
1829+
1830+
size_t intn_data_size;
1831+
size_t rounded_res_len;
1832+
term_bigint_size_requirements(count, &intn_data_size, &rounded_res_len);
1833+
1834+
Heap heap;
1835+
if (UNLIKELY(
1836+
memory_init_heap(&heap, BOXED_BIGINT_HEAP_SIZE(intn_data_size)) != MEMORY_GC_OK)) {
1837+
ctx->x[0] = ERROR_ATOM;
1838+
ctx->x[1] = OUT_OF_MEMORY_ATOM;
1839+
return term_invalid_term();
1840+
}
1841+
1842+
term bigint_term
1843+
= term_create_uninitialized_bigint(intn_data_size, (term_integer_sign_t) sign, &heap);
1844+
term_initialize_bigint(bigint_term, bigint, count, rounded_res_len);
1845+
1846+
memory_heap_append_heap(&ctx->heap, &heap);
1847+
1848+
return bigint_term;
1849+
}
1850+
18171851
static size_t decode_nbits_integer(Context *ctx, const uint8_t *encoded, term *out_term)
18181852
{
18191853
const uint8_t *new_encoded = encoded;
@@ -1826,41 +1860,9 @@ static bool maybe_call_native(Context *ctx, atom_index_t module_name, atom_index
18261860
len += 9;
18271861

18281862
if (out_term) {
1829-
intn_integer_sign_t sign;
1830-
intn_digit_t bigint[INTN_MAX_RES_LEN];
1831-
int count = intn_from_integer_bytes(new_encoded, len, IntnSigned, bigint, &sign);
1832-
if (UNLIKELY(count < 0)) {
1833-
// this is likely unreachable, compiler seem to generate an external term
1834-
// and to encode this as SMALL_BIG_EXT, so I don't think this code is executed
1835-
ctx->x[0] = ERROR_ATOM;
1836-
ctx->x[1] = OVERFLOW_ATOM;
1837-
*out_term = term_invalid_term();
1838-
goto return_size;
1839-
}
1840-
1841-
size_t intn_data_size;
1842-
size_t rounded_res_len;
1843-
term_bigint_size_requirements(count, &intn_data_size, &rounded_res_len);
1844-
1845-
Heap heap;
1846-
if (UNLIKELY(
1847-
memory_init_heap(&heap, BOXED_BIGINT_HEAP_SIZE(intn_data_size)) != MEMORY_GC_OK)) {
1848-
ctx->x[0] = ERROR_ATOM;
1849-
ctx->x[1] = OUT_OF_MEMORY_ATOM;
1850-
*out_term = term_invalid_term();
1851-
goto return_size;
1852-
}
1853-
1854-
term bigint_term
1855-
= term_create_uninitialized_bigint(intn_data_size, (term_integer_sign_t) sign, &heap);
1856-
term_initialize_bigint(bigint_term, bigint, count, rounded_res_len);
1857-
1858-
memory_heap_append_heap(&ctx->heap, &heap);
1859-
1860-
*out_term = bigint_term;
1863+
*out_term = extract_nbits_integer(ctx, new_encoded, len, IntnSigned);
18611864
}
18621865

1863-
return_size:
18641866
return (new_encoded - encoded) + len;
18651867
}
18661868
#endif
@@ -5298,25 +5300,44 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
52985300
union maybe_unsigned_int64 value;
52995301
term bs_bin = term_get_match_state_binary(src);
53005302
avm_int_t bs_offset = term_get_match_state_offset(src);
5301-
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
5302-
if (UNLIKELY(!status)) {
5303-
TRACE("bs_get_integer2: error extracting integer.\n");
5304-
JUMP_TO_ADDRESS(mod->labels[fail]);
5305-
} else {
5306-
term_set_match_state_offset(src, bs_offset + increment);
5303+
term t;
5304+
if (increment <= 64) {
5305+
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
5306+
if (UNLIKELY(!status)) {
5307+
TRACE("bs_get_integer2: error extracting integer.\n");
5308+
JUMP_TO_ADDRESS(mod->labels[fail]);
5309+
} else {
5310+
term_set_match_state_offset(src, bs_offset + increment);
53075311

5308-
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
5309-
if (UNLIKELY(term_is_invalid_term(t))) {
5312+
t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
5313+
if (UNLIKELY(term_is_invalid_term(t))) {
5314+
HANDLE_ERROR();
5315+
}
5316+
}
5317+
} else if ((bs_offset % 8 == 0) && (increment % 8 == 0) && (increment <= INTN_MAX_UNSIGNED_BITS_SIZE)) {
5318+
unsigned long capacity = term_binary_size(bs_bin);
5319+
if (8 * capacity - bs_offset < (unsigned long) increment) {
5320+
JUMP_TO_ADDRESS(mod->labels[fail]);
5321+
}
5322+
size_t byte_offset = bs_offset / 8;
5323+
const uint8_t *int_bytes = (const uint8_t *) term_binary_data(bs_bin);
5324+
5325+
t = extract_nbits_integer(ctx, int_bytes + byte_offset, increment / 8,
5326+
bitstring_flags_to_intn_opts(flags_value));
5327+
term_set_match_state_offset(src, bs_offset + increment);
5328+
if (term_is_invalid_term(t)) {
53105329
HANDLE_ERROR();
53115330
}
5331+
} else {
5332+
JUMP_TO_ADDRESS(mod->labels[fail]);
5333+
}
53125334
#endif
53135335

53145336
DEST_REGISTER(dreg);
53155337
DECODE_DEST_REGISTER(dreg, pc);
53165338

53175339
#ifdef IMPL_EXECUTE_LOOP
5318-
WRITE_REGISTER(dreg, t);
5319-
}
5340+
WRITE_REGISTER(dreg, t);
53205341
#endif
53215342
break;
53225343
}
@@ -7273,15 +7294,35 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
72737294
avm_int_t size_val = term_to_int(size);
72747295
avm_int_t increment = size_val * unit;
72757296
union maybe_unsigned_int64 value;
7276-
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
7277-
if (UNLIKELY(!status)) {
7278-
TRACE("bs_match/3: error extracting integer.\n");
7297+
term t;
7298+
if (increment <= 64) {
7299+
bool status = bitstring_extract_integer(bs_bin, bs_offset, increment, flags_value, &value);
7300+
if (UNLIKELY(!status)) {
7301+
TRACE("bs_match/3: error extracting integer.\n");
7302+
goto bs_match_jump_to_fail;
7303+
}
7304+
//FIXME: handling of 64-bit unsigned integers is not reliable
7305+
t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
7306+
if (UNLIKELY(term_is_invalid_term(t))) {
7307+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
7308+
}
7309+
} else if ((bs_offset % 8 == 0) && (increment % 8 == 0) && (increment <= INTN_MAX_UNSIGNED_BITS_SIZE)) {
7310+
unsigned long capacity = term_binary_size(bs_bin);
7311+
if (8 * capacity - bs_offset < (unsigned long) increment) {
7312+
goto bs_match_jump_to_fail;
7313+
}
7314+
size_t byte_offset = bs_offset / 8;
7315+
const uint8_t *int_bytes
7316+
= (const uint8_t *) term_binary_data(bs_bin);
7317+
7318+
t = extract_nbits_integer(ctx, int_bytes + byte_offset,
7319+
increment / 8, bitstring_flags_to_intn_opts(flags_value));
7320+
if (term_is_invalid_term(t)) {
7321+
HANDLE_ERROR();
7322+
}
7323+
} else {
72797324
goto bs_match_jump_to_fail;
72807325
}
7281-
term t = maybe_alloc_boxed_integer_fragment(ctx, value.s);
7282-
if (UNLIKELY(term_is_invalid_term(t))) {
7283-
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
7284-
}
72857326
#endif
72867327
DEST_REGISTER(dreg);
72877328
DECODE_DEST_REGISTER(dreg, pc);
@@ -7388,6 +7429,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
73887429
DECODE_LITERAL(pattern_value, pc);
73897430
j++;
73907431
#ifdef IMPL_EXECUTE_LOOP
7432+
if (size > 64) {
7433+
// TODO: implement support for big integers also here
7434+
RAISE_ERROR(BADARG_ATOM);
7435+
}
73917436
union maybe_unsigned_int64 matched_value;
73927437
bool status = bitstring_extract_integer(bs_bin, bs_offset, size, 0, &matched_value);
73937438
if (UNLIKELY(!status)) {

tests/erlang_tests/bigint.erl

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ start() ->
7272
test_is_number() +
7373
test_gt_lt_guards() +
7474
to_external_term() +
75+
test_pattern_match() +
7576
test_band() +
7677
test_bxor() +
7778
test_bor() +
@@ -2138,6 +2139,44 @@ to_external_term() ->
21382139

21392140
0.
21402141

2142+
test_pattern_match() ->
2143+
<<Int72:72/integer-little-signed>> = ?MODULE:id(<<23, 4, 222, 66, 172, 197, 113, 183, 80>>),
2144+
<<"50B771C5AC42DE0417">> = erlang:integer_to_binary(?MODULE:id(Int72), 16),
2145+
<<Int80:80/integer-little-signed>> = ?MODULE:id(
2146+
<<165, 63, 196, 58, 33, 96, 209, 59, 244, 213>>
2147+
),
2148+
<<"-2A0BC42E9FDEC53BC05B">> = erlang:integer_to_binary(?MODULE:id(Int80), 16),
2149+
<<Int120:120/unsigned-big-integer>> = ?MODULE:id(
2150+
<<0, 242, 138, 221, 68, 111, 58, 120, 145, 135, 164, 56, 164, 12, 205>>
2151+
),
2152+
<<"F28ADD446F3A789187A438A40CCD">> = erlang:integer_to_binary(?MODULE:id(Int120), 16),
2153+
<<Int256:256/unsigned-big-integer>> = ?MODULE:id(
2154+
<<202, 196, 64, 150, 63, 238, 50, 47, 214, 81, 247, 55, 151, 242, 169, 106, 162, 211, 73,
2155+
155, 211, 85, 164, 237, 153, 138, 191, 77, 87, 183, 204, 111>>
2156+
),
2157+
<<"CAC440963FEE322FD651F73797F2A96AA2D3499BD355A4ED998ABF4D57B7CC6F">> = erlang:integer_to_binary(
2158+
?MODULE:id(Int256), 16
2159+
),
2160+
2161+
<<"foo", Int128:128/unsigned-little-integer, Bar/binary>> = ?MODULE:id(
2162+
<<102, 111, 111, 183, 226, 155, 102, 249, 246, 168, 101, 53, 36, 21, 10, 133, 223, 231, 10,
2163+
98, 97, 114>>
2164+
),
2165+
<<"AE7DF850A15243565A8F6F9669BE2B7">> = erlang:integer_to_binary(?MODULE:id(Int128), 16),
2166+
<<"bar">> = ?MODULE:id(Bar),
2167+
2168+
ok =
2169+
case
2170+
?MODULE:id(
2171+
<<102, 111, 111, 183, 226, 155, 102, 249, 246, 168, 101, 53, 36, 21, 10, 133, 223,
2172+
231>>
2173+
)
2174+
of
2175+
<<"foo", _I128:128/unsigned-little-integer, Bar/binary>> -> error;
2176+
_ -> ok
2177+
end,
2178+
0.
2179+
21412180
test_band() ->
21422181
MaxPatternBin = <<"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF">>,
21432182
MaxPattern = erlang:binary_to_integer(?MODULE:id(MaxPatternBin), 16),

0 commit comments

Comments
 (0)