Skip to content

Commit b989e98

Browse files
committed
Add support for SMALL_BIG_EXT term encoding
Signed-off-by: Fred Dushin <[email protected]>
1 parent afea246 commit b989e98

File tree

4 files changed

+204
-6
lines changed

4 files changed

+204
-6
lines changed

src/libAtomVM/externalterm.c

Lines changed: 86 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#define STRING_EXT 107
4141
#define LIST_EXT 108
4242
#define BINARY_EXT 109
43+
#define SMALL_BIG_EXT 110
4344
#define EXPORT_EXT 113
4445
#define MAP_EXT 116
4546
#define SMALL_ATOM_UTF8_EXT 119
@@ -48,13 +49,17 @@
4849
#define NEW_FLOAT_EXT_SIZE 9
4950
#define SMALL_INTEGER_EXT_SIZE 2
5051
#define INTEGER_EXT_SIZE 5
52+
#define SMALL_BIG_EXT_BASE_SIZE 3
5153
#define ATOM_EXT_BASE_SIZE 3
5254
#define STRING_EXT_BASE_SIZE 3
5355
#define LIST_EXT_BASE_SIZE 5
5456
#define BINARY_EXT_BASE_SIZE 5
5557
#define MAP_EXT_BASE_SIZE 5
5658
#define SMALL_ATOM_EXT_BASE_SIZE 2
5759

60+
// Assuming two's-complement implementation of signed integers
61+
// Yields the magnitude of a signed integer: for a negative val the value is converted to
// unsigned_type and negated in unsigned arithmetic (~x + 1); a non-negative val is used as-is.
// NOTE: val is expanded more than once, so pass an expression without side effects.
#define SIGNED_INT_TO_UNSIGNED(val, unsigned_type) ((val) < 0 ? ~((unsigned_type) (val)) + 1 : (val))
62+
5863
// MAINTENANCE NOTE. Range checking on the external term buffer is only performed in
5964
// the calculate_heap_usage function, which will fail with an invalid term if there is
6065
// insufficient space in the external term buffer (preventing reading off the end of the
@@ -202,6 +207,27 @@ static size_t compute_external_size(Context *ctx, term t)
202207
return serialize_term(ctx, NULL, t);
203208
}
204209

210+
// Returns the number of bytes needed to hold val, i.e. the position of its highest
// non-zero byte. Returns 0 when val is 0.
static uint8_t get_num_bytes(avm_uint64_t val)
{
    uint8_t count;
    // Drop one byte per iteration until nothing is left.
    for (count = 0; val != 0; val >>= 8) {
        ++count;
    }
    return count;
}
219+
220+
// Writes the non-zero-prefixed bytes of val into buf, least significant byte first.
// Exactly get_num_bytes(val) bytes are written; nothing is written when val is 0.
static void write_bytes(uint8_t *buf, avm_uint64_t val)
{
    for (uint8_t *out = buf; val != 0; val >>= 8) {
        *out++ = (uint8_t) (val & 0xFF);
    }
}
230+
205231
static int serialize_term(Context *ctx, uint8_t *buf, term t)
206232
{
207233
if (term_is_uint8(t)) {
@@ -211,13 +237,26 @@ static int serialize_term(Context *ctx, uint8_t *buf, term t)
211237
}
212238
return 2;
213239

214-
} else if (term_is_integer(t)) {
215-
if (!IS_NULL_PTR(buf)) {
216-
int32_t val = term_to_int32(t);
217-
buf[0] = INTEGER_EXT;
218-
WRITE_32_UNALIGNED(buf + 1, val);
240+
} else if (term_is_any_integer(t)) {
241+
242+
avm_int64_t val = term_maybe_unbox_int64(t);
243+
if (val >= INT32_MIN && val <= INT32_MAX) {
244+
if (buf != NULL) {
245+
buf[0] = INTEGER_EXT;
246+
WRITE_32_UNALIGNED(buf + 1, (int32_t) val);
247+
}
248+
return INTEGER_EXT_SIZE;
249+
} else {
250+
avm_uint64_t unsigned_val = SIGNED_INT_TO_UNSIGNED(val, avm_uint64_t);
251+
uint8_t num_bytes = get_num_bytes(unsigned_val);
252+
if (buf != NULL) {
253+
buf[0] = SMALL_BIG_EXT;
254+
buf[1] = num_bytes;
255+
buf[2] = val < 0 ? 0x01 : 0x00;
256+
write_bytes(buf + 3, unsigned_val);
257+
}
258+
return SMALL_BIG_EXT_BASE_SIZE + num_bytes;
219259
}
220-
return 5;
221260

222261
} else if (term_is_atom(t)) {
223262
AtomString atom_string = globalcontext_atomstring_from_term(ctx->global, t);
@@ -328,6 +367,15 @@ static int serialize_term(Context *ctx, uint8_t *buf, term t)
328367
}
329368
}
330369

370+
static avm_uint64_t read_bytes(const uint8_t *buf, uint8_t num_bytes)
371+
{
372+
avm_uint64_t value = 0;
373+
for (uint8_t i = 0; i < num_bytes; ++i) {
374+
value |= (((avm_uint64_t) buf[i]) << (i * 8));
375+
}
376+
return value;
377+
}
378+
331379
static term parse_external_terms(const uint8_t *external_term_buf, int *eterm_size, Context *ctx, bool copy)
332380
{
333381
switch (external_term_buf[0]) {
@@ -355,6 +403,23 @@ static term parse_external_terms(const uint8_t *external_term_buf, int *eterm_si
355403
return term_make_maybe_boxed_int64(ctx, value);
356404
}
357405

406+
case SMALL_BIG_EXT: {
    // Layout read here: tag, byte count, sign byte, then the magnitude bytes
    // (least significant first).
    uint8_t num_bytes = external_term_buf[1];
    uint8_t sign = external_term_buf[2];
    avm_uint64_t unsigned_value = read_bytes(external_term_buf + 3, num_bytes);
    // NB due to call to calculate_heap_usage, there is no loss of precision:
    // 1. 0 <= unsigned_value <= INT64_MAX if sign is 0
    // 2. 0 <= unsigned_value <= INT64_MAX + 1 if sign is not 0
    avm_int64_t value;
    if (sign == 0x00) {
        value = (avm_int64_t) unsigned_value;
    } else if (unsigned_value > (avm_uint64_t) INT64_MAX) {
        // Magnitude is INT64_MAX + 1: negating it as a signed value would overflow
        // (undefined behaviour), so produce INT64_MIN directly.
        value = INT64_MIN;
    } else {
        value = -((avm_int64_t) unsigned_value);
    }
    *eterm_size = SMALL_BIG_EXT_BASE_SIZE + num_bytes;
    return term_make_maybe_boxed_int64(ctx, value);
}
422+
358423
case ATOM_EXT: {
359424
uint16_t atom_len = READ_16_UNALIGNED(external_term_buf + 1);
360425

@@ -536,6 +601,21 @@ static int calculate_heap_usage(const uint8_t *external_term_buf, size_t remaini
536601
return term_boxed_integer_size(value);
537602
}
538603

604+
case SMALL_BIG_EXT: {
    // The fixed header (tag, byte count, sign) must be present before any of it is read.
    if (UNLIKELY(remaining < SMALL_BIG_EXT_BASE_SIZE)) {
        return INVALID_TERM_SIZE;
    }
    uint8_t num_bytes = external_term_buf[1];
    if (UNLIKELY(num_bytes > 8 || remaining < (SMALL_BIG_EXT_BASE_SIZE + num_bytes))) {
        return INVALID_TERM_SIZE;
    }
    uint8_t sign = external_term_buf[2];
    *eterm_size = SMALL_BIG_EXT_BASE_SIZE + num_bytes;
    avm_uint64_t magnitude = read_bytes(external_term_buf + 3, num_bytes);
    // NB. We currently support max 64-bit signed integers: the magnitude may be at most
    // INT64_MAX for a non-negative value and INT64_MAX + 1 for a negative one.
    if (UNLIKELY((sign == 0 && magnitude > INT64_MAX) || (sign != 0 && magnitude > (((avm_uint64_t) INT64_MAX) + 1)))) {
        return INVALID_TERM_SIZE;
    }
    // Size the term from the signed value that parse_external_terms will build, not from
    // the raw magnitude. The INT64_MIN case is handled apart to avoid signed overflow.
    avm_int64_t value;
    if (sign == 0) {
        value = (avm_int64_t) magnitude;
    } else if (magnitude > (avm_uint64_t) INT64_MAX) {
        value = INT64_MIN;
    } else {
        value = -((avm_int64_t) magnitude);
    }
    return term_boxed_integer_size(value);
}
618+
539619
case ATOM_EXT: {
540620
if (UNLIKELY(remaining < ATOM_EXT_BASE_SIZE)) {
541621
return INVALID_TERM_SIZE;

tests/erlang_tests/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,7 @@ compile_erlang(unlink_error)
438438
compile_erlang(trap_exit_flag)
439439

440440
compile_erlang(test_stacktrace)
441+
compile_erlang(small_big_ext)
441442

442443
add_custom_target(erlang_test_modules DEPENDS
443444
add.beam
@@ -851,4 +852,5 @@ add_custom_target(erlang_test_modules DEPENDS
851852
trap_exit_flag.beam
852853

853854
test_stacktrace.beam
855+
small_big_ext.beam
854856
)
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
%
2+
% This file is part of AtomVM.
3+
%
4+
% Copyright 2023 Fred Dushin <[email protected]>
5+
%
6+
% Licensed under the Apache License, Version 2.0 (the "License");
7+
% you may not use this file except in compliance with the License.
8+
% You may obtain a copy of the License at
9+
%
10+
% http://www.apache.org/licenses/LICENSE-2.0
11+
%
12+
% Unless required by applicable law or agreed to in writing, software
13+
% distributed under the License is distributed on an "AS IS" BASIS,
14+
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
% See the License for the specific language governing permissions and
16+
% limitations under the License.
17+
%
18+
% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
19+
%
20+
21+
-module(small_big_ext).
22+
23+
-export([start/0]).
24+
25+
-define(INT64_MAX, 9223372036854775807).
26+
-define(INT64_MIN, -9223372036854775808).
27+
28+
%% Test entry point. Checks SMALL_BIG_EXT (tag 110) encoding and decoding of integers
%% outside the 32-bit range, plus the INTEGER_EXT (tag 98) boundaries, and returns 0
%% when every check passes. Any failed check crashes with a badmatch.
start() ->
    true = test_reverse(pow(32), <<131, 110, 5, 0, 0, 0, 0, 0, 1>>),
    true = test_reverse(pow(32) + 1, <<131, 110, 5, 0, 1, 0, 0, 0, 1>>),
    true = test_reverse(pow(60) + 1, <<131, 110, 8, 0, 1, 0, 0, 0, 0, 0, 0, 16>>),
    true = test_reverse(-1 * (pow(60) + 1), <<131, 110, 8, 1, 1, 0, 0, 0, 0, 0, 0, 16>>),
    true = test_reverse(?INT64_MAX, <<131, 110, 8, 0, 255, 255, 255, 255, 255, 255, 255, 127>>),
    true = test_reverse(?INT64_MIN, <<131, 110, 8, 1, 0, 0, 0, 0, 0, 0, 0, 128>>),

    %% we can still decode really small encodings
    1 = erlang:binary_to_term(<<131, 110, 1, 0, 1>>),
    -1 = erlang:binary_to_term(<<131, 110, 1, 1, 1>>),
    -1 = erlang:binary_to_term(<<131, 110, 1, 3, 1>>),
    -1 = erlang:binary_to_term(<<131, 110, 1, -1, 1>>),

    %% 0-length encodings are legal
    0 = erlang:binary_to_term(<<131, 110, 0, 0>>),
    0 = erlang:binary_to_term(<<131, 110, 0, 1>>),

    %% edge cases around INT32 min/max boundaries
    true = test_reverse(pow(31) - 1, <<131, 98, 127, 255, 255, 255>>),
    true = test_reverse(pow(31), <<131, 110, 4, 0, 0, 0, 0, 128>>),
    true = test_reverse(-pow(31), <<131, 98, 128, 0, 0, 0>>),
    true = test_reverse(-pow(31) - 1, <<131, 110, 4, 1, 1, 0, 0, 128>>),

    %% missing sign
    ok = assert_badarg(
        fun() ->
            erlang:binary_to_term(<<131, 110, 0>>)
        end
    ),

    %% we currently only support up to 64 bit (signed) integers
    case erlang:system_info(machine) of
        "BEAM" ->
            %% assert the round-trip result here as well, like every other call above
            true = test_reverse(
                pow(63) + 1, <<131, 110, 8, 0, 1, 0, 0, 0, 0, 0, 0, 128>>
            ),
            true = test_reverse(
                -(pow(63) + 2), <<131, 110, 8, 1, 2, 0, 0, 0, 0, 0, 0, 128>>
            ),
            true = test_reverse(
                pow(128), <<131, 110, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1>>
            );
        _ ->
            ok = assert_badarg(
                fun() ->
                    erlang:binary_to_term(
                        <<131, 110, 8, 0, 1, 0, 0, 0, 0, 0, 0, 128>>
                    )
                end
            ),
            ok = assert_badarg(
                fun() ->
                    erlang:binary_to_term(
                        <<131, 110, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1>>
                    )
                end
            ),
            ok = assert_badarg(
                fun() ->
                    erlang:binary_to_term(
                        <<131, 110, 8, 1, 2, 0, 0, 0, 0, 0, 0, 128>>
                    )
                end
            )
    end,
    0.
95+
96+
%% Round-trip check for a single term.
%% Crashes with badmatch unless term_to_binary(T) is exactly Interop and decoding
%% consumes the whole binary; returns whether the decoded term is exactly T.
test_reverse(T, Interop) ->
    Encoded = erlang:term_to_binary(T),
    %% the local encoding must match the expected interop bytes
    Encoded = Interop,
    {Decoded, Consumed} = erlang:binary_to_term(Encoded, [used]),
    %% every byte of the encoding must have been used by the decoder
    Consumed = erlang:byte_size(Encoded),
    Decoded =:= T.
102+
103+
%% Calls F and returns ok when it raises error:badarg.
%% If F returns normally, its result is reported as {fail_no_ex, Result};
%% any other exception propagates to the caller.
assert_badarg(F) ->
    try F() of
        Result -> {fail_no_ex, Result}
    catch
        error:badarg -> ok
    end.
110+
111+
%% Returns 2 raised to the power N, built by doubling (shift left by one) N times.
pow(0) -> 1;
pow(N) -> pow(N - 1) bsl 1.

tests/test.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ struct Test tests[] = {
474474
TEST_CASE_EXPECTED(unlink_error, 1),
475475
TEST_CASE_EXPECTED(trap_exit_flag, 1),
476476
TEST_CASE_COND(test_stacktrace, 0, SKIP_STACKTRACES),
477+
TEST_CASE(small_big_ext),
477478

478479
// TEST CRASHES HERE: TEST_CASE(memlimit),
479480

0 commit comments

Comments
 (0)