Skip to content

Commit 8194d1f

Browse files
committed
[Tolk] Support binary number literals 0b1100
Along with hex 0x, 0b is also parsed. Example: 0b1010101010101010 == 43690.
1 parent 0439613 commit 8194d1f

File tree

3 files changed

+84
-14
lines changed

3 files changed

+84
-14
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// Hexadecimal spellings of the four reference values.
// x2 mixes upper- and lower-case hex digits; x3 has leading zeros; x4 is negated.
const x1 = 0xFF;
2+
const x2 = 0xAbcdEF01234561AC;
3+
const x3 = 0x00BEC;
4+
const x4 = -0xD70B;
5+
6+
// Decimal spellings; main() asserts that each dN equals the matching xN.
// d3 is written with leading zeros and is asserted equal to 0x00BEC.
const d1 = 255;
7+
const d2 = 12379813738877116844;
8+
const d3 = 003052;
9+
const d4 = -55051;
10+
11+
// Binary (0b...) spellings; main() asserts that each bN equals the matching xN.
// b2 has 64 binary digits; b3 has leading zeros; b4 is negated.
const b1 = 0b11111111;
12+
const b2 = 0b1010101111001101111011110000000100100011010001010110000110101100;
13+
const b3 = 0b00101111101100;
14+
const b4 = -0b1101011100001011;
15+
16+
// Checks that the hex (xN), decimal (dN) and binary (bN) constants agree,
// then returns five comparisons that use binary literals inline.
// Every assert passes 100 as its second argument (presumably the error code on failure).
fun main() {
17+
// each hex constant must equal both its decimal and its binary counterpart
assert(x1 == d1 && x1 == b1, 100);
18+
assert(x2 == d2 && x2 == b2, 100);
19+
assert(x3 == d3 && x3 == b3, 100);
20+
assert(x4 == d4 && x4 == b4, 100);
21+
// sums and negations of the constants must agree as well
assert(x1 + x2 + x3 == b1 + b2 + b3, 100);
22+
assert(-x2 == -d2 && -x2 == -b2, 100);
23+
24+
// binary literals written directly next to the +, - and unary minus operators
return (
25+
0b1010101010101010+0b00 == 43690,
26+
0b0 == 0,
27+
-0b1010100001010101010-0b1+0b1 == -344746,
28+
0b00001 == 1,
29+
// two unary minuses applied to a literal with leading zeros
--0b00101111101100 == 3052
30+
);
31+
}
32+
33+
/**
34+
@testcase | 0 | | -1 -1 -1 -1 -1
35+
*/

tolk/ast-from-tokens.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,25 @@ static AnyExprV maybe_replace_eq_null_with_isNull_check(V<ast_binary_operator> v
123123
return createV<ast_is_null_check>(v->loc, v_nullable, v->tok == tok_neq);
124124
}
125125

126+
// parse `123` / `0xFF` / `0b10001` to td::RefInt256
127+
static td::RefInt256 parse_tok_int_const(std::string_view text) {
128+
bool bin = text[0] == '0' && text[1] == 'b';
129+
if (!bin) {
130+
// this function parses decimal and hex numbers
131+
return td::string_to_int256(static_cast<std::string>(text));
132+
}
133+
// parse a binary number; to make it simpler, don't allow too long numbers, it's impractical
134+
if (text.size() > 64 + 2) {
135+
return {};
136+
}
137+
uint64_t result = 0;
138+
for (char c : text.substr(2)) { // skip "0b"
139+
result = (result << 1) | static_cast<uint64_t>(c - '0');
140+
}
141+
return td::make_refint(result);
142+
}
143+
144+
126145

127146
/*
128147
*
@@ -313,7 +332,7 @@ static AnyExprV parse_expr100(Lexer& lex) {
313332
}
314333
case tok_int_const: {
315334
std::string_view orig_str = lex.cur_str();
316-
td::RefInt256 intval = td::string_to_int256(static_cast<std::string>(orig_str));
335+
td::RefInt256 intval = parse_tok_int_const(orig_str);
317336
if (intval.is_null() || !intval->signed_fits_bits(257)) {
318337
lex.error("invalid integer constant");
319338
}

tolk/lexer.cpp

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -238,27 +238,43 @@ struct ChunkAnnotation final : ChunkLexerBase {
238238

239239
// A number: decimal, hex (0x...) or binary (0b...).
240240
struct ChunkNumber final : ChunkLexerBase {
241-
bool parse(Lexer* lex) const override {
241+
static bool parse_hex_or_bin(Lexer* lex, bool bin) {
242242
const char* str_begin = lex->c_str();
243-
bool hex = false;
244-
if (lex->char_at() == '0' && lex->char_at(1) == 'x') {
245-
lex->skip_chars(2);
246-
hex = true;
247-
}
243+
lex->skip_chars(2); // 0x / 0b
248244
if (lex->is_eof()) {
249245
return false;
250246
}
247+
251248
while (!lex->is_eof()) {
252249
char c = lex->char_at();
253-
if (c >= '0' && c <= '9') {
254-
lex->skip_chars(1);
255-
continue;
256-
}
257-
if (!hex) {
250+
bool ok = bin
251+
? c == '0' || c == '1'
252+
: (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f');
253+
if (!ok) {
258254
break;
259255
}
260-
c |= 0x20;
261-
if (c < 'a' || c > 'f') {
256+
lex->skip_chars(1);
257+
}
258+
259+
std::string_view str_val(str_begin, lex->c_str() - str_begin);
260+
lex->add_token(tok_int_const, str_val);
261+
return true;
262+
}
263+
264+
bool parse(Lexer* lex) const override {
265+
if (lex->char_at() == '0') {
266+
if (lex->char_at(1) == 'x') {
267+
return parse_hex_or_bin(lex, false);
268+
}
269+
if (lex->char_at(1) == 'b') {
270+
return parse_hex_or_bin(lex, true);
271+
}
272+
}
273+
274+
const char* str_begin = lex->c_str();
275+
while (!lex->is_eof()) {
276+
char c = lex->char_at();
277+
if (c < '0' || c > '9') {
262278
break;
263279
}
264280
lex->skip_chars(1);

0 commit comments

Comments
 (0)