Skip to content

Commit f0e6470

Browse files
committed
[Tolk] Rewrite lexer, spaces are not mandatory anymore
The new lexer is noticeably faster and more memory-efficient (although splitting a file into tokens is a negligible part of the whole pipeline). But the purpose of rewriting the lexer was not just to speed it up, but to allow writing code without spaces: `2+2` is now 4, not a valid identifier as it was earlier. The variety of symbols allowed in identifiers has been greatly reduced and is now similar to other languages. SrcLocation is now 8 bytes on the stack everywhere. Command-line flags were also reworked: - the input for the Tolk compiler is now only a single file; it is parsed, and parsing continues as new #include directives are resolved - flags like -A, -P and so on are no longer needed, actually
1 parent 0bcc0b3 commit f0e6470

26 files changed

+2022
-2109
lines changed

crypto/fift/utils.cpp

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -211,20 +211,39 @@ td::Result<fift::SourceLookup> create_mem_source_lookup(std::string main, std::s
211211
fift_dir);
212212
}
213213

214-
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code, std::string fift_dir, bool is_raw) {
214+
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code) {
215215
std::stringstream ss;
216216
std::string sb;
217217
sb.reserve(asm_code.size() + 100);
218-
sb.append("\"Asm.fif\" include\n ");
219-
sb.append(is_raw ? "<{" : "");
218+
sb.append("\"Asm.fif\" include\n <{\n");
220219
sb.append(asm_code.data(), asm_code.size());
221-
sb.append(is_raw ? "}>c" : "");
222-
sb.append(" boc>B \"res\" B>file");
220+
sb.append("\n}>c boc>B \"res\" B>file");
223221

224-
TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false, fift_dir));
222+
TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false));
225223
TRY_RESULT(res, run_fift(std::move(source_lookup), &ss));
226224
TRY_RESULT(boc, res.read_file("res"));
227225
return vm::std_boc_deserialize(std::move(boc.data));
228226
}
229227

228+
td::Result<CompiledProgramOutput> compile_asm_program(std::string&& program_code, const std::string& fift_dir) {
229+
std::string main_fif;
230+
main_fif.reserve(program_code.size() + 100);
231+
main_fif.append(program_code.data(), program_code.size());
232+
main_fif.append(R"( dup hashB B>X $>B "hex" B>file)"); // write codeHashHex to a file
233+
main_fif.append(R"( boc>B B>base64 $>B "boc" B>file)"); // write codeBoc64 to a file
234+
235+
std::stringstream fift_output_stream;
236+
TRY_RESULT(source_lookup, create_source_lookup(std::move(main_fif), true, true, false, false, false, false, false, fift_dir));
237+
TRY_RESULT(res, run_fift(std::move(source_lookup), &fift_output_stream));
238+
239+
TRY_RESULT(boc, res.read_file("boc"));
240+
TRY_RESULT(hex, res.read_file("hex"));
241+
242+
return CompiledProgramOutput{
243+
std::move(program_code),
244+
std::move(boc.data),
245+
std::move(hex.data),
246+
};
247+
}
248+
230249
} // namespace fift

crypto/fift/utils.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,21 @@ struct FiftOutput {
2626
SourceLookup source_lookup;
2727
std::string output;
2828
};
29+
30+
// given valid Fift code of the form PROGRAM{ ... }END>c, compile_asm_program() returns this output
31+
// now it's used primarily for wasm output (see tolk-js, for example)
32+
struct CompiledProgramOutput {
33+
std::string fiftCode;
34+
std::string codeBoc64;
35+
std::string codeHashHex;
36+
};
37+
2938
td::Result<fift::SourceLookup> create_mem_source_lookup(std::string main, std::string fift_dir = "",
3039
bool need_preamble = true, bool need_asm = true,
3140
bool need_ton_util = true, bool need_lisp = true,
3241
bool need_w3_code = true);
3342
td::Result<FiftOutput> mem_run_fift(std::string source, std::vector<std::string> args = {}, std::string fift_dir = "");
3443
td::Result<FiftOutput> mem_run_fift(SourceLookup source_lookup, std::vector<std::string> args);
35-
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code, std::string fift_dir = "", bool is_raw = true);
44+
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code);
45+
td::Result<CompiledProgramOutput> compile_asm_program(std::string&& program_code, const std::string& fift_dir);
3646
} // namespace fift

crypto/funcfiftlib/funcfiftlib.cpp

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@
3737

3838
td::Result<std::string> compile_internal(char *config_json) {
3939
TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json)))
40-
auto &obj = input_json.get_object();
40+
td::JsonObject& config = input_json.get_object();
4141

42-
TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false));
43-
TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false));
42+
TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optLevel", false));
43+
TRY_RESULT(sources_obj, td::get_json_object_field(config, "sources", td::JsonValue::Type::Array, false));
4444

4545
auto &sources_arr = sources_obj.get_array();
4646

@@ -52,29 +52,25 @@ td::Result<std::string> compile_internal(char *config_json) {
5252

5353
funC::opt_level = std::max(0, opt_level);
5454
funC::program_envelope = true;
55+
funC::asm_preamble = true;
5556
funC::verbosity = 0;
5657
funC::indent = 1;
5758

5859
std::ostringstream outs, errs;
59-
auto compile_res = funC::func_proceed(sources, outs, errs);
60-
61-
if (compile_res != 0) {
62-
return td::Status::Error(std::string("Func compilation error: ") + errs.str());
60+
int funC_res = funC::func_proceed(sources, outs, errs);
61+
if (funC_res != 0) {
62+
return td::Status::Error("FunC compilation error: " + errs.str());
6363
}
6464

65-
TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false));
66-
TRY_RESULT(boc, vm::std_boc_serialize(code_cell));
65+
TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/"));
6766

6867
td::JsonBuilder result_json;
69-
auto result_obj = result_json.enter_object();
70-
result_obj("status", "ok");
71-
result_obj("codeBoc", td::base64_encode(boc));
72-
result_obj("fiftCode", outs.str());
73-
result_obj("codeHashHex", code_cell->get_hash().to_hex());
74-
result_obj.leave();
75-
76-
outs.clear();
77-
errs.clear();
68+
auto obj = result_json.enter_object();
69+
obj("status", "ok");
70+
obj("fiftCode", std::move(fift_res.fiftCode));
71+
obj("codeBoc", std::move(fift_res.codeBoc64));
72+
obj("codeHashHex", std::move(fift_res.codeHashHex));
73+
obj.leave();
7874

7975
return result_json.string_builder().as_cslice().str();
8076
}

crypto/smartcont/mathlib.tolk

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -572,9 +572,9 @@ int atanh_f261(int x, int n) inline_ref {
572572
s -= 1;
573573
}
574574
x += t;
575-
int 2x = 2 * x;
576-
int y = lshift256divr(2x, (x >> 1) - t);
577-
;; y = 2x - (mulrshiftr256(2x, y) ~>> 2); ;; this line could improve precision on very rare occasions
575+
int `2x` = 2 * x;
576+
int y = lshift256divr(`2x`, (x >> 1) - t);
577+
;; y = `2x` - (mulrshiftr256(`2x`, y) ~>> 2); ;; this line could improve precision on very rare occasions
578578
return (atanh_f258(y, 36), s);
579579
}
580580

tolk/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
22

33
set(TOLK_SOURCE
4-
srcread.cpp
4+
src-file.cpp
55
lexer.cpp
66
symtable.cpp
7-
keywords.cpp
87
unify-types.cpp
98
parse-tolk.cpp
109
abscode.cpp

tolk/abscode.cpp

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,29 +24,19 @@ namespace tolk {
2424
*
2525
*/
2626

27-
TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, const SrcLocation* loc)
28-
: v_type(_type), idx(_idx), cls(_cls), coord(0) {
27+
TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc)
28+
: v_type(_type), idx(_idx), cls(_cls), coord(0), where(loc) {
2929
if (sym) {
3030
name = sym->sym_idx;
3131
sym->value->idx = _idx;
3232
}
33-
if (loc) {
34-
where = std::make_unique<SrcLocation>(*loc);
35-
}
3633
if (!_type) {
3734
v_type = TypeExpr::new_hole();
3835
}
39-
if (cls == _Named) {
40-
undefined = true;
41-
}
4236
}
4337

44-
void TmpVar::set_location(const SrcLocation& loc) {
45-
if (where) {
46-
*where = loc;
47-
} else {
48-
where = std::make_unique<SrcLocation>(loc);
49-
}
38+
void TmpVar::set_location(SrcLocation loc) {
39+
where = loc;
5040
}
5141

5242
void TmpVar::dump(std::ostream& os) const {
@@ -469,10 +459,10 @@ void CodeBlob::print(std::ostream& os, int flags) const {
469459
if ((flags & 8) != 0) {
470460
for (const auto& var : vars) {
471461
var.dump(os);
472-
if (var.where && (flags & 1) != 0) {
473-
var.where->show(os);
462+
if (var.where.is_defined() && (flags & 1) != 0) {
463+
var.where.show(os);
474464
os << " defined here:\n";
475-
var.where->show_context(os);
465+
var.where.show_context(os);
476466
}
477467
}
478468
}
@@ -483,7 +473,7 @@ void CodeBlob::print(std::ostream& os, int flags) const {
483473
os << "-------- END ---------\n\n";
484474
}
485475

486-
var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, const SrcLocation* location) {
476+
var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation location) {
487477
vars.emplace_back(var_cnt, cls, var_type, sym, location);
488478
if (sym) {
489479
sym->value->idx = var_cnt;
@@ -501,7 +491,7 @@ bool CodeBlob::import_params(FormalArgList arg_list) {
501491
SymDef* arg_sym;
502492
SrcLocation arg_loc;
503493
std::tie(arg_type, arg_sym, arg_loc) = par;
504-
list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, &arg_loc));
494+
list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, arg_loc));
505495
}
506496
emplace_back(loc, Op::_Import, list);
507497
in_var_cnt = var_cnt;

tolk/analyzer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ int CodeBlob::split_vars(bool strict) {
3636
for (int j = 0; j < var_cnt; j++) {
3737
TmpVar& var = vars[j];
3838
if (strict && var.v_type->minw != var.v_type->maxw) {
39-
throw ParseError{var.where.get(), "variable does not have fixed width, cannot manipulate it"};
39+
throw ParseError{var.where, "variable does not have fixed width, cannot manipulate it"};
4040
}
4141
std::vector<TypeExpr*> comp_types;
4242
int k = var.v_type->extract_components(comp_types);
@@ -45,7 +45,7 @@ int CodeBlob::split_vars(bool strict) {
4545
if (k != 1) {
4646
var.coord = ~((n << 8) + k);
4747
for (int i = 0; i < k; i++) {
48-
auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where.get());
48+
auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where);
4949
tolk_assert(v == n + i);
5050
tolk_assert(vars[v].idx == v);
5151
vars[v].name = vars[j].name;
@@ -54,7 +54,7 @@ int CodeBlob::split_vars(bool strict) {
5454
n += k;
5555
++changes;
5656
} else if (strict && var.v_type->minw != 1) {
57-
throw ParseError{var.where.get(),
57+
throw ParseError{var.where,
5858
"cannot work with variable or variable component of width greater than one"};
5959
}
6060
}

0 commit comments

Comments
 (0)