Skip to content

Commit f361e78

Browse files
committed
[Tolk] Optimize arg_order in advance when it's safe
This partially "reverts" the behavior of the pragma 'compute-asm-ltr' from FunC, which was always on in Tolk. Now, for asm functions with arg_order, if it's safe, arguments are evaluated and placed onto the stack in the desired order. When it's unsafe (the case this pragma was actually designed for), arguments are evaluated left-to-right.
1 parent 76c9f9e commit f361e78

File tree

8 files changed

+248
-21
lines changed

8 files changed

+248
-21
lines changed

tolk-tester/tests/asm-arg-order.tolk

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,67 @@ fun test_new_dot(): (tuple, tuple) {
120120
return (t, t2);
121121
}
122122

123+
@pure
124+
fun asmAPlus1TimesB(a: int, b: int): int
125+
asm(b a) "1 ADDCONST MUL";
126+
127+
@pure
128+
fun int.plus1TimesB(self, b: int): int
129+
asm(b self) "1 ADDCONST MUL";
130+
131+
@pure
132+
fun get2Pure() { return 2; }
133+
@pure
134+
fun get10Pure() { return 10; }
135+
136+
fun get2Impure() { return 2; }
137+
fun get10Impure() { return 10; }
138+
139+
global g2: int;
140+
global g10: int;
141+
142+
fun setG2(v: int) { g2 = v; return v; }
143+
144+
@method_id(27)
145+
fun test27() {
146+
return asmAPlus1TimesB(2, 10);
147+
}
148+
149+
@method_id(28)
150+
fun test28() {
151+
return asmAPlus1TimesB(get2Pure(), get10Pure());
152+
}
153+
154+
@method_id(29)
155+
fun test29() {
156+
return asmAPlus1TimesB(get2Impure(), get10Impure());
157+
}
158+
159+
@method_id(30)
160+
fun test30() {
161+
g2 = 2;
162+
g10 = 10;
163+
return asmAPlus1TimesB(g2, g10);
164+
}
165+
166+
@method_id(31)
167+
fun test31() {
168+
g2 = 2;
169+
g10 = 10;
170+
return asmAPlus1TimesB(g2 += 2, g10 += g2);
171+
}
172+
173+
@method_id(32)
174+
fun test32() {
175+
return 2.plus1TimesB(10);
176+
}
177+
178+
@method_id(33)
179+
fun test33(x: int) {
180+
return ((x += 10).plus1TimesB(2), (x += 20).plus1TimesB(x), ((x /= (g2=2)).plus1TimesB(x*g2)), setG2(7).plus1TimesB(g2));
181+
}
182+
183+
123184
fun main() {
124185
}
125186

@@ -137,6 +198,60 @@ fun main() {
137198
@testcase | 24 | | [ 11 22 33 44 55 ] [ 220 330 440 110 550 ]
138199
@testcase | 25 | | [ 22 33 ] [ 220 330 ]
139200
@testcase | 26 | | [ 11 22 33 ] [ 220 330 110 ]
201+
@testcase | 27 | | 30
202+
@testcase | 28 | | 30
203+
@testcase | 29 | | 30
204+
@testcase | 30 | | 30
205+
@testcase | 31 | | 70
206+
@testcase | 32 | | 30
207+
@testcase | 33 | 0 | 22 930 480 56
208+
209+
@fif_codegen
210+
"""
211+
test27 PROC:<{
212+
10 PUSHINT
213+
2 PUSHINT
214+
1 ADDCONST MUL
215+
}>
216+
"""
217+
218+
@fif_codegen
219+
"""
220+
test28 PROC:<{
221+
get10Pure CALLDICT
222+
get2Pure CALLDICT
223+
1 ADDCONST MUL
224+
}>
225+
"""
226+
227+
@fif_codegen
228+
"""
229+
test29 PROC:<{
230+
get2Impure CALLDICT
231+
get10Impure CALLDICT
232+
SWAP
233+
1 ADDCONST MUL
234+
}>
235+
"""
236+
237+
@fif_codegen
238+
"""
239+
test30 PROC:<{
240+
...
241+
g10 GETGLOB
242+
g2 GETGLOB
243+
1 ADDCONST MUL
244+
}>
245+
"""
246+
247+
@fif_codegen
248+
"""
249+
test32 PROC:<{
250+
10 PUSHINT
251+
2 PUSHINT
252+
1 ADDCONST MUL
253+
}>
254+
"""
140255

141-
@code_hash 93068291567112337250118419287631047120002003622184251973082208096953112184588
256+
@code_hash 78671986831403867804966279036762472603849672357801214378328975900111280733054
142257
*/

tolk-tester/tests/cells-slices.tolk

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,37 @@ fun test111() {
198198
return (op1, q1, op2, q2);
199199
}
200200

201+
global g8: int;
202+
global g10: int;
203+
global g16: int;
204+
global g32: int;
205+
206+
@inline_ref
207+
fun initGlobals() {
208+
g8 = 8;
209+
g10 = 10;
210+
g16 = 16;
211+
g32 = 32;
212+
}
213+
214+
@method_id(116)
215+
fun test16() {
216+
initGlobals();
217+
var b1 = beginCell().storeUint(g8, g16).storeUint(0xFF, g32).storeUint(g8, g16 * 2);
218+
var b2 = beginCell().storeUint(8, 16).storeUint(0xFF, 32).storeUint(8, 16 * 2);
219+
assert(b1.bitsCount() == b2.bitsCount(), 400);
220+
var c1 = b1.endCell().beginParse();
221+
var c2 = b2.endCell().beginParse();
222+
assert(c1.bitsEqual(c2), 400);
223+
assert(c1.loadUint(g16) == g8, 400);
224+
assert(c1.loadUint(g32) == 0xFF, 400);
225+
assert(c1.loadUint(2 * g16) == 8, 400);
226+
return b1;
227+
// 00140008000000ff00000008
228+
// 00140008000000ff00000008
229+
}
230+
231+
201232
fun main(): int {
202233
return 0;
203234
}
@@ -217,21 +248,19 @@ fun main(): int {
217248
@testcase | 114 | -1 | -1 0 -1
218249
@testcase | 114 | 0 | 0 0 0
219250
@testcase | 115 | | 123 456 123 456
251+
@testcase | 116 | | BC{00140008000000ff00000008}
220252

221253
Note that since 'compute-asm-ltr' became on by default, chaining methods codegen is not quite optimal.
222254
@fif_codegen
223255
"""
224256
test6 PROC:<{
225-
NEWC // '0
226-
1 PUSHINT // '0 '1=1
227-
SWAP // '1=1 '0
228-
32 STU // '0
229-
2 PUSHINT // '0 '4=2
230-
SWAP // '4=2 '0
231-
32 STU // '0
232-
3 PUSHINT // '0 '7=3
233-
SWAP // '7=3 '0
234-
32 STU // '0
257+
3 PUSHINT // '0=3
258+
2 PUSHINT // '0=3 '1=2
259+
1 PUSHINT // '0=3 '1=2 '2=1
260+
NEWC // '0=3 '1=2 '2=1 '3
261+
32 STU // '0=3 '1=2 '3
262+
32 STU // '0=3 '3
263+
32 STU // '3
235264
}>
236265
"""
237266
*/

tolk-tester/tests/constants-tests.tolk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,5 +118,5 @@ fun main() {
118118
@testcase | 104 | | 1 1 2
119119
@testcase | 105 | | -1 0 7 48
120120

121-
@code_hash 80040709432962217077682091261201772251141677197885524779745956896218368868623
121+
@code_hash 49556957179018386976033482229516007597784982050169632168468608374010225644988
122122
*/

tolk-tester/tests/strings-tests.tolk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,5 +78,5 @@ fun test1() {
7878
@testcase | 0 | | 0
7979
@testcase | 101 | | [ 65 66 67 68 ]
8080

81-
@code_hash 38184847030631877916087987911699475358017315230885358090110033079289166112584
81+
@code_hash 55974318379341089957961227475446008591490555692181953973486962465702042912657
8282
*/

tolk/analyzer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -851,6 +851,10 @@ void Op::set_impure_flag() {
851851
flags |= _Impure;
852852
}
853853

854+
void Op::set_arg_order_already_equals_asm_flag() {
855+
flags |= _ArgOrderAlreadyEqualsAsm;
856+
}
857+
854858
bool Op::mark_noreturn() {
855859
switch (cl) {
856860
case _Nop:

tolk/codegen.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,10 @@ bool Op::generate_code_step(Stack& stack) {
453453
if (disabled()) {
454454
return true;
455455
}
456-
// f_sym can be nullptr for Op::_CallInd (invoke a variable, not a function)
457-
const std::vector<int>* arg_order = f_sym ? f_sym->get_arg_order() : nullptr;
456+
// f_sym can be nullptr for Op::_CallInd (invoke a variable, not a function);
457+
// if f has arg_order, when it's safe, the compiler evaluates arguments in that order in advance (for fewer stack manipulations);
458+
// when it's unsafe, arguments are evaluated left-to-right, and we need to match asm arg_order here
459+
const std::vector<int>* arg_order = f_sym && !arg_order_already_equals_asm() ? f_sym->get_arg_order() : nullptr;
458460
const std::vector<int>* ret_order = f_sym ? f_sym->get_ret_order() : nullptr;
459461
tolk_assert(!arg_order || arg_order->size() == right.size());
460462
tolk_assert(!ret_order || ret_order->size() == left.size());

tolk/pipe-ast-to-legacy.cpp

Lines changed: 79 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,53 @@ class LValContext {
278278
}
279279
};
280280

281+
// the purpose of this class: given a call `f(a1,a2,...)` where f has asm arg_order, check
282+
// whether it's safe to rearrange arguments (to evaluate them in arg_order right here for fewer stack manipulations)
283+
// or it's unsafe, and we should evaluate them left-to-right;
284+
// example: `f(1,2,3)` / `b.storeUint(2,32)` is safe
285+
// example: `f(x,x+=5,x)` / `f(impureF1(), global_var)` is unsafe
286+
class CheckReorderingForAsmArgOrderIsSafeVisitor final : public ASTVisitorFunctionBody {
287+
bool has_side_effects = false;
288+
289+
protected:
290+
void visit(V<ast_function_call> v) override {
291+
has_side_effects |= v->fun_maybe == nullptr || !v->fun_maybe->is_marked_as_pure();
292+
parent::visit(v);
293+
}
294+
295+
void visit(V<ast_assign> v) override {
296+
has_side_effects = true;
297+
parent::visit(v);
298+
}
299+
300+
void visit(V<ast_set_assign> v) override {
301+
has_side_effects = true;
302+
parent::visit(v);
303+
}
304+
305+
public:
306+
bool should_visit_function(FunctionPtr fun_ref) override {
307+
tolk_assert(false);
308+
}
309+
310+
static bool is_safe_to_reorder(V<ast_function_call> v) {
311+
for (const LocalVarData& param : v->fun_maybe->parameters) {
312+
if (param.declared_type->get_width_on_stack() != 1) {
313+
return false;
314+
}
315+
}
316+
317+
CheckReorderingForAsmArgOrderIsSafeVisitor visitor;
318+
for (int i = 0; i < v->get_num_args(); ++i) {
319+
visitor.ASTVisitorFunctionBody::visit(v->get_arg(i)->get_expr());
320+
}
321+
if (v->dot_obj_is_self) {
322+
visitor.ASTVisitorFunctionBody::visit(v->get_self_obj());
323+
}
324+
return !visitor.has_side_effects;
325+
}
326+
};
327+
281328
// given `{some_expr}!`, return some_expr
282329
static AnyExprV unwrap_not_null_operator(AnyExprV v) {
283330
while (auto v_notnull = v->try_as<ast_not_null_operator>()) {
@@ -469,12 +516,16 @@ static std::vector<var_idx_t> pre_compile_is_type(CodeBlob& code, TypePtr expr_t
469516
}
470517

471518
static std::vector<var_idx_t> gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation loc,
472-
std::vector<var_idx_t>&& args_vars, FunctionPtr fun_ref, const char* debug_desc) {
519+
std::vector<var_idx_t>&& args_vars, FunctionPtr fun_ref, const char* debug_desc,
520+
bool arg_order_already_equals_asm = false) {
473521
std::vector<var_idx_t> rvect = code.create_tmp_var(ret_type, loc, debug_desc);
474522
Op& op = code.emplace_back(loc, Op::_Call, rvect, std::move(args_vars), fun_ref);
475523
if (!fun_ref->is_marked_as_pure()) {
476524
op.set_impure_flag();
477525
}
526+
if (arg_order_already_equals_asm) {
527+
op.set_arg_order_already_equals_asm_flag();
528+
}
478529
return rvect;
479530
}
480531

@@ -1234,9 +1285,31 @@ static std::vector<var_idx_t> process_function_call(V<ast_function_call> v, Code
12341285
for (int i = 0; i < v->get_num_args(); ++i) {
12351286
args.push_back(v->get_arg(i)->get_expr());
12361287
}
1288+
12371289
// the purpose of tensor_tt ("tensor target type") is to transition `null` to `(int, int)?` and so on
12381290
// the purpose of calling `pre_compile_tensor_inner` is to have 0-th IR vars to handle return self
12391291
std::vector<TypePtr> params_types = fun_ref->inferred_full_type->try_as<TypeDataFunCallable>()->params_types;
1292+
1293+
// if fun_ref has asm arg_order, maybe it's safe to reorder arguments here (to put them onto the stack in the order asm expects);
1294+
// (if it's not safe, arguments are evaluated left-to-right, involving stack transformations later)
1295+
bool arg_order_already_equals_asm = false;
1296+
int asm_self_idx = 0;
1297+
if (!fun_ref->arg_order.empty() && CheckReorderingForAsmArgOrderIsSafeVisitor::is_safe_to_reorder(v)) {
1298+
std::vector<AnyExprV> new_args(args.size());
1299+
std::vector<TypePtr> new_params_types(params_types.size());
1300+
for (int i = 0; i < static_cast<int>(fun_ref->arg_order.size()); ++i) {
1301+
int real_i = fun_ref->arg_order[i];
1302+
new_args[i] = args[real_i];
1303+
new_params_types[i] = params_types[real_i];
1304+
if (real_i == 0) {
1305+
asm_self_idx = i;
1306+
}
1307+
}
1308+
args = std::move(new_args);
1309+
params_types = std::move(new_params_types);
1310+
arg_order_already_equals_asm = true;
1311+
}
1312+
12401313
const TypeDataTensor* tensor_tt = TypeDataTensor::create(std::move(params_types))->try_as<TypeDataTensor>();
12411314
std::vector<std::vector<var_idx_t>> vars_per_arg = pre_compile_tensor_inner(code, args, tensor_tt, nullptr);
12421315

@@ -1263,20 +1336,21 @@ static std::vector<var_idx_t> process_function_call(V<ast_function_call> v, Code
12631336
for (const std::vector<var_idx_t>& list : vars_per_arg) {
12641337
args_vars.insert(args_vars.end(), list.cbegin(), list.cend());
12651338
}
1266-
std::vector<var_idx_t> rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref, "(fun-call)");
1339+
std::vector<var_idx_t> rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref, "(fun-call)", arg_order_already_equals_asm);
12671340

12681341
if (fun_ref->has_mutate_params()) {
12691342
LValContext local_lval;
12701343
std::vector<var_idx_t> left;
12711344
for (int i = 0; i < delta_self + v->get_num_args(); ++i) {
1345+
int real_i = arg_order_already_equals_asm ? i == 0 && delta_self ? asm_self_idx : fun_ref->arg_order[i - delta_self] : i;
12721346
if (fun_ref->parameters[i].is_mutate_parameter()) {
1273-
AnyExprV arg_i = obj_leftmost && i == 0 ? obj_leftmost : args[i];
1347+
AnyExprV arg_i = obj_leftmost && i == 0 ? obj_leftmost : args[real_i];
12741348
tolk_assert(arg_i->is_lvalue || i == 0);
12751349
if (arg_i->is_lvalue) {
12761350
std::vector<var_idx_t> ith_var_idx = pre_compile_expr(arg_i, code, nullptr, &local_lval);
12771351
left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end());
12781352
} else {
1279-
left.insert(left.end(), vars_per_arg[0].begin(), vars_per_arg[0].end());
1353+
left.insert(left.end(), vars_per_arg[asm_self_idx].begin(), vars_per_arg[asm_self_idx].end());
12801354
}
12811355
}
12821356
}
@@ -1292,7 +1366,7 @@ static std::vector<var_idx_t> process_function_call(V<ast_function_call> v, Code
12921366
if (obj_leftmost->is_lvalue) { // to handle if obj is global var, potentially re-assigned inside a chain
12931367
rvect_apply = pre_compile_expr(obj_leftmost, code, nullptr);
12941368
} else { // temporary object, not lvalue, pre_compile_expr
1295-
rvect_apply = vars_per_arg[0];
1369+
rvect_apply = vars_per_arg[asm_self_idx];
12961370
}
12971371
}
12981372

tolk/tolk.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ struct Op {
287287
_SliceConst,
288288
};
289289
OpKind cl;
290-
enum { _Disabled = 1, _NoReturn = 4, _Impure = 24 };
290+
enum { _Disabled = 1, _NoReturn = 2, _Impure = 4, _ArgOrderAlreadyEqualsAsm = 8 };
291291
int flags;
292292
std::unique_ptr<Op> next;
293293
FunctionPtr f_sym = nullptr;
@@ -347,6 +347,9 @@ struct Op {
347347
bool impure() const { return flags & _Impure; }
348348
void set_impure_flag();
349349

350+
bool arg_order_already_equals_asm() const { return flags & _ArgOrderAlreadyEqualsAsm; }
351+
void set_arg_order_already_equals_asm_flag();
352+
350353
void show(std::ostream& os, const std::vector<TmpVar>& vars, std::string pfx = "", int mode = 0) const;
351354
void show_var_list(std::ostream& os, const std::vector<var_idx_t>& idx_list, const std::vector<TmpVar>& vars) const;
352355
void show_var_list(std::ostream& os, const std::vector<VarDescr>& list, const std::vector<TmpVar>& vars) const;

0 commit comments

Comments
 (0)