Skip to content

Commit 6a39f8a

Browse files
committed
Merge pull request atomvm#1841 from pguyot/w40/jit-optimize-verify-is-function
JIT: optimize `verify_is_function` using types These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license). SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents f58ec7b + d45ea61 commit 6a39f8a

File tree

2 files changed

+108
-16
lines changed

2 files changed

+108
-16
lines changed

libs/jit/src/jit.erl

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -949,13 +949,12 @@ first_pass(<<?OP_CALL_FUN, Rest0/binary>>, MMod, MSt0, State0) ->
949949
?TRACE("OP_CALL_FUN ~p\n", [ArgsCount]),
950950
MSt1 = MMod:decrement_reductions_and_maybe_schedule_next(MSt0),
951951
{MSt2, FuncReg} = read_any_xreg(ArgsCount, MMod, MSt1),
952-
{MSt3, Reg} = MMod:move_to_native_register(MSt2, FuncReg),
953-
MSt4 = verify_is_function(Reg, MMod, MSt3),
954-
MSt5 = MMod:call_primitive_with_cp(MSt4, ?PRIM_CALL_FUN, [
955-
ctx, jit_state, offset, Reg, ArgsCount
952+
{MSt3, Reg} = verify_is_function(FuncReg, MMod, MSt2),
953+
MSt4 = MMod:call_primitive_with_cp(MSt3, ?PRIM_CALL_FUN, [
954+
ctx, jit_state, offset, {free, Reg}, ArgsCount
956955
]),
957-
?ASSERT_ALL_NATIVE_FREE(MSt5),
958-
first_pass(Rest1, MMod, MSt5, State0);
956+
?ASSERT_ALL_NATIVE_FREE(MSt4),
957+
first_pass(Rest1, MMod, MSt4, State0);
959958
% 77
960959
first_pass(<<?OP_IS_FUNCTION, Rest0/binary>>, MMod, MSt0, State0) ->
961960
?ASSERT_ALL_NATIVE_FREE(MSt0),
@@ -2322,18 +2321,17 @@ first_pass(<<?OP_CALL_FUN2, Rest0/binary>>, MMod, MSt0, State0) ->
23222321
?ASSERT_ALL_NATIVE_FREE(MSt0),
23232322
{MSt1, Tag, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
23242323
{ArgsCount, Rest2} = decode_literal(Rest1),
2325-
{MSt2, Fun, Rest3} = decode_compact_term(Rest2, MMod, MSt1, State0),
2324+
{MSt2, Fun, Rest3} = decode_typed_compact_term(Rest2, MMod, MSt1, State0),
23262325
?TRACE("OP_CALL_FUN2 ~p, ~p, ~p\n", [Tag, ArgsCount, Fun]),
23272326
% We ignore Tag (could be literal 0 or atom unsafe)
23282327
MSt3 = MMod:free_native_registers(MSt2, [Tag]),
23292328
MSt4 = MMod:decrement_reductions_and_maybe_schedule_next(MSt3),
2330-
{MSt5, Reg} = MMod:move_to_native_register(MSt4, Fun),
2331-
MSt6 = verify_is_function(Reg, MMod, MSt5),
2332-
MSt7 = MMod:call_primitive_with_cp(MSt6, ?PRIM_CALL_FUN, [
2333-
ctx, jit_state, offset, Reg, ArgsCount
2329+
{MSt5, Reg} = verify_is_function(Fun, MMod, MSt4),
2330+
MSt6 = MMod:call_primitive_with_cp(MSt5, ?PRIM_CALL_FUN, [
2331+
ctx, jit_state, offset, {free, Reg}, ArgsCount
23342332
]),
2335-
?ASSERT_ALL_NATIVE_FREE(MSt7),
2336-
first_pass(Rest3, MMod, MSt7, State0);
2333+
?ASSERT_ALL_NATIVE_FREE(MSt6),
2334+
first_pass(Rest3, MMod, MSt6, State0);
23372335
% 180
23382336
first_pass(<<?OP_BADRECORD, Rest0/binary>>, MMod, MSt0, State0) ->
23392337
?ASSERT_ALL_NATIVE_FREE(MSt0),
@@ -3016,8 +3014,18 @@ term_is_boxed_with_tag_and_get_ptr(Label, Arg1, BoxedTag, MMod, MSt1) ->
30163014
%% @param MSt0 backend state
30173015
%% @return tuple of the new backend state and the native register holding the function
30183016
%%-----------------------------------------------------------------------------
3019-
verify_is_function(Arg, MMod, MSt0) ->
3020-
{MSt1, Reg} = MMod:copy_to_native_register(MSt0, Arg),
3017+
verify_is_function({typed, Func, t_fun}, MMod, MSt0) ->
3018+
MMod:move_to_native_register(MSt0, Func);
3019+
verify_is_function({typed, Func, any}, MMod, MSt0) ->
3020+
verify_is_function(Func, MMod, MSt0);
3021+
verify_is_function({typed, Func, _Other}, MMod, MSt0) ->
3022+
{MSt1, Reg} = MMod:move_to_native_register(MSt0, Func),
3023+
MSt2 = MMod:call_primitive_last(MSt1, ?PRIM_RAISE_ERROR_TUPLE, [
3024+
ctx, jit_state, offset, ?BADFUN_ATOM, Reg
3025+
]),
3026+
{MSt2, Reg};
3027+
verify_is_function(Func, MMod, MSt0) ->
3028+
{MSt1, Reg} = MMod:copy_to_native_register(MSt0, Func),
30213029
MSt2 = MMod:if_block(MSt1, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) ->
30223030
MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
30233031
ctx, jit_state, offset, ?BADFUN_ATOM, Reg
@@ -3030,7 +3038,8 @@ verify_is_function(Arg, MMod, MSt0) ->
30303038
ctx, jit_state, offset, ?BADFUN_ATOM, Reg
30313039
])
30323040
end),
3033-
MMod:free_native_registers(MSt5, [Reg]).
3041+
MSt6 = MMod:free_native_registers(MSt5, [Reg]),
3042+
MMod:move_to_native_register(MSt6, Func).
30343043

30353044
verify_is_binary_or_match_state(Label, Src, MMod, MSt0) ->
30363045
{MSt1, Reg} = MMod:copy_to_native_register(MSt0, Src),

tests/libs/jit/jit_tests.erl

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,20 @@
4848
<<0, 0, 0, 3, 0, 0, 0, 3, 15, 255, 0, 2, 0, 32>>
4949
).
5050

51+
% Code chunk with typed register from test_call_simple.erl
52+
% Contains call_fun2 opcode with typed register that uses verify_is_function optimization
53+
-define(CODE_CHUNK_2,
54+
<<0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 178, 0, 0, 0, 3, 0, 0, 0, 1, 1, 16, 153, 16, 2, 18, 34, 32,
55+
1, 32, 77, 21, 19, 12, 0, 32, 153, 32, 178, 50, 16, 87, 19, 16, 18, 0, 19, 3>>
56+
).
57+
-define(ATU8_CHUNK_2,
58+
<<255, 255, 255, 253, 8, 16, 116, 101, 115, 116, 95, 99, 97, 108, 108, 95, 115, 105, 109, 112,
59+
108, 101, 144, 116, 101, 115, 116, 95, 99, 97, 108, 108, 96, 117, 110, 115, 97, 102, 101>>
60+
).
61+
-define(TYPE_CHUNK_2,
62+
<<0, 0, 0, 3, 0, 0, 0, 2, 15, 255, 0, 16>>
63+
).
64+
5165
compile_minimal_x86_64_test() ->
5266
Stream0 = jit_stream_binary:new(0),
5367
<<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_0,
@@ -167,3 +181,72 @@ term_to_int_verify_is_match_state_typed_optimization_x86_64_test() ->
167181
),
168182

169183
ok.
184+
185+
verify_is_function_typed_optimization_x86_64_test() ->
186+
% Compile CODE_CHUNK_2 which contains a typed register for verify_is_function optimization
187+
Stream0 = jit_stream_binary:new(0),
188+
<<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_2,
189+
Stream1 = jit_stream_binary:append(
190+
Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC)
191+
),
192+
Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1),
193+
194+
AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_2),
195+
LiteralResolver = fun(_) -> test_literal end,
196+
TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_2),
197+
198+
% Compile with typed register support
199+
{_LabelsCount, Stream3} = jit:compile(
200+
?CODE_CHUNK_2, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2
201+
),
202+
CompiledCode = jit_x86_64:stream(Stream3),
203+
204+
% Check that the call to allocate is directly followed by building the cp
205+
% for call
206+
% b6: 48 8b 42 10 mov 0x10(%rdx),%rax
207+
% ba: ff e0 jmpq *%rax
208+
% bc: 48 8b 47 38 mov 0x38(%rdi),%rax
209+
% c0: 4c 8b 1e mov (%rsi),%r11
210+
% c3: 45 8b 1b mov (%r11),%r11d
211+
% c6: 49 c1 e3 18 shl $0x18,%r11
212+
% ...
213+
214+
% As opposed to:
215+
% b6: 48 8b 42 10 mov 0x10(%rdx),%rax
216+
% ba: ff e0 jmpq *%rax
217+
% bc: 48 8b 47 38 mov 0x38(%rdi),%rax
218+
% c0: 49 89 c3 mov %rax,%r11
219+
% c3: 4d 89 da mov %r11,%r10
220+
% c6: 41 80 e2 03 and $0x3,%r10b
221+
% ca: 41 80 fa 02 cmp $0x2,%r10b
222+
% ce: 74 1a je 0xea
223+
% d0: 48 8b 82 98 00 00 00 mov 0x98(%rdx),%rax
224+
% d7: 48 c7 c2 d7 00 00 00 mov $0xd7,%rdx
225+
% de: 48 c7 c1 8b 01 00 00 mov $0x18b,%rcx
226+
% e5: 4d 89 d8 mov %r11,%r8
227+
% e8: ff e0 jmpq *%rax
228+
% ea: 49 83 e3 fc and $0xfffffffffffffffc,%r11
229+
% ee: 4d 8b 1b mov (%r11),%r11
230+
% f1: 4d 89 da mov %r11,%r10
231+
% f4: 41 80 e2 3f and $0x3f,%r10b
232+
% f8: 41 80 fa 14 cmp $0x14,%r10b
233+
% fc: 74 1a je 0x118
234+
% fe: 48 8b 82 98 00 00 00 mov 0x98(%rdx),%rax
235+
% 105: 48 c7 c2 05 01 00 00 mov $0x105,%rdx
236+
% 10c: 48 c7 c1 8b 01 00 00 mov $0x18b,%rcx
237+
% 113: 4d 89 d8 mov %r11,%r8
238+
% 116: ff e0 jmpq *%rax
239+
% 118: 4c 8b 1e mov (%rsi),%r11
240+
% 11b: 45 8b 1b mov (%r11),%r11d
241+
% 11e: 49 c1 e3 18 shl $0x18,%r11
242+
% ...
243+
244+
?assertMatch(
245+
{_, 20},
246+
binary:match(
247+
CompiledCode,
248+
<<16#48, 16#8b, 16#42, 16#10, 16#ff, 16#e0, 16#48, 16#8b, 16#47, 16#38, 16#4c, 16#8b,
249+
16#1e, 16#45, 16#8b, 16#1b, 16#49, 16#c1, 16#e3, 16#18>>
250+
)
251+
),
252+
ok.

0 commit comments

Comments
 (0)