Skip to content

Commit 6b06240

Browse files
committed
Implement support for private_append
Add a new `term_reuse_binary` function to reuse a refc binary, taking advantage of the private_append compiler optimization. Handle out-of-memory errors in term_alloc_refc_binary by raising an out-of-memory error instead of aborting. Update the `and_/3` signature in the JIT backends so that the result of an AND can be placed in a new register, and perform a few optimizations accordingly by removing unnecessary copies. Signed-off-by: Paul Guyot <[email protected]>
1 parent 7933b82 commit 6b06240

File tree

14 files changed

+393
-160
lines changed

14 files changed

+393
-160
lines changed

libs/jit/src/jit.erl

Lines changed: 173 additions & 113 deletions
Large diffs are not rendered by default.

libs/jit/src/jit_aarch64.erl

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,7 @@ if_block_cond(
933933
) when ?IS_GPR(Reg) ->
934934
% AND with mask
935935
OffsetBefore = StreamModule:offset(Stream0),
936-
State1 = and_(State0, Reg, Mask),
936+
{State1, Reg} = and_(State0, RegTuple, Mask),
937937
Stream1 = State1#state.stream,
938938
% Compare with value
939939
I2 = jit_aarch64_asm:cmp(Reg, Val),
@@ -1945,9 +1945,18 @@ op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA,
19451945
%% @param Val immediate value to AND
19461946
%% @return Tuple of the updated backend state and the register holding the result
19471947
%%-----------------------------------------------------------------------------
1948-
-spec and_(state(), aarch64_register(), integer()) -> state().
1949-
and_(State, Reg, Val) ->
1950-
op_imm(State, and_, Reg, Reg, Val).
1948+
and_(State, {free, Reg}, Val) ->
1949+
NewState = op_imm(State, and_, Reg, Reg, Val),
1950+
{NewState, Reg};
1951+
and_(
1952+
#state{available_regs = [ResultReg | T], used_regs = UR} = State,
1953+
Reg,
1954+
Val
1955+
) ->
1956+
NewState = op_imm(
1957+
State#state{available_regs = T, used_regs = [ResultReg | UR]}, and_, ResultReg, Reg, Val
1958+
),
1959+
{NewState, ResultReg}.
19511960

19521961
%%-----------------------------------------------------------------------------
19531962
%% @doc Perform bitwise OR of a register with an immediate value.

libs/jit/src/jit_armv6m.erl

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
-include_lib("jit.hrl").
7575

7676
-include("primitives.hrl").
77+
-include("term.hrl").
7778

7879
-define(ASSERT(Expr), true = Expr).
7980

@@ -1301,7 +1302,7 @@ if_block_cond(
13011302
I1 = jit_armv6m_asm:mov(Temp, Reg),
13021303
Stream1 = StreamModule:append(Stream0, I1),
13031304
State1 = State0#state{stream = Stream1},
1304-
State2 = and_(State1#state{available_regs = AT}, Temp, Mask),
1305+
{State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask),
13051306
Stream2 = State2#state.stream,
13061307
% Compare with value
13071308
I2 = jit_armv6m_asm:cmp(Temp, Val),
@@ -1320,7 +1321,7 @@ if_block_cond(
13201321
) when ?IS_GPR(Reg) ->
13211322
% AND with mask
13221323
OffsetBefore = StreamModule:offset(Stream0),
1323-
State1 = and_(State0, Reg, Mask),
1324+
{State1, Reg} = and_(State0, RegTuple, Mask),
13241325
Stream1 = State1#state.stream,
13251326
% Compare with value
13261327
I2 = jit_armv6m_asm:cmp(Reg, Val),
@@ -2508,34 +2509,34 @@ get_module_index(
25082509
%% JIT currently calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to
25092510
%% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool
25102511
%% by using BICS for -4.
2511-
and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) ->
2512+
and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) ->
25122513
I1 = jit_armv6m_asm:lsls(Reg, Reg, 8),
25132514
I2 = jit_armv6m_asm:lsrs(Reg, Reg, 8),
25142515
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
2515-
State0#state{stream = Stream1};
2516+
{State0#state{stream = Stream1}, Reg};
25162517
and_(
25172518
#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
2518-
Reg,
2519+
{free, Reg},
25192520
Val
25202521
) when Val < 0 andalso Val >= -256 ->
25212522
State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)),
25222523
Stream1 = State1#state.stream,
25232524
I = jit_armv6m_asm:bics(Reg, Temp),
25242525
Stream2 = StreamModule:append(Stream1, I),
2525-
State1#state{available_regs = [Temp | AT], stream = Stream2};
2526+
{State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
25262527
and_(
25272528
#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
2528-
Reg,
2529+
{free, Reg},
25292530
Val
25302531
) ->
25312532
State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
25322533
Stream1 = State1#state.stream,
25332534
I = jit_armv6m_asm:ands(Reg, Temp),
25342535
Stream2 = StreamModule:append(Stream1, I),
2535-
State1#state{available_regs = [Temp | AT], stream = Stream2};
2536+
{State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
25362537
and_(
25372538
#state{stream_module = StreamModule, available_regs = []} = State0,
2538-
Reg,
2539+
{free, Reg},
25392540
Val
25402541
) when Val < 0 andalso Val >= -256 ->
25412542
% No available registers, use r0 as temp and save it to r12
@@ -2552,10 +2553,10 @@ and_(
25522553
% Restore r0 from r12
25532554
Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
25542555
Stream4 = StreamModule:append(Stream3, Restore),
2555-
State0#state{stream = Stream4};
2556+
{State0#state{stream = Stream4}, Reg};
25562557
and_(
25572558
#state{stream_module = StreamModule, available_regs = []} = State0,
2558-
Reg,
2559+
{free, Reg},
25592560
Val
25602561
) ->
25612562
% No available registers, use r0 as temp and save it to r12
@@ -2572,7 +2573,17 @@ and_(
25722573
% Restore r0 from r12
25732574
Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
25742575
Stream4 = StreamModule:append(Stream3, Restore),
2575-
State0#state{stream = Stream4}.
2576+
{State0#state{stream = Stream4}, Reg};
2577+
and_(
2578+
#state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} =
2579+
State0,
2580+
Reg,
2581+
?TERM_PRIMARY_CLEAR_MASK
2582+
) ->
2583+
I1 = jit_armv6m_asm:lsrs(ResultReg, Reg, 2),
2584+
I2 = jit_armv6m_asm:lsls(ResultReg, ResultReg, 2),
2585+
Stream1 = StreamModule:append(State0#state.stream, <<I1/binary, I2/binary>>),
2586+
{State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}.
25762587

25772588
or_(
25782589
#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,

libs/jit/src/jit_x86_64.erl

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,15 +1826,38 @@ get_module_index(
18261826
Reg
18271827
}.
18281828

1829-
and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
1829+
and_(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Val) when
1830+
?IS_GPR(Reg)
1831+
->
18301832
% 32 bits instructions on x86-64 zero the high 32 bits
18311833
I1 =
18321834
if
18331835
Val >= 0, Val =< 16#FFFFFFFF -> jit_x86_64_asm:andl(Val, Reg);
18341836
true -> jit_x86_64_asm:andq(Val, Reg)
18351837
end,
18361838
Stream1 = StreamModule:append(Stream0, I1),
1837-
State#state{stream = Stream1}.
1839+
{State#state{stream = Stream1}, Reg};
1840+
and_(
1841+
#state{
1842+
stream_module = StreamModule,
1843+
available_regs = [ResultReg | T],
1844+
used_regs = UR,
1845+
stream = Stream0
1846+
} = State,
1847+
Reg,
1848+
Val
1849+
) when
1850+
?IS_GPR(Reg)
1851+
->
1852+
I1 = jit_x86_64_asm:movq(Reg, ResultReg),
1853+
I2 =
1854+
if
1855+
Val >= 0, Val =< 16#FFFFFFFF -> jit_x86_64_asm:andl(Val, ResultReg);
1856+
true -> jit_x86_64_asm:andq(Val, ResultReg)
1857+
end,
1858+
Stream1 = StreamModule:append(Stream0, I1),
1859+
Stream2 = StreamModule:append(Stream1, I2),
1860+
{State#state{stream = Stream2, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}.
18381861

18391862
or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
18401863
I1 = jit_x86_64_asm:orq(Val, Reg),

libs/jit/src/primitives.hrl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
-define(PRIM_BITSTRING_GET_UTF32, 69).
9393
-define(PRIM_TERM_COPY_MAP, 70).
9494
-define(PRIM_STACKTRACE_BUILD, 71).
95+
-define(PRIM_TERM_REUSE_BINARY, 72).
9596

9697
% Parameters to ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS
9798
% -define(MEMORY_NO_SHRINK, 0).

libs/jit/src/term.hrl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,5 @@
7474
-define(REFC_BINARY_MIN_64, 64).
7575
-define(TERM_BOXED_REFC_BINARY_SIZE, 6).
7676
-define(BINARY_HEADER_SIZE, 2).
77+
78+
-define(TERM_INVALID_TERM, 0).

src/libAtomVM/jit.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1301,6 +1301,12 @@ static term jit_term_create_empty_binary(Context *ctx, size_t len)
13011301
return term_create_empty_binary(len, &ctx->heap, ctx->global);
13021302
}
13031303

1304+
static term jit_term_reuse_binary(Context *ctx, term src, size_t len)
1305+
{
1306+
TRACE("jit_term_reuse_binary: src=0x%lx, len=%d\n", src, (int) len);
1307+
return term_reuse_binary(src, len, &ctx->heap, ctx->global);
1308+
}
1309+
13041310
static int jit_decode_flags_list(Context *ctx, JITState *jit_state, term flags)
13051311
{
13061312
int flags_value = 0;
@@ -1734,7 +1740,8 @@ const ModuleNativeInterface module_native_interface = {
17341740
jit_bitstring_get_utf16,
17351741
jit_bitstring_get_utf32,
17361742
term_copy_map,
1737-
jit_stacktrace_build
1743+
jit_stacktrace_build,
1744+
jit_term_reuse_binary
17381745
};
17391746

17401747
#endif

src/libAtomVM/jit.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ struct ModuleNativeInterface
158158
term (*bitstring_get_utf32)(term src, int flags_value);
159159
term (*term_copy_map)(Context *ctx, term src);
160160
term (*stacktrace_build)(Context *ctx);
161+
term (*term_reuse_binary)(Context *ctx, term src, size_t len);
161162
};
162163

163164
extern const ModuleNativeInterface module_native_interface;

src/libAtomVM/opcodesswitch.h

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4074,6 +4074,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
40744074
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
40754075
}
40764076
term t = term_create_empty_binary(size_val, &ctx->heap, ctx->global);
4077+
if (UNLIKELY(term_is_invalid_term(t))) {
4078+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4079+
}
40774080

40784081
ctx->bs = t;
40794082
ctx->bs_offset = 0;
@@ -4122,6 +4125,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
41224125
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
41234126
}
41244127
term t = term_create_empty_binary(size_val / 8, &ctx->heap, ctx->global);
4128+
if (UNLIKELY(term_is_invalid_term(t))) {
4129+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4130+
}
41254131

41264132
ctx->bs = t;
41274133
ctx->bs_offset = 0;
@@ -4530,6 +4536,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
45304536
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
45314537
}
45324538
term t = term_create_empty_binary(0, &ctx->heap, ctx->global);
4539+
if (UNLIKELY(term_is_invalid_term(t))) {
4540+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4541+
}
45334542

45344543
ctx->bs = t;
45354544
ctx->bs_offset = 0;
@@ -4595,6 +4604,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
45954604
TRACE("bs_append/8, fail=%u size=" AVM_INT_FMT " unit=%u src=0x%" TERM_X_FMT " dreg=%c%i\n", (unsigned) fail, size_val, (unsigned) unit, src, T_DEST_REG(dreg));
45964605
src = x_regs[live];
45974606
term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global);
4607+
if (UNLIKELY(term_is_invalid_term(t))) {
4608+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4609+
}
45984610
memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size);
45994611

46004612
ctx->bs = t;
@@ -4641,8 +4653,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
46414653
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
46424654
}
46434655
DECODE_COMPACT_TERM(src, src_pc)
4644-
term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global);
4645-
memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size);
4656+
term t = term_reuse_binary(src, src_size + size_val / 8, &ctx->heap, ctx->global);
4657+
if (UNLIKELY(term_is_invalid_term(t))) {
4658+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4659+
}
46464660

46474661
ctx->bs = t;
46484662
ctx->bs_offset = src_size * 8;
@@ -6736,6 +6750,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
67366750
// Verify parameters and compute binary size in first iteration
67376751
#ifdef IMPL_EXECUTE_LOOP
67386752
size_t binary_size = 0;
6753+
bool reuse_binary = false;
67396754
#endif
67406755
for (size_t j = 0; j < nb_segments; j++) {
67416756
term atom_type;
@@ -6824,6 +6839,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
68246839
// We only support src as a binary of bytes here.
68256840
segment_size = term_binary_size(src);
68266841
segment_unit = 8;
6842+
if (atom_type == PRIVATE_APPEND_ATOM && j == 0) {
6843+
reuse_binary = true;
6844+
}
68276845
} else {
68286846
VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail);
68296847
avm_int_t signed_size_value = term_to_int(size);
@@ -6864,7 +6882,16 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
68646882
if (UNLIKELY(memory_ensure_free_with_roots(ctx, alloc + term_binary_heap_size(binary_size / 8), live, x_regs, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
68656883
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
68666884
}
6867-
term t = term_create_empty_binary(binary_size / 8, &ctx->heap, ctx->global);
6885+
term t;
6886+
if (!reuse_binary) {
6887+
t = term_create_empty_binary(binary_size / 8, &ctx->heap, ctx->global);
6888+
if (UNLIKELY(term_is_invalid_term(t))) {
6889+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
6890+
}
6891+
} else {
6892+
// t will be created in the first segment (PRIVATE_APPEND case)
6893+
t = term_invalid_term();
6894+
}
68686895
size_t offset = 0;
68696896

68706897
for (size_t j = 0; j < nb_segments; j++) {
@@ -6968,9 +6995,17 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
69686995
TRACE("bs_create_bin/6: current offset (%d) is not evenly divisible by 8\n", (int) offset);
69696996
RAISE_ERROR(UNSUPPORTED_ATOM);
69706997
}
6998+
size_t src_size = term_binary_size(src);
6999+
if (reuse_binary && j == 0) {
7000+
t = term_reuse_binary(src, binary_size / 8, &ctx->heap, ctx->global);
7001+
if (UNLIKELY(term_is_invalid_term(t))) {
7002+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
7003+
}
7004+
segment_size = src_size * 8;
7005+
break;
7006+
}
69717007
uint8_t *dst = (uint8_t *) term_binary_data(t) + (offset / 8);
69727008
const uint8_t *bin = (const uint8_t *) term_binary_data(src);
6973-
size_t binary_size = term_binary_size(src);
69747009
if (size != ALL_ATOM) {
69757010
VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail);
69767011
avm_int_t signed_size_value = term_to_int(size);
@@ -6979,17 +7014,17 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
69797014
RAISE_ERROR(BADARG_ATOM);
69807015
}
69817016
size_value = (size_t) signed_size_value;
6982-
if (size_value > binary_size) {
7017+
if (size_value > src_size) {
69837018
if (fail == 0) {
69847019
RAISE_ERROR(BADARG_ATOM);
69857020
} else {
69867021
JUMP_TO_LABEL(mod, fail);
69877022
}
69887023
}
6989-
binary_size = size_value;
7024+
src_size = size_value;
69907025
}
6991-
memcpy(dst, bin, binary_size);
6992-
segment_size = binary_size * 8;
7026+
memcpy(dst, bin, src_size);
7027+
segment_size = src_size * 8;
69937028
break;
69947029
}
69957030
default:

0 commit comments

Comments
 (0)