Skip to content

Commit eeb4f7b

Browse files
committed
Implement support for private_append
Add a new `term_reuse_binary` function to reuse a refc binary, taking advantage of the private_append compiler optimization. Handle out-of-memory errors in term_alloc_refc_binary by raising an out-of-memory error instead of aborting. Update the `and_/3` signature in the JIT backends to support ANDing into a new register, and perform a few optimizations accordingly by removing unnecessary copies. Signed-off-by: Paul Guyot <[email protected]>
1 parent 7933b82 commit eeb4f7b

File tree

14 files changed

+378
-155
lines changed

14 files changed

+378
-155
lines changed

libs/jit/src/jit.erl

Lines changed: 173 additions & 113 deletions
Large diffs are not rendered by default.

libs/jit/src/jit_aarch64.erl

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,7 @@ if_block_cond(
933933
) when ?IS_GPR(Reg) ->
934934
% AND with mask
935935
OffsetBefore = StreamModule:offset(Stream0),
936-
State1 = and_(State0, Reg, Mask),
936+
{State1, Reg} = and_(State0, RegTuple, Mask),
937937
Stream1 = State1#state.stream,
938938
% Compare with value
939939
I2 = jit_aarch64_asm:cmp(Reg, Val),
@@ -1945,9 +1945,18 @@ op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA,
19451945
%% @param Val immediate value to AND
19461946
%% @return Tuple of the updated backend state and the register holding the result
19471947
%%-----------------------------------------------------------------------------
1948-
-spec and_(state(), aarch64_register(), integer()) -> state().
1949-
and_(State, Reg, Val) ->
1950-
op_imm(State, and_, Reg, Reg, Val).
1948+
and_(State, {free, Reg}, Val) ->
1949+
NewState = op_imm(State, and_, Reg, Reg, Val),
1950+
{NewState, Reg};
1951+
and_(
1952+
#state{available_regs = [ResultReg | T], used_regs = UR} = State,
1953+
Reg,
1954+
Val
1955+
) ->
1956+
NewState = op_imm(
1957+
State#state{available_regs = T, used_regs = [ResultReg | UR]}, and_, ResultReg, Reg, Val
1958+
),
1959+
{NewState, ResultReg}.
19511960

19521961
%%-----------------------------------------------------------------------------
19531962
%% @doc Perform bitwise OR of a register with an immediate value.

libs/jit/src/jit_armv6m.erl

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
-include_lib("jit.hrl").
7575

7676
-include("primitives.hrl").
77+
-include("term.hrl").
7778

7879
-define(ASSERT(Expr), true = Expr).
7980

@@ -1301,7 +1302,7 @@ if_block_cond(
13011302
I1 = jit_armv6m_asm:mov(Temp, Reg),
13021303
Stream1 = StreamModule:append(Stream0, I1),
13031304
State1 = State0#state{stream = Stream1},
1304-
State2 = and_(State1#state{available_regs = AT}, Temp, Mask),
1305+
{State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask),
13051306
Stream2 = State2#state.stream,
13061307
% Compare with value
13071308
I2 = jit_armv6m_asm:cmp(Temp, Val),
@@ -1320,7 +1321,7 @@ if_block_cond(
13201321
) when ?IS_GPR(Reg) ->
13211322
% AND with mask
13221323
OffsetBefore = StreamModule:offset(Stream0),
1323-
State1 = and_(State0, Reg, Mask),
1324+
{State1, Reg} = and_(State0, RegTuple, Mask),
13241325
Stream1 = State1#state.stream,
13251326
% Compare with value
13261327
I2 = jit_armv6m_asm:cmp(Reg, Val),
@@ -2508,34 +2509,34 @@ get_module_index(
25082509
%% JIT currently calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to
25092510
%% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool
25102511
%% by using BICS for -4.
2511-
and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) ->
2512+
and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) ->
25122513
I1 = jit_armv6m_asm:lsls(Reg, Reg, 8),
25132514
I2 = jit_armv6m_asm:lsrs(Reg, Reg, 8),
25142515
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
2515-
State0#state{stream = Stream1};
2516+
{State0#state{stream = Stream1}, Reg};
25162517
and_(
25172518
#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
2518-
Reg,
2519+
{free, Reg},
25192520
Val
25202521
) when Val < 0 andalso Val >= -256 ->
25212522
State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)),
25222523
Stream1 = State1#state.stream,
25232524
I = jit_armv6m_asm:bics(Reg, Temp),
25242525
Stream2 = StreamModule:append(Stream1, I),
2525-
State1#state{available_regs = [Temp | AT], stream = Stream2};
2526+
{State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
25262527
and_(
25272528
#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
2528-
Reg,
2529+
{free, Reg},
25292530
Val
25302531
) ->
25312532
State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
25322533
Stream1 = State1#state.stream,
25332534
I = jit_armv6m_asm:ands(Reg, Temp),
25342535
Stream2 = StreamModule:append(Stream1, I),
2535-
State1#state{available_regs = [Temp | AT], stream = Stream2};
2536+
{State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
25362537
and_(
25372538
#state{stream_module = StreamModule, available_regs = []} = State0,
2538-
Reg,
2539+
{free, Reg},
25392540
Val
25402541
) when Val < 0 andalso Val >= -256 ->
25412542
% No available registers, use r0 as temp and save it to r12
@@ -2552,10 +2553,10 @@ and_(
25522553
% Restore r0 from r12
25532554
Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
25542555
Stream4 = StreamModule:append(Stream3, Restore),
2555-
State0#state{stream = Stream4};
2556+
{State0#state{stream = Stream4}, Reg};
25562557
and_(
25572558
#state{stream_module = StreamModule, available_regs = []} = State0,
2558-
Reg,
2559+
{free, Reg},
25592560
Val
25602561
) ->
25612562
% No available registers, use r0 as temp and save it to r12
@@ -2572,7 +2573,17 @@ and_(
25722573
% Restore r0 from r12
25732574
Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
25742575
Stream4 = StreamModule:append(Stream3, Restore),
2575-
State0#state{stream = Stream4}.
2576+
{State0#state{stream = Stream4}, Reg};
2577+
and_(
2578+
#state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} =
2579+
State0,
2580+
Reg,
2581+
?TERM_PRIMARY_CLEAR_MASK
2582+
) ->
2583+
I1 = jit_armv6m_asm:lsrs(ResultReg, Reg, 2),
2584+
I2 = jit_armv6m_asm:lsls(ResultReg, ResultReg, 2),
2585+
Stream1 = StreamModule:append(State0#state.stream, <<I1/binary, I2/binary>>),
2586+
{State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}.
25762587

25772588
or_(
25782589
#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,

libs/jit/src/jit_x86_64.erl

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,15 +1826,38 @@ get_module_index(
18261826
Reg
18271827
}.
18281828

1829-
and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
1829+
and_(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Val) when
1830+
?IS_GPR(Reg)
1831+
->
18301832
% 32 bits instructions on x86-64 zero the high 32 bits
18311833
I1 =
18321834
if
18331835
Val >= 0, Val =< 16#FFFFFFFF -> jit_x86_64_asm:andl(Val, Reg);
18341836
true -> jit_x86_64_asm:andq(Val, Reg)
18351837
end,
18361838
Stream1 = StreamModule:append(Stream0, I1),
1837-
State#state{stream = Stream1}.
1839+
{State#state{stream = Stream1}, Reg};
1840+
and_(
1841+
#state{
1842+
stream_module = StreamModule,
1843+
available_regs = [ResultReg | T],
1844+
used_regs = UR,
1845+
stream = Stream0
1846+
} = State,
1847+
Reg,
1848+
Val
1849+
) when
1850+
?IS_GPR(Reg)
1851+
->
1852+
I1 = jit_x86_64_asm:movq(Reg, ResultReg),
1853+
I2 =
1854+
if
1855+
Val >= 0, Val =< 16#FFFFFFFF -> jit_x86_64_asm:andl(Val, ResultReg);
1856+
true -> jit_x86_64_asm:andq(Val, ResultReg)
1857+
end,
1858+
Stream1 = StreamModule:append(Stream0, I1),
1859+
Stream2 = StreamModule:append(Stream1, I2),
1860+
{State#state{stream = Stream2, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}.
18381861

18391862
or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
18401863
I1 = jit_x86_64_asm:orq(Val, Reg),

libs/jit/src/primitives.hrl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
-define(PRIM_BITSTRING_GET_UTF32, 69).
9393
-define(PRIM_TERM_COPY_MAP, 70).
9494
-define(PRIM_STACKTRACE_BUILD, 71).
95+
-define(PRIM_TERM_REUSE_BINARY, 72).
9596

9697
% Parameters to ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS
9798
% -define(MEMORY_NO_SHRINK, 0).

libs/jit/src/term.hrl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,5 @@
7474
-define(REFC_BINARY_MIN_64, 64).
7575
-define(TERM_BOXED_REFC_BINARY_SIZE, 6).
7676
-define(BINARY_HEADER_SIZE, 2).
77+
78+
-define(TERM_INVALID_TERM, 0).

src/libAtomVM/jit.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1301,6 +1301,12 @@ static term jit_term_create_empty_binary(Context *ctx, size_t len)
13011301
return term_create_empty_binary(len, &ctx->heap, ctx->global);
13021302
}
13031303

1304+
static term jit_term_reuse_binary(Context *ctx, term src, size_t len)
1305+
{
1306+
TRACE("jit_term_reuse_binary: src=0x%lx, len=%d\n", src, (int) len);
1307+
return term_reuse_binary(src, len, &ctx->heap, ctx->global);
1308+
}
1309+
13041310
static int jit_decode_flags_list(Context *ctx, JITState *jit_state, term flags)
13051311
{
13061312
int flags_value = 0;
@@ -1734,7 +1740,8 @@ const ModuleNativeInterface module_native_interface = {
17341740
jit_bitstring_get_utf16,
17351741
jit_bitstring_get_utf32,
17361742
term_copy_map,
1737-
jit_stacktrace_build
1743+
jit_stacktrace_build,
1744+
jit_term_reuse_binary
17381745
};
17391746

17401747
#endif

src/libAtomVM/jit.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ struct ModuleNativeInterface
158158
term (*bitstring_get_utf32)(term src, int flags_value);
159159
term (*term_copy_map)(Context *ctx, term src);
160160
term (*stacktrace_build)(Context *ctx);
161+
term (*term_reuse_binary)(Context *ctx, term src, size_t len);
161162
};
162163

163164
extern const ModuleNativeInterface module_native_interface;

src/libAtomVM/opcodesswitch.h

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4074,6 +4074,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
40744074
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
40754075
}
40764076
term t = term_create_empty_binary(size_val, &ctx->heap, ctx->global);
4077+
if (UNLIKELY(term_is_invalid_term(t))) {
4078+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4079+
}
40774080

40784081
ctx->bs = t;
40794082
ctx->bs_offset = 0;
@@ -4122,6 +4125,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
41224125
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
41234126
}
41244127
term t = term_create_empty_binary(size_val / 8, &ctx->heap, ctx->global);
4128+
if (UNLIKELY(term_is_invalid_term(t))) {
4129+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4130+
}
41254131

41264132
ctx->bs = t;
41274133
ctx->bs_offset = 0;
@@ -4530,6 +4536,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
45304536
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
45314537
}
45324538
term t = term_create_empty_binary(0, &ctx->heap, ctx->global);
4539+
if (UNLIKELY(term_is_invalid_term(t))) {
4540+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4541+
}
45334542

45344543
ctx->bs = t;
45354544
ctx->bs_offset = 0;
@@ -4595,6 +4604,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
45954604
TRACE("bs_append/8, fail=%u size=" AVM_INT_FMT " unit=%u src=0x%" TERM_X_FMT " dreg=%c%i\n", (unsigned) fail, size_val, (unsigned) unit, src, T_DEST_REG(dreg));
45964605
src = x_regs[live];
45974606
term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global);
4607+
if (UNLIKELY(term_is_invalid_term(t))) {
4608+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4609+
}
45984610
memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size);
45994611

46004612
ctx->bs = t;
@@ -4641,8 +4653,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
46414653
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
46424654
}
46434655
DECODE_COMPACT_TERM(src, src_pc)
4644-
term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global);
4645-
memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size);
4656+
term t = term_reuse_binary(src, src_size + size_val / 8, &ctx->heap, ctx->global);
4657+
if (UNLIKELY(term_is_invalid_term(t))) {
4658+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4659+
}
46464660

46474661
ctx->bs = t;
46484662
ctx->bs_offset = src_size * 8;
@@ -6736,6 +6750,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
67366750
// Verify parameters and compute binary size in first iteration
67376751
#ifdef IMPL_EXECUTE_LOOP
67386752
size_t binary_size = 0;
6753+
term reuse_binary = term_invalid_term();
67396754
#endif
67406755
for (size_t j = 0; j < nb_segments; j++) {
67416756
term atom_type;
@@ -6824,6 +6839,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
68246839
// We only support src as a binary of bytes here.
68256840
segment_size = term_binary_size(src);
68266841
segment_unit = 8;
6842+
if (atom_type == PRIVATE_APPEND_ATOM && j == 0) {
6843+
reuse_binary = src;
6844+
}
68276845
} else {
68286846
VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail);
68296847
avm_int_t signed_size_value = term_to_int(size);
@@ -6864,7 +6882,17 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
68646882
if (UNLIKELY(memory_ensure_free_with_roots(ctx, alloc + term_binary_heap_size(binary_size / 8), live, x_regs, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
68656883
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
68666884
}
6867-
term t = term_create_empty_binary(binary_size / 8, &ctx->heap, ctx->global);
6885+
term t;
6886+
size_t original_size = 0;
6887+
if (term_is_invalid_term(reuse_binary)) {
6888+
t = term_create_empty_binary(binary_size / 8, &ctx->heap, ctx->global);
6889+
} else {
6890+
original_size = term_binary_size(reuse_binary);
6891+
t = term_reuse_binary(reuse_binary, binary_size / 8, &ctx->heap, ctx->global);
6892+
}
6893+
if (UNLIKELY(term_is_invalid_term(t))) {
6894+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
6895+
}
68686896
size_t offset = 0;
68696897

68706898
for (size_t j = 0; j < nb_segments; j++) {
@@ -6968,6 +6996,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
69686996
TRACE("bs_create_bin/6: current offset (%d) is not evenly divisible by 8\n", (int) offset);
69696997
RAISE_ERROR(UNSUPPORTED_ATOM);
69706998
}
6999+
if (reuse_binary == src && j == 0) {
7000+
segment_size = original_size * 8;
7001+
break;
7002+
}
69717003
uint8_t *dst = (uint8_t *) term_binary_data(t) + (offset / 8);
69727004
const uint8_t *bin = (const uint8_t *) term_binary_data(src);
69737005
size_t binary_size = term_binary_size(src);

src/libAtomVM/term.c

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -909,7 +909,7 @@ term term_alloc_refc_binary(size_t size, bool is_const, Heap *heap, GlobalContex
909909
if (IS_NULL_PTR(refc)) {
910910
// TODO propagate error to callers of this function, e.g., as an invalid term
911911
fprintf(stderr, "memory_create_refc_binary: Unable to allocate %zu bytes for refc_binary.\n", size);
912-
AVM_ABORT();
912+
return term_invalid_term();
913913
}
914914
boxed_value[3] = (term) refc;
915915
refc->ref_count = 1; // added to mso list, increment ref count
@@ -919,6 +919,64 @@ term term_alloc_refc_binary(size_t size, bool is_const, Heap *heap, GlobalContex
919919
return ret;
920920
}
921921

922+
term term_reuse_binary(term src, size_t size, Heap *heap, GlobalContext *glb)
923+
{
924+
if (term_is_refc_binary(src) && !term_refc_binary_is_const(src)) {
925+
term *boxed_value = term_to_term_ptr(src);
926+
struct RefcBinary *old_refc = (struct RefcBinary *) boxed_value[3];
927+
size_t old_size = old_refc->size;
928+
929+
// Only reuse if refcount is 1 (only this term references it)
930+
if (old_refc->ref_count == 1) {
931+
// Lock the list of refc binaries while we're trying to realloc.
932+
struct ListHead *refc_binaries = synclist_wrlock(&glb->refc_binaries);
933+
934+
// Remove from list before realloc because realloc might move the memory
935+
list_remove(&old_refc->head);
936+
937+
// Realloc to new size.
938+
size_t n = sizeof(struct RefcBinary) + size;
939+
struct RefcBinary *new_refc = realloc(old_refc, n);
940+
if (IS_NULL_PTR(new_refc)) {
941+
// Re-add to list before unlocking
942+
list_append(refc_binaries, &old_refc->head);
943+
synclist_unlock(&glb->refc_binaries);
944+
fprintf(stderr, "term_reuse_binary: Unable to reallocate %zu bytes for refc_binary.\n", size);
945+
return term_invalid_term();
946+
}
947+
948+
// Update size
949+
new_refc->size = size;
950+
951+
// Zero the new part if size increased
952+
if (LIKELY(size > old_size)) {
953+
memset((char *) &new_refc->data + old_size, 0, size - old_size);
954+
}
955+
956+
// Update the boxed value to point to the new refc BEFORE unlocking
957+
// so other threads see a consistent state
958+
boxed_value[1] = (term) size;
959+
boxed_value[3] = (term) new_refc;
960+
961+
// Re-add to list after realloc (whether pointer changed or not)
962+
list_append(refc_binaries, &new_refc->head);
963+
964+
// Unlock the list of refc binaries
965+
synclist_unlock(&glb->refc_binaries);
966+
967+
// Return the same term (boxed_value pointer hasn't changed)
968+
return src;
969+
}
970+
}
971+
// Not a refc binary or it's a const refc binary - create a new one
972+
size_t src_size = term_binary_size(src);
973+
term t = term_create_uninitialized_binary(size, heap, glb);
974+
// Copy the source data (up to the smaller of src_size and size)
975+
size_t copy_size = src_size < size ? src_size : size;
976+
memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), copy_size);
977+
return t;
978+
}
979+
922980
static term find_binary(term binary_or_state)
923981
{
924982
term t = binary_or_state;

0 commit comments

Comments
 (0)