Skip to content

Commit 6138bc3

Browse files
committed
libexpr: Structural sharing of attrsets
This changes the implementation of Bindings to allow for a more space-efficient implementation of attribute set merges. This is accomplished by "layering" over the "base" Bindings. The top "layer" is naturally the right-hand-side of the update operator //. Such an implementation leads to significantly better memory usage on something like nixpkgs: nix-env --query --available --out-path --file ../nixpkgs --eval-system x86_64-linux > /dev/null Comparison against 2b0fd88 for x86_64-linux on nixpkgs f06c7c3b6f5074dbffcf02542fb86af3a5526afa: | metric | mean_before | mean_after | mean_diff | mean_%_change | p_value | t_stat | | - | - | - | - | - | - | - | | cpuTime | 21.1520 | 21.3414 | 0.1894 | 0.7784 | 0.3190 | 1.0219 | | envs.bytes | 461451951.6190 | 461451951.6190 | - | - | - | - | | envs.elements | 34344544.8571 | 34344544.8571 | - | - | - | - | | envs.number | 23336949.0952 | 23336949.0952 | - | - | - | - | | gc.cycles | 7.5238 | 7.2857 | -0.2381 | -4.6825 | 0.0565 | -2.0244 | | gc.heapSize | 1777848124.9524 | 1252162023.6190 | -525686101.3333 | -29.9472 | 0.0000 | -8.7041 | | gc.totalBytes | 3102787383.6190 | 2498431578.6667 | -604355804.9524 | -19.7704 | 0.0000 | -9.3502 | | list.bytes | 59928225.9048 | 59928225.9048 | - | - | - | - | | list.concats | 1240028.2857 | 1240028.2857 | - | - | - | - | | list.elements | 7491028.2381 | 7491028.2381 | - | - | - | - | | nrAvoided | 28165342.2381 | 28165342.2381 | - | - | - | - | | nrExprs | 1577412.9524 | 1577412.9524 | - | - | - | - | | nrFunctionCalls | 20970743.4286 | 20970743.4286 | - | - | - | - | | nrLookups | 10867306.0952 | 10867306.0952 | - | - | - | - | | nrOpUpdateValuesCopied | 61206062.0000 | 25748169.5238 | -35457892.4762 | -58.8145 | 0.0000 | -8.9189 | | nrOpUpdates | 2167097.4286 | 2167097.4286 | - | - | - | - | | nrPrimOpCalls | 12337423.4286 | 12337423.4286 | - | - | - | - | | nrThunks | 29361806.7619 | 29361806.7619 | - | - | - | - | | sets.bytes | 1393822818.6667 | 897587655.2381 | -496235163.4286 | -36.7168 | 0.0000 | -9.1115 | | sets.elements | 84504465.3333 | 48270845.9524 | -36233619.3810 | -43.8698 | 0.0000 | -8.9181 | | sets.number | 5218921.6667 | 5218921.6667 | - | - | - | - | | sizes.Attr | 16.0000 | 16.0000 | - | - | - | - | | sizes.Bindings | 8.0000 | 24.0000 | 16.0000 | 200.0000 | - | inf | | sizes.Env | 8.0000 | 8.0000 | - | - | - | - | | sizes.Value | 16.0000 | 16.0000 | - | - | - | - | | symbols.bytes | 1368494.0952 | 1368494.0952 | - | - | - | - | | symbols.number | 109147.1905 | 109147.1905 | - | - | - | - | | time.cpu | 21.1520 | 21.3414 | 0.1894 | 0.7784 | 0.3190 | 1.0219 | | time.gc | 1.6011 | 0.8508 | -0.7503 | -37.1507 | 0.0017 | -3.6328 | | time.gcFraction | 0.0849 | 0.0399 | -0.0450 | -37.4504 | 0.0035 | -3.3116 | | values.bytes | 615968144.7619 | 615968144.7619 | - | - | - | - | | values.number | 38498009.0476 | 38498009.0476 | - | - | - | - | Overall this does slow down the evaluator slightly (no more than ~10% in most cases), but this seems like a very decent tradeoff for shaving off 33% of memory usage.
1 parent 3eb223f commit 6138bc3

File tree

7 files changed

+423
-62
lines changed

7 files changed

+423
-62
lines changed

src/libexpr-c/nix_api_value.cc

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -371,13 +371,24 @@ bool nix_has_attr_byname(nix_c_context * context, const nix_value * value, EvalS
371371
NIXC_CATCH_ERRS_RES(false);
372372
}
373373

374-
nix_value * nix_get_attr_byidx(
375-
nix_c_context * context, const nix_value * value, EvalState * state, unsigned int i, const char ** name)
374+
static void collapse_attrset_layer_chain_if_needed(nix::Value & v, EvalState * state)
375+
{
376+
auto & attrs = *v.attrs();
377+
if (attrs.isLayered()) {
378+
auto bindings = state->state.buildBindings(attrs.size());
379+
std::ranges::copy(attrs, std::back_inserter(bindings));
380+
v.mkAttrs(bindings);
381+
}
382+
}
383+
384+
nix_value *
385+
nix_get_attr_byidx(nix_c_context * context, nix_value * value, EvalState * state, unsigned int i, const char ** name)
376386
{
377387
if (context)
378388
context->last_err_code = NIX_OK;
379389
try {
380390
auto & v = check_value_in(value);
391+
collapse_attrset_layer_chain_if_needed(v, state);
381392
const nix::Attr & a = (*v.attrs())[i];
382393
*name = state->state.symbols[a.name].c_str();
383394
nix_gc_incref(nullptr, a.value);
@@ -387,13 +398,13 @@ nix_value * nix_get_attr_byidx(
387398
NIXC_CATCH_ERRS_NULL
388399
}
389400

390-
const char *
391-
nix_get_attr_name_byidx(nix_c_context * context, const nix_value * value, EvalState * state, unsigned int i)
401+
const char * nix_get_attr_name_byidx(nix_c_context * context, nix_value * value, EvalState * state, unsigned int i)
392402
{
393403
if (context)
394404
context->last_err_code = NIX_OK;
395405
try {
396406
auto & v = check_value_in(value);
407+
collapse_attrset_layer_chain_if_needed(v, state);
397408
const nix::Attr & a = (*v.attrs())[i];
398409
return state->state.symbols[a.name].c_str();
399410
}

src/libexpr-c/nix_api_value.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,8 @@ bool nix_has_attr_byname(nix_c_context * context, const nix_value * value, EvalS
297297
* @param[out] name will store a pointer to the attribute name
298298
* @return value, NULL in case of errors
299299
*/
300-
nix_value * nix_get_attr_byidx(
301-
nix_c_context * context, const nix_value * value, EvalState * state, unsigned int i, const char ** name);
300+
nix_value *
301+
nix_get_attr_byidx(nix_c_context * context, nix_value * value, EvalState * state, unsigned int i, const char ** name);
302302

303303
/** @brief Get an attribute name by index in the sorted bindings
304304
*
@@ -311,8 +311,7 @@ nix_value * nix_get_attr_byidx(
311311
* @param[in] i attribute index
312312
* @return name, NULL in case of errors
313313
*/
314-
const char *
315-
nix_get_attr_name_byidx(nix_c_context * context, const nix_value * value, EvalState * state, unsigned int i);
314+
const char * nix_get_attr_name_byidx(nix_c_context * context, nix_value * value, EvalState * state, unsigned int i);
316315

317316
/**@}*/
318317
/** @name Initializers

src/libexpr-tests/nix_api_expr.cc

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,4 +437,31 @@ TEST_F(nix_api_expr_test, nix_value_call_multi_no_args)
437437
assert_ctx_ok();
438438
ASSERT_EQ(3, rInt);
439439
}
440+
441+
TEST_F(nix_api_expr_test, nix_expr_attrset_update)
442+
{
443+
nix_expr_eval_from_string(ctx, state, "{ a = 0; b = 2; } // { a = 1; b = 3; } // { a = 2; }", ".", value);
444+
assert_ctx_ok();
445+
446+
ASSERT_EQ(nix_get_attrs_size(ctx, value), 2);
447+
assert_ctx_ok();
448+
std::array<std::pair<std::string_view, nix_value *>, 2> values;
449+
for (unsigned int i = 0; i < 2; ++i) {
450+
const char * name;
451+
values[i].second = nix_get_attr_byidx(ctx, value, state, i, &name);
452+
assert_ctx_ok();
453+
values[i].first = name;
454+
}
455+
std::sort(values.begin(), values.end(), [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first; });
456+
457+
nix_value * a = values[0].second;
458+
ASSERT_EQ("a", values[0].first);
459+
ASSERT_EQ(nix_get_int(ctx, a), 2);
460+
assert_ctx_ok();
461+
nix_value * b = values[1].second;
462+
ASSERT_EQ("b", values[1].first);
463+
ASSERT_EQ(nix_get_int(ctx, b), 3);
464+
assert_ctx_ok();
465+
}
466+
440467
} // namespace nixC

src/libexpr/attr-set.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Bindings * EvalState::allocBindings(size_t capacity)
1414
{
1515
if (capacity == 0)
1616
return &Bindings::emptyBindings;
17-
if (capacity > std::numeric_limits<Bindings::size_t>::max())
17+
if (capacity > std::numeric_limits<Bindings::size_type>::max())
1818
throw Error("attribute set of size %d is too big", capacity);
1919
nrAttrsets++;
2020
nrAttrsInAttrsets += capacity;
@@ -35,7 +35,7 @@ Value & BindingsBuilder::alloc(std::string_view name, PosIdx pos)
3535

3636
void Bindings::sort()
3737
{
38-
std::sort(attrs, attrs + size_);
38+
std::sort(attrs, attrs + numAttrs);
3939
}
4040

4141
Value & Value::mkAttrs(BindingsBuilder & bindings)

src/libexpr/eval.cc

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1873,37 +1873,71 @@ void ExprOpUpdate::eval(EvalState & state, Env & env, Value & v)
18731873

18741874
state.nrOpUpdates++;
18751875

1876-
if (v1.attrs()->size() == 0) {
1876+
const Bindings & bindings1 = *v1.attrs();
1877+
if (bindings1.empty()) {
18771878
v = v2;
18781879
return;
18791880
}
1880-
if (v2.attrs()->size() == 0) {
1881+
1882+
const Bindings & bindings2 = *v2.attrs();
1883+
if (bindings2.empty()) {
18811884
v = v1;
18821885
return;
18831886
}
18841887

1885-
auto attrs = state.buildBindings(v1.attrs()->size() + v2.attrs()->size());
1888+
/* Simple heuristic for determining whether attrs2 should be "layered" on top of
1889+
attrs1 instead of copying to a new Bindings. */
1890+
bool shouldLayer = [&]() -> bool {
1891+
if (bindings1.isLayerListFull())
1892+
return false;
1893+
1894+
if (bindings2.size() > state.settings.bindingsUpdateLayerRhsSizeThreshold)
1895+
return false;
1896+
1897+
return true;
1898+
}();
1899+
1900+
if (shouldLayer) {
1901+
auto attrs = state.buildBindings(bindings2.size());
1902+
attrs.layerOnTopOf(bindings1);
1903+
1904+
std::ranges::copy(bindings2, std::back_inserter(attrs));
1905+
v.mkAttrs(attrs.alreadySorted());
1906+
1907+
state.nrOpUpdateValuesCopied += bindings2.size();
1908+
return;
1909+
}
1910+
1911+
auto attrs = state.buildBindings(bindings1.size() + bindings2.size());
18861912

18871913
/* Merge the sets, preferring values from the second set. Make
18881914
sure to keep the resulting vector in sorted order. */
1889-
auto i = v1.attrs()->begin();
1890-
auto j = v2.attrs()->begin();
1915+
auto i = bindings1.begin();
1916+
auto j = bindings2.begin();
18911917

1892-
while (i != v1.attrs()->end() && j != v2.attrs()->end()) {
1918+
while (i != bindings1.end() && j != bindings2.end()) {
18931919
if (i->name == j->name) {
18941920
attrs.insert(*j);
18951921
++i;
18961922
++j;
1897-
} else if (i->name < j->name)
1898-
attrs.insert(*i++);
1899-
else
1900-
attrs.insert(*j++);
1923+
} else if (i->name < j->name) {
1924+
attrs.insert(*i);
1925+
++i;
1926+
} else {
1927+
attrs.insert(*j);
1928+
++j;
1929+
}
19011930
}
19021931

1903-
while (i != v1.attrs()->end())
1904-
attrs.insert(*i++);
1905-
while (j != v2.attrs()->end())
1906-
attrs.insert(*j++);
1932+
while (i != bindings1.end()) {
1933+
attrs.insert(*i);
1934+
++i;
1935+
}
1936+
1937+
while (j != bindings2.end()) {
1938+
attrs.insert(*j);
1939+
++j;
1940+
}
19071941

19081942
v.mkAttrs(attrs.alreadySorted());
19091943

0 commit comments

Comments
 (0)