Network loading refactoring

anematode · vondele · commit 8be6b142189d · 2026-01-06T12:02:37.000+01:00
Closes official-stockfish#6523. No functional change.
diff --git a/src/engine.cpp b/src/engine.cpp
@@ -57,14 +57,10 @@ Engine::Engine(std::optional<std::string> path) :
     numaContext(NumaConfig::from_system()),
     states(new std::deque<StateInfo>(1)),
     threads(),
-    networks(
-      numaContext,
-      // Heap-allocate because sizeof(NN::Networks) is large
-      std::make_unique<NN::Networks>(
-        std::make_unique<NN::NetworkBig>(NN::EvalFile{EvalFileDefaultNameBig, "None", ""},
-                                         NN::EmbeddedNNUEType::BIG),
-        std::make_unique<NN::NetworkSmall>(NN::EvalFile{EvalFileDefaultNameSmall, "None", ""},
-                                           NN::EmbeddedNNUEType::SMALL))) {
+    networks(numaContext,
+             // Heap-allocate because sizeof(NN::Networks) is large
+             std::make_unique<NN::Networks>(NN::EvalFile{EvalFileDefaultNameBig, "None", ""},
+                                            NN::EvalFile{EvalFileDefaultNameSmall, "None", ""})) {
 
     pos.set(StartFEN, false, &states->back());
 
diff --git a/src/nnue/network.h b/src/nnue/network.h
@@ -28,7 +28,6 @@
 #include <string>
 #include <string_view>
 #include <tuple>
-#include <utility>
 
 #include "../misc.h"
 #include "../types.h"
@@ -130,9 +129,9 @@ using NetworkSmall = Network<SmallNetworkArchitecture, SmallFeatureTransformer>;
 
 
 struct Networks {
-    Networks(std::unique_ptr<NetworkBig>&& nB, std::unique_ptr<NetworkSmall>&& nS) :
-        big(std::move(*nB)),
-        small(std::move(*nS)) {}
+    Networks(EvalFile bigFile, EvalFile smallFile) :
+        big(bigFile, EmbeddedNNUEType::BIG),
+        small(smallFile, EmbeddedNNUEType::SMALL) {}
 
     NetworkBig   big;
     NetworkSmall small;
diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h
@@ -169,52 +169,56 @@ inline void write_little_endian(std::ostream& stream, const IntType* values, std
             write_little_endian<IntType>(stream, values[i]);
 }
 
-
 // Read N signed integers from the stream s, putting them in the array out.
 // The stream is assumed to be compressed using the signed LEB128 format.
 // See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme.
-template<typename IntType, std::size_t Count>
-inline void read_leb_128(std::istream& stream, std::array<IntType, Count>& out) {
-
-    // Check the presence of our LEB128 magic string
-    char leb128MagicString[Leb128MagicStringSize];
-    stream.read(leb128MagicString, Leb128MagicStringSize);
-    assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0);
+template<typename BufType, typename IntType, std::size_t Count>
+inline void read_leb_128_detail(std::istream&               stream,
+                                std::array<IntType, Count>& out,
+                                std::uint32_t&              bytes_left,
+                                BufType&                    buf,
+                                std::uint32_t&              buf_pos) {
 
     static_assert(std::is_signed_v<IntType>, "Not implemented for unsigned types");
+    static_assert(sizeof(IntType) <= 4, "Not implemented for types larger than 32 bit");
 
-    const std::uint32_t BUF_SIZE = 4096;
-    std::uint8_t        buf[BUF_SIZE];
-
-    auto bytes_left = read_little_endian<std::uint32_t>(stream);
-
-    std::uint32_t buf_pos = BUF_SIZE;
-    for (std::size_t i = 0; i < Count; ++i)
+    IntType result = 0;
+    size_t  shift = 0, i = 0;
+    while (i < Count)
     {
-        IntType result = 0;
-        size_t  shift  = 0;
-        do
+        if (buf_pos == buf.size())
         {
-            if (buf_pos == BUF_SIZE)
-            {
-                stream.read(reinterpret_cast<char*>(buf), std::min(bytes_left, BUF_SIZE));
-                buf_pos = 0;
-            }
+            stream.read(reinterpret_cast<char*>(buf.data()),
+                        std::min(std::size_t(bytes_left), buf.size()));
+            buf_pos = 0;
+        }
 
-            std::uint8_t byte = buf[buf_pos++];
-            --bytes_left;
-            result |= (byte & 0x7f) << shift;
-            shift += 7;
+        std::uint8_t byte = buf[buf_pos++];
+        --bytes_left;
+        result |= (byte & 0x7f) << (shift % 32);
+        shift += 7;
 
-            if ((byte & 0x80) == 0)
-            {
-                out[i] = (sizeof(IntType) * 8 <= shift || (byte & 0x40) == 0)
-                         ? result
-                         : result | ~((1 << shift) - 1);
-                break;
-            }
-        } while (shift < sizeof(IntType) * 8);
+        if ((byte & 0x80) == 0)
+        {
+            out[i++] = (shift >= 32 || (byte & 0x40) == 0) ? result : result | ~((1 << shift) - 1);
+            result   = 0;
+            shift    = 0;
+        }
     }
+}
+
+template<typename... Arrays>
+inline void read_leb_128(std::istream& stream, Arrays&... outs) {
+    // Check the presence of our LEB128 magic string
+    char leb128MagicString[Leb128MagicStringSize];
+    stream.read(leb128MagicString, Leb128MagicStringSize);
+    assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0);
+
+    auto                           bytes_left = read_little_endian<std::uint32_t>(stream);
+    std::array<std::uint8_t, 8192> buf;
+    std::uint32_t                  buf_pos = buf.size();
+
+    (read_leb_128_detail(stream, outs, bytes_left, buf, buf_pos), ...);
 
     assert(bytes_left == 0);
 }
diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h
@@ -152,35 +152,21 @@ class FeatureTransformer {
     }
 
     // Read network parameters
-    // TODO: This is ugly. Currently LEB128 on the entire L1 necessitates
-    // reading the weights into a combined array, and then splitting.
     bool read_parameters(std::istream& stream) {
-        read_leb_128<BiasType>(stream, biases);
+        read_leb_128(stream, biases);
 
         if (UseThreats)
         {
             read_little_endian<ThreatWeightType>(stream, threatWeights.data(),
                                                  ThreatInputDimensions * HalfDimensions);
-            read_leb_128<WeightType>(stream, weights);
+            read_leb_128(stream, weights);
 
-            auto combinedPsqtWeights =
-              std::make_unique<std::array<PSQTWeightType, TotalInputDimensions * PSQTBuckets>>();
-
-            read_leb_128<PSQTWeightType>(stream, *combinedPsqtWeights);
-
-            std::copy(combinedPsqtWeights->begin(),
-                      combinedPsqtWeights->begin() + ThreatInputDimensions * PSQTBuckets,
-                      std::begin(threatPsqtWeights));
-
-            std::copy(combinedPsqtWeights->begin() + ThreatInputDimensions * PSQTBuckets,
-                      combinedPsqtWeights->begin()
-                        + (ThreatInputDimensions + InputDimensions) * PSQTBuckets,
-                      std::begin(psqtWeights));
+            read_leb_128(stream, threatPsqtWeights, psqtWeights);
         }
         else
         {
-            read_leb_128<WeightType>(stream, weights);
-            read_leb_128<PSQTWeightType>(stream, psqtWeights);
+            read_leb_128(stream, weights);
+            read_leb_128(stream, psqtWeights);
         }
 
         permute_weights();