@@ -39,10 +39,12 @@ class ECCVMCircuitBuilder {
3939 static constexpr size_t WNAF_DIGITS_PER_ROW = bb::eccvm::WNAF_DIGITS_PER_ROW;
4040 static constexpr size_t ADDITIONS_PER_ROW = bb::eccvm::ADDITIONS_PER_ROW;
4141
42- using MSM = bb::eccvm::MSM<CycleGroup>;
4342 std::shared_ptr<ECCOpQueue> op_queue;
43+ // `ScalarMul` represents a single scalar multiplication, i.e., a pair consisting of a scalar and a point on
44+ // the curve, which will eventually be multiplied and accumulated.
4445 using ScalarMul = bb::eccvm::ScalarMul<CycleGroup>;
45-
46+ // `MSM` is an ordered container of `ScalarMul`s
47+ using MSM = bb::eccvm::MSM<CycleGroup>;
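// Conceptually (as filled in by `get_msms()` below), a `ScalarMul` bundles the data the VM needs for one
// multiplication: the wnaf digit decomposition of the scalar together with its `skew` bit, the precomputed
// table of odd multiples of the point, and the program counter `pc` assigned to the mul.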
4648 ECCVMCircuitBuilder (std::shared_ptr<ECCOpQueue>& op_queue)
4749 : op_queue(op_queue){};
4850
@@ -51,8 +53,13 @@ class ECCVMCircuitBuilder {
5153 std::vector<MSM> get_msms () const
5254 {
5355 const uint32_t num_muls = get_number_of_muls ();
56+
57+ // `compute_precomputed_table` and `compute_wnaf_digits` are helper functions that will be used when we
58+ // populate our vector of MSMs.
59+
5460 /* *
5561 * For input point [P], return { -15[P], -13[P], ..., -[P], [P], ..., 13[P], 15[P] }
62+ * this "precomputed table" will be stored in the `ScalarMul` corresponding to [P]
5663 */
5764 const auto compute_precomputed_table =
5865 [](const AffineElement& base_point) -> std::array<AffineElement, POINT_TABLE_SIZE + 1 > {
@@ -74,6 +81,15 @@ class ECCVMCircuitBuilder {
7481 }
7582 return result;
7683 };
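// Illustrative sketch, assuming the first 16 entries follow the ordering documented above
// (table[0] = -15[P], ..., table[15] = 15[P]): an odd wnaf digit d in { -15, -13, ..., 13, 15 } then
// selects entry (d + 15) / 2, e.g. d = -15 -> table[0], d = -1 -> table[7], d = 1 -> table[8],
// d = 15 -> table[15].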
84+ /* *
85+ * Computes the WNAF representation of `scalar`. When `scalar` is even, we make it odd by adding 1 to the
86+ * least-significant slice; later, when we populate the `ScalarMul`, we record this by setting its `skew`
87+ * boolean to true. (This is necessary because otherwise we could only represent odd multiples of our point.)
88+ * Note also that in our applications `NUM_WNAF_DIGITS_PER_SCALAR = 32`: each scalar handled here is at most
89+ * 128 bits, because the full scalar is first split into two 128-bit parts using the endomorphism of the curve
90+ * (corresponding to a primitive cube root of unity), and each part is decomposed into 32 4-bit wnaf digits.
91+ *
92+ */
7793 const auto compute_wnaf_digits = [](uint256_t scalar) -> std::array<int , NUM_WNAF_DIGITS_PER_SCALAR> {
7894 std::array<int , NUM_WNAF_DIGITS_PER_SCALAR> output;
7995 int previous_slice = 0 ;
@@ -89,7 +105,7 @@ class ECCVMCircuitBuilder {
89105 // if least significant slice is even, we add 1 to create an odd value && set 'skew' to true
90106 wnaf_slice += 1 ;
91107 } else if (is_even) {
92- // for other slices, if it's even, we add 1 to the slice value
108+ // for other slices, if the slice is even, we add 1 to the slice value, again to create an odd value,
93109 // and subtract 16 from the previous slice to preserve the total scalar sum
94110 static constexpr int borrow_constant = static_cast <int >(1ULL << NUM_WNAF_DIGIT_BITS);
95111 previous_slice -= borrow_constant;
@@ -112,11 +128,26 @@ class ECCVMCircuitBuilder {
112128
113129 return output;
114130 };
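// Worked illustration of the rules above (a sketch, not part of the builder logic): take scalar = 44, whose
// two low 4-bit slices are (2, 12). The least-significant slice 12 is even, so it becomes 13 and `skew` is
// set (the slices now encode 45 = scalar + skew). The next slice 2 is even, so it becomes 3 and 16 is
// borrowed from the slice below (13 -> -3), preserving the total: 3 * 16 - 3 = 45. The same borrow rule
// makes every leading zero slice odd as well (it becomes 1, at the cost of subtracting 16 from the slice
// below), so all NUM_WNAF_DIGITS_PER_SCALAR digits end up odd while `scalar + skew` is preserved throughout.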
131+ // the variables and vectors here correspond to the EC ops that we will actually perform; in particular, we
132+ // have skipping logic for both the case when the scalar is 0 and the case when the EC point is the point at
133+ // infinity, as such terms do not contribute to the final sum.
115134
116- size_t msm_count = 0 ;
117- size_t active_mul_count = 0 ;
118- std::vector<size_t > msm_opqueue_index;
119- std::vector<std::pair<size_t , size_t >> msm_mul_index;
135+ // more precisely, we will break up our op_queue into a sequence of MSMs, where we throw away computations that
136+ // obviously don't contribute to the final desired value.
137+ size_t msm_count = 0 ; // total number of MSMs
138+ size_t active_mul_count =
139+ 0 ; // number of scalar multiplications required in the current MSM. Given a scalar n in F_q and a point P,
140+ // we generally get *two* scalar multiplications, because we break n up into 128-bit chunks (using the
141+ // curve endomorphism). This is an optimization.
142+ std::vector<size_t >
143+ msm_opqueue_index; // records the indices of the ops from the op_queue that our VM actually performs.
144+ std::vector<std::pair<size_t , size_t >>
145+ msm_mul_index; // pairs whose first element specifies "which MSM are we in" (via an index) and whose
146+ // second element specifies "which scalar multiplication is this in our VM simulation of
147+ // this MSM". Note that the second element, the `active_mul_count`, incorporates the
148+ // skipping logic: only multiplications we actually need to perform contribute to it.
149+ // Generically, each scalar multiplication contributes 2 VM mul operations, since we
150+ // split each Fq element into two 128-bit elements.
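// A sketch of the split referred to above (the GLV decomposition): the curve endomorphism acts on points as
// phi(x, y) = (beta * x, y), with beta a primitive cube root of unity in the base field, and on the group as
// multiplication by lambda, a cube root of unity in the scalar field. Writing a scalar as
// z = z1 + lambda * z2 (mod r), with z1 and z2 roughly 128 bits, turns the single mul (z, [P]) into the two
// 128-bit muls (z1, [P]) and (z2, phi([P])) counted above.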
120151 std::vector<size_t > msm_sizes;
121152
122153 const auto & eccvm_ops = op_queue->get_eccvm_ops ();
@@ -141,12 +172,18 @@ class ECCVMCircuitBuilder {
141172 msm_sizes.push_back (active_mul_count);
142173 msm_count++;
143174 }
144- std::vector<MSM> result (msm_count);
175+
176+ std::vector<MSM> result (
177+ msm_count); // the vector we will return, containing all of the MSMs that our VM will have to perform.
178+ // This amounts to breaking up our op_queue, splitting the elements of Fq into two
179+ // 128-bit scalars, and throwing out operations that a priori won't contribute.
145180 for (size_t i = 0 ; i < msm_count; ++i) {
146181 auto & msm = result[i];
147182 msm.resize (msm_sizes[i]);
148183 }
149-
184+ // populate result using the auxiliary vectors `msm_opqueue_index` and `msm_mul_index`, together with
185+ // `eccvm_ops`. This first pass does *not* set the pc (program counter) correctly; we explain why below,
186+ // where we do set it correctly.
150187 parallel_for_range (msm_opqueue_index.size (), [&](size_t start, size_t end) {
151188 for (size_t i = start; i < end; i++) {
152189 const auto & op = eccvm_ops[msm_opqueue_index[i]];
@@ -187,7 +224,9 @@ class ECCVMCircuitBuilder {
187224 // 2: the value of pc for the final mul = 1
188225 // The latter point is valuable as it means that we can add empty rows (where pc = 0) and still satisfy our
189226 // sumcheck relations that involve pc (if we did the other way around, starting at 1 and ending at num_muls,
190- // we create a discontinuity in pc values between the last transcript row and the following empty row)
227+ // we create a discontinuity in pc values between the last transcript row and the following empty row).
228+ // TL;DR: we choose a decreasing `pc` so that the entries of the column after the final mul are all 0.
229+ // This is simply an optimization.
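// For example, with num_muls = 3 the muls receive pc = 3, 2, 1 in order, and any empty rows that follow
// carry pc = 0, so the pc values end ..., 3, 2, 1, 0, 0, ... with no jump between the last mul and the
// empty rows.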
191230 uint32_t pc = num_muls;
192231 for (auto & msm : result) {
193232 for (auto & mul : msm) {