distributed-lab
diff --git a/‎Cargo.lock‎
Lines changed: 7 additions & 0 deletions b/‎Cargo.lock‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 1 addition & 0 deletions b/‎Cargo.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 4 additions & 4 deletions b/‎README.md‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎sage/optimized.sage‎
Lines changed: 55 additions & 0 deletions b/‎sage/optimized.sage‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎sage/u29x9.sage‎
Lines changed: 26 additions & 0 deletions b/‎sage/u29x9.sage‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎src/bigint/arithmetics/mod.rs‎
Lines changed: 1 addition & 0 deletions b/‎src/bigint/arithmetics/mod.rs‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/bigint/arithmetics/mul.rs‎
Lines changed: 165 additions & 9 deletions b/‎src/bigint/arithmetics/mul.rs‎
Lines changed: 165 additions & 9 deletions
@@ -25,6 +25,7 @@ lazy_static = "1.4.0"
 bitcoin-script-stack = { git = "https://github.com/FairgateLabs/rust-bitcoin-script-stack"}
 prettytable-rs = "0.10.0"
 paste = "1.0"
+seq-macro = "0.3.5"
 
 [dev-dependencies]
 rand_chacha = "0.3.1"
 
@@ -16,10 +16,10 @@ specify where you can find the corresponding unit test in the project.
 
 | Command | Description | Location |
 | --- | --- | --- |
-| `cargo test -- --nocapture test_254_bit_windowed_widening_mul` | Test our widening multiplication algorithm | [`test.rs`](src/bigint/arithmetics/test.rs#L517) |
-| `cargo test -- --nocapture test_mul_w_width_254bit` | Test our narrow multiplication algorithm | [`test.rs`](src/bigint/arithmetics/test.rs#L487) |
-| `cargo test -- --nocapture test_254_bit_widening_mul` | Test _BitVM_'s widening multiplication algorithm (extended by us) | [`test.rs`](src/bigint/arithmetics/test.rs#L457) |
-| `cargo test -- --nocapture test_64_and_254_bit_mul` | Test _BitVM_'s narrow multiplication algorithm (a bit optimized by us) | [`test.rs`](src/bigint/arithmetics/test.rs#L414) |
+| `cargo test -- --nocapture test_254_bit_windowed_widening_optimized_mul` | Test our widening multiplication algorithm | [`test.rs`](src/bigint/arithmetics/test.rs#L517) |
+| `cargo test -- --nocapture test_254_bit_narrow_mul_w_width` | Test our narrow multiplication algorithm | [`test.rs`](src/bigint/arithmetics/test.rs#L487) |
+| `cargo test -- --nocapture test_254_bit_windowed_lazy_widening_mul` | Test _BitVM_'s widening multiplication algorithm (extended by us) | [`test.rs`](src/bigint/arithmetics/test.rs#L457) |
+| `cargo test -- --nocapture test_254_bit_naive_widening_mul` | Test _BitVM_'s narrow multiplication algorithm (a bit optimized by us) | [`test.rs`](src/bigint/arithmetics/test.rs#L414) |
 | `cargo test -- --nocapture test_255_bit_cmpeq_widening_mul` | Test [`cmpeq`](https://bitcointalk.org/index.php?topic=5477449.0)'s widening multiplication algorithm | [`test.rs`](src/bigint/cmpeq/test.rs#L56) |
 | `cargo test -- --nocapture --ignored debug_mul_performance_comparison` | Compare the performance of several multiplication algorithms used | [`test.rs`](src/bigint/performance.rs#L14) |
 
 
@@ -0,0 +1,55 @@
+import random
+
+def to_window_w_form(n: Integer, w: Integer) -> Integer:
+    """
+    Converts the given integer n into its w-width representation (list of base-2^w digits, least significant first)
+    """
+
+    decomposition = []
+    while n >= 1:
+        c = n % (1 << w)
+        decomposition.append(c)
+        n = n - c
+        n = n // (1 << w)
+    
+    return decomposition
+
+def mul_window_w_form(x: Integer, y: Integer, w: Integer) -> Integer:
+    """
+    Multiplies two integers using window-w form
+    """
+
+    d = to_window_w_form(y, w)
+    if len(d) == 63: # Padding the decomposition to 64 coefficients
+        d.append(0)
+    
+    precompute_table = [i*x for i in range(1<<w)]
+
+    r = precompute_table[d[63]]
+    print(r.nbits())
+    for i in range(1, 64):
+        k = 256 + 4*i
+        for _ in range(4):
+            r = 2*r
+        r = r + precompute_table[d[63-i]]
+        print(r.nbits())
+        assert k == r.nbits()
+        
+    return r
+
+a = (1<<254)-1 # First 254-bit integer (fixed to the maximal value, not random)
+b = (1<<254)-1 # Second 254-bit integer (fixed to the maximal value, not random)
+
+w = 4 # Window width
+
+# Asserting that the decomposition is correct
+b_decomposition = to_window_w_form(b, w)
+if len(b_decomposition) == 63:
+    b_decomposition.append(0)
+assert sum([2**(w*i)*c for i, c in enumerate(b_decomposition)]) == b, 'decomposition is wrong'
+print('decomposition is correct')
+
+# Asserting that the multiplication is correct
+c = a * b
+assert mul_window_w_form(a, b, 4) == c, 'multiplication is wrong'
+print('multiplication is correct')
@@ -0,0 +1,26 @@
+import random
+
+N_BITS = 254
+LIMB_SIZE = 29 # 29-bit limbs
+
+def to_limbs(a: Integer) -> list[Integer]:
+    """
+    Converts the given integer a into a list of LIMB_SIZE-bit limbs, least significant limb first
+    """
+    limbs = []
+    while a >= 1:
+        c = a % (1 << LIMB_SIZE)
+        limbs.append(c)
+        a = a - c
+        a = a // (1 << LIMB_SIZE)
+    
+    return limbs
+
+# Validating the correctness of the conversion
+a = Integer(random.randint(0, (1<<N_BITS)-1))
+b = Integer(random.randint(0, (1<<N_BITS)-1))
+c = a * b
+
+print('a', to_limbs(a))
+print('b', to_limbs(b))
+print('c', to_limbs(c))
@@ -1,5 +1,6 @@
 pub mod add;
 pub mod mul;
+pub mod u29x9;
 
 #[cfg(test)]
 pub mod test;
@@ -1,6 +1,13 @@
+use bitcoin::opcodes::all::{OP_ADD, OP_FROMALTSTACK, OP_SUB, OP_SWAP};
+use bitcoin_script_stack::debugger::pushable::Builder;
+use seq_macro::seq;
+
 use crate::bigint::window::precompute::WindowedPrecomputeTable;
 use crate::bigint::window::NonNativeWindowedBigIntImpl;
-use crate::traits::integer::NonNativeLimbInteger;
+use crate::bigint::{U254, U508};
+use crate::pseudo::OP_4MUL;
+use crate::traits::arithmeticable::Arithmeticable;
+use crate::traits::integer::{NonNativeInteger, NonNativeLimbInteger};
 use crate::traits::window::Windowable;
 use crate::{
     bigint::NonNativeBigIntImpl,
@@ -99,12 +106,18 @@ where
             { <Self as Windowable>::OP_TOBEWINDOWEDFORM_TOALTSTACK() }
 
             // Precomputing {0*z, 1*z, ..., ((1<<WIDTH)-1)*z}
-            { WindowedPrecomputeTable::<T, WIDTH>::initialize() }
+            { WindowedPrecomputeTable::<T, WIDTH, false>::initialize() }
 
             // We initialize the result
-            { T::OP_0() }
+            // Note that we can simply pick the precomputed value
+            // since 0*16 is still 0, so we omit the doubling :)
+            OP_FROMALTSTACK 1 OP_ADD
+            { 1<<WIDTH }
+            OP_SWAP
+            OP_SUB
+            { T::OP_PICKSTACK() }
 
-            for _ in 0..Self::DECOMPOSITION_SIZE {
+            for _ in 1..Self::DECOMPOSITION_SIZE {
                 // Double the result WIDTH times
                 for _ in 0..WIDTH {
                     { T::OP_2MUL(0) }
@@ -137,7 +150,9 @@ where
     /// Multiplies the top two big integers on the stack
     /// represented as little-endian 32-bit limbs
     /// using w-width decomposition to get twice as large integer.
-    pub(in super::super) fn handle_OP_WIDENINGMUL<Q>() -> Script
+    /// Note: this is done lazily, that is, all operations are
+    /// performed over U508 from the very beginning.
+    pub(in super::super) fn handle_lazy_OP_WIDENINGMUL<Q>() -> Script
     where
         Q: NonNativeLimbInteger,
     {
@@ -149,12 +164,22 @@ where
             { T::OP_EXTEND::<Q>() }
 
             // Precomputing {0*z, 1*z, ..., ((1<<WIDTH)-1)*z}
-            { WindowedPrecomputeTable::<Q, WIDTH>::initialize() }
+            { WindowedPrecomputeTable::<Q, WIDTH, true>::initialize() }
 
-            // We initialize the result
-            { Q::OP_0() }
+            // Picking di from the stack
+            OP_FROMALTSTACK 1 OP_ADD
+
+            // Add the precomputed value to the result.
+            // Since currently stack looks like:
+            // {0*z, 1*z, ..., ((1<<WIDTH)-1)*z, di} with
+            // r being the result, we need to copy
+            // (1<<WIDTH - di)th element to the top of the stack.
+            { 1<<WIDTH }
+            OP_SWAP
+            OP_SUB
+            { Q::OP_PICKSTACK() }
 
-            for _ in 0..Self::DECOMPOSITION_SIZE {
+            for _ in 1..Self::DECOMPOSITION_SIZE {
                 // Double the result WIDTH times
                 for _ in 0..WIDTH {
                     { Q::OP_2MUL_NOOVERFLOW(0) }
@@ -183,4 +208,135 @@ where
             { Q::OP_FROMALTSTACK() }
         }
     }
+
+    /// Multiplies the top two big integers on the stack (little-endian limbs)
+    /// using w-width decomposition to get twice as large integer.
+    /// Uses the U254-specialized script (width fixed to 4, `Q` unused) when `Self::N_BITS`
+    /// equals `U254::N_BITS`; every other size falls back to `handle_lazy_OP_WIDENINGMUL::<Q>`.
+    pub(in super::super) fn handle_OP_WIDENINGMUL<Q>() -> Script 
+    where Q: NonNativeLimbInteger,
+    {
+        match Self::N_BITS {
+            U254::N_BITS => NonNativeWindowedBigIntImpl::<U254, 4>::handle_optimized_OP_WIDENINGMUL(),
+            _ => Self::handle_lazy_OP_WIDENINGMUL::<Q>(),
+        }
+    }
+}
+
+/// Special optimized implementation for U254 Windowed method
+#[allow(non_snake_case)]
+impl NonNativeWindowedBigIntImpl<U254, 4> {
+    /// Turns the big-integer depth found on top of the stack into the limb offset used by the
+    /// copy operation: `n_limbs(Q) + 9 * depth`, where `n_limbs(Q)` is the limb count of `Q`
+    /// (rounded up) and 9 is presumably the limb count of every deeper integer (TODO confirm).
+    fn normalize_stack_depth<Q>() -> Script 
+    where Q: NonNativeLimbInteger{
+        let n_limbs = (Q::N_BITS + Q::LIMB_SIZE - 1) / Q::LIMB_SIZE;
+
+        script! {
+            OP_DUP OP_4MUL {crate::pseudo::OP_2MUL()} // Multiplying a copy of depth by 8
+            OP_ADD // Adding depth to 8*depth to get 9*depth
+            { n_limbs }
+            OP_ADD
+        }
+    }
+
+    /// Copies the big integer located at the given depth to the top of the stack.
+    /// Works similarly to `OP_PICK`, but for big integers; the depth is taken from the stack top.
+    ///
+    /// For example, depth 0 copies the top element to the top of the stack, while depth 1 copies
+    /// the second element. `Q` only feeds `normalize_stack_depth`; `Self`'s limb count is copied.
+    pub(in super::super) fn handle_OP_PICKSTACK<Q: NonNativeLimbInteger>() -> Script {
+        let n_limbs = (Self::N_BITS + Self::LIMB_SIZE - 1) / Self::LIMB_SIZE;
+
+        script! {
+            { Self::normalize_stack_depth::<Q>() }
+
+            for _ in 0..n_limbs - 1 {
+                OP_DUP OP_PICK OP_SWAP // copy one limb and keep the offset on top
+            }
+            OP_1SUB OP_PICK // last limb: the offset is consumed here
+        }
+    }
+
+    /// Multiplies the top two big integers on the stack
+    /// represented as little-endian limbs (presumably 30-bit, per the `…, 30>` types below)
+    /// using w-width decomposition to get twice as large integer.
+    pub(in super::super) fn handle_optimized_OP_WIDENINGMUL() -> Script {
+        // The main loop script, see explanation in the returned script down below
+        let main_loop_script = {
+            let mut script_var = Vec::new();
+            // Iterating 63 times (omitting the first iteration, we have already done it)
+            seq!(N in 1..64 { #(
+                let next_script = Builder::new()
+                    // Extending the result to 256+4*N bits from 256+4*(N-1) bits
+                    .push_expression(NonNativeBigIntImpl::<{ 256 + 4*(N-1) }, 30>::OP_EXTEND::<NonNativeBigIntImpl::<{ 256 + 4*N }, 30>>())
+                    // First, multiply by 16 (four doublings) without caring for overflow
+                    .push_expression({
+                        let mut script_var = Vec::new();
+                        for _ in 0..4 {
+                            let next_script = Builder::new()
+                                .push_expression(NonNativeBigIntImpl::<{ 256 + 4*N }, 30>::OP_2MUL_NOOVERFLOW(0))
+                                .0
+                                .into_script();
+                            script_var.extend_from_slice(next_script.as_bytes());
+                        }
+                        Script::from(script_var)
+                    })
+                    // Taking coefficient di, computing 16 - di and picking the value at that depth
+                    .push_opcode(OP_FROMALTSTACK)
+                    .push_expression(1<<4)
+                    .push_opcode(OP_SWAP)
+                    .push_opcode(OP_SUB)
+                    .push_expression(Self::handle_OP_PICKSTACK::<NonNativeBigIntImpl::<{ 256 + 4*N }, 30>>())
+                    // Since we only need to care about the last limbs,
+                    // we do not extend the result
+                    .push_expression(NonNativeBigIntImpl::<256, 30>::OP_ADD_NOOVERFLOW(0, 1))
+                    .0
+                    .into_script();
+                script_var.extend_from_slice(next_script.as_bytes());
+            )* });
+
+            Script::from(script_var)
+        };
+
+        pushable::Builder::new()
+            // Push w-width form to the altstack
+            .push_expression(Self::OP_TOBEWINDOWEDFORM_TOALTSTACK())
+            // Initialize precompute table to the stack
+            // Since 256 bits fits in 9x30 limbs, we do not need
+            // to extend anything
+            .push_expression(WindowedPrecomputeTable::<Self, 4, true>::initialize())
+            // Making the first iteration of the loop (without the initial doubling step)
+            // Taking coefficient di, computing 16 - (di + 1) and picking the
+            // corresponding precomputed value
+            .push_opcode(OP_FROMALTSTACK)
+            .push_expression(1)
+            .push_opcode(OP_ADD)
+            .push_expression(1<<4)
+            .push_opcode(OP_SWAP)
+            .push_opcode(OP_SUB)
+            .push_expression(Self::OP_PICKSTACK())
+            // At this point, we have a 256-bit number in the stack
+            // Now the interesting part: the loop
+            .push_expression(main_loop_script)
+            // Moving result to the altstack
+            .push_expression(U508::OP_TOALTSTACK())
+            .push_expression({
+                // Removing the 16 precomputed values from the stack
+                let mut script_var = Vec::new();
+                for _ in 0..1<<4 {
+                    let next_script = Builder::new()
+                        .push_expression(Self::OP_DROP())
+                        .0
+                        .into_script();
+                    script_var.extend_from_slice(next_script.as_bytes());
+                }
+                Script::from(script_var)
+            })  
+            // Returning our element to the stack
+            .push_expression(U508::OP_FROMALTSTACK())
+            .0
+            .into_script()
+    }
 }