
Commit 2640bfe

feat(avm)!: ALU Shifts (#16515)
## a << b OR a >> b

Adds shift opcodes `SHL` and `SHR`, following some of the logic [here](https://github.com/AztecProtocol/aztec-packages/blame/bf7a99d8d8e3faa00c9395a17acf7f92b171a7a1/barretenberg/cpp/pil/avm/alu.pil#L518). We reuse the decomposition columns for u128 mul & div to show:

- `SHL`: show that `a = 2^(max_bits - shift) * a_hi + a_lo` and assign `c = a_lo * 2^shift`
  - includes range checking that `a_hi` is less than `shift` bits and `a_lo` is less than `max_bits - shift` bits
- `SHR`: show that `a = 2^shift * a_hi + a_lo` and assign `c = a_hi`
  - includes range checking that `a_hi` is less than `max_bits - shift` bits and `a_lo` is less than `shift` bits

### Bit Size Overflow

If `shift > max_bits`, then `max_bits - shift` (used in the range checks in both cases) will underflow the field and fail. Since we just need to return `c = 0` in this case, I set `sel_shift_ops_no_overflow` as a gate for assigning `c`. If we have an overflow, we assign `a_lo = shift - max_bits` and range check it against `max_bits`. The range check proves we have a shift overflow (i.e. `SHIFT_OVERFLOW = sel_shift_ops * (1 - sel_shift_ops_no_overflow) <==> shift > max_bits`) and the result is assigned as 0, e.g. for `SHR`:

```
ic - sel_shift_ops_no_overflow * a_hi = 0;
```

### Errors

Like `DIV`, shift operations on `FF` values are invalid, so I reused columns/relations to assign `FF_TAG_ERR = 1` in this case. Mismatched `a` and `b` tags (to follow the new Noir style) also give an error (`sel_ab_tag_mismatch`). As before:

```
sel_tag_err = sel_ab_tag_mismatch + FF_TAG_ERR - sel_ab_tag_mismatch * FF_TAG_ERR;
```

### Notes

- To avoid more lookups than necessary, I use `precomputed.power_of_2` (to find the limb size of `a_lo`, either `2^shift` or `2^(max_bits - shift)`) once; then for `SHL`, when we also need `2^shift`, I use `helper1` to inject the value and prove its correctness in a relation. It might be cheaper another way, or just clearer, to always look up the `2^shift` value and assign `helper1 = 2^(max_bits - shift)` - happy to change/discuss!
- Made some changes to `TaggedValue` behaviour to fix issues with overflowing the bit size - thank you Facundo for your help on this!
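To make the decomposition concrete, here is a small standalone sketch in plain C++ (not code from this PR; the function names and the `max_bits < 64` restriction are just for illustration) mirroring what the relations prove for an integer tag of `max_bits` bits:

```cpp
#include <cassert>
#include <cstdint>

// SHR: a = 2^shift * a_hi + a_lo, result c = a_hi.
uint64_t shr_via_decomposition(uint64_t a, uint64_t shift, uint64_t max_bits)
{
    if (shift >= max_bits) {
        return 0; // shift > max_bits is the SHIFT_OVERFLOW case; shift == max_bits also yields 0
    }
    uint64_t a_hi = a >> shift;          // fits in (max_bits - shift) bits
    uint64_t a_lo = a - (a_hi << shift); // fits in shift bits
    assert(a == (a_hi << shift) + a_lo); // the SHR form of #[A_DECOMPOSITION]
    return a_hi;                         // #[ALU_SHR]
}

// SHL: a = 2^(max_bits - shift) * a_hi + a_lo, result c = a_lo * 2^shift.
uint64_t shl_via_decomposition(uint64_t a, uint64_t shift, uint64_t max_bits)
{
    if (shift >= max_bits) {
        return 0;
    }
    uint64_t lo_bits = max_bits - shift;
    uint64_t a_hi = a >> lo_bits;          // fits in shift bits
    uint64_t a_lo = a - (a_hi << lo_bits); // fits in (max_bits - shift) bits
    assert(a == (a_hi << lo_bits) + a_lo); // the SHL form of #[A_DECOMPOSITION]
    return a_lo << shift;                  // #[ALU_SHL]; already < 2^max_bits
}

int main()
{
    // u8 example (max_bits = 8), a = 0b10110011:
    assert(shr_via_decomposition(0b10110011, 3, 8) == 0b10110);    // a_hi = 0b10110, a_lo = 0b011
    assert(shl_via_decomposition(0b10110011, 3, 8) == 0b10011000); // a_lo = 0b10011, c = a_lo * 8
    assert(shl_via_decomposition(0b10110011, 200, 8) == 0);        // shift > max_bits => 0
    return 0;
}
```

For `shift >= max_bits` the helpers return 0 directly, which matches the circuit: at `shift == max_bits` the zero-width limb already forces `c = 0`, and for `shift > max_bits` the `SHIFT_OVERFLOW` path applies.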
2 parents 87ddf16 + 9d0626a commit 2640bfe

File tree

20 files changed: +1523 −360 lines changed

barretenberg/cpp/pil/vm2/alu.pil

Lines changed: 134 additions & 39 deletions
```diff
@@ -39,7 +39,7 @@ sel = 0;
 pol commit cf;

 // Generic helper column
-// Current use: EQ (inverse of a-b) & DIV (remainder)
+// Current use: EQ (inverse of a-b), DIV (remainder), and SHL (2**ib)
 pol commit helper1;

 // maximum bits the number can hold (i.e. 8 for a u8):
@@ -95,7 +95,7 @@ execution.sel_execute_alu {

 // IS_FF CHECKING

-pol CHECK_TAG_FF = sel_op_div + sel_op_fdiv + sel_op_lt + sel_op_lte + sel_op_not;
+pol CHECK_TAG_FF = sel_op_div + sel_op_fdiv + sel_op_lt + sel_op_lte + sel_op_not + sel_shift_ops;
 // We prove that sel_is_ff == 1 <==> ia_tag == MEM_TAG_FF
 pol TAG_FF_DIFF = ia_tag - constants.MEM_TAG_FF;
 pol commit tag_ff_diff_inv;
@@ -113,7 +113,7 @@ CHECK_TAG_U128 * (TAG_U128_DIFF * (sel_is_u128 * (1 - tag_u128_diff_inv) + tag_u

 // TAG CHECKING

-pol EXPECTED_C_TAG = (sel_op_add + sel_op_sub + sel_op_mul + sel_op_div + sel_op_truncate + sel_op_shr + sel_op_shl) * ia_tag + (sel_op_eq + sel_op_lt + sel_op_lte) * constants.MEM_TAG_U1 + sel_op_fdiv * constants.MEM_TAG_FF;
+pol EXPECTED_C_TAG = (sel_op_add + sel_op_sub + sel_op_mul + sel_op_div + sel_op_truncate + sel_shift_ops) * ia_tag + (sel_op_eq + sel_op_lt + sel_op_lte) * constants.MEM_TAG_U1 + sel_op_fdiv * constants.MEM_TAG_FF;

 // The tag of c is generated by the opcode and is never wrong.
 // Gating with (1 - sel_tag_err) is necessary because when an error occurs, we have to set the tag to 0,
@@ -125,22 +125,22 @@ pol commit sel_tag_err;
 sel_tag_err * (1 - sel_tag_err) = 0;

 // Tag errors currently have cases:
-// 1. Input tagged as a field for NOT or DIV operations or non-field for FDIV operation
+// 1. Input tagged as a field for NOT, DIV, or shift operations or non-field for FDIV operation
 // 2. Mismatched tags for inputs a and b for all opcodes apart from TRUNC
 // 1 is handled by checking FF_TAG_ERR in TAG_ERR_CHECK and 2 is handled in AB_TAGS_CHECK.
-pol FF_TAG_ERR = (sel_op_div + sel_op_not) * sel_is_ff + sel_op_fdiv * IS_NOT_FF;
+pol FF_TAG_ERR = (sel_op_div + sel_op_not + sel_shift_ops) * sel_is_ff + sel_op_fdiv * IS_NOT_FF;
 pol commit sel_ab_tag_mismatch;
 sel_ab_tag_mismatch * (1 - sel_ab_tag_mismatch) = 0;

-// TODO(MW): It's technically possible to have BOTH cases be true if we perform a DIV with FF ib and integer ia,
+// TODO(MW): It's technically possible to have BOTH cases be true if we perform a DIV or shift with FF ib and integer ia,
 // so for now I take sel_ab_tag_mismatch * FF_TAG_ERR.
 #[TAG_ERR_CHECK]
 sel_tag_err = sel_ab_tag_mismatch + FF_TAG_ERR - sel_ab_tag_mismatch * FF_TAG_ERR;

 // For NOT opcode, an error occurs if the tag of a is FF. In this case, tracegen will set
 // b's tag as 0 which, while it would currently pass the checks below, is not a tag inequality we
 // want to throw with sel_ab_tag_mismatch:
-pol CHECK_AB_TAGS = 1 - sel_op_not * sel_is_ff - sel_op_truncate - sel_op_shr - sel_op_shl; // note: shifts are temporary, they should be subject to AB checks
+pol CHECK_AB_TAGS = 1 - sel_op_not * sel_is_ff - sel_op_truncate;
 pol AB_TAGS_EQ = 1 - sel_ab_tag_mismatch;
 pol commit ab_tags_diff_inv;
 // Prove that sel_ab_tag_mismatch = 1 <==> we have a disallowed inequality between the tags:
@@ -150,6 +150,52 @@ CHECK_AB_TAGS * ( (ia_tag - ib_tag) * ( AB_TAGS_EQ * (1 - ab_tags_diff_inv) + ab
 #[TAG_MAX_BITS_VALUE]
 sel { ia_tag, max_bits, max_value } in precomputed.sel_tag_parameters { precomputed.clk, precomputed.tag_max_bits, precomputed.tag_max_value };

+// BIT DECOMPOSITION
+
+// We use the below to prove correct decomposition of limbs. Currently used by MUL, DIV, SHL, and SHR.
+pol commit sel_decompose_a;
+// #[OP_ID_CHECK] ensures selectors are mutually exclusive:
+sel_decompose_a = sel_mul_div_u128 + sel_shift_ops * IS_NOT_FF;
+// Currently, sel_decompose_b would just equal sel_mul_div_u128, so no need for another column.
+pol commit a_lo, a_hi, b_lo, b_hi;
+pol TWO_POW_64 = 2 ** 64;
+
+// Reusing columns for decomposition (#[OP_ID_CHECK] ensures selectors are mutually exclusive):
+pol DECOMPOSED_A = ((sel_mul_u128 + sel_shift_ops_no_overflow) * ia) + (sel_shift_ops - sel_shift_ops_no_overflow) * (ib - max_bits) + (sel_is_u128 * sel_op_div * (1 - sel_tag_err) * ic);
+pol DECOMPOSED_B = ib;
+// For MUL and DIV, we decompose into 64 bit limbs. For shifts, we have one limb of b bits and one limb of max_bits - b bits.
+pol LIMB_SIZE = sel_mul_div_u128 * TWO_POW_64 + sel_shift_ops * two_pow_shift_lo_bits;
+
+#[A_DECOMPOSITION]
+sel_decompose_a * (DECOMPOSED_A - (a_lo + LIMB_SIZE * a_hi)) = 0;
+#[B_DECOMPOSITION]
+sel_mul_div_u128 * (DECOMPOSED_B - (b_lo + LIMB_SIZE * b_hi)) = 0;
+
+// Note: the only current use for decomposition of b has 64 bit limbs, so no need for b_lo/hi_bits.
+pol commit a_lo_bits, a_hi_bits;
+// TODO: Once lookups support expression in tuple, we can inline constant_64 into the lookup.
+// Note: only currently used for MUL/DIV u128, so gated by sel_mul_div_u128:
+pol commit constant_64;
+sel_mul_div_u128 * (64 - constant_64) = 0;
+
+#[A_LO_BITS]
+a_lo_bits - sel_mul_div_u128 * constant_64 - sel_shift_ops * shift_lo_bits = 0;
+
+#[A_HI_BITS]
+a_hi_bits - sel_mul_div_u128 * constant_64 - sel_shift_ops * SHIFT_HI_BITS = 0;
+
+#[RANGE_CHECK_DECOMPOSITION_A_LO]
+sel_decompose_a { a_lo, a_lo_bits } in range_check.sel { range_check.value, range_check.rng_chk_bits };
+
+#[RANGE_CHECK_DECOMPOSITION_A_HI]
+sel_decompose_a { a_hi, a_hi_bits } in range_check.sel { range_check.value, range_check.rng_chk_bits };
+
+#[RANGE_CHECK_DECOMPOSITION_B_LO]
+sel_mul_div_u128 { b_lo, constant_64 } in range_check.sel { range_check.value, range_check.rng_chk_bits };
+
+#[RANGE_CHECK_DECOMPOSITION_B_HI]
+sel_mul_div_u128 { b_hi, constant_64 } in range_check.sel { range_check.value, range_check.rng_chk_bits };
+

 // ADD

@@ -194,21 +240,6 @@ sel_mul_u128 = sel_is_u128 * sel_op_mul;
 // a * b_l + a_l * b_h * 2^64 = (cf * 2^64 + c_hi) * 2^128 + c
 // => no need for a_h in final relation

-pol commit a_lo;
-pol commit a_hi;
-pol commit b_lo;
-pol commit b_hi;
-pol TWO_POW_64 = 2 ** 64;
-
-// Reusing columns for decomposition (#[OP_ID_CHECK] ensures selectors are mutually exclusive):
-pol DECOMPOSED_A = (sel_mul_u128 * ia) + (sel_is_u128 * sel_op_div * (1 - sel_tag_err) * ic);
-pol DECOMPOSED_B = ib;
-
-#[A_DECOMPOSITION]
-sel_mul_div_u128 * (DECOMPOSED_A - (a_lo + TWO_POW_64 * a_hi)) = 0;
-#[B_DECOMPOSITION]
-sel_mul_div_u128 * (DECOMPOSED_B - (b_lo + TWO_POW_64 * b_hi)) = 0;
-
 #[ALU_MUL_U128]
 sel_mul_u128 * (1 - sel_tag_err)
 * (
@@ -217,23 +248,6 @@ sel_mul_u128 * (1 - sel_tag_err)
     - (max_value + 1) * (cf * TWO_POW_64 + c_hi) // c_hi * 2^128 + (cf ? 2^192 : 0)
 ) = 0;

-// TODO: Once lookups support expression in tuple, we can inline constant_64 into the lookup.
-// Note: only used for MUL/DIV u128, so gated by sel_mul_div_u128
-pol commit constant_64;
-sel_mul_div_u128 * (64 - constant_64) = 0;
-
-#[RANGE_CHECK_MUL_U128_A_LO]
-sel_mul_div_u128 { a_lo, constant_64 } in range_check.sel { range_check.value, range_check.rng_chk_bits };
-
-#[RANGE_CHECK_MUL_U128_A_HI]
-sel_mul_div_u128 { a_hi, constant_64 } in range_check.sel { range_check.value, range_check.rng_chk_bits };
-
-#[RANGE_CHECK_MUL_U128_B_LO]
-sel_mul_div_u128 { b_lo, constant_64 } in range_check.sel { range_check.value, range_check.rng_chk_bits };
-
-#[RANGE_CHECK_MUL_U128_B_HI]
-sel_mul_div_u128 { b_hi, constant_64 } in range_check.sel { range_check.value, range_check.rng_chk_bits };
-
 // No need to range_check c_hi for cases other than u128 because we know a and b's size from the tags and have looked
 // up max_value. i.e. we cannot provide a malicious c, c_hi such that a + b - c_hi * 2^n = c passes for n < 128.
 // No need to range_check c_lo = ic because the memory write will ensure ic <= max_value.
@@ -392,6 +406,87 @@ sel_op_not * (1 - sel_op_not) = 0;
 #[NOT_OP_MAIN]
 sel_op_not * (1 - sel_tag_err) * (ia + ib - max_value) = 0;

+// SHIFTS - Taken from vm1:
+// Given (1) an input a, within the range [0, 2**128-1],
+// (2) a value s, the amount of bits to shift a by (stored in ib),
+// (3) and a memory tag, mem_tag that supports a maximum of t bits (stored in max_bits).
+// Split input a into Big Endian hi and lo limbs, (we re-use the a_hi and a_lo columns we used for the MUL/DIV u128 operators)
+// a_hi and a_lo, and the number of bits represented by the memory tag, t.
+// If we are shifting by more than the bit length represented by the memory tag, the result is trivially zero.
+
+// SHL
+
+// === Steps when performing SHL
+// (1) Prove the correct decomposition: a_hi * 2**(t-s) + a_lo = a ---> see #[A_DECOMPOSITION]
+// (2) Range check a_hi < 2**s && a_lo < 2**(t-s) ---> see #[RANGE_CHECK_DECOMPOSITION_A_LO/HI]
+// (3) Return a_lo * 2**s ---> see #[ALU_SHL]
+//
+// <-- s bits --> | <-- (t-s) bits -->
+// ---------------|-------------------
+// |     a_hi     |       a_lo        | --> a
+// -----------------------------------
+//
+// Use of helper1 for SHL:
+// We have: s (=ib), t (=max_bits), 2**(t-s) (=two_pow_shift_lo_bits), and 2**t (=max_value + 1)
+// We want: 2**s (=2**ib), ideally without another precomputed.power_of_2 lookup
+// Injecting 2**s (=helper1), we can check that 2**t == 2**(t-s) * 2**s:
+#[SHL_TWO_POW_SHIFT]
+sel_op_shl * sel_shift_ops_no_overflow * (1 - sel_tag_err) * (max_value + 1 - two_pow_shift_lo_bits * helper1) = 0;
+
+#[ALU_SHL]
+sel_op_shl * (1 - sel_tag_err) * (ic - sel_shift_ops_no_overflow * a_lo * helper1) = 0;
+
+// SHR
+
+// === Steps when performing SHR
+// (1) Prove the correct decomposition: a_hi * 2**s + a_lo = a ---> see #[A_DECOMPOSITION]
+// (2) Range check a_hi < 2**(t-s) && a_lo < 2**s ---> see #[RANGE_CHECK_DECOMPOSITION_A_LO/HI]
+// (3) Return a_hi ---> see #[ALU_SHR]
+//
+// <-- (t-s) bits --> | <-- s bits -->
+// -------------------|---------------
+// |       a_hi       |     a_lo      | --> a
+// -----------------------------------
+
+#[ALU_SHR]
+sel_op_shr * (1 - sel_tag_err) * (ic - sel_shift_ops_no_overflow * a_hi) = 0;
+
+// SHL & SHR - Shared relations:
+
+pol commit sel_shift_ops;
+// sel_op_shl || sel_op_shr:
+sel_shift_ops = sel_op_shl + sel_op_shr;
+
+pol commit sel_shift_ops_no_overflow;
+// sel_shift_ops_no_overflow = 1 ==> sel_shift_ops = 1:
+sel_shift_ops_no_overflow * (1 - sel_shift_ops) = 0;
+// (sel_op_shl || sel_op_shr) & b < max_bits: see below* for constraining.
+pol SHIFT_OVERFLOW = sel_shift_ops * (1 - sel_shift_ops_no_overflow);
+
+// The bit size of the lo limb used by the shift:
+pol commit shift_lo_bits;
+pol commit two_pow_shift_lo_bits;
+
+// *For SHL and SHR, when the shift (b) > max_bits we want SHIFT_OVERFLOW == 1 and c == 0:
+// SHL: a_lo_bits = max_bits - b -> will underflow
+// SHR: a_hi_bits = max_bits - b -> will underflow
+// so instead set a_lo = b - max_bits and shift_lo_bits = max_bits for both SHL and SHR (see DECOMPOSED_A) and reuse the range check
+// RANGE_CHECK_DECOMPOSITION_A_LO to prove that b > max_bits <==> SHIFT_OVERFLOW = 1 <==> c = 0.
+// Note: sel_decompose_a is gated by IS_NOT_FF, so no gating for the FF tag error case is required below.
+
+#[SHIFTS_LO_BITS]
+shift_lo_bits
+    - sel_shift_ops_no_overflow * (sel_op_shl * (max_bits - ib) + sel_op_shr * ib)
+    - SHIFT_OVERFLOW * max_bits
+    = 0;
+
+// Set shift_hi_bits = max_bits in the overflow case, so RANGE_CHECK_DECOMPOSITION_A_HI passes. Since we set c == 0 in this case,
+// we don't need to constrain that a_hi is within a certain limb size.
+pol SHIFT_HI_BITS = max_bits - sel_shift_ops_no_overflow * shift_lo_bits;
+
+#[SHIFTS_TWO_POW]
+sel_shift_ops_no_overflow { shift_lo_bits, two_pow_shift_lo_bits } in precomputed.sel_range_8 { precomputed.clk, precomputed.power_of_2 };
+
 // TRUNCATE (ALU part for opcodes CAST and SET)
 // Input of truncation value is sent to ia, destination tag in ia_tag and output is computed as ic.
 // We have one dispatching lookup from execution specific to CAST and another one for SET, as
```
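For orientation, a hypothetical witness-assignment sketch (plain C++; `ShiftRow` and `assign_shift` are invented names and this is not the PR's actual tracegen code) showing one way the shift columns above could be populated, including the overflow branch:

```cpp
#include <cstdint>

// Mirrors the PIL columns; SHIFT_HI_BITS is a derived polynomial in the PIL but is
// materialised here as a_hi_bits for clarity. Assumes an integer tag with max_bits < 64.
struct ShiftRow {
    uint64_t a_lo = 0;
    uint64_t a_hi = 0;
    uint64_t a_lo_bits = 0;
    uint64_t a_hi_bits = 0;
    uint64_t shift_lo_bits = 0;
    bool sel_shift_ops_no_overflow = false;
    uint64_t ic = 0;
};

// a is the value to shift (ia), b is the shift amount (ib).
ShiftRow assign_shift(uint64_t a, uint64_t b, uint64_t max_bits, bool is_shl)
{
    ShiftRow row;
    if (b > max_bits) {
        // Overflow branch: prove b - max_bits fits in max_bits bits; the result is 0.
        row.a_lo = b - max_bits;      // DECOMPOSED_A = ib - max_bits when no_overflow = 0
        row.a_hi = 0;
        row.shift_lo_bits = max_bits; // #[SHIFTS_LO_BITS], overflow term
        row.a_lo_bits = max_bits;
        row.a_hi_bits = max_bits;     // SHIFT_HI_BITS = max_bits when no_overflow = 0
        row.ic = 0;
        return row;
    }
    row.sel_shift_ops_no_overflow = true;
    // The lo limb holds (max_bits - b) bits for SHL and b bits for SHR:
    row.shift_lo_bits = is_shl ? (max_bits - b) : b;
    row.a_hi = a >> row.shift_lo_bits; // shift amount < 64 since max_bits < 64
    row.a_lo = a - (row.a_hi << row.shift_lo_bits);
    row.a_lo_bits = row.shift_lo_bits;            // #[A_LO_BITS]
    row.a_hi_bits = max_bits - row.shift_lo_bits; // SHIFT_HI_BITS
    row.ic = is_shl ? (row.a_lo << b) : row.a_hi; // #[ALU_SHL] / #[ALU_SHR]
    return row;
}
```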

barretenberg/cpp/src/barretenberg/vm2/common/instruction_spec.cpp

Lines changed: 14 additions & 15 deletions
```diff
@@ -484,6 +484,20 @@ const std::unordered_map<ExecutionOpCode, ExecInstructionSpec> EXEC_INSTRUCTION_
       { .num_addresses = 2,
         .gas_cost = { .opcode_gas = AVM_NOT_BASE_L2_GAS, .base_da = 0, .dyn_l2 = 0, .dyn_da = 0 },
         .register_info = RegisterInfo().add_inputs({ /*a*/ RegisterInfo::ANY_TAG }).add_output(/*b*/) } },
+    { ExecutionOpCode::SHL,
+      { .num_addresses = 3,
+        .gas_cost = { .opcode_gas = AVM_SHL_BASE_L2_GAS, .base_da = 0, .dyn_l2 = 0, .dyn_da = 0 },
+        .register_info = RegisterInfo()
+                             .add_inputs({ /*a*/ RegisterInfo::ANY_TAG,
+                                           /*b*/ RegisterInfo::ANY_TAG })
+                             .add_output(/*c*/) } },
+    { ExecutionOpCode::SHR,
+      { .num_addresses = 3,
+        .gas_cost = { .opcode_gas = AVM_SHR_BASE_L2_GAS, .base_da = 0, .dyn_l2 = 0, .dyn_da = 0 },
+        .register_info = RegisterInfo()
+                             .add_inputs({ /*a*/ RegisterInfo::ANY_TAG,
+                                           /*b*/ RegisterInfo::ANY_TAG })
+                             .add_output(/*c*/) } },
     { ExecutionOpCode::CAST,
       { .num_addresses = 2,
         .gas_cost = { .opcode_gas = AVM_CAST_BASE_L2_GAS, .base_da = 0, .dyn_l2 = 0, .dyn_da = 0 },
@@ -690,21 +704,6 @@ const std::unordered_map<ExecutionOpCode, ExecInstructionSpec> EXEC_INSTRUCTION_
       { .num_addresses = 3,
         .gas_cost = { .opcode_gas = AVM_SHA256COMPRESSION_BASE_L2_GAS, .base_da = 0, .dyn_l2 = 0, .dyn_da = 0 } },
     },
-
-    { ExecutionOpCode::SHR,
-      { .num_addresses = 3,
-        .gas_cost = { .opcode_gas = AVM_SHR_BASE_L2_GAS, .base_da = 0, .dyn_l2 = 0, .dyn_da = 0 },
-        .register_info = RegisterInfo()
-                             .add_inputs({ /*a*/ RegisterInfo::ANY_TAG,
-                                           /*b*/ RegisterInfo::ANY_TAG })
-                             .add_output(/*c*/) } },
-    { ExecutionOpCode::SHL,
-      { .num_addresses = 3,
-        .gas_cost = { .opcode_gas = AVM_SHL_BASE_L2_GAS, .base_da = 0, .dyn_l2 = 0, .dyn_da = 0 },
-        .register_info = RegisterInfo()
-                             .add_inputs({ /*a*/ RegisterInfo::ANY_TAG,
-                                           /*b*/ RegisterInfo::ANY_TAG })
-                             .add_output(/*c*/) } },
 };

 } // namespace bb::avm2
```

barretenberg/cpp/src/barretenberg/vm2/common/tagged_value.cpp

Lines changed: 24 additions & 6 deletions
```diff
@@ -22,24 +22,42 @@ template <class... Ts> struct overloads : Ts... {
 // This is a deduction guide. Apparently not needed in C++20, but we somehow still need it.
 template <class... Ts> overloads(Ts...) -> overloads<Ts...>;

+template <std::integral T> T safe_shift_left(T a, T b)
+{
+    constexpr size_t bits = sizeof(T) * 8;
+    if (b >= bits) {
+        return static_cast<T>(0);
+    }
+    return static_cast<T>(a << b);
+}
+
 struct shift_left {
-    template <typename T, typename U> T operator()(const T& a, const U& b) const
+    template <typename T> T operator()(const T& a, const T& b) const
     {
         if constexpr (std::is_same_v<T, uint1_t>) {
-            return static_cast<T>(a.operator<<(b));
+            return static_cast<T>(b == uint1_t(0) ? a : uint1_t(0));
         } else {
-            return static_cast<T>(a << b);
+            return safe_shift_left<T>(a, b);
         }
     }
 };

+template <std::integral T> T safe_shift_right(T a, T b)
+{
+    constexpr size_t bits = sizeof(T) * 8;
+    if (b >= bits) {
+        return static_cast<T>(0);
+    }
+    return static_cast<T>(a >> b);
+}
+
 struct shift_right {
-    template <typename T, typename U> T operator()(const T& a, const U& b) const
+    template <typename T> T operator()(const T& a, const T& b) const
     {
         if constexpr (std::is_same_v<T, uint1_t>) {
-            return static_cast<T>(a.operator>>(b));
+            return static_cast<T>(b == uint1_t(0) ? a : uint1_t(0));
         } else {
-            return static_cast<T>(a >> b);
+            return safe_shift_right<T>(a, b);
         }
     }
 };
```
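A minimal usage sketch of the new helpers (assuming `safe_shift_left`/`safe_shift_right` were visible from a test, which is an assumption for illustration): for 64-bit operands, a built-in shift by 64 or more is undefined behaviour in C++ (on x86 the shift amount is typically masked, so `x << 64` can evaluate to `x`), whereas the AVM semantics require 0.

```cpp
#include <cassert>
#include <cstdint>

int main()
{
    // Out-of-range shifts must produce 0 rather than relying on the built-in operators:
    assert(safe_shift_left<uint64_t>(0x1234, 64) == 0);
    assert(safe_shift_right<uint64_t>(0x1234, 200) == 0);
    // In-range shifts match the built-in operators, truncated to the operand's width:
    assert(safe_shift_left<uint8_t>(0b1011, 5) == 0b01100000);
    assert(safe_shift_right<uint8_t>(0b1011, 2) == 0b10);
    return 0;
}
```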
