Merge pull request #3782 from cskiraly/peer-das-sampling

hwwhww · web-flow · commit 0a4957279999 · 2024-06-27T19:27:57.000+08:00
PeerDAS sampling clarifications
diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md
@@ -23,6 +23,7 @@
     - [`compute_extended_matrix`](#compute_extended_matrix)
     - [`recover_matrix`](#recover_matrix)
     - [`get_data_column_sidecars`](#get_data_column_sidecars)
+    - [`get_extended_sample_count`](#get_extended_sample_count)
 - [Custody](#custody)
   - [Custody requirement](#custody-requirement)
   - [Public, deterministic selection](#public-deterministic-selection)
@@ -31,6 +32,8 @@
 - [Column gossip](#column-gossip)
   - [Parameters](#parameters)
 - [Peer sampling](#peer-sampling)
+  - [Sample selection](#sample-selection)
+  - [Sample queries](#sample-queries)
 - [Peer scoring](#peer-scoring)
 - [Reconstruction and cross-seeding](#reconstruction-and-cross-seeding)
 - [DAS providers](#das-providers)
@@ -221,6 +224,48 @@ def get_data_column_sidecars(signed_block: SignedBeaconBlock,
     return sidecars
 ```
 
+#### `get_extended_sample_count`
+
+```python
+def get_extended_sample_count(allowed_failures: uint64) -> uint64:
+    assert 0 <= allowed_failures <= NUMBER_OF_COLUMNS // 2
+    """
+    Return the sample count if allowing failures.
+
+    This helper demonstrates how to calculate the number of columns to query per slot when
+    allowing given number of failures, assuming uniform random selection without replacement.
+    Nested functions are direct replacements of Python library functions math.comb and
+    scipy.stats.hypergeom.cdf, with the same signatures.
+    """
+
+    def math_comb(n: int, k: int) -> int:
+        if not 0 <= k <= n:
+            return 0
+        r = 1
+        for i in range(min(k, n - k)):
+            r = r * (n - i) // (i + 1)
+        return r
+
+    def hypergeom_cdf(k: uint64, M: uint64, n: uint64, N: uint64) -> float:
+        # NOTE: It contains float-point computations.
+        # Convert uint64 to Python integers before computations.
+        k = int(k)
+        M = int(M)
+        n = int(n)
+        N = int(N)
+        return sum([math_comb(n, i) * math_comb(M - n, N - i) / math_comb(M, N)
+                    for i in range(k + 1)])
+
+    worst_case_missing = NUMBER_OF_COLUMNS // 2 + 1
+    false_positive_threshold = hypergeom_cdf(0, NUMBER_OF_COLUMNS,
+                                             worst_case_missing, SAMPLES_PER_SLOT)
+    for sample_count in range(SAMPLES_PER_SLOT, NUMBER_OF_COLUMNS + 1):
+        if hypergeom_cdf(allowed_failures, NUMBER_OF_COLUMNS,
+                         worst_case_missing, sample_count) <= false_positive_threshold:
+            break
+    return sample_count
+```
+
 ## Custody
 
 ### Custody requirement
@@ -263,7 +308,29 @@ Verifiable samples from their respective column are distributed on the assigned
 
 ## Peer sampling
 
-A node SHOULD maintain a diverse set of peers for each column and each slot by verifying responsiveness to sample queries. At each slot, a node makes `SAMPLES_PER_SLOT` queries for samples from their peers via `DataColumnSidecarsByRoot` request. A node utilizes `get_custody_columns` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success.
+### Sample selection
+
+At each slot, a node SHOULD select at least `SAMPLES_PER_SLOT` column IDs for sampling. It is recommended to use uniform random selection without replacement based on local randomness. Sampling is considered successful if the node manages to retrieve all selected columns.
+
+Alternatively, a node MAY use a method that selects more than `SAMPLES_PER_SLOT` columns while allowing some of them to be missing, respecting the same target false positive threshold (the probability of successful sampling of an unavailable block) as dictated by the `SAMPLES_PER_SLOT` parameter. If using uniform random selection without replacement, a node can use the `get_extended_sample_count(allowed_failures) -> sample_count` helper function to determine the sample count (number of unique column IDs) for any selected number of allowed failures. Sampling is then considered successful if any `sample_count - allowed_failures` columns are retrieved successfully.
+
+For reference, the table below shows the number of samples and the number of allowed missing columns assuming `NUMBER_OF_COLUMNS = 128` and `SAMPLES_PER_SLOT = 16`.
+
+| Allowed missing | 0| 1| 2| 3| 4| 5| 6| 7| 8|
+|-----------------|--|--|--|--|--|--|--|--|--|
+| Sample count    |16|20|24|27|29|32|35|37|40|
+
+### Sample queries
+
+A node SHOULD maintain a diverse set of peers for each column and each slot by verifying responsiveness to sample queries.
+
+A node SHOULD query for samples from selected peers via `DataColumnSidecarsByRoot` request. A node utilizes the `get_custody_columns` helper to determine which peer(s) it could request from, identifying a list of candidate peers for each selected column.
+
+If more than one candidate peer is found for a given column, a node SHOULD randomize its peer selection to distribute sample query load in the network. Nodes MAY use peer scoring to tune this selection (for example, by using weighted selection or by using a cut-off threshold). If possible, it is also recommended to avoid requesting many columns from the same peer in order to avoid relying on and exposing the sample selection to a single peer.
+
+If a node already has a column because of custody, it is not required to send out queries for that column.
+
+If a node has enough good/honest peers across all columns, and the data is being made available, the above procedure has a high chance of success.
 
 ## Peer scoring
 
diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py
@@ -1,7 +1,9 @@
 import random
 from eth2spec.test.context import (
+    expect_assertion_error,
     spec_test,
     single_phase,
+    with_config_overrides,
     with_eip7594_and_later,
 )
 from eth2spec.test.helpers.sharding import (
@@ -64,3 +66,80 @@ def test_recover_matrix(spec):
 
     # Ensure that the recovered matrix matches the original matrix
     assert recovered_matrix == extended_matrix
+
+
+@with_eip7594_and_later
+@spec_test
+@single_phase
+def test_get_extended_sample_count__1(spec):
+    rng = random.Random(1111)  # fixed seed keeps the drawn value reproducible
+    allowed_failures = rng.randint(0, spec.config.NUMBER_OF_COLUMNS // 2)  # randint includes both ends
+    spec.get_extended_sample_count(allowed_failures)  # smoke check: the result is not asserted
+
+
+@with_eip7594_and_later
+@spec_test
+@single_phase
+def test_get_extended_sample_count__2(spec):
+    rng = random.Random(2222)  # fixed seed keeps the drawn value reproducible
+    allowed_failures = rng.randint(0, spec.config.NUMBER_OF_COLUMNS // 2)  # randint includes both ends
+    spec.get_extended_sample_count(allowed_failures)  # smoke check: the result is not asserted
+
+
+@with_eip7594_and_later
+@spec_test
+@single_phase
+def test_get_extended_sample_count__3(spec):
+    rng = random.Random(3333)  # fixed seed keeps the drawn value reproducible
+    allowed_failures = rng.randint(0, spec.config.NUMBER_OF_COLUMNS // 2)  # randint includes both ends
+    spec.get_extended_sample_count(allowed_failures)  # smoke check: the result is not asserted
+
+
+@with_eip7594_and_later
+@spec_test
+@single_phase
+def test_get_extended_sample_count__lower_bound(spec):
+    allowed_failures = 0  # smallest value the helper's range assert accepts
+    spec.get_extended_sample_count(allowed_failures)  # smoke check: the result is not asserted
+
+
+@with_eip7594_and_later
+@spec_test
+@single_phase
+def test_get_extended_sample_count__upper_bound(spec):
+    allowed_failures = spec.config.NUMBER_OF_COLUMNS // 2  # largest value the helper's range assert accepts
+    spec.get_extended_sample_count(allowed_failures)  # smoke check: the result is not asserted
+
+
+@with_eip7594_and_later
+@spec_test
+@single_phase
+def test_get_extended_sample_count__upper_bound_exceed(spec):
+    allowed_failures = spec.config.NUMBER_OF_COLUMNS // 2 + 1  # one past the largest accepted value
+    expect_assertion_error(lambda: spec.get_extended_sample_count(allowed_failures))
+
+
+@with_eip7594_and_later
+@spec_test
+@with_config_overrides({  # pin the parameter values that the das-core.md reference table assumes
+    'NUMBER_OF_COLUMNS': 128,
+    'SAMPLES_PER_SLOT': 16,
+})
+@single_phase
+def test_get_extended_sample_count__table_in_spec(spec):
+    table = dict(
+        # allowed_failures: expected_extended_sample_count, copied from the table in das-core.md
+        {
+            0: 16,
+            1: 20,
+            2: 24,
+            3: 27,
+            4: 29,
+            5: 32,
+            6: 35,
+            7: 37,
+            8: 40,
+        }
+    )
+    for allowed_failures, expected_extended_sample_count in table.items():
+        assert spec.get_extended_sample_count(allowed_failures=allowed_failures) == expected_extended_sample_count