-
Notifications
You must be signed in to change notification settings - Fork 14
feat: error bars for BSEQ benchmark #594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -7,6 +7,8 @@ | |||||||||||||||||
| from math import sqrt | ||||||||||||||||||
| from typing import Mapping | ||||||||||||||||||
|
|
||||||||||||||||||
| import numpy as np | ||||||||||||||||||
|
|
||||||||||||||||||
|
|
||||||||||||||||||
| def effective_shot_count(shots: int, count_results: Mapping[str, int]) -> int: | ||||||||||||||||||
| total_measurements = sum(count_results.values()) | ||||||||||||||||||
|
|
@@ -31,3 +33,88 @@ def binary_expectation_stddev( | |||||||||||||||||
| expectation = binary_expectation_value(shots, count_results, outcome=outcome) | ||||||||||||||||||
| variance = expectation * (1 - expectation) / effective_shots | ||||||||||||||||||
| return float(sqrt(max(variance, 0.0))) | ||||||||||||||||||
|
|
||||||||||||||||||
|
|
||||||||||||||||||
| def _largest_component_from_edges(edges: list[tuple[int, int]], num_nodes: int) -> int: | ||||||||||||||||||
| """Return the size of the largest connected component for an edge list.""" | ||||||||||||||||||
| if num_nodes <= 0: | ||||||||||||||||||
| return 0 | ||||||||||||||||||
|
|
||||||||||||||||||
| parent = list(range(num_nodes)) | ||||||||||||||||||
| sizes = [1] * num_nodes | ||||||||||||||||||
|
|
||||||||||||||||||
| def find(node: int) -> int: | ||||||||||||||||||
| while parent[node] != node: | ||||||||||||||||||
| parent[node] = parent[parent[node]] | ||||||||||||||||||
| node = parent[node] | ||||||||||||||||||
| return node | ||||||||||||||||||
|
|
||||||||||||||||||
| def union(u: int, v: int) -> None: | ||||||||||||||||||
| root_u = find(u) | ||||||||||||||||||
| root_v = find(v) | ||||||||||||||||||
| if root_u == root_v: | ||||||||||||||||||
| return | ||||||||||||||||||
| if sizes[root_u] < sizes[root_v]: | ||||||||||||||||||
| root_u, root_v = root_v, root_u | ||||||||||||||||||
| parent[root_v] = root_u | ||||||||||||||||||
| sizes[root_u] += sizes[root_v] | ||||||||||||||||||
|
|
||||||||||||||||||
| for u, v in edges: | ||||||||||||||||||
| union(u, v) | ||||||||||||||||||
|
|
||||||||||||||||||
| largest = 1 | ||||||||||||||||||
| for idx in range(num_nodes): | ||||||||||||||||||
| root = find(idx) | ||||||||||||||||||
| if sizes[root] > largest: | ||||||||||||||||||
| largest = sizes[root] | ||||||||||||||||||
| return largest | ||||||||||||||||||
|
Comment on lines
+65
to
+70
|
||||||||||||||||||
| largest = 1 | |
| for idx in range(num_nodes): | |
| root = find(idx) | |
| if sizes[root] > largest: | |
| largest = sizes[root] | |
| return largest | |
| # The size of the largest component is the maximum in sizes | |
| return max(sizes) if num_nodes > 0 else 0 |
Copilot
AI
Oct 17, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The check `std is None` is redundant since the type annotation indicates that `edge_stats` values are `tuple[float, float]`, meaning `std` cannot be `None`. If NaN values are expected to represent missing standard deviations, document this in the function docstring or consider using `Optional[float]` in the `edge_stats` type annotation to make the contract explicit.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| import numpy as np | ||
| import pytest | ||
|
|
||
| from metriq_gym.benchmarks.bseq import BSEQResult, CHSH_THRESHOLD | ||
| from metriq_gym.benchmarks.benchmark import BenchmarkScore | ||
| from metriq_gym.helpers.statistics import bootstrap_largest_component_stddev | ||
|
|
||
|
|
||
def test_bootstrap_stddev_returns_zero_without_edges():
    """An empty edge-statistics mapping must produce a standard deviation of 0.0."""
    spread = bootstrap_largest_component_stddev({}, num_nodes=4, threshold=CHSH_THRESHOLD)
    assert spread == 0.0
|
|
||
|
|
||
def test_bootstrap_stddev_zero_variance_is_deterministic():
    """A single edge whose second statistic is 0.0 must yield exactly 0.0."""
    # One edge, first statistic 0.5 above the threshold, second statistic 0.0
    # (presumably the per-edge standard deviation; confirm against the helper).
    single_edge = {(0, 1): (CHSH_THRESHOLD + 0.5, 0.0)}
    generator = np.random.default_rng(0)
    spread = bootstrap_largest_component_stddev(
        single_edge, num_nodes=3, threshold=CHSH_THRESHOLD, rng=generator
    )
    assert spread == 0.0
|
|
||
|
|
||
def test_bootstrap_stddev_reflects_sampling_noise():
    """A noisy edge close to the threshold must give a spread strictly between 0 and 3."""
    generator = np.random.default_rng(1234)
    noisy_edge = {(0, 1): (CHSH_THRESHOLD + 0.1, 0.3)}
    spread = bootstrap_largest_component_stddev(
        noisy_edge,
        num_nodes=3,
        threshold=CHSH_THRESHOLD,
        rng=generator,
        num_samples=256,
    )
    # Strictly positive, and below the node count of 3 used above.
    assert 0.0 < spread < 3.0
|
|
||
|
|
||
def test_bseq_result_exposes_benchmark_score():
    """BSEQResult must surface a BenchmarkScore through ``values`` and ``uncertainties``."""
    score = BenchmarkScore(value=8.0, uncertainty=1.5)
    bseq_result = BSEQResult(largest_connected_size=score)

    expected_values = {"largest_connected_size": 8.0}
    expected_uncertainties = {"largest_connected_size": 1.5}
    assert bseq_result.values == pytest.approx(expected_values)
    assert bseq_result.uncertainties == pytest.approx(expected_uncertainties)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The variance-to-stddev conversion should validate that `variances[idx] >= 0` before taking the square root to prevent potential issues with floating-point precision producing negative values due to the accumulation in line 144. Consider using `max(variances[idx], 0.0)` before `np.sqrt`.