Introduce benchmark CI and Python binding test suite (#45)

HudsonGraeme · web-flow · commit d8bd9e6a1658 · 2026-02-26T04:16:38.000Z
* Introduce label-gated benchmark CI workflow for PR comparison

* Introduce Python binding test suite and CI job

* Raise benchmark regression threshold to 10% for shared runner noise tolerance

* Resolve config validation failures in custom config constructor tests

* Resolve resource leaks and imprecise exception assertions in Python test suite

* Resolve missing cleanup guarantee in query_without_signer error test
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -0,0 +1,187 @@
+# Label-gated benchmark comparison: builds the benchmark binary from the PR
+# head and from origin/main, runs both, and posts or updates one PR comment.
+name: Benchmarks
+
+on:
+  pull_request:
+    types: [labeled, synchronize]
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  bench:
+    name: Benchmark comparison
+    runs-on: ubuntu-latest
+    # Only run for PRs that carry the 'bench' label.
+    if: contains(github.event.pull_request.labels.*.name, 'bench')
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+        with:
+          # Fetch full history; origin/main is checked out further down.
+          fetch-depth: 0
+
+      - uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7
+        with:
+          toolchain: stable
+
+      - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
+
+      - name: Build benchmark (PR)
+        run: cargo build --release -p lightning-bench
+
+      # Stash the PR binary outside target/ before the main build replaces it.
+      - name: Copy PR binary
+        run: cp target/release/lightning-bench /tmp/lightning-bench-pr
+
+      - name: Checkout main
+        run: git checkout origin/main
+
+      # A failed main build is tolerated: the steps needing its binary are
+      # skipped and the comment then shows PR numbers only.
+      - name: Build benchmark (main)
+        id: build-main
+        continue-on-error: true
+        run: cargo build --release -p lightning-bench
+
+      - name: Copy main binary
+        if: steps.build-main.outcome == 'success'
+        run: cp target/release/lightning-bench /tmp/lightning-bench-main
+
+      - name: Run main benchmark
+        if: steps.build-main.outcome == 'success'
+        run: /tmp/lightning-bench-main > /tmp/bench-main.json 2>/dev/null
+
+      - name: Run PR benchmark
+        run: /tmp/lightning-bench-pr > /tmp/bench-pr.json 2>/dev/null
+
+      - name: Post comparison comment
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
+        with:
+          script: |
+            const fs = require('fs');
+            const pr = JSON.parse(fs.readFileSync('/tmp/bench-pr.json', 'utf8'));
+
+            // Main results are optional: the file is absent when the main build
+            // failed and its run was skipped, so fall back to a PR-only report.
+            let hasMain = false;
+            let main = {};
+            try {
+              main = JSON.parse(fs.readFileSync('/tmp/bench-main.json', 'utf8'));
+              hasMain = true;
+            } catch (err) {
+              core.info(`No usable main results, reporting PR only: ${err.message}`);
+            }
+
+            // Numbers render with two decimals; anything else is stringified.
+            function fmt(v) {
+              return typeof v === 'number' ? v.toFixed(2) : String(v);
+            }
+
+            // Relative change from oldVal to newVal as ' (+1.2%)', tagged ' !!!'
+            // when it moves more than 15% in the worse direction and ' +++' when
+            // it moves more than 15% in the better one. '' when not comparable.
+            function pctChange(oldVal, newVal, lowerIsBetter) {
+              // A missing or non-numeric side would otherwise render as 'NaN%'.
+              if (!Number.isFinite(oldVal) || !Number.isFinite(newVal) || oldVal === 0) return '';
+              const pct = ((newVal - oldVal) / oldVal) * 100;
+              const sign = pct > 0 ? '+' : '';
+              const tag = lowerIsBetter
+                ? (pct > 15 ? ' !!!' : pct < -15 ? ' +++' : '')
+                : (pct < -15 ? ' !!!' : pct > 15 ? ' +++' : '');
+              return ` (${sign}${pct.toFixed(1)}%${tag})`;
+            }
+
+            let body = '<!-- bench-results -->\n## Benchmark Results\n\n';
+
+            body += '### Connection Setup (ms)\n\n';
+            body += '| Percentile | PR |' + (hasMain ? ' main | change |' : '') + '\n';
+            body += '|---|---|' + (hasMain ? '---|---|' : '') + '\n';
+            for (const p of ['p50', 'p95', 'p99']) {
+              const prVal = pr.connection_setup_ms[p];
+              // Guard like the sections below so a main output without this
+              // metric degrades to a PR-only row instead of throwing.
+              if (hasMain && main.connection_setup_ms?.[p] != null) {
+                const mainVal = main.connection_setup_ms[p];
+                body += `| ${p} | ${fmt(prVal)} | ${fmt(mainVal)} | ${pctChange(mainVal, prVal, true)} |\n`;
+              } else {
+                body += `| ${p} | ${fmt(prVal)} |\n`;
+              }
+            }
+
+            const sizes = Object.keys(pr.latency_ms).sort((a, b) => {
+              const order = {'256B': 0, '1KB': 1, '10KB': 2, '100KB': 3, '1MB': 4};
+              return (order[a] ?? 99) - (order[b] ?? 99);
+            });
+
+            body += '\n### Latency (ms)\n\n';
+            body += '| Size | Percentile | PR |' + (hasMain ? ' main | change |' : '') + '\n';
+            body += '|---|---|---|' + (hasMain ? '---|---|' : '') + '\n';
+            for (const size of sizes) {
+              for (const p of ['p50', 'p95', 'p99']) {
+                const prVal = pr.latency_ms[size][p];
+                if (hasMain && main.latency_ms?.[size]) {
+                  const mainVal = main.latency_ms[size][p];
+                  body += `| ${size} | ${p} | ${fmt(prVal)} | ${fmt(mainVal)} | ${pctChange(mainVal, prVal, true)} |\n`;
+                } else {
+                  body += `| ${size} | ${p} | ${fmt(prVal)} |\n`;
+                }
+              }
+            }
+
+            body += '\n### Throughput (req/s)\n\n';
+            body += '| Size | PR |' + (hasMain ? ' main | change |' : '') + '\n';
+            body += '|---|---|' + (hasMain ? '---|---|' : '') + '\n';
+            for (const size of sizes) {
+              const prVal = pr.throughput_rps[size];
+              if (hasMain && main.throughput_rps?.[size] != null) {
+                const mainVal = main.throughput_rps[size];
+                body += `| ${size} | ${fmt(prVal)} | ${fmt(mainVal)} | ${pctChange(mainVal, prVal, false)} |\n`;
+              } else {
+                body += `| ${size} | ${fmt(prVal)} |\n`;
+              }
+            }
+
+            if (pr.wire_bytes) {
+              body += '\n### Wire Bytes\n\n';
+              body += '| Size | PR |' + (hasMain ? ' main | change |' : '') + '\n';
+              body += '|---|---|' + (hasMain ? '---|---|' : '') + '\n';
+              for (const size of sizes) {
+                const prVal = pr.wire_bytes[size];
+                if (hasMain && main.wire_bytes?.[size] != null) {
+                  const mainVal = main.wire_bytes[size];
+                  body += `| ${size} | ${prVal} | ${mainVal} | ${pctChange(mainVal, prVal, true)} |\n`;
+                } else {
+                  body += `| ${size} | ${prVal} |\n`;
+                }
+              }
+            }
+
+            // Upsert: reuse the comment carrying the marker if one exists.
+            // Paginate, because a single listComments page can miss the marker
+            // on a busy PR, which would post a duplicate comment.
+            const marker = '<!-- bench-results -->';
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              per_page: 100,
+            });
+            const existing = comments.find(c => c.body?.includes(marker));
+
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existing.id,
+                body,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body,
+              });
+            }
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -125,6 +125,19 @@ jobs:
           files: lcov.info
           fail_ci_if_error: false
 
+  python-test:
+    name: Python tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+      - uses: dtolnay/rust-toolchain@631a55b12751854ce901bb631d5902ceb48146f7
+        with:
+          toolchain: stable
+      - uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2
+      - uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0
+      - run: uv venv && uv pip install maturin pytest pytest-timeout && uv run maturin develop --release && uv run pytest tests/ -v --timeout=30
+        working-directory: crates/btlightning-py
+
   subtensor:
     name: Subtensor integration
     runs-on: ubuntu-latest
diff --git a/.gitignore b/.gitignore
@@ -209,6 +209,8 @@ benchmarks/uv.lock
 *.gif
 plots/
 
-# Test files
-test_*.py
-*_test.py
+# Ad-hoc test scripts (not in test suites)
+/test_*.py
+/*_test.py
+/benchmarks/test_*.py
+/benchmarks/*_test.py
diff --git a/crates/btlightning-py/pyproject.toml b/crates/btlightning-py/pyproject.toml
@@ -17,6 +17,13 @@ classifiers = [
 ]
 dependencies = ["typing_extensions~=4.0"]
 
+[project.optional-dependencies]
+test = ["pytest>=8.0", "pytest-timeout>=2.0"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+timeout = 30
+
 [tool.maturin]
 python-source = "python"
 module-name = "btlightning._native"
diff --git a/crates/btlightning-py/tests/conftest.py b/crates/btlightning-py/tests/conftest.py
@@ -0,0 +1,63 @@
+"""Shared constants and fixtures for the btlightning Python binding tests."""
+
+import socket
+import threading
+import time
+
+import pytest
+
+from btlightning import Lightning, LightningServer
+
+# Fixed 32-byte seeds and hotkey strings shared by the test modules.
+MINER_SEED = bytes([1] * 32)
+VALIDATOR_SEED = bytes([2] * 32)
+MINER_HOTKEY = "5CcyqxXnJucaCnQQvvUg5EPzj1uoNAxACZvzArHw5aVDvgNH"
+VALIDATOR_HOTKEY = "5CfCr47V5Dte6bwxNBE8K9oNnQd9fiay6aDEEkgYtFv7w4Fq"
+
+
+@pytest.fixture()
+def free_port():
+    """Return a port number the OS just reported as free on 127.0.0.1.
+
+    The probe socket is closed before the number is returned, so the port is
+    not reserved: another process could claim it before the caller binds.
+    """
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind(("127.0.0.1", 0))
+        return s.getsockname()[1]
+
+
+@pytest.fixture()
+def echo_server(free_port):
+    """Yield ``(server, port)`` for a started server with an ``echo`` handler.
+
+    Teardown stops the server and joins its serving thread.
+    """
+    server = LightningServer(miner_hotkey=MINER_HOTKEY, host="127.0.0.1", port=free_port)
+    server.set_miner_keypair(MINER_SEED)
+    server.register_synapse_handler("echo", lambda data: data)
+    server.start()
+    t = threading.Thread(target=server.serve_forever, daemon=True)
+    t.start()
+    # Fixed pause before handing the server to the test (not a readiness check).
+    time.sleep(0.05)
+    yield server, free_port
+    server.stop()
+    t.join(timeout=5)
+
+
+@pytest.fixture()
+def client_and_axon(echo_server):
+    """Yield ``(client, axon)`` with the client connected to ``echo_server``.
+
+    The client is closed on teardown and also when connection setup raises.
+    """
+    _, port = echo_server
+    client = Lightning(wallet_hotkey=VALIDATOR_HOTKEY)
+    try:
+        client.set_validator_keypair(VALIDATOR_SEED)
+        axon = {"hotkey": MINER_HOTKEY, "ip": "127.0.0.1", "port": port}
+        client.initialize_connections([axon])
+        yield client, axon
+    finally:
+        client.close()
diff --git a/crates/btlightning-py/tests/test_client.py b/crates/btlightning-py/tests/test_client.py
@@ -0,0 +1,62 @@
+"""Construction and configuration tests for the ``Lightning`` client."""
+
+from btlightning import Lightning
+
+from conftest import VALIDATOR_HOTKEY, VALIDATOR_SEED
+
+
+def test_constructor_defaults():
+    """A client built with only a hotkey exposes it as ``wallet_hotkey``."""
+    client = Lightning(wallet_hotkey=VALIDATOR_HOTKEY)
+    try:
+        assert client.wallet_hotkey == VALIDATOR_HOTKEY
+    finally:
+        client.close()
+
+
+def test_constructor_custom_config():
+    """The constructor accepts every tuning keyword together."""
+    client = Lightning(
+        wallet_hotkey=VALIDATOR_HOTKEY,
+        connect_timeout_secs=5,
+        idle_timeout_secs=30,
+        keep_alive_interval_secs=10,
+        reconnect_initial_backoff_secs=1,
+        reconnect_max_backoff_secs=60,
+        reconnect_max_retries=3,
+        max_frame_payload_bytes=2097152,
+        max_stream_payload_bytes=10485760,
+    )
+    try:
+        assert client.wallet_hotkey == VALIDATOR_HOTKEY
+    finally:
+        client.close()
+
+
+def test_set_validator_keypair():
+    """A 32-byte seed is accepted as the validator keypair."""
+    client = Lightning(wallet_hotkey=VALIDATOR_HOTKEY)
+    try:
+        client.set_validator_keypair(VALIDATOR_SEED)
+    finally:
+        client.close()
+
+
+def test_set_python_signer():
+    """A Python callable is accepted as the signer."""
+    client = Lightning(wallet_hotkey=VALIDATOR_HOTKEY)
+    try:
+        client.set_python_signer(lambda msg: b"\x00" * 64)
+    finally:
+        client.close()
+
+
+def test_get_connection_stats_empty():
+    """Stats from a client with no connections still carry the summary keys."""
+    client = Lightning(wallet_hotkey=VALIDATOR_HOTKEY)
+    try:
+        stats = client.get_connection_stats()
+        assert "total_connections" in stats
+        assert "active_miners" in stats
+    finally:
+        client.close()
diff --git a/crates/btlightning-py/tests/test_errors.py b/crates/btlightning-py/tests/test_errors.py
@@ -0,0 +1,58 @@
+"""Error-path tests for ``Lightning.query_axon``."""
+
+import pytest
+
+from btlightning import Lightning
+
+from conftest import MINER_HOTKEY, VALIDATOR_HOTKEY
+
+
+def test_missing_synapse_type(client_and_axon):
+    """A request without ``synapse_type`` raises ``KeyError``."""
+    client, axon = client_and_axon
+    request_without_type = {"data": {"msg": "hello"}}
+    with pytest.raises(KeyError, match="synapse_type"):
+        client.query_axon(axon, request_without_type)
+
+
+def test_missing_hotkey(client_and_axon):
+    """An axon dict without ``hotkey`` raises ``KeyError``."""
+    client, _ = client_and_axon
+    axon_without_hotkey = {"ip": "127.0.0.1", "port": 1234}
+    with pytest.raises(KeyError, match="hotkey"):
+        client.query_axon(axon_without_hotkey, {"synapse_type": "echo", "data": {}})
+
+
+def test_missing_ip(client_and_axon):
+    """An axon dict without ``ip`` raises ``KeyError``."""
+    client, _ = client_and_axon
+    axon_without_ip = {"hotkey": MINER_HOTKEY, "port": 1234}
+    with pytest.raises(KeyError, match="ip"):
+        client.query_axon(axon_without_ip, {"synapse_type": "echo", "data": {}})
+
+
+def test_missing_port(client_and_axon):
+    """An axon dict without ``port`` raises ``KeyError``."""
+    client, _ = client_and_axon
+    axon_without_port = {"hotkey": MINER_HOTKEY, "ip": "127.0.0.1"}
+    with pytest.raises(KeyError, match="port"):
+        client.query_axon(axon_without_port, {"synapse_type": "echo", "data": {}})
+
+
+def test_query_without_signer():
+    """Querying from a freshly constructed client raises ``ConnectionError``."""
+    client = Lightning(wallet_hotkey=VALIDATOR_HOTKEY)
+    axon = {"hotkey": MINER_HOTKEY, "ip": "127.0.0.1", "port": 9999}
+    try:
+        with pytest.raises(ConnectionError, match="endpoint not initialized"):
+            client.query_axon(axon, {"synapse_type": "echo", "data": {}})
+    finally:
+        client.close()
+
+
+def test_invalid_timeout(client_and_axon):
+    """A negative ``timeout_secs`` raises ``ValueError``."""
+    client, axon = client_and_axon
+    echo_request = {"synapse_type": "echo", "data": {}}
+    with pytest.raises(ValueError, match="timeout_secs must be a finite positive number"):
+        client.query_axon(axon, echo_request, timeout_secs=-1.0)
diff --git a/crates/btlightning-py/tests/test_multi_handler.py b/crates/btlightning-py/tests/test_multi_handler.py
@@ -0,0 +1,50 @@
+"""Round-trip test for a server with more than one synapse handler."""
+
+import socket
+import threading
+import time
+
+from btlightning import Lightning, LightningServer
+
+from conftest import MINER_HOTKEY, MINER_SEED, VALIDATOR_HOTKEY, VALIDATOR_SEED
+
+
+def test_two_handlers_on_same_server():
+    """One server with ``echo`` and ``upper`` handlers answers each synapse type."""
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind(("127.0.0.1", 0))
+        port = s.getsockname()[1]
+
+    def upper_handler(data):
+        return {"result": data["text"].upper()}
+
+    server = LightningServer(miner_hotkey=MINER_HOTKEY, host="127.0.0.1", port=port)
+    server.set_miner_keypair(MINER_SEED)
+    server.register_synapse_handler("echo", lambda data: data)
+    server.register_synapse_handler("upper", upper_handler)
+    server.start()
+    t = threading.Thread(target=server.serve_forever, daemon=True)
+    t.start()
+
+    # The server is running from here on, so client setup sits inside the try:
+    # a failure there must still stop the server and close the client.
+    client = None
+    try:
+        time.sleep(0.05)
+        client = Lightning(wallet_hotkey=VALIDATOR_HOTKEY)
+        client.set_validator_keypair(VALIDATOR_SEED)
+        axon = {"hotkey": MINER_HOTKEY, "ip": "127.0.0.1", "port": port}
+        client.initialize_connections([axon])
+
+        echo_resp = client.query_axon(axon, {"synapse_type": "echo", "data": {"msg": "hi"}})
+        assert echo_resp["success"] is True
+        assert echo_resp["data"]["msg"] == "hi"
+
+        upper_resp = client.query_axon(axon, {"synapse_type": "upper", "data": {"text": "hello"}})
+        assert upper_resp["success"] is True
+        assert upper_resp["data"]["result"] == "HELLO"
+    finally:
+        if client is not None:
+            client.close()
+        server.stop()
+        t.join(timeout=5)
diff --git a/crates/btlightning-py/tests/test_roundtrip.py b/crates/btlightning-py/tests/test_roundtrip.py
diff --git a/crates/btlightning-py/tests/test_server.py b/crates/btlightning-py/tests/test_server.py
diff --git a/crates/btlightning-py/tests/test_streaming.py b/crates/btlightning-py/tests/test_streaming.py
diff --git a/crates/btlightning-py/tests/test_types.py b/crates/btlightning-py/tests/test_types.py