Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 36 additions & 18 deletions .config/nextest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,26 @@ path = "junit.xml"
store-success-output = true
store-failure-output = true

# Serialize Docker Compose tests: "binary(=integration_test)" is an exact match,
# so it does NOT affect integration_test_isolated (which runs in parallel).
# Long timeout for tests that use production-sized FHE parameters (Default +
# SnS). Give them a 2h window before termination.
#
# Docker-based Default-param tests (integration_test binary):
[[profile.ci.overrides]]
filter = 'binary(=integration_test) & test(/full_gen_tests_default|nightly_tests_threshold_sequential_crs|test_threshold_concurrent_crs|test_threshold_insecure/)'
slow-timeout = { period = "600s", terminate-after = 12 }
test-group = "docker-integration"

# Native isolated Default-param tests (integration_test_isolated binary):
# These don't use Docker but are placed in docker-integration to prevent
# CPU contention with Docker Default-param tests running concurrently.
[[profile.ci.overrides]]
filter = 'binary(=integration_test_isolated) & test(/full_gen_tests_default|nightly_tests_threshold_sequential_crs|test_threshold_concurrent_crs/)'
slow-timeout = { period = "600s", terminate-after = 12 }
test-group = "docker-integration"

# Serialize remaining Docker Compose tests (non-Default-param).
# "binary(=integration_test)" is an exact match — does NOT affect
# integration_test_isolated.
[[profile.ci.overrides]]
filter = "binary(=integration_test)"
test-group = "docker-integration"
Expand All @@ -43,15 +61,6 @@ test-group = "docker-integration"
filter = "binary(/^kubernetes_test_/)"
test-group = "k8s-integration"

# Long timeout for tests that use production-sized FHE parameters (Default +
# SnS). These are inherently very slow (keygen/CRS can take 30-90 min).
# Give them a 2h window before termination and serialize them with the Docker
# tests to avoid CPU/memory contention on CI machines.
[[profile.ci.overrides]]
filter = 'test(/full_gen_tests_default|nightly_tests_threshold_sequential_crs|test_threshold_concurrent_crs/)'
slow-timeout = { period = "600s", terminate-after = 12 }
test-group = "docker-integration"

# =============================================================================
# CI-nightly profile: used for scheduled builds — runs ALL tests
# =============================================================================
Expand All @@ -65,7 +74,22 @@ path = "junit.xml"
store-success-output = true
store-failure-output = true

# Serialize Docker Compose tests (same as ci profile).
# Long timeout + serialization for slow Default-param tests (same as ci profile).
# IMPORTANT: Must come before docker-integration catch-all (first match wins).
#
# Docker-based Default-param tests (integration_test binary):
[[profile.ci-nightly.overrides]]
filter = 'binary(=integration_test) & test(/full_gen_tests_default|nightly_tests_threshold_sequential_crs|test_threshold_concurrent_crs|test_threshold_insecure/)'
slow-timeout = { period = "600s", terminate-after = 12 }
test-group = "docker-integration"

# Native isolated Default-param tests (integration_test_isolated binary):
[[profile.ci-nightly.overrides]]
filter = 'binary(=integration_test_isolated) & test(/full_gen_tests_default|nightly_tests_threshold_sequential_crs|test_threshold_concurrent_crs/)'
slow-timeout = { period = "600s", terminate-after = 12 }
test-group = "docker-integration"

# Serialize remaining Docker Compose tests (same as ci profile).
[[profile.ci-nightly.overrides]]
filter = "binary(=integration_test)"
test-group = "docker-integration"
Expand All @@ -74,9 +98,3 @@ test-group = "docker-integration"
[[profile.ci-nightly.overrides]]
filter = "binary(/^kubernetes_test_/)"
test-group = "k8s-integration"

# Long timeout + serialization for slow Default-param tests (same as ci profile).
[[profile.ci-nightly.overrides]]
filter = 'test(/full_gen_tests_default|nightly_tests_threshold_sequential_crs|test_threshold_concurrent_crs/)'
slow-timeout = { period = "600s", terminate-after = 12 }
test-group = "docker-integration"
11 changes: 8 additions & 3 deletions .github/workflows/common-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ on:
type: boolean
required: false
default: false # If true, download test material from artifacts instead of generating
generate-default-material:
type: boolean
required: false
default: false # If true, generate Default test material (includes pre-generated PRSS)
secrets:
CODECOV_TOKEN:
required: false
Expand Down Expand Up @@ -302,12 +306,13 @@ jobs:
echo "Generating testing material..."
cargo run -p generate-test-material -- --output ./test-material --verbose testing

# Generate default material only if slow_tests and not using pre-generated
# Generate default material when explicitly requested or for slow_tests,
# and only when not using pre-generated artifacts.
- name: Generate Test Material (Default)
if: ${{ ! inputs.use-pregenerated-material && contains(inputs.args-tests, 'slow_tests') }}
if: ${{ ! inputs.use-pregenerated-material && (inputs.generate-default-material || contains(inputs.args-tests, 'slow_tests')) }}
working-directory: .
run: |
echo "Generating default test material for slow_tests..."
echo "Generating default test material..."
cargo run -p generate-test-material --features slow_tests -- --output ./test-material --verbose default

# Build kms-custodian binary required by integration tests
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/kind-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ jobs:
#============================================================
#
set +e
cargo nextest run --test 'kubernetes_test_'"${DEPLOYMENT_TYPE}"'*' --features k8s_tests,testing --profile "${NEXTEST_PROFILE}" --no-fail-fast
cargo nextest run --test 'kubernetes_test_'"${DEPLOYMENT_TYPE}"'*' --features kind_tests --profile "${NEXTEST_PROFILE}" --no-fail-fast
echo "TEST_EXIT_CODE=$?" >> "${GITHUB_ENV}"
set -e

Expand Down
11 changes: 7 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
- 'Cargo.toml'
- 'Cargo.lock'
core-client-k8s-tests:
- 'core-client/tests/kubernetes_test_*'
- 'core-client/tests/**'
core-grpc:
- 'core/grpc/**'
- 'Cargo.toml'
Expand Down Expand Up @@ -263,7 +263,7 @@ jobs:
############################################################################
# Prepares test matrix for core-client integration tests
# Splits tests into two parallel jobs: threshold and centralized
# Features required: k8s_tests (for PRSS tests), testing (for test helpers)
# Features required: threshold_tests (for PRSS tests), testing (for test helpers)
prepare-core-client-matrix:
name: main/prepare-core-client-matrix
needs: check-changes
Expand All @@ -285,7 +285,7 @@ jobs:
# Threshold tests: includes PRSS tests (sequential execution via #[serial])
# Centralized tests: basic keygen, CRS, backup/restore workflows
# Both skip: full_gen_tests (nightly), nightly_* (scheduled), k8s_* (cluster tests), isolated_test_example (demo)
MATRIX="${MATRIX}{\"args-tests\":\"--features testing,k8s_tests -- threshold --skip full_gen_tests --skip nightly --skip k8s_ --skip centralized --skip isolated_test_example\"},{\"args-tests\":\"--features testing -- centralized --skip full_gen_tests --skip nightly --skip k8s_ --skip threshold --skip isolated_test_example\"}"
MATRIX="${MATRIX}{\"args-tests\":\"--features threshold_tests -- threshold --skip full_gen_tests --skip nightly --skip k8s_ --skip centralized --skip isolated_test_example\",\"generate_default_material\":true},{\"args-tests\":\"--features testing -- centralized --skip full_gen_tests --skip nightly --skip k8s_ --skip threshold --skip isolated_test_example\",\"generate_default_material\":false}"
MATRIX="${MATRIX%,}]}"
echo "$MATRIX" >> "$GITHUB_OUTPUT"
echo "$MATRIX"
Expand Down Expand Up @@ -320,6 +320,7 @@ jobs:
working-directory: './core-client'
package-name: 'kms-core-client'
args-tests: ${{ matrix.args-tests }}
generate-default-material: ${{ matrix.generate_default_material }}
app-cache-dir: 'kms-core-client'
# Available runners:
# - 1cpu-linux-x64 (m7a.medium)
Expand Down Expand Up @@ -363,7 +364,9 @@ jobs:
working-directory: './core-client'
package-name: 'kms-core-client'
# Run ALL tests - no skips (includes nightly_* and full_gen_tests_*)
args-tests: '--features testing,k8s_tests -- --skip k8s_ --skip isolated_test_example'
# nightly_* and full_gen_tests_* are skipped in regular CI via --skip substring matching
args-tests: '--features threshold_tests -- --skip k8s_ --skip isolated_test_example'
generate-default-material: true
nextest-profile: 'ci-nightly'
app-cache-dir: 'kms-core-client'
runs-on: '32cpu-linux-x64'
Expand Down
50 changes: 47 additions & 3 deletions core-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,36 @@ path = "src/lib.rs"
name = "kms-core-client"
path = "src/bin.rs"

[[test]]
name = "integration_test"
path = "tests/integration/integration_test.rs"
required-features = ["testing"]

[[test]]
name = "integration_test_isolated"
path = "tests/integration/integration_test_isolated.rs"
required-features = ["testing"]

[[test]]
name = "kubernetes_test_centralized"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider using k8s instead of the full kubernetes. :)

path = "tests/kind-testing/kubernetes_test_centralized.rs"
required-features = ["kind_tests"]

[[test]]
name = "kubernetes_test_centralized_isolated"
path = "tests/kind-testing/kubernetes_test_centralized_isolated.rs"
required-features = ["kind_tests"]

[[test]]
name = "kubernetes_test_threshold"
path = "tests/kind-testing/kubernetes_test_threshold.rs"
required-features = ["kind_tests"]

[[test]]
name = "kubernetes_test_threshold_isolated"
path = "tests/kind-testing/kubernetes_test_threshold_isolated.rs"
required-features = ["kind_tests"]

[dependencies]
# ⚠️ IMPORTANT: Add new dependencies to workspace root Cargo.toml, NOT here!
# Use .workspace = true to reference workspace dependencies
Expand Down Expand Up @@ -71,7 +101,21 @@ tonic-build.workspace = true
ignored = ["kms", "strum"]

[features]
# Enable PRSS-dependent tests that require sequential execution and stable networking
# These tests are designed for K8s CI environments
k8s_tests = []
# Enables threshold PRSS tests in `integration_test_isolated`.
# These are the threshold tests that require PRSS infrastructure:
# preproc+keygen, MPC context init/switch, reshare, full-gen preproc.
# Un-ignores tests guarded with `#[cfg_attr(not(feature = "threshold_tests"), ignore)]`.
# Compiles setup helpers that run threshold servers with `run_prss=true`.
#
# This flag only gates code/tests; it does NOT generate key material by itself.
# Material requirements:
# - Test params: missing PRSS can be initialized live at server startup.
# - Default params: pre-generated PRSS is required in `test-material/default`
# (run `make generate-test-material-default`); missing PRSS is a hard error.
# - Some tests generate PRSS during the test itself (via NewEpoch/new_prss_isolated)
# and therefore do not require pre-generated startup PRSS.
# Does NOT enable Kind/Kubernetes tests (use `kind_tests` for those).
threshold_tests = ["testing"]
# Enable tests that require a running Kind/Kubernetes cluster.
kind_tests = ["testing"]
testing = []
8 changes: 8 additions & 0 deletions core-client/src/keygen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,10 @@ pub(crate) async fn get_preproc_keygen_responses(
))
.await;

tracing::info!(
"Polling preproc result for request {} from party {}",
request_id, core_conf.party_id
);
let mut response = client
.get_key_gen_preproc_result(tonic::Request::new(request_id.into()))
.await;
Expand All @@ -610,6 +614,10 @@ pub(crate) async fn get_preproc_keygen_responses(
);
}
ctr += 1;
tracing::info!(
"Preproc result not ready yet for request {} from party {} (retry {}/{})",
request_id, core_conf.party_id, ctr, max_iter
);
response = client
.get_key_gen_preproc_result(tonic::Request::new(request_id.into()))
.await;
Expand Down
35 changes: 35 additions & 0 deletions core-client/tests-utils/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use std::env;
use std::fmt;
use std::io::Write;
use std::net::TcpStream;
use std::path::PathBuf;
use std::process::{Command, Output};
use std::time::{Duration, Instant};

pub struct DockerComposeCmd {
pub root_path: PathBuf,
Expand Down Expand Up @@ -40,6 +42,21 @@ pub fn format_output(output: &Output) -> OutputWrapper<'_> {
OutputWrapper(output)
}

/// Report whether TCP `port` appears to be unused on this host.
///
/// A port counts as free only when both probes agree:
/// - it can be bound on all IPv4 interfaces, which also catches sockets that
///   were bound to `0.0.0.0` rather than to loopback, or that are bound but
///   not accepting connections; and
/// - nothing accepts a connection on it via `127.0.0.1`.
fn port_is_free(port: u16) -> bool {
    // The probe listener is a temporary that is closed again at the end of
    // this statement, so the probe itself does not keep the port occupied.
    let bindable = std::net::TcpListener::bind(("0.0.0.0", port)).is_ok();
    bindable && TcpStream::connect(("127.0.0.1", port)).is_err()
}

/// Block until every TCP port in `ports` looks unused on this host, or until
/// `timeout` has elapsed.
///
/// This prevents "address already in use" errors when Docker Compose retries
/// start before the OS has released ports from the previous run.
///
/// All ports are probed together on every polling round, so a single port
/// that is slow to free cannot stop the remaining ports from being checked.
/// The wait is best-effort: when `timeout` expires, the ports that are still
/// busy are reported on stderr and the function returns so that the caller
/// can proceed (and fail with a more specific error if the port is really
/// still taken).
fn wait_for_ports_free(ports: &[u16], timeout: Duration) {
    const POLL_INTERVAL: Duration = Duration::from_millis(500);

    let deadline = Instant::now() + timeout;
    loop {
        let busy: Vec<u16> = ports
            .iter()
            .copied()
            .filter(|&port| !port_is_free(port))
            .collect();
        if busy.is_empty() {
            return;
        }

        let remaining = deadline.saturating_duration_since(Instant::now());
        if remaining.is_zero() {
            eprintln!(
                "Timed out after {:?} waiting for ports to be released; still in use: {:?}",
                timeout, busy
            );
            return;
        }
        // Never sleep past the deadline.
        std::thread::sleep(remaining.min(POLL_INTERVAL));
    }
}

impl DockerComposeCmd {
pub fn new(mode: KMSMode) -> Self {
let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set");
Expand All @@ -51,8 +68,26 @@ impl DockerComposeCmd {
DockerComposeCmd { root_path, mode }
}

fn ports_for_mode(&self) -> &'static [u16] {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this function could be const?

match self.mode {
KMSMode::ThresholdTestParameterNoInitSixParty => &[
50100, 50200, 50300, 50400, 50500, 50600, 50001, 50002, 50003, 50004, 50005, 50006,
],
KMSMode::ThresholdDefaultParameter
| KMSMode::ThresholdTestParameter
| KMSMode::ThresholdTestParameterNoInit
| KMSMode::ThresholdCustodianTestParameter => {
&[50100, 50200, 50300, 50400, 50001, 50002, 50003, 50004]
}
KMSMode::Centralized | KMSMode::CentralizedCustodian => &[50100],
}
}

pub fn up(&self) {
self.down(); // Make sure that no container is running
// Wait for the OS to release ports before starting new containers.
// Without this, Docker Compose retries fail with "address already in use".
wait_for_ports_free(self.ports_for_mode(), Duration::from_secs(30));
let build_docker = env::var("DOCKER_BUILD_TEST_CORE_CLIENT").unwrap_or("".to_string());

// set the FHE params based on mode
Expand Down
Loading
Loading