Skip to content

Commit 759558f

Browse files
committed
Add more diagnostics and test the core alone
Signed-off-by: James Duong <[email protected]>
1 parent 2c4e354 commit 759558f

File tree

1 file changed

+149
-4
lines changed

1 file changed

+149
-4
lines changed

utils/remote_cluster_manager.py

Lines changed: 149 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,9 @@ def start_cluster(
496496
if tls:
497497
self.diagnose_tls_issue(endpoints)
498498
self.test_cluster_discovery_tls(endpoints)
499+
500+
# Test glide-core cluster TLS to isolate Java vs Rust issue
501+
self.test_glide_core_cluster_tls(endpoints)
499502

500503
return endpoints
501504
else:
@@ -675,9 +678,9 @@ def diagnose_tls_issue(self, endpoints: List[str]) -> None:
675678
returncode, stdout, stderr = self._execute_remote_command(openssl_cmd, timeout=10)
676679

677680
if "Verify return code: 0 (ok)" in stdout:
678-
logging.info(f" TLS handshake OK for {endpoint}")
681+
logging.info(f" OK - TLS handshake OK for {endpoint}")
679682
else:
680-
logging.warning(f" TLS handshake FAILED for {endpoint}")
683+
logging.warning(f" FAIL - TLS handshake FAILED for {endpoint}")
681684
# Extract relevant error info
682685
for line in stdout.split('\n'):
683686
if 'verify error' in line.lower() or 'certificate verify failed' in line.lower():
@@ -738,9 +741,9 @@ def test_cluster_discovery_tls(self, endpoints: List[str]) -> None:
738741
discovered_set = set(discovered_nodes)
739742

740743
if initial_set == discovered_set:
741-
logging.info(" Discovered nodes match initial endpoints")
744+
logging.info("OK - Discovered nodes match initial endpoints")
742745
else:
743-
logging.warning(" Discovered nodes differ from initial endpoints")
746+
logging.warning("FAIL - Discovered nodes differ from initial endpoints")
744747
only_initial = initial_set - discovered_set
745748
only_discovered = discovered_set - initial_set
746749
if only_initial:
@@ -753,6 +756,148 @@ def test_cluster_discovery_tls(self, endpoints: List[str]) -> None:
753756
logging.info("=== END CLUSTER DISCOVERY TLS TEST ===")
754757

755758

759+
def test_glide_core_cluster_tls(self, endpoints: List[str]) -> bool:
    """Run a standalone Rust cluster-TLS connection test against the cluster.

    Copies the TLS files from the remote repository checkout, generates a
    throwaway Cargo project that connects to ``endpoints`` with the copied CA
    certificate and issues a PING, runs it with ``cargo run`` and logs the
    outcome. The scratch directories are removed afterwards.

    Args:
        endpoints: Cluster node addresses, embedded verbatim as string
            literals in the generated Rust program.

    Returns:
        True when ``cargo run`` exits with status 0. False when ``endpoints``
        is empty, the CA certificate could not be copied, the run fails, or
        any exception (including the 60 s timeout) is raised along the way.
    """
    import shutil

    if not endpoints:
        return False

    logging.info("=== GLIDE-CORE CLUSTER TLS TEST ===")

    # Scratch directories, created relative to the current working directory.
    local_tls_dir = "tls_test_certs"
    test_dir = "rust_cluster_test"
    os.makedirs(local_tls_dir, exist_ok=True)

    try:
        # Copy certificates from remote
        cert_files = ["ca.crt", "server.crt", "server.key"]
        for cert_file in cert_files:
            remote_path = f"{self.remote_repo_path}/utils/tls_crts/{cert_file}"
            local_path = f"{local_tls_dir}/{cert_file}"
            if self._copy_file_from_remote(remote_path, local_path):
                logging.info(f"Copied {cert_file} to {local_path}")
            elif cert_file == "ca.crt":
                # The generated program reads only ca.crt. Without it the run
                # would fail on a missing file and be misreported below as a
                # Rust-core TLS problem, so stop here instead.
                logging.error(
                    f"Failed to copy {cert_file} from remote; "
                    "skipping Rust cluster TLS test"
                )
                return False
            else:
                logging.warning(f"Failed to copy {cert_file} from remote")

        # Create a simple Rust test program
        endpoint_literals = ", ".join(f'"{ep}"' for ep in endpoints)
        test_program = f'''
use redis::{{Client, cluster::{{ClusterClient, ClusterClientBuilder}}}};
use std::fs;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {{
    let endpoints = vec![{endpoint_literals}];

    // Read certificates
    let ca_cert = fs::read("{local_tls_dir}/ca.crt")?;

    println!("Testing cluster connection to: {{:?}}", endpoints);

    // Create cluster client with TLS
    let client = ClusterClientBuilder::new(endpoints)
        .tls(redis::cluster::TlsMode::Secure)
        .certs(redis::TlsCertificates {{
            client_tls: None,
            root_cert: Some(ca_cert),
        }})
        .build()?;

    println!("Created cluster client, attempting connection...");

    // Test connection
    let mut conn = client.get_async_connection().await?;

    println!("Connected successfully! Testing PING...");

    // Test basic operation
    let pong: String = redis::cmd("PING").query_async(&mut conn).await?;
    println!("PING response: {{}}", pong);

    println!("SUCCESS: Rust glide-core cluster TLS test passed");
    Ok(())
}}
'''

        # Write test program. The manifest below declares no [[bin]] target,
        # so Cargo only finds the binary at the conventional src/main.rs.
        src_dir = os.path.join(test_dir, "src")
        os.makedirs(src_dir, exist_ok=True)

        with open(os.path.join(src_dir, "main.rs"), "w") as f:
            f.write(test_program)

        # Create Cargo.toml
        # NOTE(review): the redis dependency path is hard-coded relative to
        # the manifest directory and only resolves on a matching checkout
        # layout -- confirm before relying on this test elsewhere.
        cargo_toml = '''
[package]
name = "cluster_tls_test"
version = "0.1.0"
edition = "2021"

[dependencies]
redis = { path = "../gh/jduo/valkey-glide/glide-core/redis-rs/redis", features = ["cluster-async", "tokio-comp"] }
tokio = { version = "1", features = ["full"] }
'''

        with open(f"{test_dir}/Cargo.toml", "w") as f:
            f.write(cargo_toml)

        # Run the test. The timeout covers the build as well as the run; a
        # timeout raises TimeoutExpired, which the handler below reports.
        logging.info("Running Rust cluster TLS test...")
        result = subprocess.run(
            ["cargo", "run", "--manifest-path", f"{test_dir}/Cargo.toml"],
            capture_output=True,
            text=True,
            timeout=60,
            env={**os.environ, "RUST_LOG": "debug"},
        )

        if result.returncode == 0:
            logging.info("SUCCESS - Rust cluster TLS test passed")
            logging.info("This indicates the issue is Java-specific, not in Rust core")
            if "SUCCESS: Rust glide-core cluster TLS test passed" in result.stdout:
                logging.info("Rust test output: Connection and PING successful")
            return True

        logging.warning("FAILED - Rust cluster TLS test failed")
        logging.warning("This indicates the issue is in the Rust core")
        logging.warning(f"Test stdout: {result.stdout}")
        logging.warning(f"Test stderr: {result.stderr}")

        # Check for specific BadSignature error
        if "BadSignature" in result.stderr:
            logging.warning("CONFIRMED: Rust core also shows BadSignature error")
            logging.warning("This is a RustTLS issue in the core, not Java-specific")

        return False

    except Exception as e:
        # Best-effort diagnostic: never let it break cluster start-up.
        logging.error(f"Error running Rust cluster test: {e}")
        return False
    finally:
        # Cleanup. ignore_errors keeps a failed removal from replacing the
        # result being returned with an exception.
        for cleanup_dir in [local_tls_dir, test_dir]:
            if os.path.exists(cleanup_dir):
                shutil.rmtree(cleanup_dir, ignore_errors=True)

        logging.info("=== END GLIDE-CORE CLUSTER TLS TEST ===")
880+
881+
def _copy_file_from_remote(self, remote_path: str, local_path: str) -> bool:
    """Copy a file from the remote host to the local machine with ``scp``.

    Connects as ``self.user`` to ``self.host`` using the identity file
    ``self.key_path``. Host-key checking is disabled via the ssh options
    passed on the command line.

    Args:
        remote_path: Path of the file on the remote host.
        local_path: Destination path on the local machine.

    Returns:
        True when ``scp`` exits with status 0. False when it exits non-zero
        or when launching it raises (missing binary, 30 s timeout, bad
        arguments); the reason is logged in both cases.
    """
    try:
        cmd = [
            "scp",
            "-i", self.key_path,
            "-o", "StrictHostKeyChecking=no",
            "-o", "UserKnownHostsFile=/dev/null",
            f"{self.user}@{self.host}:{remote_path}",
            local_path,
        ]

        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)

    except Exception as e:
        logging.error(f"Failed to copy {remote_path} from remote: {e}")
        return False

    if result.returncode != 0:
        # Surface scp's own diagnostics; otherwise the caller only sees False
        # with no hint of whether it was auth, path or connectivity.
        logging.error(
            f"Failed to copy {remote_path} from remote: "
            f"scp exited with {result.returncode}: {result.stderr.strip()}"
        )
        return False

    return True
899+
900+
756901
def main():
757902
logfile = "./cluster_manager.log"
758903
init_logger(logfile)

0 commit comments

Comments
 (0)