feat(enterprise): add endpoint management commands (#159)

joshrotenberg · joshrotenberg · commit 3314c387a8a6 · 2025-09-13T20:49:12.000-07:00
- Implement endpoint stats command to get aggregate statistics
- Add endpoint availability command for database endpoints
- Support JMESPath query filtering for output
- Add comprehensive mdBook documentation
- Include unit tests for command parsing
diff --git a/crates/redisctl/src/cli.rs b/crates/redisctl/src/cli.rs
@@ -977,6 +977,10 @@ pub enum EnterpriseCommands {
     #[command(subcommand)]
     Diagnostics(crate::commands::enterprise::diagnostics::DiagnosticsCommands),
 
+    /// Endpoint operations
+    #[command(subcommand)]
+    Endpoint(crate::commands::enterprise::endpoint::EndpointCommands),
+
     /// Node operations
     #[command(subcommand)]
     Node(EnterpriseNodeCommands),
diff --git a/crates/redisctl/src/commands/enterprise/endpoint.rs b/crates/redisctl/src/commands/enterprise/endpoint.rs
@@ -0,0 +1,117 @@
+use anyhow::Context;
+use clap::Subcommand;
+
+use crate::{cli::OutputFormat, connection::ConnectionManager, error::Result as CliResult};
+
+/// Entry point for endpoint subcommands; forwards to [`EndpointCommands::execute`].
+#[allow(dead_code)]
+pub async fn handle_endpoint_command(
+    conn_mgr: &ConnectionManager,
+    profile_name: Option<&str>,
+    endpoint_cmd: EndpointCommands,
+    output_format: OutputFormat,
+    query: Option<&str>,
+) -> CliResult<()> {
+    let run = endpoint_cmd.execute(conn_mgr, profile_name, output_format, query);
+    run.await
+}
+
+#[derive(Debug, Clone, Subcommand)] // clap derive: `///` text below is the CLI help
+pub enum EndpointCommands {
+    /// Get endpoint statistics
+    Stats,
+
+    /// Check endpoint availability for a database
+    Availability {
+        /// Database UID
+        bdb_uid: u64, // interpolated into the availability request path
+    },
+}
+
+impl EndpointCommands {
+    #[allow(dead_code)] // NOTE(review): used by handle_endpoint_command; is this still needed?
+    pub async fn execute(
+        &self,
+        conn_mgr: &ConnectionManager,
+        profile_name: Option<&str>,
+        output_format: OutputFormat,
+        query: Option<&str>,
+    ) -> CliResult<()> {
+        handle_endpoint_command_impl(conn_mgr, profile_name, self, output_format, query).await
+    }
+}
+
+/// Execute an endpoint subcommand through the enterprise client and print the result.
+///
+/// * `Stats` requests `/v1/endpoints/stats`.
+/// * `Availability` requests `/v1/local/bdbs/{bdb_uid}/endpoint/availability`.
+///
+/// If `query` is given, the JSON response is passed through `apply_jmespath` with it
+/// first; the resulting value is then printed in `output_format`.
+///
+/// # Errors
+///
+/// Propagates failures from client creation, the request, the query, and printing;
+/// request failures carry a context message naming the operation.
+#[allow(dead_code)]
+async fn handle_endpoint_command_impl(
+    conn_mgr: &ConnectionManager,
+    profile_name: Option<&str>,
+    command: &EndpointCommands,
+    output_format: OutputFormat,
+    query: Option<&str>,
+) -> CliResult<()> {
+    let client = conn_mgr.create_enterprise_client(profile_name).await?;
+
+    // Only the request differs per subcommand; filtering and printing are shared below.
+    let response: serde_json::Value = match command {
+        EndpointCommands::Stats => client
+            .get("/v1/endpoints/stats")
+            .await
+            .context("Failed to get endpoint statistics")?,
+        // `with_context` defers building the message until the request has failed.
+        EndpointCommands::Availability { bdb_uid } => client
+            .get(&format!("/v1/local/bdbs/{}/endpoint/availability", bdb_uid))
+            .await
+            .with_context(|| {
+                format!("Failed to check endpoint availability for database {}", bdb_uid)
+            })?,
+    };
+
+    let output_data = match query {
+        Some(q) => super::utils::apply_jmespath(&response, q)?,
+        None => response,
+    };
+
+    super::utils::print_formatted_output(output_data, output_format)?;
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Both subcommands parse from argv, and `availability` captures its positional UID.
+    #[test]
+    fn test_endpoint_command_parsing() {
+        use clap::Parser;
+
+        #[derive(Parser)]
+        struct TestCli {
+            #[command(subcommand)]
+            cmd: EndpointCommands,
+        }
+
+        // `stats` is given no arguments.
+        let parsed = TestCli::parse_from(["test", "stats"]);
+        assert!(matches!(parsed.cmd, EndpointCommands::Stats));
+
+        // `availability` is given the database UID as a positional argument.
+        let parsed = TestCli::parse_from(["test", "availability", "1"]);
+        match parsed.cmd {
+            EndpointCommands::Availability { bdb_uid } => assert_eq!(bdb_uid, 1),
+            _ => panic!("Expected Availability command"),
+        }
+    }
+}
diff --git a/crates/redisctl/src/commands/enterprise/mod.rs b/crates/redisctl/src/commands/enterprise/mod.rs
@@ -10,6 +10,7 @@ pub mod crdb_task;
 pub mod database;
 pub mod database_impl;
 pub mod diagnostics;
+pub mod endpoint;
 pub mod job_scheduler;
 pub mod logs;
 pub mod logs_impl;
diff --git a/crates/redisctl/src/main.rs b/crates/redisctl/src/main.rs
@@ -232,6 +232,16 @@ async fn execute_enterprise_command(
             )
             .await
         }
+        Endpoint(endpoint_cmd) => {
+            commands::enterprise::endpoint::handle_endpoint_command(
+                conn_mgr,
+                profile,
+                endpoint_cmd.clone(),
+                output,
+                query,
+            )
+            .await
+        }
         Node(node_cmd) => {
             commands::enterprise::node::handle_node_command(
                 conn_mgr, profile, node_cmd, output, query,
diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md
@@ -39,6 +39,7 @@
 - [CRDB Tasks](./enterprise/crdb-tasks.md)
 - [Actions (Tasks)](./enterprise/actions.md)
 - [Diagnostics](./enterprise/diagnostics.md)
+- [Endpoints](./enterprise/endpoints.md)
 - [Job Scheduler](./enterprise/job-scheduler.md)
 - [Workflows](./enterprise/workflows.md)
 - [Raw API Access](./enterprise/api-access.md)
diff --git a/docs/src/enterprise/endpoints.md b/docs/src/enterprise/endpoints.md
@@ -0,0 +1,209 @@
+# Endpoint Management
+
+The endpoint commands provide access to Redis Enterprise database endpoint statistics and availability monitoring.
+
+> **Note**: Redis Enterprise manages most endpoint configurations through database commands. These commands provide monitoring and statistics capabilities.
+
+## Available Commands
+
+### Get Endpoint Statistics
+
+Get aggregate statistics for all database endpoints in the cluster:
+
+```bash
+# Get all endpoint statistics
+redisctl enterprise endpoint stats
+
+# Get statistics as YAML
+redisctl enterprise endpoint stats -o yaml
+
+# Filter to specific metrics
+redisctl enterprise endpoint stats -q '[].{name: endpoint_name, connections: current_connections}'
+
+# Get statistics for endpoints with high connection counts
+redisctl enterprise endpoint stats -q '[?current_connections > `100`]'
+```
+
+The statistics include:
+- Connection metrics (current, total, failed)
+- Request/response rates
+- Latency information
+- Error counts
+- Bandwidth usage
+
+### Check Endpoint Availability
+
+Check the availability status of a specific database endpoint:
+
+```bash
+# Check endpoint availability for database 1
+redisctl enterprise endpoint availability 1
+
+# Get availability as table
+redisctl enterprise endpoint availability 1 -o table
+
+# Extract specific availability information
+redisctl enterprise endpoint availability 1 -q 'available'
+```
+
+Availability information includes:
+- Current availability status
+- Node availability
+- Shard distribution
+- Failover status
+- Connection health
+
+## Output Examples
+
+### Endpoint Statistics
+```json
+[
+  {
+    "endpoint_name": "redis-12345.cluster.local:16379",
+    "bdb_uid": 1,
+    "current_connections": 45,
+    "total_connections": 12543,
+    "failed_connections": 2,
+    "requests_per_sec": 5432,
+    "responses_per_sec": 5430,
+    "avg_latency_ms": 0.8,
+    "bandwidth_in_mbps": 12.5,
+    "bandwidth_out_mbps": 8.3,
+    "errors_per_sec": 0.1
+  }
+]
+```
+
+### Endpoint Availability
+```json
+{
+  "bdb_uid": 1,
+  "available": true,
+  "endpoints": [
+    {
+      "addr": "redis-12345.cluster.local:16379",
+      "node": 1,
+      "role": "master",
+      "status": "active"
+    }
+  ],
+  "shards_placement": "optimal",
+  "last_failover": null
+}
+```
+
+## Common Use Cases
+
+### Monitoring Endpoint Health
+
+Monitor endpoint statistics and set up alerts:
+
+```bash
+# Check endpoints with high error rates
+redisctl enterprise endpoint stats -q '[?errors_per_sec > `10`]'
+
+# Monitor endpoints with connection issues
+redisctl enterprise endpoint stats -q '[?failed_connections > `0`].{name: endpoint_name, failed: failed_connections}'
+
+# Check latency across all endpoints
+redisctl enterprise endpoint stats -q "[].{endpoint: endpoint_name, latency: avg_latency_ms}" -o table
+```
+
+### Availability Monitoring
+
+Check database endpoint availability during maintenance:
+
+```bash
+# Check availability for critical databases
+for db in 1 2 3; do
+  echo "Database $db:"
+  redisctl enterprise endpoint availability $db -q 'available'
+done
+
+# Get detailed availability for troubleshooting
+redisctl enterprise endpoint availability 1 -o yaml
+```
+
+### Performance Analysis
+
+Analyze endpoint performance metrics:
+
+```bash
+# Get top endpoints by connection count
+redisctl enterprise endpoint stats -q "reverse(sort_by([],&current_connections))[:5]" -o table
+
+# Find endpoints with bandwidth issues
+redisctl enterprise endpoint stats -q '[?bandwidth_in_mbps > `100` || bandwidth_out_mbps > `100`]'
+
+# Compare request/response rates
+redisctl enterprise endpoint stats -q "[].{endpoint: endpoint_name, req_rate: requests_per_sec, resp_rate: responses_per_sec}" -o json | jq 'map(. + {diff: (.req_rate - .resp_rate)})'
+```
+
+## Integration with Monitoring
+
+Export endpoint metrics for monitoring systems:
+
+```bash
+# Export to monitoring format
+redisctl enterprise endpoint stats -o json > endpoint_metrics.json
+
+# Create CSV for analysis
+redisctl enterprise endpoint stats -q "[].{endpoint: endpoint_name, connections: current_connections, latency: avg_latency_ms, errors: errors_per_sec}" | jq -r '["endpoint","connections","latency","errors"], (.[] | [.endpoint, .connections, .latency, .errors]) | @csv'
+
+# Stream to monitoring pipeline
+while true; do
+  redisctl enterprise endpoint stats -o json | jq --arg ts "$(date -u +%FT%TZ)" 'map({timestamp: $ts, metrics: .})' | \
+    curl -X POST http://metrics-collector/ingest -d @-
+  sleep 60
+done
+```
+
+## Troubleshooting
+
+### High Connection Counts
+
+If endpoints show high connection counts:
+
+```bash
+# Identify affected endpoints
+redisctl enterprise endpoint stats -q '[?current_connections > `1000`]'
+
+# Check database configuration
+redisctl enterprise database get <bdb_uid> -q '{max_connections: max_connections, current: @ | current_connections}'
+
+# Monitor connection trends
+for i in {1..10}; do
+  redisctl enterprise endpoint stats -q "[].{endpoint: endpoint_name, connections: current_connections}" -o table
+  sleep 30
+done
+```
+
+### Availability Issues
+
+When endpoints report availability problems:
+
+```bash
+# Check specific database endpoint
+redisctl enterprise endpoint availability <bdb_uid>
+
+# Verify node status
+redisctl enterprise node list -q "[?status != 'active']"
+
+# Check shard distribution
+redisctl enterprise database get <bdb_uid> -q 'shards_placement'
+```
+
+## Best Practices
+
+1. **Regular Monitoring**: Set up regular checks of endpoint statistics to catch issues early
+2. **Baseline Metrics**: Establish baseline performance metrics for comparison
+3. **Alert Thresholds**: Configure alerts based on your specific workload patterns
+4. **Correlation**: Correlate endpoint metrics with database and node statistics
+5. **Capacity Planning**: Use connection and bandwidth metrics for capacity planning
+
+## Related Commands
+
+- `redisctl enterprise database` - Manage databases and their endpoints
+- `redisctl enterprise stats` - View detailed statistics
+- `redisctl enterprise node` - Check node status affecting endpoints
+- `redisctl enterprise cluster` - View cluster-wide endpoint configuration