@@ -59,8 +59,20 @@ use tracing::info;
5959use crate :: adapters:: ssh:: SshConfig ;
6060use crate :: adapters:: ssh:: SshCredentials ;
6161use crate :: infrastructure:: external_validators:: RunningServicesValidator ;
62+ use crate :: infrastructure:: remote_actions:: validators:: PrometheusValidator ;
6263use crate :: infrastructure:: remote_actions:: { RemoteAction , RemoteActionError } ;
6364
/// Service validation configuration
///
/// Selects which optional service validations are performed during run
/// validation, so that only the services enabled in the environment
/// configuration are checked.
///
/// The [`Default`] value disables every optional validation (all flags are
/// `false`).
///
/// The type is a small `Copy` value and implements `PartialEq`/`Eq`, so
/// configurations can be compared directly (for example in tests).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ServiceValidation {
    /// Whether to validate Prometheus is running and accessible
    pub prometheus: bool,
}
75+
6476/// Errors that can occur during run validation
6577#[ derive( Debug , Error ) ]
6678pub enum RunValidationError {
@@ -73,6 +85,16 @@ Tip: Ensure Docker Compose services are started and healthy"
7385 #[ source]
7486 source : RemoteActionError ,
7587 } ,
88+
89+ /// Prometheus smoke test failed
90+ #[ error(
91+ "Prometheus smoke test failed: {source}
92+ Tip: Ensure Prometheus container is running and accessible on port 9090"
93+ ) ]
94+ PrometheusValidationFailed {
95+ #[ source]
96+ source : RemoteActionError ,
97+ } ,
7698}
7799
78100impl RunValidationError {
@@ -118,6 +140,35 @@ impl RunValidationError {
118140 - Re-run the 'run' command: cargo run -- run <environment>
119141 - Or manually: cd /opt/torrust && docker compose up -d
120142
143+ For more information, see docs/e2e-testing/."
144+ }
145+ Self :: PrometheusValidationFailed { .. } => {
146+ "Prometheus Smoke Test Failed - Detailed Troubleshooting:
147+
148+ 1. Check Prometheus container status:
149+ - SSH to instance: ssh user@instance-ip
150+ - Check container: cd /opt/torrust && docker compose ps
151+ - View Prometheus logs: docker compose logs prometheus
152+
153+ 2. Verify Prometheus is accessible:
154+ - Test from inside VM: curl http://localhost:9090
155+ - Check if port 9090 is listening: ss -tlnp | grep 9090
156+
157+ 3. Common issues:
158+ - Prometheus container failed to start (check logs)
159+ - Port 9090 already in use by another process
160+ - Prometheus configuration file has errors
161+ - Insufficient memory for Prometheus
162+
163+ 4. Debug steps:
164+ - Check Prometheus config: docker compose exec prometheus cat /etc/prometheus/prometheus.yml
165+ - Restart Prometheus: docker compose restart prometheus
166+ - Check scrape targets: curl http://localhost:9090/api/v1/targets | jq
167+
168+ 5. Re-deploy if needed:
169+ - Re-run 'run' command: cargo run -- run <environment>
170+ - Or manually: cd /opt/torrust && docker compose up -d prometheus
171+
121172For more information, see docs/e2e-testing/."
122173 }
123174 }
@@ -135,6 +186,7 @@ For more information, see docs/e2e-testing/."
135186/// * `ssh_credentials` - SSH credentials for connecting to the instance
136187/// * `tracker_api_port` - Port for the tracker API health endpoint
137188/// * `http_tracker_ports` - Ports for HTTP tracker health endpoints (can be empty)
189+ /// * `services` - Optional service validation configuration (defaults to no optional services)
138190///
139191/// # Returns
140192///
@@ -146,24 +198,29 @@ For more information, see docs/e2e-testing/."
146198/// - SSH connection cannot be established
147199/// - Services are not running
148200/// - Services are unhealthy
201+ /// - Optional service validation fails (when enabled)
149202pub async fn run_run_validation (
150203 socket_addr : SocketAddr ,
151204 ssh_credentials : & SshCredentials ,
152205 tracker_api_port : u16 ,
153206 http_tracker_ports : Vec < u16 > ,
207+ services : Option < ServiceValidation > ,
154208) -> Result < ( ) , RunValidationError > {
209+ let services = services. unwrap_or_default ( ) ;
210+
155211 info ! (
156212 socket_addr = %socket_addr,
157213 ssh_user = %ssh_credentials. ssh_username,
158214 tracker_api_port = tracker_api_port,
159215 http_tracker_ports = ?http_tracker_ports,
216+ validate_prometheus = services. prometheus,
160217 "Running 'run' command validation tests"
161218 ) ;
162219
163220 let ip_addr = socket_addr. ip ( ) ;
164221
165- // Validate running services
166- validate_running_services (
222+ // Validate externally accessible services (tracker API, HTTP tracker)
223+ validate_external_services (
167224 ip_addr,
168225 ssh_credentials,
169226 socket_addr. port ( ) ,
@@ -172,6 +229,11 @@ pub async fn run_run_validation(
172229 )
173230 . await ?;
174231
232+ // Optionally validate Prometheus is running and accessible
233+ if services. prometheus {
234+ validate_prometheus ( ip_addr, ssh_credentials, socket_addr. port ( ) ) . await ?;
235+ }
236+
175237 info ! (
176238 socket_addr = %socket_addr,
177239 status = "success" ,
@@ -181,19 +243,25 @@ pub async fn run_run_validation(
181243 Ok ( ( ) )
182244}
183245
184- /// Validate running services on a configured instance
246+ /// Validate externally accessible services on a configured instance
247+ ///
248+ /// This function validates services that are exposed outside the VM and accessible
249+ /// without SSH (e.g., tracker API, HTTP tracker). These services have firewall rules
250+ /// allowing external access. It checks the status of services started by the `run`
251+ /// command and verifies they are operational by connecting from outside the VM.
252+ ///
253+ /// # Note
185254///
186- /// This function validates that Docker Compose services are running and healthy
187- /// on the target instance. It checks the status of services started by the `run`
188- /// command and verifies they are operational.
189- async fn validate_running_services (
255+ /// Internal services like Prometheus (not exposed externally) are validated separately
256+ /// via SSH in `validate_prometheus()`.
257+ async fn validate_external_services (
190258 ip_addr : IpAddr ,
191259 ssh_credentials : & SshCredentials ,
192260 port : u16 ,
193261 tracker_api_port : u16 ,
194262 http_tracker_ports : Vec < u16 > ,
195263) -> Result < ( ) , RunValidationError > {
196- info ! ( "Validating running services" ) ;
264+ info ! ( "Validating externally accessible services (tracker API, HTTP tracker) " ) ;
197265
198266 let ssh_config = SshConfig :: new ( ssh_credentials. clone ( ) , SocketAddr :: new ( ip_addr, port) ) ;
199267
@@ -206,3 +274,30 @@ async fn validate_running_services(
206274
207275 Ok ( ( ) )
208276}
277+
278+ /// Validate Prometheus is running and accessible via smoke test
279+ ///
280+ /// This function performs a smoke test on Prometheus by connecting via SSH
281+ /// and executing a curl command to verify the web UI is accessible.
282+ ///
283+ /// # Note
284+ ///
285+ /// Prometheus runs on port 9090 inside the VM but is NOT exposed externally
286+ /// (blocked by firewall). Validation must be performed from inside the VM.
287+ async fn validate_prometheus (
288+ ip_addr : IpAddr ,
289+ ssh_credentials : & SshCredentials ,
290+ port : u16 ,
291+ ) -> Result < ( ) , RunValidationError > {
292+ info ! ( "Validating Prometheus is running and accessible" ) ;
293+
294+ let ssh_config = SshConfig :: new ( ssh_credentials. clone ( ) , SocketAddr :: new ( ip_addr, port) ) ;
295+
296+ let prometheus_validator = PrometheusValidator :: new ( ssh_config, None ) ;
297+ prometheus_validator
298+ . execute ( & ip_addr)
299+ . await
300+ . map_err ( |source| RunValidationError :: PrometheusValidationFailed { source } ) ?;
301+
302+ Ok ( ( ) )
303+ }
0 commit comments