Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions crates/aptos-telemetry-service/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ gcp-bigquery-client = { workspace = true }
jsonwebtoken = { workspace = true }
once_cell = { workspace = true }
prometheus = { workspace = true }
prost = { workspace = true }
rand = { workspace = true }
rand_core = { workspace = true }
reqwest = { workspace = true }
Expand All @@ -43,6 +44,7 @@ reqwest-retry = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
serde_yaml = { workspace = true }
snap = "1.1"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't want to use the workspace version?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's only used in this crate actually, so doesn't make sense to include it in the workspace

thiserror = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
Expand Down
20 changes: 20 additions & 0 deletions crates/aptos-telemetry-service/e2e-test/cleanup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,26 @@ else
echo -e "${YELLOW}No node process found${NC}"
fi
fi

# Stop telemetry service
#
# Preferred path: use the PID recorded in $TEST_DIR/telemetry.pid.
# Fallback path (no PID file): stop whatever is LISTENING on port 8082.
if [ -f "$TEST_DIR/telemetry.pid" ]; then
    TELEMETRY_PID=$(cat "$TEST_DIR/telemetry.pid")
    # Quote the PID so an empty or malformed PID file cannot word-split into
    # extra arguments; an empty value simply falls through to "not running".
    if ps -p "$TELEMETRY_PID" > /dev/null 2>&1; then
        # Only report success when the signal was actually delivered.
        if kill "$TELEMETRY_PID" 2>/dev/null; then
            echo -e "${GREEN}✓ Telemetry service stopped (PID: $TELEMETRY_PID)${NC}"
        else
            echo -e "${YELLOW}Failed to stop telemetry service (PID: $TELEMETRY_PID)${NC}"
        fi
    else
        echo -e "${YELLOW}Telemetry process not running${NC}"
    fi
    # -f: do not fail the cleanup if the file vanished in the meantime.
    rm -f "$TEST_DIR/telemetry.pid"
else
    # Try to find and kill any telemetry service process on port 8082.
    # Capture the LISTENER PIDs once and kill exactly those: a bare
    # `lsof -t -i:8082` also matches clients that merely hold a connection
    # to the port, which must not be killed.
    TELEMETRY_PORT_PIDS=$(lsof -Pi :8082 -sTCP:LISTEN -t 2>/dev/null || true)
    if [ -n "$TELEMETRY_PORT_PIDS" ]; then
        # Intentionally unquoted: lsof -t prints one PID per line and each
        # must be passed to kill as a separate argument.
        kill $TELEMETRY_PORT_PIDS 2>/dev/null || true
        echo -e "${GREEN}✓ Stopped process on port 8082${NC}"
    else
        echo -e "${YELLOW}No telemetry service process found${NC}"
    fi
fi
echo ""

# Ask about data removal
Expand Down
35 changes: 33 additions & 2 deletions crates/aptos-telemetry-service/e2e-test/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
# Docker Compose setup for E2E testing of Aptos Telemetry Service
# Runs VictoriaMetrics, Prometheus, Loki, and Grafana locally for telemetry data ingestion testing
#
# Architecture:
# - VictoriaMetrics: Primary metrics backend (accepts Prometheus text format via /api/v1/import/prometheus)
# - Prometheus: Secondary metrics store (accepts remote write via /api/v1/write)
# - Loki: Log aggregation backend
# - Grafana: Visualization (queries all backends)
version: '3.8'

services:
# VictoriaMetrics - Time-series database for metrics
# VictoriaMetrics - Primary metrics backend for telemetry ingestion
# Accepts Prometheus text format via /api/v1/import/prometheus (simpler than protobuf)
victoria-metrics:
image: victoriametrics/victoria-metrics:latest
container_name: telemetry-victoria-metrics
Expand All @@ -23,6 +29,28 @@ services:
timeout: 5s
retries: 3

# Prometheus - Secondary metrics backend for remote write
prometheus:
image: prom/prometheus:latest
container_name: telemetry-prometheus
ports:
- "9090:9090" # HTTP API for queries and remote write
volumes:
- ./prometheus.yaml:/etc/prometheus/prometheus.yml
- prometheus-data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=30d'
- '--web.enable-remote-write-receiver' # Enable receiving remote writes
- '--web.enable-lifecycle' # Enable /-/reload endpoint
restart: unless-stopped
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://localhost:9090/-/healthy"]
interval: 10s
timeout: 5s
retries: 3

# Loki - Log aggregation system
loki:
image: grafana/loki:3.0.0
Expand Down Expand Up @@ -56,11 +84,14 @@ services:
restart: unless-stopped
depends_on:
- victoria-metrics
- prometheus
- loki

volumes:
victoria-data:
driver: local
prometheus-data:
driver: local
loki-data:
driver: local
grafana-data:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
# Grafana datasources for E2E testing
# Auto-configures VictoriaMetrics, Prometheus, and Loki datasources
#
# Architecture:
# - VictoriaMetrics (default): Receives telemetry metrics via text format import
# - Prometheus: Receives telemetry metrics via remote write, provides alternative PromQL endpoint
# - Loki: Receives telemetry logs
#
# Both VictoriaMetrics and Prometheus support PromQL, so queries work on either.

apiVersion: 1

datasources:
# VictoriaMetrics (Prometheus-compatible) datasource
# VictoriaMetrics - Primary metrics backend (receives telemetry via import)
- name: VictoriaMetrics
type: prometheus
access: proxy
Expand All @@ -15,7 +21,18 @@ datasources:
timeInterval: 10s
editable: true

# Loki datasource
# Prometheus - Secondary metrics backend (receives telemetry via remote write)
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus:9090
isDefault: false
jsonData:
httpMethod: POST
timeInterval: 15s
editable: true

# Loki datasource - log aggregation
- name: Loki
type: loki
access: proxy
Expand Down
Loading
Loading