diff --git a/.github/workflows/build-and-push-uptime-service.yml b/.github/workflows/build-and-push-uptime-service.yml new file mode 100644 index 0000000..323ee58 --- /dev/null +++ b/.github/workflows/build-and-push-uptime-service.yml @@ -0,0 +1,47 @@ +--- +name: Build and push Uptime Service docker image + +on: + workflow_call: + inputs: + ref: + description: "git ref: hash, branch, tag to build uptime-service files from" + type: string + required: true + +jobs: + main: + name: Build Uptime Service + runs-on: ubuntu-24.04 + steps: + - name: Checkout source code + uses: actions/checkout@v4 + with: + ref: ${{ inputs.ref }} + fetch-depth: 0 + + - name: Call action get-ref-properties + id: get-ref-properties + uses: Cardinal-Cryptography/github-actions/get-ref-properties@v7 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Public Amazon ECR + uses: docker/login-action@v3 + with: + registry: ${{ vars.ECR_PUBLIC_HOST }} + username: ${{ secrets.AWS_MAINNET_ECR_CC_ACCESS_KEY_ID }} + password: ${{ secrets.AWS_MAINNET_ECR_CC_ACCESS_KEY }} + + - name: Build and push docker image + id: build-image + uses: docker/build-push-action@v6 + with: + context: ./ts/uptime-service + file: ./ts/uptime-service/Dockerfile + push: true + # yamllint disable rule:line-length + tags: | + ${{ vars.ECR_CC_RES_PUBLIC_REGISTRY }}uptime-service:${{ steps.get-ref-properties.outputs.sha }} + ${{ github.ref == 'refs/heads/main' && format('{0}uptime-service:latest', vars.ECR_CC_RES_PUBLIC_REGISTRY) || '' }} diff --git a/ts/pnpm-workspace.yaml b/ts/pnpm-workspace.yaml index d53251a..2d9fa18 100644 --- a/ts/pnpm-workspace.yaml +++ b/ts/pnpm-workspace.yaml @@ -6,3 +6,4 @@ packages: - "shielder-sdk" - "shielder-sdk-tests" - "!shielder-sdk-crypto-mobile" + - "!uptime-service" diff --git a/ts/uptime-service/.env.example b/ts/uptime-service/.env.example new file mode 100644 index 0000000..71223f5 --- /dev/null +++ b/ts/uptime-service/.env.example @@ -0,0 +1,27 @@ +# 
# Build stage: install production dependencies and stage the sources.
FROM oven/bun:1.1.38-slim AS builder

WORKDIR /app

# Copy package files
# NOTE(review): the repository ships a text-format `bun.lock`; confirm the
# pinned Bun version reads that format, otherwise --frozen-lockfile has no
# lockfile to enforce.
COPY package.json bun.lock* ./

# Install dependencies
RUN bun install --frozen-lockfile --production

# Copy source code
COPY src ./src

# Runtime stage: only node_modules, package.json and src are carried over.
FROM oven/bun:1.1.38-slim

WORKDIR /app

# Install ca-certificates for HTTPS requests
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# Copy dependencies and source from builder
COPY --from=builder /app/node_modules ./node_modules
COPY --from=builder /app/package.json ./package.json
COPY --from=builder /app/src ./src

# Create non-root user
RUN useradd -r -s /bin/false appuser && \
    chown -R appuser:appuser /app

USER appuser

# The service falls back to port 9090 when PORT is unset (see src/config.js),
# while this image exposes and health-checks 9615. Pin the default here so the
# listener, EXPOSE and HEALTHCHECK agree; it can still be overridden at runtime.
ENV PORT=9615

# Expose metrics port
EXPOSE 9615

# Health check: probe the port the service is actually configured to listen on,
# so overriding PORT at runtime does not leave the container marked unhealthy.
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
  CMD bun run -e "fetch('http://localhost:' + (process.env.PORT || 9615) + '/health').then(r => r.ok ? process.exit(0) : process.exit(1)).catch(() => process.exit(1))"

ENTRYPOINT ["bun", "run", "src/index.js"]
+PORT=9090 +PROBE_INTERVAL=30000 +TIMEOUT=5000 +ENDPOINTS='[ + {"name":"frontend","url":"http://localhost:3000/health"}, + {"name":"backend-api","url":"http://localhost:8080/health","expectedStatus":200}, + {"name":"redis","url":"http://localhost:6379/health"} +]' +``` + +## Running the Service + +### Development Mode + +```bash +bun run dev +``` + +This runs the service with auto-reload on file changes. + +### Production Mode + +```bash +bun start +``` + +Or run directly: + +```bash +bun run src/index.js +``` + +## Exposed Endpoints + +The service exposes the following HTTP endpoints: + +- **`/metrics`** - Prometheus metrics endpoint (for scraping) +- **`/health`** - Health check for the service itself +- **`/`** - Service information and available endpoints + +## Prometheus Metrics + +The service exposes the following metrics: + +### `service_up` + +**Type:** Gauge +**Description:** Service availability status (1 = up, 0 = down) +**Labels:** `service_name`, `endpoint` + +### `service_response_time_seconds` + +**Type:** Histogram +**Description:** Service response time in seconds +**Labels:** `service_name`, `endpoint` +**Buckets:** 0.001, 0.01, 0.1, 0.5, 1, 2, 5, 10 + +### `service_last_probe_timestamp` + +**Type:** Gauge +**Description:** Unix timestamp of the last probe attempt +**Labels:** `service_name`, `endpoint` + +## Grafana Dashboard + +### Example Queries + +**Current Uptime Status:** + +```promql +service_up +``` + +**Uptime Percentage (last 24h):** + +```promql +avg_over_time(service_up[24h]) * 100 +``` + +**Average Response Time:** + +```promql +rate(service_response_time_seconds_sum[5m]) / rate(service_response_time_seconds_count[5m]) +``` + +### Alert Rules + +**Service Down Alert:** + +```yaml +groups: + - name: uptime_alerts + rules: + - alert: ServiceDown + expr: service_up == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "Service {{ $labels.service_name }} is down" + description: "{{ $labels.service_name }} has been down 
for more than 2 minutes" +``` + +**High Response Time Alert:** + +```yaml +- alert: HighResponseTime + expr: rate(service_response_time_seconds_sum[5m]) / rate(service_response_time_seconds_count[5m]) > 1 + for: 5m + labels: + severity: warning + annotations: + summary: "High response time for {{ $labels.service_name }}" + description: "{{ $labels.service_name }} response time is above 1 second" +``` diff --git a/ts/uptime-service/bun.lock b/ts/uptime-service/bun.lock new file mode 100644 index 0000000..a7081b8 --- /dev/null +++ b/ts/uptime-service/bun.lock @@ -0,0 +1,159 @@ +{ + "lockfileVersion": 1, + "workspaces": { + "": { + "name": "uptime-service", + "dependencies": { + "express": "^4.18.2", + "prom-client": "^15.1.0", + }, + }, + }, + "packages": { + "@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="], + + "accepts": ["accepts@1.3.8", "", { "dependencies": { "mime-types": "~2.1.34", "negotiator": "0.6.3" } }, "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw=="], + + "array-flatten": ["array-flatten@1.1.1", "", {}, "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg=="], + + "bintrees": ["bintrees@1.0.2", "", {}, "sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw=="], + + "body-parser": ["body-parser@1.20.3", "", { "dependencies": { "bytes": "3.1.2", "content-type": "~1.0.5", "debug": "2.6.9", "depd": "2.0.0", "destroy": "1.2.0", "http-errors": "2.0.0", "iconv-lite": "0.4.24", "on-finished": "2.4.1", "qs": "6.13.0", "raw-body": "2.5.2", "type-is": "~1.6.18", "unpipe": "1.0.0" } }, "sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g=="], + + "bytes": ["bytes@3.1.2", "", {}, "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg=="], + + 
"call-bind-apply-helpers": ["call-bind-apply-helpers@1.0.2", "", { "dependencies": { "es-errors": "^1.3.0", "function-bind": "^1.1.2" } }, "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ=="], + + "call-bound": ["call-bound@1.0.4", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "get-intrinsic": "^1.3.0" } }, "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg=="], + + "content-disposition": ["content-disposition@0.5.4", "", { "dependencies": { "safe-buffer": "5.2.1" } }, "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ=="], + + "content-type": ["content-type@1.0.5", "", {}, "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA=="], + + "cookie": ["cookie@0.7.1", "", {}, "sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w=="], + + "cookie-signature": ["cookie-signature@1.0.6", "", {}, "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ=="], + + "debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="], + + "depd": ["depd@2.0.0", "", {}, "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw=="], + + "destroy": ["destroy@1.2.0", "", {}, "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg=="], + + "dunder-proto": ["dunder-proto@1.0.1", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="], + + "ee-first": ["ee-first@1.1.1", "", {}, "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow=="], + + "encodeurl": 
["encodeurl@2.0.0", "", {}, "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg=="], + + "es-define-property": ["es-define-property@1.0.1", "", {}, "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g=="], + + "es-errors": ["es-errors@1.3.0", "", {}, "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw=="], + + "es-object-atoms": ["es-object-atoms@1.1.1", "", { "dependencies": { "es-errors": "^1.3.0" } }, "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA=="], + + "escape-html": ["escape-html@1.0.3", "", {}, "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow=="], + + "etag": ["etag@1.8.1", "", {}, "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg=="], + + "express": ["express@4.21.2", "", { "dependencies": { "accepts": "~1.3.8", "array-flatten": "1.1.1", "body-parser": "1.20.3", "content-disposition": "0.5.4", "content-type": "~1.0.4", "cookie": "0.7.1", "cookie-signature": "1.0.6", "debug": "2.6.9", "depd": "2.0.0", "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", "finalhandler": "1.3.1", "fresh": "0.5.2", "http-errors": "2.0.0", "merge-descriptors": "1.0.3", "methods": "~1.1.2", "on-finished": "2.4.1", "parseurl": "~1.3.3", "path-to-regexp": "0.1.12", "proxy-addr": "~2.0.7", "qs": "6.13.0", "range-parser": "~1.2.1", "safe-buffer": "5.2.1", "send": "0.19.0", "serve-static": "1.16.2", "setprototypeof": "1.2.0", "statuses": "2.0.1", "type-is": "~1.6.18", "utils-merge": "1.0.1", "vary": "~1.1.2" } }, "sha512-28HqgMZAmih1Czt9ny7qr6ek2qddF4FclbMzwhCREB6OFfH+rXAnuNCwo1/wFvrtbgsQDb4kSbX9de9lFbrXnA=="], + + "finalhandler": ["finalhandler@1.3.1", "", { "dependencies": { "debug": "2.6.9", "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "on-finished": "2.4.1", "parseurl": "~1.3.3", "statuses": "2.0.1", 
"unpipe": "~1.0.0" } }, "sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ=="], + + "forwarded": ["forwarded@0.2.0", "", {}, "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow=="], + + "fresh": ["fresh@0.5.2", "", {}, "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q=="], + + "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], + + "get-intrinsic": ["get-intrinsic@1.3.0", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "es-define-property": "^1.0.1", "es-errors": "^1.3.0", "es-object-atoms": "^1.1.1", "function-bind": "^1.1.2", "get-proto": "^1.0.1", "gopd": "^1.2.0", "has-symbols": "^1.1.0", "hasown": "^2.0.2", "math-intrinsics": "^1.1.0" } }, "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ=="], + + "get-proto": ["get-proto@1.0.1", "", { "dependencies": { "dunder-proto": "^1.0.1", "es-object-atoms": "^1.0.0" } }, "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g=="], + + "gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="], + + "has-symbols": ["has-symbols@1.1.0", "", {}, "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ=="], + + "hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="], + + "http-errors": ["http-errors@2.0.0", "", { "dependencies": { "depd": "2.0.0", "inherits": "2.0.4", "setprototypeof": "1.2.0", "statuses": "2.0.1", "toidentifier": "1.0.1" } }, "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ=="], + + "iconv-lite": 
["iconv-lite@0.4.24", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3" } }, "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA=="], + + "inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="], + + "ipaddr.js": ["ipaddr.js@1.9.1", "", {}, "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="], + + "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="], + + "media-typer": ["media-typer@0.3.0", "", {}, "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ=="], + + "merge-descriptors": ["merge-descriptors@1.0.3", "", {}, "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ=="], + + "methods": ["methods@1.1.2", "", {}, "sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w=="], + + "mime": ["mime@1.6.0", "", { "bin": { "mime": "cli.js" } }, "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="], + + "mime-db": ["mime-db@1.52.0", "", {}, "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="], + + "mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="], + + "ms": ["ms@2.0.0", "", {}, "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="], + + "negotiator": ["negotiator@0.6.3", "", {}, "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg=="], + + "object-inspect": ["object-inspect@1.13.4", "", {}, 
"sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="], + + "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="], + + "parseurl": ["parseurl@1.3.3", "", {}, "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ=="], + + "path-to-regexp": ["path-to-regexp@0.1.12", "", {}, "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ=="], + + "prom-client": ["prom-client@15.1.3", "", { "dependencies": { "@opentelemetry/api": "^1.4.0", "tdigest": "^0.1.1" } }, "sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g=="], + + "proxy-addr": ["proxy-addr@2.0.7", "", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="], + + "qs": ["qs@6.13.0", "", { "dependencies": { "side-channel": "^1.0.6" } }, "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg=="], + + "range-parser": ["range-parser@1.2.1", "", {}, "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg=="], + + "raw-body": ["raw-body@2.5.2", "", { "dependencies": { "bytes": "3.1.2", "http-errors": "2.0.0", "iconv-lite": "0.4.24", "unpipe": "1.0.0" } }, "sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA=="], + + "safe-buffer": ["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="], + + "safer-buffer": ["safer-buffer@2.1.2", "", {}, "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="], + + "send": ["send@0.19.0", "", { "dependencies": { "debug": "2.6.9", "depd": "2.0.0", "destroy": 
"1.2.0", "encodeurl": "~1.0.2", "escape-html": "~1.0.3", "etag": "~1.8.1", "fresh": "0.5.2", "http-errors": "2.0.0", "mime": "1.6.0", "ms": "2.1.3", "on-finished": "2.4.1", "range-parser": "~1.2.1", "statuses": "2.0.1" } }, "sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw=="], + + "serve-static": ["serve-static@1.16.2", "", { "dependencies": { "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "parseurl": "~1.3.3", "send": "0.19.0" } }, "sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw=="], + + "setprototypeof": ["setprototypeof@1.2.0", "", {}, "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="], + + "side-channel": ["side-channel@1.1.0", "", { "dependencies": { "es-errors": "^1.3.0", "object-inspect": "^1.13.3", "side-channel-list": "^1.0.0", "side-channel-map": "^1.0.1", "side-channel-weakmap": "^1.0.2" } }, "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw=="], + + "side-channel-list": ["side-channel-list@1.0.0", "", { "dependencies": { "es-errors": "^1.3.0", "object-inspect": "^1.13.3" } }, "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA=="], + + "side-channel-map": ["side-channel-map@1.0.1", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.5", "object-inspect": "^1.13.3" } }, "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA=="], + + "side-channel-weakmap": ["side-channel-weakmap@1.0.2", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.5", "object-inspect": "^1.13.3", "side-channel-map": "^1.0.1" } }, "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A=="], + + "statuses": ["statuses@2.0.1", "", {}, 
"sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ=="], + + "tdigest": ["tdigest@0.1.2", "", { "dependencies": { "bintrees": "1.0.2" } }, "sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA=="], + + "toidentifier": ["toidentifier@1.0.1", "", {}, "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA=="], + + "type-is": ["type-is@1.6.18", "", { "dependencies": { "media-typer": "0.3.0", "mime-types": "~2.1.24" } }, "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g=="], + + "unpipe": ["unpipe@1.0.0", "", {}, "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ=="], + + "utils-merge": ["utils-merge@1.0.1", "", {}, "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA=="], + + "vary": ["vary@1.1.2", "", {}, "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg=="], + + "send/encodeurl": ["encodeurl@1.0.2", "", {}, "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w=="], + + "send/ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], + } +} diff --git a/ts/uptime-service/package.json b/ts/uptime-service/package.json new file mode 100644 index 0000000..be502c9 --- /dev/null +++ b/ts/uptime-service/package.json @@ -0,0 +1,24 @@ +{ + "name": "uptime-service", + "version": "1.0.0", + "description": "Uptime monitoring service that exposes Prometheus metrics for health endpoints", + "main": "src/index.js", + "type": "module", + "scripts": { + "start": "bun run src/index.js", + "dev": "bun --watch src/index.js" + }, + "keywords": [ + "uptime", + "monitoring", + "prometheus", + "health-check", + "metrics" + ], + "author": "", + "license": "MIT", + "dependencies": { + 
/**
 * Configuration loader - reads and validates environment variables
 */

// URL schemes the prober is allowed to request.
const SUPPORTED_PROTOCOLS = new Set(["http:", "https:"]);

// Largest delay setInterval/setTimeout honour; larger values are coerced to 1ms,
// which would turn a "very long" probe interval into a tight loop.
const MAX_TIMER_DELAY_MS = 2147483647;

/**
 * Report a fatal configuration error and terminate the process.
 *
 * @param {string} message - Reason, printed after the "ERROR: " prefix.
 */
function exitWithError(message) {
  console.error(`ERROR: ${message}`);
  process.exit(1);
}

/**
 * Read a non-negative base-10 integer from an environment variable.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or empty.
 * @returns {number} The parsed integer, `fallback`, or NaN when the value is
 *   present but is not made up solely of decimal digits (e.g. "abc", "80x").
 */
function readIntegerEnv(name, fallback) {
  const raw = process.env[name];
  if (raw === undefined || raw === "") {
    return fallback;
  }

  const trimmed = raw.trim();
  return /^\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;
}

/**
 * @param {unknown} value
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Inclusive upper bound.
 * @returns {boolean} True when `value` is an integer within [min, max]; NaN and
 *   non-numbers are rejected.
 */
function isIntegerInRange(value, min, max) {
  return Number.isInteger(value) && value >= min && value <= max;
}

/**
 * Validate one ENDPOINTS entry and fill in its defaults.
 *
 * @param {unknown} endpoint - Raw entry from the parsed JSON array.
 * @param {number} index - Position in the array, used in error messages.
 * @returns {{name: string, url: string, method: string, expectedStatus: number}}
 * @throws {Error} When a field is missing or has an unusable value.
 */
function normalizeEndpoint(endpoint, index) {
  if (
    endpoint === null ||
    typeof endpoint !== "object" ||
    Array.isArray(endpoint)
  ) {
    throw new Error(`Endpoint at index ${index} must be a JSON object`);
  }

  const { name, url } = endpoint;

  if (!name || typeof name !== "string") {
    throw new Error(`Endpoint at index ${index} missing required 'name' field`);
  }
  if (!url || typeof url !== "string") {
    throw new Error(`Endpoint at index ${index} missing required 'url' field`);
  }

  let protocol;
  try {
    ({ protocol } = new URL(url));
  } catch {
    throw new Error(`Endpoint at index ${index} has an invalid 'url': ${url}`);
  }
  if (!SUPPORTED_PROTOCOLS.has(protocol)) {
    throw new Error(`Endpoint at index ${index} 'url' must use http or https`);
  }

  const method = endpoint.method || "GET";
  if (typeof method !== "string") {
    throw new Error(`Endpoint at index ${index} 'method' must be a string`);
  }

  // The prober compares with ===, so a string such as "200" would never match
  // a numeric response status; reject it here instead of reporting DOWN forever.
  const expectedStatus = endpoint.expectedStatus ?? 200;
  if (!isIntegerInRange(expectedStatus, 100, 599)) {
    throw new Error(
      `Endpoint at index ${index} 'expectedStatus' must be an integer HTTP status code (100-599)`
    );
  }

  // fetch() only normalises the case of a few standard methods, so upper-case
  // here to make e.g. "patch" behave as intended.
  return { name, url, method: method.toUpperCase(), expectedStatus };
}

/**
 * Parse the ENDPOINTS JSON document into validated endpoint descriptors.
 *
 * @param {string} raw - Contents of the ENDPOINTS environment variable.
 * @returns {Array<{name: string, url: string, method: string, expectedStatus: number}>}
 * @throws {Error} On malformed JSON, a non-array or empty array, an invalid
 *   entry, or duplicate endpoint names.
 */
function parseEndpoints(raw) {
  const parsed = JSON.parse(raw);

  if (!Array.isArray(parsed)) {
    throw new Error("ENDPOINTS must be a JSON array");
  }
  if (parsed.length === 0) {
    throw new Error("ENDPOINTS array cannot be empty");
  }

  const endpoints = parsed.map((entry, index) => normalizeEndpoint(entry, index));

  // Names become metric label values, so they must be unique.
  const seen = new Set();
  const duplicates = new Set();
  for (const { name } of endpoints) {
    if (seen.has(name)) {
      duplicates.add(name);
    }
    seen.add(name);
  }
  if (duplicates.size > 0) {
    throw new Error(
      `Duplicate endpoint names found: ${[...duplicates].join(", ")}`
    );
  }

  return endpoints;
}

/**
 * Build the service configuration from environment variables.
 *
 * Reads PORT (default 9090), PROBE_INTERVAL (default 30000 ms), TIMEOUT
 * (default 5000 ms) and the required ENDPOINTS JSON array. Any invalid value is
 * reported on stderr and terminates the process with exit code 1.
 *
 * @returns {{port: number, probeInterval: number, timeout: number,
 *   endpoints: Array<{name: string, url: string, method: string, expectedStatus: number}>}}
 */
export function loadConfig() {
  // Load environment variables (Bun automatically loads .env)
  const config = {
    port: readIntegerEnv("PORT", 9090),
    probeInterval: readIntegerEnv("PROBE_INTERVAL", 30000),
    timeout: readIntegerEnv("TIMEOUT", 5000),
    endpoints: []
  };

  // Parse and validate endpoints
  if (!process.env.ENDPOINTS) {
    exitWithError("ENDPOINTS environment variable is required");
  }

  try {
    config.endpoints = parseEndpoints(process.env.ENDPOINTS);
  } catch (error) {
    exitWithError(`Failed to parse ENDPOINTS: ${error.message}`);
  }

  // Validate config values. isIntegerInRange also rejects NaN, which a plain
  // `value < min` comparison would let through.
  if (!isIntegerInRange(config.port, 1, 65535)) {
    exitWithError("PORT must be between 1 and 65535");
  }

  if (!isIntegerInRange(config.probeInterval, 1000, Number.MAX_SAFE_INTEGER)) {
    exitWithError("PROBE_INTERVAL must be at least 1000ms");
  }
  if (config.probeInterval > MAX_TIMER_DELAY_MS) {
    exitWithError(`PROBE_INTERVAL must be at most ${MAX_TIMER_DELAY_MS}ms`);
  }

  if (!isIntegerInRange(config.timeout, 100, MAX_TIMER_DELAY_MS)) {
    exitWithError("TIMEOUT must be at least 100ms");
  }

  return config;
}
/**
 * Perform a single health check on an endpoint
 *
 * Issues one HTTP request and reports the outcome through recordSuccess /
 * recordFailure. The endpoint counts as UP only when the response status
 * equals `endpoint.expectedStatus`. This function never rejects: network
 * errors and timeouts are recorded as failures and logged.
 *
 * @param {{name: string, url: string, method: string, expectedStatus: number}} endpoint
 *   Endpoint descriptor to probe.
 * @param {number} timeout - Milliseconds to wait for the response before aborting.
 * @returns {Promise<boolean>} true when the endpoint is UP, false otherwise.
 */
async function probeEndpoint(endpoint, timeout) {
  // Monotonic clock: wall-clock adjustments must not skew (or negate) durations.
  const startedAt = performance.now();

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);

  try {
    const response = await fetch(endpoint.url, {
      method: endpoint.method,
      signal: controller.signal,
      headers: {
        "User-Agent": "uptime-service/1.0"
      }
    });

    const responseTime = (performance.now() - startedAt) / 1000; // Convert to seconds

    // Only the status line matters; drop the body so the connection is released
    // instead of staying tied to an unread stream. Best-effort: a failure to
    // cancel must not turn a completed probe into a failed one.
    if (response.body) {
      await response.body.cancel().catch(() => {});
    }

    // Check if status code matches expected
    if (response.status === endpoint.expectedStatus) {
      recordSuccess(endpoint.name, endpoint.url, responseTime);
      console.log(
        `[${new Date().toISOString()}] ✓ ${endpoint.name} - UP (${
          response.status
        }, ${responseTime.toFixed(3)}s)`
      );
      return true;
    }

    recordFailure(endpoint.name, endpoint.url);
    console.log(
      `[${new Date().toISOString()}] ✗ ${endpoint.name} - DOWN (status: ${
        response.status
      }, expected: ${endpoint.expectedStatus})`
    );
    return false;
  } catch (error) {
    recordFailure(endpoint.name, endpoint.url);

    // controller.abort() surfaces as an AbortError; report it as a timeout.
    const errorMessage =
      error?.name === "AbortError"
        ? "timeout"
        : error?.message ?? String(error);

    console.log(
      `[${new Date().toISOString()}] ✗ ${
        endpoint.name
      } - DOWN (${errorMessage})`
    );
    return false;
  } finally {
    // Always disarm the abort timer, including when fetch rejected early
    // (e.g. connection refused); otherwise one stray timer per failed probe
    // stays armed until it fires.
    clearTimeout(timeoutId);
  }
}

/**
 * Probe all configured endpoints
 *
 * Starts one probe per endpoint concurrently and resolves once every probe has
 * finished.
 *
 * @param {Array<{name: string, url: string, method: string, expectedStatus: number}>} endpoints
 * @param {number} timeout - Per-request timeout in milliseconds.
 * @returns {Promise<void>}
 */
async function probeAllEndpoints(endpoints, timeout) {
  const promises = endpoints.map((endpoint) =>
    probeEndpoint(endpoint, timeout)
  );
  await Promise.all(promises);
}
/**
 * Create and start the metrics HTTP server
 *
 * Registers three GET routes on a new Express app: `/health` (liveness of this
 * service), `/metrics` (output of the prom-client registry) and `/` (service
 * description). Any error emitted by the listening server is logged and ends
 * the process with exit code 1.
 *
 * @param {number} port - TCP port to listen on.
 * @returns The server object returned by `app.listen`.
 */
export function startMetricsServer(port) {
  const baseUrl = `http://localhost:${port}`;

  // Health check endpoint for the service itself
  const handleHealth = (req, res) => {
    const payload = {
      status: "ok",
      timestamp: new Date().toISOString()
    };
    res.status(200).json(payload);
  };

  // Prometheus metrics endpoint
  const handleMetrics = async (req, res) => {
    try {
      res.set("Content-Type", register.contentType);
      res.send(await register.metrics());
    } catch (error) {
      console.error("Error generating metrics:", error);
      res.status(500).send("Error generating metrics");
    }
  };

  // Root endpoint with service info
  const handleRoot = (req, res) => {
    const info = {
      service: "uptime-service",
      version: "1.0.0",
      endpoints: {
        health: "/health",
        metrics: "/metrics"
      }
    };
    res.status(200).json(info);
  };

  const routes = [
    ["/health", handleHealth],
    ["/metrics", handleMetrics],
    ["/", handleRoot]
  ];

  const app = express();
  for (const [path, handler] of routes) {
    app.get(path, handler);
  }

  const announce = () => {
    console.log(`Metrics server listening on ${baseUrl}`);
    console.log(` - Metrics endpoint: ${baseUrl}/metrics`);
    console.log(` - Health endpoint: ${baseUrl}/health\n`);
  };

  // Handle server errors: every server-level error is fatal for this service.
  const onServerError = (error) => {
    const portTaken = error.code === "EADDRINUSE";
    if (!portTaken) {
      console.error("Server error:", error);
    } else {
      console.error(`ERROR: Port ${port} is already in use`);
    }
    process.exit(1);
  };

  // Start server
  const server = app.listen(port, announce);
  server.on("error", onServerError);

  return server;
}