Skip to content

Commit 9f546a2

Browse files
authored
Merge pull request #4 from ECADInfra/hardening-and-workers
Harden Dockerfile and integrate worker orchestration
2 parents a6a1fdc + 0a9729a commit 9f546a2

23 files changed

+1877
-237
lines changed

.github/workflows/docker-publish.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@ jobs:
3030
uses: docker/setup-buildx-action@v3
3131

3232
- name: Log in to Container Registry
33-
if: startsWith(github.ref, 'refs/tags/v')
3433
uses: docker/login-action@v3
3534
with:
3635
registry: ${{ env.REGISTRY }}
@@ -44,16 +43,17 @@ jobs:
4443
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
4544
annotations: |
4645
org.opencontainers.image.description=Synapse homeserver with Ed25519 Beacon auth for Tezos dApp/wallet relay
47-
# Tag scheme: push v1.147.1-ecad.1 -> ghcr tags: v1.147.1-ecad.1, latest
4846
tags: |
4947
type=semver,pattern={{version}}
5048
type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/v') }}
49+
type=ref,event=pr
50+
type=ref,event=branch
5151
5252
- name: Build and push Docker image
5353
uses: docker/build-push-action@v6
5454
with:
5555
context: .
56-
push: ${{ startsWith(github.ref, 'refs/tags/v') }}
56+
push: true
5757
tags: ${{ steps.meta.outputs.tags }}
5858
labels: ${{ steps.meta.outputs.labels }}
5959
annotations: ${{ steps.meta.outputs.annotations }}

Dockerfile

Lines changed: 35 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -4,15 +4,37 @@ LABEL org.opencontainers.image.description="Synapse homeserver with Ed25519 Beac
44
LABEL org.opencontainers.image.source="https://github.com/ECADInfra/beacon-synapse"
55
LABEL org.opencontainers.image.licenses="AGPL-3.0-only"
66

7-
# Install dependencies for crypto auth provider
8-
RUN apt-get update && apt-get install -y libsodium-dev gcc netcat-openbsd gettext-base && apt-get clean && rm -rf /var/lib/apt/lists/*
9-
10-
# Install Python packages
11-
RUN pip install --no-cache-dir psycopg2 pysodium
7+
# netcat-openbsd: TCP readiness checks (wait-for.sh)
8+
# gettext-base: envsubst for config templating
9+
# nginx-light, redis-server, supervisor: worker mode orchestration
10+
# No gcc/libsodium-dev needed: PyNaCl (with bundled libsodium) and psycopg2
11+
# are already installed in the base Synapse image.
12+
RUN apt-get update && apt-get install -y --no-install-recommends \
13+
netcat-openbsd gettext-base \
14+
nginx-light redis-server supervisor \
15+
&& apt-get clean && rm -rf /var/lib/apt/lists/*
16+
17+
# Configure nginx for worker mode (remove default site, log to stdout)
18+
RUN rm -f /etc/nginx/sites-enabled/default && \
19+
ln -sf /dev/stdout /var/log/nginx/access.log && \
20+
ln -sf /dev/stderr /var/log/nginx/error.log
21+
22+
# Symlink binaries to expected locations (configure_workers_and_start.py expects these)
23+
RUN mkdir -p /etc/supervisor/conf.d && \
24+
ln -s /usr/bin/supervisord /usr/local/bin/supervisord && \
25+
ln -s /usr/bin/supervisorctl /usr/local/bin/supervisorctl && \
26+
ln -s /usr/bin/redis-server /usr/local/bin/redis-server
1227

1328
# Create keys and data directories
1429
RUN mkdir -p /keys /data
1530

31+
# Increase max event size (1MB instead of default 64KB).
32+
# Beacon messages can exceed the default Matrix PDU size limit.
33+
# Runs before COPY so code changes don't invalidate this layer.
34+
RUN sed -i 's/^MAX_PDU_SIZE = 65536$/MAX_PDU_SIZE = 1048576/' /usr/local/lib/python3.13/site-packages/synapse/api/constants.py && \
35+
grep -q '^MAX_PDU_SIZE = 1048576$' /usr/local/lib/python3.13/site-packages/synapse/api/constants.py || \
36+
(echo "FATAL: PDU size patch failed - 'MAX_PDU_SIZE = 65536' not found in constants.py. Upstream may have changed." >&2 && exit 1)
37+
1638
# Copy custom modules (using Python 3.13 path for Element HQ image)
1739
COPY crypto_auth_provider.py /usr/local/lib/python3.13/site-packages/
1840
COPY beacon_info_module.py /usr/local/lib/python3.13/site-packages/
@@ -21,24 +43,20 @@ COPY beacon_monitor_module.py /usr/local/lib/python3.13/site-packages/
2143
# Copy configuration templates (envsubst at runtime) and static configs
2244
COPY homeserver.yaml /config/homeserver.yaml.template
2345
COPY synapse.log.config /config/
24-
COPY shared_config.yaml /config/shared_config.yaml.template
2546

26-
# Copy worker configuration templates
27-
COPY workers /config/workers.template
28-
29-
# Increase max event size (1MB instead of default 64KB).
30-
# Beacon messages can exceed the default Matrix PDU size limit.
31-
RUN sed -i 's/65536/1048576/' /usr/local/lib/python3.13/site-packages/synapse/api/constants.py && \
32-
grep -q '1048576' /usr/local/lib/python3.13/site-packages/synapse/api/constants.py || \
33-
(echo "FATAL: PDU size patch failed - 65536 not found in constants.py. Upstream may have changed." >&2 && exit 1)
47+
# Copy worker orchestration configs (Jinja2 templates for configure_workers_and_start.py)
48+
COPY conf-workers /conf/
49+
COPY configure_workers_and_start.py /usr/local/bin/
50+
COPY prefix-log /usr/local/bin/
3451

3552
COPY wait-for.sh /usr/local/bin/
3653
COPY synctl_entrypoint.sh /usr/local/bin/
3754

3855
# Expose ports:
39-
# 8008: HTTP (client and federation)
56+
# 8008: HTTP (client + federation; direct in single mode, nginx in worker mode)
57+
# 8080: Main process HTTP (worker mode only, internal)
58+
# 9469: Prometheus service discovery (worker mode, nginx)
4059
# 19090: Metrics for main process (when SYNAPSE_ENABLE_METRICS=1)
41-
# 19091-19094: Metrics for workers 1-4 (when SYNAPSE_ENABLE_METRICS=1 and SYNAPSE_WORKERS=true)
42-
EXPOSE 8008 19090 19091 19092 19093 19094
60+
EXPOSE 8008 8080 9469 19090
4361

4462
ENTRYPOINT ["/usr/local/bin/synctl_entrypoint.sh"]

README.md

Lines changed: 82 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -2,16 +2,6 @@
22

33
Production-ready [Matrix](https://matrix.org/) homeserver image for operating [Beacon](https://tzip.tezosagora.org/proposal/tzip-10/) relay nodes. Beacon is the open communication standard connecting Tezos wallets and dApps, ratified as [TZIP-10](https://tzip.tezosagora.org/proposal/tzip-10/). Matrix provides the federated transport layer.
44

5-
This image is what [ECAD Infra](https://ecadinfra.com) runs in production. We publish it so that anyone can operate Beacon relay infrastructure using the same tooling we do.
6-
7-
## Why operator diversity matters
8-
9-
Beacon relay nodes are the infrastructure that every Tezos wallet-to-dApp connection depends on. When a user pairs a wallet with a dApp, that connection flows through Matrix relay servers.
10-
11-
The Beacon network is stronger when relay infrastructure is operated by **multiple independent organizations** across different regions and hosting providers. Federation is a core design property of the Matrix protocol, and Beacon inherits it: wallets and dApps can communicate regardless of which operator's relay node they connect through. No single organization needs to be a bottleneck or a single point of failure.
12-
13-
This image exists to make running a Beacon relay node straightforward. If you operate infrastructure in the Tezos ecosystem, consider running one.
14-
155
## Provenance
166

177
Beacon relay infrastructure was originally created by [Papers (AirGap)](https://papers.ch/) as part of the [Beacon SDK](https://github.com/airgap-it/beacon-sdk) ecosystem. Papers designed the protocol, built the SDK, and operated the first relay nodes, establishing the standard that the entire Tezos wallet and dApp ecosystem relies on today.
@@ -21,20 +11,51 @@ This image builds on [AirGap's beacon-node](https://github.com/airgap-it/beacon-
2111
### What changed from upstream
2212

2313
- **Synapse v1.98.0 to v1.147.1**: Upgraded to current release
14+
- **`crypto_auth_provider.py` v0.3**: PyNaCl-based Ed25519 auth (no gcc/libsodium-dev build deps), structured logfmt logging, race condition handling
2415
- **`beacon_monitor_module.py`**: Observability module for diagnosing connection and federation issues. Logs operational metadata (room lifecycle, membership changes, payload sizes, login events) in logfmt format. All Beacon message payloads are encrypted end-to-end between wallet and dApp using NaCl cryptobox before reaching the relay server; message content is not and cannot be logged. User and room identifiers are opaque hashes with no link to real-world identity.
2516
- **`beacon_info_module.py`**: HTTP endpoint exposing server region and known relay servers
26-
- **Worker mode**: Support for 4 generic workers behind the main process
17+
- **Worker mode**: Official Element HQ worker orchestration (supervisord + nginx + redis) for horizontal scaling
2718
- **`MAX_PDU_SIZE` patch**: 64KB to 1MB (Beacon messages can exceed the default Matrix limit)
2819
- **logfmt logging**: Structured log output for ingestion into Loki/Grafana/etc.
20+
- **SSRF protection**: `federation_ip_range_blacklist` covering RFC 1918, link-local, and loopback
2921
- **Robust entrypoint**: Template variable substitution, database readiness check, single-process or multi-worker modes
3022

3123
## Quick start
3224

3325
```bash
26+
# Single-process mode (default)
3427
docker compose -f docker-compose.example.yml up --build
28+
29+
# Worker mode
30+
SYNAPSE_WORKERS=true docker compose -f docker-compose.example.yml up --build
31+
```
32+
33+
This starts Synapse with PostgreSQL. The server will be available at `http://localhost:8008`.
34+
35+
## Architecture
36+
37+
### Single-process mode (default)
38+
39+
```
40+
Client/Federation --> :8008 [Synapse] --> PostgreSQL
41+
```
42+
43+
One Synapse process handles everything. Good for development and low-traffic deployments.
44+
45+
### Worker mode
46+
47+
```
48+
Client/Federation --> :8008 [nginx] --> :8080 [Synapse main]
49+
\-> :18009+ [workers]
50+
- synchrotron (sync)
51+
- event_persister (writes)
52+
- federation_inbound (federation)
53+
:9469 [nginx] --> Prometheus service discovery
54+
[redis] <------> inter-process replication
55+
[supervisord] --> manages all processes
3556
```
3657

37-
This starts Synapse with PostgreSQL and Redis. The server will be available at `http://localhost:8008`.
58+
Supervisord orchestrates the main Synapse process, worker processes, nginx, and redis, all inside a single container. nginx routes requests to the appropriate worker based on URL patterns. Redis handles inter-process replication.
3859

3960
## Configuration
4061

@@ -52,11 +73,32 @@ The image uses template variables in `homeserver.yaml` that are substituted at s
5273
| `SERVER_REGION` | No | Region label for the `/beacon/info` endpoint |
5374
| `SYNAPSE_ENABLE_METRICS` | No | Set to `1` to expose Prometheus metrics on port 19090 |
5475
| `SYNAPSE_WORKERS` | No | Set to `true` to enable multi-worker mode |
76+
| `SYNAPSE_WORKER_TYPES` | No | Comma-separated worker types (default: `synchrotron:2,event_persister:1,federation_inbound:1`) |
5577
| `PUBLIC_BASEURL` | No | Public URL for federation (default: `https://SERVER_NAME`) |
5678
| `SERVE_WELLKNOWN` | No | Set to `true` to serve `.well-known/matrix/server` for Cloudflare |
5779
| `DB_CP_MIN` | No | Minimum database connections (default: `20`) |
5880
| `DB_CP_MAX` | No | Maximum database connections (default: `80`) |
5981

82+
### Worker types
83+
84+
The `SYNAPSE_WORKER_TYPES` variable accepts the official Element HQ worker type syntax:
85+
86+
```bash
87+
# Simple list
88+
SYNAPSE_WORKER_TYPES="synchrotron,event_persister,federation_inbound"
89+
90+
# With multipliers
91+
SYNAPSE_WORKER_TYPES="synchrotron:2,event_persister:2,federation_inbound:1"
92+
93+
# Combined workers (merge types into one process)
94+
SYNAPSE_WORKER_TYPES="stream_writers=account_data+presence+typing"
95+
96+
# Custom names
97+
SYNAPSE_WORKER_TYPES="sync=synchrotron:2,persist=event_persister:1"
98+
```
99+
100+
Available types: `synchrotron`, `event_persister`, `federation_inbound`, `federation_sender`, `federation_reader`, `client_reader`, `event_creator`, `media_repository`, `user_dir`, `pusher`, `appservice`, `background_worker`, `account_data`, `presence`, `receipts`, `to_device`, `typing`, `push_rules`, `device_lists`, `thread_subscriptions`.
101+
60102
### Entrypoint options
61103

62104
```bash
@@ -72,11 +114,30 @@ docker run ghcr.io/ecadinfra/beacon-synapse --skip-templating
72114

73115
### Ports
74116

75-
| Port | Service |
76-
|---|---|
77-
| 8008 | HTTP (client + federation) |
78-
| 19090 | Prometheus metrics (main process, when enabled) |
79-
| 19091-19094 | Prometheus metrics (workers 1-4, when enabled) |
117+
| Port | Mode | Service |
118+
|---|---|---|
119+
| 8008 | Both | HTTP (client + federation). Direct in single mode, nginx in worker mode |
120+
| 8080 | Worker | Main Synapse process (internal, behind nginx) |
121+
| 9469 | Worker | Prometheus service discovery + metrics proxy |
122+
| 19090 | Both | Prometheus metrics (main process, when enabled) |
123+
124+
### Prometheus service discovery (worker mode)
125+
126+
When `SYNAPSE_ENABLE_METRICS=1` and worker mode is active, port 9469 serves:
127+
128+
- `GET /metrics/service_discovery` - JSON for Prometheus `http_sd_config`
129+
- `GET /metrics/worker/<name>` - Proxied metrics for each worker
130+
- `GET /metrics/worker/main` - Proxied metrics for the main process
131+
132+
Prometheus config:
133+
134+
```yaml
135+
scrape_configs:
136+
- job_name: beacon-synapse
137+
http_sd_configs:
138+
- url: http://synapse:9469/metrics/service_discovery
139+
honor_labels: true
140+
```
80141
81142
### Federation behind Cloudflare
82143
@@ -93,9 +154,9 @@ This configures Synapse to:
93154
2. Set `public_baseurl` for proper federation discovery
94155

95156
Other Matrix servers will then connect on port 443 instead of 8448. Ensure your reverse proxy routes:
96-
- `/.well-known/matrix/server` Synapse (port 8008)
97-
- `/_matrix/federation/*` Synapse (port 8008)
98-
- `/_matrix/client/*` Synapse (port 8008)
157+
- `/.well-known/matrix/server` -> Synapse (port 8008)
158+
- `/_matrix/federation/*` -> Synapse (port 8008)
159+
- `/_matrix/client/*` -> Synapse (port 8008)
99160

100161
**Important**: Configure Cloudflare to not challenge `/_matrix/federation/*` paths (these are server-to-server requests, not browsers).
101162

@@ -142,7 +203,7 @@ Exposes `/_synapse/client/beacon/info` returning:
142203
If you want to operate a Beacon relay node for the Tezos ecosystem:
143204

144205
1. Deploy this image with the configuration above
145-
2. Ensure port 8443 (or your federation port) is reachable from other relay nodes
206+
2. Ensure port 8448 (or 443 if using `.well-known` delegation) is reachable from other relay nodes
146207
3. Configure federation with existing operators so wallets and dApps on your node can communicate with the broader network
147208
4. Open an issue or reach out to coordinate federation peering
148209

conf-workers/healthcheck.sh.j2

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
#!/bin/sh
2+
# This healthcheck script is designed to return OK when every
3+
# host involved returns OK
4+
{%- for healthcheck_url in healthcheck_urls %}
5+
curl -fSs {{ healthcheck_url }} || exit 1
6+
{%- endfor %}

conf-workers/log.config

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,62 @@
1+
version: 1
2+
formatters:
3+
logfmt:
4+
{% if include_worker_name_in_log_line %}
5+
format: 'ts=%(asctime)s worker={{ worker_name }} logger=%(name)s level=%(levelname)s request=%(request)s %(message)s'
6+
{% else %}
7+
format: 'ts=%(asctime)s logger=%(name)s level=%(levelname)s request=%(request)s %(message)s'
8+
{% endif %}
9+
datefmt: '%Y-%m-%dT%H:%M:%S'
10+
filters:
11+
context:
12+
(): synapse.logging.context.LoggingContextFilter
13+
request: ""
14+
handlers:
15+
console:
16+
class: logging.StreamHandler
17+
formatter: logfmt
18+
filters: [context]
19+
{% if LOG_FILE_PATH %}
20+
file:
21+
class: logging.handlers.TimedRotatingFileHandler
22+
formatter: logfmt
23+
filters: [context]
24+
filename: {{ LOG_FILE_PATH }}
25+
when: midnight
26+
backupCount: 3
27+
encoding: utf8
28+
{% endif %}
29+
loggers:
30+
synapse.storage.SQL:
31+
{% if SYNAPSE_LOG_SENSITIVE %}
32+
level: {{ SYNAPSE_LOG_LEVEL or "DEBUG" }}
33+
{% else %}
34+
level: ERROR
35+
{% endif %}
36+
37+
# Beacon modules: auth, monitoring, info
38+
crypto_auth_provider:
39+
level: INFO
40+
beacon_monitor_module:
41+
level: INFO
42+
beacon_info_module:
43+
level: INFO
44+
45+
# Synapse auth/login (failed logins, rate limiting)
46+
synapse.handlers.auth:
47+
level: WARNING
48+
synapse.rest.client.login:
49+
level: INFO
50+
51+
{% if SYNAPSE_LOG_TESTING %}
52+
synapse.handlers.typing:
53+
level: DEBUG
54+
{% endif %}
55+
root:
56+
level: {{ SYNAPSE_LOG_LEVEL or "ERROR" }}
57+
handlers:
58+
- console
59+
{% if LOG_FILE_PATH %}
60+
- file
61+
{% endif %}
62+
disable_existing_loggers: false

0 commit comments

Comments
 (0)