diff --git a/_unit-test/create-docker-volumes-test.sh b/_unit-test/create-docker-volumes-test.sh
index 2cb9b962a8..83591d3aa8 100755
--- a/_unit-test/create-docker-volumes-test.sh
+++ b/_unit-test/create-docker-volumes-test.sh
@@ -14,6 +14,7 @@ sentry-data
 sentry-kafka
 sentry-postgres
 sentry-redis
+sentry-seaweedfs
 sentry-symbolicator"
 
 before=$(get_volumes)
diff --git a/docker-compose.yml b/docker-compose.yml
index f29a9bfd56..e37bc54025 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -36,6 +36,8 @@ x-sentry-defaults: &sentry_defaults
       <<: *depends_on-default
     smtp:
       <<: *depends_on-default
+    seaweedfs:
+      <<: *depends_on-default
     snuba-api:
       <<: *depends_on-default
     symbolicator:
@@ -141,6 +143,7 @@ services:
   kafka:
     <<: *restart_policy
     image: "confluentinc/cp-kafka:7.6.1"
+    user: root
     environment:
       # https://docs.confluent.io/platform/current/installation/docker/config-reference.html#cp-kakfa-example
       KAFKA_PROCESS_ROLES: "broker,controller"
@@ -209,6 +212,45 @@ services:
       interval: 10s
       timeout: 10s
       retries: 30
+  seaweedfs:
+    <<: *restart_policy
+    image: "chrislusf/seaweedfs:3.96_large_disk"
+    entrypoint: "weed"
+    command: >-
+      server
+      -filer=true
+      -filer.port=8888
+      -filer.port.grpc=18888
+      -filer.defaultReplicaPlacement=000
+      -master=true
+      -master.port=9333
+      -master.port.grpc=19333
+      -metricsPort=9091
+      -s3=true
+      -s3.port=8333
+      -s3.port.grpc=18333
+      -volume=true
+      -volume.dir.idx=/data/idx
+      -volume.index=leveldbLarge
+      -volume.max=0
+      -volume.preStopSeconds=8
+      -volume.readMode=redirect
+      -volume.port=8080
+      -volume.port.grpc=18080
+      -ip=127.0.0.1
+      -ip.bind=0.0.0.0
+      -webdav=false
+    environment:
+      AWS_ACCESS_KEY_ID: sentry
+      AWS_SECRET_ACCESS_KEY: sentry
+    volumes:
+      - "sentry-seaweedfs:/data"
+    healthcheck:
+      test: ["CMD", "wget", "-q", "-O-", "http://seaweedfs:8080/healthz", "http://seaweedfs:9333/cluster/healthz", "http://seaweedfs:8333/healthz"]
+      interval: 30s
+      timeout: 20s
+      retries: 5
+      start_period: 60s
   snuba-api:
     <<: *snuba_defaults
   # Kafka consumer responsible for feeding events into Clickhouse
@@ -625,6 +667,8 @@ volumes:
     external: true
   sentry-symbolicator:
     external: true
+  sentry-seaweedfs:
+    external: true
   # This volume stores JS SDK assets and the data inside this volume should
   # be cleaned periodically on upgrades.
   sentry-nginx-www:
diff --git a/install.sh b/install.sh
index d7f8a036f4..c6b3a62a17 100755
--- a/install.sh
+++ b/install.sh
@@ -36,6 +36,7 @@ source install/ensure-relay-credentials.sh
 source install/generate-secret-key.sh
 source install/update-docker-images.sh
 source install/build-docker-images.sh
+source install/bootstrap-s3-nodestore.sh
 source install/bootstrap-snuba.sh
 source install/upgrade-postgres.sh
 source install/ensure-correct-permissions-profiles-dir.sh
diff --git a/install/bootstrap-s3-nodestore.sh b/install/bootstrap-s3-nodestore.sh
new file mode 100644
index 0000000000..4ff465c146
--- /dev/null
+++ b/install/bootstrap-s3-nodestore.sh
@@ -0,0 +1,30 @@
+echo "${_group}Bootstrapping seaweedfs (node store)..."
+
+$dc up --wait seaweedfs postgres
+# Install s3cmd inside the running seaweedfs container; it is used below to list and create buckets.
+$dc exec seaweedfs apk add --no-cache s3cmd
+$dc exec seaweedfs mkdir -p /data/idx/
+s3cmd="$dc exec seaweedfs s3cmd"
+
+bucket_list=$($s3cmd --access_key=sentry --secret_key=sentry --no-ssl --region=us-east-1 --host=localhost:8333 --host-bucket='localhost:8333/%(bucket)' ls)
+
+# $bucket_list holds the *output* of `s3cmd ls`, so it must be piped into the filter;
+# expanding it in command position would try to execute the listing itself.
+if [[ $(echo "$bucket_list" | tail -1 | awk '{print $3}') != 's3://nodestore' ]]; then
+  # Only touch if no existing nodestore config is found
+  if ! grep -q "SENTRY_NODESTORE" "$SENTRY_CONFIG_PY"; then
+    nodestore_config=$(sed -n '/SENTRY_NODESTORE/,/[}]/{p}' sentry/sentry.conf.example.py)
+    # "t" means nodestore_node already has rows in Postgres: enable read_through/delete_through
+    # so those rows (presumably) stay reachable via the Django node store - confirm with backend docs.
+    if [[ $($dc exec postgres psql -qAt -U postgres -c "select exists (select * from nodestore_node limit 1)") = "t" ]]; then
+      nodestore_config=$(echo -e "$nodestore_config" | sed '$s/\}/    "read_through": True,\n    "delete_through": True,\n\}/')
+    fi
+    echo "$nodestore_config" >>"$SENTRY_CONFIG_PY"
+  fi
+  $dc exec seaweedfs mkdir -p /data/idx/
+  $s3cmd --access_key=sentry --secret_key=sentry --no-ssl --region=us-east-1 --host=localhost:8333 --host-bucket='localhost:8333/%(bucket)' mb s3://nodestore
+else
+  echo "Node store already exists, skipping..."
+fi
+
+echo "${_endgroup}"
diff --git a/install/create-docker-volumes.sh b/install/create-docker-volumes.sh
index fdbecc2288..c0437c5563 100644
--- a/install/create-docker-volumes.sh
+++ b/install/create-docker-volumes.sh
@@ -17,5 +17,6 @@ echo "Created $(create_volume sentry-kafka)."
 echo "Created $(create_volume sentry-postgres)."
 echo "Created $(create_volume sentry-redis)."
 echo "Created $(create_volume sentry-symbolicator)."
+echo "Created $(create_volume sentry-seaweedfs)."
 
 echo "${_endgroup}"
diff --git a/sentry/Dockerfile b/sentry/Dockerfile
index 557046f143..40398a773f 100644
--- a/sentry/Dockerfile
+++ b/sentry/Dockerfile
@@ -1,13 +1,15 @@
 ARG SENTRY_IMAGE
 FROM ${SENTRY_IMAGE}
 
+RUN pip install https://github.com/stayallive/sentry-nodestore-s3/archive/main.zip
+
 COPY . /usr/src/sentry
 
 RUN if [ -s /usr/src/sentry/enhance-image.sh ]; then \
     /usr/src/sentry/enhance-image.sh; \
-fi
+    fi
 
 RUN if [ -s /usr/src/sentry/requirements.txt ]; then \
     echo "sentry/requirements.txt is deprecated, use sentry/enhance-image.sh - see https://develop.sentry.dev/self-hosted/#enhance-sentry-image"; \
     pip install -r /usr/src/sentry/requirements.txt; \
-fi
+    fi
diff --git a/sentry/sentry.conf.example.py b/sentry/sentry.conf.example.py
index 826f43ae2d..d47b2e6b7a 100644
--- a/sentry/sentry.conf.example.py
+++ b/sentry/sentry.conf.example.py
@@ -95,6 +95,31 @@ def get_internal_network():
 # See https://develop.sentry.dev/self-hosted/experimental/errors-only/
 SENTRY_SELF_HOSTED_ERRORS_ONLY = env("COMPOSE_PROFILES") != "feature-complete"
 
+################
+# Node Storage #
+################
+
+# Sentry uses an abstraction layer called "node storage" to store raw events.
+# Previously, it used PostgreSQL as the backend, but this didn't scale for
+# high-throughput environments. Read more about this in the documentation:
+# https://develop.sentry.dev/backend/application-domains/nodestore/
+#
+# Through this setting, you can use the provided blob storage or
+# your own S3-compatible API from your infrastructure.
+# Other backend implementations for node storage developed by the community
+# are available in public GitHub repositories.
+
+SENTRY_NODESTORE = "sentry_nodestore_s3.S3PassthroughDjangoNodeStorage"
+SENTRY_NODESTORE_OPTIONS = {
+    "compression": True,
+    "endpoint_url": "http://seaweedfs:8333",
+    "bucket_path": "nodestore",
+    "bucket_name": "nodestore",
+    "region_name": "us-east-1",
+    "aws_access_key_id": "sentry",
+    "aws_secret_access_key": "sentry",
+}
+
 #########
 # Redis #
 #########