diff --git a/_unit-test/bootstrap-s3-seaweed-test.sh b/_unit-test/bootstrap-s3-nodestore-test.sh
similarity index 100%
rename from _unit-test/bootstrap-s3-seaweed-test.sh
rename to _unit-test/bootstrap-s3-nodestore-test.sh
diff --git a/_unit-test/bootstrap-s3-profiles-test.sh b/_unit-test/bootstrap-s3-profiles-test.sh
new file mode 100755
index 00000000000..660247fdbaf
--- /dev/null
+++ b/_unit-test/bootstrap-s3-profiles-test.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+source _unit-test/_test_setup.sh
+source install/dc-detect-version.sh
+source install/create-docker-volumes.sh
+source install/ensure-files-from-examples.sh
+export COMPOSE_PROFILES="feature-complete"
+$dc pull vroom
+source install/ensure-correct-permissions-profiles-dir.sh
+
+# Generate some random files on `sentry-vroom` volume for testing
+$dc run --rm --no-deps -v sentry-vroom:/var/vroom/sentry-profiles --entrypoint /bin/bash vroom -c '
+  for i in $(seq 1 1000); do
+    echo This is test file $i > /var/vroom/sentry-profiles/test_file_$i.txt
+  done
+'
+
+# Set the flag to apply automatic updates
+export APPLY_AUTOMATIC_CONFIG_UPDATES=1
+
+# Here we're just gonna test to run it multiple times
+# Only to make sure it doesn't break
+for i in $(seq 1 5); do
+  source install/bootstrap-s3-profiles.sh
+done
+
+# Ensure that the files have been migrated to SeaweedFS
+migrated_files_count=$($dc exec seaweedfs s3cmd --access_key=sentry --secret_key=sentry --no-ssl --region=us-east-1 --host=seaweedfs:8333 --host-bucket="seaweedfs:8333/%(bucket)" ls s3://profiles/ | wc -l)
+if [[ "$migrated_files_count" -ne 1000 ]]; then
+  echo "Error: Expected 1000 migrated files, but found $migrated_files_count"
+  exit 1
+fi
+
+# Manual cleanup, otherwise `create-docker-volumes.sh` will fail
+$dc down -v --remove-orphans
+
+report_success
diff --git a/docker-compose.yml b/docker-compose.yml
index bcc858c0b96..10cb21716b7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -752,7 +752,9 @@ services:
     environment:
       SENTRY_KAFKA_BROKERS_PROFILING: "kafka:9092"
       SENTRY_KAFKA_BROKERS_OCCURRENCES: "kafka:9092"
-      SENTRY_BUCKET_PROFILES: file:///var/vroom/sentry-profiles
+      SENTRY_BUCKET_PROFILES: "s3://profiles?region=us-east-1&endpoint=seaweedfs:8333&s3ForcePathStyle=true&disableSSL=true"
+      AWS_ACCESS_KEY: "sentry"
+      AWS_SECRET_KEY: "sentry"
       SENTRY_SNUBA_HOST: "http://snuba-api:1218"
     volumes:
       - sentry-vroom:/var/vroom/sentry-profiles
diff --git a/install.sh b/install.sh
index b31c9d9ee33..648f8184543 100755
--- a/install.sh
+++ b/install.sh
@@ -40,6 +40,7 @@ source install/bootstrap-s3-nodestore.sh
 source install/bootstrap-snuba.sh
 source install/upgrade-postgres.sh
 source install/ensure-correct-permissions-profiles-dir.sh
+source install/bootstrap-s3-profiles.sh
 source install/set-up-and-migrate-database.sh
 source install/migrate-pgbouncer.sh
 source install/geoip.sh
diff --git a/install/bootstrap-s3-nodestore.sh b/install/bootstrap-s3-nodestore.sh
index 98d934b4461..8170131a873 100644
--- a/install/bootstrap-s3-nodestore.sh
+++ b/install/bootstrap-s3-nodestore.sh
@@ -1,6 +1,6 @@
 echo "${_group}Bootstrapping seaweedfs (node store)..."
 
-$dc up --wait seaweedfs postgres
+start_service_and_wait_ready seaweedfs postgres
 $dc exec -e "HTTP_PROXY=${HTTP_PROXY:-}" -e "HTTPS_PROXY=${HTTPS_PROXY:-}" -e "NO_PROXY=${NO_PROXY:-}" -e "http_proxy=${http_proxy:-}" -e "https_proxy=${https_proxy:-}" -e "no_proxy=${no_proxy:-}" seaweedfs apk add --no-cache s3cmd
 $dc exec seaweedfs mkdir -p /data/idx/
 s3cmd="$dc exec seaweedfs s3cmd"
diff --git a/install/bootstrap-s3-profiles.sh b/install/bootstrap-s3-profiles.sh
new file mode 100644
index 00000000000..326dce583dc
--- /dev/null
+++ b/install/bootstrap-s3-profiles.sh
@@ -0,0 +1,125 @@
+# The purpose of this file is to have both `sentry`-based containers and `vroom` use the same bucket for profiling.
+# On pre-25.10.0, we have a `sentry-vroom` volume which stores the profiling data; however, since this version,
+# the behavior changed, and `vroomrs` now ingests profiles directly. Both services must share the same bucket,
+# but at the time of this writing, it's not possible because the `sentry-vroom` volume has ownership set to `vroom:vroom`.
+# This prevents the `sentry`-based containers from performing read/write operations on that volume.
+#
+# Therefore, this script should do the following:
+# 1. Check if there are any files inside the `sentry-vroom` volume.
+# 2. If (1) finds files, copy those files into a "profiles" bucket on SeaweedFS.
+# 3. Point `filestore-profiles` and vroom to the SeaweedFS "profiles" bucket.
+#
+# This file is sourced by install.sh; it relies on `$dc`, `$_group`/`$_endgroup`, `$SENTRY_CONFIG_YML`,
+# `$SENTRY_EVENT_RETENTION_DAYS` and the `start_service_and_wait_ready` helper being provided by the caller.
+
+# Should only run when `$COMPOSE_PROFILES` is set to `feature-complete`
+if [[ "${COMPOSE_PROFILES:-}" == "feature-complete" ]]; then
+  echo "${_group}Bootstrapping seaweedfs (profiles)..."
+
+  start_service_and_wait_ready seaweedfs
+  $dc exec -e "HTTP_PROXY=${HTTP_PROXY:-}" -e "HTTPS_PROXY=${HTTPS_PROXY:-}" -e "NO_PROXY=${NO_PROXY:-}" -e "http_proxy=${http_proxy:-}" -e "https_proxy=${https_proxy:-}" -e "no_proxy=${no_proxy:-}" seaweedfs apk add --no-cache s3cmd
+  s3cmd="$dc exec seaweedfs s3cmd"
+
+  bucket_list=$($s3cmd --access_key=sentry --secret_key=sentry --no-ssl --region=us-east-1 --host=localhost:8333 --host-bucket='localhost:8333/%(bucket)' ls)
+
+  # Scan every row of the listing rather than only its last line, so the check
+  # still finds the bucket when another bucket is listed after "profiles".
+  if ! awk '$3 == "s3://profiles" { found = 1 } END { exit !found }' <<<"$bucket_list"; then
+    apply_config_changes_profiles=0
+    # Only touch if no existing profiles config is found
+    if ! grep -q "filestore.profiles-backend" "$SENTRY_CONFIG_YML"; then
+      if [[ -z "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" ]]; then
+        echo
+        echo "We are migrating the Profiles data directory from the 'sentry-vroom' volume to SeaweedFS."
+        echo "This migration will ensure profiles ingestion works correctly with the new 'vroomrs'"
+        echo "and allows both 'sentry' and 'vroom' to transition smoothly."
+        echo "To complete this, your sentry/config.yml file needs to be modified."
+        echo "Would you like us to perform this modification automatically?"
+        echo
+
+        yn=""
+        until [[ -n "$yn" ]]; do
+          read -r -p "y or n? " yn
+          case "$yn" in
+          y | yes | 1)
+            export apply_config_changes_profiles=1
+            echo
+            echo -n "Thank you."
+            ;;
+          n | no | 0)
+            export apply_config_changes_profiles=0
+            echo
+            echo -n "Alright, you will need to update your sentry/config.yml file manually before running 'docker compose up'."
+            ;;
+          *) yn="" ;;
+          esac
+        done
+
+        echo
+        echo "To avoid this prompt in the future, use one of these flags:"
+        echo
+        echo "  --apply-automatic-config-updates"
+        echo "  --no-apply-automatic-config-updates"
+        echo
+        echo "or set the APPLY_AUTOMATIC_CONFIG_UPDATES environment variable:"
+        echo
+        echo "  APPLY_AUTOMATIC_CONFIG_UPDATES=1 to apply automatic updates"
+        echo "  APPLY_AUTOMATIC_CONFIG_UPDATES=0 to not apply automatic updates"
+        echo
+        sleep 5
+      fi
+
+      if [[ "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" == 1 || "$apply_config_changes_profiles" == 1 ]]; then
+        profiles_config=$(sed -n '/filestore.profiles-backend/,/s3v4"/{p}' sentry/config.example.yml)
+        echo "$profiles_config" >>"$SENTRY_CONFIG_YML"
+      fi
+    fi
+
+    $s3cmd --access_key=sentry --secret_key=sentry --no-ssl --region=us-east-1 --host=localhost:8333 --host-bucket='localhost:8333/%(bucket)' mb s3://profiles
+
+    # Check if there are files in the sentry-vroom volume
+    start_service_and_wait_ready vroom
+    vroom_files_count=$($dc exec vroom sh -c "find /var/vroom/sentry-profiles -type f | wc -l")
+    if [[ "$vroom_files_count" -gt 0 ]]; then
+      echo "Migrating $vroom_files_count files from 'sentry-vroom' volume to 'profiles' bucket on SeaweedFS..."
+
+      # Install s3cmd inside the running vroom container (as root), then sync
+      # the volume contents into the bucket from that same container.
+      $dc exec -e "HTTP_PROXY=${HTTP_PROXY:-}" -e "HTTPS_PROXY=${HTTPS_PROXY:-}" -e "NO_PROXY=${NO_PROXY:-}" -e "http_proxy=${http_proxy:-}" -e "https_proxy=${https_proxy:-}" -e "no_proxy=${no_proxy:-}" -u root vroom sh -c 'mkdir -p /var/lib/apt/lists/partial && apt-get update && apt-get install -y --no-install-recommends s3cmd'
+      $dc exec vroom sh -c 's3cmd --access_key=sentry --secret_key=sentry --no-ssl --region=us-east-1 --host=seaweedfs:8333 --host-bucket="seaweedfs:8333/%(bucket)" sync /var/vroom/sentry-profiles/ s3://profiles/'
+
+      echo "Migration completed."
+    else
+      echo "No files found in 'sentry-vroom' volume. Skipping files migration."
+    fi
+  else
+    echo "'profiles' bucket already exists on SeaweedFS. Skipping creation."
+  fi
+
+  if [[ -z "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" || "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" == 1 ]]; then
+    # Lifecycle rule that expires bucket objects after $SENTRY_EVENT_RETENTION_DAYS days.
+    # NOTE(review): element names follow the S3 lifecycle configuration schema - confirm against the nodestore policy.
+    lifecycle_policy=$(
+      cat <<EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<LifecycleConfiguration>
+  <Rule>
+    <ID>Sentry-Profiles-Rule</ID>
+    <Status>Enabled</Status>
+    <Filter></Filter>
+    <Expiration>
+      <Days>$SENTRY_EVENT_RETENTION_DAYS</Days>
+    </Expiration>
+  </Rule>
+</LifecycleConfiguration>
+EOF
+    )
+
+    $dc exec seaweedfs sh -c "printf '%s' '$lifecycle_policy' > /tmp/profiles-lifecycle-policy.xml"
+    $s3cmd --access_key=sentry --secret_key=sentry --no-ssl --region=us-east-1 --host=localhost:8333 --host-bucket='localhost:8333/%(bucket)' setlifecycle /tmp/profiles-lifecycle-policy.xml s3://profiles
+
+    echo "Making sure the bucket lifecycle policy is all set up correctly..."
+    $s3cmd --access_key=sentry --secret_key=sentry --no-ssl --region=us-east-1 --host=localhost:8333 --host-bucket='localhost:8333/%(bucket)' getlifecycle s3://profiles
+  fi
+  echo "${_endgroup}"
+fi
diff --git a/sentry/config.example.yml b/sentry/config.example.yml
index fefe9511b81..f1c3669d4f0 100644
--- a/sentry/config.example.yml
+++ b/sentry/config.example.yml
@@ -96,6 +96,18 @@ releasefile.cache-path: '/data/releasefile-cache'
 #   secret_key: 'XXXXXXX'
 #   bucket_name: 's3-bucket-name'
 
+filestore.profiles-backend: 's3'
+filestore.profiles-options:
+  bucket_acl: "private"
+  default_acl: "private"
+  access_key: "sentry"
+  secret_key: "sentry"
+  bucket_name: "profiles"
+  region_name: "us-east-1"
+  endpoint_url: "http://seaweedfs:8333"
+  addressing_style: "path"
+  signature_version: "s3v4"
+
 symbolicator.enabled: true
 symbolicator.options:
   url: "http://symbolicator:3021"