Skip to content

Commit 84ebc58

Browse files
Zylphrexaldy505BYK
authored
fix(profiling): Ingest profile file path (#4060)
* fix(profiling): Ingest profile file path `ingest-profiles` is now using vroomrs to ingest profiles instead of writing through vroom. For self-hosted, we need to make sure the filestore for profiles is properly configured so vroom can find the ingested profiles. * feat: move profiling data to seaweedfs * feat: review from Sentry * Apply suggestions from code review Co-authored-by: Burak Yigit Kaya <[email protected]> * ref: volume migration tests * ref: execute file creation from vroom container * fix: brainfart * debug * hack * more debug * now I know what I'm missing out * explicitly state feature complete * try to pull vroom image * should only run when COMPOSE_PROFILES is feature complete * using run invoked weed instead of empty shell * execute the upload script from vroom container * execute apt command as root * gonna sleep * missing endgroup * missing sh * directly execute s3cmd and do 'wc' outside of the container * why did other test start failing * manual cleanup * vroom is not a persistent volume * what about not removing the seaweed volume --------- Co-authored-by: Reinaldy Rafli <[email protected]> Co-authored-by: Burak Yigit Kaya <[email protected]>
1 parent 5a670d1 commit 84ebc58

File tree

7 files changed

+172
-2
lines changed

7 files changed

+172
-2
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash

# Unit test for install/bootstrap-s3-profiles.sh.
#
# Seeds the `sentry-vroom` volume with 1000 files, runs the bootstrap script
# several times in a row, and then checks that the "profiles" bucket on
# SeaweedFS lists exactly 1000 objects.

source _unit-test/_test_setup.sh
source install/dc-detect-version.sh
source install/create-docker-volumes.sh
source install/ensure-files-from-examples.sh
export COMPOSE_PROFILES="feature-complete"
$dc pull vroom
source install/ensure-correct-permissions-profiles-dir.sh

# Generate some test files on the `sentry-vroom` volume. The loop below is
# executed by bash inside a one-off vroom container, not by this shell.
seed_volume_script='
for i in $(seq 1 1000); do
echo This is test file $i > /var/vroom/sentry-profiles/test_file_$i.txt
done
'
$dc run --rm --no-deps \
  -v sentry-vroom:/var/vroom/sentry-profiles \
  --entrypoint /bin/bash \
  vroom -c "$seed_volume_script"

# Opt in to automatic config updates so the bootstrap script never prompts.
export APPLY_AUTOMATIC_CONFIG_UPDATES=1

# Run the bootstrap repeatedly; the only expectation here is that running it
# more than once does not break anything.
for ((attempt = 1; attempt <= 5; attempt++)); do
  source install/bootstrap-s3-profiles.sh
done

# Ensure that the files have been migrated to SeaweedFS. `wc -l` runs on the
# host side of the pipe, counting the lines printed by `s3cmd ls`.
seaweed_s3_flags=(
  --access_key=sentry
  --secret_key=sentry
  --no-ssl
  --region=us-east-1
  --host=seaweedfs:8333
  "--host-bucket=seaweedfs:8333/%(bucket)"
)
migrated_files_count=$($dc exec seaweedfs s3cmd "${seaweed_s3_flags[@]}" ls s3://profiles/ | wc -l)
if ((migrated_files_count != 1000)); then
  echo "Error: Expected 1000 migrated files, but found $migrated_files_count"
  exit 1
fi

# Manual cleanup, otherwise `create-docker-volumes.sh` will fail
$dc down -v --remove-orphans

report_success

docker-compose.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,9 @@ services:
752752
environment:
753753
SENTRY_KAFKA_BROKERS_PROFILING: "kafka:9092"
754754
SENTRY_KAFKA_BROKERS_OCCURRENCES: "kafka:9092"
755-
SENTRY_BUCKET_PROFILES: file:///var/vroom/sentry-profiles
755+
SENTRY_BUCKET_PROFILES: "s3://profiles?region=us-east-1&endpoint=seaweedfs:8333&s3ForcePathStyle=true&disableSSL=true"
756+
AWS_ACCESS_KEY: "sentry"
757+
AWS_SECRET_KEY: "sentry"
756758
SENTRY_SNUBA_HOST: "http://snuba-api:1218"
757759
volumes:
758760
- sentry-vroom:/var/vroom/sentry-profiles

install.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ source install/bootstrap-s3-nodestore.sh
4040
source install/bootstrap-snuba.sh
4141
source install/upgrade-postgres.sh
4242
source install/ensure-correct-permissions-profiles-dir.sh
43+
source install/bootstrap-s3-profiles.sh
4344
source install/set-up-and-migrate-database.sh
4445
source install/migrate-pgbouncer.sh
4546
source install/geoip.sh

install/bootstrap-s3-nodestore.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
echo "${_group}Bootstrapping seaweedfs (node store)..."
22

3-
$dc up --wait seaweedfs postgres
3+
start_service_and_wait_ready seaweedfs postgres
44
$dc exec -e "HTTP_PROXY=${HTTP_PROXY:-}" -e "HTTPS_PROXY=${HTTPS_PROXY:-}" -e "NO_PROXY=${NO_PROXY:-}" -e "http_proxy=${http_proxy:-}" -e "https_proxy=${https_proxy:-}" -e "no_proxy=${no_proxy:-}" seaweedfs apk add --no-cache s3cmd
55
$dc exec seaweedfs mkdir -p /data/idx/
66
s3cmd="$dc exec seaweedfs s3cmd"

install/bootstrap-s3-profiles.sh

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
# The purpose of this file is to have both `sentry`-based containers and `vroom` use the same bucket for profiling.
# Before 25.10.0, profiling data was stored on the `sentry-vroom` volume. Since 25.10.0 the behavior has changed:
# `vroomrs` now ingests profiles directly, so both services must share the same bucket. At the time of this
# writing that is not possible with the volume, because `sentry-vroom` has its ownership set to `vroom:vroom`,
# which prevents the `sentry`-based containers from performing read/write operations on that volume.
#
# Therefore, this script should do the following:
# 1. Check if there are any files inside the `sentry-vroom` volume.
# 2. If (1) finds files, copy those files into a "profiles" bucket on SeaweedFS.
# 3. Point `filestore-profiles` and vroom to the SeaweedFS "profiles" bucket.
#
# This file uses, but does not define: `$dc`, `$_group`, `$_endgroup`,
# `$SENTRY_CONFIG_YML`, `$SENTRY_EVENT_RETENTION_DAYS` and the
# `start_service_and_wait_ready` function. They must be provided by the
# script that sources it.

#######################################
# Tells whether a bucket appears anywhere in an `s3cmd ls` bucket listing.
# Every line is inspected (not just the last one), and a trailing carriage
# return on the URI column is ignored.
# Arguments: $1 - bucket URI to look for, e.g. s3://profiles
# Inputs:    bucket listing on stdin; the URI is expected in the 3rd column
# Returns:   0 if the bucket is listed, 1 otherwise
#######################################
_s3_bucket_listed() {
  awk -v wanted="$1" '
    { sub(/\r$/, "", $3) }
    $3 == wanted { found = 1 }
    END { exit !found }
  '
}

# Should only run when `$COMPOSE_PROFILES` is set to `feature-complete`
if [[ "${COMPOSE_PROFILES:-}" == "feature-complete" ]]; then
  echo "${_group}Bootstrapping seaweedfs (profiles)..."

  # Proxy settings forwarded into containers that need to download packages.
  _profiles_proxy_env=(
    -e "HTTP_PROXY=${HTTP_PROXY:-}"
    -e "HTTPS_PROXY=${HTTPS_PROXY:-}"
    -e "NO_PROXY=${NO_PROXY:-}"
    -e "http_proxy=${http_proxy:-}"
    -e "https_proxy=${https_proxy:-}"
    -e "no_proxy=${no_proxy:-}"
  )
  # Connection flags for s3cmd when it runs inside the seaweedfs container.
  _profiles_s3_args=(
    --access_key=sentry
    --secret_key=sentry
    --no-ssl
    --region=us-east-1
    --host=localhost:8333
    '--host-bucket=localhost:8333/%(bucket)'
  )

  start_service_and_wait_ready seaweedfs
  $dc exec "${_profiles_proxy_env[@]}" seaweedfs apk add --no-cache s3cmd
  # Intentionally expanded unquoted below: `$dc` holds a multi-word command.
  s3cmd="$dc exec seaweedfs s3cmd"

  bucket_list=$($s3cmd "${_profiles_s3_args[@]}" ls)

  if ! _s3_bucket_listed 's3://profiles' <<<"$bucket_list"; then
    apply_config_changes_profiles=0
    # Only touch if no existing profiles config is found
    if ! grep -q "filestore.profiles-backend" "$SENTRY_CONFIG_YML"; then
      if [[ -z "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" ]]; then
        echo
        echo "We are migrating the Profiles data directory from the 'sentry-vroom' volume to SeaweedFS."
        echo "This migration will ensure profiles ingestion works correctly with the new 'vroomrs'"
        echo "and allows both 'sentry' and 'vroom' to transition smoothly."
        echo "To complete this, your sentry/config.yml file needs to be modified."
        echo "Would you like us to perform this modification automatically?"
        echo

        # Keep asking until one of the recognized answers is given.
        yn=""
        until [[ -n "$yn" ]]; do
          read -r -p "y or n? " yn
          case "$yn" in
          y | yes | 1)
            export apply_config_changes_profiles=1
            echo
            echo -n "Thank you."
            ;;
          n | no | 0)
            export apply_config_changes_profiles=0
            echo
            echo -n "Alright, you will need to update your sentry/config.yml file manually before running 'docker compose up'."
            ;;
          *) yn="" ;;
          esac
        done

        echo
        echo "To avoid this prompt in the future, use one of these flags:"
        echo
        echo "  --apply-automatic-config-updates"
        echo "  --no-apply-automatic-config-updates"
        echo
        echo "or set the APPLY_AUTOMATIC_CONFIG_UPDATES environment variable:"
        echo
        echo "  APPLY_AUTOMATIC_CONFIG_UPDATES=1 to apply automatic updates"
        echo "  APPLY_AUTOMATIC_CONFIG_UPDATES=0 to not apply automatic updates"
        echo
        sleep 5
      fi

      if [[ "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" == 1 || "$apply_config_changes_profiles" == 1 ]]; then
        # Copy the profiles filestore section verbatim from the example config.
        profiles_config=$(sed -n '/filestore.profiles-backend/,/s3v4"/{p}' sentry/config.example.yml)
        # If the config does not end with a newline, add one first so the
        # appended key is not glued onto the file's last line.
        if [[ -n "$(tail -c 1 "$SENTRY_CONFIG_YML")" ]]; then
          echo >>"$SENTRY_CONFIG_YML"
        fi
        echo "$profiles_config" >>"$SENTRY_CONFIG_YML"
      fi
    fi

    # NOTE(review): the bucket is created before the files are copied. If the
    # copy below does not finish, a later run takes the "bucket already exists"
    # branch and does not retry the migration.
    $s3cmd "${_profiles_s3_args[@]}" mb s3://profiles

    # Check if there are files in the sentry-vroom volume
    start_service_and_wait_ready vroom
    vroom_files_count=$($dc exec vroom sh -c "find /var/vroom/sentry-profiles -type f | wc -l")
    # Keep digits only: the value crosses a `docker compose exec` boundary and
    # may carry whitespace or a carriage return.
    vroom_files_count=${vroom_files_count//[!0-9]/}
    if [[ "${vroom_files_count:-0}" -gt 0 ]]; then
      echo "Migrating $vroom_files_count files from 'sentry-vroom' volume to 'profiles' bucket on SeaweedFS..."

      # Install s3cmd inside the running vroom container (as root, since it
      # uses apt-get), then sync the volume contents into the bucket from there.
      $dc exec "${_profiles_proxy_env[@]}" -u root vroom sh -c 'mkdir -p /var/lib/apt/lists/partial && apt-get update && apt-get install -y --no-install-recommends s3cmd'
      $dc exec vroom sh -c 's3cmd --access_key=sentry --secret_key=sentry --no-ssl --region=us-east-1 --host=seaweedfs:8333 --host-bucket="seaweedfs:8333/%(bucket)" sync /var/vroom/sentry-profiles/ s3://profiles/'

      echo "Migration completed."
    else
      echo "No files found in 'sentry-vroom' volume. Skipping files migration."
    fi
  else
    echo "'profiles' bucket already exists on SeaweedFS. Skipping creation."
  fi

  # The lifecycle policy is (re)applied unless automatic updates were
  # explicitly declined (APPLY_AUTOMATIC_CONFIG_UPDATES set to a value other than 1).
  if [[ -z "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" || "${APPLY_AUTOMATIC_CONFIG_UPDATES:-}" == 1 ]]; then
    # Expire objects in the bucket after $SENTRY_EVENT_RETENTION_DAYS days.
    lifecycle_policy=$(
      cat <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<LifecycleConfiguration>
  <Rule>
    <ID>Sentry-Profiles-Rule</ID>
    <Status>Enabled</Status>
    <Filter></Filter>
    <Expiration>
      <Days>$SENTRY_EVENT_RETENTION_DAYS</Days>
    </Expiration>
  </Rule>
</LifecycleConfiguration>
EOF
    )

    # The policy is embedded in single quotes in the remote command, so it
    # must not contain a single quote itself.
    $dc exec seaweedfs sh -c "printf '%s' '$lifecycle_policy' > /tmp/profiles-lifecycle-policy.xml"
    $s3cmd "${_profiles_s3_args[@]}" setlifecycle /tmp/profiles-lifecycle-policy.xml s3://profiles

    echo "Making sure the bucket lifecycle policy is all set up correctly..."
    $s3cmd "${_profiles_s3_args[@]}" getlifecycle s3://profiles
  fi
  echo "${_endgroup}"
fi

sentry/config.example.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,18 @@ releasefile.cache-path: '/data/releasefile-cache'
9696
# secret_key: 'XXXXXXX'
9797
# bucket_name: 's3-bucket-name'
9898

99+
filestore.profiles-backend: 's3'
100+
filestore.profiles-options:
101+
bucket_acl: "private"
102+
default_acl: "private"
103+
access_key: "sentry"
104+
secret_key: "sentry"
105+
bucket_name: "profiles"
106+
region_name: "us-east-1"
107+
endpoint_url: "http://seaweedfs:8333"
108+
addressing_style: "path"
109+
signature_version: "s3v4"
110+
99111
symbolicator.enabled: true
100112
symbolicator.options:
101113
url: "http://symbolicator:3021"

0 commit comments

Comments
 (0)