Skip to content

Commit 4d8b2f5

Browse files
committed
Polished ansible, added all configs for each component.
1 parent 6f815af commit 4d8b2f5

File tree

16 files changed

+547
-48
lines changed

16 files changed

+547
-48
lines changed

infra/ansible/playbooks/caddy.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
ansible_ssh_user: "{{ admin_user }}"
4949
caddy_metrics_url: "{{ lookup('ini', 'caddy_metrics_url', file=ini_file) }}"
5050
caddy_telemetry_url: "{{ lookup('ini', 'caddy_telemetry_url', file=ini_file) }}"
51-
caddy_tailscale_url: "{{ lookup('ini', 'caddy_tailscale_url', file=ini_file) }}"
51+
caddy_jaeger_url: "{{ lookup('ini', 'caddy_jaeger_url', file=ini_file) }}"
5252

5353
- name: Enable caddy
5454
become: true

infra/ansible/playbooks/cassandra.yaml

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,15 @@
1111
register: cassandra_exists
1212

1313
- name: Install java
14-
when: not cassandra_exists.stat.exists
1514
apt:
16-
pkg:
17-
- default-jre
15+
pkg: openjdk-17-jre
1816

1917
- name: Create cassandra group
20-
when: not cassandra_exists.stat.exists
2118
group:
2219
name: cassandra
2320
system: yes
2421

2522
- name: Create cassandra user
26-
when: not cassandra_exists.stat.exists
2723
user:
2824
name: cassandra
2925
group: cassandra
@@ -55,8 +51,23 @@
5551
extra_opts:
5652
- --strip-components=1
5753

54+
- name: Add /opt/cassandra/bin to PATH
55+
lineinfile:
56+
path: /home/{{ ansible_user }}/.bashrc
57+
line: PATH=/opt/cassandra/bin:$PATH
58+
state: present
59+
5860
- name: Create Cassandra systemd service
59-
when: not cassandra_exists.stat.exists
6061
template:
6162
src: services/cassandra.service.j2
6263
dest: /etc/systemd/system/cassandra.service
64+
vars:
65+
cassandra_telemetry_user: "{{ lookup('ini', 'cassandra_telemetry_user', file=ini_file) }}"
66+
cassandra_telemetry_pass: "{{ lookup('ini', 'cassandra_telemetry_pass', file=ini_file) }}"
67+
68+
- name: Start Cassandra
69+
systemd_service:
70+
name: cassandra
71+
state: started
72+
daemon_reload: true
73+
enabled: true
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#!/usr/bin/env bash
2+
3+
# Copyright (c) 2024 The Jaeger Authors.
4+
# SPDX-License-Identifier: Apache-2.0
5+
6+
# usage MESSAGE
#
# Prints "Error: MESSAGE" followed by the full help text to stderr and then
# terminates the script with exit status 1. Never returns to the caller.
function usage {
    # Unquoted heredoc so that $1 (the error message) and $0 (the script name)
    # are expanded; everything else in the text is literal.
    cat >&2 <<EOF
Error: $1

Usage: MODE=(prod|test) [PARAM=value ...] $0 [template-file] | cqlsh

The following parameters can be set via environment:
 MODE - prod or test. Test keyspace is usable on a single node cluster (no replication)
 DATACENTER - datacenter name for network topology used in prod (optional in MODE=test)
 TRACE_TTL - time to live for trace data, in seconds (default: 172800, 2 days)
 DEPENDENCIES_TTL - time to live for dependencies data, in seconds (default: 0, no TTL)
 KEYSPACE - keyspace (default: jaeger_v1_{datacenter})
 REPLICATION_FACTOR - replication factor for prod (default: 2 for prod, 1 for test)
 VERSION - Cassandra backend version, 3, 4 or 5 (default: 4). Ignored if template is provided.
 COMPACTION_WINDOW - compaction window size, a number followed by m (minutes), h (hours) or d (days) (default: derived from TRACE_TTL)

The template-file argument must be fully qualified path to a v00#.cql.tmpl template file.
If omitted, the template file with the highest available version will be used.
EOF
    exit 1
}
24+
25+
# Defaults for the optional tuning parameters (overridable via environment).
trace_ttl=${TRACE_TTL:-172800}
dependencies_ttl=${DEPENDENCIES_TTL:-0}
cas_version=${VERSION:-4}

# Choose the schema template. An explicit first argument always wins;
# otherwise VERSION is mapped to a template stored next to this script.
template=$1
if [[ -z "$template" ]]; then
    # Quote $0 and the resulting directory so that a script path containing
    # spaces is not word-split.
    script_dir=$(dirname -- "$0")
    case "$cas_version" in
        3)
            template="$script_dir/v003.cql.tmpl"
            ;;
        4|5)
            # Version 5 maps to the same v004 template as version 4.
            template="$script_dir/v004.cql.tmpl"
            ;;
        *)
            # Any other value: fall back to the last template in sort order.
            template=$(ls "$script_dir"/*cql.tmpl | sort | tail -1)
            ;;
    esac
fi
46+
47+
# Derive the datacenter, replication factor and replication-strategy literal
# from MODE. A missing or unrecognized MODE aborts through usage().
case "$MODE" in
    "")
        usage "missing MODE parameter"
        ;;
    prod)
        # prod has no sensible default datacenter, so it must be supplied.
        [[ "$DATACENTER" != "" ]] || usage "missing DATACENTER parameter for prod mode"
        datacenter=$DATACENTER
        replication_factor=${REPLICATION_FACTOR:-2}
        replication="{'class': 'NetworkTopologyStrategy', '$datacenter': '${replication_factor}' }"
        ;;
    test)
        datacenter=${DATACENTER:-'test'}
        replication_factor=${REPLICATION_FACTOR:-1}
        replication="{'class': 'SimpleStrategy', 'replication_factor': '${replication_factor}'}"
        ;;
    *)
        usage "invalid MODE=$MODE, expecting 'prod' or 'test'"
        ;;
esac
61+
62+
# The keyspace defaults to jaeger_v1_<datacenter> unless KEYSPACE overrides it.
keyspace=${KEYSPACE:-"jaeger_v1_${datacenter}"}

# Abort if the keyspace contains anything other than letters, digits or underscores.
[[ ! $keyspace =~ [^a-zA-Z0-9_] ]] ||
    usage "invalid characters in KEYSPACE=$keyspace parameter, please use letters, digits or underscores"
67+
68+
# Resolve the compaction window size and unit.
# An explicit COMPACTION_WINDOW must be <digits> followed by m, h or d;
# when it is not set, the window is derived from the trace TTL.
if [[ -n "$COMPACTION_WINDOW" ]]; then
    # Match the whole value inside the shell. The earlier `echo | grep -E`
    # check was line-based, so a multi-line value was accepted as soon as any
    # single line matched.
    if [[ "$COMPACTION_WINDOW" =~ ^[0-9]+[mhd]$ ]]; then
        # The value is validated, so the unit is exactly the last character
        # and the size is everything before it.
        compaction_window_size=${COMPACTION_WINDOW%[mhd]}
        compaction_window_unit=${COMPACTION_WINDOW: -1}
    else
        usage "Invalid compaction window size format. Please use numeric value followed by 'm' for minutes, 'h' for hours, or 'd' for days."
    fi
else
    trace_ttl_minutes=$(($trace_ttl / 60))
    # Taking the ceiling of trace_ttl_minutes / 30
    compaction_window_size=$((($trace_ttl_minutes + 30 - 1) / 30))
    compaction_window_unit="m"
fi

# Expand the one-letter unit into the keyword substituted into the template.
case "$compaction_window_unit" in
    m) compaction_window_unit="MINUTES" ;;
    h) compaction_window_unit="HOURS" ;;
    d) compaction_window_unit="DAYS" ;;
esac
87+
88+
# Report the effective parameters on stderr so that stdout carries only the
# generated CQL (stdout is meant to be piped into cqlsh).
cat >&2 <<EOF
Using template file $template with parameters:
mode = $MODE
datacenter = $datacenter
keyspace = $keyspace
replication = ${replication}
trace_ttl = ${trace_ttl}
dependencies_ttl = ${dependencies_ttl}
compaction_window_size = ${compaction_window_size}
compaction_window_unit = ${compaction_window_unit}
EOF

# strip out comments, collapse multiple adjacent empty lines (cat -s), substitute variables
# The template path is passed to sed directly and quoted: this avoids word
# splitting on paths with spaces, and an empty path now fails in sed instead
# of making `cat` silently read from stdin.
sed \
    -e 's/--.*$//g' \
    -e 's/^\s*$//g' \
    -e "s/\${keyspace}/${keyspace}/g" \
    -e "s/\${replication}/${replication}/g" \
    -e "s/\${trace_ttl}/${trace_ttl}/g" \
    -e "s/\${dependencies_ttl}/${dependencies_ttl}/g" \
    -e "s/\${compaction_window_size}/${compaction_window_size}/g" \
    -e "s/\${compaction_window_unit}/${compaction_window_unit}/g" \
    "$template" | cat -s
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
--
2+
-- Creates Cassandra keyspace with tables for traces and dependencies.
3+
--
4+
-- Required parameters:
5+
--
6+
-- keyspace
7+
-- name of the keyspace
8+
-- replication
9+
-- replication strategy for the keyspace, such as
10+
-- for prod environments
11+
-- {'class': 'NetworkTopologyStrategy', '$datacenter': '${replication_factor}' }
12+
-- for test environments
13+
-- {'class': 'SimpleStrategy', 'replication_factor': '1'}
14+
-- trace_ttl
15+
-- default time to live for trace data, in seconds
16+
-- dependencies_ttl
17+
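-- default time to live for dependencies data, in seconds (0 for no TTL)
-- compaction_window_size
-- size of the compaction window for the traces table, counted in compaction_window_unit units
-- compaction_window_unit
-- unit of the compaction window for the traces table, such as MINUTES, HOURS or DAYS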
18+
--
19+
-- Non-configurable settings:
20+
-- gc_grace_seconds is non-zero, see: http://www.uberobert.com/cassandra_gc_grace_disables_hinted_handoff/
21+
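-- For TTL of 2 days, compaction window of the index tables is 1 hour, rule of thumb here: http://thelastpickle.com/blog/2016/12/08/TWCS-part1.html
-- (the traces table takes its window from compaction_window_size / compaction_window_unit instead)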
22+
23+
CREATE KEYSPACE IF NOT EXISTS ${keyspace} WITH replication = ${replication};
24+
25+
CREATE TYPE IF NOT EXISTS ${keyspace}.keyvalue (
26+
key text,
27+
value_type text,
28+
value_string text,
29+
value_bool boolean,
30+
value_long bigint,
31+
value_double double,
32+
value_binary blob
33+
);
34+
35+
CREATE TYPE IF NOT EXISTS ${keyspace}.log (
36+
ts bigint, -- microseconds since epoch
37+
fields frozen<list<frozen<${keyspace}.keyvalue>>>
38+
);
39+
40+
CREATE TYPE IF NOT EXISTS ${keyspace}.span_ref (
41+
ref_type text,
42+
trace_id blob,
43+
span_id bigint
44+
);
45+
46+
CREATE TYPE IF NOT EXISTS ${keyspace}.process (
47+
service_name text,
48+
tags frozen<list<frozen<${keyspace}.keyvalue>>>
49+
);
50+
51+
-- Notice we have span_hash. This exists only for zipkin backwards compat. Zipkin allows spans with the same ID.
52+
-- Note: Cassandra re-orders non-PK columns alphabetically, so the table looks differently in CQLSH "describe table".
53+
-- start_time is bigint instead of timestamp as we require microsecond precision
54+
CREATE TABLE IF NOT EXISTS ${keyspace}.traces (
55+
trace_id blob,
56+
span_id bigint,
57+
span_hash bigint,
58+
parent_id bigint,
59+
operation_name text,
60+
flags int,
61+
start_time bigint, -- microseconds since epoch
62+
duration bigint, -- microseconds
63+
tags list<frozen<keyvalue>>,
64+
logs list<frozen<log>>,
65+
refs list<frozen<span_ref>>,
66+
process frozen<process>,
67+
PRIMARY KEY (trace_id, span_id, span_hash)
68+
)
69+
WITH compaction = {
70+
'compaction_window_size': '${compaction_window_size}',
71+
'compaction_window_unit': '${compaction_window_unit}',
72+
'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
73+
}
74+
AND default_time_to_live = ${trace_ttl}
75+
AND speculative_retry = 'NONE'
76+
AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
77+
78+
CREATE TABLE IF NOT EXISTS ${keyspace}.service_names (
79+
service_name text,
80+
PRIMARY KEY (service_name)
81+
)
82+
WITH compaction = {
83+
'min_threshold': '4',
84+
'max_threshold': '32',
85+
'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'
86+
}
87+
AND default_time_to_live = ${trace_ttl}
88+
AND speculative_retry = 'NONE'
89+
AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
90+
91+
CREATE TABLE IF NOT EXISTS ${keyspace}.operation_names_v2 (
92+
service_name text,
93+
span_kind text,
94+
operation_name text,
95+
PRIMARY KEY ((service_name), span_kind, operation_name)
96+
)
97+
WITH compaction = {
98+
'min_threshold': '4',
99+
'max_threshold': '32',
100+
'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'
101+
}
102+
AND default_time_to_live = ${trace_ttl}
103+
AND speculative_retry = 'NONE'
104+
AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
105+
106+
-- index of trace IDs by service + operation names, sorted by span start_time.
107+
CREATE TABLE IF NOT EXISTS ${keyspace}.service_operation_index (
108+
service_name text,
109+
operation_name text,
110+
start_time bigint, -- microseconds since epoch
111+
trace_id blob,
112+
PRIMARY KEY ((service_name, operation_name), start_time)
113+
) WITH CLUSTERING ORDER BY (start_time DESC)
114+
AND compaction = {
115+
'compaction_window_size': '1',
116+
'compaction_window_unit': 'HOURS',
117+
'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
118+
}
119+
AND default_time_to_live = ${trace_ttl}
120+
AND speculative_retry = 'NONE'
121+
AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
122+
123+
CREATE TABLE IF NOT EXISTS ${keyspace}.service_name_index (
124+
service_name text,
125+
bucket int,
126+
start_time bigint, -- microseconds since epoch
127+
trace_id blob,
128+
PRIMARY KEY ((service_name, bucket), start_time)
129+
) WITH CLUSTERING ORDER BY (start_time DESC)
130+
AND compaction = {
131+
'compaction_window_size': '1',
132+
'compaction_window_unit': 'HOURS',
133+
'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
134+
}
135+
AND default_time_to_live = ${trace_ttl}
136+
AND speculative_retry = 'NONE'
137+
AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
138+
139+
CREATE TABLE IF NOT EXISTS ${keyspace}.duration_index (
140+
service_name text, -- service name
141+
operation_name text, -- operation name, or blank for queries without span name
142+
bucket timestamp, -- time bucket, - the start_time of the given span rounded to an hour
143+
duration bigint, -- span duration, in microseconds
144+
start_time bigint, -- microseconds since epoch
145+
trace_id blob,
146+
PRIMARY KEY ((service_name, operation_name, bucket), duration, start_time, trace_id)
147+
) WITH CLUSTERING ORDER BY (duration DESC, start_time DESC)
148+
AND compaction = {
149+
'compaction_window_size': '1',
150+
'compaction_window_unit': 'HOURS',
151+
'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
152+
}
153+
AND default_time_to_live = ${trace_ttl}
154+
AND speculative_retry = 'NONE'
155+
AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
156+
157+
-- a bucketing strategy may have to be added for tag queries
158+
-- we can make this table even better by adding a timestamp to it
159+
CREATE TABLE IF NOT EXISTS ${keyspace}.tag_index (
160+
service_name text,
161+
tag_key text,
162+
tag_value text,
163+
start_time bigint, -- microseconds since epoch
164+
trace_id blob,
165+
span_id bigint,
166+
PRIMARY KEY ((service_name, tag_key, tag_value), start_time, trace_id, span_id)
167+
)
168+
WITH CLUSTERING ORDER BY (start_time DESC)
169+
AND compaction = {
170+
'compaction_window_size': '1',
171+
'compaction_window_unit': 'HOURS',
172+
'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy'
173+
}
174+
AND default_time_to_live = ${trace_ttl}
175+
AND speculative_retry = 'NONE'
176+
AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
177+
178+
CREATE TYPE IF NOT EXISTS ${keyspace}.dependency (
179+
parent text,
180+
child text,
181+
call_count bigint,
182+
source text
183+
);
184+
185+
-- compaction strategy is intentionally different as compared to other tables due to the size of dependencies data
186+
CREATE TABLE IF NOT EXISTS ${keyspace}.dependencies_v2 (
187+
ts_bucket timestamp,
188+
ts timestamp,
189+
dependencies list<frozen<dependency>>,
190+
PRIMARY KEY (ts_bucket, ts)
191+
) WITH CLUSTERING ORDER BY (ts DESC)
192+
AND compaction = {
193+
'min_threshold': '4',
194+
'max_threshold': '32',
195+
'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy'
196+
}
197+
AND default_time_to_live = ${dependencies_ttl};
198+
199+
-- adaptive sampling tables
200+
-- ./plugin/storage/cassandra/samplingstore/storage.go
201+
CREATE TABLE IF NOT EXISTS ${keyspace}.operation_throughput (
202+
bucket int,
203+
ts timeuuid,
204+
throughput text,
205+
PRIMARY KEY(bucket, ts)
206+
) WITH CLUSTERING ORDER BY (ts desc);
207+
208+
CREATE TABLE IF NOT EXISTS ${keyspace}.sampling_probabilities (
209+
bucket int,
210+
ts timeuuid,
211+
hostname text,
212+
probabilities text,
213+
PRIMARY KEY(bucket, ts)
214+
) WITH CLUSTERING ORDER BY (ts desc);
215+
216+
-- distributed lock
217+
-- ./plugin/pkg/distributedlock/cassandra/lock.go
218+
CREATE TABLE IF NOT EXISTS ${keyspace}.leases (
219+
name text,
220+
owner text,
221+
PRIMARY KEY (name)
222+
);

0 commit comments

Comments
 (0)