Skip to content

Commit 5a3792f

Browse files
authored
Add deletion methods to clear data from TSDB. (#5317)
* Add deletion methods to abstract TSDB backend.
* Add deletion methods to `RedisTSDB`.
* Add deletion methods to `DummyTSDB`.
* Add deletion methods to `InMemoryTSDB`.
1 parent 603278a commit 5a3792f

File tree

5 files changed

+163
-17
lines changed

5 files changed

+163
-17
lines changed

src/sentry/tsdb/base.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from django.utils import timezone
1616
from enum import Enum
1717

18-
from sentry.utils.dates import to_timestamp
18+
from sentry.utils.dates import to_datetime, to_timestamp
1919

2020
ONE_MINUTE = 60
2121
ONE_HOUR = ONE_MINUTE * 60
@@ -181,6 +181,22 @@ def get_optimal_rollup_series(self, start, end=None, rollup=None):
181181

182182
return rollup, sorted(series)
183183

184+
def get_active_series(self, start=None, end=None, timestamp=None):
    """
    Build the series of bucket datetimes for every configured rollup.

    For each rollup in ``self.rollups`` the series is computed with
    ``get_optimal_rollup_series``. When ``start`` is ``None`` the series
    starts at ``get_earliest_timestamp(rollup, timestamp=timestamp)``
    converted with ``to_datetime``; otherwise ``start`` is used as given.
    ``end`` is passed through unchanged.

    Returns a dict mapping each rollup to a list of datetimes.
    """
    rollups = {}
    for rollup in self.rollups:
        if start is not None:
            series_start = start
        else:
            series_start = to_datetime(
                self.get_earliest_timestamp(rollup, timestamp=timestamp),
            )
        _, series = self.get_optimal_rollup_series(
            series_start,
            end,
            rollup=rollup,
        )
        # Materialize a real list: on Python 3 ``map`` returns a one-shot
        # iterator, and callers loop over each series more than once.
        rollups[rollup] = [to_datetime(item) for item in series]
    return rollups
199+
184200
def calculate_expiry(self, rollup, samples, timestamp):
185201
"""
186202
Calculate the expiration time for a rollup.
@@ -232,6 +248,12 @@ def merge(self, model, destination, sources, timestamp=None):
232248
"""
233249
raise NotImplementedError
234250

251+
def delete(self, models, keys, start=None, end=None, timestamp=None):
    """
    Delete counter data for every key in ``keys`` under every model in
    ``models``.

    ``start``, ``end`` and ``timestamp`` are optional and bound the
    affected time range. This base implementation always raises
    ``NotImplementedError``; concrete backends must override it.
    """
    raise NotImplementedError
256+
235257
def get_range(self, model, keys, start, end, rollup=None):
236258
"""
237259
To get a range of data for group ID=[1, 2, 3]:
@@ -311,6 +333,12 @@ def merge_distinct_counts(self, model, destination, sources, timestamp=None):
311333
"""
312334
raise NotImplementedError
313335

336+
def delete_distinct_counts(self, models, keys, start=None, end=None, timestamp=None):
    """
    Delete distinct-counter data for every key in ``keys`` under every
    model in ``models``.

    ``start``, ``end`` and ``timestamp`` are optional and bound the
    affected time range. This base implementation always raises
    ``NotImplementedError``; concrete backends must override it.
    """
    raise NotImplementedError
341+
314342
def record_frequency_multi(self, requests, timestamp=None):
315343
"""
316344
Record items in a frequency table.
@@ -380,3 +408,9 @@ def merge_frequencies(self, model, destination, sources, timestamp=None):
380408
key.
381409
"""
382410
raise NotImplementedError
411+
412+
def delete_frequencies(self, models, keys, start=None, end=None, timestamp=None):
    """
    Delete frequency-table data for every key in ``keys`` under every
    model in ``models``.

    ``start``, ``end`` and ``timestamp`` are optional and bound the
    affected time range. This base implementation always raises
    ``NotImplementedError``; concrete backends must override it.
    """
    raise NotImplementedError

src/sentry/tsdb/dummy.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ def incr(self, model, key, timestamp=None, count=1):
2020
def merge(self, model, destination, sources, timestamp=None):
2121
pass
2222

23+
def delete(self, models, keys, start=None, end=None, timestamp=None):
    """
    Accept a counter deletion request and do nothing.
    """
    return None
25+
2326
def get_range(self, model, keys, start, end, rollup=None):
2427
_, series = self.get_optimal_rollup_series(start, end, rollup)
2528
return {k: [(ts, 0) for ts in series] for k in keys}
@@ -40,6 +43,9 @@ def get_distinct_counts_union(self, model, keys, start, end=None, rollup=None):
4043
def merge_distinct_counts(self, model, destination, sources, timestamp=None):
4144
pass
4245

46+
def delete_distinct_counts(self, models, keys, start=None, end=None, timestamp=None):
    """
    Accept a distinct-counter deletion request and do nothing.
    """
    return None
48+
4349
def record_frequency_multi(self, requests, timestamp=None):
4450
pass
4551

@@ -72,3 +78,6 @@ def get_frequency_totals(self, model, items, start, end=None, rollup=None):
7278

7379
def merge_frequencies(self, model, destination, sources, timestamp=None):
7480
pass
81+
82+
def delete_frequencies(self, models, keys, start=None, end=None, timestamp=None):
    """
    Accept a frequency-table deletion request and do nothing.
    """
    return None

src/sentry/tsdb/inmemory.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,19 @@ def merge(self, model, destination, sources, timestamp=None):
4040
for bucket, count in self.data[model].pop(source, {}).items():
4141
destination[bucket] += count
4242

43+
def delete(self, models, keys, start=None, end=None, timestamp=None):
    """
    Remove counter buckets for every (model, key) pair.

    The buckets to drop come from ``get_active_series(start, end,
    timestamp)``: each entry of each rollup's series is normalized with
    ``normalize_to_rollup`` and popped from ``self.data[model][key]``.
    Buckets that are not present are ignored.
    """
    rollups = self.get_active_series(start, end, timestamp)

    for rollup, series in rollups.items():
        # Normalize once per rollup instead of once per (model, key). The
        # list also keeps this correct when ``series`` is a one-shot
        # iterator, which would otherwise be empty after the first key.
        buckets = [self.normalize_to_rollup(ts, rollup) for ts in series]
        for model in models:
            for key in keys:
                data = self.data[model][key]
                for bucket in buckets:
                    data.pop(bucket, None)
55+
4356
def get_range(self, model, keys, start, end, rollup=None):
4457
rollup, series = self.get_optimal_rollup_series(start, end, rollup)
4558

@@ -112,6 +125,19 @@ def merge_distinct_counts(self, model, destination, sources, timestamp=None):
112125
for bucket, values in self.sets[model].pop(source, {}).items():
113126
destination[bucket].update(values)
114127

128+
def delete_distinct_counts(self, models, keys, start=None, end=None, timestamp=None):
    """
    Remove distinct-counter buckets for every (model, key) pair.

    The buckets to drop come from ``get_active_series(start, end,
    timestamp)``, normalized with ``normalize_to_rollup``. Buckets that
    are not present are ignored.
    """
    rollups = self.get_active_series(start, end, timestamp)

    for rollup, series in rollups.items():
        # Normalize once per rollup; the list also keeps this correct when
        # ``series`` is a one-shot iterator.
        buckets = [self.normalize_to_rollup(ts, rollup) for ts in series]
        for model in models:
            for key in keys:
                # Distinct counters are kept in ``self.sets`` (the store
                # ``merge_distinct_counts`` works on), not in ``self.data``,
                # which holds the plain counters.
                data = self.sets[model][key]
                for bucket in buckets:
                    data.pop(bucket, None)
140+
115141
def flush(self):
116142
# self.data[model][key][rollup] = count
117143
self.data = defaultdict(
@@ -198,3 +224,16 @@ def merge_frequencies(self, model, destination, sources, timestamp=None):
198224
for source in sources:
199225
for bucket, counter in self.data[model].pop(source, {}).items():
200226
destination[bucket].update(counter)
227+
228+
def delete_frequencies(self, models, keys, start=None, end=None, timestamp=None):
    """
    Remove frequency-table buckets for every (model, key) pair.

    The buckets to drop come from ``get_active_series(start, end,
    timestamp)``, normalized with ``normalize_to_rollup``. Buckets that
    are not present are ignored.
    """
    rollups = self.get_active_series(start, end, timestamp)

    for rollup, series in rollups.items():
        # Normalize once per rollup; the list also keeps this correct when
        # ``series`` is a one-shot iterator.
        buckets = [self.normalize_to_rollup(ts, rollup) for ts in series]
        for model in models:
            for key in keys:
                # NOTE(review): this pops from ``self.data``, the store the
                # plain counters use -- confirm this is where frequency
                # tables actually live for this backend.
                data = self.data[model][key]
                for bucket in buckets:
                    data.pop(bucket, None)

src/sentry/tsdb/redis.py

Lines changed: 48 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -214,14 +214,7 @@ def get_range(self, model, keys, start, end, rollup=None):
214214
return dict(results_by_key)
215215

216216
def merge(self, model, destination, sources, timestamp=None):
217-
rollups = {}
218-
for rollup, samples in self.rollups.items():
219-
_, series = self.get_optimal_rollup_series(
220-
to_datetime(self.get_earliest_timestamp(rollup, timestamp=timestamp)),
221-
end=None,
222-
rollup=rollup,
223-
)
224-
rollups[rollup] = map(to_datetime, series)
217+
rollups = self.get_active_series(timestamp=timestamp)
225218

226219
with self.cluster.map() as client:
227220
data = {}
@@ -265,6 +258,24 @@ def merge(self, model, destination, sources, timestamp=None):
265258
),
266259
)
267260

261+
def delete(self, models, keys, start=None, end=None, timestamp=None):
    """
    Delete counter values for every (model, key) pair.

    For each rollup and each entry of its series from
    ``get_active_series(start, end, timestamp)``, one ``HDEL`` is issued
    per (model, key). The hash is named by ``make_counter_key`` and the
    field removed is ``get_model_key(key)``. All commands go through a
    single ``self.cluster.map()`` client.
    """
    rollups = self.get_active_series(start, end, timestamp)

    with self.cluster.map() as client:
        for rollup, series in rollups.items():
            for bucket in series:
                # Depends only on the bucket and rollup, so compute it once
                # rather than once per (model, key).
                epoch = self.normalize_to_rollup(bucket, rollup)
                for model in models:
                    for key in keys:
                        model_key = self.get_model_key(key)
                        client.hdel(
                            self.make_counter_key(model, epoch, model_key),
                            model_key,
                        )
278+
268279
def record(self, model, key, values, timestamp=None):
269280
self.record_multi(((model, key, values),), timestamp)
270281

@@ -434,14 +445,7 @@ def merge_aggregates(values):
434445
)
435446

436447
def merge_distinct_counts(self, model, destination, sources, timestamp=None):
437-
rollups = {}
438-
for rollup, samples in self.rollups.items():
439-
_, series = self.get_optimal_rollup_series(
440-
to_datetime(self.get_earliest_timestamp(rollup, timestamp=timestamp)),
441-
end=None,
442-
rollup=rollup,
443-
)
444-
rollups[rollup] = map(to_datetime, series)
448+
rollups = self.get_active_series(timestamp=timestamp)
445449

446450
temporary_id = uuid.uuid1().hex
447451

@@ -499,6 +503,23 @@ def make_temporary_key(key):
499503
),
500504
)
501505

506+
def delete_distinct_counts(self, models, keys, start=None, end=None, timestamp=None):
    """
    Delete distinct-counter keys for every (model, key) pair.

    For each rollup and each entry of its series from
    ``get_active_series(start, end, timestamp)``, the key built by
    ``make_key(model, rollup, to_timestamp(entry), key)`` is deleted
    through a single ``self.cluster.map()`` client.
    """
    active = self.get_active_series(start, end, timestamp)

    with self.cluster.map() as client:
        for rollup, series in active.items():
            for bucket in series:
                for model in models:
                    for key in keys:
                        redis_key = self.make_key(
                            model,
                            rollup,
                            to_timestamp(bucket),
                            key,
                        )
                        client.delete(redis_key)
522+
502523
def make_frequency_table_keys(self, model, rollup, timestamp, key):
503524
prefix = self.make_key(model, rollup, timestamp, key)
504525
return map(
@@ -697,3 +718,14 @@ def merge_frequencies(self, model, destination, sources, timestamp=None):
697718
self.cluster.execute_commands({
698719
destination: imports,
699720
})
721+
722+
def delete_frequencies(self, models, keys, start=None, end=None, timestamp=None):
    """
    Delete frequency-table keys for every (model, key) pair.

    For each rollup and each entry of its series from
    ``get_active_series(start, end, timestamp)``, every key returned by
    ``make_frequency_table_keys`` is deleted through a single
    ``self.cluster.map()`` client.
    """
    active = self.get_active_series(start, end, timestamp)

    with self.cluster.map() as client:
        for rollup, series in active.items():
            for bucket in series:
                for model in models:
                    for key in keys:
                        table_keys = self.make_frequency_table_keys(
                            model,
                            rollup,
                            to_timestamp(bucket),
                            key,
                        )
                        for table_key in table_keys:
                            client.delete(table_key)

tests/sentry/tsdb/test_redis.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,14 @@ def timestamp(d):
109109
2: 0,
110110
}
111111

112+
self.db.delete([TSDBModel.project], [1, 2], dts[0], dts[-1])
113+
114+
results = self.db.get_sums(TSDBModel.project, [1, 2], dts[0], dts[-1])
115+
assert results == {
116+
1: 0,
117+
2: 0,
118+
}
119+
112120
def test_count_distinct(self):
113121
now = datetime.utcnow().replace(tzinfo=pytz.UTC) - timedelta(hours=4)
114122
dts = [now + timedelta(hours=i) for i in range(4)]
@@ -211,6 +219,14 @@ def timestamp(d):
211219
assert self.db.get_distinct_counts_union(model, [1, 2], dts[0], dts[-1], rollup=3600) == 3
212220
assert self.db.get_distinct_counts_union(model, [2], dts[0], dts[-1], rollup=3600) == 0
213221

222+
self.db.delete_distinct_counts([model], [1, 2], dts[0], dts[-1])
223+
224+
results = self.db.get_distinct_counts_totals(model, [1, 2], dts[0], dts[-1])
225+
assert results == {
226+
1: 0,
227+
2: 0,
228+
}
229+
214230
def test_frequency_tables(self):
215231
now = datetime.utcnow().replace(tzinfo=pytz.UTC)
216232
model = TSDBModel.frequent_projects_by_organization
@@ -386,6 +402,22 @@ def test_frequency_tables(self):
386402
},
387403
}
388404

405+
self.db.delete_frequencies(
406+
[model],
407+
['organization:1', 'organization:2'],
408+
now - timedelta(hours=1),
409+
now,
410+
)
411+
412+
assert self.db.get_most_frequent(
413+
model,
414+
('organization:1', 'organization:2'),
415+
now,
416+
) == {
417+
'organization:1': [],
418+
'organization:2': [],
419+
}
420+
389421
def test_frequency_table_import_export_no_estimators(self):
390422
client = self.db.cluster.get_local_client_for_key('key')
391423

0 commit comments

Comments
 (0)