Skip to content

Commit fa8b499

Browse files
authored
Implement event deduplication (#650)
1 parent f0a35fe commit fa8b499

File tree

9 files changed

+200
-11
lines changed

9 files changed

+200
-11
lines changed

lib/sentry/application.ex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ defmodule Sentry.Application do
2020
end
2121

2222
children =
23-
[{Registry, keys: :unique, name: Sentry.Transport.SenderRegistry}] ++
23+
[{Registry, keys: :unique, name: Sentry.Transport.SenderRegistry}, Sentry.Dedupe] ++
2424
maybe_http_client_spec ++
2525
[Sentry.Transport.SenderPool]
2626

lib/sentry/client.ex

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ defmodule Sentry.Client do
55
# and sampling.
66
# See https://develop.sentry.dev/sdk/unified-api/#client.
77

8-
alias Sentry.{Config, Envelope, Event, Interfaces, Transport}
8+
alias Sentry.{Config, Dedupe, Envelope, Event, Interfaces, Transport}
99

1010
require Logger
1111

@@ -31,7 +31,8 @@ defmodule Sentry.Client do
3131

3232
result =
3333
with {:ok, %Event{} = event} <- maybe_call_before_send(event, before_send),
34-
:ok <- sample_event(sample_rate) do
34+
:ok <- sample_event(sample_rate),
35+
:ok <- maybe_dedupe(event) do
3536
send_result = encode_and_send(event, result_type, client, request_retries)
3637
_ignored = maybe_call_after_send(event, send_result, after_send_event)
3738
send_result
@@ -64,6 +65,21 @@ defmodule Sentry.Client do
6465
end
6566
end
6667

68+
# Deduplication gate for the send pipeline. Returns :ok when the event should go on
# (deduplication is turned off in the config, or the event is :new to Sentry.Dedupe) and
# :excluded when Sentry.Dedupe reports the event as :existing.
defp maybe_dedupe(%Event{} = event) do
  if Config.dedup_events?() do
    drop_if_duplicate(event)
  else
    :ok
  end
end

# Records the event in the dedupe table; logs and excludes it if it was already there.
defp drop_if_duplicate(%Event{} = event) do
  case Dedupe.insert(event) do
    :existing ->
      log("Event dropped due to being a duplicate of a previously-captured event.")
      :excluded

    :new ->
      :ok
  end
end
82+
6783
defp maybe_call_before_send(event, nil) do
6884
{:ok, event}
6985
end
@@ -228,9 +244,10 @@ defmodule Sentry.Client do
228244
nil
229245
end
230246

231-
if message do
232-
level = Config.log_level()
233-
Logger.log(level, fn -> ["Failed to send Sentry event. ", message] end, domain: [:sentry])
234-
end
247+
if message, do: log(fn -> ["Failed to send Sentry event. ", message] end)
248+
end
249+
250+
# Logs `message` at the level returned by Config.log_level/0, tagged with the :sentry
# domain. Callers in this module pass either a string or a zero-arity function.
defp log(message) do
  level = Config.log_level()
  Logger.log(level, message, domain: [:sentry])
end
236253
end

lib/sentry/config.ex

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,18 @@ defmodule Sentry.Config do
129129
behaviour. Defaults to `Sentry.DefaultEventFilter`. See the
130130
[*Filtering Exceptions* section](#module-filtering-exceptions) below.
131131
"""
132+
],
133+
dedup_events: [
134+
type: :boolean,
135+
default: true,
136+
doc: """
137+
Whether to **deduplicate** events before reporting them to Sentry. If this option is `true`,
138+
then the SDK will store reported events for around 30 seconds after they're reported.
139+
Any time the SDK is about to report an event, it will check if it has already reported it
140+
within the past 30 seconds. If it has, then it will not report the event again, and will
141+
log a message instead. Events are deduplicated by comparing their message, exception,
142+
stacktrace, and fingerprint. *Available since v10.0.0*.
143+
"""
132144
]
133145
]
134146

@@ -443,6 +455,9 @@ defmodule Sentry.Config do
443455
@spec max_breadcrumbs() :: non_neg_integer()
444456
def max_breadcrumbs, do: fetch!(:max_breadcrumbs)
445457

458+
# Reads the :dedup_events option.
@spec dedup_events?() :: boolean()
def dedup_events? do
  fetch!(:dedup_events)
end
460+
446461
@spec put_config(atom(), term()) :: :ok
447462
def put_config(key, value) when is_atom(key) do
448463
case NimbleOptions.validate([{key, value}], @opts_schema) do

lib/sentry/dedupe.ex

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
defmodule Sentry.Dedupe do
  @moduledoc false

  # Tracks recently-seen events in a public, named ETS table so that callers can tell
  # whether an event is a duplicate. Each row is {hash, last_seen_at}. This GenServer owns
  # the table and periodically sweeps rows that have not been seen for longer than the TTL.

  use GenServer

  alias Sentry.Event

  @ets __MODULE__
  @sweep_interval_millisec 10_000
  @ttl_millisec 30_000

  ## State
  defstruct [:ttl_millisec]

  ## Public API

  # Starts the dedupe server registered under this module's name.
  #
  # Options:
  #   * :ttl_millisec - how long (in milliseconds) a row may go unseen before a sweep
  #     removes it. Defaults to @ttl_millisec.
  @spec start_link(keyword()) :: GenServer.on_start()
  def start_link(opts) when is_list(opts) do
    ttl_millisec = Keyword.get(opts, :ttl_millisec, @ttl_millisec)
    GenServer.start_link(__MODULE__, ttl_millisec, name: __MODULE__)
  end

  # Records `event` (by its hash) in the table.
  #
  # Returns :new if no row existed for the event's hash, and :existing otherwise. Seeing
  # an existing event refreshes its timestamp, so the TTL is counted from the last time
  # the event was seen rather than from the first.
  @spec insert(Event.t()) :: :new | :existing
  def insert(%Event{} = event) do
    hash = Event.hash(event)
    now = now_millisec()

    cond do
      # A row for this hash is already there: refresh its timestamp.
      _found? = :ets.update_element(@ets, hash, {_position = 2, now}) -> :existing
      _inserted_new? = :ets.insert_new(@ets, {hash, now}) -> :new
      # insert_new/2 refused the row, so another process inserted the same hash between
      # the two calls above.
      true -> :existing
    end
  end

  ## Callbacks

  @impl true
  def init(ttl_millisec) do
    # Public so that insert/1 can write to the table from the caller's process.
    _table = :ets.new(@ets, [:named_table, :public, :set])
    schedule_sweep()
    {:ok, %__MODULE__{ttl_millisec: ttl_millisec}}
  end

  @impl true
  def handle_info(:sweep, %__MODULE__{} = state) do
    cutoff = now_millisec() - state.ttl_millisec

    # Delete every {hash, last_seen_at} row whose last_seen_at is older than the cutoff.
    match_spec = [{{:_, :"$1"}, [{:<, :"$1", cutoff}], [true]}]
    _deleted_count = :ets.select_delete(@ets, match_spec)

    schedule_sweep()
    {:noreply, state}
  end

  ## Helpers

  # Monotonic time is used for TTL bookkeeping because, unlike wall-clock time, it does
  # not jump when the system clock is adjusted.
  defp now_millisec, do: System.monotonic_time(:millisecond)

  defp schedule_sweep do
    Process.send_after(self(), :sweep, @sweep_interval_millisec)
  end
end

lib/sentry/event.ex

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,4 +466,16 @@ defmodule Sentry.Event do
466466
runtime: %{name: "elixir", version: System.build_info().build}
467467
}
468468
end
469+
470+
# Computes the hash that "Sentry.Dedupe" uses to decide whether two events are duplicates.
# Only the exception, message, level, and fingerprint fields take part in the hash.
@doc false
@spec hash(t()) :: non_neg_integer()
def hash(%__MODULE__{} = event) do
  %__MODULE__{exception: exception, message: message, level: level, fingerprint: fingerprint} =
    event

  :erlang.phash2([exception, message, level, fingerprint])
end
469481
end

mix.exs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ defmodule Sentry.Mixfile do
6666
mod: {Sentry.Application, []},
6767
extra_applications: [:logger],
6868
registered: [
69+
Sentry.Dedupe,
6970
Sentry.Transport.SenderRegistry,
7071
Sentry.Supervisor
7172
]

test/envelope_test.exs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ defmodule Sentry.EnvelopeTest do
22
use ExUnit.Case, async: false
33

44
alias Sentry.{Envelope, Event, Interfaces}
5+
alias Sentry.TestEnvironmentHelper
56

67
describe "from_binary/1" do
78
test "parses envelope with empty headers" do
@@ -132,6 +133,8 @@ defmodule Sentry.EnvelopeTest do
132133

133134
describe "to_binary/1" do
134135
test "encodes an envelope" do
136+
TestEnvironmentHelper.modify_env(:sentry, environment_name: "test")
137+
135138
event = Event.create_event([])
136139
envelope = Envelope.new([event])
137140

test/sentry/client_test.exs

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,24 +59,23 @@ defmodule Sentry.ClientTest do
5959
end
6060

6161
test "respects the :sample_rate option", %{bypass: bypass} do
62-
event = Event.create_event([])
63-
6462
# Always sends with sample rate of 1.
6563
Bypass.expect_once(bypass, fn conn ->
6664
Plug.Conn.resp(conn, 200, ~s<{"id": "340"}>)
6765
end)
6866

69-
assert {:ok, "340"} = Client.send_event(event, sample_rate: 1.0)
67+
assert {:ok, "340"} = Client.send_event(Event.create_event([]), sample_rate: 1.0)
7068

7169
# Never sends with sample rate of 0.
72-
assert :unsampled = Client.send_event(event, sample_rate: 0.0)
70+
assert :unsampled = Client.send_event(Event.create_event([]), sample_rate: 0.0)
7371

7472
# Either sends or doesn't with :sample_rate of 0.5.
7573
Bypass.expect(bypass, fn conn ->
7674
Plug.Conn.resp(conn, 200, ~s<{"id": "340"}>)
7775
end)
7876

7977
for _ <- 1..10 do
78+
event = Event.create_event(message: "Unique: #{System.unique_integer()}")
8079
result = Client.send_event(event, sample_rate: 0.5)
8180
assert match?({:ok, _}, result) or result == :unsampled
8281
end
@@ -276,5 +275,45 @@ defmodule Sentry.ClientTest do
276275
assert %Event{} = event
277276
assert event.message == "Something went wrong"
278277
end
278+
279+
test "dedupes events", %{bypass: bypass} do
  {:current_stacktrace, stacktrace} = Process.info(self(), :current_stacktrace)

  # Each of these events is sent twice: the second send must be treated as a duplicate.
  by_message = Event.create_event(message: "Dedupes by message")
  by_exception = Event.create_event(exception: %RuntimeError{message: "Dedupes by exception"})

  by_message_and_stacktrace =
    Event.create_event(message: "Dedupes by message and stacktrace", stacktrace: stacktrace)

  # These two differ only in user and request data and must still count as duplicates.
  first_with_extra =
    Event.create_event(
      message: "Same message but diff extra",
      user: %{id: 1},
      request: %{method: :GET}
    )

  second_with_extra =
    Event.create_event(
      message: "Same message but diff extra",
      user: %{id: 2},
      request: %{method: :POST}
    )

  pairs = [
    {by_message, by_message},
    {by_exception, by_exception},
    {by_message_and_stacktrace, by_message_and_stacktrace},
    {first_with_extra, second_with_extra}
  ]

  Enum.each(pairs, fn {event, dup_event} ->
    # Only the first event of each pair is expected to reach the server.
    Bypass.expect_once(bypass, fn conn ->
      Plug.Conn.resp(conn, 200, ~s<{"id": "340"}>)
    end)

    assert {:ok, "340"} = Client.send_event(event, [])

    log = capture_log(fn -> assert :excluded = Client.send_event(dup_event, []) end)

    assert log =~ "Event dropped due to being a duplicate of a previously-captured event."
  end)
end
279318
end
280319
end

test/sentry/dedupe_test.exs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
defmodule Sentry.DedupeTest do
  # This is not async because it tests a singleton (the dedupe GenServer).
  use ExUnit.Case, async: false

  alias Sentry.Dedupe
  alias Sentry.Event

  @ttl_millisec 25

  describe "insert/1" do
    test "works correctly" do
      stop_application()

      # The bang variant fails the test right here if the server cannot be started,
      # instead of letting a later ETS call fail with a less obvious error.
      start_supervised!({Dedupe, ttl_millisec: @ttl_millisec})

      event = %Event{
        message: "Something went wrong",
        timestamp: System.system_time(:millisecond),
        event_id: Sentry.UUID.uuid4_hex()
      }

      # First time, it's :new.
      assert Dedupe.insert(event) == :new

      # Then, it's :existing.
      assert Dedupe.insert(event) == :existing
      assert Dedupe.insert(event) == :existing

      # Now, we trigger a sweep after waiting for the TTL interval.
      # To ensure the :sweep message is processed, we use the trick
      # of asking the GenServer for its state (which is a sync call).
      Process.sleep(@ttl_millisec * 2)
      send(Dedupe, :sweep)
      _ = :sys.get_state(Dedupe)

      # Now, it's :new again.
      assert Dedupe.insert(event) == :new
      assert Dedupe.insert(event) == :existing
    end
  end

  # Erases every {:sentry_config, _} persistent term and stops the :sentry application,
  # so that the test can start its own Dedupe server under the singleton name.
  defp stop_application do
    for {{:sentry_config, _} = key, _val} <- :persistent_term.get() do
      :persistent_term.erase(key)
    end

    # Log output produced while the application stops is captured and discarded.
    ExUnit.CaptureLog.capture_log(fn -> Application.stop(:sentry) end)
  end
end

0 commit comments

Comments
 (0)