22 * License, v. 2.0. If a copy of the MPL was not distributed with this
33 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
44
5+ use std:: {
6+ collections:: HashMap ,
7+ time:: { Duration , Instant } ,
8+ } ;
9+
510use parking_lot:: Mutex ;
611
7- static RECENT_BREADCRUMBS : Mutex < BreadcrumbRingBuffer > = Mutex :: new ( BreadcrumbRingBuffer :: new ( ) ) ;
12+ static GLOBALS : Mutex < Globals > = Mutex :: new ( Globals :: new ( ) ) ;
813
914pub fn report_error_to_app ( type_name : String , message : String ) {
15+ // First step: perform all actions that we need the lock for.
16+ let breadcrumbs = {
17+ let mut globals = GLOBALS . lock ( ) ;
18+ if !globals
19+ . rate_limiter
20+ . should_send_report ( & type_name, Instant :: now ( ) )
21+ {
22+ return ;
23+ }
24+ globals. breadcrumbs . get_breadcrumbs ( )
25+ } ;
1026 // Report errors by sending a tracing event to the `app-services-error-reporter::error` target.
1127 //
1228 // Applications should register for these events and send a glean error ping when they occur.
1329 //
1430 // breadcrumbs will be sent in the `breadcrumbs` field as a single string, with each individual
1531 // breadcrumb joined by newlines.
16- let breadcrumbs = RECENT_BREADCRUMBS . lock ( ) . get_breadcrumbs ( ) . join ( "\n " ) ;
32+ let breadcrumbs = breadcrumbs . join ( "\n " ) ;
1733 tracing_support:: error!( target: "app-services-error-reporter::error" , message, type_name, breadcrumbs) ;
1834}
1935
@@ -22,10 +38,25 @@ pub fn report_breadcrumb(message: String, module: String, line: u32, column: u32
2238 // - Push it to the `RECENT_BREADCRUMBS` list
2339 // - Send out the `app-services-error-reporter::breadcrumb`. Applications can register for
2440 // these events and log them.
25- RECENT_BREADCRUMBS . lock ( ) . push ( message. clone ( ) ) ;
41+ GLOBALS . lock ( ) . breadcrumbs . push ( message. clone ( ) ) ;
2642 tracing_support:: info!( target: "app-services-error-reporter::breadcrumb" , message, module, line, column) ;
2743}
2844
45+ // Global structs used for error reporting
46+ struct Globals {
47+ breadcrumbs : BreadcrumbRingBuffer ,
48+ rate_limiter : RateLimiter ,
49+ }
50+
51+ impl Globals {
52+ const fn new ( ) -> Self {
53+ Self {
54+ breadcrumbs : BreadcrumbRingBuffer :: new ( ) ,
55+ rate_limiter : RateLimiter :: new ( ) ,
56+ }
57+ }
58+ }
59+
2960/// Ring buffer implementation that we use to store the most recent 20 breadcrumbs
3061#[ derive( Default ) ]
3162struct BreadcrumbRingBuffer {
@@ -73,6 +104,49 @@ fn truncate_breadcrumb(breadcrumb: String) -> String {
73104 breadcrumb[ 0 ..split_point] . to_string ( )
74105}
75106
107+ /// Rate-limits error reports per component by type to a max of 20/hr
108+ ///
109+ /// This uses the simplest algorithm possible. We could use something like a token bucket to allow
110+ /// for a small burst of errors, but that doesn't seem so useful. In that scenario, the first
111+ /// error report is the one we want to fix.
112+ struct RateLimiter {
113+ // Optional so we can make `new()` const.
114+ last_report : Option < HashMap < String , Instant > > ,
115+ }
116+
117+ impl RateLimiter {
118+ // Rate limit reports if they're within 3 minutes of each other.
119+ const INTERVAL : Duration = Duration :: from_secs ( 180 ) ;
120+
121+ const fn new ( ) -> Self {
122+ Self { last_report : None }
123+ }
124+
125+ fn should_send_report ( & mut self , error_type : & str , now : Instant ) -> bool {
126+ let component = error_type. split ( "-" ) . next ( ) . unwrap ( ) ;
127+ let last_report = self . last_report . get_or_insert_with ( HashMap :: default) ;
128+
129+ if let Some ( last_report) = last_report. get ( component) {
130+ match now. checked_duration_since ( * last_report) {
131+ // Not enough time has passed, rate-limit the report
132+ Some ( elapsed) if elapsed < Self :: INTERVAL => {
133+ return false ;
134+ }
135+ // For all other cases, fall through and allow the report to be sent.
136+ //
137+ // Note: this also covers the `None` case which happens when the clock is
138+ // non-monotonic. This shouldn't happen often, but it's possible after the OS syncs
139+ // with NTP, if users manually adjust their clocks, etc. Letting an extra event
140+ // through seems okay in this case. We should get back into a good state soon
141+ // after.
142+ _ => ( ) ,
143+ }
144+ }
145+ last_report. insert ( component. to_string ( ) , now) ;
146+ true
147+ }
148+ }
149+
76150#[ cfg( test) ]
77151mod test {
78152 use super :: * ;
@@ -212,4 +286,41 @@ mod test {
212286 // fire emoji, which is multiple bytes long.
213287 assert_eq ! ( truncate_breadcrumb( "0" . repeat( 99 ) + "🔥" ) . len( ) , 99 ) ;
214288 }
289+
290+ #[ test]
291+ fn test_rate_limiter ( ) {
292+ let mut rate_limiter = RateLimiter :: new ( ) ;
293+ let start = Instant :: now ( ) ;
294+ let min = Duration :: from_secs ( 60 ) ;
295+ // The first error report is okay
296+ assert ! ( rate_limiter. should_send_report( "test-type" , start) ) ;
297+ // The report should be rate limited until 3 minutes pass, then we can send another one.
298+ // Add time from the instant to simulate time going forward.
299+ assert ! ( !rate_limiter. should_send_report( "test-type" , start) ) ;
300+ assert ! ( !rate_limiter. should_send_report( "test-type" , start + min * 1 ) ) ;
301+ assert ! ( !rate_limiter. should_send_report( "test-type" , start + min * 2 ) ) ;
302+ assert ! ( rate_limiter. should_send_report( "test-type" , start + min * 3 ) ) ;
303+ assert ! ( !rate_limiter. should_send_report( "test-type" , start + min * 4 ) ) ;
304+ assert ! ( !rate_limiter. should_send_report( "test-type" , start + min * 5 ) ) ;
305+ assert ! ( rate_limiter. should_send_report( "test-type" , start + min * 6 ) ) ;
306+
307+ assert ! ( rate_limiter. should_send_report( "test-type" , start + min * 60 ) ) ;
308+ assert ! ( !rate_limiter. should_send_report( "test-type" , start + min * 61 ) ) ;
309+ assert ! ( !rate_limiter. should_send_report( "test-type" , start + min * 62 ) ) ;
310+ assert ! ( rate_limiter. should_send_report( "test-type" , start + min * 63 ) ) ;
311+ }
312+
313+ #[ test]
314+ fn test_rate_limiter_type_matching ( ) {
315+ let mut rate_limiter = RateLimiter :: new ( ) ;
316+ let start = Instant :: now ( ) ;
317+ // Cause error error reports to be rate limited
318+ assert ! ( rate_limiter. should_send_report( "componenta-network-error" , start) ) ;
319+ assert ! ( !rate_limiter. should_send_report( "componenta-network-error" , start) ) ;
320+ // Other reports from the same component should also be rate limited
321+ assert ! ( !rate_limiter. should_send_report( "componenta-database-error" , start) ) ;
322+ // But not ones from other components
323+ assert ! ( rate_limiter. should_send_report( "componentb-database-error" , start) ) ;
324+ assert ! ( rate_limiter. should_send_report( "componentaa-network-error" , start) ) ;
325+ }
215326}
0 commit comments