@@ -227,3 +227,225 @@ async def get_event(destination: str, event_id: str, timeout=None):
227227
228228 if prev_exists_as_outlier :
229229 self .mock_federation_transport_client .get_event .assert_not_called ()
230+
def test_process_pulled_event_records_failed_backfill_attempts(
    self,
) -> None:
    """
    Test to make sure that failed backfill attempts for an event are
    recorded in the `event_failed_pull_attempts` table.

    In this test, we pretend we are processing a "pulled" event via
    backfill. The pulled event has a fake `prev_event` which our server has
    never seen before, so it attempts to request the state at that
    `prev_event`. That request is stubbed to return nothing, so the server
    can't resolve the state at the missing `prev_event`, the "pulled" event
    fails the history check and fails to process.

    We check that we correctly record the number of failed pull attempts
    of the pulled event and, as a sanity check, that the "pulled" event
    isn't persisted.
    """
    # NOTE: there must be no whitespace after the replacement field; any
    # literal trailing space would end up in the sender and the user would no
    # longer belong to `OTHER_SERVER_NAME`.
    OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
    main_store = self.hs.get_datastores().main

    # Create the room
    user_id = self.register_user("kermit", "test")
    tok = self.login("kermit", "test")
    room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
    room_version = self.get_success(main_store.get_room_version(room_id))

    # We expect an outbound request to /state_ids, so stub that out
    self.mock_federation_transport_client.get_room_state_ids.return_value = make_awaitable(
        {
            # Mimic the other server not knowing about the state at all.
            # We want to cause Synapse to throw an error (`Unable to get
            # missing prev_event $fake_prev_event`) and fail to backfill
            # the pulled event.
            "pdu_ids": [],
            "auth_chain_ids": [],
        }
    )
    # We also expect an outbound request to /state
    self.mock_federation_transport_client.get_room_state.return_value = make_awaitable(
        StateRequestResponse(
            # Mimic the other server not knowing about the state at all.
            # We want to cause Synapse to throw an error (`Unable to get
            # missing prev_event $fake_prev_event`) and fail to backfill
            # the pulled event.
            auth_events=[],
            state=[],
        )
    )

    pulled_event = make_event_from_dict(
        self.add_hashes_and_signatures_from_other_server(
            {
                "type": "test_regular_type",
                "room_id": room_id,
                "sender": OTHER_USER,
                "prev_events": [
                    # The fake prev event will make the pulled event fail
                    # the history check (`Unable to get missing prev_event
                    # $fake_prev_event`)
                    "$fake_prev_event"
                ],
                "auth_events": [],
                "origin_server_ts": 1,
                "depth": 12,
                "content": {"body": "pulled"},
            }
        ),
        room_version,
    )

    # Process the same pulled event twice. Every attempt is expected to fail,
    # so `num_attempts` should read 1 after the first pass and be incremented
    # to 2 after the second.
    for expected_num_attempts in (1, 2):
        # The function under test: try to process the pulled event
        with LoggingContext("test"):
            self.get_success(
                self.hs.get_federation_event_handler()._process_pulled_event(
                    self.OTHER_SERVER_NAME, pulled_event, backfilled=True
                )
            )

        # Make sure this failed pull attempt was recorded. No `allow_none`
        # here: a missing row should make the lookup itself fail.
        backfill_num_attempts = self.get_success(
            main_store.db_pool.simple_select_one_onecol(
                table="event_failed_pull_attempts",
                keyvalues={"event_id": pulled_event.event_id},
                retcol="num_attempts",
            )
        )
        self.assertEqual(backfill_num_attempts, expected_num_attempts)

    # And as a sanity check, make sure the event was not persisted through
    # all of this.
    persisted = self.get_success(
        main_store.get_event(pulled_event.event_id, allow_none=True)
    )
    self.assertIsNone(
        persisted,
        "pulled event that fails the history check should not be persisted at all",
    )
346+
def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_persisted(
    self,
) -> None:
    """
    Test to make sure that failed pull attempts
    (`event_failed_pull_attempts` table) for an event are cleared after the
    event is successfully persisted.

    In this test, we pretend we are processing a "pulled" event via
    backfill. The pulled event successfully processes and the backward
    extremities are updated along with clearing out any failed pull attempts
    for those old extremities.

    We check that we correctly cleared failed pull attempts of the
    pulled event.
    """
    # NOTE: there must be no whitespace after the replacement field; any
    # literal trailing space would end up in the sender and the user would no
    # longer belong to `OTHER_SERVER_NAME`.
    OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
    main_store = self.hs.get_datastores().main

    # Create the room
    user_id = self.register_user("kermit", "test")
    tok = self.login("kermit", "test")
    room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
    room_version = self.get_success(main_store.get_room_version(room_id))

    # allow the remote user to send state events
    self.helper.send_state(
        room_id,
        "m.room.power_levels",
        {"events_default": 0, "state_default": 0},
        tok=tok,
    )

    # add the remote user to the room
    member_event = self.get_success(
        event_injection.inject_member_event(self.hs, room_id, OTHER_USER, "join")
    )

    initial_state_map = self.get_success(
        main_store.get_partial_current_state_ids(room_id)
    )

    # Auth the pulled event against the room's create event, the power
    # levels set above and the remote user's own join.
    auth_event_ids = [
        initial_state_map[("m.room.create", "")],
        initial_state_map[("m.room.power_levels", "")],
        member_event.event_id,
    ]

    pulled_event = make_event_from_dict(
        self.add_hashes_and_signatures_from_other_server(
            {
                "type": "test_regular_type",
                "room_id": room_id,
                "sender": OTHER_USER,
                # Unlike the failure case, the prev event is a real event
                # (the member event injected above).
                "prev_events": [member_event.event_id],
                "auth_events": auth_event_ids,
                "origin_server_ts": 1,
                "depth": 12,
                "content": {"body": "pulled"},
            }
        ),
        room_version,
    )

    # Fake the "pulled" event failing to backfill once so we can test
    # if it's cleared out later on.
    self.get_success(
        main_store.record_event_failed_pull_attempt(
            pulled_event.room_id, pulled_event.event_id, "fake cause"
        )
    )
    # Make sure we have a failed pull attempt recorded for the pulled event
    backfill_num_attempts = self.get_success(
        main_store.db_pool.simple_select_one_onecol(
            table="event_failed_pull_attempts",
            keyvalues={"event_id": pulled_event.event_id},
            retcol="num_attempts",
        )
    )
    self.assertEqual(backfill_num_attempts, 1)

    # The function under test: try to process the pulled event
    with LoggingContext("test"):
        self.get_success(
            self.hs.get_federation_event_handler()._process_pulled_event(
                self.OTHER_SERVER_NAME, pulled_event, backfilled=True
            )
        )

    # Make sure the failed pull attempts for the pulled event are cleared.
    # `allow_none=True` because the row is expected to be gone, in which case
    # the lookup yields `None`.
    backfill_num_attempts = self.get_success(
        main_store.db_pool.simple_select_one_onecol(
            table="event_failed_pull_attempts",
            keyvalues={"event_id": pulled_event.event_id},
            retcol="num_attempts",
            allow_none=True,
        )
    )
    self.assertIsNone(backfill_num_attempts)

    # And as a sanity check, make sure the "pulled" event was persisted.
    persisted = self.get_success(
        main_store.get_event(pulled_event.event_id, allow_none=True)
    )
    self.assertIsNotNone(persisted, "pulled event was not persisted at all")
0 commit comments