@@ -25,6 +25,7 @@ defmodule Realtime.Integration.RtChannelTest do
25
25
alias Realtime.Tenants
26
26
alias Realtime.Tenants.Authorization
27
27
alias Realtime.Tenants.Connect
28
+ alias Realtime.Tenants.ReplicationConnection
28
29
29
30
alias RealtimeWeb.RealtimeChannel.Tracker
30
31
alias RealtimeWeb.SocketDisconnect
@@ -2354,6 +2355,135 @@ defmodule Realtime.Integration.RtChannelTest do
2354
2355
assert count == 2
2355
2356
end
2356
2357
2358
+ describe "WAL bloat handling" do
2359
# Prepares the database fixtures used by the WAL bloat tests.
#
# Steps:
#   1. Checks that the database reports the WAL limits this describe block expects.
#   2. Creates the `wal_test` table and a row-level trigger that calls
#      `realtime.send/4` with a randomly generated topic on every insert,
#      update or delete.
#   3. Registers an `on_exit` callback that drops the table again.
#
# Returns `%{topic: topic}` so tests can join the topic the trigger sends to.
setup %{tenant: tenant} do
  topic = random_string()
  {:ok, db_conn} = Database.connect(tenant, "realtime_test", :stop)

  %{rows: [[max_wal_size]]} = Postgrex.query!(db_conn, "SHOW max_wal_size", [])
  %{rows: [[wal_keep_size]]} = Postgrex.query!(db_conn, "SHOW wal_keep_size", [])
  %{rows: [[max_slot_wal_keep_size]]} = Postgrex.query!(db_conn, "SHOW max_slot_wal_keep_size", [])

  # Fail early if the database is not configured with the expected WAL limits.
  # NOTE(review): presumably the bloat test depends on these small limits - confirm.
  assert max_wal_size == "32MB"
  assert wal_keep_size == "32MB"
  assert max_slot_wal_keep_size == "32MB"

  Postgrex.query!(db_conn, "CREATE TABLE IF NOT EXISTS wal_test (id INT, data TEXT)", [])

  # The trigger function sends a fixed payload to the per-test topic.
  Postgrex.query!(
    db_conn,
    """
    CREATE OR REPLACE FUNCTION wal_test_trigger_func() RETURNS TRIGGER AS $$
    BEGIN
    PERFORM realtime.send(json_build_object ('value', 'test' :: text)::jsonb, 'test', '#{topic}', false);
    RETURN NULL;
    END;
    $$ LANGUAGE plpgsql;
    """,
    []
  )

  # Drop-then-create keeps the setup re-runnable when the table already exists.
  Postgrex.query!(db_conn, "DROP TRIGGER IF EXISTS wal_test_trigger ON wal_test", [])

  Postgrex.query!(
    db_conn,
    """
    CREATE TRIGGER wal_test_trigger
    AFTER INSERT OR UPDATE OR DELETE ON wal_test
    FOR EACH ROW
    EXECUTE FUNCTION wal_test_trigger_func()
    """,
    []
  )

  GenServer.stop(db_conn)

  on_exit(fn ->
    # The setup connection is already stopped, so cleanup opens its own one.
    {:ok, cleanup_conn} = Database.connect(tenant, "realtime_test", :stop)

    Postgrex.query!(cleanup_conn, "DROP TABLE IF EXISTS wal_test CASCADE", [])

    # Stop the cleanup connection too, mirroring the setup body, so it does not
    # outlive the test.
    GenServer.stop(cleanup_conn)
  end)

  %{topic: topic}
end
2409
+
2410
# Checks how the tenant's processes behave after heavy write load on `wal_test`.
#
# Expectations asserted below:
#   * the backend PID holding the replication slot changes,
#   * the `Connect` process for the tenant stays the same,
#   * the `ReplicationConnection` process is a different one,
#   * the websocket still delivers client broadcasts and trigger-generated messages.
test "track PID changes during WAL bloat creation", %{tenant: tenant, topic: topic} do
  {socket, _} = get_connection(tenant, "authenticated")
  full_topic = "realtime:#{topic}"
  join_payload = %{config: %{broadcast: %{self: true}, private: false}}

  slot_pid_sql =
    "SELECT active_pid FROM pg_replication_slots where active_pid is not null and slot_name = 'supabase_realtime_messages_replication_slot_'"

  WebsocketClient.join(socket, full_topic, join_payload)

  assert_receive %Message{event: "phx_reply", payload: %{"status" => "ok"}}, 500
  assert_receive %Message{event: "presence_state"}, 500

  assert Connect.ready?(tenant.external_id)

  {:ok, initial_conn} = Connect.lookup_or_start_connection(tenant.external_id)

  # Snapshot of the process / backend identities before generating load.
  original_connect_pid = Connect.whereis(tenant.external_id)
  original_replication_pid = ReplicationConnection.whereis(tenant.external_id)
  %{rows: [[original_db_pid]]} = Postgrex.query!(initial_conn, slot_pid_sql, [])

  # Five concurrent writers, each on its own "realtime_bloat" connection.
  1..5
  |> Enum.map(fn _ ->
    Task.async(fn ->
      {:ok, bloat_conn} = Database.connect(tenant, "realtime_bloat", :stop)

      Postgrex.transaction(bloat_conn, fn tx_conn ->
        # The insert result is intentionally not matched on.
        Postgrex.query(tx_conn, "INSERT INTO wal_test SELECT generate_series(1, 100000), repeat('x', 2000)", [])
        {:error, "test"}
      end)

      Process.exit(bloat_conn, :normal)
    end)
  end)
  |> Task.await_many(20_000)

  # Kill all pending transactions still running
  Postgrex.query!(
    initial_conn,
    "SELECT pg_terminate_backend(pid) from pg_stat_activity where application_name='realtime_bloat'",
    []
  )

  # Does it recover?
  assert Connect.ready?(tenant.external_id)
  {:ok, recovered_conn} = Connect.lookup_or_start_connection(tenant.external_id)
  Process.sleep(1000)
  %{rows: [[new_db_pid]]} = Postgrex.query!(recovered_conn, slot_pid_sql, [])

  refute new_db_pid == original_db_pid
  assert Connect.whereis(tenant.external_id) == original_connect_pid
  refute ReplicationConnection.whereis(tenant.external_id) == original_replication_pid

  # Check if socket is still connected
  payload = %{"event" => "TEST", "payload" => %{"msg" => 1}, "type" => "broadcast"}
  WebsocketClient.send_event(socket, full_topic, "broadcast", payload)
  assert_receive %Message{event: "broadcast", payload: ^payload, topic: ^full_topic}, 500

  # Check if we are receiving the message from replication connection
  Postgrex.query!(recovered_conn, "INSERT INTO wal_test VALUES (1, 'test')", [])

  assert_receive %Phoenix.Socket.Message{
                   event: "broadcast",
                   payload: %{
                     "event" => "test",
                     "payload" => %{"value" => "test"},
                     "type" => "broadcast"
                   },
                   join_ref: nil,
                   ref: nil,
                   topic: ^full_topic
                 },
                 5000
end
2485
+ end
2486
+
2357
2487
defp mode ( % { mode: :distributed } ) do
2358
2488
tenant = Api . get_tenant_by_external_id ( "dev_tenant" )
2359
2489
0 commit comments