@@ -457,3 +457,210 @@ def test_flags_no_retry_on_quota_limit(self, mock_get_flags_session):
457457
458458 # QuotaLimitError is raised after response is received, not retried
459459 self .assertEqual (mock_session .post .call_count , 1 )
460+
461+
class TestFlagsSessionNetworkRetries(unittest.TestCase):
    """Configuration checks for the retry policy of the flags session."""

    @staticmethod
    def _flags_retry_policy():
        """Build a flags session and return the retry policy of its https adapter."""
        from posthog.request import _build_flags_session

        https_adapter = _build_flags_session().get_adapter("https://test.posthog.com")
        return https_adapter.max_retries

    def test_flags_session_retry_config_includes_connection_errors(self):
        """
        The retry policy permits two retries in total, two for connect
        errors and two for read errors, and lists POST as retryable.
        """
        policy = self._flags_retry_policy()

        # Each of these retry budgets is expected to be exactly 2.
        expected_budgets = (
            ("total", "Should have 2 total retries"),
            ("connect", "Should retry connection errors twice"),
            ("read", "Should retry read errors twice"),
        )
        for attribute, failure_message in expected_budgets:
            self.assertEqual(getattr(policy, attribute), 2, failure_message)

        self.assertIn("POST", policy.allowed_methods, "Should allow POST retries")

    def test_flags_session_retries_on_server_errors(self):
        """
        The policy's status_forcelist equals RETRY_STATUS_FORCELIST, contains
        500/502/503/504, and contains neither 429 nor 402.
        """
        from posthog.request import RETRY_STATUS_FORCELIST

        forcelist = self._flags_retry_policy().status_forcelist

        # The session must use the shared constant, order-insensitively.
        self.assertEqual(
            set(forcelist),
            set(RETRY_STATUS_FORCELIST),
            "Should retry on transient server errors",
        )

        # Transient server errors that must trigger a retry.
        for retried_status in (500, 502, 503, 504):
            self.assertIn(retried_status, forcelist)

        # Rate-limit (429) and quota (402) responses must not be retried.
        for non_retried_status in (429, 402):
            self.assertNotIn(non_retried_status, forcelist)

    def test_flags_session_has_backoff(self):
        """The retry policy is configured with a backoff factor of 0.5."""
        policy = self._flags_retry_policy()

        self.assertEqual(
            policy.backoff_factor,
            0.5,
            "Should use 0.5s backoff factor (0.5s, 1s delays)",
        )
533+
class TestFlagsSessionRetryIntegration(unittest.TestCase):
    """Integration tests that verify actual retry behavior over local sockets."""

    @staticmethod
    def _build_retry_adapter(backoff_factor):
        """
        Return an HTTP adapter carrying the retry settings used by these tests.

        The settings (total/connect/read of 2, POST only, the project's
        RETRY_STATUS_FORCELIST) are meant to mirror _build_flags_session;
        only ``backoff_factor`` varies so each test can keep its delays short.
        """
        from urllib3.util.retry import Retry
        from posthog.request import HTTPAdapterWithSocketOptions, RETRY_STATUS_FORCELIST

        return HTTPAdapterWithSocketOptions(
            max_retries=Retry(
                total=2,
                connect=2,
                read=2,
                backoff_factor=backoff_factor,
                status_forcelist=RETRY_STATUS_FORCELIST,
                allowed_methods=["POST"],
            ),
        )

    def test_retries_on_503_then_succeeds(self):
        """
        Verify that 503 responses trigger retries and the request eventually succeeds.

        Uses a local HTTP server that answers the first two POSTs with 503
        and every later one with 200, then checks that the client ends up
        with a 200 after exactly three requests reached the server.
        """
        import threading
        from http.server import HTTPServer, BaseHTTPRequestHandler
        from socketserver import ThreadingMixIn

        request_count = 0

        class RetryTestHandler(BaseHTTPRequestHandler):
            protocol_version = "HTTP/1.1"

            def do_POST(self):
                nonlocal request_count
                request_count += 1

                # Read and discard the request body so leftover bytes do not
                # corrupt the next request on a kept-alive connection.
                content_length = int(self.headers.get("Content-Length", 0))
                if content_length > 0:
                    self.rfile.read(content_length)

                if request_count <= 2:
                    status = 503
                    body = b'{"error": "Service unavailable"}'
                else:
                    status = 200
                    body = b'{"featureFlags": {"test": true}, "featureFlagPayloads": {}}'

                self.send_response(status)
                self.send_header("Content-Type", "application/json")
                self.send_header("Content-Length", str(len(body)))
                self.end_headers()
                self.wfile.write(body)

            def log_message(self, format, *args):
                pass  # Suppress per-request logging in test output

        # Daemon handler threads so an idle keep-alive connection cannot
        # block server shutdown.
        class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
            daemon_threads = True

        # Port 0 lets the OS pick a free port.
        server = ThreadedHTTPServer(("127.0.0.1", 0), RetryTestHandler)
        port = server.server_address[1]
        server_thread = threading.Thread(target=server.serve_forever, daemon=True)
        server_thread.start()

        try:
            # The context manager closes the session (and its pooled
            # connections) before the server is torn down.
            with requests.Session() as session:
                # Mounted on http:// because the local server is plain HTTP.
                session.mount("http://", self._build_retry_adapter(0.01))

                response = session.post(
                    f"http://127.0.0.1:{port}/flags/?v=2",
                    json={"distinct_id": "user123"},
                    timeout=5,
                )

            # Should succeed on the 3rd attempt.
            self.assertEqual(response.status_code, 200)
            self.assertEqual(request_count, 3)  # 1 initial + 2 retries
        finally:
            server.shutdown()
            server.server_close()
            # Wait for serve_forever to return so the thread does not outlive the test.
            server_thread.join(timeout=5)

    def test_connection_errors_are_retried(self):
        """
        Verify that connection errors (no server listening) trigger retries.

        Binds a socket to obtain a free port, closes it so nothing is
        listening, then expects the POST to raise ConnectionError after a
        measurable delay.
        """
        import socket
        import time

        # Bind to port 0 to learn a free port, then release it immediately.
        # NOTE: another process could claim the port between close and
        # connect; the window is small but not zero.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.bind(("127.0.0.1", 0))
            port = sock.getsockname()[1]

        with requests.Session() as session:
            session.mount("http://", self._build_retry_adapter(0.05))

            # Monotonic clock: immune to wall-clock adjustments mid-test.
            start = time.monotonic()
            with self.assertRaises(requests.exceptions.ConnectionError):
                session.post(
                    f"http://127.0.0.1:{port}/flags/?v=2",
                    json={"distinct_id": "user123"},
                    timeout=1,
                )
            elapsed = time.monotonic() - start

        # A single failed attempt would return almost instantly; exceeding
        # the threshold is taken as evidence that retry backoff ran. Only a
        # lower bound is checked, since how long a refused connection takes
        # to fail presumably varies by platform.
        self.assertGreater(elapsed, 0.05, "Should have some delay from retries")
0 commit comments