@@ -920,6 +920,139 @@ UCS_TEST_SKIP_COND_P(test_ud, ctls_loss,
920920
921921UCT_INSTANTIATE_UD_TEST_CASE (test_ud)
922922
923+ #if UCT_UD_EP_DEBUG_HOOKS
924+ class test_ud_stale_ack : public test_ud {
925+ public:
926+ static ucs_status_t inject_stale_ack_psn (uct_ud_ep_t *ep, uct_ud_neth_t *neth)
927+ {
928+ if (m_stale_ack_psn_to_inject != 0 ) {
929+ neth->ack_psn = m_stale_ack_psn_to_inject;
930+ }
931+ return UCS_OK;
932+ }
933+
934+ static ucs_status_t capture_rx_ack_psn (uct_ud_ep_t *ep, uct_ud_neth_t *neth)
935+ {
936+ m_received_ack_psn = neth->ack_psn ;
937+ return UCS_OK;
938+ }
939+
940+ static void set_stale_ack_psn (uct_ud_psn_t psn) {
941+ m_stale_ack_psn_to_inject = psn;
942+ }
943+
944+ static uct_ud_psn_t get_stale_ack_psn () {
945+ return m_stale_ack_psn_to_inject;
946+ }
947+
948+ static uct_ud_psn_t get_received_ack_psn () {
949+ return m_received_ack_psn;
950+ }
951+
952+ private:
953+ /* Stale ACK PSN to inject - simulates ACK from before endpoint reset */
954+ static volatile uct_ud_psn_t m_stale_ack_psn_to_inject;
955+ /* Captured ACK PSN from received packet */
956+ static volatile uct_ud_psn_t m_received_ack_psn;
957+ };
958+
959+ volatile uct_ud_psn_t test_ud_stale_ack::m_stale_ack_psn_to_inject = 0 ;
960+ volatile uct_ud_psn_t test_ud_stale_ack::m_received_ack_psn = 0 ;
961+
962+ UCS_TEST_SKIP_COND_P (test_ud_stale_ack, stale_ack_after_reset,
963+ !check_caps(UCT_IFACE_FLAG_AM_SHORT)) {
964+ constexpr uct_ud_psn_t STALE_ACK_PSN = 135 ;
965+ constexpr uct_ud_psn_t WINDOW_SIZE = 1024 ;
966+
967+ disable_async (m_e1);
968+ disable_async (m_e2);
969+ connect ();
970+ set_tx_win (m_e1, WINDOW_SIZE);
971+ set_tx_win (m_e2, WINDOW_SIZE);
972+
973+ uct_ud_ep_t *ud_ep1 = ep (m_e1);
974+
975+ /* Send some data to advance PSN */
976+ for (int i = 0 ; i < 5 ; i++) {
977+ EXPECT_UCS_OK (tx (m_e1));
978+ }
979+ flush ();
980+
981+ /* Simulate endpoint reset on m_e1:
982+ * Reset PSN to low values as if endpoint was just created. */
983+ uct_ud_enter (iface (m_e1));
984+ ud_ep1->tx .psn = 3 ; /* Next PSN to send */
985+ ud_ep1->tx .acked_psn = 0 ; /* Last ACKed PSN */
986+ ucs_queue_head_init (&ud_ep1->tx .window ); /* Clear TX window */
987+ uct_ud_leave (iface (m_e1));
988+
989+ /* Set up TX hook on m_e2 to inject stale ack_psn in outgoing packets.
990+ * This simulates a delayed/stale packet arriving after reset. */
991+ set_stale_ack_psn (STALE_ACK_PSN);
992+ ep (m_e2)->tx .tx_hook = test_ud_stale_ack::inject_stale_ack_psn;
993+ ep (m_e1)->rx .rx_hook = test_ud_stale_ack::capture_rx_ack_psn;
994+
995+ /* m_e2 sends a packet. Due to the hook, it will contain ack_psn=135. */
996+ EXPECT_UCS_OK (tx (m_e2));
997+ short_progress_loop ();
998+
999+ /* Verify the hook actually injected the stale ack_psn */
1000+ EXPECT_EQ (STALE_ACK_PSN, get_received_ack_psn ())
1001+ << " Invalid value after the hook" ;
1002+
1003+ set_stale_ack_psn (0 );
1004+ ep (m_e2)->tx .tx_hook = uct_ud_ep_null_hook;
1005+ ep (m_e1)->rx .rx_hook = uct_ud_ep_null_hook;
1006+
1007+ /* Verify endpoint state wasn't corrupted by the stale ACK */
1008+ EXPECT_EQ (0 , ud_ep1->tx .acked_psn ); /* Should NOT be STALE_ACK_PSN */
1009+ }
1010+
1011+ UCT_INSTANTIATE_UD_TEST_CASE (test_ud_stale_ack)
1012+
1013+ /* Test that simulates a stale EP reuse, when an EP with an existing dest_ep_id receives a CREQ with
1014+ * a different ep_id, the dest_ep_id is updated. */
1015+ UCS_TEST_SKIP_COND_P(test_ud, stale_dest_ep_id_update,
1016+ !check_caps(UCT_IFACE_FLAG_AM_SHORT)) {
1017+ constexpr uint32_t STALE_DEST_EP_ID = 0xBEEF ;
1018+ constexpr uint32_t REMOTE_EP_ID = 1 ;
1019+
1020+ /* Create a dummy EP on m_e1 first, so the actual EP will have ep_id=1
1021+ * (not 0, which could be confused with NULL/default values) */
1022+ m_e1->create_ep (0 );
1023+
1024+ /* Start connection from m_e1 to m_e2 - block m_e2's TX to delay CREP */
1025+ iface (m_e2)->tx .available = 0 ;
1026+
1027+ m_e1->connect_to_iface (1 , *m_e2);
1028+
1029+ /* Let CREQ be received and passive EP created on m_e2 */
1030+ short_progress_loop ();
1031+
1032+ /* m_e2 side: connect back, which will reuse the passive EP */
1033+ m_e2->connect_to_iface (0 , *m_e1);
1034+
1035+ /* Now manually set a "stale" dest_ep_id on m_e2's EP to simulate
1036+ * the scenario where the peer had a different ep_id before reset */
1037+ ep (m_e2)->dest_ep_id = STALE_DEST_EP_ID;
1038+
1039+ /* Allow m_e2 to send - CREP will be sent, and any pending CREQs processed */
1040+ iface (m_e2)->tx .available = 128 ;
1041+
1042+ /* Wait for m_e2's dest_ep_id to be updated from the stale value */
1043+ wait_for_value (&ep (m_e2)->dest_ep_id , REMOTE_EP_ID, true ,
1044+ TEST_UD_LINGER_TIMEOUT_IN_SEC);
1045+
1046+ EXPECT_EQ (REMOTE_EP_ID, ep (m_e1, REMOTE_EP_ID)->ep_id );
1047+ EXPECT_NE (UCT_UD_EP_NULL_ID, ep (m_e1, REMOTE_EP_ID)->dest_ep_id );
1048+ EXPECT_NE (STALE_DEST_EP_ID, ep (m_e2)->dest_ep_id )
1049+ << " dest_ep_id should have been updated from stale value" ;
1050+ EXPECT_EQ (ep (m_e1, REMOTE_EP_ID)->ep_id , ep (m_e2)->dest_ep_id )
1051+ << " dest_ep_id should match the remote EP's actual ep_id" ;
1052+ EXPECT_EQ (REMOTE_EP_ID, ep (m_e2)->dest_ep_id )
1053+ << " dest_ep_id should be 1 (not 0)" ;
1054+ }
1055+ #endif
9231056
9241057class test_ud_peer_failure : public ud_base_test {
9251058public:
0 commit comments