@@ -1186,6 +1186,8 @@ static byte_t rx_parse(const uint32_t can_id,
11861186
11871187// Reassembly state at a specific priority level.
11881188// Maintaining separate state per priority level allows preemption of higher-priority transfers without loss.
1189+ // Interface affinity is required because frames duplicated across redundant interfaces may arrive with a significant
1190+ // delay, which may cause the receiver to accept more frames than necessary.
11891191typedef struct
11901192{
11911193 canard_us_t start_ts ;
@@ -1221,13 +1223,14 @@ static void rx_slot_destroy(const canard_subscription_t* const sub, rx_slot_t* c
12211223 mem_free (sub -> owner -> mem .rx_payload , RX_SLOT_OVERHEAD + sub -> extent , slot );
12221224}
12231225
1224- static void rx_slot_store (rx_slot_t * const slot , const size_t extent , const canard_bytes_t payload )
1226+ static void rx_slot_advance (rx_slot_t * const slot , const size_t extent , const canard_bytes_t payload )
12251227{
12261228 if (slot -> total_size < extent ) {
12271229 const size_t copy_size = smaller (payload .size , (size_t )(extent - slot -> total_size ));
12281230 (void )memcpy (& slot -> payload [slot -> total_size ], payload .data , copy_size );
12291231 }
12301232 slot -> total_size = (uint32_t )(slot -> total_size + payload .size ); // Before truncation.
1233+ slot -> expected_toggle ^= 1U ;
12311234}
12321235
12331236// A compact representation is needed because we need to store an array of these in dynamic memory.
@@ -1280,6 +1283,11 @@ static uint64_t rx_seqno_linearize(const uint64_t ref_seqno, const byte_t transf
12801283// - The case of a zero transfer-ID timeout is a first-class use case. In this mode, duplication is tolerated by
12811284// the application, but multi-frame transfers must still follow interface affinity to avoid incorrect reassembly.
12821285//
1286+ // Assumptions:
1287+ // - Frames within a transfer arrive in order;
1288+ // see https://forum.opencyphal.org/t/uavcan-can-tx-buffer-management-in-can-fd-controllers/1215
1289+ // - A frame may be duplicated (a well-known CAN PHY edge case), but duplicates immediately follow the original.
1290+ //
12831291// Core invariants:
12841292// - Only start-of-transfer may create/replace a slot.
12851293// - Non-start frames never create state.
@@ -1307,6 +1315,7 @@ static int32_t rx_session_cavl_compare(const void* const user, const canard_tree
13071315typedef struct
13081316{
13091317 canard_subscription_t * owner ;
1318+ byte_t iface_index ; // Start with the affinity to the iface that delivered the first frame.
13101319 byte_t node_id ;
13111320} rx_session_factory_context_t ;
13121321
@@ -1322,6 +1331,7 @@ static canard_tree_t* rx_session_factory(void* const user)
13221331 }
13231332 ses -> last_admitted_start_ts = BIG_BANG ;
13241333 ses -> owner = ctx -> owner ;
1334+ ses -> iface_index = ctx -> iface_index ;
13251335 ses -> node_id = ctx -> node_id ;
13261336 enlist_tail (& ctx -> owner -> owner -> rx .list_session_by_animation , & ses -> list_animation );
13271337 return & ses -> index ;
@@ -1395,22 +1405,37 @@ static bool rx_session_should_admit(const rx_session_t* const ses,
13951405 const uint64_t seqno ,
13961406 const byte_t iface_index )
13971407{
1408+ // Continuation frames cannot create new state so their handling is simpler.
1409+ // They are only accepted if there is a slot with an exact match of all transfer parameters.
1410+ // We ignore the transfer-ID timeout to avoid breaking transfers that are preempted for a long time,
1411+ // and especially to allow reassembly of multi-frame transfers even when the transfer-ID timeout is zero.
13981412 if (!start ) {
1399- // Continuation frames only accepted if there is a slot with an exact match of all transfer parameters.
14001413 const rx_slot_t * const slot = ses -> slots [priority ];
14011414 return (slot != NULL ) && (slot -> transfer_id == (seqno & CANARD_TRANSFER_ID_MAX )) &&
14021415 (slot -> iface_index == iface_index ) && (slot -> expected_toggle == toggle );
14031416 }
1417+ // Duplicate start frames do not require special treatment because a duplicate frame can only follow the original
1418+ // without any frames belonging to the same transfer in between (see the assumptions). If we get a duplicate start,
1419+ // with a nonzero TID timeout it will be rejected as not-new; even if the timeout is zero, accepting the duplicate
1420+ // will simply restart the slot without loss of information since there were no other frames in between.
1421+ //
14041422 // In the case of a single priority level, the seqno would be considered new if it is greater than last admitted.
14051423 // Priority preemption makes this simple condition insufficient, because a newer higher-priority transfer with
14061424 // a greater seqno may push older lower-priority transfers aside, causing the receiver to observe seqno going
14071425 // backward for new transfers. Our solution is to keep a dedicated seqno frontier per priority level.
1408- //
14091426 // The lowest bound of a genuinely new seqno would then be located at the lowest priority level, since that level
14101427 // cannot preempt others. To decide if a given seqno is new, one needs to scan all priority levels from the
14111428 // current one (inclusive) down to the lowest level (numerically greater).
1412- //
14131429 // Higher priority levels (numerically lesser) may have greater seqno frontiers which bear no relevance.
1430+ //
1431+ // The original design had a special case that enabled admittance of a transfer with the next seqno from a
1432+ // different interface if there is no pending reassembly in progress; see details here and the original v4 code:
1433+ // https://github.com/OpenCyphal/libcanard/issues/228. This behavior is no longer present here mostly because we
1434+ // now update the session state only on admittance and not upon completion of a transfer, which changes the logic
1435+ // considerably. One side effect is that even after a timeout (potentially a very long time as long as the session
1436+ // survives) we will still reject a new transfer arriving from a different interface if it happened to roll the
1437+ // same transfer-ID timeout. This is not an issue because we would still accept new transfers on the same iface,
1438+ // and after the RX_SESSION_TIMEOUT the session is destroyed and all new transfers will be accepted unconditionally.
14141439 const bool seqno_new = seqno > rx_session_seqno_frontier (ses , priority );
14151440 const bool iface_match = ses -> iface_index == iface_index ;
14161441 const bool timed_out = ts > (ses -> last_admitted_start_ts + ses -> owner -> transfer_id_timeout );
@@ -1427,7 +1452,7 @@ static bool rx_session_update(canard_subscription_t* const sub,
14271452 CANARD_ASSERT (frame -> end || (frame -> payload .size >= 7 ));
14281453
14291454 // Only start frames may create new states.
1430- rx_session_factory_context_t factory_context = { .owner = sub , .node_id = frame -> src };
1455+ rx_session_factory_context_t factory_context = { .owner = sub , .iface_index = iface_index , . node_id = frame -> src };
14311456 rx_session_t * const ses =
14321457 CAVL2_TO_OWNER (frame -> start ? cavl2_find_or_insert (& sub -> sessions , //
14331458 & frame -> src ,
@@ -1452,8 +1477,10 @@ static bool rx_session_update(canard_subscription_t* const sub,
14521477 // The frame must be accepted. If this is the start of a new transfer, we must update state.
14531478 enlist_tail (& sub -> owner -> rx .list_session_by_animation , & ses -> list_animation );
14541479 if (frame -> start ) {
1455- rx_slot_destroy (sub , ses -> slots [frame -> priority ]);
1456- ses -> slots [frame -> priority ] = NULL ;
1480+ if (ses -> slots [frame -> priority ] != NULL ) {
1481+ rx_slot_destroy (sub , ses -> slots [frame -> priority ]);
1482+ ses -> slots [frame -> priority ] = NULL ;
1483+ }
14571484 if (!frame -> end ) { // more frames to follow, must store in-progress state
14581485 ses -> slots [frame -> priority ] = rx_slot_new (sub , ts , frame -> transfer_id , iface_index );
14591486 if (ses -> slots [frame -> priority ] == NULL ) {
0 commit comments