@@ -150,29 +150,38 @@ ucp_sys_dev_map_t ucp_memh_sys_dev_map(ucp_mem_h memh)
150150 return 0 ;
151151}
152152
153- ucs_sys_device_t ucp_rkey_pack_sys_dev (ucp_mem_h memh )
153+ static int ucp_memh_send_flush_is_needed (ucp_mem_h memh )
154154{
155- ucs_sys_device_t sys_dev_packed = memh -> sys_dev ;
156155 ucp_md_index_t md_index ;
157156 ucp_sys_dev_map_t sys_dev_map ;
158157 ucs_sys_device_t sys_dev ;
159158
160- ucs_assert (sys_dev_packed <= UCP_SYS_DEVICE_MAX_PACKED );
159+ if (memh -> flags & UCP_MEMH_FLAG_SEND_FLUSH_CHECKED ) {
160+ return !!(memh -> flags & UCP_MEMH_FLAG_SEND_FLUSH_NEEDED );
161+ }
162+
163+ memh -> flags |= UCP_MEMH_FLAG_SEND_FLUSH_CHECKED ;
161164
162- ucs_for_each_bit (md_index , memh -> md_map ) {
163- sys_dev_map = memh -> context -> tl_mds [md_index ].sys_dev_map ;
164- ucs_for_each_bit (sys_dev , sys_dev_map ) {
165- if (ucs_topo_is_sibling (sys_dev , sys_dev_packed )) {
166- /* PUT operation on such rkey requires remote flush.
167- * Set a flag for the peer to recognize it. */
168- sys_dev_packed |= UCP_SYS_DEVICE_FLUSH_BIT ;
169- goto out ;
165+ if (memh -> sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN ) {
166+ ucs_assert (memh -> sys_dev <= UCP_SYS_DEVICE_MAX_PACKED );
167+
168+ ucs_for_each_bit (md_index , memh -> md_map ) {
169+ sys_dev_map = memh -> context -> tl_mds [md_index ].sys_dev_map ;
170+ ucs_for_each_bit (sys_dev , sys_dev_map ) {
171+ if (ucs_topo_is_sibling (sys_dev , memh -> sys_dev )) {
172+ /*
173+ * PUT operation on such device will require remote flush
174+ * when using network devices.
175+ * Set a flag for the peer to recognize it.
176+ */
177+ memh -> flags |= UCP_MEMH_FLAG_SEND_FLUSH_NEEDED ;
178+ return 1 ;
179+ }
170180 }
171181 }
172182 }
173183
174- out :
175- return sys_dev_packed ;
184+ return 0 ;
176185}
177186
178187UCS_PROFILE_FUNC (ssize_t , ucp_rkey_pack_memh ,
@@ -231,8 +240,14 @@ UCS_PROFILE_FUNC(ssize_t, ucp_rkey_pack_memh,
231240
232241 if (md_map != 0 ) {
233242 /* Since UCX 1.20: always pack sys_dev for non-empty rkeys. */
234- ucs_assert (memh != NULL );
235- * ucs_serialize_next (& p , uint8_t ) = memh -> packed_sys_dev ;
243+ ucs_assert (memh != NULL );
244+
245+ sys_dev = memh -> sys_dev ;
246+ if (ucp_memh_send_flush_is_needed (memh )) {
247+ sys_dev |= UCP_SYS_DEVICE_FLUSH_BIT ;
248+ }
249+
250+ * ucs_serialize_next (& p , uint8_t ) = sys_dev ;
236251 }
237252
238253 if ((mem_info -> sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN ) || (md_map == 0 )) {
@@ -829,15 +844,24 @@ ucp_rkey_unpack_lanes_distance(const ucp_ep_config_key_t *ep_config_key,
829844 }
830845}
831846
832- static UCS_F_ALWAYS_INLINE ucs_sys_device_t
847+ static UCS_F_ALWAYS_INLINE void
833848ucp_rkey_extract_sys_dev (const ucp_ep_config_t * ep_config , ucp_rkey_h rkey ,
834- const void * * buffer_p , const void * buffer_end )
849+ const void * * buffer_p , const void * buffer_end ,
850+ ucp_rkey_config_key_t * rkey_config_key )
835851{
836852 if ((* buffer_p < buffer_end ) ||
837853 ((ep_config -> key .dst_version > 19 ) && (rkey -> md_map != 0 ))) {
838- return * ucs_serialize_next (buffer_p , const uint8_t );
854+ rkey_config_key -> sys_dev = * ucs_serialize_next (buffer_p , const uint8_t );
855+ } else {
856+ rkey_config_key -> sys_dev = UCS_SYS_DEVICE_ID_UNKNOWN ;
857+ }
858+
859+ if ((rkey_config_key -> sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN ) &&
860+ (rkey_config_key -> sys_dev & UCP_SYS_DEVICE_FLUSH_BIT )) {
861+ rkey_config_key -> flags = UCP_RKEY_CONFIG_FLAG_FLUSH ;
862+ rkey_config_key -> sys_dev &= ~UCP_SYS_DEVICE_FLUSH_BIT ;
839863 } else {
840- return UCS_SYS_DEVICE_ID_UNKNOWN ;
864+ rkey_config_key -> flags = 0 ;
841865 }
842866}
843867
@@ -864,8 +888,8 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_rkey_proto_resolve,
864888 rkey_config_key .md_map = rkey -> md_map ;
865889 rkey_config_key .mem_type = rkey -> mem_type ;
866890 rkey_config_key .unreachable_md_map = unreachable_md_map ;
867- rkey_config_key . sys_dev = ucp_rkey_extract_sys_dev (
868- ep_config , rkey , & p , buffer_end );
891+
892+ ucp_rkey_extract_sys_dev ( ep_config , rkey , & p , buffer_end , & rkey_config_key );
869893
870894 /* Starting with UCX v1.20, lane distances are always packed if sys_dev is
871895 * not UNKNOWN. Even if the rkey length is not explicitly passed to the API,
0 commit comments