@@ -103,47 +103,47 @@ OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item);
103103/**
104104 * Selects NIC (provider) based on hardware locality
105105 *
106- * In multi-nic situations, use hardware topology to pick the "best"
107- * of the selected NICs.
108- * There are 3 main cases that this covers :
109- *
110- * 1. If the first provider passed into this function is the only valid
111- * provider, this provider is returned.
112- *
113- * 2. If there is more than 1 provider that matches the type of the first
114- * provider in the list, and the BDF data
115- * is available then a provider is selected based on locality of device
116- * cpuset and process cpuset and tries to ensure that processes
117- * are distributed evenly across NICs. This has two separate
118- * cases:
119- *
120- * i. There is one or more provider local to the process:
121- *
122- * (local rank % number of providers of the same type
123- * that share the process cpuset) is used to select one
124- * of these providers .
125- *
126- * ii. There is no provider that is local to the process:
127- *
128- * (local rank % number of providers of the same type)
129- * is used to select one of these providers
130- *
131- * 3. If there is more than 1 providers of the same type in the
132- * list, and the BDF data is not available (the ofi version does
133- * not support fi_info.nic or the provider does not support BDF)
134- * then (local rank % number of providers of the same type) is
135- * used to select one of these providers
136- *
137- * @param provider_list (IN) struct fi_info* An initially selected
138- * provider NIC. The provider name and
139- * attributes are used to restrict NIC
140- * selection. This provider is returned if the
141- * NIC selection fails.
142- *
143- * @param provider (OUT) struct fi_info* object with the selected
144- * provider if the selection succeeds
145- * if the selection fails, returns the fi_info
146- * object that was initially provided.
106+ * The selection is based on the following priority:
107+ *
108+ * Single-NIC:
109+ *
110+ * If only 1 provider is available, always return that provider.
111+ *
112+ * Multi-NIC:
113+ *
114+ * 1. If the process is NOT bound, pick a NIC using (local rank % number
115+ * of providers of the same type). This gives a fair chance to each
116+ * qualified NIC and balances overall utilization.
117+ *
118+ * 2. If the process is bound, we compare providers in the list that have
119+ * the same type as the first provider, and find the provider with the
120+ * shortest distance to the current process.
121+ *
122+ * i. If the provider has PCI BDF data, we attempt to compute the
123+ * distance between the NIC and the current process cpuset. The NIC
124+ * with the shortest distance is returned.
125+ *
126+ * * For equidistant NICs, we select a NIC in round-robin fashion
127+ * using the package rank of the current process, i.e. (package
128+ * rank % number of providers with the same distance).
129+ *
130+ * ii. If we cannot compute the distance between the NIC and the
131+ * current process, e.g. PCI BDF data is not available, a NIC will be
132+ * selected in a round-robin fashion using package rank, i.e. (package
133+ * rank % number of providers of the same type).
134+ *
135+ * @param[in] provider_list struct fi_info* An initially selected
136+ * provider NIC. The provider name and
137+ * attributes are used to restrict NIC
138+ * selection. This provider is returned if the
139+ * NIC selection fails.
140+ *
141+ * @param[in] process_info opal_process_info_t* The current process info
142+ *
143+ * @return struct fi_info* object with the selected
144+ * provider if the selection succeeds;
145+ * if the selection fails, returns the fi_info
146+ * object that was initially provided.
147147 *
148148 * All errors should be recoverable and will return the initially provided
149149 * provider. However, if an error occurs we can no longer guarantee
@@ -152,7 +152,7 @@ OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item);
152152 *
153153 */
154154OPAL_DECLSPEC struct fi_info * opal_common_ofi_select_provider (struct fi_info * provider_list ,
155- opal_process_info_t * process_info );
155+ opal_process_info_t * process_info );
156156
157157/**
158158 * Obtain EP endpoint name
0 commit comments