Skip to content

Commit 73145e6

Browse files
michalQb authored and anguy11 committed
ice: fix rebuilding the Tx scheduler tree for large queue counts
The current implementation of the Tx scheduler allows the tree to be rebuilt as the user adds more Tx queues to the VSI. In such a case, additional child nodes are added to the tree to support the new number of queues. Unfortunately, this algorithm does not take into account that the limit of the VSI support node may be exceeded, so an additional node in the VSI layer may be required to handle all the requested queues. Such a scenario occurs when adding XDP Tx queues on machines with many CPUs. Although the driver still respects the queue limit returned by the FW, the Tx scheduler was unable to add those queues to its tree and returned one of the errors below. Such a scenario occurs when adding XDP Tx queues on machines with many CPUs (e.g. at least 321 CPUs, if there is already 128 Tx/Rx queue pairs). Although the driver still respects the queue limit returned by the FW, the Tx scheduler was unable to add those queues to its tree and returned the following errors: Failed VSI LAN queue config for XDP, error: -5 or: Failed to set LAN Tx queue context, error: -22 Fix this problem by extending the tree rebuild algorithm to check if the current VSI node can support the requested number of queues. If it cannot, create as many additional VSI support nodes as necessary to handle all the required Tx queues. Symmetrically, adjust the VSI node removal algorithm to remove all nodes associated with the given VSI. Also, make the search for the next free VSI node more restrictive. That is, add queue group nodes only to the VSI support nodes that have a matching VSI handle. Finally, fix the comment describing the tree update algorithm to better reflect the current scenario. 
Fixes: b0153fd ("ice: update VSI config dynamically") Reviewed-by: Dawid Osuchowski <[email protected]> Reviewed-by: Przemek Kitszel <[email protected]> Signed-off-by: Michal Kubiak <[email protected]> Reviewed-by: Simon Horman <[email protected]> Tested-by: Jesse Brandeburg <[email protected]> Tested-by: Saritha Sanigani <[email protected]> (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen <[email protected]>
1 parent 6fa2942 commit 73145e6

File tree

1 file changed

+142
-28
lines changed

1 file changed

+142
-28
lines changed

drivers/net/ethernet/intel/ice/ice_sched.c

Lines changed: 142 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,27 @@ ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
8484
return NULL;
8585
}
8686

87+
/**
88+
* ice_sched_find_next_vsi_node - find the next node for a given VSI
89+
* @vsi_node: VSI support node to start search with
90+
*
91+
* Return: Next VSI support node, or NULL.
92+
*
93+
* The function returns a pointer to the next node from the VSI layer
94+
* assigned to the given VSI, or NULL if there is no such a node.
95+
*/
96+
static struct ice_sched_node *
97+
ice_sched_find_next_vsi_node(struct ice_sched_node *vsi_node)
98+
{
99+
unsigned int vsi_handle = vsi_node->vsi_handle;
100+
101+
while ((vsi_node = vsi_node->sibling) != NULL)
102+
if (vsi_node->vsi_handle == vsi_handle)
103+
break;
104+
105+
return vsi_node;
106+
}
107+
87108
/**
88109
* ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
89110
* @hw: pointer to the HW struct
@@ -1084,8 +1105,10 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
10841105
if (parent->num_children < max_child_nodes) {
10851106
new_num_nodes = max_child_nodes - parent->num_children;
10861107
} else {
1087-
/* This parent is full, try the next sibling */
1088-
parent = parent->sibling;
1108+
/* This parent is full,
1109+
* try the next available sibling.
1110+
*/
1111+
parent = ice_sched_find_next_vsi_node(parent);
10891112
/* Don't modify the first node TEID memory if the
10901113
* first node was added already in the above call.
10911114
* Instead send some temp memory for all other
@@ -1528,12 +1551,23 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
15281551
/* get the first queue group node from VSI sub-tree */
15291552
qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
15301553
while (qgrp_node) {
1554+
struct ice_sched_node *next_vsi_node;
1555+
15311556
/* make sure the qgroup node is part of the VSI subtree */
15321557
if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
15331558
if (qgrp_node->num_children < max_children &&
15341559
qgrp_node->owner == owner)
15351560
break;
15361561
qgrp_node = qgrp_node->sibling;
1562+
if (qgrp_node)
1563+
continue;
1564+
1565+
next_vsi_node = ice_sched_find_next_vsi_node(vsi_node);
1566+
if (!next_vsi_node)
1567+
break;
1568+
1569+
vsi_node = next_vsi_node;
1570+
qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
15371571
}
15381572

15391573
/* Select the best queue group */
@@ -1779,7 +1813,11 @@ ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
17791813
if (!parent)
17801814
return -EIO;
17811815

1782-
if (i == vsil)
1816+
/* Do not modify the VSI handle for already existing VSI nodes,
1817+
* (if no new VSI node was added to the tree).
1818+
* Assign the VSI handle only to newly added VSI nodes.
1819+
*/
1820+
if (i == vsil && num_added)
17831821
parent->vsi_handle = vsi_handle;
17841822
}
17851823

@@ -1812,6 +1850,41 @@ ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
18121850
num_nodes);
18131851
}
18141852

1853+
/**
1854+
* ice_sched_recalc_vsi_support_nodes - recalculate VSI support nodes count
1855+
* @hw: pointer to the HW struct
1856+
* @vsi_node: pointer to the leftmost VSI node that needs to be extended
1857+
* @new_numqs: new number of queues that has to be handled by the VSI
1858+
* @new_num_nodes: pointer to nodes count table to modify the VSI layer entry
1859+
*
1860+
* This function recalculates the number of supported nodes that need to
1861+
* be added after adding more Tx queues for a given VSI.
1862+
* The number of new VSI support nodes that shall be added will be saved
1863+
* to the @new_num_nodes table for the VSI layer.
1864+
*/
1865+
static void
1866+
ice_sched_recalc_vsi_support_nodes(struct ice_hw *hw,
1867+
struct ice_sched_node *vsi_node,
1868+
unsigned int new_numqs, u16 *new_num_nodes)
1869+
{
1870+
u32 vsi_nodes_cnt = 1;
1871+
u32 max_queue_cnt = 1;
1872+
u32 qgl, vsil;
1873+
1874+
qgl = ice_sched_get_qgrp_layer(hw);
1875+
vsil = ice_sched_get_vsi_layer(hw);
1876+
1877+
for (u32 i = vsil; i <= qgl; i++)
1878+
max_queue_cnt *= hw->max_children[i];
1879+
1880+
while ((vsi_node = ice_sched_find_next_vsi_node(vsi_node)) != NULL)
1881+
vsi_nodes_cnt++;
1882+
1883+
if (new_numqs > (max_queue_cnt * vsi_nodes_cnt))
1884+
new_num_nodes[vsil] = DIV_ROUND_UP(new_numqs, max_queue_cnt) -
1885+
vsi_nodes_cnt;
1886+
}
1887+
18151888
/**
18161889
* ice_sched_update_vsi_child_nodes - update VSI child nodes
18171890
* @pi: port information structure
@@ -1863,16 +1936,25 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
18631936
return status;
18641937
}
18651938

1939+
ice_sched_recalc_vsi_support_nodes(hw, vsi_node,
1940+
new_numqs, new_num_nodes);
18661941
ice_sched_calc_vsi_child_nodes(hw, new_numqs - prev_numqs,
18671942
new_num_nodes);
18681943

1869-
/* Keep the max number of queue configuration all the time. Update the
1870-
* tree only if number of queues > previous number of queues. This may
1944+
/* Never decrease the number of queues in the tree. Update the tree
1945+
* only if number of queues > previous number of queues. This may
18711946
* leave some extra nodes in the tree if number of queues < previous
18721947
* number but that wouldn't harm anything. Removing those extra nodes
18731948
* may complicate the code if those nodes are part of SRL or
18741949
* individually rate limited.
1950+
* Also, add the required VSI support nodes if the existing ones cannot
1951+
* handle the requested new number of queues.
18751952
*/
1953+
status = ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
1954+
new_num_nodes);
1955+
if (status)
1956+
return status;
1957+
18761958
status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
18771959
new_num_nodes, owner);
18781960
if (status)
@@ -2013,6 +2095,58 @@ static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
20132095
return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF);
20142096
}
20152097

2098+
/**
2099+
* ice_sched_rm_vsi_subtree - remove all nodes assigned to a given VSI
2100+
* @pi: port information structure
2101+
* @vsi_node: pointer to the leftmost node of the VSI to be removed
2102+
* @owner: LAN or RDMA
2103+
* @tc: TC number
2104+
*
2105+
* Return: Zero in case of success, or -EBUSY if the VSI has leaf nodes in TC.
2106+
*
2107+
* This function removes all the VSI support nodes associated with a given VSI
2108+
* and its LAN or RDMA children nodes from the scheduler tree.
2109+
*/
2110+
static int
2111+
ice_sched_rm_vsi_subtree(struct ice_port_info *pi,
2112+
struct ice_sched_node *vsi_node, u8 owner, u8 tc)
2113+
{
2114+
u16 vsi_handle = vsi_node->vsi_handle;
2115+
bool all_vsi_nodes_removed = true;
2116+
int j = 0;
2117+
2118+
while (vsi_node) {
2119+
struct ice_sched_node *next_vsi_node;
2120+
2121+
if (ice_sched_is_leaf_node_present(vsi_node)) {
2122+
ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", tc);
2123+
return -EBUSY;
2124+
}
2125+
while (j < vsi_node->num_children) {
2126+
if (vsi_node->children[j]->owner == owner)
2127+
ice_free_sched_node(pi, vsi_node->children[j]);
2128+
else
2129+
j++;
2130+
}
2131+
2132+
next_vsi_node = ice_sched_find_next_vsi_node(vsi_node);
2133+
2134+
/* remove the VSI if it has no children */
2135+
if (!vsi_node->num_children)
2136+
ice_free_sched_node(pi, vsi_node);
2137+
else
2138+
all_vsi_nodes_removed = false;
2139+
2140+
vsi_node = next_vsi_node;
2141+
}
2142+
2143+
/* clean up aggregator related VSI info if any */
2144+
if (all_vsi_nodes_removed)
2145+
ice_sched_rm_agg_vsi_info(pi, vsi_handle);
2146+
2147+
return 0;
2148+
}
2149+
20162150
/**
20172151
* ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
20182152
* @pi: port information structure
@@ -2039,7 +2173,6 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
20392173

20402174
ice_for_each_traffic_class(i) {
20412175
struct ice_sched_node *vsi_node, *tc_node;
2042-
u8 j = 0;
20432176

20442177
tc_node = ice_sched_get_tc_node(pi, i);
20452178
if (!tc_node)
@@ -2049,31 +2182,12 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
20492182
if (!vsi_node)
20502183
continue;
20512184

2052-
if (ice_sched_is_leaf_node_present(vsi_node)) {
2053-
ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", i);
2054-
status = -EBUSY;
2185+
status = ice_sched_rm_vsi_subtree(pi, vsi_node, owner, i);
2186+
if (status)
20552187
goto exit_sched_rm_vsi_cfg;
2056-
}
2057-
while (j < vsi_node->num_children) {
2058-
if (vsi_node->children[j]->owner == owner) {
2059-
ice_free_sched_node(pi, vsi_node->children[j]);
20602188

2061-
/* reset the counter again since the num
2062-
* children will be updated after node removal
2063-
*/
2064-
j = 0;
2065-
} else {
2066-
j++;
2067-
}
2068-
}
2069-
/* remove the VSI if it has no children */
2070-
if (!vsi_node->num_children) {
2071-
ice_free_sched_node(pi, vsi_node);
2072-
vsi_ctx->sched.vsi_node[i] = NULL;
2189+
vsi_ctx->sched.vsi_node[i] = NULL;
20732190

2074-
/* clean up aggregator related VSI info if any */
2075-
ice_sched_rm_agg_vsi_info(pi, vsi_handle);
2076-
}
20772191
if (owner == ICE_SCHED_NODE_OWNER_LAN)
20782192
vsi_ctx->sched.max_lanq[i] = 0;
20792193
else

0 commit comments

Comments
 (0)