Skip to content

Commit 3582f79

Browse files
committed
B #7408: Fix PCI device to virtual NUMA node mapping
When using q35 machines and NUMA pinning, a specialized PCIe topology is generated to resemble the one presented by the host. In this case PCI devices can be mapped to the incorrect node, mismatching CPU+Mem and PCI pinning. (cherry picked from commit 52ba320d0dd27f85131da7e681d98dbef748eaeb)
1 parent c36da69 commit 3582f79

File tree

3 files changed

+33
-13
lines changed

3 files changed

+33
-13
lines changed

include/HostShareNUMA.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ class HostShareNUMA
500500
};
501501

502502
bool schedule_nodes(NUMANodeRequest &nr, unsigned int thr, bool dedicated,
503-
unsigned long hpsz_kb, std::set<unsigned int> &pci, bool do_alloc);
503+
unsigned long hpsz_kb, int pci_idx, bool do_alloc);
504504
};
505505

506506
#endif /*HOST_SHARE_NUMA_H_*/

src/host/HostShareNUMA.cc

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -926,8 +926,8 @@ static bool sort_node_mem(VectorAttribute *i, VectorAttribute *j)
926926
// -----------------------------------------------------------------------------
927927

928928
bool HostShareNUMA::schedule_nodes(NUMANodeRequest &nr, unsigned int threads,
929-
bool dedicated, unsigned long hpsz_kb, std::set<unsigned int> &pci,
930-
bool do_alloc)
929+
bool dedicated, unsigned long hpsz_kb,
930+
int pci_idx, bool do_alloc)
931931
{
932932
std::vector<std::tuple<float, int> > cpu_fits;
933933
std::set<unsigned int> mem_fits;
@@ -976,7 +976,7 @@ bool HostShareNUMA::schedule_nodes(NUMANodeRequest &nr, unsigned int threads,
976976
{
977977
float fcpu_after = 1 - ((float) nr.total_cpus / (threads * n_fcpu));
978978

979-
if ( pci.count(it->second->node_id) != 0 )
979+
if ( pci_idx == it->second->node_id )
980980
{
981981
fcpu_after += 1;
982982
}
@@ -1372,17 +1372,26 @@ int HostShareNUMA::make_topology(HostShareCapacity &sr, int vm_id, bool do_alloc
13721372
//
13731373
// NOTE: We want to pin CPUS in the same core in the VM to CPUS in the same
13741374
// core in the host as well.
1375+
//
1376+
// pci_alloc (true) if the PCI devices are pre-allocated to each NUMA node
1377+
// (i.e. set to the node PCIe expander bus). Virtual NUMA node 0 should be
1378+
// mapped to physical NUMA node 0 in this case.
13751379
//--------------------------------------------------------------------------
13761380
unsigned int na = 0;
13771381
std::set<unsigned int> pci_nodes;
1382+
bool pci_alloc = sr.nodes.size() > 0;
13781383

1379-
for (auto it = sr.pci.begin(); it != sr.pci.end(); ++it)
1384+
if ( pci_alloc )
13801385
{
1381-
int pnode = -1;
1382-
1383-
if ((*it)->vector_value("NUMA_NODE", pnode) == 0 && pnode != -1)
1386+
//Store NUMA nodes with pre-assigned PCI devices to force allocation
1387+
for (auto it = sr.pci.begin(); it != sr.pci.end(); ++it)
13841388
{
1385-
pci_nodes.insert(pnode);
1389+
int pnode = -1;
1390+
1391+
if ((*it)->vector_value("NUMA_NODE", pnode) == 0 && pnode != -1)
1392+
{
1393+
pci_nodes.insert(pnode);
1394+
}
13861395
}
13871396
}
13881397

@@ -1402,16 +1411,27 @@ int HostShareNUMA::make_topology(HostShareCapacity &sr, int vm_id, bool do_alloc
14021411
}
14031412
}
14041413

1414+
int idx = 0;
1415+
14051416
// Check allocation of virtual NUMA nodes
1406-
for (auto vn_it = vm_nodes.begin(); vn_it != vm_nodes.end(); ++vn_it)
1417+
for (auto& vn_it : vm_nodes)
14071418
{
1408-
if (!schedule_nodes(*vn_it, *tc_it, dedicated, hpsz_kb, pci_nodes,
1419+
// If PCI devices are preassigned try to use same idx NUMA node
1420+
int use_node = -1;
1421+
1422+
if ( pci_nodes.count(idx) != 0 )
1423+
{
1424+
use_node = idx;
1425+
}
1426+
1427+
if (!schedule_nodes(vn_it, *tc_it, dedicated, hpsz_kb, use_node,
14091428
do_alloc))
14101429
{
14111430
break; //Node cannot be allocated with *tc_it threads/core
14121431
}
14131432

14141433
na++;
1434+
idx++;
14151435
}
14161436

14171437
if (na == vm_nodes.size())

src/vmm/LibVirtDriverKVM.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2326,8 +2326,8 @@ int LibVirtDriver::deployment_description_kvm(
23262326
file << "\t\t<controller type='pci' index='" << sw_i << "'"
23272327
<< " model='pcie-switch-upstream-port'>" << endl
23282328
<< "\t\t\t<address type='pci' bus='" << bus_i + 1 << "'"
2329-
<< " slot='0' function='0'/>"
2330-
<< "\t\t</controller>";
2329+
<< " slot='0' function='0'/>" << endl
2330+
<< "\t\t</controller>" << endl;
23312331

23322332
for (unsigned int j = 0; j < 8; j++)
23332333
{

0 commit comments

Comments
 (0)