Skip to content

Commit a6030d7

Browse files
rarbab authored and dgibson committed
spapr: Add a new level of NUMA for GPUs
NUMA nodes corresponding to GPU memory currently have the same
affinity/distance as normal memory nodes. Add a third NUMA associativity
reference point enabling us to give GPU nodes more distance.

This is guest visible information, which shouldn't change under a
running guest across migration between different qemu versions, so make
the change effective only in new (pseries > 5.0) machine types.

Before, `numactl -H` output in a guest with 4 GPUs (nodes 2-5):

node distances:
node   0   1   2   3   4   5
  0:  10  40  40  40  40  40
  1:  40  10  40  40  40  40
  2:  40  40  10  40  40  40
  3:  40  40  40  10  40  40
  4:  40  40  40  40  10  40
  5:  40  40  40  40  40  10

After:

node distances:
node   0   1   2   3   4   5
  0:  10  40  80  80  80  80
  1:  40  10  80  80  80  80
  2:  80  80  10  80  80  80
  3:  80  80  80  10  80  80
  4:  80  80  80  80  10  80
  5:  80  80  80  80  80  10

These are the same distances as on the host, mirroring the change made
to host firmware in skiboot commit f845a648b8cb ("numa/associativity:
Add a new level of NUMA for GPU's").

Signed-off-by: Reza Arbab <[email protected]>
Message-Id: <[email protected]>
Signed-off-by: David Gibson <[email protected]>
1 parent a4beb5f commit a6030d7

File tree

5 files changed

+33
-5
lines changed

5 files changed

+33
-5
lines changed

hw/ppc/spapr.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -890,10 +890,16 @@ static int spapr_dt_rng(void *fdt)
890890
static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
891891
{
892892
MachineState *ms = MACHINE(spapr);
893+
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
893894
int rtas;
894895
GString *hypertas = g_string_sized_new(256);
895896
GString *qemu_hypertas = g_string_sized_new(256);
896-
uint32_t refpoints[] = { cpu_to_be32(0x4), cpu_to_be32(0x4) };
897+
uint32_t refpoints[] = {
898+
cpu_to_be32(0x4),
899+
cpu_to_be32(0x4),
900+
cpu_to_be32(0x2),
901+
};
902+
uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
897903
uint64_t max_device_addr = MACHINE(spapr)->device_memory->base +
898904
memory_region_size(&MACHINE(spapr)->device_memory->mr);
899905
uint32_t lrdr_capacity[] = {
@@ -945,8 +951,12 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
945951
qemu_hypertas->str, qemu_hypertas->len));
946952
g_string_free(qemu_hypertas, TRUE);
947953

954+
if (smc->pre_5_1_assoc_refpoints) {
955+
nr_refpoints = 2;
956+
}
957+
948958
_FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
949-
refpoints, sizeof(refpoints)));
959+
refpoints, nr_refpoints * sizeof(refpoints[0])));
950960

951961
_FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
952962
maxdomains, sizeof(maxdomains)));
@@ -4584,9 +4594,16 @@ DEFINE_SPAPR_MACHINE(5_1, "5.1", true);
45844594
*/
45854595
static void spapr_machine_5_0_class_options(MachineClass *mc)
45864596
{
4597+
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
4598+
static GlobalProperty compat[] = {
4599+
{ TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" },
4600+
};
4601+
45874602
spapr_machine_5_1_class_options(mc);
45884603
compat_props_add(mc->compat_props, hw_compat_5_0, hw_compat_5_0_len);
4604+
compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
45894605
mc->numa_mem_supported = true;
4606+
smc->pre_5_1_assoc_refpoints = true;
45904607
}
45914608

45924609
DEFINE_SPAPR_MACHINE(5_0, "5.0", false);

hw/ppc/spapr_pci.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2089,6 +2089,8 @@ static Property spapr_phb_properties[] = {
20892089
pcie_ecs, true),
20902090
DEFINE_PROP_UINT64("gpa", SpaprPhbState, nv2_gpa_win_addr, 0),
20912091
DEFINE_PROP_UINT64("atsd", SpaprPhbState, nv2_atsd_win_addr, 0),
2092+
DEFINE_PROP_BOOL("pre-5.1-associativity", SpaprPhbState,
2093+
pre_5_1_assoc, false),
20922094
DEFINE_PROP_END_OF_LIST(),
20932095
};
20942096

hw/ppc/spapr_pci_nvlink2.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,9 +362,9 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
362362
&error_abort);
363363
uint32_t associativity[] = {
364364
cpu_to_be32(0x4),
365-
SPAPR_GPU_NUMA_ID,
366-
SPAPR_GPU_NUMA_ID,
367-
SPAPR_GPU_NUMA_ID,
365+
cpu_to_be32(nvslot->numa_id),
366+
cpu_to_be32(nvslot->numa_id),
367+
cpu_to_be32(nvslot->numa_id),
368368
cpu_to_be32(nvslot->numa_id)
369369
};
370370
uint64_t size = object_property_get_uint(nv_mrobj, "size", NULL);
@@ -375,6 +375,13 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
375375
_FDT(off);
376376
_FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
377377
_FDT((fdt_setprop(fdt, off, "reg", mem_reg, sizeof(mem_reg))));
378+
379+
if (sphb->pre_5_1_assoc) {
380+
associativity[1] = SPAPR_GPU_NUMA_ID;
381+
associativity[2] = SPAPR_GPU_NUMA_ID;
382+
associativity[3] = SPAPR_GPU_NUMA_ID;
383+
}
384+
378385
_FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
379386
sizeof(associativity))));
380387

include/hw/pci-host/spapr.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ struct SpaprPhbState {
9494
hwaddr nv2_gpa_win_addr;
9595
hwaddr nv2_atsd_win_addr;
9696
SpaprPhbPciNvGpuConfig *nvgpus;
97+
bool pre_5_1_assoc;
9798
};
9899

99100
#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL

include/hw/ppc/spapr.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ struct SpaprMachineClass {
129129
bool linux_pci_probe;
130130
bool smp_threads_vsmt; /* set VSMT to smp_threads by default */
131131
hwaddr rma_limit; /* clamp the RMA to this size */
132+
bool pre_5_1_assoc_refpoints;
132133

133134
void (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
134135
uint64_t *buid, hwaddr *pio,

0 commit comments

Comments
 (0)