Skip to content

Commit 8a50c58

Browse files
swahlhpe authored and hansendc committed
x86/platform/uv: UV support for sub-NUMA clustering
Sub-NUMA clustering (SNC) invalidates previous assumptions of a 1:1 relationship between blades, sockets, and nodes. Fix these assumptions and build tables correctly when SNC is enabled.

Signed-off-by: Steve Wahl <steve.wahl@hpe.com>
Signed-off-by: Dave Hansen <[email protected]>
Link: https://lore.kernel.org/all/20230519190752.3297140-7-steve.wahl%40hpe.com
1 parent 45e9f9a commit 8a50c58

File tree

2 files changed

+107
-77
lines changed

2 files changed

+107
-77
lines changed

arch/x86/include/asm/uv/uv_hub.h

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ struct uv_hub_info_s {
177177
unsigned short nr_possible_cpus;
178178
unsigned short nr_online_cpus;
179179
short memory_nid;
180+
unsigned short *node_to_socket;
180181
};
181182

182183
/* CPU specific info with a pointer to the hub common info struct */
@@ -531,19 +532,18 @@ static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
531532
{
532533
unsigned int m_val = uv_hub_info->m_val;
533534
unsigned long base;
534-
unsigned short sockid, node;
535+
unsigned short sockid;
535536

536537
if (m_val)
537538
return __va(((unsigned long)pnode << m_val) | offset);
538539

539540
sockid = uv_pnode_to_socket(pnode);
540-
node = uv_socket_to_node(sockid);
541541

542542
/* limit address of previous socket is our base, except node 0 is 0 */
543-
if (!node)
543+
if (sockid == 0)
544544
return __va((unsigned long)offset);
545545

546-
base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit);
546+
base = (unsigned long)(uv_hub_info->gr_table[sockid - 1].limit);
547547
return __va(base << UV_GAM_RANGE_SHFT | offset);
548548
}
549549

@@ -650,7 +650,7 @@ static inline int uv_cpu_blade_processor_id(int cpu)
650650
/* Blade number to Node number (UV2..UV4 is 1:1) */
651651
static inline int uv_blade_to_node(int blade)
652652
{
653-
return blade;
653+
return uv_socket_to_node(blade);
654654
}
655655

656656
/* Blade number of current cpu. Numbered 0 .. <#blades -1> */
@@ -662,23 +662,27 @@ static inline int uv_numa_blade_id(void)
662662
/*
663663
* Convert linux node number to the UV blade number.
664664
* .. Currently for UV2 thru UV4 the node and the blade are identical.
665-
* .. If this changes then you MUST check references to this function!
665+
* .. UV5 needs conversion when sub-numa clustering is enabled.
666666
*/
667667
static inline int uv_node_to_blade_id(int nid)
668668
{
669-
return nid;
669+
unsigned short *n2s = uv_hub_info->node_to_socket;
670+
671+
return n2s ? n2s[nid] : nid;
670672
}
671673

672674
/* Convert a CPU number to the UV blade number */
673675
static inline int uv_cpu_to_blade_id(int cpu)
674676
{
675-
return uv_node_to_blade_id(cpu_to_node(cpu));
677+
return uv_cpu_hub_info(cpu)->numa_blade_id;
676678
}
677679

678680
/* Convert a blade id to the PNODE of the blade */
679681
static inline int uv_blade_to_pnode(int bid)
680682
{
681-
return uv_hub_info_list(uv_blade_to_node(bid))->pnode;
683+
unsigned short *s2p = uv_hub_info->socket_to_pnode;
684+
685+
return s2p ? s2p[bid] : bid;
682686
}
683687

684688
/* Nid of memory node on blade. -1 if no blade-local memory */

arch/x86/kernel/apic/x2apic_uv_x.c

Lines changed: 94 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -546,14 +546,14 @@ unsigned long sn_rtc_cycles_per_second;
546546
EXPORT_SYMBOL(sn_rtc_cycles_per_second);
547547

548548
/* The following values are used for the per node hub info struct */
549-
static __initdata unsigned short *_node_to_pnode;
550549
static __initdata unsigned short _min_socket, _max_socket;
551550
static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len;
552551
static __initdata struct uv_gam_range_entry *uv_gre_table;
553552
static __initdata struct uv_gam_parameters *uv_gp_table;
554553
static __initdata unsigned short *_socket_to_node;
555554
static __initdata unsigned short *_socket_to_pnode;
556555
static __initdata unsigned short *_pnode_to_socket;
556+
static __initdata unsigned short *_node_to_socket;
557557

558558
static __initdata struct uv_gam_range_s *_gr_table;
559559

@@ -1293,6 +1293,7 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
12931293
hi->nasid_shift = uv_cpuid.nasid_shift;
12941294
hi->min_pnode = _min_pnode;
12951295
hi->min_socket = _min_socket;
1296+
hi->node_to_socket = _node_to_socket;
12961297
hi->pnode_to_socket = _pnode_to_socket;
12971298
hi->socket_to_node = _socket_to_node;
12981299
hi->socket_to_pnode = _socket_to_pnode;
@@ -1526,6 +1527,11 @@ static void __init free_1_to_1_table(unsigned short **tp, char *tname, int min,
15261527
pr_info("UV: %s is 1:1, conversion table removed\n", tname);
15271528
}
15281529

1530+
/*
1531+
* Build Socket Tables
1532+
* If the number of nodes is >1 per socket, socket to node table will
1533+
* contain lowest node number on that socket.
1534+
*/
15291535
static void __init build_socket_tables(void)
15301536
{
15311537
struct uv_gam_range_entry *gre = uv_gre_table;
@@ -1552,27 +1558,25 @@ static void __init build_socket_tables(void)
15521558
/* Allocate and clear tables */
15531559
if ((alloc_conv_table(nump, &_pnode_to_socket) < 0)
15541560
|| (alloc_conv_table(nums, &_socket_to_pnode) < 0)
1555-
|| (alloc_conv_table(numn, &_node_to_pnode) < 0)
1561+
|| (alloc_conv_table(numn, &_node_to_socket) < 0)
15561562
|| (alloc_conv_table(nums, &_socket_to_node) < 0)) {
15571563
kfree(_pnode_to_socket);
15581564
kfree(_socket_to_pnode);
1559-
kfree(_node_to_pnode);
1565+
kfree(_node_to_socket);
15601566
return;
15611567
}
15621568

15631569
/* Fill in pnode/node/addr conversion list values: */
1564-
pr_info("UV: GAM Building socket/pnode conversion tables\n");
15651570
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
15661571
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
15671572
continue;
15681573
i = gre->sockid - minsock;
1569-
/* Duplicate: */
1570-
if (_socket_to_pnode[i] != SOCK_EMPTY)
1571-
continue;
1572-
_socket_to_pnode[i] = gre->pnode;
1574+
if (_socket_to_pnode[i] == SOCK_EMPTY)
1575+
_socket_to_pnode[i] = gre->pnode;
15731576

15741577
i = gre->pnode - minpnode;
1575-
_pnode_to_socket[i] = gre->sockid;
1578+
if (_pnode_to_socket[i] == SOCK_EMPTY)
1579+
_pnode_to_socket[i] = gre->sockid;
15761580

15771581
pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
15781582
gre->sockid, gre->type, gre->nasid,
@@ -1582,41 +1586,37 @@ static void __init build_socket_tables(void)
15821586

15831587
/* Set socket -> node values: */
15841588
lnid = NUMA_NO_NODE;
1585-
for_each_present_cpu(cpu) {
1589+
for_each_possible_cpu(cpu) {
15861590
int nid = cpu_to_node(cpu);
15871591
int apicid, sockid;
15881592

15891593
if (lnid == nid)
15901594
continue;
15911595
lnid = nid;
1596+
15921597
apicid = per_cpu(x86_cpu_to_apicid, cpu);
15931598
sockid = apicid >> uv_cpuid.socketid_shift;
1594-
_socket_to_node[sockid - minsock] = nid;
1595-
pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
1596-
sockid, apicid, nid);
1597-
}
15981599

1599-
/* Set up physical blade to pnode translation from GAM Range Table: */
1600-
for (lnid = 0; lnid < num_possible_nodes(); lnid++) {
1601-
unsigned short sockid;
1600+
if (_socket_to_node[sockid - minsock] == SOCK_EMPTY)
1601+
_socket_to_node[sockid - minsock] = nid;
16021602

1603-
for (sockid = minsock; sockid <= maxsock; sockid++) {
1604-
if (lnid == _socket_to_node[sockid - minsock]) {
1605-
_node_to_pnode[lnid] = _socket_to_pnode[sockid - minsock];
1606-
break;
1607-
}
1608-
}
1609-
if (sockid > maxsock) {
1610-
pr_err("UV: socket for node %d not found!\n", lnid);
1611-
BUG();
1612-
}
1603+
if (_node_to_socket[nid] == SOCK_EMPTY)
1604+
_node_to_socket[nid] = sockid;
1605+
1606+
pr_info("UV: sid:%02x: apicid:%04x socket:%02d node:%03x s2n:%03x\n",
1607+
sockid,
1608+
apicid,
1609+
_node_to_socket[nid],
1610+
nid,
1611+
_socket_to_node[sockid - minsock]);
16131612
}
16141613

16151614
/*
16161615
* If e.g. socket id == pnode for all pnodes,
16171616
* system runs faster by removing corresponding conversion table.
16181617
*/
16191618
FREE_1_TO_1_TABLE(_socket_to_node, _min_socket, nums, numn);
1619+
FREE_1_TO_1_TABLE(_node_to_socket, _min_socket, nums, numn);
16201620
FREE_1_TO_1_TABLE(_socket_to_pnode, _min_pnode, nums, nump);
16211621
FREE_1_TO_1_TABLE(_pnode_to_socket, _min_pnode, nums, nump);
16221622
}
@@ -1702,12 +1702,13 @@ static __init int uv_system_init_hubless(void)
17021702
static void __init uv_system_init_hub(void)
17031703
{
17041704
struct uv_hub_info_s hub_info = {0};
1705-
int bytes, cpu, nodeid;
1705+
int bytes, cpu, nodeid, bid;
17061706
unsigned short min_pnode = USHRT_MAX, max_pnode = 0;
17071707
char *hub = is_uv5_hub() ? "UV500" :
17081708
is_uv4_hub() ? "UV400" :
17091709
is_uv3_hub() ? "UV300" :
17101710
is_uv2_hub() ? "UV2000/3000" : NULL;
1711+
struct uv_hub_info_s **uv_hub_info_list_blade;
17111712

17121713
if (!hub) {
17131714
pr_err("UV: Unknown/unsupported UV hub\n");
@@ -1730,9 +1731,12 @@ static void __init uv_system_init_hub(void)
17301731
build_uv_gr_table();
17311732
set_block_size();
17321733
uv_init_hub_info(&hub_info);
1733-
uv_possible_blades = num_possible_nodes();
1734-
if (!_node_to_pnode)
1734+
/* If UV2 or UV3 may need to get # blades from HW */
1735+
if (is_uv(UV2|UV3) && !uv_gre_table)
17351736
boot_init_possible_blades(&hub_info);
1737+
else
1738+
/* min/max sockets set in decode_gam_rng_tbl */
1739+
uv_possible_blades = (_max_socket - _min_socket) + 1;
17361740

17371741
/* uv_num_possible_blades() is really the hub count: */
17381742
pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus());
@@ -1741,86 +1745,108 @@ static void __init uv_system_init_hub(void)
17411745
hub_info.coherency_domain_number = sn_coherency_id;
17421746
uv_rtc_init();
17431747

1748+
/*
1749+
* __uv_hub_info_list[] is indexed by node, but there is only
1750+
* one hub_info structure per blade. First, allocate one
1751+
* structure per blade. Further down we create a per-node
1752+
* table (__uv_hub_info_list[]) pointing to hub_info
1753+
* structures for the correct blade.
1754+
*/
1755+
17441756
bytes = sizeof(void *) * uv_num_possible_blades();
1745-
__uv_hub_info_list = kzalloc(bytes, GFP_KERNEL);
1746-
BUG_ON(!__uv_hub_info_list);
1757+
uv_hub_info_list_blade = kzalloc(bytes, GFP_KERNEL);
1758+
if (WARN_ON_ONCE(!uv_hub_info_list_blade))
1759+
return;
17471760

17481761
bytes = sizeof(struct uv_hub_info_s);
1749-
for_each_node(nodeid) {
1762+
for_each_possible_blade(bid) {
17501763
struct uv_hub_info_s *new_hub;
17511764

1752-
if (__uv_hub_info_list[nodeid]) {
1753-
pr_err("UV: Node %d UV HUB already initialized!?\n", nodeid);
1754-
BUG();
1765+
/* Allocate & fill new per hub info list */
1766+
new_hub = (bid == 0) ? &uv_hub_info_node0
1767+
: kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid));
1768+
if (WARN_ON_ONCE(!new_hub)) {
1769+
/* do not kfree() bid 0, which is statically allocated */
1770+
while (--bid > 0)
1771+
kfree(uv_hub_info_list_blade[bid]);
1772+
kfree(uv_hub_info_list_blade);
1773+
return;
17551774
}
17561775

1757-
/* Allocate new per hub info list */
1758-
new_hub = (nodeid == 0) ? &uv_hub_info_node0 : kzalloc_node(bytes, GFP_KERNEL, nodeid);
1759-
BUG_ON(!new_hub);
1760-
__uv_hub_info_list[nodeid] = new_hub;
1761-
new_hub = uv_hub_info_list(nodeid);
1762-
BUG_ON(!new_hub);
1776+
uv_hub_info_list_blade[bid] = new_hub;
17631777
*new_hub = hub_info;
17641778

17651779
/* Use information from GAM table if available: */
1766-
if (_node_to_pnode)
1767-
new_hub->pnode = _node_to_pnode[nodeid];
1780+
if (uv_gre_table)
1781+
new_hub->pnode = uv_blade_to_pnode(bid);
17681782
else /* Or fill in during CPU loop: */
17691783
new_hub->pnode = 0xffff;
17701784

1771-
new_hub->numa_blade_id = uv_node_to_blade_id(nodeid);
1785+
new_hub->numa_blade_id = bid;
17721786
new_hub->memory_nid = NUMA_NO_NODE;
17731787
new_hub->nr_possible_cpus = 0;
17741788
new_hub->nr_online_cpus = 0;
17751789
}
17761790

1791+
/*
1792+
* Now populate __uv_hub_info_list[] for each node with the
1793+
* pointer to the struct for the blade it resides on.
1794+
*/
1795+
1796+
bytes = sizeof(void *) * num_possible_nodes();
1797+
__uv_hub_info_list = kzalloc(bytes, GFP_KERNEL);
1798+
if (WARN_ON_ONCE(!__uv_hub_info_list)) {
1799+
for_each_possible_blade(bid)
1800+
/* bid 0 is statically allocated */
1801+
if (bid != 0)
1802+
kfree(uv_hub_info_list_blade[bid]);
1803+
kfree(uv_hub_info_list_blade);
1804+
return;
1805+
}
1806+
1807+
for_each_node(nodeid)
1808+
__uv_hub_info_list[nodeid] = uv_hub_info_list_blade[uv_node_to_blade_id(nodeid)];
1809+
17771810
/* Initialize per CPU info: */
17781811
for_each_possible_cpu(cpu) {
1779-
int apicid = per_cpu(x86_cpu_to_apicid, cpu);
1780-
int numa_node_id;
1812+
int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
1813+
unsigned short bid;
17811814
unsigned short pnode;
17821815

1783-
nodeid = cpu_to_node(cpu);
1784-
numa_node_id = numa_cpu_node(cpu);
17851816
pnode = uv_apicid_to_pnode(apicid);
1817+
bid = uv_pnode_to_socket(pnode) - _min_socket;
17861818

1787-
uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid);
1819+
uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list_blade[bid];
17881820
uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++;
17891821
if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE)
17901822
uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu);
17911823

1792-
/* Init memoryless node: */
1793-
if (nodeid != numa_node_id &&
1794-
uv_hub_info_list(numa_node_id)->pnode == 0xffff)
1795-
uv_hub_info_list(numa_node_id)->pnode = pnode;
1796-
else if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
1824+
if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
17971825
uv_cpu_hub_info(cpu)->pnode = pnode;
17981826
}
17991827

1800-
for_each_node(nodeid) {
1801-
unsigned short pnode = uv_hub_info_list(nodeid)->pnode;
1828+
for_each_possible_blade(bid) {
1829+
unsigned short pnode = uv_hub_info_list_blade[bid]->pnode;
18021830

1803-
/* Add pnode info for pre-GAM list nodes without CPUs: */
1804-
if (pnode == 0xffff) {
1805-
unsigned long paddr;
1831+
if (pnode == 0xffff)
1832+
continue;
18061833

1807-
paddr = node_start_pfn(nodeid) << PAGE_SHIFT;
1808-
pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
1809-
uv_hub_info_list(nodeid)->pnode = pnode;
1810-
}
18111834
min_pnode = min(pnode, min_pnode);
18121835
max_pnode = max(pnode, max_pnode);
1813-
pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n",
1814-
nodeid,
1815-
uv_hub_info_list(nodeid)->pnode,
1816-
uv_hub_info_list(nodeid)->nr_possible_cpus);
1836+
pr_info("UV: HUB:%2d pn:%02x nrcpus:%d\n",
1837+
bid,
1838+
uv_hub_info_list_blade[bid]->pnode,
1839+
uv_hub_info_list_blade[bid]->nr_possible_cpus);
18171840
}
18181841

18191842
pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode);
18201843
map_gru_high(max_pnode);
18211844
map_mmr_high(max_pnode);
18221845
map_mmioh_high(min_pnode, max_pnode);
18231846

1847+
kfree(uv_hub_info_list_blade);
1848+
uv_hub_info_list_blade = NULL;
1849+
18241850
uv_nmi_setup();
18251851
uv_cpu_init();
18261852
uv_setup_proc_files(0);

0 commit comments

Comments
 (0)