From 5bce95c40f6cecfd0bff4f6a08633f30fc28dd11 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 11 Oct 2023 04:01:10 +0000 Subject: [PATCH 01/10] x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach jira roc-2673 commit fbf6449f84bf5e4ad09f2c09ee70ed7d629b5ff6 Instead of setting x86_virt_bits to a possibly-correct value and then correcting it later, do all the necessary checks before setting it. At this point, the #VC handler references boot_cpu_data.x86_virt_bits, and in the previous version, it would be triggered by the CPUIDs between the point at which it is set to 48 and when it is set to the correct value. Suggested-by: Dave Hansen Signed-off-by: Adam Dunlap Signed-off-by: Ingo Molnar Tested-by: Jacob Xu Link: https://lore.kernel.org/r/20230912002703.3924521-3-acdunlap@google.com Signed-off-by: Ronnie Sahlberg Signed-off-by: Jonathan Maple --- arch/x86/kernel/cpu/common.c | 37 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dbb5dee28ca27..40b4861cf9c79 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1020,17 +1020,32 @@ void get_cpu_cap(struct cpuinfo_x86 *c) static void get_cpu_address_sizes(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; + bool vp_bits_from_cpuid = true; - if (c->extended_cpuid_level >= 0x80000008) { + if (!cpu_has(c, X86_FEATURE_CPUID) || + (c->extended_cpuid_level < 0x80000008)) + vp_bits_from_cpuid = false; + + if (vp_bits_from_cpuid) { cpuid(0x80000008, &eax, &ebx, &ecx, &edx); c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; + } else { + if (IS_ENABLED(CONFIG_X86_64)) { + c->x86_clflush_size = 64; + c->x86_phys_bits = 36; + c->x86_virt_bits = 48; + } else { + c->x86_clflush_size = 32; + c->x86_virt_bits = 32; + c->x86_phys_bits = 32; + + if (cpu_has(c, X86_FEATURE_PAE) || + cpu_has(c, X86_FEATURE_PSE36)) + 
c->x86_phys_bits = 36; + } } -#ifdef CONFIG_X86_32 - else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) - c->x86_phys_bits = 36; -#endif c->x86_cache_bits = c->x86_phys_bits; } @@ -1468,15 +1483,6 @@ static void __init cpu_parse_early_param(void) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_64 - c->x86_clflush_size = 64; - c->x86_phys_bits = 36; - c->x86_virt_bits = 48; -#else - c->x86_clflush_size = 32; - c->x86_phys_bits = 32; - c->x86_virt_bits = 32; -#endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof(c->x86_capability)); @@ -1488,7 +1494,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_vendor(c); get_cpu_cap(c); get_model_name(c); /* RHEL8: get model name for unsupported check */ - get_cpu_address_sizes(c); setup_force_cpu_cap(X86_FEATURE_CPUID); cpu_parse_early_param(); @@ -1505,6 +1510,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_CPUID); } + get_cpu_address_sizes(c); + setup_force_cpu_cap(X86_FEATURE_ALWAYS); cpu_set_bug_bits(c); From 9fa9cba4a213bd6d49755869b1e7c566742ae6fa Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 11 Oct 2023 04:04:08 +0000 Subject: [PATCH 02/10] x86/boot: Move x86_cache_alignment initialization to correct spot jira roc-2673 commit 3e32552652917f10c0aa8ac75cdc8f0b8d257dec c->x86_cache_alignment is initialized from c->x86_clflush_size. However, commit fbf6449f84bf moved c->x86_clflush_size initialization to later in boot without moving the c->x86_cache_alignment assignment: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach") This presumably left c->x86_cache_alignment set to zero for longer than it should be. The result was an oops on 32-bit kernels while accessing a pointer at 0x20. The 0x20 came from accessing a structure member at offset 0x10 (buffer->cpumask) from a ZERO_SIZE_PTR=0x10. 
kmalloc() can evidently return ZERO_SIZE_PTR when it's given 0 as its alignment requirement. Move the c->x86_cache_alignment initialization to be after c->x86_clflush_size has an actual value. Fixes: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach") Signed-off-by: Dave Hansen Signed-off-by: Ingo Molnar Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/20231002220045.1014760-1-dave.hansen@linux.intel.com (cherry picked from commit 3e32552652917f10c0aa8ac75cdc8f0b8d257dec) Signed-off-by: Ronnie Sahlberg Signed-off-by: Jonathan Maple --- arch/x86/kernel/cpu/common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 40b4861cf9c79..2ad285f913633 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1047,6 +1047,7 @@ static void get_cpu_address_sizes(struct cpuinfo_x86 *c) } } c->x86_cache_bits = c->x86_phys_bits; + c->x86_cache_alignment = c->x86_clflush_size; } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) @@ -1483,8 +1484,6 @@ static void __init cpu_parse_early_param(void) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof(c->x86_capability)); c->extended_cpuid_level = 0; From 811b549ed174947120f8328d4a3601c14810266d Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Thu, 26 Dec 2024 14:34:31 -0500 Subject: [PATCH 03/10] x86/cpu: Allow reducing x86_phys_bits during early_identify_cpu() jira LE-2183 bug-fix x86/sev-es: Set x86_virt_bits commit-author Paolo Bonzini commit 9a458198eba98b7207669a166e64d04b04cb651b In commit fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach"), the initialization of c->x86_phys_bits was moved after this_cpu->c_early_init(c). 
This is incorrect because early_init_amd() expected to be able to reduce the value according to the contents of CPUID leaf 0x8000001f. Fortunately, the bug was negated by init_amd()'s call to early_init_amd(), which does reduce x86_phys_bits in the end. However, this is very late in the boot process and, most notably, the wrong value is used for x86_phys_bits when setting up MTRRs. To fix this, call get_cpu_address_sizes() as soon as X86_FEATURE_CPUID is set/cleared, and c->extended_cpuid_level is retrieved. Fixes: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach") Signed-off-by: Paolo Bonzini Signed-off-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20240131230902.1867092-2-pbonzini%40redhat.com (cherry picked from commit 9a458198eba98b7207669a166e64d04b04cb651b) Signed-off-by: Jonathan Maple Signed-off-by: Jonathan Maple --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2ad285f913633..e10abdd903bf3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1494,6 +1494,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); get_model_name(c); /* RHEL8: get model name for unsupported check */ setup_force_cpu_cap(X86_FEATURE_CPUID); + get_cpu_address_sizes(c); cpu_parse_early_param(); if (this_cpu->c_early_init) @@ -1507,10 +1508,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) } else { identify_cpu_without_cpuid(c); setup_clear_cpu_cap(X86_FEATURE_CPUID); + get_cpu_address_sizes(c); } - get_cpu_address_sizes(c); - setup_force_cpu_cap(X86_FEATURE_ALWAYS); cpu_set_bug_bits(c); From 038287130491d402d2619a18481f9af2aea686b7 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Thu, 26 Dec 2024 14:44:56 -0500 Subject: [PATCH 04/10] x86/cpu: Get rid of an unnecessary local variable in 
get_cpu_address_sizes() jira LE-2183 bug-fix-prereq x86/sev-es: Set x86_virt_bits commit-author Borislav Petkov (AMD) commit 95bfb35269b2e85cff0dd2c957b2d42ebf95ae5f Drop 'vp_bits_from_cpuid' as it is not really needed. No functional changes. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240316120706.4352-1-bp@alien8.de (cherry picked from commit 95bfb35269b2e85cff0dd2c957b2d42ebf95ae5f) Signed-off-by: Jonathan Maple Signed-off-by: Jonathan Maple --- arch/x86/kernel/cpu/common.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e10abdd903bf3..1a4d0b20d5b7f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1020,18 +1020,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) static void get_cpu_address_sizes(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - bool vp_bits_from_cpuid = true; if (!cpu_has(c, X86_FEATURE_CPUID) || - (c->extended_cpuid_level < 0x80000008)) - vp_bits_from_cpuid = false; - - if (vp_bits_from_cpuid) { - cpuid(0x80000008, &eax, &ebx, &ecx, &edx); - - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } else { + (c->extended_cpuid_level < 0x80000008)) { if (IS_ENABLED(CONFIG_X86_64)) { c->x86_clflush_size = 64; c->x86_phys_bits = 36; @@ -1045,7 +1036,13 @@ static void get_cpu_address_sizes(struct cpuinfo_x86 *c) cpu_has(c, X86_FEATURE_PSE36)) c->x86_phys_bits = 36; } + } else { + cpuid(0x80000008, &eax, &ebx, &ecx, &edx); + + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; } + c->x86_cache_bits = c->x86_phys_bits; c->x86_cache_alignment = c->x86_clflush_size; } From e42b62e1614ee94104f0751b2924b838545249e3 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Thu, 26 Dec 2024 14:45:08 -0500 Subject: [PATCH 05/10] x86/cpu: Provide default cache line size if not enumerated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit jira LE-2183 bug-fix x86/sev-es: Set x86_virt_bits commit-author Dave Hansen commit 2a38e4ca302280fdcce370ba2bee79bac16c4587 tl;dr: CPUs with CPUID.80000008H but without CPUID.01H:EDX[CLFSH] will end up reporting cache_line_size()==0 and bad things happen. Fill in a default on those to avoid the problem. Long Story: The kernel dies a horrible death if c->x86_cache_alignment (aka cache_line_size()) is 0. Normally, this value is populated from c->x86_clflush_size. Right now the code is set up to get c->x86_clflush_size from two places. First, modern CPUs get it from CPUID. Old CPUs that don't have leaf 0x80000008 (or CPUID at all) just get some sane defaults from the kernel in get_cpu_address_sizes(). The vast majority of CPUs that have leaf 0x80000008 also get ->x86_clflush_size from CPUID. But there are oddballs. Intel Quark CPUs[1] and others[2] have leaf 0x80000008 but don't set CPUID.01H:EDX[CLFSH], so they skip over filling in ->x86_clflush_size: cpuid(0x00000001, &tfms, &misc, &junk, &cap0); if (cap0 & (1<<19)) c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; So they: land in get_cpu_address_sizes() and see that CPUID has level 0x80000008 and jump into the side of the if() that does not fill in c->x86_clflush_size. That assigns a 0 to c->x86_cache_alignment, and hilarity ensues in code like: buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), GFP_KERNEL); To fix this, always provide a sane value for ->x86_clflush_size. Big thanks to Andy Shevchenko for finding and reporting this and also providing a first pass at a fix. But his fix was only partial and only worked on the Quark CPUs. It would not, for instance, have worked on the QEMU config. 1. https://raw.githubusercontent.com/InstLatx64/InstLatx64/master/GenuineIntel/GenuineIntel0000590_Clanton_03_CPUID.txt 2. You can also get this behavior if you use "-cpu 486,+clzero" in QEMU.
[ dhansen: remove 'vp_bits_from_cpuid' reference in changelog because bpetkov brutally murdered it recently. ] Fixes: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach") Reported-by: Andy Shevchenko Signed-off-by: Dave Hansen Tested-by: Andy Shevchenko Tested-by: Jörn Heusipp Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240516173928.3960193-1-andriy.shevchenko@linux.intel.com/ Link: https://lore.kernel.org/lkml/5e31cad3-ad4d-493e-ab07-724cfbfaba44@heusipp.de/ Link: https://lore.kernel.org/all/20240517200534.8EC5F33E%40davehans-spike.ostc.intel.com (cherry picked from commit 2a38e4ca302280fdcce370ba2bee79bac16c4587) Signed-off-by: Jonathan Maple Signed-off-by: Jonathan Maple --- arch/x86/kernel/cpu/common.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1a4d0b20d5b7f..e3e7b06692907 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1041,6 +1041,10 @@ static void get_cpu_address_sizes(struct cpuinfo_x86 *c) c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; + + /* Provide a sane default if not enumerated: */ + if (!c->x86_clflush_size) + c->x86_clflush_size = 32; } c->x86_cache_bits = c->x86_phys_bits; From c2ee39cd721f59767bcf272a7a8da4f1a25fbc45 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Tue, 19 Aug 2025 09:41:58 +0000 Subject: [PATCH 06/10] net: mana: Enable MANA driver on ARM64 with 4K page size jira LE-3812 commit-author Haiyang Zhang commit 40a1d11fc670ac03c5dc2e5a9724b330e74f38b0 Change the Kconfig dependency, so this driver can be built and run on ARM64 with 4K page size. 16/64K page sizes are not supported yet. 
Signed-off-by: Haiyang Zhang Link: https://lore.kernel.org/r/1715632141-8089-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski (cherry picked from commit 40a1d11fc670ac03c5dc2e5a9724b330e74f38b0) Signed-off-by: Shreeya Patel Signed-off-by: Jonathan Maple --- drivers/net/ethernet/microsoft/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig index 090e6b9832431..61cb91c3a5dbd 100644 --- a/drivers/net/ethernet/microsoft/Kconfig +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -17,7 +17,8 @@ if NET_VENDOR_MICROSOFT config MICROSOFT_MANA tristate "Microsoft Azure Network Adapter (MANA) support" - depends on PCI_MSI && X86_64 + depends on PCI_MSI + depends on X86_64 || (ARM64 && !CPU_BIG_ENDIAN && ARM64_4K_PAGES) depends on PCI_HYPERV select AUXILIARY_BUS help From 1848c2214ab39ec1eb77258588b1b0a806114141 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Tue, 19 Aug 2025 09:42:06 +0000 Subject: [PATCH 07/10] net: mana: Add support for page sizes other than 4KB on ARM64 jira LE-3812 commit-author Haiyang Zhang commit 382d1741b5b2feffef7942dd074206372afe1a96 As defined by the MANA Hardware spec, the queue size for DMA is 4KB minimal, and power of 2. And, the HWC queue size has to be exactly 4KB. To support page sizes other than 4KB on ARM64, define the minimal queue size as a macro separately from the PAGE_SIZE, which we always assumed it to be 4KB before supporting ARM64. Also, add MANA specific macros and update code related to size alignment, DMA region calculations, etc. 
Signed-off-by: Haiyang Zhang Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1718655446-6576-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski (cherry picked from commit 382d1741b5b2feffef7942dd074206372afe1a96) Signed-off-by: Shreeya Patel Signed-off-by: Jonathan Maple --- drivers/net/ethernet/microsoft/Kconfig | 2 +- drivers/net/ethernet/microsoft/mana/gdma_main.c | 10 +++++----- drivers/net/ethernet/microsoft/mana/hw_channel.c | 14 +++++++------- drivers/net/ethernet/microsoft/mana/mana_en.c | 8 ++++---- drivers/net/ethernet/microsoft/mana/shm_channel.c | 13 +++++++------ include/net/mana/gdma.h | 10 +++++++++- include/net/mana/mana.h | 3 ++- 7 files changed, 35 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig index 61cb91c3a5dbd..77bc47cbfbbf4 100644 --- a/drivers/net/ethernet/microsoft/Kconfig +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -18,7 +18,7 @@ if NET_VENDOR_MICROSOFT config MICROSOFT_MANA tristate "Microsoft Azure Network Adapter (MANA) support" depends on PCI_MSI - depends on X86_64 || (ARM64 && !CPU_BIG_ENDIAN && ARM64_4K_PAGES) + depends on X86_64 || (ARM64 && !CPU_BIG_ENDIAN) depends on PCI_HYPERV select AUXILIARY_BUS help diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index f1b4b0b6ae65b..8d9e019bf1162 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -182,7 +182,7 @@ int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, dma_addr_t dma_handle; void *buf; - if (length < PAGE_SIZE || !is_power_of_2(length)) + if (length < MANA_PAGE_SIZE || !is_power_of_2(length)) return -EINVAL; gmi->dev = gc->dev; @@ -717,7 +717,7 @@ EXPORT_SYMBOL(mana_gd_destroy_dma_region); static int mana_gd_create_dma_region(struct gdma_dev *gd, struct gdma_mem_info *gmi) { - unsigned int num_page = gmi->length / 
PAGE_SIZE; + unsigned int num_page = gmi->length / MANA_PAGE_SIZE; struct gdma_create_dma_region_req *req = NULL; struct gdma_create_dma_region_resp resp = {}; struct gdma_context *gc = gd->gdma_context; @@ -727,10 +727,10 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, int err; int i; - if (length < PAGE_SIZE || !is_power_of_2(length)) + if (length < MANA_PAGE_SIZE || !is_power_of_2(length)) return -EINVAL; - if (offset_in_page(gmi->virt_addr) != 0) + if (!MANA_PAGE_ALIGNED(gmi->virt_addr)) return -EINVAL; hwc = gc->hwc.driver_data; @@ -751,7 +751,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, req->page_addr_list_len = num_page; for (i = 0; i < num_page; i++) - req->page_addr_list[i] = gmi->dma_handle + i * PAGE_SIZE; + req->page_addr_list[i] = gmi->dma_handle + i * MANA_PAGE_SIZE; err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp); if (err) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 3a31ba66b821e..d2a339bc1cd25 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -361,12 +361,12 @@ static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth, int err; eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth); - if (eq_size < MINIMUM_SUPPORTED_PAGE_SIZE) - eq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (eq_size < MANA_MIN_QSIZE) + eq_size = MANA_MIN_QSIZE; cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth); - if (cq_size < MINIMUM_SUPPORTED_PAGE_SIZE) - cq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (cq_size < MANA_MIN_QSIZE) + cq_size = MANA_MIN_QSIZE; hwc_cq = kzalloc(sizeof(*hwc_cq), GFP_KERNEL); if (!hwc_cq) @@ -428,7 +428,7 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, dma_buf->num_reqs = q_depth; - buf_size = PAGE_ALIGN(q_depth * max_msg_size); + buf_size = MANA_PAGE_ALIGN(q_depth * max_msg_size); gmi = &dma_buf->mem_info; err = 
mana_gd_alloc_memory(gc, buf_size, gmi); @@ -496,8 +496,8 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, else queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth); - if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE) - queue_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (queue_size < MANA_MIN_QSIZE) + queue_size = MANA_MIN_QSIZE; hwc_wq = kzalloc(sizeof(*hwc_wq), GFP_KERNEL); if (!hwc_wq) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index e16317aadbca4..d80dd8baefdeb 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1867,10 +1867,10 @@ static int mana_create_txq(struct mana_port_context *apc, * to prevent overflow. */ txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; - BUILD_BUG_ON(!PAGE_ALIGNED(txq_size)); + BUILD_BUG_ON(!MANA_PAGE_ALIGNED(txq_size)); cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; - cq_size = PAGE_ALIGN(cq_size); + cq_size = MANA_PAGE_ALIGN(cq_size); gc = gd->gdma_context; @@ -2128,8 +2128,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, if (err) goto out; - rq_size = PAGE_ALIGN(rq_size); - cq_size = PAGE_ALIGN(cq_size); + rq_size = MANA_PAGE_ALIGN(rq_size); + cq_size = MANA_PAGE_ALIGN(cq_size); /* Create RQ */ memset(&spec, 0, sizeof(spec)); diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c index 5553af9c8085a..0f1679ebad96b 100644 --- a/drivers/net/ethernet/microsoft/mana/shm_channel.c +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c @@ -6,6 +6,7 @@ #include #include +#include #include #define PAGE_FRAME_L48_WIDTH_BYTES 6 @@ -155,8 +156,8 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, return err; } - if (!PAGE_ALIGNED(eq_addr) || !PAGE_ALIGNED(cq_addr) || - !PAGE_ALIGNED(rq_addr) || !PAGE_ALIGNED(sq_addr)) + if (!MANA_PAGE_ALIGNED(eq_addr) || !MANA_PAGE_ALIGNED(cq_addr) || + 
!MANA_PAGE_ALIGNED(rq_addr) || !MANA_PAGE_ALIGNED(sq_addr)) return -EINVAL; if ((eq_msix_index & VECTOR_MASK) != eq_msix_index) @@ -183,7 +184,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* EQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(eq_addr); + frame_addr = MANA_PFN(eq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -191,7 +192,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* CQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(cq_addr); + frame_addr = MANA_PFN(cq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -199,7 +200,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* RQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(rq_addr); + frame_addr = MANA_PFN(rq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -207,7 +208,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* SQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(sq_addr); + frame_addr = MANA_PFN(sq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 27684135bb4d1..35507588a14d5 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -224,7 +224,15 @@ struct gdma_dev { struct auxiliary_device *adev; }; -#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE +/* MANA_PAGE_SIZE is the DMA unit */ +#define MANA_PAGE_SHIFT 
12 +#define MANA_PAGE_SIZE BIT(MANA_PAGE_SHIFT) +#define MANA_PAGE_ALIGN(x) ALIGN((x), MANA_PAGE_SIZE) +#define MANA_PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), MANA_PAGE_SIZE) +#define MANA_PFN(a) ((a) >> MANA_PAGE_SHIFT) + +/* Required by HW */ +#define MANA_MIN_QSIZE MANA_PAGE_SIZE #define GDMA_CQE_SIZE 64 #define GDMA_EQE_SIZE 16 diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 5da352adf1d68..6e4fee310e865 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -40,7 +40,8 @@ enum TRI_STATE { #define MAX_SEND_BUFFERS_PER_QUEUE 256 -#define EQ_SIZE (8 * PAGE_SIZE) +#define EQ_SIZE (8 * MANA_PAGE_SIZE) + #define LOG2_EQ_THROTTLE 3 #define MAX_PORTS_IN_MANA_DEV 256 From 57cda77b4fdc175484352d58f3312e4a4a4b657d Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Tue, 19 Aug 2025 12:04:36 +0000 Subject: [PATCH 08/10] RDMA/mana_ib: Fix bug in creation of dma regions jira LE-3812 commit-author Konstantin Taranov commit e02497fb654689049ba8b46f098f17d5f19e0b3c Use ib_umem_dma_offset() helper to calculate correct dma offset. 
Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Signed-off-by: Konstantin Taranov Link: https://lore.kernel.org/r/1709560361-26393-2-git-send-email-kotaranov@linux.microsoft.com Signed-off-by: Leon Romanovsky (cherry picked from commit e02497fb654689049ba8b46f098f17d5f19e0b3c) Signed-off-by: Shreeya Patel Signed-off-by: Jonathan Maple --- drivers/infiniband/hw/mana/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index faca092456fa3..7840c9e2631cc 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -358,7 +358,7 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, sizeof(struct gdma_create_dma_region_resp)); create_req->length = umem->length; - create_req->offset_in_page = umem->address & (page_sz - 1); + create_req->offset_in_page = ib_umem_dma_offset(umem, page_sz); create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT; create_req->page_count = num_pages_total; From 51db70a6afd3a190df868fa2253d020eef1b1e62 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Mon, 18 Aug 2025 21:04:33 +0000 Subject: [PATCH 09/10] RDMA/mana_ib: use the correct page size for mapping user-mode doorbell page jira LE-3812 commit-author Long Li commit 4a3b99bc04e501b816db78f70064e26a01257910 When mapping doorbell page from user-mode, the driver should use the system page size as this memory is allocated via mmap() from user-mode. 
Cc: stable@vger.kernel.org Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Signed-off-by: Long Li Link: https://patch.msgid.link/1725030993-16213-2-git-send-email-longli@linuxonhyperv.com Signed-off-by: Leon Romanovsky (cherry picked from commit 4a3b99bc04e501b816db78f70064e26a01257910) Signed-off-by: Shreeya Patel Signed-off-by: Jonathan Maple --- drivers/infiniband/hw/mana/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 7840c9e2631cc..a0affa92975cc 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -460,13 +460,13 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) PAGE_SHIFT; prot = pgprot_writecombine(vma->vm_page_prot); - ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot, + ret = rdma_user_mmap_io(ibcontext, vma, pfn, PAGE_SIZE, prot, NULL); if (ret) ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret); else - ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n", - pfn, gc->db_page_size, ret); + ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %lu, ret %d\n", + pfn, PAGE_SIZE, ret); return ret; } From 5a1cae8e02194ea129589465da5f785513fc436b Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Tue, 19 Aug 2025 12:04:57 +0000 Subject: [PATCH 10/10] RDMA/mana_ib: use the correct page table index based on hardware page size jira LE-3812 commit-author Long Li commit 9e517a8e9d9a303bf9bde35e5c5374795544c152 MANA hardware uses 4k page size. When calculating the page table index, it should use the hardware page size, not the system page size. 
Cc: stable@vger.kernel.org Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Signed-off-by: Long Li Link: https://patch.msgid.link/1725030993-16213-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Leon Romanovsky (cherry picked from commit 9e517a8e9d9a303bf9bde35e5c5374795544c152) Signed-off-by: Shreeya Patel Signed-off-by: Jonathan Maple --- drivers/infiniband/hw/mana/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index a0affa92975cc..3132705aa192f 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -359,7 +359,7 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, create_req->length = umem->length; create_req->offset_in_page = ib_umem_dma_offset(umem, page_sz); - create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT; + create_req->gdma_page_type = order_base_2(page_sz) - MANA_PAGE_SHIFT; create_req->page_count = num_pages_total; ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n",