Skip to content

Commit 20516d6

Browse files
committed
x86: Stop using weak symbols for __iowrite32_copy()
Start switching iomap_copy routines over to use #define and arch-provided inline/macro functions instead of weak symbols. Inline functions allow more compiler optimization and this is often a driver hot path. x86 has the only weak implementation for __iowrite32_copy(), so replace it with a static inline containing the same single instruction inline assembly. The compiler will generate the "mov edx,ecx" in a more optimal way. Remove iomap_copy_64.S. Link: https://lore.kernel.org/r/[email protected] Acked-by: Arnd Bergmann <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 1a633bd commit 20516d6

File tree

5 files changed

+24
-20
lines changed

5 files changed

+24
-20
lines changed

arch/x86/include/asm/io.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,23 @@ void memset_io(volatile void __iomem *, int, size_t);
209209
#define memcpy_toio memcpy_toio
210210
#define memset_io memset_io
211211

212+
#ifdef CONFIG_X86_64
213+
/*
214+
* Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for
215+
* x86_64") says that circa 2006 rep movsl is noticeably faster than a copy
216+
* loop.
*
* __iowrite32_copy - x86-64 inline version of the MMIO copy helper.
* @to:    destination pointer, annotated __iomem
* @from:  source pointer
* @count: repeat count handed to "rep ; movsl"; movsl moves one 32-bit
*         unit per iteration, so this is a count of 32-bit units, not bytes
*
* The whole copy is the single string instruction "rep ; movsl". No
* return value.
217+
*/
218+
static inline void __iowrite32_copy(void __iomem *to, const void *from,
219+
size_t count)
220+
{
/*
 * Operand constraints (GCC x86 machine constraints): "c" is the count
 * register, "D" the destination-index register and "S" the source-index
 * register, which are the registers "rep ; movsl" implicitly uses.
 * The instruction modifies all three, so each is declared as an
 * early-clobber output ("=&") and tied back to its initial value through
 * the matching input constraints "0", "1" and "2".
 * The "memory" clobber tells the compiler that the asm reads and writes
 * memory that is not listed as an operand.
 */
221+
asm volatile("rep ; movsl"
222+
: "=&c"(count), "=&D"(to), "=&S"(from)
223+
: "0"(count), "1"(to), "2"(from)
224+
: "memory");
225+
}
226+
#define __iowrite32_copy __iowrite32_copy
227+
#endif
228+
212229
/*
213230
* ISA space is 'always mapped' on a typical x86 system, no need to
214231
* explicitly ioremap() it. The fact that the ISA IO space is mapped

arch/x86/lib/Makefile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y)
5353
lib-y += atomic64_386_32.o
5454
endif
5555
else
56-
obj-y += iomap_copy_64.o
5756
ifneq ($(CONFIG_GENERIC_CSUM),y)
5857
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
5958
endif

arch/x86/lib/iomap_copy_64.S

Lines changed: 0 additions & 15 deletions
This file was deleted.

include/linux/io.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616
struct device;
1717
struct resource;
1818

19-
__visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
19+
#ifndef __iowrite32_copy
20+
void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
21+
#endif
22+
2023
void __ioread32_copy(void *to, const void __iomem *from, size_t count);
2124
void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
2225

lib/iomap_copy.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,8 @@
1616
* time. Order of access is not guaranteed, nor is a memory barrier
1717
* performed afterwards.
1818
*/
19-
void __attribute__((weak)) __iowrite32_copy(void __iomem *to,
20-
const void *from,
21-
size_t count)
19+
#ifndef __iowrite32_copy
20+
void __iowrite32_copy(void __iomem *to, const void *from, size_t count)
2221
{
2322
u32 __iomem *dst = to;
2423
const u32 *src = from;
@@ -28,6 +27,7 @@ void __attribute__((weak)) __iowrite32_copy(void __iomem *to,
2827
__raw_writel(*src++, dst++);
2928
}
3029
EXPORT_SYMBOL_GPL(__iowrite32_copy);
30+
#endif
3131

3232
/**
3333
* __ioread32_copy - copy data from MMIO space, in 32-bit units

0 commit comments

Comments
 (0)