Skip to content

Commit da7bc9c

Browse files
djbw authored and Ingo Molnar committed
x86/asm/memcpy_mcsafe: Remove loop unrolling

In preparation for teaching memcpy_mcsafe() to return 'bytes remaining'
rather than pass / fail, simplify the implementation to remove loop
unrolling. The unrolling complicates the fault handling for negligible
benefit given modern CPUs perform loop stream detection.

Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/152539237092.31796.9115692316555638048.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 67b8d5c commit da7bc9c

File tree

2 files changed

+12
-51
lines changed

2 files changed

+12
-51
lines changed

arch/x86/include/asm/string_64.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ int strcmp(const char *cs, const char *ct);
116116
#endif
117117

118118
#define __HAVE_ARCH_MEMCPY_MCSAFE 1
119-
__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
119+
__must_check int __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
120120
DECLARE_STATIC_KEY_FALSE(mcsafe_key);
121121

122122
/**
@@ -138,7 +138,7 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt)
138138
{
139139
#ifdef CONFIG_X86_MCE
140140
if (static_branch_unlikely(&mcsafe_key))
141-
return memcpy_mcsafe_unrolled(dst, src, cnt);
141+
return __memcpy_mcsafe(dst, src, cnt);
142142
else
143143
#endif
144144
memcpy(dst, src, cnt);

arch/x86/lib/memcpy_64.S

Lines changed: 10 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -184,11 +184,11 @@ ENDPROC(memcpy_orig)
184184

185185
#ifndef CONFIG_UML
186186
/*
187-
* memcpy_mcsafe_unrolled - memory copy with machine check exception handling
187+
* __memcpy_mcsafe - memory copy with machine check exception handling
188188
* Note that we only catch machine checks when reading the source addresses.
189189
* Writes to target are posted and don't generate machine checks.
190190
*/
191-
ENTRY(memcpy_mcsafe_unrolled)
191+
ENTRY(__memcpy_mcsafe)
192192
cmpl $8, %edx
193193
/* Less than 8 bytes? Go to byte copy loop */
194194
jb .L_no_whole_words
@@ -213,49 +213,18 @@ ENTRY(memcpy_mcsafe_unrolled)
213213
jnz .L_copy_leading_bytes
214214

215215
.L_8byte_aligned:
216-
/* Figure out how many whole cache lines (64-bytes) to copy */
217-
movl %edx, %ecx
218-
andl $63, %edx
219-
shrl $6, %ecx
220-
jz .L_no_whole_cache_lines
221-
222-
/* Loop copying whole cache lines */
223-
.L_cache_w0: movq (%rsi), %r8
224-
.L_cache_w1: movq 1*8(%rsi), %r9
225-
.L_cache_w2: movq 2*8(%rsi), %r10
226-
.L_cache_w3: movq 3*8(%rsi), %r11
227-
movq %r8, (%rdi)
228-
movq %r9, 1*8(%rdi)
229-
movq %r10, 2*8(%rdi)
230-
movq %r11, 3*8(%rdi)
231-
.L_cache_w4: movq 4*8(%rsi), %r8
232-
.L_cache_w5: movq 5*8(%rsi), %r9
233-
.L_cache_w6: movq 6*8(%rsi), %r10
234-
.L_cache_w7: movq 7*8(%rsi), %r11
235-
movq %r8, 4*8(%rdi)
236-
movq %r9, 5*8(%rdi)
237-
movq %r10, 6*8(%rdi)
238-
movq %r11, 7*8(%rdi)
239-
leaq 64(%rsi), %rsi
240-
leaq 64(%rdi), %rdi
241-
decl %ecx
242-
jnz .L_cache_w0
243-
244-
/* Are there any trailing 8-byte words? */
245-
.L_no_whole_cache_lines:
246216
movl %edx, %ecx
247217
andl $7, %edx
248218
shrl $3, %ecx
249219
jz .L_no_whole_words
250220

251-
/* Copy trailing words */
252-
.L_copy_trailing_words:
221+
.L_copy_words:
253222
movq (%rsi), %r8
254-
mov %r8, (%rdi)
255-
leaq 8(%rsi), %rsi
256-
leaq 8(%rdi), %rdi
223+
movq %r8, (%rdi)
224+
addq $8, %rsi
225+
addq $8, %rdi
257226
decl %ecx
258-
jnz .L_copy_trailing_words
227+
jnz .L_copy_words
259228

260229
/* Any trailing bytes? */
261230
.L_no_whole_words:
@@ -276,8 +245,8 @@ ENTRY(memcpy_mcsafe_unrolled)
276245
.L_done_memcpy_trap:
277246
xorq %rax, %rax
278247
ret
279-
ENDPROC(memcpy_mcsafe_unrolled)
280-
EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
248+
ENDPROC(__memcpy_mcsafe)
249+
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
281250

282251
.section .fixup, "ax"
283252
/* Return -EFAULT for any failure */
@@ -288,14 +257,6 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
288257
.previous
289258

290259
_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
291-
_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
292-
_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
293-
_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
294-
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
295-
_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
296-
_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
297-
_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
298-
_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
299-
_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
260+
_ASM_EXTABLE_FAULT(.L_copy_words, .L_memcpy_mcsafe_fail)
300261
_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
301262
#endif

0 commit comments

Comments (0)