From 1851613f12e8e17f9b39ab7766fd2cad223a530d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:01 +0100 Subject: [PATCH 01/17] iomap: hold state_lock over call to ifs_set_range_uptodate() mainline inclusion from mainline-v6.7-rc1 category: performance Patch series "Add folio_end_read", v2. The core of this patchset is the new folio_end_read() call which filesystems can use when finishing a page cache read instead of separate calls to mark the folio uptodate and unlock it. As an illustration of its use, I converted ext4, iomap & mpage; more can be converted. I think that's useful by itself, but the interesting optimisation is that we can implement that with a single XOR instruction that sets the uptodate bit, clears the lock bit, tests the waiter bit and provides a write memory barrier. That removes one memory barrier and one atomic instruction from each page read, which seems worth doing. That's in patch 15. The last two patches could be a separate series, but basically we can do the same thing with the writeback flag that we do with the unlock flag; clear it and test the waiters bit at the same time. This patch (of 17): This is really preparation for the next patch, but it lets us call folio_mark_uptodate() in just one place instead of two. Link: https://lkml.kernel.org/r/20231004165317.1061855-1-willy@infradead.org Link: https://lkml.kernel.org/r/20231004165317.1061855-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Matthew Wilcox (Oracle) Cc: Nicholas Piggin Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Bogendoerfer Cc: Michael Ellerman Cc: Christophe Leroy Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton (cherry picked from commit 279d5fc3227f04ef2c6125e5c440e7952173a89a) Signed-off-by: Wentao Guan --- fs/iomap/buffered-io.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 7ffdf0d037fae..c01e34f057933 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -57,30 +57,32 @@ static inline bool ifs_block_is_uptodate(struct iomap_folio_state *ifs, return test_bit(block, ifs->state); } -static void ifs_set_range_uptodate(struct folio *folio, +static bool ifs_set_range_uptodate(struct folio *folio, struct iomap_folio_state *ifs, size_t off, size_t len) { struct inode *inode = folio->mapping->host; unsigned int first_blk = off >> inode->i_blkbits; unsigned int last_blk = (off + len - 1) >> inode->i_blkbits; unsigned int nr_blks = last_blk - first_blk + 1; - unsigned long flags; - spin_lock_irqsave(&ifs->state_lock, flags); bitmap_set(ifs->state, first_blk, nr_blks); - if (ifs_is_fully_uptodate(folio, ifs)) - folio_mark_uptodate(folio); - spin_unlock_irqrestore(&ifs->state_lock, flags); + return ifs_is_fully_uptodate(folio, ifs); } static void iomap_set_range_uptodate(struct folio *folio, size_t off, size_t len) { struct iomap_folio_state *ifs = folio->private; + unsigned long flags; + bool uptodate = true; - if (ifs) - ifs_set_range_uptodate(folio, ifs, off, len); - else + if (ifs) { + spin_lock_irqsave(&ifs->state_lock, flags); + uptodate = ifs_set_range_uptodate(folio, ifs, off, len); + spin_unlock_irqrestore(&ifs->state_lock, flags); + } + + if (uptodate) folio_mark_uptodate(folio); } From 
f68e48b225347ec304a7e86c8803cf2548346b25 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Wed, 4 Oct 2023 17:53:02 +0100
Subject: [PATCH 02/17] iomap: protect read_bytes_pending with the state_lock

mainline inclusion
from mainline-v6.7-rc1
category: performance

Perform one atomic operation (acquiring the spinlock) instead of two
(spinlock & atomic_sub) per read completion.

Link: https://lkml.kernel.org/r/20231004165317.1061855-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Cc: Albert Ou
Cc: Alexander Gordeev
Cc: Andreas Dilger
Cc: Christian Borntraeger
Cc: Christophe Leroy
Cc: Geert Uytterhoeven
Cc: Heiko Carstens
Cc: Ivan Kokshaysky
Cc: Matt Turner
Cc: Michael Ellerman
Cc: Nicholas Piggin
Cc: Palmer Dabbelt
Cc: Paul Walmsley
Cc: Richard Henderson
Cc: Sven Schnelle
Cc: "Theodore Ts'o"
Cc: Thomas Bogendoerfer
Cc: Vasily Gorbik
Signed-off-by: Andrew Morton
(cherry picked from commit f45b494e2a24d86afd79cab7c343b414c5213447)
Signed-off-by: Wentao Guan
---
 fs/iomap/buffered-io.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index c01e34f057933..f61f92a384f97 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -29,9 +29,9 @@ typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length);
  * and I/O completions.
  */
 struct iomap_folio_state {
-	atomic_t read_bytes_pending;
-	atomic_t write_bytes_pending;
 	spinlock_t state_lock;
+	unsigned int read_bytes_pending;
+	atomic_t write_bytes_pending;
 
 	/*
 	 * Each block has two bits in this bitmap:
@@ -183,7 +183,7 @@ static void ifs_free(struct folio *folio)
 
 	if (!ifs)
 		return;
-	WARN_ON_ONCE(atomic_read(&ifs->read_bytes_pending));
+	WARN_ON_ONCE(ifs->read_bytes_pending != 0);
 	WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending));
 	WARN_ON_ONCE(ifs_is_fully_uptodate(folio, ifs) !=
 			folio_test_uptodate(folio));
@@ -251,19 +251,29 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 	*lenp = plen;
 }
 
-static void iomap_finish_folio_read(struct folio *folio, size_t offset,
+static void iomap_finish_folio_read(struct folio *folio, size_t off,
 		size_t len, int error)
 {
 	struct iomap_folio_state *ifs = folio->private;
+	bool uptodate = !error;
+	bool finished = true;
 
-	if (unlikely(error)) {
-		folio_clear_uptodate(folio);
-		folio_set_error(folio);
-	} else {
-		iomap_set_range_uptodate(folio, offset, len);
+	if (ifs) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&ifs->state_lock, flags);
+		if (!error)
+			uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
+		ifs->read_bytes_pending -= len;
+		finished = !ifs->read_bytes_pending;
+		spin_unlock_irqrestore(&ifs->state_lock, flags);
 	}
 
-	if (!ifs || atomic_sub_and_test(len, &ifs->read_bytes_pending))
+	if (error)
+		folio_set_error(folio);
+	if (uptodate)
+		folio_mark_uptodate(folio);
+	if (finished)
 		folio_unlock(folio);
 }
 
@@ -361,8 +371,11 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
 	}
 	ctx->cur_folio_in_bio = true;
-	if (ifs)
-		atomic_add(plen, &ifs->read_bytes_pending);
+	if (ifs) {
+		spin_lock_irq(&ifs->state_lock);
+		ifs->read_bytes_pending += plen;
+		spin_unlock_irq(&ifs->state_lock);
+	}
 
 	sector = iomap_sector(iomap, pos);
 	if (!ctx->bio ||

From 6894f9638cc2a21aefe612958e050f40c4757411 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Wed, 4 Oct 2023 17:53:03 +0100
Subject: [PATCH 03/17] mm: add folio_end_read()

mainline inclusion
from mainline-v6.7-rc1
category: performance

Provide a function
for filesystems to call when they have finished reading an entire folio. Link: https://lkml.kernel.org/r/20231004165317.1061855-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit 0b237047d5a72ffe06c0bdf2f4536f669dcd31c9) Signed-off-by: Wentao Guan --- include/linux/pagemap.h | 1 + mm/filemap.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 8e9547ec2686e..7514246539a21 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1162,6 +1162,7 @@ static inline void wait_on_page_locked(struct page *page) folio_wait_locked(page_folio(page)); } +void folio_end_read(struct folio *folio, bool success); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); diff --git a/mm/filemap.c b/mm/filemap.c index a7c0ad7b058fb..030cc6b40537a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1536,6 +1536,28 @@ void folio_unlock(struct folio *folio) } EXPORT_SYMBOL(folio_unlock); +/** + * folio_end_read - End read on a folio. + * @folio: The folio. + * @success: True if all reads completed successfully. + * + * When all reads against a folio have completed, filesystems should + * call this function to let the pagecache know that no more reads + * are outstanding. This will unlock the folio and wake up any thread + * sleeping on the lock. The folio will also be marked uptodate if all + * reads succeeded. + * + * Context: May be called from interrupt or process context. May not be + * called from NMI context. + */ +void folio_end_read(struct folio *folio, bool success) +{ + if (likely(success)) + folio_mark_uptodate(folio); + folio_unlock(folio); +} +EXPORT_SYMBOL(folio_end_read); + /** * folio_end_private_2 - Clear PG_private_2 and wake any waiters. * @folio: The folio. From efe1f3814f4d504cceda8a1cec00efcadadf6bfd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:04 +0100 Subject: [PATCH 04/17] ext4: use folio_end_read() mainline inclusion from mainline-v6.7-rc1 category: performance folio_end_read() is the perfect fit for ext4. 
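As a minimal sketch of the conversion pattern (mirroring the ext4 diff below; the handler name here is hypothetical), a bio completion handler that reads one folio per segment collapses its mark-uptodate/unlock pair into a single call:

static void demo_read_end_io(struct bio *bio)
{
	struct folio_iter fi;

	/* One call replaces folio_mark_uptodate() + folio_unlock(). */
	bio_for_each_folio_all(fi, bio)
		folio_end_read(fi.folio, bio->bi_status == 0);
	bio_put(bio);
}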
Link: https://lkml.kernel.org/r/20231004165317.1061855-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit f8174a1181220d24d6b4332216112318f5905729) Signed-off-by: Wentao Guan --- fs/ext4/readpage.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 8cb83e7b699bd..8494492582abe 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -70,15 +70,8 @@ static void __read_end_io(struct bio *bio) { struct folio_iter fi; - bio_for_each_folio_all(fi, bio) { - struct folio *folio = fi.folio; - - if (bio->bi_status) - folio_clear_uptodate(folio); - else - folio_mark_uptodate(folio); - folio_unlock(folio); - } + bio_for_each_folio_all(fi, bio) + folio_end_read(fi.folio, bio->bi_status == 0); if (bio->bi_private) mempool_free(bio->bi_private, bio_post_read_ctx_pool); bio_put(bio); @@ -335,8 +328,7 @@ int ext4_mpage_readpages(struct inode *inode, if (ext4_need_verity(inode, folio->index) && !fsverity_verify_folio(folio)) goto set_error_page; - folio_mark_uptodate(folio); - folio_unlock(folio); + folio_end_read(folio, true); continue; } } else if (fully_mapped) { From a5fd89b8f82a4564f4e8c5829f1f703bf54b26ca Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:05 +0100 Subject: [PATCH 05/17] buffer: use folio_end_read() mainline inclusion from mainline-v6.7-rc1 category: performance There are two places that we can use this new helper. Link: https://lkml.kernel.org/r/20231004165317.1061855-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit 6ba924d341c24027d95352ae8802c9cd1c308559) Signed-off-by: Wentao Guan --- fs/buffer.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 99b8a831c1abc..11da16b6da1bb 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -282,13 +282,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) } while (tmp != bh); spin_unlock_irqrestore(&first->b_uptodate_lock, flags); - /* - * If all of the buffers are uptodate then we can set the page - * uptodate. - */ - if (folio_uptodate) - folio_mark_uptodate(folio); - folio_unlock(folio); + folio_end_read(folio, folio_uptodate); return; still_busy: @@ -2439,12 +2433,10 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) if (!nr) { /* - * All buffers are uptodate - we can set the folio uptodate - * as well. But not if get_block() returned an error. + * All buffers are uptodate or get_block() returned an + * error when trying to map them - we can finish the read. 
*/ - if (!page_error) - folio_mark_uptodate(folio); - folio_unlock(folio); + folio_end_read(folio, !page_error); return 0; } From ed77f40faea06187f25cbc0d72b21b7a7ae0047b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:06 +0100 Subject: [PATCH 06/17] iomap: use folio_end_read() mainline inclusion from mainline-v6.7-rc1 category: performance Combine the setting of the uptodate flag with the clearing of the locked flag. Link: https://lkml.kernel.org/r/20231004165317.1061855-7-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit 7a4847e54cc1889d109ce2a6ebed19aafc4a4af8) Signed-off-by: Wentao Guan --- fs/iomap/buffered-io.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index f61f92a384f97..a010a41c49402 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -271,10 +271,8 @@ static void iomap_finish_folio_read(struct folio *folio, size_t off, if (error) folio_set_error(folio); - if (uptodate) - folio_mark_uptodate(folio); if (finished) - folio_unlock(folio); + folio_end_read(folio, uptodate); } static void iomap_read_end_io(struct bio *bio) From 409e6407f56cdc87057c3c2c55121b498db25ef1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:07 +0100 Subject: [PATCH 07/17] bitops: add xor_unlock_is_negative_byte() mainline inclusion from mainline-v6.7-rc1 category: performance Replace clear_bit_and_unlock_is_negative_byte() with xor_unlock_is_negative_byte(). We have a few places that like to lock a folio, set a flag and unlock it again. Allow for the possibility of combining the latter two operations for efficiency. We are guaranteed that the caller holds the lock, so it is safe to unlock it with the xor. The caller must guarantee that nobody else will set the flag without holding the lock; it is not safe to do this with the PG_dirty flag, for example. 
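As a hedged illustration of the intended semantics (a userspace model using C11 atomics, not the kernel implementation): each bit in the mask is flipped, so a bit the caller knows is set gets cleared and a bit it knows is clear gets set, prior stores are published with release ordering, and the return value reports whether bit 7 (PG_waiters) was set in the same byte:

#include <stdatomic.h>
#include <stdbool.h>

static bool model_xor_unlock_is_negative_byte(_Atomic unsigned long *p,
					      unsigned long mask)
{
	/* Release ordering pairs with the acquire in the lock path. */
	unsigned long old = atomic_fetch_xor_explicit(p, mask,
						      memory_order_release);
	return (old & 0x80) != 0;	/* bit 7, i.e. PG_waiters */
}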
Link: https://lkml.kernel.org/r/20231004165317.1061855-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit 247dbcdbf790c52fc76cf8e327cd0a5778e41e66) Signed-off-by: Wentao Guan --- arch/powerpc/include/asm/bitops.h | 17 ++++-------- arch/x86/include/asm/bitops.h | 11 ++++---- .../asm-generic/bitops/instrumented-lock.h | 27 ++++++++++--------- include/asm-generic/bitops/lock.h | 21 ++++----------- kernel/kcsan/kcsan_test.c | 8 +++--- kernel/kcsan/selftest.c | 8 +++--- mm/filemap.c | 5 ++++ mm/kasan/kasan_test.c | 7 ++--- 8 files changed, 47 insertions(+), 57 deletions(-) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 7e0f0322912b6..40cc3ded60cb9 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -234,32 +234,25 @@ static inline int arch_test_and_change_bit(unsigned long nr, } #ifdef CONFIG_PPC64 -static inline unsigned long -clear_bit_unlock_return_word(int nr, volatile unsigned long *addr) +static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *p) { unsigned long old, t; - unsigned long *p = (unsigned long *)addr + BIT_WORD(nr); - unsigned long mask = BIT_MASK(nr); __asm__ __volatile__ ( PPC_RELEASE_BARRIER "1:" PPC_LLARX "%0,0,%3,0\n" - "andc %1,%0,%2\n" + "xor %1,%0,%2\n" PPC_STLCX "%1,0,%3\n" "bne- 1b\n" : "=&r" (old), "=&r" (t) : "r" (mask), "r" (p) : "cc", "memory"); - return old; + return (old & BIT_MASK(7)) != 0; } -/* - * This is a special function for mm/filemap.c - * Bit 7 corresponds to PG_waiters. 
- */ -#define arch_clear_bit_unlock_is_negative_byte(nr, addr) \ - (clear_bit_unlock_return_word(nr, addr) & BIT_MASK(7)) +#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte #endif /* CONFIG_PPC64 */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 2edf68475fec4..f03c0a50ec3a2 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -94,18 +94,17 @@ arch___clear_bit(unsigned long nr, volatile unsigned long *addr) asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -static __always_inline bool -arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *addr) { bool negative; - asm volatile(LOCK_PREFIX "andb %2,%1" + asm volatile(LOCK_PREFIX "xorb %2,%1" CC_SET(s) : CC_OUT(s) (negative), WBYTE_ADDR(addr) - : "ir" ((char) ~(1 << nr)) : "memory"); + : "iq" ((char)mask) : "memory"); return negative; } -#define arch_clear_bit_unlock_is_negative_byte \ - arch_clear_bit_unlock_is_negative_byte +#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte static __always_inline void arch___clear_bit_unlock(long nr, volatile unsigned long *addr) diff --git a/include/asm-generic/bitops/instrumented-lock.h b/include/asm-generic/bitops/instrumented-lock.h index eb64bd4f11f3c..e8ea3aeda9a9a 100644 --- a/include/asm-generic/bitops/instrumented-lock.h +++ b/include/asm-generic/bitops/instrumented-lock.h @@ -58,27 +58,30 @@ static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) return arch_test_and_set_bit_lock(nr, addr); } -#if defined(arch_clear_bit_unlock_is_negative_byte) +#if defined(arch_xor_unlock_is_negative_byte) /** - * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom - * byte is negative, for unlock. - * @nr: the bit to clear - * @addr: the address to start counting from + * xor_unlock_is_negative_byte - XOR a single byte in memory and test if + * it is negative, for unlock. + * @mask: Change the bits which are set in this mask. + * @addr: The address of the word containing the byte to change. * + * Changes some of bits 0-6 in the word pointed to by @addr. * This operation is atomic and provides release barrier semantics. + * Used to optimise some folio operations which are commonly paired + * with an unlock or end of writeback. Bit 7 is used as PG_waiters to + * indicate whether anybody is waiting for the unlock. * - * This is a bit of a one-trick-pony for the filemap code, which clears - * PG_locked and tests PG_waiters, + * Return: Whether the top bit of the byte is set. */ -static inline bool -clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +static inline bool xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *addr) { kcsan_release(); - instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); - return arch_clear_bit_unlock_is_negative_byte(nr, addr); + instrument_atomic_write(addr, sizeof(long)); + return arch_xor_unlock_is_negative_byte(mask, addr); } /* Let everybody know we have it. 
*/ -#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte +#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte #endif #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H */ diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h index 40913516e654c..6a638e89d130d 100644 --- a/include/asm-generic/bitops/lock.h +++ b/include/asm-generic/bitops/lock.h @@ -66,27 +66,16 @@ arch___clear_bit_unlock(unsigned int nr, volatile unsigned long *p) raw_atomic_long_set_release((atomic_long_t *)p, old); } -/** - * arch_clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom - * byte is negative, for unlock. - * @nr: the bit to clear - * @addr: the address to start counting from - * - * This is a bit of a one-trick-pony for the filemap code, which clears - * PG_locked and tests PG_waiters, - */ -#ifndef arch_clear_bit_unlock_is_negative_byte -static inline bool arch_clear_bit_unlock_is_negative_byte(unsigned int nr, - volatile unsigned long *p) +#ifndef arch_xor_unlock_is_negative_byte +static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *p) { long old; - unsigned long mask = BIT_MASK(nr); - p += BIT_WORD(nr); - old = raw_atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p); + old = raw_atomic_long_fetch_xor_release(mask, (atomic_long_t *)p); return !!(old & BIT(7)); } -#define arch_clear_bit_unlock_is_negative_byte arch_clear_bit_unlock_is_negative_byte +#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte #endif #include diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index 9d8c95defdd67..286c709d2bd1a 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -700,10 +700,10 @@ static void test_barrier_nothreads(struct kunit *test) KCSAN_EXPECT_RW_BARRIER(mutex_lock(&test_mutex), false); KCSAN_EXPECT_RW_BARRIER(mutex_unlock(&test_mutex), true); -#ifdef clear_bit_unlock_is_negative_byte - KCSAN_EXPECT_READ_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true); - KCSAN_EXPECT_WRITE_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true); - KCSAN_EXPECT_RW_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true); +#ifdef xor_unlock_is_negative_byte + KCSAN_EXPECT_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true); + KCSAN_EXPECT_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true); + KCSAN_EXPECT_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true); #endif kcsan_nestable_atomic_end(); } diff --git a/kernel/kcsan/selftest.c b/kernel/kcsan/selftest.c index 8679322450f22..619be7417420f 100644 --- a/kernel/kcsan/selftest.c +++ b/kernel/kcsan/selftest.c @@ -228,10 +228,10 @@ static bool __init test_barrier(void) spin_lock(&test_spinlock); KCSAN_CHECK_RW_BARRIER(spin_unlock(&test_spinlock)); -#ifdef clear_bit_unlock_is_negative_byte - KCSAN_CHECK_RW_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var)); - KCSAN_CHECK_READ_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var)); - KCSAN_CHECK_WRITE_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var)); +#ifdef xor_unlock_is_negative_byte + KCSAN_CHECK_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var)); + KCSAN_CHECK_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var)); + KCSAN_CHECK_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var)); #endif kcsan_nestable_atomic_end(); diff --git a/mm/filemap.c b/mm/filemap.c index 030cc6b40537a..64f5c76093464 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1493,6 +1493,11 @@ 
void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter) } EXPORT_SYMBOL_GPL(folio_add_wait_queue); +#ifdef xor_unlock_is_negative_byte +#define clear_bit_unlock_is_negative_byte(nr, p) \ + xor_unlock_is_negative_byte(1 << nr, p) +#endif + #ifndef clear_bit_unlock_is_negative_byte /* diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index ecf9f5aa35200..5ad711e176ca8 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -1099,9 +1099,10 @@ static void kasan_bitops_test_and_modify(struct kunit *test, int nr, void *addr) KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr)); KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, addr)); -#if defined(clear_bit_unlock_is_negative_byte) - KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = - clear_bit_unlock_is_negative_byte(nr, addr)); +#if defined(xor_unlock_is_negative_byte) + if (nr < 7) + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = + xor_unlock_is_negative_byte(1 << nr, addr)); #endif } From 274dc49222733663a1f7d69a16fab2993874e641 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:08 +0100 Subject: [PATCH 08/17] alpha: implement xor_unlock_is_negative_byte mainline inclusion from mainline-v6.7-rc1 category: performance Inspired by the alpha clear_bit() and arch_atomic_add_return(), this will surely be more efficient than the generic one defined in filemap.c. Link: https://lkml.kernel.org/r/20231004165317.1061855-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit e28ff5dc8cf6aec042741f1ea62089dca6a894ab) Signed-off-by: Wentao Guan --- arch/alpha/include/asm/bitops.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h index bafb1c1f0fdc1..b50ad6b83e854 100644 --- a/arch/alpha/include/asm/bitops.h +++ b/arch/alpha/include/asm/bitops.h @@ -286,6 +286,27 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) #define arch_test_bit generic_test_bit #define arch_test_bit_acquire generic_test_bit_acquire +static inline bool xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *p) +{ + unsigned long temp, old; + + __asm__ __volatile__( + "1: ldl_l %0,%4\n" + " mov %0,%2\n" + " xor %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,2f\n" + ".subsection 2\n" + "2: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*p), "=&r" (old) + :"Ir" (mask), "m" (*p)); + + return (old & BIT(7)) != 0; +} +#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. From b04f8fd902473c60b6109ae13a814462508d3719 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:09 +0100 Subject: [PATCH 09/17] m68k: implement xor_unlock_is_negative_byte mainline inclusion from mainline-v6.7-rc1 category: performance Using EOR to clear the guaranteed-to-be-set lock bit will test the negative flag just like the x86 implementation. This should be more efficient than the generic implementation in filemap.c. 
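(As an illustrative aside, a sketch of the endianness assumption the code below relies on: on big-endian 32-bit m68k, the byte holding bits 0-7 of a long, and therefore PG_locked and PG_waiters, sits at offset 3 within the word, which is why the byte-sized EOR targets (char *)p + 3.)

#include <assert.h>

static void byte3_demo(void)	/* sketch only; assumes big-endian, 32-bit long */
{
	unsigned long word = 1UL << 7;		/* the PG_waiters position */
	unsigned char *b = (unsigned char *)&word;

	assert(sizeof(word) == 4);		/* 32-bit m68k assumption */
	assert(b[3] == 0x80);			/* bit 7 lives in byte 3 */
}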
It would be better if m68k had __GCC_ASM_FLAG_OUTPUTS__. Coldfire doesn't have a byte-sized EOR, so we test bit 7 after the EOR, which is a second memory access, but it's slightly better than the current C code. Link: https://lkml.kernel.org/r/20231004165317.1061855-10-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit ea845e3173f7552aae539aeb943cd19ebe90ba38) Signed-off-by: Wentao Guan --- arch/m68k/include/asm/bitops.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index e984af71df6be..80ee360959055 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -319,6 +319,28 @@ arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return test_and_change_bit(nr, addr); } +static inline bool xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *p) +{ +#ifdef CONFIG_COLDFIRE + __asm__ __volatile__ ("eorl %1, %0" + : "+m" (*p) + : "d" (mask) + : "memory"); + return *p & (1 << 7); +#else + char result; + char *cp = (char *)p + 3; /* m68k is big-endian */ + + __asm__ __volatile__ ("eor.b %1, %2; smi %0" + : "=d" (result) + : "di" (mask), "o" (*cp) + : "memory"); + return result; +#endif +} +#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte + /* * The true 68020 and more advanced processors support the "bfffo" * instruction for finding bits. 
ColdFire and simple 68000 parts From 5576c83aecb8864662a90ba909464158ee163d36 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:10 +0100 Subject: [PATCH 10/17] mips: implement xor_unlock_is_negative_byte mainline inclusion from mainline-v6.7-rc1 category: performance Inspired by the mips test_and_change_bit(), this will surely be more efficient than the generic one defined in filemap.c Link: https://lkml.kernel.org/r/20231004165317.1061855-11-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit 8da36b26e3d8640364a9e60e0b5c3fa3f55d298b) Signed-off-by: Wentao Guan --- arch/mips/include/asm/bitops.h | 26 +++++++++++++++++++++++++- arch/mips/lib/bitops.c | 14 ++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index b4bf754f7db3f..d98a05c478f42 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -73,7 +73,8 @@ int __mips_test_and_clear_bit(unsigned long nr, volatile unsigned long *addr); int __mips_test_and_change_bit(unsigned long nr, volatile unsigned long *addr); - +bool __mips_xor_is_negative_byte(unsigned long mask, + volatile unsigned long *addr); /* * set_bit - Atomically set a bit in memory @@ -279,6 +280,29 @@ static inline int test_and_change_bit(unsigned long nr, return res; } +static inline bool xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *p) +{ + unsigned long orig; + bool res; + + smp_mb__before_atomic(); + + if (!kernel_uses_llsc) { + res = __mips_xor_is_negative_byte(mask, p); + } else { + orig = __test_bit_op(*p, "%0", + "xor\t%1, %0, %3", + "ir"(mask)); + res = (orig & BIT(7)) != 0; + } + + smp_llsc_mb(); + + return res; +} +#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte + #undef __bit_op #undef __test_bit_op diff --git a/arch/mips/lib/bitops.c b/arch/mips/lib/bitops.c index 116d0bd8b2ae9..00aee98e9d545 100644 --- a/arch/mips/lib/bitops.c +++ b/arch/mips/lib/bitops.c @@ -146,3 +146,17 @@ int __mips_test_and_change_bit(unsigned long nr, volatile unsigned long *addr) return res; } EXPORT_SYMBOL(__mips_test_and_change_bit); + +bool __mips_xor_is_negative_byte(unsigned long mask, + volatile unsigned long *addr) +{ + unsigned long flags; + unsigned long data; + + raw_local_irq_save(flags); + data = *addr; + *addr = data ^ mask; + raw_local_irq_restore(flags); + + return (data & BIT(7)) != 0; +} From daef225f92da458ac26a210d33340910974e7b09 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:11 +0100 Subject: [PATCH 11/17] powerpc: implement arch_xor_unlock_is_negative_byte on 32-bit mainline inclusion from mainline-v6.7-rc1 category: performance Simply remove the ifdef. The assembly is identical to that in the non-optimised case of test_and_clear_bits() on PPC32, and it's not clear to me how the PPC32 optimisation works, nor whether it would work for arch_xor_unlock_is_negative_byte(). If that optimisation would work, someone can implement it later, but this is more efficient than the implementation in filemap.c. 
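For readers unfamiliar with larx/stcx., here is a sketch of what the now-shared powerpc loop computes, modelled as a C11 compare-exchange retry loop (the kernel code is the inline assembly shown earlier, not this):

#include <stdatomic.h>
#include <stdbool.h>

static bool ll_sc_model(_Atomic unsigned long *p, unsigned long mask)
{
	unsigned long old = atomic_load_explicit(p, memory_order_relaxed);

	/* Retry until the reservation (here: the CAS) succeeds. */
	while (!atomic_compare_exchange_weak_explicit(p, &old, old ^ mask,
						      memory_order_release,
						      memory_order_relaxed))
		;
	return (old & (1UL << 7)) != 0;	/* BIT_MASK(7), i.e. PG_waiters */
}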
Link: https://lkml.kernel.org/r/20231004165317.1061855-12-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit 51a752c28bcf901618bbc25a43f84ef539f9e682) Signed-off-by: Wentao Guan --- arch/powerpc/include/asm/bitops.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 40cc3ded60cb9..671ecc6711e36 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -233,7 +233,6 @@ static inline int arch_test_and_change_bit(unsigned long nr, return test_and_change_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0; } -#ifdef CONFIG_PPC64 static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, volatile unsigned long *p) { @@ -251,11 +250,8 @@ static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, return (old & BIT_MASK(7)) != 0; } - #define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte -#endif /* CONFIG_PPC64 */ - #include static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr) From 0efb534868fa4f58fe0e5e902c649747023d54de Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:12 +0100 Subject: [PATCH 12/17] riscv: implement xor_unlock_is_negative_byte mainline inclusion from mainline-v6.7-rc1 category: performance Inspired by the riscv clear_bit_unlock(), this will surely be more efficient than the generic one defined in filemap.c. 
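Unlike the LL/SC retry loop on powerpc, this is a single AMO instruction. A sketch of the equivalent operation using the GCC/Clang builtin (illustrative only; the patch uses hand-written asm so it fits the existing __AMO macros):

static inline _Bool amoxor_model(unsigned long *p, unsigned long mask)
{
	/* amoxor.{w,d}.rl: fetch-XOR with release ordering, no loop */
	unsigned long old = __atomic_fetch_xor(p, mask, __ATOMIC_RELEASE);
	return (old & (1UL << 7)) != 0;
}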
Link: https://lkml.kernel.org/r/20231004165317.1061855-13-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit 2a667285b53c58d72f8bdb736c040f0f36bff58a) Signed-off-by: Wentao Guan --- arch/riscv/include/asm/bitops.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 3540b690944be..15e3044298a27 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -191,6 +191,19 @@ static inline void __clear_bit_unlock( clear_bit_unlock(nr, addr); } +static inline bool xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *addr) +{ + unsigned long res; + __asm__ __volatile__ ( + __AMO(xor) ".rl %0, %2, %1" + : "=r" (res), "+A" (*addr) + : "r" (__NOP(mask)) + : "memory"); + return (res & BIT(7)) != 0; +} +#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte + #undef __test_and_op_bit #undef __op_bit #undef __NOP From 200651527dc76b321b5da950898f6a71e4ff94fc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:13 +0100 Subject: [PATCH 13/17] s390: implement arch_xor_unlock_is_negative_byte mainline inclusion from mainline-v6.7-rc1 category: performance Inspired by the s390 arch_test_and_clear_bit(), this will surely be more efficient than the generic one defined in filemap.c. Link: https://lkml.kernel.org/r/20231004165317.1061855-14-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit 12010aa89f8705c8bacddc5c2276cc80badeac56) Signed-off-by: Wentao Guan --- arch/s390/include/asm/bitops.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 2de74fcd0578f..c467dffa8c12a 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -201,6 +201,16 @@ static inline void arch___clear_bit_unlock(unsigned long nr, arch___clear_bit(nr, ptr); } +static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *ptr) +{ + unsigned long old; + + old = __atomic64_xor_barrier(mask, (long *)ptr); + return old & BIT(7); +} +#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte + #include #include #include From 5361dfca485fed63520d2bfbf316992a9a01ed72 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:14 +0100 Subject: [PATCH 14/17] mm: delete checks for xor_unlock_is_negative_byte() mainline inclusion from mainline-v6.7-rc1 category: performance Architectures which don't define their own use the one in asm-generic/bitops/lock.h. Get rid of all the ifdefs around "maybe we don't have it". 
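The guarantee that makes this deletion safe is the #ifndef fallback in include/asm-generic/bitops/lock.h, shown earlier in this series; in condensed form:

/* Only compiled when an architecture does not provide its own. */
#ifndef arch_xor_unlock_is_negative_byte
static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
		volatile unsigned long *p)
{
	long old;

	old = raw_atomic_long_fetch_xor_release(mask, (atomic_long_t *)p);
	return !!(old & BIT(7));
}
#endif

Every architecture therefore ends up with a definition, so callers no longer need to test for one.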
Link: https://lkml.kernel.org/r/20231004165317.1061855-15-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Geert Uytterhoeven Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit f12fb73b74fd23ca33e3f95fb996f295eeae1da7) Signed-off-by: Wentao Guan --- arch/alpha/include/asm/bitops.h | 1 - arch/m68k/include/asm/bitops.h | 1 - arch/mips/include/asm/bitops.h | 1 - arch/riscv/include/asm/bitops.h | 1 - .../asm-generic/bitops/instrumented-lock.h | 5 ---- include/asm-generic/bitops/lock.h | 1 - kernel/kcsan/kcsan_test.c | 3 -- kernel/kcsan/selftest.c | 3 -- mm/filemap.c | 30 +------------------ mm/kasan/kasan_test.c | 3 -- 10 files changed, 1 insertion(+), 48 deletions(-) diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h index b50ad6b83e854..3e33621922c31 100644 --- a/arch/alpha/include/asm/bitops.h +++ b/arch/alpha/include/asm/bitops.h @@ -305,7 +305,6 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask, return (old & BIT(7)) != 0; } -#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte /* * ffz = Find First Zero in word. Undefined if no zero exists, diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index 80ee360959055..14c64a6f12176 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -339,7 +339,6 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask, return result; #endif } -#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte /* * The true 68020 and more advanced processors support the "bfffo" diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index d98a05c478f42..89f73d1a4ea4e 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -301,7 +301,6 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask, return res; } -#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte #undef __bit_op #undef __test_bit_op diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 15e3044298a27..65f6eee4ab8d7 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -202,7 +202,6 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask, : "memory"); return (res & BIT(7)) != 0; } -#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte #undef __test_and_op_bit #undef __op_bit diff --git a/include/asm-generic/bitops/instrumented-lock.h b/include/asm-generic/bitops/instrumented-lock.h index e8ea3aeda9a9a..542d3727ee4e3 100644 --- a/include/asm-generic/bitops/instrumented-lock.h +++ b/include/asm-generic/bitops/instrumented-lock.h @@ -58,7 +58,6 @@ static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) return arch_test_and_set_bit_lock(nr, addr); } -#if defined(arch_xor_unlock_is_negative_byte) /** * xor_unlock_is_negative_byte - XOR a single byte in memory and test if * it is negative, for unlock. @@ -80,8 +79,4 @@ static inline bool xor_unlock_is_negative_byte(unsigned long mask, instrument_atomic_write(addr, sizeof(long)); return arch_xor_unlock_is_negative_byte(mask, addr); } -/* Let everybody know we have it. 
*/ -#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte -#endif - #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H */ diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h index 6a638e89d130d..14d4ec8c5152d 100644 --- a/include/asm-generic/bitops/lock.h +++ b/include/asm-generic/bitops/lock.h @@ -75,7 +75,6 @@ static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, old = raw_atomic_long_fetch_xor_release(mask, (atomic_long_t *)p); return !!(old & BIT(7)); } -#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte #endif #include diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index 286c709d2bd1a..9924eb3f74cf3 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -699,12 +699,9 @@ static void test_barrier_nothreads(struct kunit *test) KCSAN_EXPECT_RW_BARRIER(spin_unlock(&test_spinlock), true); KCSAN_EXPECT_RW_BARRIER(mutex_lock(&test_mutex), false); KCSAN_EXPECT_RW_BARRIER(mutex_unlock(&test_mutex), true); - -#ifdef xor_unlock_is_negative_byte KCSAN_EXPECT_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true); KCSAN_EXPECT_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true); KCSAN_EXPECT_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true); -#endif kcsan_nestable_atomic_end(); } diff --git a/kernel/kcsan/selftest.c b/kernel/kcsan/selftest.c index 619be7417420f..84a1200271aff 100644 --- a/kernel/kcsan/selftest.c +++ b/kernel/kcsan/selftest.c @@ -227,12 +227,9 @@ static bool __init test_barrier(void) KCSAN_CHECK_RW_BARRIER(arch_spin_unlock(&arch_spinlock)); spin_lock(&test_spinlock); KCSAN_CHECK_RW_BARRIER(spin_unlock(&test_spinlock)); - -#ifdef xor_unlock_is_negative_byte KCSAN_CHECK_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var)); KCSAN_CHECK_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var)); KCSAN_CHECK_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var)); -#endif kcsan_nestable_atomic_end(); return ret; diff --git a/mm/filemap.c b/mm/filemap.c index 64f5c76093464..eb6e510ba9ebd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1493,34 +1493,6 @@ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter) } EXPORT_SYMBOL_GPL(folio_add_wait_queue); -#ifdef xor_unlock_is_negative_byte -#define clear_bit_unlock_is_negative_byte(nr, p) \ - xor_unlock_is_negative_byte(1 << nr, p) -#endif - -#ifndef clear_bit_unlock_is_negative_byte - -/* - * PG_waiters is the high bit in the same byte as PG_lock. - * - * On x86 (and on many other architectures), we can clear PG_lock and - * test the sign bit at the same time. But if the architecture does - * not support that special operation, we just do this all by hand - * instead. - * - * The read of PG_waiters has to be after (or concurrently with) PG_locked - * being cleared, but a memory barrier should be unnecessary since it is - * in the same byte as PG_locked. - */ -static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem) -{ - clear_bit_unlock(nr, mem); - /* smp_mb__after_atomic(); */ - return test_bit(PG_waiters, mem); -} - -#endif - /** * folio_unlock - Unlock a locked folio. * @folio: The folio. 
@@ -1536,7 +1508,7 @@ void folio_unlock(struct folio *folio) BUILD_BUG_ON(PG_waiters != 7); BUILD_BUG_ON(PG_locked > 7); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0))) + if (xor_unlock_is_negative_byte(1 << PG_locked, folio_flags(folio, 0))) folio_wake_bit(folio, PG_locked); } EXPORT_SYMBOL(folio_unlock); diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index 5ad711e176ca8..c73fc568eec81 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -1098,12 +1098,9 @@ static void kasan_bitops_test_and_modify(struct kunit *test, int nr, void *addr) KUNIT_EXPECT_KASAN_FAIL(test, test_and_change_bit(nr, addr)); KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr)); KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, addr)); - -#if defined(xor_unlock_is_negative_byte) if (nr < 7) KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = xor_unlock_is_negative_byte(1 << nr, addr)); -#endif } static void kasan_bitops_generic(struct kunit *test) From 1d8d22747256b8ac28149cd5052833588b76aa12 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:15 +0100 Subject: [PATCH 15/17] mm: add folio_xor_flags_has_waiters() mainline inclusion from mainline-v6.7-rc1 category: performance Optimise folio_end_read() by setting the uptodate bit at the same time we clear the unlock bit. This saves at least one memory barrier and one write-after-write hazard. Link: https://lkml.kernel.org/r/20231004165317.1061855-16-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit 0410cd844ed0af3db3cb510d877d62c66d26e5cc) Signed-off-by: Wentao Guan --- include/linux/page-flags.h | 19 +++++++++++++++++++ mm/filemap.c | 14 +++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c8f5438c6fc79..3cbe24d32e35f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -726,6 +726,25 @@ TESTPAGEFLAG_FALSE(Ksm, ksm) u64 stable_page_flags(struct page *page); +/** + * folio_xor_flags_has_waiters - Change some folio flags. + * @folio: The folio. + * @mask: Bits set in this word will be changed. + * + * This must only be used for flags which are changed with the folio + * lock held. For example, it is unsafe to use for PG_dirty as that + * can be set without the folio lock held. It can also only be used + * on flags which are in the range 0-6 as some of the implementations + * only affect those bits. + * + * Return: Whether there are tasks waiting on the folio. + */ +static inline bool folio_xor_flags_has_waiters(struct folio *folio, + unsigned long mask) +{ + return xor_unlock_is_negative_byte(mask, folio_flags(folio, 0)); +} + /** * folio_test_uptodate - Is this folio up to date? * @folio: The folio. 
diff --git a/mm/filemap.c b/mm/filemap.c
index eb6e510ba9ebd..84cdd33cb2a0c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1508,7 +1508,7 @@ void folio_unlock(struct folio *folio)
 	BUILD_BUG_ON(PG_waiters != 7);
 	BUILD_BUG_ON(PG_locked > 7);
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
-	if (xor_unlock_is_negative_byte(1 << PG_locked, folio_flags(folio, 0)))
+	if (folio_xor_flags_has_waiters(folio, 1 << PG_locked))
 		folio_wake_bit(folio, PG_locked);
 }
 EXPORT_SYMBOL(folio_unlock);
@@ -1529,9 +1529,17 @@ EXPORT_SYMBOL(folio_unlock);
  */
 void folio_end_read(struct folio *folio, bool success)
 {
+	unsigned long mask = 1 << PG_locked;
+
+	/* Must be in bottom byte for x86 to work */
+	BUILD_BUG_ON(PG_uptodate > 7);
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio);
+
 	if (likely(success))
-		folio_mark_uptodate(folio);
-	folio_unlock(folio);
+		mask |= 1 << PG_uptodate;
+	if (folio_xor_flags_has_waiters(folio, mask))
+		folio_wake_bit(folio, PG_locked);
 }
 EXPORT_SYMBOL(folio_end_read);

From b30a67100a3bb26de0e1bc1d98a3f8c5f7bee1b9 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Wed, 4 Oct 2023 17:53:16 +0100
Subject: [PATCH 16/17] mm: make __end_folio_writeback() return void

mainline inclusion
from mainline-v6.7-rc1
category: performance

Rather than check the result of test-and-clear, just check that we have
the writeback bit set at the start. This wouldn't catch every case, but
it's good enough (and enables the next patch).

Link: https://lkml.kernel.org/r/20231004165317.1061855-17-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Cc: Albert Ou
Cc: Alexander Gordeev
Cc: Andreas Dilger
Cc: Christian Borntraeger
Cc: Christophe Leroy
Cc: Geert Uytterhoeven
Cc: Heiko Carstens
Cc: Ivan Kokshaysky
Cc: Matt Turner
Cc: Michael Ellerman
Cc: Nicholas Piggin
Cc: Palmer Dabbelt
Cc: Paul Walmsley
Cc: Richard Henderson
Cc: Sven Schnelle
Cc: "Theodore Ts'o"
Cc: Thomas Bogendoerfer
Cc: Vasily Gorbik
Signed-off-by: Andrew Morton
(cherry picked from commit 7d0795d098a127508f3e29ab4257c9ab598efaea)
Signed-off-by: Wentao Guan
---
 mm/filemap.c        |  9 +++++++--
 mm/internal.h       |  2 +-
 mm/page-writeback.c | 38 ++++++++++++++++----------------------
 3 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 84cdd33cb2a0c..049961b4c4e0e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1604,9 +1604,15 @@ EXPORT_SYMBOL(folio_wait_private_2_killable);
 /**
  * folio_end_writeback - End writeback against a folio.
  * @folio: The folio.
+ *
+ * The folio must actually be under writeback.
+ *
+ * Context: May be called from process or interrupt context.
  */
 void folio_end_writeback(struct folio *folio)
 {
+	VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio);
+
 	/*
 	 * folio_test_clear_reclaim() could be used here but it is an
 	 * atomic operation and overkill in this particular case. Failing
@@ -1626,8 +1632,7 @@ void folio_end_writeback(struct folio *folio)
 	 * reused before the folio_wake().
*/ folio_get(folio); - if (!__folio_end_writeback(folio)) - BUG(); + __folio_end_writeback(folio); smp_mb__after_atomic(); folio_wake(folio, PG_writeback); diff --git a/mm/internal.h b/mm/internal.h index 84eedb78af662..1cf35a39852b0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -150,7 +150,7 @@ static inline void wake_throttle_isolated(pg_data_t *pgdat) vm_fault_t do_swap_page(struct vm_fault *vmf); void folio_rotate_reclaimable(struct folio *folio); -bool __folio_end_writeback(struct folio *folio); +void __folio_end_writeback(struct folio *folio); void deactivate_file_folio(struct folio *folio); void folio_activate(struct folio *folio); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6e3faed567260..dc006bee82824 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2995,11 +2995,10 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb) spin_unlock_irqrestore(&wb->work_lock, flags); } -bool __folio_end_writeback(struct folio *folio) +void __folio_end_writeback(struct folio *folio) { long nr = folio_nr_pages(folio); struct address_space *mapping = folio_mapping(folio); - bool ret; folio_memcg_lock(folio); if (mapping && mapping_use_writeback_tags(mapping)) { @@ -3008,19 +3007,16 @@ bool __folio_end_writeback(struct folio *folio) unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - ret = folio_test_clear_writeback(folio); - if (ret) { - __xa_clear_mark(&mapping->i_pages, folio_index(folio), - PAGECACHE_TAG_WRITEBACK); - if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { - struct bdi_writeback *wb = inode_to_wb(inode); - - wb_stat_mod(wb, WB_WRITEBACK, -nr); - __wb_writeout_add(wb, nr); - if (!mapping_tagged(mapping, - PAGECACHE_TAG_WRITEBACK)) - wb_inode_writeback_end(wb); - } + folio_test_clear_writeback(folio); + __xa_clear_mark(&mapping->i_pages, folio_index(folio), + PAGECACHE_TAG_WRITEBACK); + if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { + struct bdi_writeback *wb = inode_to_wb(inode); + + wb_stat_mod(wb, WB_WRITEBACK, -nr); + __wb_writeout_add(wb, nr); + if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) + wb_inode_writeback_end(wb); } if (mapping->host && !mapping_tagged(mapping, @@ -3029,15 +3025,13 @@ bool __folio_end_writeback(struct folio *folio) xa_unlock_irqrestore(&mapping->i_pages, flags); } else { - ret = folio_test_clear_writeback(folio); - } - if (ret) { - lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr); - zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); - node_stat_mod_folio(folio, NR_WRITTEN, nr); + folio_test_clear_writeback(folio); } + + lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); + node_stat_mod_folio(folio, NR_WRITTEN, nr); folio_memcg_unlock(folio); - return ret; } bool __folio_start_writeback(struct folio *folio, bool keep_write) From b4262605072028de6b28ced90a550da0709b03f4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 4 Oct 2023 17:53:17 +0100 Subject: [PATCH 17/17] mm: use folio_xor_flags_has_waiters() in folio_end_writeback() mainline inclusion from mainline-v6.7-rc1 category: performance Match how folio_unlock() works by combining the test for PG_waiters with the clearing of PG_writeback. This should have a small performance win, and removes the last user of folio_wake(). 
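In sketch form (condensed from the diff below, context elided), the end-of-writeback path now mirrors folio_unlock(): one atomic XOR clears PG_writeback and reports whether PG_waiters was set:

void folio_end_writeback(struct folio *folio)
{
	...
	folio_get(folio);	/* keep the folio alive across the wake */
	if (__folio_end_writeback(folio))	/* XOR + waiter test */
		folio_wake_bit(folio, PG_writeback);
	acct_reclaim_writeback(folio);
	folio_put(folio);
}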
Link: https://lkml.kernel.org/r/20231004165317.1061855-18-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Albert Ou Cc: Alexander Gordeev Cc: Andreas Dilger Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Richard Henderson Cc: Sven Schnelle Cc: "Theodore Ts'o" Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Signed-off-by: Andrew Morton (cherry picked from commit 2580d554585c52a644839864ef9238af5b030ebc) Signed-off-by: Wentao Guan --- mm/filemap.c | 15 +++------------ mm/internal.h | 2 +- mm/page-writeback.c | 9 ++++++--- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 049961b4c4e0e..e2bfb8ce9e9ee 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1186,13 +1186,6 @@ static void folio_wake_bit(struct folio *folio, int bit_nr) spin_unlock_irqrestore(&q->lock, flags); } -static void folio_wake(struct folio *folio, int bit) -{ - if (!folio_test_waiters(folio)) - return; - folio_wake_bit(folio, bit); -} - /* * A choice of three behaviors for folio_wait_bit_common(): */ @@ -1629,13 +1622,11 @@ void folio_end_writeback(struct folio *folio) * Writeback does not hold a folio reference of its own, relying * on truncation to wait for the clearing of PG_writeback. * But here we must make sure that the folio is not freed and - * reused before the folio_wake(). + * reused before the folio_wake_bit(). */ folio_get(folio); - __folio_end_writeback(folio); - - smp_mb__after_atomic(); - folio_wake(folio, PG_writeback); + if (__folio_end_writeback(folio)) + folio_wake_bit(folio, PG_writeback); acct_reclaim_writeback(folio); folio_put(folio); } diff --git a/mm/internal.h b/mm/internal.h index 1cf35a39852b0..84eedb78af662 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -150,7 +150,7 @@ static inline void wake_throttle_isolated(pg_data_t *pgdat) vm_fault_t do_swap_page(struct vm_fault *vmf); void folio_rotate_reclaimable(struct folio *folio); -void __folio_end_writeback(struct folio *folio); +bool __folio_end_writeback(struct folio *folio); void deactivate_file_folio(struct folio *folio); void folio_activate(struct folio *folio); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index dc006bee82824..21cb71e65f653 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2995,10 +2995,11 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb) spin_unlock_irqrestore(&wb->work_lock, flags); } -void __folio_end_writeback(struct folio *folio) +bool __folio_end_writeback(struct folio *folio) { long nr = folio_nr_pages(folio); struct address_space *mapping = folio_mapping(folio); + bool ret; folio_memcg_lock(folio); if (mapping && mapping_use_writeback_tags(mapping)) { @@ -3007,7 +3008,7 @@ void __folio_end_writeback(struct folio *folio) unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - folio_test_clear_writeback(folio); + ret = folio_xor_flags_has_waiters(folio, 1 << PG_writeback); __xa_clear_mark(&mapping->i_pages, folio_index(folio), PAGECACHE_TAG_WRITEBACK); if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { @@ -3025,13 +3026,15 @@ void __folio_end_writeback(struct folio *folio) xa_unlock_irqrestore(&mapping->i_pages, flags); } else { - folio_test_clear_writeback(folio); + ret = folio_xor_flags_has_waiters(folio, 1 << PG_writeback); } lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr); zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); node_stat_mod_folio(folio, 
NR_WRITTEN, nr); folio_memcg_unlock(folio); + + return ret; } bool __folio_start_writeback(struct folio *folio, bool keep_write)
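
Taken together, the series reduces the common read-completion path to a single atomic XOR. A hedged userspace model of the final folio_end_read() behaviour (the DEMO_* bit positions are stand-ins for PG_locked, PG_uptodate and PG_waiters; the kernel uses folio_xor_flags_has_waiters() as shown above):

#include <stdatomic.h>
#include <stdbool.h>

enum { DEMO_LOCKED = 0, DEMO_UPTODATE = 2, DEMO_WAITERS = 7 };

/* Returns true if a waiter needs waking, as folio_wake_bit() would be. */
static bool demo_end_read(_Atomic unsigned long *flags, bool success)
{
	unsigned long mask = 1UL << DEMO_LOCKED;

	if (success)
		mask |= 1UL << DEMO_UPTODATE;	/* guaranteed clear, so XOR sets it */
	/* One atomic op: set uptodate, clear lock, test waiters, release. */
	return (atomic_fetch_xor_explicit(flags, mask, memory_order_release)
		& (1UL << DEMO_WAITERS)) != 0;
}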