Skip to content

Commit da32b58

Browse files
committed
mm: Add fault_in_subpage_writeable() to probe at sub-page granularity
On hardware with features like arm64 MTE or SPARC ADI, an access fault can be triggered at sub-page granularity. Depending on how the fault_in_writeable() function is used, the caller can get into a live-lock by continuously retrying the fault-in on an address different from the one where the uaccess failed.

In the majority of cases progress is ensured by the following conditions:

1. copy_to_user_nofault() guarantees at least one byte access if the user address is not faulting.

2. The fault_in_writeable() loop is resumed from the first address that could not be accessed by copy_to_user_nofault().

If the loop iteration is restarted from an earlier (initial) point, the loop is repeated with the same conditions and it would live-lock.

Introduce an arch-specific probe_subpage_writeable() and call it from the newly added fault_in_subpage_writeable() function. The arch code with sub-page faults will have to implement the specific probing functionality.

Note that no other fault_in_subpage_*() functions are added since they have no callers currently susceptible to a live-lock.

Signed-off-by: Catalin Marinas <[email protected]>
Cc: Andrew Morton <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Catalin Marinas <[email protected]>
1 parent b2d229d commit da32b58

File tree

4 files changed

+59
-0
lines changed

4 files changed

+59
-0
lines changed

arch/Kconfig

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ config KEXEC_ELF
2424
config HAVE_IMA_KEXEC
2525
bool
2626

27+
config ARCH_HAS_SUBPAGE_FAULTS
28+
bool
29+
help
30+
Select if the architecture can check permissions at sub-page
31+
granularity (e.g. arm64 MTE). The probe_subpage_*() functions
32+
must be implemented.
33+
2734
config HOTPLUG_SMT
2835
bool
2936

include/linux/pagemap.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,7 @@ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter);
10461046
* Fault in userspace address range.
10471047
*/
10481048
size_t fault_in_writeable(char __user *uaddr, size_t size);
1049+
size_t fault_in_subpage_writeable(char __user *uaddr, size_t size);
10491050
size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
10501051
size_t fault_in_readable(const char __user *uaddr, size_t size);
10511052

include/linux/uaccess.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,28 @@ static inline bool pagefault_disabled(void)
231231
*/
232232
#define faulthandler_disabled() (pagefault_disabled() || in_atomic())
233233

234+
#ifndef CONFIG_ARCH_HAS_SUBPAGE_FAULTS
235+
236+
/**
237+
* probe_subpage_writeable: probe the user range for write faults at sub-page
238+
* granularity (e.g. arm64 MTE)
239+
* @uaddr: start of address range
240+
* @size: size of address range
241+
*
242+
* Returns 0 on success, the number of bytes not probed on fault.
243+
*
244+
* It is expected that the caller checked for the write permission of each
245+
* page in the range either by put_user() or GUP. The architecture port can
246+
* implement a more efficient get_user() probing if the same sub-page faults
247+
* are triggered by either a read or a write.
248+
*/
249+
static inline size_t probe_subpage_writeable(char __user *uaddr, size_t size)
250+
{
251+
return 0;
252+
}
253+
254+
#endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */
255+
234256
#ifndef ARCH_HAS_NOCACHE_UACCESS
235257

236258
static inline __must_check unsigned long

mm/gup.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1648,6 +1648,35 @@ size_t fault_in_writeable(char __user *uaddr, size_t size)
16481648
}
16491649
EXPORT_SYMBOL(fault_in_writeable);
16501650

1651+
/**
1652+
* fault_in_subpage_writeable - fault in an address range for writing
1653+
* @uaddr: start of address range
1654+
* @size: size of address range
1655+
*
1656+
* Fault in a user address range for writing while checking for permissions at
1657+
* sub-page granularity (e.g. arm64 MTE). This function should be used when
1658+
* the caller cannot guarantee forward progress of a copy_to_user() loop.
1659+
*
1660+
* Returns the number of bytes not faulted in (like copy_to_user() and
1661+
* copy_from_user()).
1662+
*/
1663+
size_t fault_in_subpage_writeable(char __user *uaddr, size_t size)
1664+
{
1665+
size_t faulted_in;
1666+
1667+
/*
1668+
* Attempt faulting in at page granularity first for page table
1669+
* permission checking. The arch-specific probe_subpage_writeable()
1670+
* functions may not check for this.
1671+
*/
1672+
faulted_in = size - fault_in_writeable(uaddr, size);
1673+
if (faulted_in)
1674+
faulted_in -= probe_subpage_writeable(uaddr, faulted_in);
1675+
1676+
return size - faulted_in;
1677+
}
1678+
EXPORT_SYMBOL(fault_in_subpage_writeable);
1679+
16511680
/*
16521681
* fault_in_safe_writeable - fault in an address range for writing
16531682
* @uaddr: start of address range

0 commit comments

Comments
 (0)