Commit 1472690

Merge branch 'akpm' (patches from Andrew)

Merge misc updates from Andrew Morton:
 "173 patches.

  Subsystems affected by this series: ia64, ocfs2, block, and mm (debug,
  pagecache, gup, swap, shmem, memcg, selftests, pagemap, mremap, bootmem,
  sparsemem, vmalloc, kasan, pagealloc, memory-failure, hugetlb, userfaultfd,
  vmscan, compaction, mempolicy, memblock, oom-kill, migration, ksm, percpu,
  vmstat, and madvise"

* emailed patches from Andrew Morton <[email protected]>: (173 commits)
  mm/madvise: add MADV_WILLNEED to process_madvise()
  mm/vmstat: remove unneeded return value
  mm/vmstat: simplify the array size calculation
  mm/vmstat: correct some wrong comments
  mm/percpu,c: remove obsolete comments of pcpu_chunk_populated()
  selftests: vm: add COW time test for KSM pages
  selftests: vm: add KSM merging time test
  mm: KSM: fix data type
  selftests: vm: add KSM merging across nodes test
  selftests: vm: add KSM zero page merging test
  selftests: vm: add KSM unmerge test
  selftests: vm: add KSM merge test
  mm/migrate: correct kernel-doc notation
  mm: wire up syscall process_mrelease
  mm: introduce process_mrelease system call
  memblock: make memblock_find_in_range method private
  mm/mempolicy.c: use in_task() in mempolicy_slab_node()
  mm/mempolicy: unify the create() func for bind/interleave/prefer-many policies
  mm/mempolicy: advertise new MPOL_PREFERRED_MANY
  mm/hugetlb: add support for mempolicy MPOL_PREFERRED_MANY
  ...

2 parents a9c9a6f + d5fffc5

File tree

171 files changed: +3525 −1729 lines changed

Lines changed: 24 additions & 0 deletions

@@ -0,0 +1,24 @@
+What:		/sys/kernel/mm/numa/
+Date:		June 2021
+Contact:	Linux memory management mailing list <[email protected]>
+Description:	Interface for NUMA
+
+What:		/sys/kernel/mm/numa/demotion_enabled
+Date:		June 2021
+Contact:	Linux memory management mailing list <[email protected]>
+Description:	Enable/disable demoting pages during reclaim
+
+		Page migration during reclaim is intended for systems
+		with tiered memory configurations.  These systems have
+		multiple types of memory with varied performance
+		characteristics instead of plain NUMA systems where
+		the same kind of memory is found at varied distances.
+		Allowing page migration during reclaim enables these
+		systems to migrate pages from fast tiers to slow tiers
+		when the fast tier is under pressure.  This migration
+		is performed before swap.  It may move data to a NUMA
+		node that does not fall into the cpuset of the
+		allocating process which might be construed to violate
+		the guarantees of cpusets.  This should not be enabled
+		on systems which need strict cpuset location
+		guarantees.

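The knob described above is a plain sysfs boolean, so it can be driven from user space with an ordinary file write. A minimal C sketch, assuming a kernel with this series applied and root privileges; `write_sysfs()` and `DEMOTION_KNOB` are illustrative helpers for this example, not kernel or libc APIs:

```c
#include <stdio.h>

/* Path from the ABI entry above; requires a kernel with demotion support. */
#define DEMOTION_KNOB "/sys/kernel/mm/numa/demotion_enabled"

/* Write a string value to a sysfs-style file; returns 0 on success. */
static int write_sysfs(const char *path, const char *value)
{
    FILE *f = fopen(path, "w");
    if (!f)
        return -1;
    int ok = (fputs(value, f) >= 0);
    return (fclose(f) == 0 && ok) ? 0 : -1;
}

/* Usage (as root): write_sysfs(DEMOTION_KNOB, "1") enables demotion,
 * write_sysfs(DEMOTION_KNOB, "0") disables it again. */
```

As the ABI text warns, enabling this can place pages outside the allocating task's cpuset, so it is best left off where strict cpuset placement matters.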
Documentation/admin-guide/mm/numa_memory_policy.rst

Lines changed: 11 additions & 4 deletions

@@ -245,6 +245,13 @@ MPOL_INTERLEAVED
 	address range or file.  During system boot up, the temporary
 	interleaved system default policy works in this mode.

+MPOL_PREFERRED_MANY
+	This mode specifies that the allocation should be preferably
+	satisfied from the nodemask specified in the policy.  If there is
+	a memory pressure on all nodes in the nodemask, the allocation
+	can fall back to all existing numa nodes.  This is effectively
+	MPOL_PREFERRED allowed for a mask rather than a single node.
+
 NUMA memory policy supports the following optional mode flags:

 MPOL_F_STATIC_NODES
@@ -253,10 +260,10 @@ MPOL_F_STATIC_NODES
 	nodes changes after the memory policy has been defined.

 	Without this flag, any time a mempolicy is rebound because of a
-	change in the set of allowed nodes, the node (Preferred) or
-	nodemask (Bind, Interleave) is remapped to the new set of
-	allowed nodes.  This may result in nodes being used that were
-	previously undesired.
+	change in the set of allowed nodes, the preferred nodemask (Preferred
+	Many), preferred node (Preferred) or nodemask (Bind, Interleave) is
+	remapped to the new set of allowed nodes.  This may result in nodes
+	being used that were previously undesired.

 	With this flag, if the user-specified nodes overlap with the
 	nodes allowed by the task's cpuset, then the memory policy is

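The new mode is selected through set_mempolicy(2) like the existing ones. A hedged C sketch: the numeric value 5 for MPOL_PREFERRED_MANY is assumed from the uapi headers of this series (check your numaif.h), and on kernels without the patch the call fails with EINVAL:

```c
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef MPOL_PREFERRED_MANY
#define MPOL_PREFERRED_MANY 5   /* assumed uapi value; verify locally */
#endif

/* Build a single-word nodemask preferring the given NUMA node ids
 * (all ids must fit in one unsigned long for this sketch). */
static unsigned long nodemask_of(const int *nodes, int n)
{
    unsigned long mask = 0;
    for (int i = 0; i < n; i++)
        mask |= 1UL << nodes[i];
    return mask;
}

/* Ask the kernel to prefer allocations from `mask`; falls back to all
 * nodes under pressure, per the documentation above. Returns 0 on
 * success, otherwise errno (EINVAL on pre-PREFERRED_MANY kernels). */
static int try_preferred_many(unsigned long mask)
{
    /* maxnode counts bits; one word plus one is enough here. */
    if (syscall(SYS_set_mempolicy, MPOL_PREFERRED_MANY, &mask,
                8 * sizeof(mask) + 1) == 0)
        return 0;
    return errno;
}
```

For example, `try_preferred_many(nodemask_of((int[]){0, 1}, 2))` prefers nodes 0 and 1 while still allowing fallback, which is exactly the "Preferred, but for a mask" semantics the text describes.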
Documentation/admin-guide/sysctl/vm.rst

Lines changed: 2 additions & 1 deletion

@@ -118,7 +118,8 @@ compaction_proactiveness

 This tunable takes a value in the range [0, 100] with a default value of
 20.  This tunable determines how aggressively compaction is done in the
-background.  Setting it to 0 disables proactive compaction.
+background.  Writing a non-zero value to this tunable will immediately
+trigger proactive compaction.  Setting it to 0 disables proactive compaction.

 Note that compaction has a non-trivial system-wide impact as pages
 belonging to different processes are moved around, which could also lead

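Since any non-zero write now kicks off a compaction round immediately, setting the tunable doubles as a manual trigger. A small C sketch under those assumptions (the procfs path is the standard sysctl mount; `set_proactiveness()` is an illustrative helper and writing requires root):

```c
#include <stdio.h>

#define PROACTIVENESS_KNOB "/proc/sys/vm/compaction_proactiveness"

/* Clamp a requested value to the documented [0, 100] range. */
static int clamp_proactiveness(int v)
{
    return v < 0 ? 0 : (v > 100 ? 100 : v);
}

/* Write the tunable; per the doc change above, a non-zero value also
 * triggers a proactive compaction round right away. 0 on success. */
static int set_proactiveness(int value)
{
    FILE *f = fopen(PROACTIVENESS_KNOB, "w");
    if (!f)
        return -1;                      /* not root, or knob missing */
    int ok = (fprintf(f, "%d\n", clamp_proactiveness(value)) > 0);
    return (fclose(f) == 0 && ok) ? 0 : -1;
}
```

Re-writing the current value (e.g. `set_proactiveness(20)`) is a cheap way to force a round without permanently changing the policy.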
Documentation/core-api/cachetlb.rst

Lines changed: 37 additions & 49 deletions

@@ -271,10 +271,15 @@ maps this page at its virtual address.

 ``void flush_dcache_page(struct page *page)``

-	Any time the kernel writes to a page cache page, _OR_
-	the kernel is about to read from a page cache page and
-	user space shared/writable mappings of this page potentially
-	exist, this routine is called.
+	This routine must be called when:
+
+	  a) the kernel did write to a page that is in the page cache page
+	     and / or in high memory
+	  b) the kernel is about to read from a page cache page and user space
+	     shared/writable mappings of this page potentially exist.  Note
+	     that {get,pin}_user_pages{_fast} already call flush_dcache_page
+	     on any page found in the user address space and thus driver
+	     code rarely needs to take this into account.

 .. note::

@@ -284,38 +289,34 @@ maps this page at its virtual address.
 	handling vfs symlinks in the page cache need not call
 	this interface at all.

-	The phrase "kernel writes to a page cache page" means,
-	specifically, that the kernel executes store instructions
-	that dirty data in that page at the page->virtual mapping
-	of that page.  It is important to flush here to handle
-	D-cache aliasing, to make sure these kernel stores are
-	visible to user space mappings of that page.
-
-	The corollary case is just as important, if there are users
-	which have shared+writable mappings of this file, we must make
-	sure that kernel reads of these pages will see the most recent
-	stores done by the user.
-
-	If D-cache aliasing is not an issue, this routine may
-	simply be defined as a nop on that architecture.
-
-	There is a bit set aside in page->flags (PG_arch_1) as
-	"architecture private".  The kernel guarantees that,
-	for pagecache pages, it will clear this bit when such
-	a page first enters the pagecache.
-
-	This allows these interfaces to be implemented much more
-	efficiently.  It allows one to "defer" (perhaps indefinitely)
-	the actual flush if there are currently no user processes
-	mapping this page.  See sparc64's flush_dcache_page and
-	update_mmu_cache implementations for an example of how to go
-	about doing this.
-
-	The idea is, first at flush_dcache_page() time, if
-	page->mapping->i_mmap is an empty tree, just mark the architecture
-	private page flag bit.  Later, in update_mmu_cache(), a check is
-	made of this flag bit, and if set the flush is done and the flag
-	bit is cleared.
+	The phrase "kernel writes to a page cache page" means, specifically,
+	that the kernel executes store instructions that dirty data in that
+	page at the page->virtual mapping of that page.  It is important to
+	flush here to handle D-cache aliasing, to make sure these kernel stores
+	are visible to user space mappings of that page.
+
+	The corollary case is just as important, if there are users which have
+	shared+writable mappings of this file, we must make sure that kernel
+	reads of these pages will see the most recent stores done by the user.
+
+	If D-cache aliasing is not an issue, this routine may simply be defined
+	as a nop on that architecture.
+
+	There is a bit set aside in page->flags (PG_arch_1) as "architecture
+	private".  The kernel guarantees that, for pagecache pages, it will
+	clear this bit when such a page first enters the pagecache.
+
+	This allows these interfaces to be implemented much more efficiently.
+	It allows one to "defer" (perhaps indefinitely) the actual flush if
+	there are currently no user processes mapping this page.  See sparc64's
+	flush_dcache_page and update_mmu_cache implementations for an example
+	of how to go about doing this.
+
+	The idea is, first at flush_dcache_page() time, if page_file_mapping()
+	returns a mapping, and mapping_mapped on that mapping returns %false,
+	just mark the architecture private page flag bit.  Later, in
+	update_mmu_cache(), a check is made of this flag bit, and if set the
+	flush is done and the flag bit is cleared.

 .. important::

@@ -351,19 +352,6 @@ maps this page at its virtual address.
 	architectures).  For incoherent architectures, it should flush
 	the cache of the page at vmaddr.

-``void flush_kernel_dcache_page(struct page *page)``
-
-	When the kernel needs to modify a user page is has obtained
-	with kmap, it calls this function after all modifications are
-	complete (but before kunmapping it) to bring the underlying
-	page up to date.  It is assumed here that the user has no
-	incoherent cached copies (i.e. the original page was obtained
-	from a mechanism like get_user_pages()).  The default
-	implementation is a nop and should remain so on all coherent
-	architectures.  On incoherent architectures, this should flush
-	the kernel cache for page (using page_address(page)).
-
 ``void flush_icache_range(unsigned long start, unsigned long end)``

 	When the kernel stores into addresses that it will execute

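The deferred-flush scheme the documentation describes (mark PG_arch_1 while no user mapping exists, do the real flush in update_mmu_cache()) can be modeled in plain userspace C. Everything below — `struct toy_page`, the `flushes_done` counter, the `toy_` functions — is an illustrative model of that state machine, not kernel code:

```c
#include <stdbool.h>

struct toy_page {
    bool pg_arch_1;  /* "architecture private" deferred-flush bit */
    bool mapped;     /* stands in for mapping_mapped(page_file_mapping()) */
};

static int flushes_done;  /* counts real (expensive) D-cache flushes */

static void do_real_flush(struct toy_page *p)
{
    (void)p;
    flushes_done++;
}

/* Kernel wrote to the page: flush now, or defer if nobody maps it. */
static void toy_flush_dcache_page(struct toy_page *p)
{
    if (!p->mapped)
        p->pg_arch_1 = true;    /* defer: just mark the private bit */
    else
        do_real_flush(p);
}

/* A user mapping is being established: settle any deferred flush. */
static void toy_update_mmu_cache(struct toy_page *p)
{
    p->mapped = true;
    if (p->pg_arch_1) {
        do_real_flush(p);
        p->pg_arch_1 = false;
    }
}
```

The payoff is visible in the model: any number of kernel writes to an unmapped page cost zero flushes, and a single flush happens only once user space actually maps the page.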
Documentation/dev-tools/kasan.rst

Lines changed: 8 additions & 5 deletions

@@ -181,9 +181,16 @@ By default, KASAN prints a bug report only for the first invalid memory access.
 With ``kasan_multi_shot``, KASAN prints a report on every invalid access.  This
 effectively disables ``panic_on_warn`` for KASAN reports.

+Alternatively, independent of ``panic_on_warn`` the ``kasan.fault=`` boot
+parameter can be used to control panic and reporting behaviour:
+
+- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
+  report or also panic the kernel (default: ``report``).  The panic happens even
+  if ``kasan_multi_shot`` is enabled.
+
 Hardware tag-based KASAN mode (see the section about various modes below) is
 intended for use in production as a security mitigation.  Therefore, it supports
-boot parameters that allow disabling KASAN or controlling its features.
+additional boot parameters that allow disabling KASAN or controlling features:

 - ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).

@@ -199,10 +206,6 @@ boot parameters that allow disabling KASAN or controlling its features.
 - ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
   traces collection (default: ``on``).

-- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
-  report or also panic the kernel (default: ``report``).  The panic happens even
-  if ``kasan_multi_shot`` is enabled.
-
 Implementation details
 ----------------------

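Taken together, the parameters this hunk documents compose on the kernel command line. A hedged example (all three names come from the text above; whether to panic on the first report is a deployment policy choice, and `report` is the default):

```
kasan=on kasan.stacktrace=off kasan.fault=panic
```

This is the production-leaning combination the text implies for hardware tag-based mode: KASAN enabled, stack trace collection off to reduce overhead, and a panic rather than just a report on the first fault.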
Documentation/translations/zh_CN/core-api/cachetlb.rst

Lines changed: 0 additions & 9 deletions

@@ -298,15 +298,6 @@ HyperSparc cpu就是这样一个具有这种属性的cpu。
 	用。默认的实现是nop(对于所有相干的架构应该保持这样)。对于不一致性
 	的架构,它应该刷新vmaddr处的页面缓存。

-``void flush_kernel_dcache_page(struct page *page)``
-
-	当内核需要修改一个用kmap获得的用户页时,它会在所有修改完成后(但在
-	kunmapping之前)调用这个函数,以使底层页面达到最新状态。这里假定用
-	户没有不一致性的缓存副本(即原始页面是从类似get_user_pages()的机制
-	中获得的)。默认的实现是一个nop,在所有相干的架构上都应该如此。在不
-	一致性的架构上,这应该刷新内核缓存中的页面(使用page_address(page))。
-
 ``void flush_icache_range(unsigned long start, unsigned long end)``

 	当内核存储到它将执行的地址中时(例如在加载模块时),这个函数被调用。

(This hunk drops the zh_CN translation of the ``flush_kernel_dcache_page()`` entry, keeping the translation in sync with the removal of that interface from the English Documentation/core-api/cachetlb.rst.)

Documentation/vm/hwpoison.rst

Lines changed: 0 additions & 1 deletion

@@ -180,7 +180,6 @@ Limitations
 ===========
 - Not all page types are supported and never will.  Most kernel internal
   objects cannot be recovered, only LRU pages for now.
-- Right now hugepage support is missing.

 ---
 Andi Kleen, Oct 2009

arch/alpha/kernel/syscalls/syscall.tbl

Lines changed: 2 additions & 0 deletions

@@ -486,3 +486,5 @@
 554	common	landlock_create_ruleset		sys_landlock_create_ruleset
 555	common	landlock_add_rule		sys_landlock_add_rule
 556	common	landlock_restrict_self		sys_landlock_restrict_self
+# 557 reserved for memfd_secret
+558	common	process_mrelease		sys_process_mrelease

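The new syscall takes a pidfd of a dying process and reaps its address space. A hedged C sketch: the fallback number 448 below is the generic value introduced by this series (alpha uses 558, as the table above shows) and should be verified against your headers; kernels without the patch return ENOSYS:

```c
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_process_mrelease
#define __NR_process_mrelease 448  /* assumed generic value; verify locally */
#endif

/* Ask the kernel to release the memory of the (dying) process behind
 * `pidfd`.  Returns 0 on success, otherwise the errno from the kernel:
 * EBADF for a bad pidfd, EINVAL if the task is not dying, ENOSYS on
 * kernels that predate this series. */
static int try_process_mrelease(int pidfd)
{
    if (syscall(__NR_process_mrelease, pidfd, 0) == 0)
        return 0;
    return errno;
}
```

A typical caller (e.g. a userspace OOM killer) would open the victim with pidfd_open(2), send SIGKILL through the pidfd, then call this to reclaim the memory without waiting for the victim to get scheduled.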
arch/arm/include/asm/cacheflush.h

Lines changed: 1 addition & 3 deletions

@@ -291,6 +291,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);

+#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 static inline void flush_kernel_vmap_range(void *addr, int size)
 {
 	if ((cache_is_vivt() || cache_is_vipt_aliasing()))
@@ -312,9 +313,6 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 	__flush_anon_page(vma, page, vmaddr);
 }

-#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
-extern void flush_kernel_dcache_page(struct page *);
-
 #define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&mapping->i_pages)
 #define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&mapping->i_pages)

arch/arm/kernel/setup.c

Lines changed: 7 additions & 13 deletions

@@ -1012,31 +1012,25 @@ static void __init reserve_crashkernel(void)
 		unsigned long long lowmem_max = __pa(high_memory - 1) + 1;
 		if (crash_max > lowmem_max)
 			crash_max = lowmem_max;
-		crash_base = memblock_find_in_range(CRASH_ALIGN, crash_max,
-						    crash_size, CRASH_ALIGN);
+
+		crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
+						       CRASH_ALIGN, crash_max);
 		if (!crash_base) {
 			pr_err("crashkernel reservation failed - No suitable area found.\n");
 			return;
 		}
 	} else {
+		unsigned long long crash_max = crash_base + crash_size;
 		unsigned long long start;

-		start = memblock_find_in_range(crash_base,
-					       crash_base + crash_size,
-					       crash_size, SECTION_SIZE);
-		if (start != crash_base) {
+		start = memblock_phys_alloc_range(crash_size, SECTION_SIZE,
+						  crash_base, crash_max);
+		if (!start) {
 			pr_err("crashkernel reservation failed - memory is in use.\n");
 			return;
 		}
 	}

-	ret = memblock_reserve(crash_base, crash_size);
-	if (ret < 0) {
-		pr_warn("crashkernel reservation failed - memory is in use (0x%lx)\n",
-			(unsigned long)crash_base);
-		return;
-	}
-
 	pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
 		(unsigned long)(crash_size >> 20),
 		(unsigned long)(crash_base >> 20),

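The conversion above replaces a find-then-reserve pair with memblock_phys_alloc_range(), which searches and reserves in one step and returns 0 on failure, so the caller keeps only the `if (!base)` check and the separate memblock_reserve() call disappears. A toy userspace model of that contract (a single free span, invented `toy_` names; this is an illustration of the calling convention, not the kernel's memblock):

```c
typedef unsigned long long phys_t;

struct toy_memblock {
    phys_t free_start;  /* single free region, [free_start, free_end) */
    phys_t free_end;
};

/* Round x up to a multiple of a; assumes a is a power of two. */
static phys_t align_up(phys_t x, phys_t a)
{
    return (x + a - 1) & ~(a - 1);
}

/* Allocate `size` bytes aligned to `align` within [start, end).
 * Returns the base on success, 0 on failure -- mirroring how the new
 * API lets callers drop the separate reserve step and range check. */
static phys_t toy_phys_alloc_range(struct toy_memblock *mb, phys_t size,
                                   phys_t align, phys_t start, phys_t end)
{
    phys_t lo = align_up(mb->free_start > start ? mb->free_start : start,
                         align);
    phys_t hi = mb->free_end < end ? mb->free_end : end;

    if (lo >= hi || hi - lo < size)
        return 0;                /* no suitable area: the !base case */
    mb->free_start = lo + size;  /* "reserve" by shrinking the free span */
    return lo;
}
```

The toy captures why the arm setup.c hunk shrinks: allocation and reservation are atomic in one call, and failure is a single sentinel value instead of two separate error paths.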