diff --git a/arch/xtensa/core/coredump.c b/arch/xtensa/core/coredump.c index 761cf9a05ac13..14942586ad6fb 100644 --- a/arch/xtensa/core/coredump.c +++ b/arch/xtensa/core/coredump.c @@ -135,8 +135,6 @@ void arch_coredump_info_dump(const struct arch_esf *esf) xtensa_coredump_fault_sp = (uint32_t)esf; #endif - __asm__ volatile("rsr.exccause %0" : "=r"(arch_blk.r.exccause)); - _xtensa_irq_stack_frame_raw_t *frame = (void *)esf; _xtensa_irq_bsa_t *bsa = frame->ptr_to_bsa; uintptr_t num_high_regs; @@ -150,7 +148,7 @@ void arch_coredump_info_dump(const struct arch_esf *esf) regs_blk_remaining = (int)num_high_regs / 4; arch_blk.r.pc = bsa->pc; - __asm__ volatile("rsr.excvaddr %0" : "=r"(arch_blk.r.excvaddr)); + arch_blk.r.excvaddr = bsa->excvaddr; arch_blk.r.ps = bsa->ps; #if XCHAL_HAVE_S32C1I arch_blk.r.scompare1 = bsa->scompare1; @@ -160,6 +158,7 @@ void arch_coredump_info_dump(const struct arch_esf *esf) arch_blk.r.a1 = (uint32_t)((char *)bsa) + sizeof(*bsa); arch_blk.r.a2 = bsa->a2; arch_blk.r.a3 = bsa->a3; + arch_blk.r.exccause = bsa->exccause; if (regs_blk_remaining > 0) { regs_blk_remaining--; diff --git a/arch/xtensa/core/gen_zsr.py b/arch/xtensa/core/gen_zsr.py index 8cb8a713019e3..a50e205e84cd0 100755 --- a/arch/xtensa/core/gen_zsr.py +++ b/arch/xtensa/core/gen_zsr.py @@ -31,7 +31,7 @@ def parse_args(): NEEDED = ["A0SAVE", "CPU"] if args.mmu: - NEEDED += ["DBLEXC", "DEPC_SAVE", "EXCCAUSE_SAVE"] + NEEDED += ["DBLEXC", "DEPC_SAVE"] if args.flush_reg: NEEDED += ["FLUSH"] diff --git a/arch/xtensa/core/offsets/offsets.c b/arch/xtensa/core/offsets/offsets.c index 9edf341b7a7b4..3a1d04636576c 100644 --- a/arch/xtensa/core/offsets/offsets.c +++ b/arch/xtensa/core/offsets/offsets.c @@ -22,6 +22,7 @@ GEN_OFFSET_SYM(_xtensa_irq_bsa_t, a2); GEN_OFFSET_SYM(_xtensa_irq_bsa_t, a3); GEN_OFFSET_SYM(_xtensa_irq_bsa_t, exccause); +GEN_OFFSET_SYM(_xtensa_irq_bsa_t, excvaddr); GEN_OFFSET_SYM(_xtensa_irq_bsa_t, pc); GEN_OFFSET_SYM(_xtensa_irq_bsa_t, ps); GEN_OFFSET_SYM(_xtensa_irq_bsa_t, sar); diff --git a/arch/xtensa/core/ptables.c b/arch/xtensa/core/ptables.c index 605dbe2453723..3680fc7cf2a65 100644 --- a/arch/xtensa/core/ptables.c +++ b/arch/xtensa/core/ptables.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -22,6 +23,14 @@ */ #define OPTION_NO_TLB_IPI BIT(0) +/* Restore the PTE attributes if they have been + * stored in the SW bits part in the PTE. + */ +#define OPTION_RESTORE_ATTRS BIT(1) + +/* Save the PTE attributes and ring in the SW bits part in the PTE. */ +#define OPTION_SAVE_ATTRS BIT(2) + /* Level 1 contains page table entries * necessary to map the page table itself. */ @@ -53,14 +62,14 @@ BUILD_ASSERT(CONFIG_MMU_PAGE_SIZE == 0x1000, * Each memory domain contains its own l1 page table. The kernel l1 page table is * located at the index 0. */ -static uint32_t l1_page_table[CONFIG_XTENSA_MMU_NUM_L1_TABLES][XTENSA_L1_PAGE_TABLE_ENTRIES] +static uint32_t l1_page_tables[CONFIG_XTENSA_MMU_NUM_L1_TABLES][XTENSA_L1_PAGE_TABLE_ENTRIES] __aligned(KB(4)); /* * That is an alias for the page tables set used by the kernel. */ -uint32_t *xtensa_kernel_ptables = (uint32_t *)l1_page_table[0]; +uint32_t *xtensa_kernel_ptables = (uint32_t *)l1_page_tables[0]; /* * Each table in the level 2 maps a 4Mb memory range. It consists of 1024 entries each one @@ -75,7 +84,7 @@ static uint32_t l2_page_tables[CONFIG_XTENSA_MMU_NUM_L2_TABLES][XTENSA_L2_PAGE_T * * @note: The first bit is set because it is used for the kernel page tables. 
*/ -static ATOMIC_DEFINE(l1_page_table_track, CONFIG_XTENSA_MMU_NUM_L1_TABLES); +static ATOMIC_DEFINE(l1_page_tables_track, CONFIG_XTENSA_MMU_NUM_L1_TABLES); /* * This additional variable tracks which l2 tables are in use. This is kept separated from @@ -157,6 +166,8 @@ static const struct xtensa_mmu_range mmu_zephyr_ranges[] = { }, }; +static inline uint32_t restore_pte(uint32_t pte); + /** * @brief Check if the page table entry is illegal. * @@ -174,18 +185,19 @@ static inline bool is_pte_illegal(uint32_t pte) return (attr == 12) || (attr == 14); } -/* - * @brief Initialize all page table entries to be illegal. +/** + * @brief Initialize all page table entries to the same value (@a val). * - * @param[in] Pointer to page table. - * @param[in] Number of page table entries in the page table. + * @param[in] ptable Pointer to page table. + * @param[in] num_entries Number of page table entries in the page table. + * @param[in] val Initialize all PTEs with this value. */ -static void init_page_table(uint32_t *ptable, size_t num_entries) +static void init_page_table(uint32_t *ptable, size_t num_entries, uint32_t val) { int i; for (i = 0; i < num_entries; i++) { - ptable[i] = XTENSA_MMU_PTE_ILLEGAL; + ptable[i] = val; } } @@ -203,60 +215,66 @@ static inline uint32_t *alloc_l2_table(void) } static void map_memory_range(const uint32_t start, const uint32_t end, - const uint32_t attrs) + const uint32_t attrs, const uint32_t options) { - uint32_t page, *table; + uint32_t page; bool shared = !!(attrs & XTENSA_MMU_MAP_SHARED); - uint32_t sw_attrs = (attrs & XTENSA_MMU_PTE_ATTR_ORIGINAL) == XTENSA_MMU_PTE_ATTR_ORIGINAL ? - attrs : 0; + bool do_save_attrs = (options & OPTION_SAVE_ATTRS) == OPTION_SAVE_ATTRS; + uint32_t ring, sw_attrs, sw_ring, pte_sw; + + ring = shared ? XTENSA_MMU_SHARED_RING : XTENSA_MMU_KERNEL_RING; + sw_attrs = do_save_attrs ? attrs : XTENSA_MMU_PTE_ATTR_ILLEGAL; + sw_ring = do_save_attrs ? ring : XTENSA_MMU_KERNEL_RING; + pte_sw = XTENSA_MMU_PTE_SW(sw_ring, sw_attrs); for (page = start; page < end; page += CONFIG_MMU_PAGE_SIZE) { - uint32_t pte = XTENSA_MMU_PTE(page, - shared ? 
XTENSA_MMU_SHARED_RING : - XTENSA_MMU_KERNEL_RING, - sw_attrs, attrs); + uint32_t *l2_table; + uint32_t pte = XTENSA_MMU_PTE(page, ring, pte_sw, attrs); uint32_t l2_pos = XTENSA_MMU_L2_POS(page); uint32_t l1_pos = XTENSA_MMU_L1_POS(page); if (is_pte_illegal(xtensa_kernel_ptables[l1_pos])) { - table = alloc_l2_table(); + l2_table = alloc_l2_table(); - __ASSERT(table != NULL, "There is no l2 page table available to " - "map 0x%08x\n", page); + __ASSERT(l2_table != NULL, + "There is no l2 page table available to map 0x%08x\n", page); - init_page_table(table, XTENSA_L2_PAGE_TABLE_ENTRIES); + init_page_table(l2_table, XTENSA_L2_PAGE_TABLE_ENTRIES, + XTENSA_MMU_PTE_L2_ILLEGAL); xtensa_kernel_ptables[l1_pos] = - XTENSA_MMU_PTE((uint32_t)table, XTENSA_MMU_KERNEL_RING, - sw_attrs, XTENSA_MMU_PAGE_TABLE_ATTR); + XTENSA_MMU_PTE((uint32_t)l2_table, XTENSA_MMU_KERNEL_RING, + 0, XTENSA_MMU_PAGE_TABLE_ATTR); } - table = (uint32_t *)(xtensa_kernel_ptables[l1_pos] & XTENSA_MMU_PTE_PPN_MASK); - table[l2_pos] = pte; + l2_table = (uint32_t *)(xtensa_kernel_ptables[l1_pos] & XTENSA_MMU_PTE_PPN_MASK); + l2_table[l2_pos] = pte; } } static void map_memory(const uint32_t start, const uint32_t end, - const uint32_t attrs) + const uint32_t attrs, const uint32_t options) { #ifdef CONFIG_XTENSA_MMU_DOUBLE_MAP uint32_t uc_attrs = attrs & ~XTENSA_MMU_PTE_ATTR_CACHED_MASK; uint32_t c_attrs = attrs | XTENSA_MMU_CACHED_WB; if (sys_cache_is_ptr_uncached((void *)start)) { - map_memory_range(start, end, uc_attrs); + map_memory_range(start, end, uc_attrs, options); map_memory_range(POINTER_TO_UINT(sys_cache_cached_ptr_get((void *)start)), - POINTER_TO_UINT(sys_cache_cached_ptr_get((void *)end)), c_attrs); + POINTER_TO_UINT(sys_cache_cached_ptr_get((void *)end)), + c_attrs, options); } else if (sys_cache_is_ptr_cached((void *)start)) { - map_memory_range(start, end, c_attrs); + map_memory_range(start, end, c_attrs, options); map_memory_range(POINTER_TO_UINT(sys_cache_uncached_ptr_get((void *)start)), - POINTER_TO_UINT(sys_cache_uncached_ptr_get((void *)end)), uc_attrs); + POINTER_TO_UINT(sys_cache_uncached_ptr_get((void *)end)), + uc_attrs, options); } else #endif { - map_memory_range(start, end, attrs); + map_memory_range(start, end, attrs, options); } } @@ -270,19 +288,20 @@ static void xtensa_init_page_tables(void) } already_inited = true; - init_page_table(xtensa_kernel_ptables, XTENSA_L1_PAGE_TABLE_ENTRIES); - atomic_set_bit(l1_page_table_track, 0); + init_page_table(xtensa_kernel_ptables, XTENSA_L1_PAGE_TABLE_ENTRIES, + XTENSA_MMU_PTE_L1_ILLEGAL); + atomic_set_bit(l1_page_tables_track, 0); for (entry = 0; entry < ARRAY_SIZE(mmu_zephyr_ranges); entry++) { const struct xtensa_mmu_range *range = &mmu_zephyr_ranges[entry]; - map_memory(range->start, range->end, range->attrs | XTENSA_MMU_PTE_ATTR_ORIGINAL); + map_memory(range->start, range->end, range->attrs, OPTION_SAVE_ATTRS); } for (entry = 0; entry < xtensa_soc_mmu_ranges_num; entry++) { const struct xtensa_mmu_range *range = &xtensa_soc_mmu_ranges[entry]; - map_memory(range->start, range->end, range->attrs | XTENSA_MMU_PTE_ATTR_ORIGINAL); + map_memory(range->start, range->end, range->attrs, OPTION_SAVE_ATTRS); } /* Finally, the direct-mapped pages used in the page tables @@ -290,12 +309,12 @@ static void xtensa_init_page_tables(void) * must be writable, obviously). They shouldn't be left at * the default. 
*/ - map_memory_range((uint32_t) &l1_page_table[0], - (uint32_t) &l1_page_table[CONFIG_XTENSA_MMU_NUM_L1_TABLES], - XTENSA_MMU_PAGE_TABLE_ATTR | XTENSA_MMU_PERM_W); + map_memory_range((uint32_t) &l1_page_tables[0], + (uint32_t) &l1_page_tables[CONFIG_XTENSA_MMU_NUM_L1_TABLES], + XTENSA_MMU_PAGE_TABLE_ATTR | XTENSA_MMU_PERM_W, OPTION_SAVE_ATTRS); map_memory_range((uint32_t) &l2_page_tables[0], (uint32_t) &l2_page_tables[CONFIG_XTENSA_MMU_NUM_L2_TABLES], - XTENSA_MMU_PAGE_TABLE_ATTR | XTENSA_MMU_PERM_W); + XTENSA_MMU_PAGE_TABLE_ATTR | XTENSA_MMU_PERM_W, OPTION_SAVE_ATTRS); sys_cache_data_flush_all(); } @@ -360,46 +379,48 @@ __weak void arch_reserved_pages_update(void) #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */ static bool l2_page_table_map(uint32_t *l1_table, void *vaddr, uintptr_t phys, - uint32_t flags, bool is_user) + uint32_t attrs, bool is_user) { uint32_t l1_pos = XTENSA_MMU_L1_POS((uint32_t)vaddr); uint32_t l2_pos = XTENSA_MMU_L2_POS((uint32_t)vaddr); - uint32_t *table; + uint32_t *l2_table; sys_cache_data_invd_range((void *)&l1_table[l1_pos], sizeof(l1_table[0])); if (is_pte_illegal(l1_table[l1_pos])) { - table = alloc_l2_table(); + l2_table = alloc_l2_table(); - if (table == NULL) { + if (l2_table == NULL) { return false; } - init_page_table(table, XTENSA_L2_PAGE_TABLE_ENTRIES); + init_page_table(l2_table, XTENSA_L2_PAGE_TABLE_ENTRIES, XTENSA_MMU_PTE_L2_ILLEGAL); - l1_table[l1_pos] = XTENSA_MMU_PTE((uint32_t)table, XTENSA_MMU_KERNEL_RING, + l1_table[l1_pos] = XTENSA_MMU_PTE((uint32_t)l2_table, XTENSA_MMU_KERNEL_RING, 0, XTENSA_MMU_PAGE_TABLE_ATTR); sys_cache_data_flush_range((void *)&l1_table[l1_pos], sizeof(l1_table[0])); } - table = (uint32_t *)(l1_table[l1_pos] & XTENSA_MMU_PTE_PPN_MASK); - table[l2_pos] = XTENSA_MMU_PTE(phys, is_user ? XTENSA_MMU_USER_RING : - XTENSA_MMU_KERNEL_RING, - 0, flags); + l2_table = (uint32_t *)(l1_table[l1_pos] & XTENSA_MMU_PTE_PPN_MASK); + l2_table[l2_pos] = XTENSA_MMU_PTE(phys, is_user ? 
XTENSA_MMU_USER_RING : + XTENSA_MMU_KERNEL_RING, + XTENSA_MMU_PTE_SW(XTENSA_MMU_KERNEL_RING, + XTENSA_MMU_PTE_ATTR_ILLEGAL), + attrs); - sys_cache_data_flush_range((void *)&table[l2_pos], sizeof(table[0])); + sys_cache_data_flush_range((void *)&l2_table[l2_pos], sizeof(l2_table[0])); xtensa_tlb_autorefill_invalidate(); return true; } -static inline void __arch_mem_map(void *va, uintptr_t pa, uint32_t xtensa_flags, bool is_user) +static inline void __arch_mem_map(void *va, uintptr_t pa, uint32_t new_attrs, bool is_user) { bool ret; void *vaddr, *vaddr_uc; uintptr_t paddr, paddr_uc; - uint32_t flags, flags_uc; + uint32_t attrs, attrs_uc; if (IS_ENABLED(CONFIG_XTENSA_MMU_DOUBLE_MAP)) { if (sys_cache_is_ptr_cached(va)) { @@ -418,22 +439,22 @@ static inline void __arch_mem_map(void *va, uintptr_t pa, uint32_t xtensa_flags, paddr_uc = pa; } - flags_uc = (xtensa_flags & ~XTENSA_MMU_PTE_ATTR_CACHED_MASK); - flags = flags_uc | XTENSA_MMU_CACHED_WB; + attrs_uc = (new_attrs & ~XTENSA_MMU_PTE_ATTR_CACHED_MASK); + attrs = attrs_uc | XTENSA_MMU_CACHED_WB; } else { vaddr = va; paddr = pa; - flags = xtensa_flags; + attrs = new_attrs; } ret = l2_page_table_map(xtensa_kernel_ptables, (void *)vaddr, paddr, - flags, is_user); - __ASSERT(ret, "Virtual address (%p) already mapped", va); + attrs, is_user); + __ASSERT(ret, "Cannot map virtual address (%p)", va); if (IS_ENABLED(CONFIG_XTENSA_MMU_DOUBLE_MAP) && ret) { ret = l2_page_table_map(xtensa_kernel_ptables, (void *)vaddr_uc, paddr_uc, - flags_uc, is_user); - __ASSERT(ret, "Virtual address (%p) already mapped", vaddr_uc); + attrs_uc, is_user); + __ASSERT(ret, "Cannot map virtual address (%p)", vaddr_uc); } #ifndef CONFIG_USERSPACE @@ -449,15 +470,15 @@ static inline void __arch_mem_map(void *va, uintptr_t pa, uint32_t xtensa_flags, domain = CONTAINER_OF(node, struct arch_mem_domain, node); ret = l2_page_table_map(domain->ptables, (void *)vaddr, paddr, - flags, is_user); - __ASSERT(ret, "Virtual address (%p) already mapped for domain %p", + attrs, is_user); + __ASSERT(ret, "Cannot map virtual address (%p) for domain %p", vaddr, domain); if (IS_ENABLED(CONFIG_XTENSA_MMU_DOUBLE_MAP) && ret) { ret = l2_page_table_map(domain->ptables, (void *)vaddr_uc, paddr_uc, - flags_uc, is_user); - __ASSERT(ret, "Virtual address (%p) already mapped for domain %p", + attrs_uc, is_user); + __ASSERT(ret, "Cannot map virtual address (%p) for domain %p", vaddr_uc, domain); } } @@ -471,7 +492,7 @@ void arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags) uint32_t va = (uint32_t)virt; uint32_t pa = (uint32_t)phys; uint32_t rem_size = (uint32_t)size; - uint32_t xtensa_flags = 0; + uint32_t attrs = 0; k_spinlock_key_t key; bool is_user; @@ -484,10 +505,10 @@ void arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags) switch (flags & K_MEM_CACHE_MASK) { case K_MEM_CACHE_WB: - xtensa_flags |= XTENSA_MMU_CACHED_WB; + attrs |= XTENSA_MMU_CACHED_WB; break; case K_MEM_CACHE_WT: - xtensa_flags |= XTENSA_MMU_CACHED_WT; + attrs |= XTENSA_MMU_CACHED_WT; break; case K_MEM_CACHE_NONE: __fallthrough; @@ -496,10 +517,10 @@ void arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags) } if ((flags & K_MEM_PERM_RW) == K_MEM_PERM_RW) { - xtensa_flags |= XTENSA_MMU_PERM_W; + attrs |= XTENSA_MMU_PERM_W; } if ((flags & K_MEM_PERM_EXEC) == K_MEM_PERM_EXEC) { - xtensa_flags |= XTENSA_MMU_PERM_X; + attrs |= XTENSA_MMU_PERM_X; } is_user = (flags & K_MEM_PERM_USER) == K_MEM_PERM_USER; @@ -507,7 +528,7 @@ void arch_mem_map(void *virt, uintptr_t phys, size_t size, 
uint32_t flags) key = k_spin_lock(&xtensa_mmu_lock); while (rem_size > 0) { - __arch_mem_map((void *)va, pa, xtensa_flags, is_user); + __arch_mem_map((void *)va, pa, attrs, is_user); rem_size -= (rem_size >= KB(4)) ? KB(4) : rem_size; va += KB(4); @@ -531,7 +552,7 @@ static bool l2_page_table_unmap(uint32_t *l1_table, void *vaddr) uint32_t l1_pos = XTENSA_MMU_L1_POS((uint32_t)vaddr); uint32_t l2_pos = XTENSA_MMU_L2_POS((uint32_t)vaddr); uint32_t *l2_table; - uint32_t table_pos; + uint32_t table_trk_pos; bool exec; sys_cache_data_invd_range((void *)&l1_table[l1_pos], sizeof(l1_table[0])); @@ -549,21 +570,28 @@ static bool l2_page_table_unmap(uint32_t *l1_table, void *vaddr) sys_cache_data_invd_range((void *)&l2_table[l2_pos], sizeof(l2_table[0])); - l2_table[l2_pos] = XTENSA_MMU_PTE_ILLEGAL; + /* Restore the PTE to previous ring and attributes. */ + l2_table[l2_pos] = restore_pte(l2_table[l2_pos]); sys_cache_data_flush_range((void *)&l2_table[l2_pos], sizeof(l2_table[0])); for (l2_pos = 0; l2_pos < XTENSA_L2_PAGE_TABLE_ENTRIES; l2_pos++) { if (!is_pte_illegal(l2_table[l2_pos])) { + /* If any PTE is mapped (== not illegal), we need to + * keep this L2 table. + */ goto end; } } - l1_table[l1_pos] = XTENSA_MMU_PTE_ILLEGAL; + /* All L2 PTE are illegal (== nothing mapped), we can safely remove + * the L2 table mapping in L1 table and return the L2 table to the pool. + */ + l1_table[l1_pos] = XTENSA_MMU_PTE_L1_ILLEGAL; sys_cache_data_flush_range((void *)&l1_table[l1_pos], sizeof(l1_table[0])); - table_pos = (l2_table - (uint32_t *)l2_page_tables) / (XTENSA_L2_PAGE_TABLE_ENTRIES); - atomic_clear_bit(l2_page_tables_track, table_pos); + table_trk_pos = (l2_table - (uint32_t *)l2_page_tables) / (XTENSA_L2_PAGE_TABLE_ENTRIES); + atomic_clear_bit(l2_page_tables_track, table_trk_pos); end: /* Need to invalidate L2 page table as it is no longer valid. */ @@ -669,7 +697,7 @@ void xtensa_mmu_tlb_shootdown(void) /* We don't have information on which page tables have changed, * so we just invalidate the cache for all L1 page tables. */ - sys_cache_data_invd_range((void *)l1_page_table, sizeof(l1_page_table)); + sys_cache_data_invd_range((void *)l1_page_tables, sizeof(l1_page_tables)); sys_cache_data_invd_range((void *)l2_page_tables, sizeof(l2_page_tables)); } @@ -719,6 +747,37 @@ void xtensa_mmu_tlb_shootdown(void) arch_irq_unlock(key); } +/** + * @brief Restore PTE ring and attributes from those stashed in SW bits. + * + * @param[in] pte Page table entry to be restored. + * + * @note This does not check if the SW bits contain ring and attributes to be + * restored. + * + * @return PTE with restored ring and attributes. Illegal entry if original is + * illegal. 
+ */ +static inline uint32_t restore_pte(uint32_t pte) +{ + uint32_t restored_pte; + + uint32_t original_sw = XTENSA_MMU_PTE_SW_GET(pte); + uint32_t original_attr = XTENSA_MMU_PTE_SW_ATTR_GET(original_sw); + + if (original_attr != XTENSA_MMU_PTE_ATTR_ILLEGAL) { + uint8_t original_ring = XTENSA_MMU_PTE_SW_RING_GET(original_sw); + + restored_pte = pte; + restored_pte = XTENSA_MMU_PTE_ATTR_SET(restored_pte, original_attr); + restored_pte = XTENSA_MMU_PTE_RING_SET(restored_pte, original_ring); + } else { + restored_pte = XTENSA_MMU_PTE_L2_ILLEGAL; + } + + return restored_pte; +} + #ifdef CONFIG_USERSPACE static inline uint32_t *thread_page_tables_get(const struct k_thread *thread) @@ -735,8 +794,8 @@ static inline uint32_t *alloc_l1_table(void) uint16_t idx; for (idx = 0; idx < CONFIG_XTENSA_MMU_NUM_L1_TABLES; idx++) { - if (!atomic_test_and_set_bit(l1_page_table_track, idx)) { - return (uint32_t *)&l1_page_table[idx]; + if (!atomic_test_and_set_bit(l1_page_tables_track, idx)) { + return (uint32_t *)&l1_page_tables[idx]; } } @@ -746,9 +805,9 @@ static inline uint32_t *alloc_l1_table(void) static uint32_t *dup_table(void) { uint16_t i, j; - uint32_t *dst_table = alloc_l1_table(); + uint32_t *l1_table = alloc_l1_table(); - if (!dst_table) { + if (!l1_table) { return NULL; } @@ -757,7 +816,7 @@ static uint32_t *dup_table(void) if (is_pte_illegal(xtensa_kernel_ptables[i]) || (i == XTENSA_MMU_L1_POS(XTENSA_MMU_PTEVADDR))) { - dst_table[i] = XTENSA_MMU_PTE_ILLEGAL; + l1_table[i] = XTENSA_MMU_PTE_L1_ILLEGAL; continue; } @@ -768,32 +827,21 @@ static uint32_t *dup_table(void) } for (j = 0; j < XTENSA_L2_PAGE_TABLE_ENTRIES; j++) { - uint32_t original_attr = XTENSA_MMU_PTE_SW_GET(src_l2_table[j]); - - l2_table[j] = src_l2_table[j]; - if (original_attr != 0x0) { - uint8_t ring; - - ring = XTENSA_MMU_PTE_RING_GET(l2_table[j]); - l2_table[j] = XTENSA_MMU_PTE_ATTR_SET(l2_table[j], original_attr); - l2_table[j] = XTENSA_MMU_PTE_RING_SET(l2_table[j], - ring == XTENSA_MMU_SHARED_RING ? - XTENSA_MMU_SHARED_RING : XTENSA_MMU_KERNEL_RING); - } + l2_table[j] = restore_pte(src_l2_table[j]); } /* The page table is using kernel ASID because we don't * user thread manipulate it. 
*/ - dst_table[i] = XTENSA_MMU_PTE((uint32_t)l2_table, XTENSA_MMU_KERNEL_RING, - 0, XTENSA_MMU_PAGE_TABLE_ATTR); + l1_table[i] = XTENSA_MMU_PTE((uint32_t)l2_table, XTENSA_MMU_KERNEL_RING, + 0, XTENSA_MMU_PAGE_TABLE_ATTR); sys_cache_data_flush_range((void *)l2_table, XTENSA_L2_PAGE_TABLE_SIZE); } - sys_cache_data_flush_range((void *)dst_table, XTENSA_L1_PAGE_TABLE_SIZE); + sys_cache_data_flush_range((void *)l1_table, XTENSA_L1_PAGE_TABLE_SIZE); - return dst_table; + return l1_table; err: /* TODO: Cleanup failed allocation*/ @@ -847,23 +895,36 @@ int arch_mem_domain_init(struct k_mem_domain *domain) return ret; } -static void region_map_update(uint32_t *ptables, uintptr_t start, - size_t size, uint32_t ring, uint32_t flags) +static void region_map_update(uint32_t *l1_table, uintptr_t start, + size_t size, uint32_t ring, uint32_t flags, uint32_t option) { for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) { uint32_t *l2_table, pte; + uint32_t new_ring, new_attrs; uint32_t page = start + offset; uint32_t l1_pos = XTENSA_MMU_L1_POS(page); uint32_t l2_pos = XTENSA_MMU_L2_POS(page); /* Make sure we grab a fresh copy of L1 page table */ - sys_cache_data_invd_range((void *)&ptables[l1_pos], sizeof(ptables[0])); + sys_cache_data_invd_range((void *)&l1_table[l1_pos], sizeof(l1_table[0])); - l2_table = (uint32_t *)(ptables[l1_pos] & XTENSA_MMU_PTE_PPN_MASK); + l2_table = (uint32_t *)(l1_table[l1_pos] & XTENSA_MMU_PTE_PPN_MASK); sys_cache_data_invd_range((void *)&l2_table[l2_pos], sizeof(l2_table[0])); - pte = XTENSA_MMU_PTE_RING_SET(l2_table[l2_pos], ring); - pte = XTENSA_MMU_PTE_ATTR_SET(pte, flags); + pte = l2_table[l2_pos]; + + if ((option & OPTION_RESTORE_ATTRS) == OPTION_RESTORE_ATTRS) { + uint32_t original_sw = XTENSA_MMU_PTE_SW_GET(pte); + + new_attrs = XTENSA_MMU_PTE_SW_ATTR_GET(original_sw); + new_ring = XTENSA_MMU_PTE_SW_RING_GET(original_sw); + } else { + new_attrs = flags; + new_ring = ring; + } + + pte = XTENSA_MMU_PTE_RING_SET(pte, new_ring); + pte = XTENSA_MMU_PTE_ATTR_SET(pte, new_attrs); l2_table[l2_pos] = pte; @@ -895,10 +956,10 @@ static void update_region(uint32_t *ptables, uintptr_t start, size_t size, new_flags_uc = (flags & ~XTENSA_MMU_PTE_ATTR_CACHED_MASK); new_flags = new_flags_uc | XTENSA_MMU_CACHED_WB; - region_map_update(ptables, va, size, ring, new_flags); - region_map_update(ptables, va_uc, size, ring, new_flags_uc); + region_map_update(ptables, va, size, ring, new_flags, option); + region_map_update(ptables, va_uc, size, ring, new_flags_uc, option); #else - region_map_update(ptables, start, size, ring, flags); + region_map_update(ptables, start, size, ring, flags, option); #endif /* CONFIG_XTENSA_MMU_DOUBLE_MAP */ #if CONFIG_MP_MAX_NUM_CPUS > 1 @@ -914,7 +975,8 @@ static void update_region(uint32_t *ptables, uintptr_t start, size_t size, static inline void reset_region(uint32_t *ptables, uintptr_t start, size_t size, uint32_t option) { update_region(ptables, start, size, - XTENSA_MMU_KERNEL_RING, XTENSA_MMU_PERM_W, option); + XTENSA_MMU_KERNEL_RING, XTENSA_MMU_PERM_W, + option | OPTION_RESTORE_ATTRS); } void xtensa_user_stack_perms(struct k_thread *thread) @@ -1112,6 +1174,63 @@ int arch_buffer_validate(const void *addr, size_t size, int write) return mem_buffer_validate(addr, size, write, XTENSA_MMU_USER_RING); } +void xtensa_exc_dtlb_multihit_handle(void) +{ + /* For some unknown reasons, using xtensa_dtlb_probe() would result in + * QEMU raising privileged instruction exception. So for now, just + * invalidate all auto-refilled DTLBs. 
+ */ + + xtensa_dtlb_autorefill_invalidate(); +} + +bool xtensa_exc_load_store_ring_error_check(void *bsa_p) +{ + uintptr_t ring, vaddr; + _xtensa_irq_bsa_t *bsa = (_xtensa_irq_bsa_t *)bsa_p; + + ring = (bsa->ps & XCHAL_PS_RING_MASK) >> XCHAL_PS_RING_SHIFT; + + if (ring != XTENSA_MMU_USER_RING) { + return true; + } + + vaddr = bsa->excvaddr; + + if (arch_buffer_validate((void *)vaddr, sizeof(uint32_t), false) != 0) { + /* User thread DO NOT have access to this memory according to + * page table. so this is a true access violation. + */ + return true; + } + + /* User thread has access to this memory according to + * page table. so this is not a true access violation. + * + * Now we need to find all associated auto-refilled DTLBs + * and invalidate them. So that hardware can reload + * from page table with correct permission for user + * thread. + */ + while (true) { + uint32_t dtlb_entry = xtensa_dtlb_probe((void *)vaddr); + + if ((dtlb_entry & XTENSA_MMU_PDTLB_HIT) != XTENSA_MMU_PDTLB_HIT) { + /* No more DTLB entry found. */ + return false; + } + + if ((dtlb_entry & XTENSA_MMU_PDTLB_WAY_MASK) >= + XTENSA_MMU_NUM_TLB_AUTOREFILL_WAYS) { + return false; + } + + xtensa_dtlb_entry_invalidate_sync(dtlb_entry); + } + + return false; +} + #ifdef CONFIG_XTENSA_MMU_FLUSH_AUTOREFILL_DTLBS_ON_SWAP /* This is only used when swapping page tables and auto-refill DTLBs * needing to be invalidated. Otherwise, SWAP_PAGE_TABLE assembly diff --git a/arch/xtensa/core/userspace.S b/arch/xtensa/core/userspace.S index 56d53d59b0b56..be1f959ac0582 100644 --- a/arch/xtensa/core/userspace.S +++ b/arch/xtensa/core/userspace.S @@ -4,7 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include +#include #include #include #include diff --git a/arch/xtensa/core/vector_handlers.c b/arch/xtensa/core/vector_handlers.c index d15d779933a03..c51d5e83a588e 100644 --- a/arch/xtensa/core/vector_handlers.c +++ b/arch/xtensa/core/vector_handlers.c @@ -3,6 +3,7 @@ * * SPDX-License-Identifier: Apache-2.0 */ +#include "xtensa/corebits.h" #include #include #include @@ -220,38 +221,35 @@ static inline unsigned int get_bits(int offset, int num_bits, unsigned int val) return val & mask; } -static void print_fatal_exception(void *print_stack, int cause, - bool is_dblexc, uint32_t depc) +static void print_fatal_exception(void *print_stack, bool is_dblexc, uint32_t depc) { void *pc; - uint32_t ps, vaddr; + uint32_t ps; _xtensa_irq_bsa_t *bsa = (void *)*(int **)print_stack; - __asm__ volatile("rsr.excvaddr %0" : "=r"(vaddr)); - if (is_dblexc) { EXCEPTION_DUMP(" ** FATAL EXCEPTION (DOUBLE)"); } else { EXCEPTION_DUMP(" ** FATAL EXCEPTION"); } - EXCEPTION_DUMP(" ** CPU %d EXCCAUSE %d (%s)", - arch_curr_cpu()->id, cause, - xtensa_exccause(cause)); + EXCEPTION_DUMP(" ** CPU %d EXCCAUSE %u (%s)", + arch_curr_cpu()->id, (uint32_t)bsa->exccause, + xtensa_exccause(bsa->exccause)); /* Don't print information if the BSA area is invalid as any elements * obtained via de-referencing the pointer are probably also invalid. * Or worse, cause another access violation. 
*/ if (xtensa_is_outside_stack_bounds((uintptr_t)bsa, sizeof(*bsa), UINT32_MAX)) { - EXCEPTION_DUMP(" ** VADDR %p Invalid SP %p", (void *)vaddr, print_stack); + EXCEPTION_DUMP(" ** VADDR %p Invalid SP %p", (void *)bsa->excvaddr, print_stack); return; } ps = bsa->ps; pc = (void *)bsa->pc; - EXCEPTION_DUMP(" ** PC %p VADDR %p", pc, (void *)vaddr); + EXCEPTION_DUMP(" ** PC %p VADDR %p", pc, (void *)bsa->excvaddr); if (is_dblexc) { EXCEPTION_DUMP(" ** DEPC %p", (void *)depc); @@ -259,10 +257,13 @@ static void print_fatal_exception(void *print_stack, int cause, EXCEPTION_DUMP(" ** PS %p", (void *)bsa->ps); EXCEPTION_DUMP(" ** (INTLEVEL:%d EXCM: %d UM:%d RING:%d WOE:%d OWB:%d CALLINC:%d)", - get_bits(0, 4, ps), get_bits(4, 1, ps), - get_bits(5, 1, ps), get_bits(6, 2, ps), - get_bits(18, 1, ps), - get_bits(8, 4, ps), get_bits(16, 2, ps)); + get_bits(XCHAL_PS_INTLEVEL_SHIFT, XCHAL_PS_INTLEVEL_BITS, ps), + get_bits(XCHAL_PS_EXCM_SHIFT, XCHAL_PS_EXCM_BITS, ps), + get_bits(XCHAL_PS_UM_SHIFT, XCHAL_PS_UM_BITS, ps), + get_bits(XCHAL_PS_RING_SHIFT, XCHAL_PS_RING_BITS, ps), + get_bits(XCHAL_PS_WOE_SHIFT, XCHAL_PS_WOE_BITS, ps), + get_bits(XCHAL_PS_OWB_SHIFT, XCHAL_PS_OWB_BITS, ps), + get_bits(XCHAL_PS_CALLINC_SHIFT, XCHAL_PS_CALLINC_BITS, ps)); } static ALWAYS_INLINE void usage_stop(void) @@ -544,13 +545,12 @@ void *xtensa_excint1_c(void *esf) #ifdef CONFIG_XTENSA_MMU depc = XTENSA_RSR(ZSR_DEPC_SAVE_STR); - cause = XTENSA_RSR(ZSR_EXCCAUSE_SAVE_STR); is_dblexc = (depc != 0U); -#else /* CONFIG_XTENSA_MMU */ - __asm__ volatile("rsr.exccause %0" : "=r"(cause)); #endif /* CONFIG_XTENSA_MMU */ + cause = bsa->exccause; + switch (cause) { case EXCCAUSE_LEVEL1_INTERRUPT: #ifdef CONFIG_XTENSA_MMU @@ -631,6 +631,16 @@ void *xtensa_excint1_c(void *esf) xtensa_lazy_hifi_load(thread->arch.hifi_regs); break; #endif /* CONFIG_XTENSA_LAZY_HIFI_SHARING */ +#if defined(CONFIG_XTENSA_MMU) && defined(CONFIG_USERSPACE) + case EXCCAUSE_DTLB_MULTIHIT: + xtensa_exc_dtlb_multihit_handle(); + break; + case EXCCAUSE_LOAD_STORE_RING: + if (!xtensa_exc_load_store_ring_error_check(bsa)) { + break; + } + __fallthrough; +#endif /* CONFIG_XTENSA_MMU && CONFIG_USERSPACE */ default: reason = K_ERR_CPU_EXCEPTION; @@ -661,7 +671,6 @@ void *xtensa_excint1_c(void *esf) if (cause == EXCCAUSE_ILLEGAL) { if (pc == (void *)&xtensa_arch_except_epc) { cause = 63; - __asm__ volatile("wsr.exccause %0" : : "r"(cause)); reason = bsa->a2; } else if (pc == (void *)&xtensa_arch_kernel_oops_epc) { cause = 64; /* kernel oops */ @@ -674,11 +683,13 @@ void *xtensa_excint1_c(void *esf) */ print_stack = (void *)bsa->a3; } + + bsa->exccause = cause; } skip_checks: if (reason != K_ERR_KERNEL_OOPS) { - print_fatal_exception(print_stack, cause, is_dblexc, depc); + print_fatal_exception(print_stack, is_dblexc, depc); } #ifdef CONFIG_XTENSA_EXCEPTION_ENTER_GDB extern void z_gdb_isr(struct arch_esf *esf); diff --git a/arch/xtensa/core/xtensa_asm2_util.S b/arch/xtensa/core/xtensa_asm2_util.S index ffa75bbc3a6a0..24893db692dc5 100644 --- a/arch/xtensa/core/xtensa_asm2_util.S +++ b/arch/xtensa/core/xtensa_asm2_util.S @@ -3,7 +3,7 @@ * * SPDX-License-Identifier: Apache-2.0 */ -#include +#include #include #include diff --git a/arch/xtensa/include/xtensa_asm2_s.h b/arch/xtensa/include/xtensa_asm2.inc.S similarity index 97% rename from arch/xtensa/include/xtensa_asm2_s.h rename to arch/xtensa/include/xtensa_asm2.inc.S index af345ab308360..9d8ce2bf7b8a1 100644 --- a/arch/xtensa/include/xtensa_asm2_s.h +++ b/arch/xtensa/include/xtensa_asm2.inc.S @@ -431,6 +431,29 @@ 
_xstack_returned_\@: */ s32i a2, a1, ___xtensa_irq_bsa_t_scratch_OFFSET +#ifdef CONFIG_USERSPACE + /* When restoring context via xtensa_switch and + * returning from non-nested interrupts, we will be + * using the stashed PS value in the thread struct + * instead of the one in the thread stack. Both of + * these scenarios will have nested value of 0. + * So when nested value is zero, we store the PS + * value into thread struct. + */ + rsr.ZSR_CPU a0 + l32i a2, a0, ___cpu_t_nested_OFFSET + bnez a2, _excint_skip_ps_save_to_thread + + l32i a2, a0, ___cpu_t_current_OFFSET + s32i a3, a2, _thread_offset_to_return_ps + +_excint_skip_ps_save_to_thread: + /* DEF_EXCINT saved PS into A3 so we need to restore + * A3 here before proceeding. + */ + l32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET +#endif + ODD_REG_SAVE a0, a1 #if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING) @@ -663,7 +686,7 @@ _Level\LVL\()Vector: * For double exception, DEPC in saved in earlier vector * code. */ - wsr a0, ZSR_EXCCAUSE_SAVE + wsr a0, ZSR_A0SAVE esync @@ -690,15 +713,11 @@ _Level\LVL\()Vector: * jump to an infinite loop, or quit the simulator, or invoke * debugger. */ - rsr a0, ZSR_EXCCAUSE_SAVE + rsr a0, ZSR_A0SAVE j _TripleFault _not_triple_fault: - rsr.exccause a0 - - xsr a0, ZSR_EXCCAUSE_SAVE - - esync + rsr a0, ZSR_A0SAVE .endif #endif @@ -707,6 +726,16 @@ _Level\LVL\()Vector: s32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET s32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET + /* Save registers needed for handling the exception as + * these registers can be overwritten during nested + * exceptions. + */ + rsr.exccause a0 + s32i a0, a1, ___xtensa_irq_bsa_t_exccause_OFFSET + + rsr.excvaddr a0 + s32i a0, a1, ___xtensa_irq_bsa_t_excvaddr_OFFSET + /* Level "1" is the exception handler, which uses a different * calling convention. No special register holds the * interrupted PS, instead we just assume that the CPU has @@ -734,22 +763,10 @@ _Level\LVL\()Vector: .endif #ifdef CONFIG_USERSPACE - /* When restoring context via xtensa_switch and - * returning from non-nested interrupts, we will be - * using the stashed PS value in the thread struct - * instead of the one in the thread stack. Both of - * these scenarios will have nested value of 0. - * So when nested value is zero, we store the PS - * value into thread struct. + /* Stash the PS into A3 so EXCINT_HANDLER can read this + * and save it into thread struct if needed. */ - rsr.ZSR_CPU a3 - l32i a2, a3, ___cpu_t_nested_OFFSET - bnez a2, _excint_skip_ps_save_to_thread_\LVL - - l32i a2, a3, ___cpu_t_current_OFFSET - s32i a0, a2, _thread_offset_to_return_ps - -_excint_skip_ps_save_to_thread_\LVL: + mov a3, a0 #endif rsr.epc\LVL a0 diff --git a/arch/xtensa/include/xtensa_asm2_context.h b/arch/xtensa/include/xtensa_asm2_context.h index 56fc84b56acbd..8e1bfc1099892 100644 --- a/arch/xtensa/include/xtensa_asm2_context.h +++ b/arch/xtensa/include/xtensa_asm2_context.h @@ -116,7 +116,7 @@ #endif /* Must have fields regardless of features. */ -#define _BSA_PADDING_COMMON (sizeof(uintptr_t) * 12U) +#define _BSA_PADDING_COMMON (sizeof(uintptr_t) * 13U) /* Raw size by adding up all the above. */ #define _BSA_PADDING_BASE_SIZE \ @@ -152,6 +152,31 @@ * are saved after the BSA. 
*/ struct xtensa_irq_base_save_area { +#if XCHAL_HAVE_THREADPTR + uintptr_t threadptr; +#endif + +#if XCHAL_HAVE_S32C1I + uintptr_t scompare1; +#endif + + uintptr_t exccause; + uintptr_t excvaddr; + +#if XCHAL_HAVE_LOOPS + uintptr_t lcount; + uintptr_t lend; + uintptr_t lbeg; +#endif + + uintptr_t sar; + uintptr_t ps; + uintptr_t pc; + uintptr_t a0; + uintptr_t scratch; + uintptr_t a2; + uintptr_t a3; + #if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING) uintptr_t fcr; uintptr_t fsr; @@ -185,30 +210,6 @@ struct xtensa_irq_base_save_area { uint8_t hifi[XCHAL_CP1_SA_SIZE + XCHAL_CP1_SA_ALIGN]; #endif -#if XCHAL_HAVE_THREADPTR - uintptr_t threadptr; -#endif - -#if XCHAL_HAVE_S32C1I - uintptr_t scompare1; -#endif - - uintptr_t exccause; - -#if XCHAL_HAVE_LOOPS - uintptr_t lcount; - uintptr_t lend; - uintptr_t lbeg; -#endif - - uintptr_t sar; - uintptr_t ps; - uintptr_t pc; - uintptr_t a0; - uintptr_t scratch; - uintptr_t a2; - uintptr_t a3; - uintptr_t padding[_BSA_PADDING_NEEDED / sizeof(uintptr_t)]; uintptr_t caller_a0; diff --git a/arch/xtensa/include/xtensa_internal.h b/arch/xtensa/include/xtensa_internal.h index 982a2711b2351..c56e1068afe30 100644 --- a/arch/xtensa/include/xtensa_internal.h +++ b/arch/xtensa/include/xtensa_internal.h @@ -74,6 +74,30 @@ void xtensa_userspace_enter(k_thread_entry_t user_entry, */ bool xtensa_mem_kernel_has_access(const void *addr, size_t size, int write); +/** + * @brief Handle DTLB multihit exception. + * + * Handle DTLB multihit exception by invalidating all auto-refilled DTLBs of + * a particular memory page. + */ +void xtensa_exc_dtlb_multihit_handle(void); + +/** + * @brief Check if it is a true load/store ring exception. + * + * When a page can be accessed by both kernel and user threads, the autofill DTLB + * may contain an entry for kernel thread. This will result in load/store ring + * exception when it is accessed by user thread later. In this case, this will + * invalidate all associated TLBs related to kernel access so hardware can reload + * the page table the correct permission for user thread. + * + * @param bsa_p Pointer to BSA struct. + * + * @retval True This is a true access violation. + * @retval False Access violation is due to incorrectly cached auto-refilled TLB. + */ +bool xtensa_exc_load_store_ring_error_check(void *bsa_p); + /** * @} */ diff --git a/arch/xtensa/include/xtensa_mmu_priv.h b/arch/xtensa/include/xtensa_mmu_priv.h index 7cd51f1329361..eab1df8c27423 100644 --- a/arch/xtensa/include/xtensa_mmu_priv.h +++ b/arch/xtensa/include/xtensa_mmu_priv.h @@ -52,17 +52,38 @@ #define XTENSA_MMU_PTE_RING_SHIFT 4U /** Number of bits to shift for SW reserved ared in PTE */ -#define XTENSA_MMU_PTE_SW_SHIFT 6U +#define XTENSA_MMU_PTE_SW_SHIFT 6U /** Mask for SW bits in PTE */ -#define XTENSA_MMU_PTE_SW_MASK 0x00000FC0U +#define XTENSA_MMU_PTE_SW_MASK 0x00000FC0U /** - * Internal bit just used to indicate that the attr field must - * be set in the SW bits too. It is used later when duplicating the - * kernel page tables. + * Number of bits to shift for backup attributes in PTE SW field. + * + * This is relative to the SW field, not the PTE entry. + */ +#define XTENSA_MMU_PTE_SW_ATTR_SHIFT 0U + +/** + * Mask for backup attributes in PTE SW field. + * + * This is relative to the SW field, not the PTE entry. + */ +#define XTENSA_MMU_PTE_SW_ATTR_MASK 0x0000000FU + +/** + * Number of bits to shift for backup ring value in PTE SW field. + * + * This is relative to the SW field, not the PTE entry. 
+ */ +#define XTENSA_MMU_PTE_SW_RING_SHIFT 4U + +/** + * Mask for backup ring value in PTE SW field. + * + * This is relative to the SW field, not the PTE entry. */ -#define XTENSA_MMU_PTE_ATTR_ORIGINAL BIT(31) +#define XTENSA_MMU_PTE_SW_RING_MASK 0x00000030U /** Construct a page table entry (PTE) */ #define XTENSA_MMU_PTE(paddr, ring, sw, attr) \ @@ -87,6 +108,19 @@ #define XTENSA_MMU_PTE_SW_GET(pte) \ (((pte) & XTENSA_MMU_PTE_SW_MASK) >> XTENSA_MMU_PTE_SW_SHIFT) +/** Construct a PTE SW field to be used for backing up PTE ring and attributes. */ +#define XTENSA_MMU_PTE_SW(ring, attr) \ + ((((ring) << XTENSA_MMU_PTE_SW_RING_SHIFT) & XTENSA_MMU_PTE_SW_RING_MASK) | \ + (((attr) << XTENSA_MMU_PTE_SW_ATTR_SHIFT) & XTENSA_MMU_PTE_SW_ATTR_MASK)) + +/** Get the backed up attributes from the PTE SW field. */ +#define XTENSA_MMU_PTE_SW_ATTR_GET(sw) \ + (((sw) & XTENSA_MMU_PTE_SW_ATTR_MASK) >> XTENSA_MMU_PTE_SW_ATTR_SHIFT) + +/** Get the backed up ring value from the PTE SW field. */ +#define XTENSA_MMU_PTE_SW_RING_GET(sw) \ + (((sw) & XTENSA_MMU_PTE_SW_RING_MASK) >> XTENSA_MMU_PTE_SW_RING_SHIFT) + /** Set the ring in a PTE */ #define XTENSA_MMU_PTE_RING_SET(pte, ring) \ (((pte) & ~XTENSA_MMU_PTE_RING_MASK) | \ @@ -148,8 +182,18 @@ /** Number of auto-refill ways */ #define XTENSA_MMU_NUM_TLB_AUTOREFILL_WAYS 4 -/** Indicate PTE is illegal. */ -#define XTENSA_MMU_PTE_ILLEGAL (BIT(3) | BIT(2)) +/** Attribute indicating PTE is illegal. */ +#define XTENSA_MMU_PTE_ATTR_ILLEGAL (BIT(3) | BIT(2)) + +/** Illegal PTE entry for Level 1 page tables */ +#define XTENSA_MMU_PTE_L1_ILLEGAL XTENSA_MMU_PTE_ATTR_ILLEGAL + +/** Illegal PTE entry for Level 2 page tables */ +#define XTENSA_MMU_PTE_L2_ILLEGAL \ + XTENSA_MMU_PTE(0, XTENSA_MMU_KERNEL_RING, \ + XTENSA_MMU_PTE_SW(XTENSA_MMU_KERNEL_RING, \ + XTENSA_MMU_PTE_ATTR_ILLEGAL), \ + XTENSA_MMU_PTE_ATTR_ILLEGAL) /** * PITLB HIT bit. @@ -169,6 +213,15 @@ */ #define XTENSA_MMU_PDTLB_HIT BIT(4) +/** + * PDTLB WAY mask. + * + * For more information see + * Xtensa Instruction Set Architecture (ISA) Reference Manual + * 4.6.5.7 Formats for Probing MMU Option TLB Entries + */ +#define XTENSA_MMU_PDTLB_WAY_MASK 0xFU + /** * Virtual address where the page table is mapped */ diff --git a/tests/kernel/mem_protect/userspace/Kconfig b/tests/kernel/mem_protect/userspace/Kconfig new file mode 100644 index 0000000000000..21f6ffed0245b --- /dev/null +++ b/tests/kernel/mem_protect/userspace/Kconfig @@ -0,0 +1,15 @@ +# Copyright (c) 2025 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +mainmenu "Userspace test" + +source "Kconfig.zephyr" + +config USERSPACE_SWITCHING_TESTS + bool "Run thread switching tests" + select SCHED_CPU_MASK if MP_MAX_NUM_CPUS > 1 + help + Run userspace_domain_switching tests. + + Enable this via board overlay. 
diff --git a/tests/kernel/mem_protect/userspace/boards/intel_adsp_ace30_ptl.conf b/tests/kernel/mem_protect/userspace/boards/intel_adsp_ace30_ptl.conf new file mode 100644 index 0000000000000..c0c9f02279360 --- /dev/null +++ b/tests/kernel/mem_protect/userspace/boards/intel_adsp_ace30_ptl.conf @@ -0,0 +1,2 @@ +CONFIG_MAX_THREAD_BYTES=3 +CONFIG_USERSPACE_SWITCHING_TESTS=y diff --git a/tests/kernel/mem_protect/userspace/boards/intel_adsp_ace30_ptl_sim.conf b/tests/kernel/mem_protect/userspace/boards/intel_adsp_ace30_ptl_sim.conf new file mode 100644 index 0000000000000..c0c9f02279360 --- /dev/null +++ b/tests/kernel/mem_protect/userspace/boards/intel_adsp_ace30_ptl_sim.conf @@ -0,0 +1,2 @@ +CONFIG_MAX_THREAD_BYTES=3 +CONFIG_USERSPACE_SWITCHING_TESTS=y diff --git a/tests/kernel/mem_protect/userspace/boards/qemu_cortex_a53.conf b/tests/kernel/mem_protect/userspace/boards/qemu_cortex_a53.conf new file mode 100644 index 0000000000000..e74376d117959 --- /dev/null +++ b/tests/kernel/mem_protect/userspace/boards/qemu_cortex_a53.conf @@ -0,0 +1,2 @@ +CONFIG_MAX_XLAT_TABLES=24 +CONFIG_USERSPACE_SWITCHING_TESTS=y diff --git a/tests/kernel/mem_protect/userspace/boards/qemu_cortex_a53_qemu_cortex_a53_smp.conf b/tests/kernel/mem_protect/userspace/boards/qemu_cortex_a53_qemu_cortex_a53_smp.conf new file mode 100644 index 0000000000000..e74376d117959 --- /dev/null +++ b/tests/kernel/mem_protect/userspace/boards/qemu_cortex_a53_qemu_cortex_a53_smp.conf @@ -0,0 +1,2 @@ +CONFIG_MAX_XLAT_TABLES=24 +CONFIG_USERSPACE_SWITCHING_TESTS=y diff --git a/tests/kernel/mem_protect/userspace/boards/qemu_x86.conf b/tests/kernel/mem_protect/userspace/boards/qemu_x86.conf new file mode 100644 index 0000000000000..a2d1cb23f9a90 --- /dev/null +++ b/tests/kernel/mem_protect/userspace/boards/qemu_x86.conf @@ -0,0 +1,2 @@ +CONFIG_X86_MAX_ADDITIONAL_MEM_DOMAINS=4 +CONFIG_USERSPACE_SWITCHING_TESTS=y diff --git a/tests/kernel/mem_protect/userspace/boards/qemu_x86_64.conf b/tests/kernel/mem_protect/userspace/boards/qemu_x86_64.conf new file mode 100644 index 0000000000000..a2d1cb23f9a90 --- /dev/null +++ b/tests/kernel/mem_protect/userspace/boards/qemu_x86_64.conf @@ -0,0 +1,2 @@ +CONFIG_X86_MAX_ADDITIONAL_MEM_DOMAINS=4 +CONFIG_USERSPACE_SWITCHING_TESTS=y diff --git a/tests/kernel/mem_protect/userspace/boards/qemu_x86_atom_nopae.conf b/tests/kernel/mem_protect/userspace/boards/qemu_x86_atom_nopae.conf new file mode 100644 index 0000000000000..a2d1cb23f9a90 --- /dev/null +++ b/tests/kernel/mem_protect/userspace/boards/qemu_x86_atom_nopae.conf @@ -0,0 +1,2 @@ +CONFIG_X86_MAX_ADDITIONAL_MEM_DOMAINS=4 +CONFIG_USERSPACE_SWITCHING_TESTS=y diff --git a/tests/kernel/mem_protect/userspace/boards/qemu_x86_lakemont.conf b/tests/kernel/mem_protect/userspace/boards/qemu_x86_lakemont.conf new file mode 100644 index 0000000000000..a2d1cb23f9a90 --- /dev/null +++ b/tests/kernel/mem_protect/userspace/boards/qemu_x86_lakemont.conf @@ -0,0 +1,2 @@ +CONFIG_X86_MAX_ADDITIONAL_MEM_DOMAINS=4 +CONFIG_USERSPACE_SWITCHING_TESTS=y diff --git a/tests/kernel/mem_protect/userspace/boards/qemu_xtensa_dc233c_mmu.conf b/tests/kernel/mem_protect/userspace/boards/qemu_xtensa_dc233c_mmu.conf new file mode 100644 index 0000000000000..260345c5a32a8 --- /dev/null +++ b/tests/kernel/mem_protect/userspace/boards/qemu_xtensa_dc233c_mmu.conf @@ -0,0 +1 @@ +CONFIG_USERSPACE_SWITCHING_TESTS=y diff --git a/tests/kernel/mem_protect/userspace/src/main.c b/tests/kernel/mem_protect/userspace/src/main.c index 6211f5ebc5995..032c2e7ed5ed2 100644 --- 
a/tests/kernel/mem_protect/userspace/src/main.c +++ b/tests/kernel/mem_protect/userspace/src/main.c @@ -63,7 +63,7 @@ K_APP_BMEM(alt_part) volatile bool alt_bool; static struct k_thread test_thread; static K_THREAD_STACK_DEFINE(test_stack, STACKSIZE); -static void clear_fault(void) +void clear_fault(void) { expect_fault = false; compiler_barrier(); @@ -1191,6 +1191,12 @@ void *userspace_setup(void) priv_stack_ptr = (char *)((uintptr_t)ztest_thread_stack + Z_RISCV_STACK_GUARD_SIZE); #endif +#elif defined(CONFIG_XTENSA) + struct xtensa_thread_stack_header *hdr; + void *vhdr = ((struct xtensa_thread_stack_header *)ztest_thread_stack); + + hdr = vhdr; + priv_stack_ptr = (((char *)&hdr->privilege_stack) + (sizeof(hdr->privilege_stack) - 1)); #endif k_thread_access_grant(k_current_get(), &test_thread, &test_stack, diff --git a/tests/kernel/mem_protect/userspace/src/switching.c b/tests/kernel/mem_protect/userspace/src/switching.c new file mode 100644 index 0000000000000..3b312de10fa7f --- /dev/null +++ b/tests/kernel/mem_protect/userspace/src/switching.c @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2025 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include +#include /* for z_libc_partition */ + +#define NUM_THREADS 3 +#define TIMES_SWITCHING 10 +#define STACKSIZE (256 + CONFIG_TEST_EXTRA_STACK_SIZE) + +extern void clear_fault(void); + +#ifdef CONFIG_USERSPACE_SWITCHING_TESTS +/* + * Even numbered threads use domain_a. + * Odd numbered threads use domain_b. + */ + +struct k_mem_domain domain_a; +K_APPMEM_PARTITION_DEFINE(partition_a); +K_APP_BMEM(partition_a) volatile unsigned int part_a_loops[NUM_THREADS]; + +struct k_mem_domain domain_b; +K_APPMEM_PARTITION_DEFINE(partition_b); +K_APP_BMEM(partition_b) volatile unsigned int part_b_loops[NUM_THREADS]; + +static struct k_thread threads[NUM_THREADS]; +static K_THREAD_STACK_ARRAY_DEFINE(threads_stacks, NUM_THREADS, STACKSIZE); + +static K_SEM_DEFINE(sem_switching, 1, 1); + +static void switch_thread_fn(void *arg1, void *arg2, void *arg3) +{ + volatile unsigned int *loop_ptr; + const uintptr_t thread_id = (uintptr_t)arg1; + + if ((thread_id % 2) == 0) { + loop_ptr = &part_a_loops[thread_id]; + } else { + loop_ptr = &part_b_loops[thread_id]; + } + + for (int i = 0; i < TIMES_SWITCHING; i++) { +#ifdef CONFIG_DEBUG + TC_PRINT("Thread %lu (%u)\n", thread_id, *loop_ptr); +#endif + + *loop_ptr += 1; + compiler_barrier(); + + /* Make sure this can still use kernel objects. */ + k_sem_take(&sem_switching, K_FOREVER); + k_sem_give(&sem_switching); + + k_yield(); + } +} + +#endif /* CONFIG_USERSPACE_SWITCHING_TESTS */ + +static void run_switching(int num_kernel_threads) +{ +#ifdef CONFIG_USERSPACE_SWITCHING_TESTS + unsigned int i; + int remaining_kernel_threads = num_kernel_threads; + + /* Not expecting any errors. */ + clear_fault(); + + for (i = 0; i < NUM_THREADS; i++) { + uint32_t perms; + bool is_kernel_thread = remaining_kernel_threads > 0; + + if (is_kernel_thread) { + perms = K_INHERIT_PERMS; + + remaining_kernel_threads--; + } else { + perms = K_INHERIT_PERMS | K_USER; + } + + /* Clear loop counters. */ + part_a_loops[i] = 0; + part_b_loops[i] = 0; + + /* Must delay start of threads to apply memory domains to them. 
*/ + k_thread_create(&threads[i], threads_stacks[i], STACKSIZE, switch_thread_fn, + (void *)(uintptr_t)i, NULL, NULL, -1, perms, K_FOREVER); + +#ifdef CONFIG_SCHED_CPU_MASK + /* + * Make sure all created threads run on the same CPU + * so that memory domain switching is being tested. + */ + (void)k_thread_cpu_pin(&threads[i], 0); +#endif /* CONFIG_SCHED_CPU_MASK */ + + k_thread_access_grant(&threads[i], &sem_switching); + + /* + * Kernel threads by default has access to all memory. + * So no need to put them into memory domains. + */ + if (!is_kernel_thread) { + /* Remember EVEN -> A, ODD -> B. */ + if ((i % 2) == 0) { + k_mem_domain_add_thread(&domain_a, &threads[i]); + } else { + k_mem_domain_add_thread(&domain_b, &threads[i]); + } + } + } + + /* Start the thread loops. */ + for (i = 0; i < NUM_THREADS; i++) { + k_thread_start(&threads[i]); + } + + /* Wait for all threads to finish. */ + for (i = 0; i < NUM_THREADS; i++) { + k_thread_join(&threads[i], K_FOREVER); + } + + /* Check to make sure all threads have looped enough times. */ + for (i = 0; i < NUM_THREADS; i++) { + int loops; + + /* + * Each thread should never have access to the loop counters on + * the other partition. Accessing them should generate faults. + * Though we check just in case. + */ + if ((i % 2) == 0) { + loops = part_a_loops[i]; + + zassert_equal(part_b_loops[i], 0, "part_b_loops[%i] should be zero but not", + i); + } else { + loops = part_b_loops[i]; + + zassert_equal(part_a_loops[i], 0, "part_a_loops[%i] should be zero but not", + i); + } + + zassert_equal(loops, TIMES_SWITCHING, + "thread %u has not done enough loops (%u != %u)", i, loops, + TIMES_SWITCHING); + } +#else /* CONFIG_USERSPACE_SWITCHING_TESTS */ + ztest_test_skip(); +#endif /* CONFIG_USERSPACE_SWITCHING_TESTS */ +} + +ZTEST(userspace_domain_switching, test_kernel_only_switching) +{ + /* + * Run with all kernel threads. + * + * This should work as kernel threads by default have access to + * all memory, without having to attach them to memory domains. + * This serves as a baseline check. + */ + run_switching(NUM_THREADS); +} + +ZTEST(userspace_domain_switching, test_user_only_switching) +{ + /* Run with all user threads. */ + run_switching(0); +} + +ZTEST(userspace_domain_switching, test_kernel_user_mix_switching) +{ + /* Run with one kernel thread while others are all user threads. */ + run_switching(1); +} + +void *switching_setup(void) +{ +#ifdef CONFIG_USERSPACE_SWITCHING_TESTS + static bool already_inited; + + if (already_inited) { + return NULL; + } + + struct k_mem_partition *parts_a[] = { +#if Z_LIBC_PARTITION_EXISTS + &z_libc_partition, +#endif + &ztest_mem_partition, &partition_a + }; + + struct k_mem_partition *parts_b[] = { +#if Z_LIBC_PARTITION_EXISTS + &z_libc_partition, +#endif + &ztest_mem_partition, &partition_b + }; + + zassert_equal(k_mem_domain_init(&domain_a, ARRAY_SIZE(parts_a), parts_a), 0, + "failed to initialize memory domain A"); + + zassert_equal(k_mem_domain_init(&domain_b, ARRAY_SIZE(parts_b), parts_b), 0, + "failed to initialize memory domain B"); + + already_inited = true; +#endif /* CONFIG_USERSPACE_SWITCHING_TESTS */ + + return NULL; +} + +void switching_before(void *fixture) +{ +#ifdef CONFIG_USERSPACE_SWITCHING_TESTS + int i; + + for (i = 0; i < NUM_THREADS; i++) { + k_thread_access_grant(k_current_get(), &threads[i]); + } +#endif /* CONFIG_USERSPACE_SWITCHING_TESTS */ +} + +ZTEST_SUITE(userspace_domain_switching, NULL, switching_setup, switching_before, NULL, NULL);
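
For illustration only (not part of the patch): the heart of the ptables.c change is that a mapping's original ring and attribute values are now stashed in the PTE's 6-bit SW field when the kernel ranges are mapped (OPTION_SAVE_ATTRS) and put back when a region is reset or unmapped (OPTION_RESTORE_ATTRS / restore_pte()). The standalone sketch below reuses the same shift/mask values as the XTENSA_MMU_PTE_SW* macros added in xtensa_mmu_priv.h, but the pte_sw_pack()/pte_restore() helpers and the concrete ring/attribute numbers are illustrative stand-ins, not Zephyr APIs, and the "illegal" handling is simplified relative to XTENSA_MMU_PTE_L2_ILLEGAL.

/*
 * Minimal sketch of the SW-bits save/restore round trip, assuming the
 * shift/mask layout from xtensa_mmu_priv.h in this patch. Helper names
 * and the ring/attribute values used in main() are hypothetical.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_ATTR_MASK     0x0000000FU
#define PTE_RING_SHIFT    4U
#define PTE_RING_MASK     0x00000030U
#define PTE_SW_SHIFT      6U
#define PTE_SW_MASK       0x00000FC0U
#define PTE_SW_ATTR_MASK  0x0000000FU
#define PTE_SW_RING_SHIFT 4U
#define PTE_SW_RING_MASK  0x00000030U
#define PTE_ATTR_ILLEGAL  0x0000000CU /* BIT(3) | BIT(2) */

/* Pack ring and attributes into the SW field, like XTENSA_MMU_PTE_SW(). */
static uint32_t pte_sw_pack(uint32_t ring, uint32_t attr)
{
	return ((ring << PTE_SW_RING_SHIFT) & PTE_SW_RING_MASK) |
	       (attr & PTE_SW_ATTR_MASK);
}

/* Recover the stashed ring/attributes, in the spirit of restore_pte(). */
static uint32_t pte_restore(uint32_t pte)
{
	uint32_t sw = (pte & PTE_SW_MASK) >> PTE_SW_SHIFT;
	uint32_t attr = sw & PTE_SW_ATTR_MASK;
	uint32_t ring = (sw & PTE_SW_RING_MASK) >> PTE_SW_RING_SHIFT;

	if (attr == PTE_ATTR_ILLEGAL) {
		/* Nothing stashed: collapse to an illegal entry (the real
		 * code returns XTENSA_MMU_PTE_L2_ILLEGAL here).
		 */
		return PTE_ATTR_ILLEGAL;
	}

	pte = (pte & ~PTE_ATTR_MASK) | attr;
	pte = (pte & ~PTE_RING_MASK) | (ring << PTE_RING_SHIFT);
	return pte;
}

int main(void)
{
	/* Ring 0 and attribute 0x1 are placeholders for illustration. */
	uint32_t ring = 0U, attr = 0x1U;
	uint32_t pte = (pte_sw_pack(ring, attr) << PTE_SW_SHIFT) |
		       (ring << PTE_RING_SHIFT) | attr;

	/* A later remap may rewrite the live ring/attribute bits... */
	uint32_t remapped = (pte & ~(PTE_ATTR_MASK | PTE_RING_MASK)) |
			    (1U << PTE_RING_SHIFT) | 0x3U;

	/* ...and restoring from the SW field brings the original PTE back. */
	assert(pte_restore(remapped) == pte);
	printf("restored PTE: 0x%08x\n", pte_restore(remapped));

	return 0;
}

One design point this makes visible: because the stashed attribute value doubles as a "was anything saved?" marker (XTENSA_MMU_PTE_ATTR_ILLEGAL meaning "nothing saved"), the same restore path can serve both dup_table() and l2_page_table_unmap() without a separate valid bit.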