diff --git a/src/arch/armv8/aarch32/exceptions.S b/src/arch/armv8/aarch32/exceptions.S index fd4aeb96d..aa0b8ce79 100644 --- a/src/arch/armv8/aarch32/exceptions.S +++ b/src/arch/armv8/aarch32/exceptions.S @@ -50,11 +50,34 @@ push {r0-r12} SAVE_ELR_SPSR +#ifdef MEM_PROT_MPU + mrc p15, 4, r0, c13, c0, 2 // Read HTPIDR (CPU base address) + add r0, r0, #CPU_AS_ARCH_MASK_OFF + ldr r0, [r0] + mcr p15, 4, r0, c6, c1, 1 +#endif /* MEM_PROT_MPU */ + .endm .macro VM_ENTRY mrc p15, 4, r0, c13, c0, 2 // Read HTPIDR (CPU base address) + +#ifdef MEM_PROT_MPU + ldr r1, [r0, #CPU_VCPU_OFF] + mov r2, #VCPU_VM_OFF + add r1, r1, r2 + ldr r1, [r1] + ldr r1, [r1, #VM_AS_ARCH_MASK_OFF] + + mov r2, #CPU_ARCH_PROFILE_MPU_LOCKED_OFF + add r2, r2, r0 + ldr r2, [r2] + + orr r1, r1, r2 + mcr p15, 4, r1, c6, c1, 1 +#endif /* MEM_PROT_MPU */ + ldr r0, [r0, #CPU_VCPU_OFF] add r0, r0, #VCPU_REGS_OFF mov sp, r0 diff --git a/src/arch/armv8/aarch32/inc/arch/subarch/sysregs.h b/src/arch/armv8/aarch32/inc/arch/subarch/sysregs.h index e7cbee288..238d97331 100644 --- a/src/arch/armv8/aarch32/inc/arch/subarch/sysregs.h +++ b/src/arch/armv8/aarch32/inc/arch/subarch/sysregs.h @@ -115,6 +115,7 @@ SYSREG_GEN_ACCESSORS(ich_hcr_el2, 4, c12, c11, 0) SYSREG_GEN_ACCESSORS_64(icc_sgi1r_el1, 0, c12) SYSREG_GEN_ACCESSORS(vsctlr_el2, 4, c2, c0, 0) +SYSREG_GEN_ACCESSORS(sctlr_el2, 4, c1, c0, 0) #define SYSREG_GEN_GIC_LR(n, crn1, crn2, op2) \ SYSREG_GEN_ACCESSORS(ich_lr##n, 4, c12, crn1, op2) \ diff --git a/src/arch/armv8/aarch64/exceptions.S b/src/arch/armv8/aarch64/exceptions.S index c4c4a7095..c581732c7 100644 --- a/src/arch/armv8/aarch64/exceptions.S +++ b/src/arch/armv8/aarch64/exceptions.S @@ -64,11 +64,35 @@ add x0, x0, x1 mov sp, x0 + #ifdef MEM_PROT_MPU + mrs x0, tpidr_el2 + add x0, x0, #CPU_AS_ARCH_MASK_OFF + ldr x0, [x0] + msr prenr_el2, x0 + #endif /* MEM_PROT_MPU */ + .endm .global vcpu_arch_entry vcpu_arch_entry: mrs x0, tpidr_el2 + + #ifdef MEM_PROT_MPU + ldr x1, [x0, #CPU_VCPU_OFF] + mov x2, #VCPU_VM_OFF + add x1, x1, x2 + ldr x1, [x1] + ldr x1, [x1, #VM_AS_ARCH_MASK_OFF] + + mov x2, #CPU_ARCH_PROFILE_MPU_LOCKED_OFF + add x2, x2, x0 + ldr x2, [x2] + + orr x1, x1, x2 + + msr prenr_el2, x1 + #endif /* MEM_PROT_MPU */ + ldr x0, [x0, #CPU_VCPU_OFF] add x0, x0, #VCPU_REGS_OFF mov sp, x0 diff --git a/src/arch/armv8/aarch64/inc/arch/subarch/sysregs.h b/src/arch/armv8/aarch64/inc/arch/subarch/sysregs.h index 0dbf632cd..949f85cc3 100644 --- a/src/arch/armv8/aarch64/inc/arch/subarch/sysregs.h +++ b/src/arch/armv8/aarch64/inc/arch/subarch/sysregs.h @@ -87,6 +87,7 @@ SYSREG_GEN_ACCESSORS(vttbr_el2) SYSREG_GEN_ACCESSORS(id_aa64mmfr0_el1) SYSREG_GEN_ACCESSORS(tpidr_el2) SYSREG_GEN_ACCESSORS(vsctlr_el2) +SYSREG_GEN_ACCESSORS(sctlr_el2) SYSREG_GEN_ACCESSORS(mpuir_el2) SYSREG_GEN_ACCESSORS(prselr_el2) SYSREG_GEN_ACCESSORS(prbar_el2) diff --git a/src/arch/armv8/armv8-r/aarch32/boot.S b/src/arch/armv8/armv8-r/aarch32/boot.S index 8a69d18a6..91c0aaf35 100644 --- a/src/arch/armv8/armv8-r/aarch32/boot.S +++ b/src/arch/armv8/armv8-r/aarch32/boot.S @@ -50,68 +50,8 @@ boot_arch_profile_init: /* r4 contains the id of the MPU entry being used */ mov r4, #(-1) - /** - * Map loadable image (and possibly unloadable) - * If the vm image section is used and has built-in vm images, we need to map the loadble and - * non-loadble region of the image separately. Otherwise we can map it as a single region. 
- */ - add r4, r4, #1 - mcr p15, 4, r4, c6, c2, 1 // HPRSELR - ldr r3, =_image_start - and r3, r3, #PRBAR_BASE_MSK - orr r3, r3, #PRBAR_SH_IS - orr r3, r3, #PRBAR_AP_RW_EL2 - mcr p15, 4, r3, c6, c3, 0 // HPRBAR - ldr r10, =_image_load_end - ldr r11, =_image_noload_start - cmp r10, r11 - ldreq r3, =_image_end - ldrne r3, =_image_load_end - sub r3, r3, #1 - and r3, r3, #PRLAR_LIMIT_MSK - orr r3, r3, #(PRLAR_ATTR(1) | PRLAR_EN) - mcr p15, 4, r3, c6, c3, 1 // HPRLAR - - /* Map Image Non-loadable if needed */ - ldr r10, =_image_load_end - ldr r11, =_image_noload_start - cmp r10, r11 - beq skip_non_loadable - add r4, r4, #1 - mcr p15, 4, r4, c6, c2, 1 // HPRSELR - ldr r3, =_image_noload_start - and r3, r3, #PRBAR_BASE_MSK - orr r3, r3, #PRBAR_SH_IS - orr r3, r3, #PRBAR_AP_RW_EL2 - mcr p15, 4, r3, c6, c3, 0 // HPRBAR - ldr r3, =_image_end - sub r3, r3, #1 - and r3, r3, #PRLAR_LIMIT_MSK - orr r3, r3, #(PRLAR_ATTR(1) | PRLAR_EN) - mcr p15, 4, r3, c6, c3, 1 // HPRLAR - -skip_non_loadable: - - /* Region 2 - CPU */ - add r4, r4, #1 - mcr p15, 4, r4, c6, c2, 1 // HPRSELR - mrc p15, 4, r3, c13, c0, 2 // HTPIDR (read CPU base addr) - and r3, r3, #PRBAR_BASE_MSK - orr r3, r3, #PRBAR_SH_IS - orr r3, r3, #PRBAR_AP_RW_EL2 - mcr p15, 4, r3, c6, c3, 0 // HPRBAR - mrc p15, 4, r3, c13, c0, 2 // HTPIDR (read CPU base addr) - add r3, r3, #CPU_SIZE - sub r3, r3, #1 - and r3, r3, #PRLAR_LIMIT_MSK - orr r3, #(PRLAR_ATTR(1) | PRLAR_EN) - mcr p15, 4, r3, c6, c3, 1 // HPRLAR - - dsb - isb - /* Enable caches and MPU */ - ldr r4, =(SCTLR_RES1_AARCH32 | SCTLR_C | SCTLR_I | SCTLR_M) + ldr r4, =(SCTLR_RES1_AARCH32 | SCTLR_C | SCTLR_I) mcr p15, 4, r4, c1, c0, 0 // HSCTLR dsb diff --git a/src/arch/armv8/armv8-r/aarch64/boot.S b/src/arch/armv8/armv8-r/aarch64/boot.S index 75a90f1af..ecf8e035d 100644 --- a/src/arch/armv8/armv8-r/aarch64/boot.S +++ b/src/arch/armv8/armv8-r/aarch64/boot.S @@ -45,76 +45,10 @@ boot_arch_profile_init: ldr x3, =MAIR_EL2_DFLT msr MAIR_EL2, x3 - /* x4 contains the id of the MPU entry being used */ - mov x4, 0 - - /** - * Map loadable image (and possibly unloadable) - * If the vm image section is used and has built-in vm images, we need to map the loadble and - * non-loadble region of the image separately. Otherwise we can map it as a single region. 
- */ - msr prselr_el2, x4 - isb - ldr x3, =_image_start - and x3, x3, PRBAR_BASE_MSK - orr x3, x3, (PRBAR_SH_IS | PRBAR_AP_RW_EL2) - msr prbar_el2, x3 - ldr x10, =_image_load_end - ldr x11, =_image_noload_start - cmp x10, x11 - bne 1f - ldr x3, =_image_end - b 2f -1: - ldr x3, =_image_load_end -2: - sub x3, x3, 1 - and x3, x3, PRLAR_LIMIT_MSK - orr x3, x3, (PRLAR_ATTR(1) | PRLAR_EN) - msr prlar_el2, x3 - - /* Map Image Non-loadable if needed */ - ldr x10, =_image_load_end - ldr x11, =_image_noload_start - cmp x10, x11 - beq skip_non_loadable - - add x4, x4, 1 - msr prselr_el2, x4 - ldr x3, =_image_noload_start - and x3, x3, PRBAR_BASE_MSK - orr x3, x3, PRBAR_SH_IS - add x3, x3, PRBAR_AP_RW_EL2 - msr prbar_el2, x3 - isb - ldr x3, =_image_end - sub x3, x3, 1 - and x3, x3, PRLAR_LIMIT_MSK - orr x3, x3, (PRLAR_ATTR(1) | PRLAR_EN) - msr prlar_el2, x3 - -skip_non_loadable: - - /* Region 2 - CPU */ - add x4, x4, 1 - msr prselr_el2, x4 - isb - mrs x3, tpidr_el2 - and x3, x3, PRBAR_BASE_MSK - orr x3, x3, (PRBAR_SH_IS | PRBAR_AP_RW_EL2) - msr prbar_el2, x3 - mrs x3, tpidr_el2 - ldr x5, =CPU_SIZE - add x3, x3, x5 - sub x3, x3, 1 - and x3, x3, PRLAR_LIMIT_MSK - orr x3, x3, (PRLAR_ATTR(1) | PRLAR_EN) - msr prlar_el2, x3 - isb /* Enable caches and MPU */ - ldr x4, =(SCTLR_RES1 | SCTLR_C | SCTLR_I | SCTLR_M) + ldr x4, =(SCTLR_RES1 | SCTLR_C | SCTLR_I) msr sctlr_el2, x4 dsb nsh diff --git a/src/arch/armv8/armv8-r/inc/arch/mem.h b/src/arch/armv8/armv8-r/inc/arch/mem.h index cbeb2843b..0323a7762 100644 --- a/src/arch/armv8/armv8-r/inc/arch/mem.h +++ b/src/arch/armv8/armv8-r/inc/arch/mem.h @@ -36,6 +36,10 @@ typedef union { }; } mem_flags_t; +struct addr_space_arch { + unsigned long mpu_entry_mask; +}; + #define PTE_FLAGS(_prbar, _prlar) \ ((mem_flags_t){ \ .prbar = (_prbar), \ diff --git a/src/arch/armv8/armv8-r/inc/arch/mpu.h b/src/arch/armv8/armv8-r/inc/arch/mpu.h new file mode 100644 index 000000000..53123ae5b --- /dev/null +++ b/src/arch/armv8/armv8-r/inc/arch/mpu.h @@ -0,0 +1,21 @@ +/** + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) Bao Project and Contributors. All rights reserved. + */ + +#ifndef __ARCH_MPU_H__ +#define __ARCH_MPU_H__ + +#include +#include +#include +#include + +struct mpu_arch { + BITMAP_ALLOC(allocated_entries, MPU_ARCH_MAX_NUM_ENTRIES); + BITMAP_ALLOC(locked_entries, MPU_ARCH_MAX_NUM_ENTRIES); +}; + +bool mpu_perms_compatible(unsigned long perms1, unsigned long perms2); + +#endif /* __ARCH_MPU_H__ */ diff --git a/src/arch/armv8/armv8-r/inc/arch/profile/cpu.h b/src/arch/armv8/armv8-r/inc/arch/profile/cpu.h index 6f8f20479..95c0981c8 100644 --- a/src/arch/armv8/armv8-r/inc/arch/profile/cpu.h +++ b/src/arch/armv8/armv8-r/inc/arch/profile/cpu.h @@ -8,34 +8,14 @@ #include #include +#include #include #include #include #include struct cpu_arch_profile { - struct { - BITMAP_ALLOC(bitmap, MPU_ARCH_MAX_NUM_ENTRIES); - /** - * A locked region means that it can never be removed from the MPU. For example, - */ - BITMAP_ALLOC(locked, MPU_ARCH_MAX_NUM_ENTRIES); - struct mpu_perms { - perms_t el2; - perms_t el1; - } perms[MPU_ARCH_MAX_NUM_ENTRIES]; - /** - * We maintain an ordered list of the regions currently in the mpu to simplify the merging - * algorithm when mapping an overllaping region. 
- */ - struct { - struct list list; - struct mpu_node { - node_t node; - mpid_t mpid; - } node[MPU_ARCH_MAX_NUM_ENTRIES]; - } order; - } mpu; + struct mpu_arch mpu; }; static inline struct cpu* cpu(void) diff --git a/src/arch/armv8/armv8-r/mem.c b/src/arch/armv8/armv8-r/mem.c index 276fcaa87..110622366 100644 --- a/src/arch/armv8/armv8-r/mem.c +++ b/src/arch/armv8/armv8-r/mem.c @@ -7,5 +7,5 @@ void as_arch_init(struct addr_space* as) { - UNUSED_ARG(as); + as->arch.mpu_entry_mask = 0; } diff --git a/src/arch/armv8/armv8-r/mpu.c b/src/arch/armv8/armv8-r/mpu.c index d60c64f0c..1f5e119c3 100644 --- a/src/arch/armv8/armv8-r/mpu.c +++ b/src/arch/armv8/armv8-r/mpu.c @@ -5,545 +5,207 @@ #include #include +#include #include #include -static inline size_t mpu_num_entries(void) +static priv_t mpu_as_priv(struct addr_space* as) { - return (size_t)MPUIR_REGION(sysreg_mpuir_el2_read()); -} + priv_t priv; -static void mpu_entry_get_region(mpid_t mpid, struct mp_region* mpe) -{ - sysreg_prselr_el2_write(mpid); - ISB(); - unsigned long prbar = sysreg_prbar_el2_read(); - unsigned long prlar = sysreg_prlar_el2_read(); - mpe->mem_flags.prbar = PRBAR_FLAGS(prbar); - mpe->mem_flags.prlar = PRLAR_FLAGS(prlar); - mpe->base = PRBAR_BASE(prbar); - mpe->size = (PRLAR_LIMIT(prlar) + 1) - mpe->base; - mpe->as_sec = SEC_UNKNOWN; -} - -static int mpu_node_cmp(node_t* _n1, node_t* _n2) -{ - struct mpu_node* n1 = (struct mpu_node*)_n1; - struct mpu_node* n2 = (struct mpu_node*)_n2; - struct mp_region r1; - struct mp_region r2; - mpu_entry_get_region(n1->mpid, &r1); - mpu_entry_get_region(n2->mpid, &r2); - if (r1.base > r2.base) { - return 1; - } else if (r1.base < r2.base) { - return -1; + if (as->type == AS_VM) { + priv = PRIV_VM; } else { - return 0; + priv = PRIV_HYP; } + + return priv; } -static void mpu_entry_set(mpid_t mpid, struct mp_region* mpr) +static unsigned long mpu_get_region_base(mpid_t mpid) { - unsigned long lim = mpr->base + mpr->size - 1; + unsigned long prbar = 0; sysreg_prselr_el2_write(mpid); ISB(); - sysreg_prbar_el2_write((mpr->base & PRBAR_BASE_MSK) | mpr->mem_flags.prbar); - sysreg_prlar_el2_write((lim & PRLAR_LIMIT_MSK) | mpr->mem_flags.prlar); + prbar = sysreg_prbar_el2_read(); - list_insert_ordered(&cpu()->arch.profile.mpu.order.list, - (node_t*)&cpu()->arch.profile.mpu.order.node[mpid], mpu_node_cmp); + return PRBAR_BASE(prbar); } -static void mpu_entry_modify(mpid_t mpid, struct mp_region* mpr) +static unsigned long mpu_get_region_limit(mpid_t mpid) { - list_rm(&cpu()->arch.profile.mpu.order.list, (node_t*)&cpu()->arch.profile.mpu.order.node[mpid]); - - mpu_entry_set(mpid, mpr); -} - -static bool mpu_entry_clear(mpid_t mpid) -{ - list_rm(&cpu()->arch.profile.mpu.order.list, (node_t*)&cpu()->arch.profile.mpu.order.node[mpid]); + unsigned long prlar = 0; sysreg_prselr_el2_write(mpid); ISB(); - sysreg_prlar_el2_write(0); - sysreg_prbar_el2_write(0); - return true; -} + prlar = sysreg_prlar_el2_read(); -static inline void mpu_entry_free(mpid_t mpid) -{ - mpu_entry_clear(mpid); - bitmap_clear(cpu()->arch.profile.mpu.bitmap, mpid); + return PRLAR_LIMIT(prlar); } -static inline bool mpu_entry_valid(mpid_t mpid) +static mpid_t mpu_find_region_base(struct mp_region* mpr) { - sysreg_prselr_el2_write(mpid); - ISB(); - return !!(sysreg_prlar_el2_read() & PRLAR_EN); -} + mpid_t mpid = INVALID_MPID; -static inline bool mpu_entry_locked(mpid_t mpid) -{ - return !!bitmap_get(cpu()->arch.profile.mpu.locked, mpid); + for (mpid_t i = 0; i < MPU_ARCH_MAX_NUM_ENTRIES; i++) { + if 
(bitmap_get(cpu()->arch.profile.mpu.allocated_entries, i)) { + if (mpu_get_region_base(i) == mpr->base) { + mpid = i; + break; + } + } + } + return mpid; } -static bool mpu_entry_has_priv(mpid_t mpid, priv_t priv) +static mpid_t mpu_find_region_exact(struct mp_region* mpr) { - if (priv == PRIV_VM) { - return cpu()->arch.profile.mpu.perms[mpid].el1 != PERM_NONE; - } else { - return cpu()->arch.profile.mpu.perms[mpid].el2 != PERM_NONE; + mpid_t mpid = mpu_find_region_base(mpr); + + if (mpid != INVALID_MPID) { + if (mpu_get_region_limit(mpid) == PRLAR_LIMIT(mpr->base + mpr->size - 1)) { + return mpid; + } } -} -static inline perms_t mem_vmpu_entry_perms(struct mp_region* mpr) -{ - perms_t perms = PERM_R; - perms |= !(mpr->mem_flags.prbar & PRBAR_XN) ? PERM_X : 0; - perms |= !(mpr->mem_flags.prbar & PRBAR_NWR_BIT) ? PERM_W : 0; - return perms; + return INVALID_MPID; } -static inline void mpu_entry_set_perms(struct mp_region* mpr, struct mpu_perms mpu_perms) +static mpid_t mpu_entry_allocate(void) { - // TODO: should we check this is following the allowed permission combinations? + mpid_t reg_num = INVALID_MPID; + reg_num = (mpid_t)bitmap_find_nth(cpu()->arch.profile.mpu.allocated_entries, + MPU_ARCH_MAX_NUM_ENTRIES, 1, 0, false); - bool el1_priv = mpu_perms.el1 != PERM_NONE; - perms_t perms = mpu_perms.el1 | mpu_perms.el2; + bitmap_set(cpu()->arch.profile.mpu.allocated_entries, reg_num); - mpr->mem_flags.prbar &= (uint16_t) ~(PRBAR_PERMS_FLAGS_MSK); - if (perms & PERM_W) { - mpr->mem_flags.prbar |= PRBAR_AP_RW_EL2; - } else { - mpr->mem_flags.prbar |= PRBAR_AP_RO_EL2; - } - - if (!(perms & PERM_X)) { - mpr->mem_flags.prbar |= PRBAR_XN; - } - - if (el1_priv) { - mpr->mem_flags.prbar |= PRBAR_EL1_BIT; - } + return reg_num; } -static void mpu_entry_update_priv_perms(priv_t priv, mpid_t mpid, perms_t perms) +static inline void mpu_entry_deallocate(mpid_t mpid) { - if (priv == PRIV_VM) { - cpu()->arch.profile.mpu.perms[mpid].el1 = perms; - } else { - cpu()->arch.profile.mpu.perms[mpid].el2 = perms; - } + bitmap_clear(cpu()->arch.profile.mpu.allocated_entries, mpid); } -static inline bool mpu_perms_equivalent(struct mpu_perms* p1, struct mpu_perms* p2) +static inline void mpu_entry_lock(mpid_t mpid) { - return (p1->el1 == p2->el1) && (p1->el2 == p2->el2); + bitmap_set(cpu()->arch.profile.mpu.locked_entries, mpid); } -static inline mem_attrs_t mpu_entry_attrs(struct mp_region* mpr) +static inline void mpu_entry_unlock(mpid_t mpid) { - mem_flags_t flags = mpr->mem_flags; - flags.prbar &= PRBAR_MEM_ATTR_FLAGS_MSK; - flags.prlar &= PRLAR_MEM_ATTR_FLAGS_MSK; - return (mem_attrs_t)flags.raw; + bitmap_clear(cpu()->arch.profile.mpu.locked_entries, mpid); } -static mpid_t mpu_entry_allocate(void) +static void mpu_entry_set(mpid_t mpid, struct mp_region* mpr) { - mpid_t reg_num = INVALID_MPID; - for (mpid_t i = 0; i < (mpid_t)mpu_num_entries(); i++) { - if (bitmap_get(cpu()->arch.profile.mpu.bitmap, i) == 0) { - bitmap_set(cpu()->arch.profile.mpu.bitmap, i); - reg_num = i; - break; - } - } - return reg_num; + unsigned long lim = mpr->base + mpr->size - 1; + + sysreg_prselr_el2_write(mpid); + ISB(); + sysreg_prbar_el2_write((mpr->base & PRBAR_BASE_MSK) | mpr->mem_flags.prbar); + sysreg_prlar_el2_write((lim & PRLAR_LIMIT_MSK) | mpr->mem_flags.prlar); + ISB(); } -bool mpu_map(priv_t priv, struct mp_region* mpr) +static void mpu_entry_update_limit(mpid_t mpid, struct mp_region* mpr) { - size_t size_left = mpr->size; - bool failed = false; - struct mp_region reg1 = *mpr; - struct mp_region reg2 = *mpr; - bool 
reg1_valid = true; - bool reg2_valid = false; - mpid_t prev = INVALID_MPID; - mpid_t next = INVALID_MPID; - mpid_t bottom_mpid = INVALID_MPID; - mpid_t top_mpid = INVALID_MPID; - - while (size_left > 0 && !failed) { - /** - * Since we'll be checking for overlapping regions in order, there will be at most two - * regions to map in a given iteration. This happens when the previous iteration found an - * overlapping region that is fully contained by the new region. - */ - - struct mp_region* new_reg; - if (reg1_valid) { - new_reg = ®1; - } else if (reg2_valid) { - new_reg = ®2; - } else { - break; - } - - // As Armv8-R does not allow overlapping regions, we must first check if usch regions - // already exist. Specifically, for the case where the regions has hypervisor permissions - // only, and this is a map targetting a guest mpu, we just need to flip the guest - // permission bit. This will allow us to share regions between guest and hypevisor to, for - // example, (i) share the use of a peripheral (mainly uart for debugging purposes), or (ii) - // share a RW page between hypervisor and guest. Although having a RO page for guest while - // RW for the hypervisor is highly useful, this MPU does not allow it. That said, in the - // case we need it in the future, we'll have to implement a mechanism for that based on - // traps. - - bool overlaped = false; - perms_t new_perms = mem_vmpu_entry_perms(new_reg); - - prev = INVALID_MPID; - next = INVALID_MPID; - bottom_mpid = INVALID_MPID; - top_mpid = INVALID_MPID; - - struct list* mpu_order_list = &cpu()->arch.profile.mpu.order.list; - list_foreach ((*mpu_order_list), struct mpu_node, entry) { - mpid_t mpid = entry->mpid; - struct mp_region overlapped_reg; - - mpu_entry_get_region(mpid, &overlapped_reg); - - if ((new_reg->base + new_reg->size) <= overlapped_reg.base) { - next = mpid; - break; - } - - if (!mem_regions_overlap(new_reg, &overlapped_reg)) { - // If we are not overlapping, continue to search for overlapped regions until we - // check all entries. This should be the most frequent case, so the overhead for - // the checks on overllap will rarely execute. - prev = mpid; - continue; - } - overlaped = true; - - if (mpu_entry_has_priv(mpid, priv)) { - // We don't allow overlapping regions of the same privilege. This is something that - // should be checked at the vmpu level, but we re-check it here anyway. - failed = true; - break; - } - - // We only allow to bump up permissions if the overlapped region is a RO hypervisor - // region. Otherwise permissions have to be RW in both regions. We don't allow to - // overlap executable regions. - struct mpu_perms overlapped_perms = cpu()->arch.profile.mpu.perms[mpid]; - struct mpu_perms overlap_perms = overlapped_perms; - priv_t overlapped_priv; - perms_t overlapped_perms2; - if (priv == PRIV_VM) { - overlap_perms.el1 = new_perms; - overlapped_priv = PRIV_HYP; - overlapped_perms2 = overlapped_perms.el2; - } else { - overlap_perms.el2 = new_perms; - overlapped_priv = PRIV_VM; - overlapped_perms2 = overlapped_perms.el1; - } - - if (((overlap_perms.el1 & PERM_RW) == PERM_R) && - ((overlap_perms.el2 & PERM_W) != PERM_NONE)) { - // We allow promoting read/write privielges of the hypervisor region to match the - // guest's. However, this combination promotes the guest privielges, which we don't - // allow. - failed = true; - break; - } - - if ((overlap_perms.el1 & PERM_X) != (overlap_perms.el2 & PERM_X)) { - // Unless explicitly mapped, we don't promote execution privileges. 
- failed = true; - break; - } - - // The Armv8-R MPU does not allow us to have different permissions for hypervisor and - // guest. So we must fail if asked to add an overlapping mapping with different - // permissions or attributes - if (mpu_entry_attrs(new_reg) != mpu_entry_attrs(&overlapped_reg)) { - failed = true; - break; - } - - vaddr_t new_reg_limit = new_reg->base + new_reg->size; - vaddr_t overlapped_reg_limit = overlapped_reg.base + overlapped_reg.size; - size_t top_size = - new_reg_limit >= overlapped_reg_limit ? 0 : overlapped_reg_limit - new_reg_limit; - size_t bottom_size = - new_reg->base <= overlapped_reg.base ? 0 : new_reg->base - overlapped_reg.base; - size_t top_left = - new_reg_limit <= overlapped_reg_limit ? 0 : new_reg_limit - overlapped_reg_limit; - size_t bottom_left = - new_reg->base >= overlapped_reg.base ? 0 : overlapped_reg.base - new_reg->base; - bool subset = - (new_reg->base >= overlapped_reg.base) && (new_reg_limit <= overlapped_reg_limit); - bool superset = (bottom_left > 0) || (top_left > 0); - - struct mp_region middle; - middle.size = overlapped_reg.size - (top_size + bottom_size); - middle.base = overlapped_reg.base + bottom_size; - middle.mem_flags = overlapped_reg.mem_flags; // copy attributes - mpu_entry_set_perms(&middle, overlap_perms); - - if (bottom_size > 0) { - bottom_mpid = mpu_entry_allocate(); - if (bottom_mpid == INVALID_MPID) { - failed = true; - break; - } - } + unsigned long lim = mpr->base + mpr->size - 1; - if (top_size > 0) { - top_mpid = mpu_entry_allocate(); - if (top_mpid == INVALID_MPID) { - failed = true; - break; - } - } + sysreg_prselr_el2_write(mpid); + ISB(); + sysreg_prlar_el2_write((lim & PRLAR_LIMIT_MSK) | mpr->mem_flags.prlar); + ISB(); +} - mpu_entry_update_priv_perms(priv, mpid, new_perms); - mpu_entry_modify(mpid, &middle); - - if (bottom_size > 0) { - struct mp_region bottom; - bottom.base = overlapped_reg.base; - bottom.size = bottom_size; - bottom.mem_flags = overlapped_reg.mem_flags; // copy attrs - mpu_entry_set_perms(&bottom, overlapped_perms); - mpu_entry_update_priv_perms(overlapped_priv, bottom_mpid, overlapped_perms2); - mpu_entry_set(bottom_mpid, &bottom); - } +static bool mpu_entry_clear(mpid_t mpid) +{ + sysreg_prselr_el2_write(mpid); + ISB(); + sysreg_prlar_el2_write(0); + sysreg_prbar_el2_write(0); + ISB(); + return true; +} - if (top_size > 0) { - struct mp_region top; - top.base = new_reg_limit; - top.size = top_size; - top.mem_flags = overlapped_reg.mem_flags; // copy attrs - mpu_entry_set_perms(&top, overlapped_perms); - mpu_entry_update_priv_perms(overlapped_priv, top_mpid, overlapped_perms2); - mpu_entry_set(top_mpid, &top); - } +bool mpu_map(struct addr_space* as, struct mp_region* mpr, bool locked) +{ + mpid_t mpid = INVALID_MPID; + priv_t priv = mpu_as_priv(as); - if (bottom_left > 0) { - reg1_valid = true; - reg1.base = new_reg->base; - reg1.size = bottom_left; - } else { - reg1_valid = false; - } + if (mpr->size == 0) { + return false; + } - if (top_left > 0) { - reg2_valid = true; - reg2.base = overlapped_reg_limit; - reg2.size = top_left; - } else { - reg2_valid = true; - } + /* We don't check if there is an existing region because bao ensure that + there is only 1 address space active at the same time, and as such, only a + single set of mpu entries are enabled. + Furthermore, the same check is done at the vMPU level. 
+ */ - if (superset) { - size_left = (top_left + bottom_left); - } else if (subset) { - size_left = 0; - } else { - size_left -= middle.size; + else { + mpid = mpu_entry_allocate(); + if (mpid != INVALID_MPID) { + if (locked) { + mpu_entry_lock(mpid); } - - break; - } - - if (!overlaped && !failed) { - mpid_t merge_mpid = INVALID_MPID; - size_t mem_size = new_reg->size; - struct mpu_perms* prev_perms = &cpu()->arch.profile.mpu.perms[prev]; - struct mpu_perms* next_perms = &cpu()->arch.profile.mpu.perms[next]; - struct mpu_perms new_reg_perms; + bitmap_set((bitmap_t*)&as->arch.mpu_entry_mask, mpid); if (priv == PRIV_VM) { - new_reg_perms.el1 = new_perms; - new_reg_perms.el2 = PERM_NONE; - } else { - new_reg_perms.el1 = PERM_NONE; - new_reg_perms.el2 = new_perms; - } - - /** - * Check if we can merge the current region with the region right before and/or right - * after. This can only be done if they are adjacent and have the same exect flags - * (i.e. permissions and memory attribtues). - */ - - if ((prev != INVALID_MPID) && !mpu_entry_locked(prev)) { - struct mp_region r; - mpu_entry_get_region(prev, &r); - if (((r.base + r.size) == new_reg->base) && - (mpu_entry_attrs(&r) == mpu_entry_attrs(new_reg)) && - (mpu_perms_equivalent(prev_perms, &new_reg_perms))) { - merge_mpid = prev; - new_reg->base = r.base; - new_reg->size += r.size; - } - } - - if ((next != INVALID_MPID) && !mpu_entry_locked(next)) { - struct mp_region r; - mpu_entry_get_region(next, &r); - if ((new_reg->base + new_reg->size) == r.base && - (mpu_entry_attrs(&r) == mpu_entry_attrs(new_reg)) && - (mpu_perms_equivalent(next_perms, &new_reg_perms))) { - if (merge_mpid == INVALID_MPID) { - merge_mpid = next; - } else { - mpu_entry_free(next); - } - new_reg->size += r.size; - } - } - - /** - * If we can merge the region do it. Otherwise, allocate a new entry and set it. - */ - if (merge_mpid != INVALID_MPID) { - mpu_entry_update_priv_perms(priv, merge_mpid, new_perms); - mpu_entry_modify(merge_mpid, new_reg); - } else { - mpid_t mpid = mpu_entry_allocate(); - if (mpid == INVALID_MPID) { - ERROR("failed to allocate mpu entry"); - } - mpu_entry_update_priv_perms(priv, mpid, new_perms); - mpu_entry_set(mpid, new_reg); + mpr->mem_flags.prlar &= (uint16_t)~PRLAR_EN; } - size_left -= mem_size; - } - } - - if (failed) { - if (bottom_mpid != INVALID_MPID) { - mpu_entry_free(bottom_mpid); - } - - if (top_mpid != INVALID_MPID) { - mpu_entry_free(top_mpid); + mpu_entry_set(mpid, mpr); } } - return !failed; + return true; } -bool mpu_unmap(priv_t priv, struct mp_region* mpr) +bool mpu_unmap(struct addr_space* as, struct mp_region* mpr) { - size_t size_left = mpr->size; - - while (size_left > 0) { - mpid_t mpid = INVALID_MPID; - struct mp_region reg; + UNUSED_ARG(as); + mpid_t mpid = mpu_find_region_exact(mpr); - struct list* mpu_order_list = &cpu()->arch.profile.mpu.order.list; - list_foreach ((*mpu_order_list), struct mpu_node, entry) { - mpu_entry_get_region(entry->mpid, ®); + if (mpid != INVALID_MPID) { + mpu_entry_deallocate(mpid); + mpu_entry_unlock(mpid); - if ((mpr->base + mpr->size) < reg.base) { - break; - } - - if (!mpu_entry_has_priv(entry->mpid, priv)) { - continue; - } - - if (mem_regions_overlap(®, mpr)) { - mpid = entry->mpid; - break; - } - } + mpu_entry_clear(mpid); - if (mpid == INVALID_MPID) { - break; - } + bitmap_clear((bitmap_t*)&as->arch.mpu_entry_mask, mpid); + } - vaddr_t mpr_limit = mpr->base + mpr->size; - vaddr_t reg_limit = reg.base + reg.size; - size_t top_size = mpr_limit >= reg_limit ? 
0 : reg_limit - mpr_limit; - size_t bottom_size = mpr->base <= reg.base ? 0 : mpr->base - reg.base; - struct mpu_perms orig_perms = cpu()->arch.profile.mpu.perms[mpid]; - - mpu_entry_update_priv_perms(priv, mpid, PERM_NONE); - bool update_perms = !((cpu()->arch.profile.mpu.perms[mpid].el1 == PERM_NONE) && - (cpu()->arch.profile.mpu.perms[mpid].el2 == PERM_NONE)); - - if (update_perms) { - struct mp_region update_reg = reg; - update_reg.base = bottom_size > 0 ? mpr->base : reg.base; - update_reg.size = reg.size - top_size - bottom_size; - mpu_entry_set_perms(&update_reg, cpu()->arch.profile.mpu.perms[mpid]); - mpu_entry_modify(mpid, &update_reg); - } else { - mpu_entry_free(mpid); - } + return true; +} - if (top_size > 0) { - struct mp_region top = reg; - top.base = mpr_limit; - top.size = top_size; - mpid_t top_mpid = mpu_entry_allocate(); - cpu()->arch.profile.mpu.perms[top_mpid] = orig_perms; - mpu_entry_set(top_mpid, &top); - } +bool mpu_update(struct addr_space* as, struct mp_region* mpr) +{ + UNUSED_ARG(as); - if (bottom_size > 0) { - struct mp_region bottom = reg; - bottom.size = bottom_size; - mpid_t bottom_mpid = mpu_entry_allocate(); - cpu()->arch.profile.mpu.perms[bottom_mpid] = orig_perms; - mpu_entry_set(bottom_mpid, &bottom); - } + mpid_t mpid = mpu_find_region_base(mpr); - size_t overlap_size = reg.size - top_size - bottom_size; - size_left -= overlap_size; + if (mpid != INVALID_MPID) { + mpu_entry_update_limit(mpid, mpr); + return true; } - // TODO: check if we can merge new regions after unmapping a given privilege from a shared - // region - - return size_left == 0; + return false; } -void mpu_init(void) +bool mpu_perms_compatible(unsigned long perms1, unsigned long perms2) { - bitmap_clear_consecutive(cpu()->arch.profile.mpu.bitmap, 0, mpu_num_entries()); - list_init(&cpu()->arch.profile.mpu.order.list); - - for (mpid_t mpid = 0; mpid < (mpid_t)mpu_num_entries(); mpid++) { - cpu()->arch.profile.mpu.order.node[mpid].mpid = mpid; - - if (mpu_entry_valid(mpid)) { - bitmap_set(cpu()->arch.profile.mpu.bitmap, mpid); - bitmap_set(cpu()->arch.profile.mpu.locked, mpid); + return perms1 == perms2; +} - /** - * We are assuming all initial regions have all hyp perms. This might change in the - * future. 
- */ - cpu()->arch.profile.mpu.perms[mpid].el1 = PERM_NONE; - cpu()->arch.profile.mpu.perms[mpid].el2 = PERM_RWX; +void mpu_enable(void) +{ + sysreg_sctlr_el2_write(SCTLR_M); +} - list_insert_ordered(&cpu()->arch.profile.mpu.order.list, - (node_t*)&cpu()->arch.profile.mpu.order.node[mpid], mpu_node_cmp); - } +void mpu_init() +{ + for (mpid_t mpid = 0; mpid < MPU_ARCH_MAX_NUM_ENTRIES; mpid++) { + bitmap_clear(cpu()->arch.profile.mpu.allocated_entries, mpid); + bitmap_clear(cpu()->arch.profile.mpu.locked_entries, mpid); } } diff --git a/src/arch/armv8/asm_defs.c b/src/arch/armv8/asm_defs.c index 9bff72c0a..d2ff212b1 100644 --- a/src/arch/armv8/asm_defs.c +++ b/src/arch/armv8/asm_defs.c @@ -6,6 +6,7 @@ #include #include #include +#include #include __attribute__((used)) static void cpu_defines(void) @@ -16,13 +17,23 @@ __attribute__((used)) static void cpu_defines(void) DEFINE_SIZE(CPU_STACK_SIZE, ((struct cpu*)NULL)->stack); DEFINE_OFFSET(CPU_VCPU_OFF, struct cpu, vcpu); + +#ifdef MEM_PROT_MPU + DEFINE_OFFSET(CPU_AS_ARCH_MASK_OFF, struct cpu, as.arch.mpu_entry_mask); + DEFINE_OFFSET(CPU_ARCH_PROFILE_MPU_LOCKED_OFF, struct cpu, arch.profile.mpu.locked_entries); +#endif /* MEM_PROT_MPU */ } __attribute__((used)) static void vcpu_defines(void) { DEFINE_SIZE(VCPU_ARCH_SIZE, struct vcpu_arch); DEFINE_OFFSET(VCPU_REGS_OFF, struct vcpu, regs); + DEFINE_OFFSET(VCPU_VM_OFF, struct vcpu, vm); DEFINE_SIZE(VCPU_REGS_SIZE, struct arch_regs); + +#ifdef MEM_PROT_MPU + DEFINE_OFFSET(VM_AS_ARCH_MASK_OFF, struct vm, as.arch.mpu_entry_mask); +#endif /* MEM_PROT_MPU */ } __attribute__((used)) static void platform_defines(void) diff --git a/src/core/mmu/inc/mem_prot/mem.h b/src/core/mmu/inc/mem_prot/mem.h index 981a9a517..489852851 100644 --- a/src/core/mmu/inc/mem_prot/mem.h +++ b/src/core/mmu/inc/mem_prot/mem.h @@ -15,6 +15,7 @@ struct addr_space { struct page_table pt; enum AS_TYPE type; + cpumap_t cpus; colormap_t colors; asid_t id; spinlock_t lock; diff --git a/src/core/mpu/inc/mem_prot/mem.h b/src/core/mpu/inc/mem_prot/mem.h index a4cdfeae6..2df86d1a3 100644 --- a/src/core/mpu/inc/mem_prot/mem.h +++ b/src/core/mpu/inc/mem_prot/mem.h @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -24,21 +25,31 @@ struct mp_region { struct addr_space { asid_t id; enum AS_TYPE type; + cpumap_t cpus; colormap_t colors; - struct mpe { - enum { MPE_S_FREE, MPE_S_INVALID, MPE_S_VALID } state; - struct mp_region region; - } vmpu[VMPU_NUM_ENTRIES]; + struct addr_space_arch arch; + struct { + struct list ordered_list; + struct mpe { + node_t node; + enum { MPE_S_FREE, MPE_S_INVALID, MPE_S_VALID } state; + bool lock; + struct mp_region region; + mpid_t mpid; + } node[VMPU_NUM_ENTRIES]; + } vmpu; spinlock_t lock; }; -void as_init(struct addr_space* as, enum AS_TYPE type, asid_t id, colormap_t colors); +void as_init(struct addr_space* as, enum AS_TYPE type, asid_t id, cpumap_t cpus, colormap_t colors); static inline bool mem_regions_overlap(struct mp_region* reg1, struct mp_region* reg2) { return range_in_range(reg1->base, reg1->size, reg2->base, reg2->size); } +bool mem_map(struct addr_space* as, struct mp_region* mpr, bool broadcast, bool locked); + /** * This functions must be defined for the physical MPU. The abstraction provided by the physical * MPU layer is minimal. Besides initialization: @@ -47,7 +58,9 @@ static inline bool mem_regions_overlap(struct mp_region* reg1, struct mp_region* * success value. 
*/ void mpu_init(void); -bool mpu_map(priv_t priv, struct mp_region* mem); -bool mpu_unmap(priv_t priv, struct mp_region* mem); +void mpu_enable(void); +bool mpu_map(struct addr_space* as, struct mp_region* mem, bool locked); +bool mpu_unmap(struct addr_space* as, struct mp_region* mem); +bool mpu_update(struct addr_space* as, struct mp_region* mpr); #endif /* __MEM_PROT_H__ */ diff --git a/src/core/mpu/mem.c b/src/core/mpu/mem.c index e7af5ab33..c78198d62 100644 --- a/src/core/mpu/mem.c +++ b/src/core/mpu/mem.c @@ -11,19 +11,20 @@ #include #include #include +#include struct shared_region { enum AS_TYPE as_type; asid_t asid; struct mp_region region; cpumap_t sharing_cpus; + bool lock; }; void mem_handle_broadcast_region(uint32_t event, uint64_t data); -bool mem_map(struct addr_space* as, struct mp_region* mpr, bool broadcast); bool mem_unmap_range(struct addr_space* as, vaddr_t vaddr, size_t size, bool broadcast); -enum { MEM_INSERT_REGION, MEM_REMOVE_REGION }; +enum { MEM_INSERT_REGION, MEM_REMOVE_REGION, MEM_UPDATE_REGION }; #define SHARED_REGION_POOL_SIZE_DEFAULT (128) #ifndef SHARED_REGION_POOL_SIZE @@ -34,12 +35,27 @@ OBJPOOL_ALLOC(shared_region_pool, struct shared_region, SHARED_REGION_POOL_SIZE) static inline struct mpe* mem_vmpu_get_entry(struct addr_space* as, mpid_t mpid) { if (mpid < VMPU_NUM_ENTRIES) { - return &as->vmpu[mpid]; + return &as->vmpu.node[mpid]; } return NULL; } -static void mem_vmpu_set_entry(struct addr_space* as, mpid_t mpid, struct mp_region* mpr) +static int vmpu_node_cmp(node_t* _n1, node_t* _n2) +{ + struct mpe* n1 = (struct mpe*)_n1; + struct mpe* n2 = (struct mpe*)_n2; + + if (n1->region.base > n2->region.base) { + return 1; + } else if (n1->region.base < n2->region.base) { + return -1; + } else { + return 0; + } +} + +static void mem_vmpu_set_entry(struct addr_space* as, mpid_t mpid, struct mp_region* mpr, + bool locked) { struct mpe* mpe = mem_vmpu_get_entry(as, mpid); @@ -48,6 +64,10 @@ static void mem_vmpu_set_entry(struct addr_space* as, mpid_t mpid, struct mp_reg mpe->region.mem_flags = mpr->mem_flags; mpe->region.as_sec = mpr->as_sec; mpe->state = MPE_S_VALID; + mpe->mpid = mpid; + mpe->lock = locked; + + list_insert_ordered(&as->vmpu.ordered_list, (node_t*)&as->vmpu.node[mpid], vmpu_node_cmp); } static void mem_vmpu_clear_entry(struct addr_space* as, mpid_t mpid) @@ -59,6 +79,7 @@ static void mem_vmpu_clear_entry(struct addr_space* as, mpid_t mpid) mpe->region.mem_flags = PTE_INVALID; mpe->region.as_sec = SEC_UNKNOWN; mpe->state = MPE_S_INVALID; + mpe->lock = false; } static void mem_vmpu_free_entry(struct addr_space* as, mpid_t mpid) @@ -66,6 +87,8 @@ static void mem_vmpu_free_entry(struct addr_space* as, mpid_t mpid) mem_vmpu_clear_entry(as, mpid); struct mpe* mpe = mem_vmpu_get_entry(as, mpid); mpe->state = MPE_S_FREE; + + list_rm(&as->vmpu.ordered_list, (node_t*)&as->vmpu.node[mpid]); } static mpid_t mem_vmpu_allocate_entry(struct addr_space* as) @@ -84,6 +107,18 @@ static mpid_t mem_vmpu_allocate_entry(struct addr_space* as) return mpid; } +static void mem_vmpu_deallocate_entry(struct addr_space* as, mpid_t mpid) +{ + struct mpe* mpe = mem_vmpu_get_entry(as, mpid); + + mpe->region.base = 0; + mpe->region.size = 0; + mpe->region.mem_flags = PTE_INVALID; + mpe->region.as_sec = SEC_UNKNOWN; + mpe->state = MPE_S_FREE; + mpe->lock = false; +} + static mpid_t mem_vmpu_get_entry_by_addr(struct addr_space* as, vaddr_t addr) { mpid_t mpid = INVALID_MPID; @@ -105,27 +140,7 @@ static mpid_t mem_vmpu_get_entry_by_addr(struct addr_space* as, vaddr_t addr) 
return mpid; } -static inline priv_t as_priv(struct addr_space* as) -{ - priv_t priv; - - switch (as->type) { - case AS_HYP: - /* fallthrough */ - case AS_HYP_CPY: - priv = PRIV_HYP; - break; - case AS_VM: - priv = PRIV_VM; - break; - default: - priv = PRIV_NONE; - } - - return priv; -} - -static void as_init_boot_regions(void) +static void mem_init_boot_regions(void) { /** * Add hypervisor mpu entries set up during boot to the vmpu: @@ -140,7 +155,6 @@ static void as_init_boot_regions(void) vaddr_t image_end = (vaddr_t)&_image_end; struct mp_region mpr; - mpid_t mpid = 0; bool separate_noload_region = image_load_end != image_noload_start; vaddr_t first_region_end = separate_noload_region ? image_load_end : image_end; @@ -151,18 +165,16 @@ static void as_init_boot_regions(void) .mem_flags = PTE_HYP_FLAGS, .as_sec = SEC_HYP_IMAGE, }; - mem_vmpu_set_entry(&cpu()->as, mpid, &mpr); - mpid++; + mem_map(&cpu()->as, &mpr, false, true); if (separate_noload_region) { mpr = (struct mp_region){ .base = image_noload_start, - .size = (size_t)(image_end - image_noload_start), + .size = (size_t)image_end - image_noload_start, .mem_flags = PTE_HYP_FLAGS, .as_sec = SEC_HYP_IMAGE, }; - mem_vmpu_set_entry(&cpu()->as, mpid, &mpr); - mpid++; + mem_map(&cpu()->as, &mpr, false, true); } mpr = (struct mp_region){ @@ -171,15 +183,15 @@ static void as_init_boot_regions(void) .mem_flags = PTE_HYP_FLAGS, .as_sec = SEC_HYP_PRIVATE, }; - mem_vmpu_set_entry(&cpu()->as, mpid, &mpr); - mpid++; + mem_map(&cpu()->as, &mpr, false, true); } void mem_prot_init() { mpu_init(); - as_init(&cpu()->as, AS_HYP, HYP_ASID, 0); - as_init_boot_regions(); + as_init(&cpu()->as, AS_HYP, HYP_ASID, BIT_MASK(0, PLAT_CPU_NUM), 0); + mem_init_boot_regions(); + mpu_enable(); } size_t mem_cpu_boot_alloc_size() @@ -188,16 +200,19 @@ size_t mem_cpu_boot_alloc_size() return size; } -void as_init(struct addr_space* as, enum AS_TYPE type, asid_t id, colormap_t colors) +void as_init(struct addr_space* as, enum AS_TYPE type, asid_t id, cpumap_t cpus, colormap_t colors) { UNUSED_ARG(colors); as->type = type; as->colors = 0; as->id = id; + as->cpus = cpus; as->lock = SPINLOCK_INITVAL; as_arch_init(as); + list_init(&(as->vmpu.ordered_list)); + for (size_t i = 0; i < VMPU_NUM_ENTRIES; i++) { mem_vmpu_free_entry(as, i); } @@ -244,7 +259,8 @@ static cpumap_t mem_section_shared_cpus(struct addr_space* as, as_sec_t section) return cpus; } -static void mem_region_broadcast(struct addr_space* as, struct mp_region* mpr, uint32_t op) +static void mem_region_broadcast(struct addr_space* as, struct mp_region* mpr, uint32_t op, + bool locked) { cpumap_t shared_cpus = mem_section_shared_cpus(as, mpr->as_sec); @@ -256,6 +272,7 @@ static void mem_region_broadcast(struct addr_space* as, struct mp_region* mpr, u .as_type = as->type, .asid = as->id, .region = *mpr, + .lock = locked, }; for (cpuid_t cpuid = 0; cpuid < PLAT_CPU_NUM; cpuid++) { @@ -271,17 +288,37 @@ static void mem_region_broadcast(struct addr_space* as, struct mp_region* mpr, u } } +static bool mem_broadcast(struct addr_space* as, struct mp_region* mpr, bool broadcast) +{ + if (as->type == AS_HYP && mpr->as_sec == SEC_HYP_PRIVATE) { + return false; + } + + return broadcast; +} + +static bool mem_check_forced_locked(struct mp_region* mpr, bool locked) +{ + if (mpr->as_sec == SEC_HYP_PRIVATE || mpr->as_sec == SEC_HYP_VM || + mpr->as_sec == SEC_HYP_IMAGE) { + return true; + } + + return locked; +} + static bool mem_vmpu_insert_region(struct addr_space* as, mpid_t mpid, struct mp_region* mpr, - bool broadcast) + 
bool broadcast, bool locked) { + bool lock = mem_check_forced_locked(mpr, locked); if (mpid == INVALID_MPID) { return false; } - if (mpu_map(as_priv(as), mpr)) { - mem_vmpu_set_entry(as, mpid, mpr); - if (broadcast) { - mem_region_broadcast(as, mpr, MEM_INSERT_REGION); + if (mpu_map(as, mpr, lock)) { + mem_vmpu_set_entry(as, mpid, mpr, lock); + if (mem_broadcast(as, mpr, broadcast)) { + mem_region_broadcast(as, mpr, MEM_INSERT_REGION, lock); } return true; } @@ -289,6 +326,22 @@ static bool mem_vmpu_insert_region(struct addr_space* as, mpid_t mpid, struct mp return false; } +static bool mem_vmpu_update_region(struct addr_space* as, mpid_t mpid, struct mp_region merge_reg, + bool broadcast, bool locked) +{ + bool merged = false; + + if (mpu_update(as, &merge_reg)) { + struct mpe* mpe = mem_vmpu_get_entry(as, mpid); + mpe->region = merge_reg; + if (mem_broadcast(as, &mpe->region, broadcast)) { + mem_region_broadcast(as, &mpe->region, MEM_UPDATE_REGION, locked); + } + merged = true; + } + return merged; +} + static bool mem_vmpu_remove_region(struct addr_space* as, mpid_t mpid, bool broadcast) { bool removed = false; @@ -296,10 +349,11 @@ static bool mem_vmpu_remove_region(struct addr_space* as, mpid_t mpid, bool broa struct mpe* mpe = mem_vmpu_get_entry(as, mpid); if ((mpe != NULL) && (mpe->state == MPE_S_VALID)) { - if (broadcast) { - mem_region_broadcast(as, &mpe->region, MEM_REMOVE_REGION); + bool lock = mpe->lock; + if (mem_broadcast(as, &mpe->region, broadcast)) { + mem_region_broadcast(as, &mpe->region, MEM_REMOVE_REGION, lock); } - mpu_unmap(as_priv(as), &mpe->region); + mpu_unmap(as, &mpe->region); mem_vmpu_free_entry(as, mpid); removed = true; } @@ -307,21 +361,50 @@ static bool mem_vmpu_remove_region(struct addr_space* as, mpid_t mpid, bool broa return removed; } -static void mem_handle_broadcast_insert(struct addr_space* as, struct mp_region* mpr) +static void mem_handle_broadcast_insert(struct addr_space* as, struct mp_region* mpr, bool locked) { if (as->type == AS_HYP) { - mem_map(&cpu()->as, mpr, false); + mem_map(&cpu()->as, mpr, false, locked); } else { - mpu_map(as_priv(as), mpr); + mpu_map(as, mpr, locked); } } static void mem_handle_broadcast_remove(struct addr_space* as, struct mp_region* mpr) { if (as->type == AS_HYP) { + /* + We don't use mpu_unmap because that API forces a broadcast. 
+ During the handle of a broadcast we don't want that, to avoid + a chain of broadcasts + */ mem_unmap_range(&cpu()->as, mpr->base, mpr->size, false); } else { - mpu_unmap(as_priv(as), mpr); + mpu_unmap(as, mpr); + } +} + +static bool mem_update(struct addr_space* as, struct mp_region* mpr, bool broadcast, bool locked) +{ + mpid_t update_mpid = INVALID_MPID; + list_foreach (as->vmpu.ordered_list, struct mpe, cur) { + if (cur->region.base == mpr->base && cur->region.size != mpr->size) { + update_mpid = cur->mpid; + break; + } + } + if (update_mpid != INVALID_MPID) { + return mem_vmpu_update_region(as, update_mpid, *mpr, broadcast, locked); + } + return false; +} + +static void mem_handle_broadcast_update(struct addr_space* as, struct mp_region* mpr, bool locked) +{ + if (as->type == AS_HYP) { + mem_update(&cpu()->as, mpr, false, locked); + } else { + mpu_update(as, mpr); } } @@ -343,11 +426,14 @@ void mem_handle_broadcast_region(uint32_t event, uint64_t data) switch (event) { case MEM_INSERT_REGION: - mem_handle_broadcast_insert(as, &sh_reg->region); + mem_handle_broadcast_insert(as, &sh_reg->region, sh_reg->lock); break; case MEM_REMOVE_REGION: mem_handle_broadcast_remove(as, &sh_reg->region); break; + case MEM_UPDATE_REGION: + mem_handle_broadcast_update(as, &sh_reg->region, sh_reg->lock); + break; default: ERROR("unknown mem broadcast msg"); } @@ -376,9 +462,53 @@ static mpid_t mem_vmpu_find_overlapping_region(struct addr_space* as, struct mp_ return mpid; } -bool mem_map(struct addr_space* as, struct mp_region* mpr, bool broadcast) +static void mem_vmpu_coalesce_contiguous(struct addr_space* as, bool broadcast) +{ + while (true) { + bool merge = false; + mpid_t cur_mpid = INVALID_MPID; + mpid_t prev_mpid = INVALID_MPID; + struct mpe* prev_reg; + struct mpe* cur_reg; + list_foreach_tail(as->vmpu.ordered_list, struct mpe, cur, prev) + { + if (prev == NULL) { + continue; + } + cur_reg = mem_vmpu_get_entry(as, cur->mpid); + prev_reg = mem_vmpu_get_entry(as, prev->mpid); + + bool contiguous = prev_reg->region.base + prev_reg->region.size == cur_reg->region.base; + bool perms_compatible = + mpu_perms_compatible(prev_reg->region.mem_flags.raw, cur_reg->region.mem_flags.raw); + bool lock_compatible = !prev_reg->lock && !cur_reg->lock; + if (contiguous && perms_compatible && lock_compatible) { + cur_mpid = cur->mpid; + prev_mpid = prev->mpid; + merge = true; + break; + } + } + + if (merge) { + struct mp_region merged_reg = { + .base = prev_reg->region.base, + .size = prev_reg->region.size + cur_reg->region.size, + .mem_flags = cur_reg->region.mem_flags, + }; + if (mem_vmpu_update_region(as, prev_mpid, merged_reg, broadcast, prev_reg->lock)) { + mem_vmpu_remove_region(as, cur_mpid, broadcast); + } + } else { + break; + } + } +} + +bool mem_map(struct addr_space* as, struct mp_region* mpr, bool broadcast, bool locked) { bool mapped = false; + mpid_t mpid = INVALID_MPID; if (mpr->size == 0) { return true; @@ -392,13 +522,18 @@ bool mem_map(struct addr_space* as, struct mp_region* mpr, bool broadcast) spin_lock(&as->lock); if (mem_vmpu_find_overlapping_region(as, mpr) == INVALID_MPID) { - // TODO: check if it possible to merge with another region - mpid_t mpid = mem_vmpu_allocate_entry(as); + mpid = mem_vmpu_allocate_entry(as); if (mpid != INVALID_MPID) { - mapped = mem_vmpu_insert_region(as, mpid, mpr, broadcast); + mapped = mem_vmpu_insert_region(as, mpid, mpr, broadcast, locked); + } else { + mem_vmpu_deallocate_entry(as, mpid); } } + if (mapped && !locked) { + mem_vmpu_coalesce_contiguous(as, 
broadcast); + } + spin_unlock(&as->lock); return mapped; @@ -406,8 +541,6 @@ bool mem_map(struct addr_space* as, struct mp_region* mpr, bool broadcast) bool mem_unmap_range(struct addr_space* as, vaddr_t vaddr, size_t size, bool broadcast) { - UNUSED_ARG(broadcast); - spin_lock(&as->lock); size_t size_left = size; @@ -429,6 +562,8 @@ bool mem_unmap_range(struct addr_space* as, vaddr_t vaddr, size_t size, bool bro struct mpe* mpe = mem_vmpu_get_entry(as, mpid); reg = mpe->region; + bool locked = mpe->lock; + vaddr_t limit = vaddr + size; vaddr_t r_limit = reg.base + reg.size; vaddr_t r_base = reg.base; @@ -436,21 +571,25 @@ bool mem_unmap_range(struct addr_space* as, vaddr_t vaddr, size_t size, bool bro size_t top_size = limit >= r_limit ? 0 : r_limit - limit; size_t bottom_size = vaddr <= r_base ? 0 : vaddr - r_base; - mem_vmpu_remove_region(as, mpid, true); + mem_vmpu_remove_region(as, mpid, broadcast); if (top_size > 0) { struct mp_region top = reg; top.base = limit; top.size = top_size; mpid_t top_mpid = mem_vmpu_allocate_entry(as); - mem_vmpu_insert_region(as, top_mpid, &top, true); + if (!mem_vmpu_insert_region(as, top_mpid, &top, broadcast, locked)) { + return false; + } } if (bottom_size > 0) { struct mp_region bottom = reg; bottom.size = bottom_size; mpid_t bottom_mpid = mem_vmpu_allocate_entry(as); - mem_vmpu_insert_region(as, bottom_mpid, &bottom, true); + if (!mem_vmpu_insert_region(as, bottom_mpid, &bottom, broadcast, locked)) { + return false; + } } size_t overlap_size = reg.size - top_size - bottom_size; @@ -491,10 +630,16 @@ vaddr_t mem_map_cpy(struct addr_space* ass, struct addr_space* asd, vaddr_t vas, mpr = mpe->region; spin_unlock(&ass->lock); - if (mem_map(asd, &mpr, true)) { - va_res = vas; + if (num_pages * PAGE_SIZE > mpr.size) { + va_res = INVALID_VA; } else { - INFO("failed mem map on mem map cpy"); + mpr.size = num_pages * PAGE_SIZE; + bool broadcast = mem_broadcast(asd, &mpr, true); + if (mem_map(asd, &mpr, broadcast, false)) { + va_res = vas; + } else { + INFO("failed mem map on mem map cpy"); + } } } else { INFO("failed mem map cpy"); @@ -543,7 +688,7 @@ vaddr_t mem_alloc_map(struct addr_space* as, as_sec_t section, struct ppages* pp .mem_flags = flags, }; - mem_map(as, &mpr, true); + mem_map(as, &mpr, true, false); return at; } diff --git a/src/core/mpu/vm.c b/src/core/mpu/vm.c index 6e303ed8b..5371f7d74 100644 --- a/src/core/mpu/vm.c +++ b/src/core/mpu/vm.c @@ -9,5 +9,5 @@ void vm_mem_prot_init(struct vm* vm, const struct vm_config* config) { UNUSED_ARG(config); - as_init(&vm->as, AS_VM, vm->id, 0); + as_init(&vm->as, AS_VM, vm->id, vm->cpus, 0); } diff --git a/src/core/vm.c b/src/core/vm.c index 1db13fdc5..f1d922d4a 100644 --- a/src/core/vm.c +++ b/src/core/vm.c @@ -28,6 +28,7 @@ static void vm_cpu_init(struct vm* vm) { spin_lock(&vm->lock); vm->cpus |= (1UL << cpu()->id); + vm->as.cpus |= (1UL << cpu()->id); spin_unlock(&vm->lock); } @@ -129,7 +130,7 @@ static void vm_install_image(struct vm* vm, struct vm_mem_region* reg) size_t img_num_pages = NUM_PAGES(vm->config->image.size); struct ppages img_ppages = mem_ppages_get(vm->config->image.load_addr, img_num_pages); - vaddr_t src_va = mem_alloc_map(&cpu()->as, SEC_HYP_GLOBAL, &img_ppages, INVALID_VA, + vaddr_t src_va = mem_alloc_map(&cpu()->as, SEC_HYP_PRIVATE, &img_ppages, INVALID_VA, img_num_pages, PTE_HYP_FLAGS); vaddr_t dst_va = mem_map_cpy(&vm->as, &cpu()->as, vm->config->image.base_addr, INVALID_VA, img_num_pages);
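
As a reading aid for the exception-vector changes at the top of the patch: on every world switch the new MEM_PROT_MPU code reduces to writing an enable mask into PRENR_EL2 (HPRENR via cp15 on AArch32). The sketch below is an illustrative C equivalent, not part of the patch; the function names and the sysreg_prenr_el2_write() accessor are assumptions, while the fields it reads (cpu()->as.arch.mpu_entry_mask, vcpu->vm->as.arch.mpu_entry_mask, and the per-CPU locked_entries bitmap) are the ones the assembly loads through the offsets defined in asm_defs.c.

/*
 * Illustrative C rendering of the MEM_PROT_MPU context-switch paths
 * (AArch64 shown). sysreg_prenr_el2_write() is assumed to be generated
 * like the other accessors in sysregs.h; nothing here is part of the patch.
 */
#include <cpu.h>
#include <vm.h>

static inline void mpu_restore_hyp_entries(void)
{
    /* Hypervisor entry: enable only the MPU entries owned by this CPU's
     * hypervisor address space. */
    sysreg_prenr_el2_write(cpu()->as.arch.mpu_entry_mask);
}

static inline void mpu_restore_vm_entries(struct vcpu* vcpu)
{
    /* VM entry: enable the guest's entries plus the locked (pinned)
     * hypervisor entries that must stay resident while the guest runs.
     * The assembly loads the first word of the locked_entries bitmap,
     * mirrored here. */
    unsigned long mask = vcpu->vm->as.arch.mpu_entry_mask;
    mask |= cpu()->arch.profile.mpu.locked_entries[0];
    sysreg_prenr_el2_write(mask);
}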
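
A note on the reshaped mapping API: mem_map() now takes explicit broadcast and locked flags, and the physical-MPU calls (mpu_map()/mpu_unmap()/mpu_update()) take the target address space instead of a privilege level. A minimal, hypothetical caller under the new signature could look like the sketch below; the base address, size and section are made up for illustration, and the call itself mirrors the one in mem_alloc_map().

/* Hypothetical example: map a two-page hypervisor-global region with the
 * new four-argument mem_map(). broadcast=true propagates the mapping to the
 * other CPUs sharing the section; locked=false leaves the entry eligible
 * for coalescing with adjacent, compatible regions. */
#include <mem.h>
#include <cpu.h>

static void map_example_region(void)
{
    struct mp_region mpr = {
        .base = 0x80000000UL,      /* made-up base address */
        .size = 2 * PAGE_SIZE,
        .mem_flags = PTE_HYP_FLAGS,
        .as_sec = SEC_HYP_GLOBAL,
    };

    if (!mem_map(&cpu()->as, &mpr, true, false)) {
        ERROR("example mapping failed");
    }
}

Passing locked=true instead (or mapping a SEC_HYP_IMAGE/SEC_HYP_PRIVATE/SEC_HYP_VM region, which mem_check_forced_locked() forces to locked) pins the entry, marks it in the per-CPU locked_entries bitmap so it stays enabled while a guest runs, and excludes it from mem_vmpu_coalesce_contiguous().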