diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..a748b84
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,741 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# clang-format configuration file. Intended for clang-format >= 11.
+#
+# For more information, see:
+#
+# Documentation/dev-tools/clang-format.rst
+# https://clang.llvm.org/docs/ClangFormat.html
+# https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+#
+---
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Left
+AlignOperands: true
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+ AfterClass: false
+ AfterControlStatement: false
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ AfterExternBlock: false
+ BeforeCatch: false
+ BeforeElse: false
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+BreakBeforeInheritanceComma: false
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeComma
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: false
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 8
+ContinuationIndentWidth: 8
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: false
+
+# Taken from:
+# git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ tools/ \
+# | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$, - '\1'," \
+# | LC_ALL=C sort -u
+ForEachMacros:
+ - '__ata_qc_for_each'
+ - '__bio_for_each_bvec'
+ - '__bio_for_each_segment'
+ - '__evlist__for_each_entry'
+ - '__evlist__for_each_entry_continue'
+ - '__evlist__for_each_entry_from'
+ - '__evlist__for_each_entry_reverse'
+ - '__evlist__for_each_entry_safe'
+ - '__for_each_mem_range'
+ - '__for_each_mem_range_rev'
+ - '__for_each_thread'
+ - '__hlist_for_each_rcu'
+ - '__map__for_each_symbol_by_name'
+ - '__pci_bus_for_each_res0'
+ - '__pci_bus_for_each_res1'
+ - '__pci_dev_for_each_res0'
+ - '__pci_dev_for_each_res1'
+ - '__perf_evlist__for_each_entry'
+ - '__perf_evlist__for_each_entry_reverse'
+ - '__perf_evlist__for_each_entry_safe'
+ - '__rq_for_each_bio'
+ - '__shost_for_each_device'
+ - '__sym_for_each'
+ - 'apei_estatus_for_each_section'
+ - 'ata_for_each_dev'
+ - 'ata_for_each_link'
+ - 'ata_qc_for_each'
+ - 'ata_qc_for_each_raw'
+ - 'ata_qc_for_each_with_internal'
+ - 'ax25_for_each'
+ - 'ax25_uid_for_each'
+ - 'bio_for_each_bvec'
+ - 'bio_for_each_bvec_all'
+ - 'bio_for_each_folio_all'
+ - 'bio_for_each_integrity_vec'
+ - 'bio_for_each_segment'
+ - 'bio_for_each_segment_all'
+ - 'bio_list_for_each'
+ - 'bip_for_each_vec'
+ - 'bond_for_each_slave'
+ - 'bond_for_each_slave_rcu'
+ - 'bpf_for_each'
+ - 'bpf_for_each_reg_in_vstate'
+ - 'bpf_for_each_reg_in_vstate_mask'
+ - 'bpf_for_each_spilled_reg'
+ - 'bpf_object__for_each_map'
+ - 'bpf_object__for_each_program'
+ - 'btree_for_each_safe128'
+ - 'btree_for_each_safe32'
+ - 'btree_for_each_safe64'
+ - 'btree_for_each_safel'
+ - 'card_for_each_dev'
+ - 'cgroup_taskset_for_each'
+ - 'cgroup_taskset_for_each_leader'
+ - 'cpu_aggr_map__for_each_idx'
+ - 'cpufreq_for_each_efficient_entry_idx'
+ - 'cpufreq_for_each_entry'
+ - 'cpufreq_for_each_entry_idx'
+ - 'cpufreq_for_each_valid_entry'
+ - 'cpufreq_for_each_valid_entry_idx'
+ - 'css_for_each_child'
+ - 'css_for_each_descendant_post'
+ - 'css_for_each_descendant_pre'
+ - 'damon_for_each_region'
+ - 'damon_for_each_region_from'
+ - 'damon_for_each_region_safe'
+ - 'damon_for_each_scheme'
+ - 'damon_for_each_scheme_safe'
+ - 'damon_for_each_target'
+ - 'damon_for_each_target_safe'
+ - 'damos_for_each_filter'
+ - 'damos_for_each_filter_safe'
+ - 'data__for_each_file'
+ - 'data__for_each_file_new'
+ - 'data__for_each_file_start'
+ - 'device_for_each_child_node'
+ - 'displayid_iter_for_each'
+ - 'dma_fence_array_for_each'
+ - 'dma_fence_chain_for_each'
+ - 'dma_fence_unwrap_for_each'
+ - 'dma_resv_for_each_fence'
+ - 'dma_resv_for_each_fence_unlocked'
+ - 'do_for_each_ftrace_op'
+ - 'drm_atomic_crtc_for_each_plane'
+ - 'drm_atomic_crtc_state_for_each_plane'
+ - 'drm_atomic_crtc_state_for_each_plane_state'
+ - 'drm_atomic_for_each_plane_damage'
+ - 'drm_client_for_each_connector_iter'
+ - 'drm_client_for_each_modeset'
+ - 'drm_connector_for_each_possible_encoder'
+ - 'drm_exec_for_each_locked_object'
+ - 'drm_exec_for_each_locked_object_reverse'
+ - 'drm_for_each_bridge_in_chain'
+ - 'drm_for_each_connector_iter'
+ - 'drm_for_each_crtc'
+ - 'drm_for_each_crtc_reverse'
+ - 'drm_for_each_encoder'
+ - 'drm_for_each_encoder_mask'
+ - 'drm_for_each_fb'
+ - 'drm_for_each_legacy_plane'
+ - 'drm_for_each_plane'
+ - 'drm_for_each_plane_mask'
+ - 'drm_for_each_privobj'
+ - 'drm_gem_for_each_gpuva'
+ - 'drm_gem_for_each_gpuva_safe'
+ - 'drm_gpuva_for_each_op'
+ - 'drm_gpuva_for_each_op_from_reverse'
+ - 'drm_gpuva_for_each_op_safe'
+ - 'drm_gpuvm_for_each_va'
+ - 'drm_gpuvm_for_each_va_range'
+ - 'drm_gpuvm_for_each_va_range_safe'
+ - 'drm_gpuvm_for_each_va_safe'
+ - 'drm_mm_for_each_hole'
+ - 'drm_mm_for_each_node'
+ - 'drm_mm_for_each_node_in_range'
+ - 'drm_mm_for_each_node_safe'
+ - 'dsa_switch_for_each_available_port'
+ - 'dsa_switch_for_each_cpu_port'
+ - 'dsa_switch_for_each_cpu_port_continue_reverse'
+ - 'dsa_switch_for_each_port'
+ - 'dsa_switch_for_each_port_continue_reverse'
+ - 'dsa_switch_for_each_port_safe'
+ - 'dsa_switch_for_each_user_port'
+ - 'dsa_tree_for_each_cpu_port'
+ - 'dsa_tree_for_each_user_port'
+ - 'dsa_tree_for_each_user_port_continue_reverse'
+ - 'dso__for_each_symbol'
+ - 'dsos__for_each_with_build_id'
+ - 'elf_hash_for_each_possible'
+ - 'elf_symtab__for_each_symbol'
+ - 'evlist__for_each_cpu'
+ - 'evlist__for_each_entry'
+ - 'evlist__for_each_entry_continue'
+ - 'evlist__for_each_entry_from'
+ - 'evlist__for_each_entry_reverse'
+ - 'evlist__for_each_entry_safe'
+ - 'flow_action_for_each'
+ - 'for_each_acpi_consumer_dev'
+ - 'for_each_acpi_dev_match'
+ - 'for_each_active_dev_scope'
+ - 'for_each_active_drhd_unit'
+ - 'for_each_active_iommu'
+ - 'for_each_active_route'
+ - 'for_each_aggr_pgid'
+ - 'for_each_and_bit'
+ - 'for_each_andnot_bit'
+ - 'for_each_available_child_of_node'
+ - 'for_each_bench'
+ - 'for_each_bio'
+ - 'for_each_board_func_rsrc'
+ - 'for_each_btf_ext_rec'
+ - 'for_each_btf_ext_sec'
+ - 'for_each_bvec'
+ - 'for_each_card_auxs'
+ - 'for_each_card_auxs_safe'
+ - 'for_each_card_components'
+ - 'for_each_card_dapms'
+ - 'for_each_card_pre_auxs'
+ - 'for_each_card_prelinks'
+ - 'for_each_card_rtds'
+ - 'for_each_card_rtds_safe'
+ - 'for_each_card_widgets'
+ - 'for_each_card_widgets_safe'
+ - 'for_each_cgroup_storage_type'
+ - 'for_each_child_of_node'
+ - 'for_each_clear_bit'
+ - 'for_each_clear_bit_from'
+ - 'for_each_clear_bitrange'
+ - 'for_each_clear_bitrange_from'
+ - 'for_each_cmd'
+ - 'for_each_cmsghdr'
+ - 'for_each_collection'
+ - 'for_each_comp_order'
+ - 'for_each_compatible_node'
+ - 'for_each_component_dais'
+ - 'for_each_component_dais_safe'
+ - 'for_each_conduit'
+ - 'for_each_console'
+ - 'for_each_console_srcu'
+ - 'for_each_cpu'
+ - 'for_each_cpu_and'
+ - 'for_each_cpu_andnot'
+ - 'for_each_cpu_or'
+ - 'for_each_cpu_wrap'
+ - 'for_each_dapm_widgets'
+ - 'for_each_dedup_cand'
+ - 'for_each_dev_addr'
+ - 'for_each_dev_scope'
+ - 'for_each_dma_cap_mask'
+ - 'for_each_dpcm_be'
+ - 'for_each_dpcm_be_rollback'
+ - 'for_each_dpcm_be_safe'
+ - 'for_each_dpcm_fe'
+ - 'for_each_drhd_unit'
+ - 'for_each_dss_dev'
+ - 'for_each_efi_memory_desc'
+ - 'for_each_efi_memory_desc_in_map'
+ - 'for_each_element'
+ - 'for_each_element_extid'
+ - 'for_each_element_id'
+ - 'for_each_endpoint_of_node'
+ - 'for_each_event'
+ - 'for_each_event_tps'
+ - 'for_each_evictable_lru'
+ - 'for_each_fib6_node_rt_rcu'
+ - 'for_each_fib6_walker_rt'
+ - 'for_each_free_mem_pfn_range_in_zone'
+ - 'for_each_free_mem_pfn_range_in_zone_from'
+ - 'for_each_free_mem_range'
+ - 'for_each_free_mem_range_reverse'
+ - 'for_each_func_rsrc'
+ - 'for_each_gpiochip_node'
+ - 'for_each_group_evsel'
+ - 'for_each_group_evsel_head'
+ - 'for_each_group_member'
+ - 'for_each_group_member_head'
+ - 'for_each_hstate'
+ - 'for_each_if'
+ - 'for_each_inject_fn'
+ - 'for_each_insn'
+ - 'for_each_insn_prefix'
+ - 'for_each_intid'
+ - 'for_each_iommu'
+ - 'for_each_ip_tunnel_rcu'
+ - 'for_each_irq_nr'
+ - 'for_each_lang'
+ - 'for_each_link_codecs'
+ - 'for_each_link_cpus'
+ - 'for_each_link_platforms'
+ - 'for_each_lru'
+ - 'for_each_matching_node'
+ - 'for_each_matching_node_and_match'
+ - 'for_each_media_entity_data_link'
+ - 'for_each_mem_pfn_range'
+ - 'for_each_mem_range'
+ - 'for_each_mem_range_rev'
+ - 'for_each_mem_region'
+ - 'for_each_member'
+ - 'for_each_memory'
+ - 'for_each_migratetype_order'
+ - 'for_each_missing_reg'
+ - 'for_each_mle_subelement'
+ - 'for_each_mod_mem_type'
+ - 'for_each_net'
+ - 'for_each_net_continue_reverse'
+ - 'for_each_net_rcu'
+ - 'for_each_netdev'
+ - 'for_each_netdev_continue'
+ - 'for_each_netdev_continue_rcu'
+ - 'for_each_netdev_continue_reverse'
+ - 'for_each_netdev_dump'
+ - 'for_each_netdev_feature'
+ - 'for_each_netdev_in_bond_rcu'
+ - 'for_each_netdev_rcu'
+ - 'for_each_netdev_reverse'
+ - 'for_each_netdev_safe'
+ - 'for_each_new_connector_in_state'
+ - 'for_each_new_crtc_in_state'
+ - 'for_each_new_mst_mgr_in_state'
+ - 'for_each_new_plane_in_state'
+ - 'for_each_new_plane_in_state_reverse'
+ - 'for_each_new_private_obj_in_state'
+ - 'for_each_new_reg'
+ - 'for_each_node'
+ - 'for_each_node_by_name'
+ - 'for_each_node_by_type'
+ - 'for_each_node_mask'
+ - 'for_each_node_state'
+ - 'for_each_node_with_cpus'
+ - 'for_each_node_with_property'
+ - 'for_each_nonreserved_multicast_dest_pgid'
+ - 'for_each_numa_hop_mask'
+ - 'for_each_of_allnodes'
+ - 'for_each_of_allnodes_from'
+ - 'for_each_of_cpu_node'
+ - 'for_each_of_pci_range'
+ - 'for_each_old_connector_in_state'
+ - 'for_each_old_crtc_in_state'
+ - 'for_each_old_mst_mgr_in_state'
+ - 'for_each_old_plane_in_state'
+ - 'for_each_old_private_obj_in_state'
+ - 'for_each_oldnew_connector_in_state'
+ - 'for_each_oldnew_crtc_in_state'
+ - 'for_each_oldnew_mst_mgr_in_state'
+ - 'for_each_oldnew_plane_in_state'
+ - 'for_each_oldnew_plane_in_state_reverse'
+ - 'for_each_oldnew_private_obj_in_state'
+ - 'for_each_online_cpu'
+ - 'for_each_online_node'
+ - 'for_each_online_pgdat'
+ - 'for_each_or_bit'
+ - 'for_each_path'
+ - 'for_each_pci_bridge'
+ - 'for_each_pci_dev'
+ - 'for_each_pcm_streams'
+ - 'for_each_physmem_range'
+ - 'for_each_populated_zone'
+ - 'for_each_possible_cpu'
+ - 'for_each_present_blessed_reg'
+ - 'for_each_present_cpu'
+ - 'for_each_prime_number'
+ - 'for_each_prime_number_from'
+ - 'for_each_probe_cache_entry'
+ - 'for_each_process'
+ - 'for_each_process_thread'
+ - 'for_each_prop_codec_conf'
+ - 'for_each_prop_dai_codec'
+ - 'for_each_prop_dai_cpu'
+ - 'for_each_prop_dlc_codecs'
+ - 'for_each_prop_dlc_cpus'
+ - 'for_each_prop_dlc_platforms'
+ - 'for_each_property_of_node'
+ - 'for_each_reg'
+ - 'for_each_reg_filtered'
+ - 'for_each_reloc'
+ - 'for_each_reloc_from'
+ - 'for_each_requested_gpio'
+ - 'for_each_requested_gpio_in_range'
+ - 'for_each_reserved_mem_range'
+ - 'for_each_reserved_mem_region'
+ - 'for_each_rtd_codec_dais'
+ - 'for_each_rtd_components'
+ - 'for_each_rtd_cpu_dais'
+ - 'for_each_rtd_dais'
+ - 'for_each_sband_iftype_data'
+ - 'for_each_script'
+ - 'for_each_sec'
+ - 'for_each_set_bit'
+ - 'for_each_set_bit_from'
+ - 'for_each_set_bit_wrap'
+ - 'for_each_set_bitrange'
+ - 'for_each_set_bitrange_from'
+ - 'for_each_set_clump8'
+ - 'for_each_sg'
+ - 'for_each_sg_dma_page'
+ - 'for_each_sg_page'
+ - 'for_each_sgtable_dma_page'
+ - 'for_each_sgtable_dma_sg'
+ - 'for_each_sgtable_page'
+ - 'for_each_sgtable_sg'
+ - 'for_each_sibling_event'
+ - 'for_each_sta_active_link'
+ - 'for_each_subelement'
+ - 'for_each_subelement_extid'
+ - 'for_each_subelement_id'
+ - 'for_each_sublist'
+ - 'for_each_subsystem'
+ - 'for_each_supported_activate_fn'
+ - 'for_each_supported_inject_fn'
+ - 'for_each_sym'
+ - 'for_each_test'
+ - 'for_each_thread'
+ - 'for_each_token'
+ - 'for_each_unicast_dest_pgid'
+ - 'for_each_valid_link'
+ - 'for_each_vif_active_link'
+ - 'for_each_vma'
+ - 'for_each_vma_range'
+ - 'for_each_vsi'
+ - 'for_each_wakeup_source'
+ - 'for_each_zone'
+ - 'for_each_zone_zonelist'
+ - 'for_each_zone_zonelist_nodemask'
+ - 'func_for_each_insn'
+ - 'fwnode_for_each_available_child_node'
+ - 'fwnode_for_each_child_node'
+ - 'fwnode_for_each_parent_node'
+ - 'fwnode_graph_for_each_endpoint'
+ - 'gadget_for_each_ep'
+ - 'genradix_for_each'
+ - 'genradix_for_each_from'
+ - 'genradix_for_each_reverse'
+ - 'hash_for_each'
+ - 'hash_for_each_possible'
+ - 'hash_for_each_possible_rcu'
+ - 'hash_for_each_possible_rcu_notrace'
+ - 'hash_for_each_possible_safe'
+ - 'hash_for_each_rcu'
+ - 'hash_for_each_safe'
+ - 'hashmap__for_each_entry'
+ - 'hashmap__for_each_entry_safe'
+ - 'hashmap__for_each_key_entry'
+ - 'hashmap__for_each_key_entry_safe'
+ - 'hctx_for_each_ctx'
+ - 'hists__for_each_format'
+ - 'hists__for_each_sort_list'
+ - 'hlist_bl_for_each_entry'
+ - 'hlist_bl_for_each_entry_rcu'
+ - 'hlist_bl_for_each_entry_safe'
+ - 'hlist_for_each'
+ - 'hlist_for_each_entry'
+ - 'hlist_for_each_entry_continue'
+ - 'hlist_for_each_entry_continue_rcu'
+ - 'hlist_for_each_entry_continue_rcu_bh'
+ - 'hlist_for_each_entry_from'
+ - 'hlist_for_each_entry_from_rcu'
+ - 'hlist_for_each_entry_rcu'
+ - 'hlist_for_each_entry_rcu_bh'
+ - 'hlist_for_each_entry_rcu_notrace'
+ - 'hlist_for_each_entry_safe'
+ - 'hlist_for_each_entry_srcu'
+ - 'hlist_for_each_safe'
+ - 'hlist_nulls_for_each_entry'
+ - 'hlist_nulls_for_each_entry_from'
+ - 'hlist_nulls_for_each_entry_rcu'
+ - 'hlist_nulls_for_each_entry_safe'
+ - 'i3c_bus_for_each_i2cdev'
+ - 'i3c_bus_for_each_i3cdev'
+ - 'idr_for_each_entry'
+ - 'idr_for_each_entry_continue'
+ - 'idr_for_each_entry_continue_ul'
+ - 'idr_for_each_entry_ul'
+ - 'in_dev_for_each_ifa_rcu'
+ - 'in_dev_for_each_ifa_rtnl'
+ - 'inet_bind_bucket_for_each'
+ - 'interval_tree_for_each_span'
+ - 'intlist__for_each_entry'
+ - 'intlist__for_each_entry_safe'
+ - 'kcore_copy__for_each_phdr'
+ - 'key_for_each'
+ - 'key_for_each_safe'
+ - 'klp_for_each_func'
+ - 'klp_for_each_func_safe'
+ - 'klp_for_each_func_static'
+ - 'klp_for_each_object'
+ - 'klp_for_each_object_safe'
+ - 'klp_for_each_object_static'
+ - 'kunit_suite_for_each_test_case'
+ - 'kvm_for_each_memslot'
+ - 'kvm_for_each_memslot_in_gfn_range'
+ - 'kvm_for_each_vcpu'
+ - 'libbpf_nla_for_each_attr'
+ - 'list_for_each'
+ - 'list_for_each_codec'
+ - 'list_for_each_codec_safe'
+ - 'list_for_each_continue'
+ - 'list_for_each_entry'
+ - 'list_for_each_entry_continue'
+ - 'list_for_each_entry_continue_rcu'
+ - 'list_for_each_entry_continue_reverse'
+ - 'list_for_each_entry_from'
+ - 'list_for_each_entry_from_rcu'
+ - 'list_for_each_entry_from_reverse'
+ - 'list_for_each_entry_lockless'
+ - 'list_for_each_entry_rcu'
+ - 'list_for_each_entry_reverse'
+ - 'list_for_each_entry_safe'
+ - 'list_for_each_entry_safe_continue'
+ - 'list_for_each_entry_safe_from'
+ - 'list_for_each_entry_safe_reverse'
+ - 'list_for_each_entry_srcu'
+ - 'list_for_each_from'
+ - 'list_for_each_prev'
+ - 'list_for_each_prev_safe'
+ - 'list_for_each_rcu'
+ - 'list_for_each_reverse'
+ - 'list_for_each_safe'
+ - 'llist_for_each'
+ - 'llist_for_each_entry'
+ - 'llist_for_each_entry_safe'
+ - 'llist_for_each_safe'
+ - 'lwq_for_each_safe'
+ - 'map__for_each_symbol'
+ - 'map__for_each_symbol_by_name'
+ - 'maps__for_each_entry'
+ - 'maps__for_each_entry_safe'
+ - 'mas_for_each'
+ - 'mci_for_each_dimm'
+ - 'media_device_for_each_entity'
+ - 'media_device_for_each_intf'
+ - 'media_device_for_each_link'
+ - 'media_device_for_each_pad'
+ - 'media_entity_for_each_pad'
+ - 'media_pipeline_for_each_entity'
+ - 'media_pipeline_for_each_pad'
+ - 'mlx5_lag_for_each_peer_mdev'
+ - 'msi_domain_for_each_desc'
+ - 'msi_for_each_desc'
+ - 'mt_for_each'
+ - 'nanddev_io_for_each_page'
+ - 'netdev_for_each_lower_dev'
+ - 'netdev_for_each_lower_private'
+ - 'netdev_for_each_lower_private_rcu'
+ - 'netdev_for_each_mc_addr'
+ - 'netdev_for_each_synced_mc_addr'
+ - 'netdev_for_each_synced_uc_addr'
+ - 'netdev_for_each_uc_addr'
+ - 'netdev_for_each_upper_dev_rcu'
+ - 'netdev_hw_addr_list_for_each'
+ - 'nft_rule_for_each_expr'
+ - 'nla_for_each_attr'
+ - 'nla_for_each_nested'
+ - 'nlmsg_for_each_attr'
+ - 'nlmsg_for_each_msg'
+ - 'nr_neigh_for_each'
+ - 'nr_neigh_for_each_safe'
+ - 'nr_node_for_each'
+ - 'nr_node_for_each_safe'
+ - 'of_for_each_phandle'
+ - 'of_property_for_each_string'
+ - 'of_property_for_each_u32'
+ - 'pci_bus_for_each_resource'
+ - 'pci_dev_for_each_resource'
+ - 'pcl_for_each_chunk'
+ - 'pcl_for_each_segment'
+ - 'pcm_for_each_format'
+ - 'perf_config_items__for_each_entry'
+ - 'perf_config_sections__for_each_entry'
+ - 'perf_config_set__for_each_entry'
+ - 'perf_cpu_map__for_each_cpu'
+ - 'perf_cpu_map__for_each_idx'
+ - 'perf_evlist__for_each_entry'
+ - 'perf_evlist__for_each_entry_reverse'
+ - 'perf_evlist__for_each_entry_safe'
+ - 'perf_evlist__for_each_evsel'
+ - 'perf_evlist__for_each_mmap'
+ - 'perf_hpp_list__for_each_format'
+ - 'perf_hpp_list__for_each_format_safe'
+ - 'perf_hpp_list__for_each_sort_list'
+ - 'perf_hpp_list__for_each_sort_list_safe'
+ - 'perf_tool_event__for_each_event'
+ - 'plist_for_each'
+ - 'plist_for_each_continue'
+ - 'plist_for_each_entry'
+ - 'plist_for_each_entry_continue'
+ - 'plist_for_each_entry_safe'
+ - 'plist_for_each_safe'
+ - 'pnp_for_each_card'
+ - 'pnp_for_each_dev'
+ - 'protocol_for_each_card'
+ - 'protocol_for_each_dev'
+ - 'queue_for_each_hw_ctx'
+ - 'radix_tree_for_each_slot'
+ - 'radix_tree_for_each_tagged'
+ - 'rb_for_each'
+ - 'rbtree_postorder_for_each_entry_safe'
+ - 'rdma_for_each_block'
+ - 'rdma_for_each_port'
+ - 'rdma_umem_for_each_dma_block'
+ - 'resort_rb__for_each_entry'
+ - 'resource_list_for_each_entry'
+ - 'resource_list_for_each_entry_safe'
+ - 'rhl_for_each_entry_rcu'
+ - 'rhl_for_each_rcu'
+ - 'rht_for_each'
+ - 'rht_for_each_entry'
+ - 'rht_for_each_entry_from'
+ - 'rht_for_each_entry_rcu'
+ - 'rht_for_each_entry_rcu_from'
+ - 'rht_for_each_entry_safe'
+ - 'rht_for_each_from'
+ - 'rht_for_each_rcu'
+ - 'rht_for_each_rcu_from'
+ - 'rq_for_each_bvec'
+ - 'rq_for_each_segment'
+ - 'rq_list_for_each'
+ - 'rq_list_for_each_safe'
+ - 'sample_read_group__for_each'
+ - 'scsi_for_each_prot_sg'
+ - 'scsi_for_each_sg'
+ - 'sctp_for_each_hentry'
+ - 'sctp_skb_for_each'
+ - 'sec_for_each_insn'
+ - 'sec_for_each_insn_continue'
+ - 'sec_for_each_insn_from'
+ - 'sec_for_each_sym'
+ - 'shdma_for_each_chan'
+ - 'shost_for_each_device'
+ - 'sk_for_each'
+ - 'sk_for_each_bound'
+ - 'sk_for_each_bound_bhash2'
+ - 'sk_for_each_entry_offset_rcu'
+ - 'sk_for_each_from'
+ - 'sk_for_each_rcu'
+ - 'sk_for_each_safe'
+ - 'sk_nulls_for_each'
+ - 'sk_nulls_for_each_from'
+ - 'sk_nulls_for_each_rcu'
+ - 'snd_array_for_each'
+ - 'snd_pcm_group_for_each_entry'
+ - 'snd_soc_dapm_widget_for_each_path'
+ - 'snd_soc_dapm_widget_for_each_path_safe'
+ - 'snd_soc_dapm_widget_for_each_sink_path'
+ - 'snd_soc_dapm_widget_for_each_source_path'
+ - 'strlist__for_each_entry'
+ - 'strlist__for_each_entry_safe'
+ - 'sym_for_each_insn'
+ - 'sym_for_each_insn_continue_reverse'
+ - 'symbols__for_each_entry'
+ - 'tb_property_for_each'
+ - 'tcf_act_for_each_action'
+ - 'tcf_exts_for_each_action'
+ - 'ttm_resource_manager_for_each_res'
+ - 'twsk_for_each_bound_bhash2'
+ - 'udp_portaddr_for_each_entry'
+ - 'udp_portaddr_for_each_entry_rcu'
+ - 'usb_hub_for_each_child'
+ - 'v4l2_device_for_each_subdev'
+ - 'v4l2_m2m_for_each_dst_buf'
+ - 'v4l2_m2m_for_each_dst_buf_safe'
+ - 'v4l2_m2m_for_each_src_buf'
+ - 'v4l2_m2m_for_each_src_buf_safe'
+ - 'virtio_device_for_each_vq'
+ - 'while_for_each_ftrace_op'
+ - 'xa_for_each'
+ - 'xa_for_each_marked'
+ - 'xa_for_each_range'
+ - 'xa_for_each_start'
+ - 'xas_for_each'
+ - 'xas_for_each_conflict'
+ - 'xas_for_each_marked'
+ - 'xbc_array_for_each_value'
+ - 'xbc_for_each_key_value'
+ - 'xbc_node_for_each_array_value'
+ - 'xbc_node_for_each_child'
+ - 'xbc_node_for_each_key_value'
+ - 'xbc_node_for_each_subkey'
+ - 'zorro_for_each_dev'
+
+IncludeBlocks: Preserve
+IncludeCategories:
+ - Regex: '.*'
+ Priority: 1
+IncludeIsMainRegex: '(Test)?$'
+IndentCaseLabels: false
+IndentGotoLabels: false
+IndentPPDirectives: None
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 8
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+
+# Taken from git's rules
+PenaltyBreakAssignment: 10
+PenaltyBreakBeforeFirstCallParameter: 30
+PenaltyBreakComment: 10
+PenaltyBreakFirstLessLess: 0
+PenaltyBreakString: 10
+PenaltyExcessCharacter: 100
+PenaltyReturnTypeOnItsOwnLine: 60
+
+PointerAlignment: Right
+ReflowComments: false
+SortIncludes: false
+SortUsingDeclarations: false
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp03
+TabWidth: 4
+UseTab: Never
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..538cddb
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,68 @@
+# Prerequisites
+*.d
+
+# Object files
+*.o
+*.ko
+*.obj
+kernel8.*
+*.img
+build/
+
+# Linker output
+*.ilk
+*.map
+*.exp
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+
+# Debug files
+*.dSYM/
+*.su
+*.idb
+*.pdb
+
+# Kernel Module Compile Results
+*.mod*
+*.cmd
+.tmp_versions/
+modules.order
+Module.symvers
+Mkfile.old
+dkms.conf
+
+# Local files
+.gdb_history
+.vscode/
+.cache/
+.mypy_cache/
+
+# CMake output
+compile_flags.txt
+compile_commands.json
+CMakeFiles/
+cmake_install.cmake
+CMakeCache.txt
+Makefile
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..f80450c
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "musl"]
+ path = musl
+	url = https://git.musl-libc.org/musl
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100755
index 0000000..8e8e304
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,77 @@
+cmake_minimum_required(VERSION 3.16)
+
+project(rpi-os VERSION 0.1.0 LANGUAGES C ASM)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS True)
+
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+ set(aarch64_prefix "")
+ set(aarch64_gdb "gdb")
+else()
+ set(aarch64_prefix "aarch64-linux-gnu-")
+ set(aarch64_gdb "gdb-multiarch")
+endif()
+
+set(aarch64_gcc "${aarch64_prefix}gcc")
+set(aarch64_ld "${aarch64_prefix}ld")
+set(aarch64_objdump "${aarch64_prefix}objdump")
+set(aarch64_objcopy "${aarch64_prefix}objcopy")
+
+set(aarch64_qemu "qemu-system-aarch64")
+
+add_subdirectory(src)
+add_subdirectory(boot)
+
+get_property(kernel_elf GLOBAL PROPERTY kernel_elf_path)
+get_property(kernel_image GLOBAL PROPERTY kernel_image_path)
+get_property(sd_image GLOBAL PROPERTY sd_image_path)
+
+set(qemu_flags
+ -machine virt,gic-version=3
+ -cpu cortex-a72
+ -smp 4
+ -m 4096
+ -nographic
+ -monitor none
+ -serial "mon:stdio"
+ -global virtio-mmio.force-legacy=false
+ -drive file=${sd_image},if=none,format=raw,id=d0
+ -device virtio-blk-device,drive=d0,bus=virtio-mmio-bus.0
+ -kernel "${kernel_elf}")
+
+add_custom_target(qemu
+ COMMAND ${aarch64_qemu} ${qemu_flags} -gdb tcp::1234
+ DEPENDS image)
+add_custom_target(qemu-debug
+ COMMAND ${aarch64_qemu} ${qemu_flags} -gdb tcp::1234 -S
+ DEPENDS image)
+add_custom_target(debug
+ COMMAND ${aarch64_gdb} --nx --quiet
+ -ex "set architecture aarch64"
+ -ex "file ${kernel_elf}"
+ -ex "target remote localhost:1234"
+ DEPENDS kernel)
+add_custom_target(pwn
+ COMMAND pwndbg-dev --nx --quiet
+ -ex "set architecture aarch64"
+ -ex "file ${kernel_elf}"
+ -ex "target remote localhost:1234"
+ DEPENDS kernel)
+
+
+# if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+# add_custom_target(init_libc
+# COMMAND git submodule update --init && cd ../libc &&
+# ./configure)
+# else()
+# add_custom_target(init_libc
+# COMMAND git submodule update --init && cd ../libc &&
+# export CROSS_COMPILE=${aarch64_prefix} &&
+# ./configure --target=aarch64)
+# endif()
+
+# set(LIBC_SPEC ${CMAKE_CURRENT_SOURCE_DIR}/libc/lib/musl-gcc.specs)
+# set(LIBC_SPEC_OUT musl-gcc.specs)
+# add_custom_target(libc
+# COMMAND make -C ../libc -j12 &&
+# sed -e \"s/\\/usr\\/local\\/musl/..\\/..\\/..\\/libc/g\" ${LIBC_SPEC} > ${LIBC_SPEC_OUT})
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f288702
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/README.md b/README.md
index 58c6102..11a9d7c 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ Lab repository for OS Fall 2025 at Fudan University
这是复旦大学 2025 年秋季学期《操作系统(H)》课程的配套实验内容。我们将建立一个基于 ARM 架构的简易教学操作系统。
-[实验文档](https://osh.fducslg.com)
+[实验文档](https://osh2025.fducslg.com/)
暂定的实验内容将包括:
diff --git a/boot/CMakeLists.txt b/boot/CMakeLists.txt
new file mode 100644
index 0000000..8705e14
--- /dev/null
+++ b/boot/CMakeLists.txt
@@ -0,0 +1,36 @@
+get_property(kernel_image GLOBAL PROPERTY kernel_image_path)
+
+set(boot_files
+ "${kernel_image}"
+ "armstub8-rpi4.bin"
+ "bootcode.bin"
+ "config.txt"
+ "COPYING.linux"
+ "fixup_cd.dat"
+ "fixup.dat"
+ "fixup4.dat"
+ "fixup4cd.dat"
+ "LICENCE.broadcom"
+ "start_cd.elf"
+ "start.elf"
+ "start4.elf"
+ "start4cd.elf")
+
+# set(user_files "init"
+# "cat"
+# "sh"
+# "echo"
+# "ls"
+# "mkfs"
+# "mkdir")
+
+add_custom_command(
+ OUTPUT sd.img
+ BYPRODUCTS boot.img
+ COMMAND ./generate-image.py ${CMAKE_CURRENT_BINARY_DIR} ${boot_files} # ${user_files}
+ DEPENDS kernel generate-image.py ${boot_files} # user_bin
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+
+add_custom_target(image ALL DEPENDS sd.img)
+
+set_property(GLOBAL PROPERTY sd_image_path ${CMAKE_CURRENT_BINARY_DIR}/sd.img)
diff --git a/boot/COPYING.linux b/boot/COPYING.linux
new file mode 100644
index 0000000..ca442d3
--- /dev/null
+++ b/boot/COPYING.linux
@@ -0,0 +1,356 @@
+
+ NOTE! This copyright does *not* cover user programs that use kernel
+ services by normal system calls - this is merely considered normal use
+ of the kernel, and does *not* fall under the heading of "derived work".
+ Also note that the GPL below is copyrighted by the Free Software
+ Foundation, but the instance of code that it refers to (the Linux
+ kernel) is copyrighted by me and others who actually wrote it.
+
+ Also note that the only valid version of the GPL as far as the kernel
+ is concerned is _this_ particular version of the license (ie v2, not
+ v2.2 or v3.x or whatever), unless explicitly otherwise stated.
+
+ Linus Torvalds
+
+----------------------------------------
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/boot/LICENCE.broadcom b/boot/LICENCE.broadcom
new file mode 100644
index 0000000..d5793b5
--- /dev/null
+++ b/boot/LICENCE.broadcom
@@ -0,0 +1,31 @@
+Copyright (c) 2006, Broadcom Corporation.
+Copyright (c) 2015, Raspberry Pi (Trading) Ltd
+All rights reserved.
+
+Redistribution. Redistribution and use in binary form, without
+modification, are permitted provided that the following conditions are
+met:
+
+* This software may only be used for the purposes of developing for,
+ running or using a Raspberry Pi device, or authorised derivative
+ device manufactured via the element14 Raspberry Pi Customization Service
+* Redistributions must reproduce the above copyright notice and the
+ following disclaimer in the documentation and/or other materials
+ provided with the distribution.
+* Neither the name of Broadcom Corporation nor the names of its suppliers
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+DISCLAIMER. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+
diff --git a/boot/README.md b/boot/README.md
new file mode 100644
index 0000000..43aa0ea
--- /dev/null
+++ b/boot/README.md
@@ -0,0 +1,17 @@
+This folder contains necessary files to boot rpi-os up. They are downloaded from <https://github.com/raspberrypi/firmware/tree/master/boot>.
+
+`armstub8-rpi4.bin` is compiled from `armstub8.S` by following make rules:
+
+```makefile
+%8-rpi4.o: %8.S
+ $(CC) -DBCM2711=1 -c $< -o $@
+
+%8-rpi4.elf: %8-rpi4.o
+ $(LD) --section-start=.text=0 $< -o $@
+
+%8-rpi4.tmp: %8-rpi4.elf
+ $(OBJCOPY) $< -O binary $@
+
+%8-rpi4.bin: %8-rpi4.tmp
+ dd if=$< ibs=256 of=$@ conv=sync
+```
diff --git a/boot/armstub8-rpi4.bin b/boot/armstub8-rpi4.bin
new file mode 100644
index 0000000..2635844
Binary files /dev/null and b/boot/armstub8-rpi4.bin differ
diff --git a/boot/armstub8.S b/boot/armstub8.S
new file mode 100644
index 0000000..7c5b100
--- /dev/null
+++ b/boot/armstub8.S
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2016-2019 Raspberry Pi (Trading) Ltd.
+ * Copyright (c) 2016 Stephen Warren
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define BIT(x) (1 << (x))
+
+#if BCM2711
+#ifdef HIGH_PERI
+#define LOCAL_CONTROL 0x4c0000000
+#define LOCAL_PRESCALER 0x4c0000008
+#else
+#define LOCAL_CONTROL 0xff800000
+#define LOCAL_PRESCALER 0xff800008
+#endif
+#else
+#define LOCAL_CONTROL 0x40000000
+#define LOCAL_PRESCALER 0x40000008
+#endif
+
+#ifdef HIGH_PERI
+#define GIC_DISTB 0x4c0041000
+#define GIC_CPUB 0x4c0042000
+#else
+#define GIC_DISTB 0xff841000
+#define GIC_CPUB 0xff842000
+#endif
+
+#if BCM2711
+#define OSC_FREQ 54000000
+#else
+#define OSC_FREQ 19200000
+#endif
+
+#define SCR_RW BIT(10)
+#define SCR_HCE BIT(8)
+#define SCR_SMD BIT(7)
+#define SCR_RES1_5 BIT(5)
+#define SCR_RES1_4 BIT(4)
+#define SCR_NS BIT(0)
+#define SCR_VAL \
+ (SCR_RW | SCR_HCE | SCR_SMD | SCR_RES1_5 | SCR_RES1_4 | SCR_NS)
+
+#define ACTLR_VAL \
+ (BIT(0) | BIT(1) | BIT(4) | BIT(5) | BIT(6))
+
+#define CPUECTLR_EL1 S3_1_C15_C2_1
+#define CPUECTLR_EL1_SMPEN BIT(6)
+
+#define SPSR_EL3_D BIT(9)
+#define SPSR_EL3_A BIT(8)
+#define SPSR_EL3_I BIT(7)
+#define SPSR_EL3_F BIT(6)
+#define SPSR_EL3_MODE_EL2H 9
+#define SPSR_EL3_VAL \
+ (SPSR_EL3_D | SPSR_EL3_A | SPSR_EL3_I | SPSR_EL3_F | SPSR_EL3_MODE_EL2H)
+
+#define L2CTLR_EL1 S3_1_C11_C0_2
+
+
+#define GICC_CTRLR 0x0
+#define GICC_PMR 0x4
+#define IT_NR 0x8 // Number of interrupt enable registers (256 total irqs)
+#define GICD_CTRLR 0x0
+#define GICD_IGROUPR 0x80
+
+.globl _start
+_start:
+ /*
+ * LOCAL_CONTROL:
+ * Bit 9 clear: Increment by 1 (vs. 2).
+ * Bit 8 clear: Timer source is 19.2MHz crystal (vs. APB).
+ */
+ ldr x0, =LOCAL_CONTROL
+ str wzr, [x0]
+ /* LOCAL_PRESCALER; divide-by (0x80000000 / register_val) == 1 */
+ mov w1, 0x80000000
+ str w1, [x0, #(LOCAL_PRESCALER - LOCAL_CONTROL)]
+
+ /* Set L2 read/write cache latency to 3 */
+ mrs x0, L2CTLR_EL1
+ mov x1, #0x22
+ orr x0, x0, x1
+ msr L2CTLR_EL1, x0
+
+ /* Set up CNTFRQ_EL0 */
+ ldr x0, =OSC_FREQ
+ msr CNTFRQ_EL0, x0
+
+ /* Set up CNTVOFF_EL2 */
+ msr CNTVOFF_EL2, xzr
+
+ /* Enable FP/SIMD */
+ /* All set bits below are res1; bit 10 (TFP) is set to 0 */
+ mov x0, #0x33ff
+ msr CPTR_EL3, x0
+
+ /* Set up SCR */
+ mov x0, #SCR_VAL
+ msr SCR_EL3, x0
+
+ /* Set up ACTLR */
+ mov x0, #ACTLR_VAL
+ msr ACTLR_EL3, x0
+
+ /* Set SMPEN */
+ mov x0, #CPUECTLR_EL1_SMPEN
+ msr CPUECTLR_EL1, x0
+
+#ifdef GIC
+ bl setup_gic
+#endif
+ /*
+ * Set up SCTLR_EL2
+ * All set bits below are res1. LE, no WXN/I/SA/C/A/M
+ */
+ ldr x0, =0x30c50830
+ msr SCTLR_EL2, x0
+
+ /* Switch to EL2 */
+ mov x0, #SPSR_EL3_VAL
+ msr spsr_el3, x0
+ adr x0, in_el2
+ msr elr_el3, x0
+ eret
+in_el2:
+
+ mrs x6, MPIDR_EL1
+ and x6, x6, #0x3
+ cbz x6, primary_cpu
+
+ adr x5, spin_cpu0
+secondary_spin:
+ wfe
+ ldr x4, [x5, x6, lsl #3]
+ cbz x4, secondary_spin
+ mov x0, #0
+ b boot_kernel
+
+primary_cpu:
+ ldr w4, kernel_entry32
+ ldr w0, dtb_ptr32
+
+boot_kernel:
+ mov x1, #0
+ mov x2, #0
+ mov x3, #0
+ br x4
+
+.ltorg
+
+.org 0xd8
+.globl spin_cpu0
+spin_cpu0:
+ .quad 0
+.org 0xe0
+.globl spin_cpu1
+spin_cpu1:
+ .quad 0
+.org 0xe8
+.globl spin_cpu2
+spin_cpu2:
+ .quad 0
+.org 0xf0
+.globl spin_cpu3
+spin_cpu3:
+ # Shared with next two symbols/.word
+ # FW clears the next 8 bytes after reading the initial value, leaving
+ # the location suitable for use as spin_cpu3
+.org 0xf0
+.globl stub_magic
+stub_magic:
+ .word 0x5afe570b
+.org 0xf4
+.globl stub_version
+stub_version:
+ .word 0
+.org 0xf8
+.globl dtb_ptr32
+dtb_ptr32:
+ .word 0x0
+.org 0xfc
+.globl kernel_entry32
+kernel_entry32:
+ .word 0x0
+
+// Leave space for the ATAGS, which are loaded at 0x100
+// See https://www.raspberrypi.org/forums/viewtopic.php?f=72&t=293320
+.org 0x400
+
+#ifdef GIC
+
+setup_gic: // Called from secure mode - set all interrupts to group 1 and enable.
+ mrs x0, MPIDR_EL1
+ ldr x2, =GIC_DISTB
+ tst x0, #0x3
+ b.ne 2f // secondary cores
+
+ mov w0, #3 // Enable group 0 and 1 IRQs from distributor
+ str w0, [x2, #GICD_CTRLR]
+2:
+ add x1, x2, #(GIC_CPUB - GIC_DISTB)
+ mov w0, #0x1e7
+ str w0, [x1, #GICC_CTRLR] // Enable group 1 IRQs from CPU interface
+ mov w0, #0xff
+ str w0, [x1, #GICC_PMR] // priority mask
+ add x2, x2, #GICD_IGROUPR
+ mov x0, #(IT_NR * 4)
+ mov w1, #~0 // group 1 all the things
+3:
+ subs x0, x0, #4
+ str w1, [x2, x0]
+ b.ne 3b
+ ret
+
+#endif
+
+.globl dtb_space
+dtb_space:
diff --git a/boot/bootcode.bin b/boot/bootcode.bin
new file mode 100644
index 0000000..c7ec95e
Binary files /dev/null and b/boot/bootcode.bin differ
diff --git a/boot/config.txt b/boot/config.txt
new file mode 100644
index 0000000..9a1ce2d
--- /dev/null
+++ b/boot/config.txt
@@ -0,0 +1,9 @@
+arm_64bit=1
+enable_uart=1
+# disable_l2cache=1
+
+[pi4]
+device_tree=
+enable_gic=0
+core_freq_min=250
+armstub=armstub8-rpi4.bin
diff --git a/boot/fixup.dat b/boot/fixup.dat
new file mode 100644
index 0000000..5018e69
Binary files /dev/null and b/boot/fixup.dat differ
diff --git a/boot/fixup4.dat b/boot/fixup4.dat
new file mode 100644
index 0000000..d5fdacd
Binary files /dev/null and b/boot/fixup4.dat differ
diff --git a/boot/fixup4cd.dat b/boot/fixup4cd.dat
new file mode 100644
index 0000000..b55ac8d
Binary files /dev/null and b/boot/fixup4cd.dat differ
diff --git a/boot/fixup_cd.dat b/boot/fixup_cd.dat
new file mode 100644
index 0000000..b55ac8d
Binary files /dev/null and b/boot/fixup_cd.dat differ
diff --git a/boot/generate-image.py b/boot/generate-image.py
new file mode 100755
index 0000000..6870d63
--- /dev/null
+++ b/boot/generate-image.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+
+# TODO: generate filesystem image.
+
+from os import system
+from pathlib import Path
+from argparse import ArgumentParser
+
+def sh(command):
+ print(f'> {command}')
+ assert system(command) == 0
+
+sector_size = 512
+n_sectors = 256 * 1024
+boot_offset = 2048
+n_boot_sectors = 128 * 1024
+filesystem_offset = boot_offset + n_boot_sectors
+n_filesystem_sectors = n_sectors - filesystem_offset
+
+def generate_boot_image(target, files):
+ sh(f'dd if=/dev/zero of={target} seek={n_boot_sectors - 1} bs={sector_size} count=1')
+
+ # "-F 32" specifies FAT32.
+ # "-s 1" specifies one sector per cluster so that we can create a smaller one.
+ sh(f'mkfs.vfat -F 32 -s 1 {target}')
+
+ # copy files into boot partition.
+ for file in files:
+ sh(f'mcopy -i {target} {file} ::{Path(file).name};')
+
+def generate_fs_image(target, files):
+ sh(f'cc ../src/user/mkfs/main.c -o ../build/mkfs -I../src/')
+ file_list=""
+ for file in files:
+ file_list = file_list + "../build/src/user/" + str(file) + ' '
+ print(file_list)
+ sh(f'../build/mkfs {target} {file_list}')
+
+def generate_sd_image(target, boot_image, fs_image):
+ sh(f'dd if=/dev/zero of={target} seek={n_sectors - 1} bs={sector_size} count=1')
+
+ boot_line = f'{boot_offset}, {n_boot_sectors * sector_size // 1024}K, c,'
+ filesystem_line = f'{filesystem_offset}, {n_filesystem_sectors * sector_size // 1024}K, L,'
+ sh(f'printf "{boot_line}\\n{filesystem_line}\\n" | sfdisk {target}')
+
+ sh(f'dd if={boot_image} of={target} seek={boot_offset} conv=notrunc')
+ sh(f'dd if={fs_image} of={target} seek={filesystem_offset} conv=notrunc')
+
+if __name__ == '__main__':
+ parser = ArgumentParser()
+ parser.add_argument('root')
+ parser.add_argument('files', nargs=14)
+ parser.add_argument('user_files', nargs='*')
+
+ args = parser.parse_args()
+
+ boot_image = f'{args.root}/boot.img'
+ sd_image = f'{args.root}/sd.img'
+ fs_image = f'{args.root}/fs.img'
+
+ generate_boot_image(boot_image, args.files)
+ generate_fs_image(fs_image, args.user_files)
+ generate_sd_image(sd_image, boot_image, fs_image)
diff --git a/boot/start.elf b/boot/start.elf
new file mode 100644
index 0000000..3477828
Binary files /dev/null and b/boot/start.elf differ
diff --git a/boot/start4.elf b/boot/start4.elf
new file mode 100644
index 0000000..2a19058
Binary files /dev/null and b/boot/start4.elf differ
diff --git a/boot/start4cd.elf b/boot/start4cd.elf
new file mode 100644
index 0000000..151e8cf
Binary files /dev/null and b/boot/start4cd.elf differ
diff --git a/boot/start_cd.elf b/boot/start_cd.elf
new file mode 100644
index 0000000..7652305
Binary files /dev/null and b/boot/start_cd.elf differ
diff --git a/musl/.gitignore b/musl/.gitignore
new file mode 100644
index 0000000..8043b6b
--- /dev/null
+++ b/musl/.gitignore
@@ -0,0 +1,8 @@
+*.o
+*.lo
+*.a
+*.so
+*.so.1
+config.mak
+lib/musl-gcc.specs
+/obj/
diff --git a/musl/.mailmap b/musl/.mailmap
new file mode 100644
index 0000000..aede9ec
--- /dev/null
+++ b/musl/.mailmap
@@ -0,0 +1 @@
+Ada Worcester
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..1471efa
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,69 @@
+# Kernel build configuration: cross-compile everything under src/ with
+# the aarch64 bare-metal toolchain and produce the kernel8 artifacts.
+set(CMAKE_C_STANDARD 11)
+
+# The same cross-compiler driver handles both C and assembly sources.
+set(CMAKE_C_COMPILER ${aarch64_gcc})
+set(CMAKE_ASM_COMPILER ${aarch64_gcc})
+
+# Project headers plus the in-tree musl libc headers (generated,
+# generic, and aarch64-specific include directories).
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../musl/obj/include)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../musl/include)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../musl/arch/aarch64)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../musl/arch/generic)
+
+# Freestanding kernel flags: no PIC, no stack protector, no host libc or
+# startup files; -mgeneral-regs-only keeps FP/SIMD registers out of
+# kernel code so trap handlers need not save them.
+set(compiler_warnings "-Werror -Wall -Wextra")
+set(compiler_flags "${compiler_warnings} \
+ -fno-pie -fno-pic -fno-stack-protector \
+ -fno-zero-initialized-in-bss \
+ -Og -g -static -fno-builtin -nostdlib -nostdinc -ffreestanding -nostartfiles \
+ -Wl,--whole-archive \
+ -mgeneral-regs-only \
+ -MMD -MP \
+ -mlittle-endian -mcmodel=small -mno-outline-atomics \
+ -mcpu=cortex-a72+nofp -mtune=cortex-a72 -DUSE_ARMVIRT -Wno-error=unused-parameter")
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${compiler_flags}")
+set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${compiler_flags}")
+
+# Link with the custom kernel layout script.
+set(linker_script "${CMAKE_CURRENT_SOURCE_DIR}/linker.ld")
+set(LINK_DEPENDS "${LINK_DEPENDS} ${linker_script}")
+
+# "--build-id=none": remove ".note.gnu.build-id" section.
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} \
+ -T ${linker_script} \
+ -Wl,--build-id=none")
+
+# Component libraries linked into the kernel below.
+add_subdirectory(aarch64)
+add_subdirectory(common)
+add_subdirectory(kernel)
+add_subdirectory(driver)
+add_subdirectory(user)
+add_subdirectory(test)
+
+set(kernel_name kernel8)
+add_executable(${kernel_name}.elf main.c start.S)
+target_link_libraries(${kernel_name}.elf test kernelx driver common aarch64 user)
+
+# Derived artifact paths: disassembly (.asm), section/symbol dump
+# (.hdr), and the raw bootable binary (.img).
+set(kernel_prefix "${CMAKE_CURRENT_BINARY_DIR}/${kernel_name}")
+set(kernel_elf "${kernel_prefix}.elf")
+set(kernel_asm "${kernel_prefix}.asm")
+set(kernel_hdr "${kernel_prefix}.hdr")
+set(kernel_img "${kernel_prefix}.img")
+
+# Annotated disassembly for debugging.
+add_custom_command(
+ OUTPUT ${kernel_asm}
+ COMMAND ${aarch64_objdump} -S -d ${kernel_elf} > ${kernel_asm}
+ DEPENDS ${kernel_elf})
+
+# ELF headers, sections, and symbol table dump.
+add_custom_command(
+ OUTPUT ${kernel_hdr}
+ COMMAND ${aarch64_objdump} -x ${kernel_elf} > ${kernel_hdr}
+ DEPENDS ${kernel_elf})
+
+# Strip ELF metadata to get the raw image the loader boots.
+add_custom_command(
+ OUTPUT ${kernel_img}
+ COMMAND ${aarch64_objcopy} -O binary ${kernel_elf} ${kernel_img}
+ DEPENDS ${kernel_elf})
+
+add_custom_target(kernel ALL DEPENDS ${kernel_asm} ${kernel_hdr} ${kernel_img})
+
+# Export artifact paths for other CMake files in the project
+# (presumably consumed by the image/packaging steps -- TODO confirm).
+set_property(GLOBAL PROPERTY kernel_elf_path ${kernel_elf})
+set_property(GLOBAL PROPERTY kernel_image_path ${kernel_img})
diff --git a/src/aarch64/CMakeLists.txt b/src/aarch64/CMakeLists.txt
new file mode 100644
index 0000000..51675be
--- /dev/null
+++ b/src/aarch64/CMakeLists.txt
@@ -0,0 +1,3 @@
+# Collect every C and assembly source (*.c, *.S) in this directory and
+# build them into the `aarch64` static library.
+file(GLOB aarch64_sources CONFIGURE_DEPENDS "*.[Sc]")
+
+add_library(aarch64 STATIC ${aarch64_sources})
diff --git a/src/aarch64/exception_vector.S b/src/aarch64/exception_vector.S
new file mode 100644
index 0000000..d95ea07
--- /dev/null
+++ b/src/aarch64/exception_vector.S
@@ -0,0 +1,33 @@
+/* Each vector slot is 128 bytes wide (.align 7). Slots we expect to be
+ * taken branch to the common trap entry; every other slot reports a
+ * fatal error with a slot-unique code via trap_error_handler. */
+#define enter_trap .align 7; b trap_entry
+#define trap_error(type) .align 7; mov x0, #(type); b trap_error_handler
+
+.globl exception_vector
+
+/* Base address loaded into VBAR_EL1; aligned to 4KB (.align 12). */
+.align 12
+exception_vector:
+/* Exceptions from EL1 while on SP_EL0 -- never expected, all fatal. */
+el1_sp0:
+ trap_error(0)
+ trap_error(1)
+ trap_error(2)
+ trap_error(3)
+
+/* Exceptions from EL1 on SP_EL1: in-kernel synchronous traps and IRQs
+ * go through the normal trap path; FIQ (6) and SError (7) are fatal. */
+el1_spx:
+ /* if you want to disable in-kernel traps, just replace `enter_trap` with `trap_error` */
+ //trap_error(4)
+ //trap_error(5)
+ enter_trap
+ enter_trap
+ trap_error(6)
+ trap_error(7)
+
+/* Exceptions from EL0 in AArch64 state (syscalls, faults, IRQs). */
+el0_aarch64:
+ enter_trap
+ enter_trap
+ trap_error(10)
+ trap_error(11)
+
+/* Exceptions from EL0 in AArch32 state -- unsupported, all fatal. */
+el0_aarch32:
+ trap_error(12)
+ trap_error(13)
+ trap_error(14)
+ trap_error(15)
diff --git a/src/aarch64/intrinsic.c b/src/aarch64/intrinsic.c
new file mode 100644
index 0000000..21a469d
--- /dev/null
+++ b/src/aarch64/intrinsic.c
@@ -0,0 +1,19 @@
+#include
+
+/**
+ * Busy-wait for at least `n` microseconds.
+ *
+ * Converts `n` to timer ticks using the architectural counter frequency
+ * and spins on the counter until the deadline has passed.
+ *
+ * NOTE(review): `freq / 1000000 * n` divides first, so sub-MHz
+ * remainder of the counter frequency is dropped -- assumes cntfrq is a
+ * multiple of 1 MHz; TODO confirm for the target platform.
+ */
+void delay_us(u64 n)
+{
+ u64 freq = get_clock_frequency();
+ u64 end = get_timestamp(), now;
+ end += freq / 1000000 * n;
+
+ do {
+ now = get_timestamp();
+ } while (now <= end);
+}
+
+/**
+ * Power on secondary cores 1-3 via PSCI CPU_ON, each starting
+ * execution at SECONDARY_CORE_ENTRY. Core 0 is the boot core and is
+ * already running.
+ */
+void smp_init()
+{
+ psci_cpu_on(1, SECONDARY_CORE_ENTRY);
+ psci_cpu_on(2, SECONDARY_CORE_ENTRY);
+ psci_cpu_on(3, SECONDARY_CORE_ENTRY);
+}
diff --git a/src/aarch64/intrinsic.h b/src/aarch64/intrinsic.h
new file mode 100755
index 0000000..7affd57
--- /dev/null
+++ b/src/aarch64/intrinsic.h
@@ -0,0 +1,333 @@
+#pragma once
+
+#include
+
+#define SECONDARY_CORE_ENTRY 0x40000000
+#define PSCI_SYSTEM_OFF 0x84000008
+#define PSCI_SYSTEM_RESET 0x84000009
+#define PSCI_SYSTEM_CPUON 0xC4000003
+
+/**
+ * PSCI (Power State Coordination Interface) function on QEMU's virt platform
+ * -------------------------------------------------------------------------
+ * This function provides an interface to interact with the PSCI (Power State
+ * Coordination Interface) on ARM architectures, which is particularly useful
+ * in virtualized environments like QEMU's virt platform.
+ *
+ * Background:
+ * PSCI is an ARM-defined interface that allows software running at the highest
+ * privilege level (typically a hypervisor or OS kernel) to manage power states
+ * of CPUs. It includes operations to turn CPUs on or off, put them into a low
+ * power state, or reset them.
+ *
+ * In a virtualized environment, such as when using QEMU with the virt machine
+ * type, the PSCI interface can be used to control the power states of virtual
+ * CPUs (vCPUs). This is essential for operations like starting a secondary
+ * vCPU or putting a vCPU into a suspend state.
+ */
+/* Issue a PSCI call: function id in x0, arguments in x1-x3, result
+ * returned in x0 (per the SMC calling convention). `hvc #0` traps to
+ * the hypervisor-level PSCI implementation provided by QEMU. */
+static ALWAYS_INLINE u64 psci_fn(u64 id, u64 arg1, u64 arg2, u64 arg3)
+{
+ u64 result;
+
+ asm volatile("mov x0, %1\n"
+ "mov x1, %2\n"
+ "mov x2, %3\n"
+ "mov x3, %4\n"
+ "hvc #0\n"
+ "mov %0, x0\n"
+ : "=r"(result)
+ : "r"(id), "r"(arg1), "r"(arg2), "r"(arg3)
+ : "x0", "x1", "x2", "x3");
+
+ return result;
+}
+
+static ALWAYS_INLINE u64 psci_cpu_on(u64 cpuid, u64 ep)
+{
+ return psci_fn(PSCI_SYSTEM_CPUON, cpuid, ep, 0);
+}
+
+/* Index of the current core: the low byte (affinity level 0) of
+ * MPIDR_EL1. */
+static WARN_RESULT ALWAYS_INLINE usize cpuid()
+{
+ u64 id;
+ asm volatile("mrs %[x], mpidr_el1" : [x] "=r"(id));
+ return id & 0xff;
+}
+
+/* Instruct compiler not to reorder instructions around the fence. */
+static ALWAYS_INLINE void compiler_fence()
+{
+ asm volatile("" ::: "memory");
+}
+
+static WARN_RESULT ALWAYS_INLINE u64 get_clock_frequency()
+{
+ u64 result;
+ asm volatile("mrs %[freq], cntfrq_el0" : [freq] "=r"(result));
+ return result;
+}
+
+static WARN_RESULT ALWAYS_INLINE u64 get_timestamp()
+{
+ u64 result;
+ compiler_fence();
+ asm volatile("mrs %[cnt], cntpct_el0" : [cnt] "=r"(result));
+ compiler_fence();
+ return result;
+}
+
+/* Instruction synchronization barrier. */
+static ALWAYS_INLINE void arch_isb()
+{
+ asm volatile("isb" ::: "memory");
+}
+
+/* Data synchronization barrier. */
+static ALWAYS_INLINE void arch_dsb_sy()
+{
+ asm volatile("dsb sy" ::: "memory");
+}
+
+static ALWAYS_INLINE void arch_fence()
+{
+ arch_dsb_sy();
+ arch_isb();
+}
+
+/**
+ * The `device_get/put_*` functions do not require protection using
+ * architectural barriers. This is because they are specifically
+ * designed to access device memory regions, which are already marked as
+ * nGnRnE (Non-Gathering, Non-Reordering, No Early Write Acknowledgement)
+ * in the `kernel_pt_level0`.
+ */
+static ALWAYS_INLINE void device_put_u32(u64 addr, u32 value)
+{
+ compiler_fence();
+ *(volatile u32 *)addr = value;
+ compiler_fence();
+}
+
+static WARN_RESULT ALWAYS_INLINE u32 device_get_u32(u64 addr)
+{
+ compiler_fence();
+ u32 value = *(volatile u32 *)addr;
+ compiler_fence();
+ return value;
+}
+
+/* Read Exception Syndrome Register (EL1). */
+static WARN_RESULT ALWAYS_INLINE u64 arch_get_esr()
+{
+ u64 result;
+ arch_fence();
+ asm volatile("mrs %[x], esr_el1" : [x] "=r"(result));
+ arch_fence();
+ return result;
+}
+
+/* Reset Exception Syndrome Register (EL1) to zero. */
+static ALWAYS_INLINE void arch_reset_esr()
+{
+ arch_fence();
+ asm volatile("msr esr_el1, %[x]" : : [x] "r"(0ll));
+ arch_fence();
+}
+
+/* Read Exception Link Register (EL1). */
+static WARN_RESULT ALWAYS_INLINE u64 arch_get_elr()
+{
+ u64 result;
+ arch_fence();
+ asm volatile("mrs %[x], elr_el1" : [x] "=r"(result));
+ arch_fence();
+ return result;
+}
+
+/* Set vector base (virtual) address register (EL1). */
+static ALWAYS_INLINE void arch_set_vbar(void *ptr)
+{
+ arch_fence();
+ asm volatile("msr vbar_el1, %[x]" : : [x] "r"(ptr));
+ arch_fence();
+}
+
+/* Flush TLB entries. */
+static ALWAYS_INLINE void arch_tlbi_vmalle1is()
+{
+ arch_fence();
+ asm volatile("tlbi vmalle1is");
+ arch_fence();
+}
+
+/* Set Translation Table Base Register 0 (EL1). */
+static ALWAYS_INLINE void arch_set_ttbr0(u64 addr)
+{
+ arch_fence();
+ asm volatile("msr ttbr0_el1, %[x]" : : [x] "r"(addr));
+ arch_tlbi_vmalle1is();
+}
+
+/* Get Translation Table Base Register 0 (EL1). */
+static inline WARN_RESULT u64 arch_get_ttbr0()
+{
+ u64 result;
+ arch_fence();
+ asm volatile("mrs %[x], ttbr0_el1" : [x] "=r"(result));
+ arch_fence();
+ return result;
+}
+
+/* Set Translation Table Base Register 1 (EL1). */
+static ALWAYS_INLINE void arch_set_ttbr1(u64 addr)
+{
+ arch_fence();
+ asm volatile("msr ttbr1_el1, %[x]" : : [x] "r"(addr));
+ arch_tlbi_vmalle1is();
+}
+
+/* Read Fault Address Register. */
+static inline u64 arch_get_far()
+{
+ u64 result;
+ arch_fence();
+ asm volatile("mrs %[x], far_el1" : [x] "=r"(result));
+ arch_fence();
+ return result;
+}
+
+static inline WARN_RESULT u64 arch_get_tid()
+{
+ u64 tid;
+ asm volatile("mrs %[x], tpidr_el1" : [x] "=r"(tid));
+ return tid;
+}
+
+static inline void arch_set_tid(u64 tid)
+{
+ arch_fence();
+ asm volatile("msr tpidr_el1, %[x]" : : [x] "r"(tid));
+ arch_fence();
+}
+
+/* Get User Stack Pointer. */
+static inline WARN_RESULT u64 arch_get_usp()
+{
+ u64 usp;
+ arch_fence();
+ asm volatile("mrs %[x], sp_el0" : [x] "=r"(usp));
+ arch_fence();
+ return usp;
+}
+
+/* Set User Stack Pointer. */
+static inline void arch_set_usp(u64 usp)
+{
+ arch_fence();
+ asm volatile("msr sp_el0, %[x]" : : [x] "r"(usp));
+ arch_fence();
+}
+
+static inline WARN_RESULT u64 arch_get_tid0()
+{
+ u64 tid;
+ asm volatile("mrs %[x], tpidr_el0" : [x] "=r"(tid));
+ return tid;
+}
+
+static inline void arch_set_tid0(u64 tid)
+{
+ arch_fence();
+ asm volatile("msr tpidr_el0, %[x]" : : [x] "r"(tid));
+ arch_fence();
+}
+
+static ALWAYS_INLINE void arch_sev()
+{
+ asm volatile("sev" ::: "memory");
+}
+
+static ALWAYS_INLINE void arch_wfe()
+{
+ asm volatile("wfe" ::: "memory");
+}
+
+static ALWAYS_INLINE void arch_wfi()
+{
+ asm volatile("wfi" ::: "memory");
+}
+
+static ALWAYS_INLINE void arch_yield()
+{
+ asm volatile("yield" ::: "memory");
+}
+
+static ALWAYS_INLINE u64 get_cntv_ctl_el0()
+{
+ u64 c;
+ asm volatile("mrs %0, cntv_ctl_el0" : "=r"(c));
+ return c;
+}
+
+static ALWAYS_INLINE void set_cntv_ctl_el0(u64 c)
+{
+ asm volatile("msr cntv_ctl_el0, %0" : : "r"(c));
+}
+
+static ALWAYS_INLINE void set_cntv_tval_el0(u64 t)
+{
+ asm volatile("msr cntv_tval_el0, %0" : : "r"(t));
+}
+
+/**
+ * Unmask all exceptions by clearing the DAIF bits.
+ *
+ * Returns true if traps were already enabled (DAIF == 0) and nothing
+ * was written; false if this call actually unmasked them. Callers use
+ * the return value to restore the previous mask state afterwards
+ * (see arch_with_trap).
+ */
+static inline WARN_RESULT bool _arch_enable_trap()
+{
+ u64 t;
+ asm volatile("mrs %[x], daif" : [x] "=r"(t));
+ if (t == 0)
+ return true;
+ asm volatile("msr daif, %[x]" ::[x] "r"(0ll));
+ return false;
+}
+
+/**
+ * Mask all exceptions by setting the D, A, I and F bits of DAIF
+ * (bits 9:6, written as 0xf << 6).
+ *
+ * Returns false if traps were already masked (DAIF != 0) and nothing
+ * was written; true if this call actually masked them -- the mirror of
+ * _arch_enable_trap's contract.
+ */
+static inline WARN_RESULT bool _arch_disable_trap()
+{
+ u64 t;
+ asm volatile("mrs %[x], daif" : [x] "=r"(t));
+ if (t != 0)
+ return false;
+ asm volatile("msr daif, %[x]" ::[x] "r"(0xfll << 6));
+ return true;
+}
+
+/* Run the following statement/block with traps enabled, then restore
+ * the previous mask state: the loop body executes exactly once; on
+ * exit, traps are re-disabled only if they were disabled on entry
+ * (i.e. _arch_enable_trap returned false). */
+#define arch_with_trap \
+ for (int __t_e = _arch_enable_trap(), __t_i = 0; __t_i < 1; \
+ __t_i++, __t_e || _arch_disable_trap())
+
+static ALWAYS_INLINE NO_RETURN void arch_stop_cpu()
+{
+ while (1)
+ arch_wfe();
+}
+
+/* Overwrite the saved return address in the current frame record
+ * (slot [fp + 8]) so the enclosing function "returns" to `addr`.
+ * NOTE(review): relies on the AArch64 frame-record layout {fp, lr}
+ * being present -- presumably the build keeps frame pointers; verify
+ * against the compiler flags. */
+#define set_return_addr(addr) \
+ (compiler_fence(), \
+ ((volatile u64 *)__builtin_frame_address(0))[1] = (u64)(addr), \
+ compiler_fence())
+
+void delay_us(u64 n);
+u64 psci_cpu_on(u64 cpuid, u64 ep);
+void smp_init();
+
+static inline u64 arch_get_currentel()
+{
+ u64 result;
+ asm volatile("mrs %[x], CurrentEL" : [x] "=r"(result));
+ return result; // bits[3:2] indicate EL (e.g., 0x4 for EL1, 0x0 for EL0)
+}
+
+static inline u64 arch_get_sctlr()
+{
+ u64 result;
+ asm volatile("mrs %[x], sctlr_el1" : [x] "=r"(result));
+ return result;
+}
\ No newline at end of file
diff --git a/src/aarch64/kernel_pt.c b/src/aarch64/kernel_pt.c
new file mode 100644
index 0000000..4e164a9
--- /dev/null
+++ b/src/aarch64/kernel_pt.c
@@ -0,0 +1,394 @@
+#include
+
+/**
+ * The layout of physical memory space of virt:
+ *
+ * 0..128MB is space for a flash device so we can run bootrom code such as UEFI.
+ * 128MB..256MB is used for miscellaneous device I/O.
+ * 256MB..1GB is reserved for possible future PCI support (ie where the
+ * PCI memory window will go if we add a PCI host controller).
+ * 1GB and up is RAM (which may happily spill over into the
+ * high memory region beyond 4GB).
+ * This represents a compromise between how much RAM can be given to
+ * a 32 bit VM and leaving space for expansion and in particular for PCI.
+ * Note that devices should generally be placed at multiples of 0x10000,
+ * to accommodate guests using 64K pages.
+ */
+
+/**
+ * Bits used to index: 29:21
+ * Size per entry: 2MB
+ * Address Span: [0x0, 0x40000000 (1GB)]
+ */
+__attribute__((__aligned__(PAGE_SIZE))) PTEntries _kernel_pt_lv2_dev = {
+ // Space up to 0x8000000 is reserved for a boot ROM
+ 0x0 & ~PTE_VALID,
+ 0x200000 & ~PTE_VALID,
+ 0x400000 & ~PTE_VALID,
+ 0x600000 & ~PTE_VALID,
+ 0x800000 & ~PTE_VALID,
+ 0xa00000 & ~PTE_VALID,
+ 0xc00000 & ~PTE_VALID,
+ 0xe00000 & ~PTE_VALID,
+ 0x1000000 & ~PTE_VALID,
+ 0x1200000 & ~PTE_VALID,
+ 0x1400000 & ~PTE_VALID,
+ 0x1600000 & ~PTE_VALID,
+ 0x1800000 & ~PTE_VALID,
+ 0x1a00000 & ~PTE_VALID,
+ 0x1c00000 & ~PTE_VALID,
+ 0x1e00000 & ~PTE_VALID,
+ 0x2000000 & ~PTE_VALID,
+ 0x2200000 & ~PTE_VALID,
+ 0x2400000 & ~PTE_VALID,
+ 0x2600000 & ~PTE_VALID,
+ 0x2800000 & ~PTE_VALID,
+ 0x2a00000 & ~PTE_VALID,
+ 0x2c00000 & ~PTE_VALID,
+ 0x2e00000 & ~PTE_VALID,
+ 0x3000000 & ~PTE_VALID,
+ 0x3200000 & ~PTE_VALID,
+ 0x3400000 & ~PTE_VALID,
+ 0x3600000 & ~PTE_VALID,
+ 0x3800000 & ~PTE_VALID,
+ 0x3a00000 & ~PTE_VALID,
+ 0x3c00000 & ~PTE_VALID,
+ 0x3e00000 & ~PTE_VALID,
+ 0x4000000 & ~PTE_VALID,
+ 0x4200000 & ~PTE_VALID,
+ 0x4400000 & ~PTE_VALID,
+ 0x4600000 & ~PTE_VALID,
+ 0x4800000 & ~PTE_VALID,
+ 0x4a00000 & ~PTE_VALID,
+ 0x4c00000 & ~PTE_VALID,
+ 0x4e00000 & ~PTE_VALID,
+ 0x5000000 & ~PTE_VALID,
+ 0x5200000 & ~PTE_VALID,
+ 0x5400000 & ~PTE_VALID,
+ 0x5600000 & ~PTE_VALID,
+ 0x5800000 & ~PTE_VALID,
+ 0x5a00000 & ~PTE_VALID,
+ 0x5c00000 & ~PTE_VALID,
+ 0x5e00000 & ~PTE_VALID,
+ 0x6000000 & ~PTE_VALID,
+ 0x6200000 & ~PTE_VALID,
+ 0x6400000 & ~PTE_VALID,
+ 0x6600000 & ~PTE_VALID,
+ 0x6800000 & ~PTE_VALID,
+ 0x6a00000 & ~PTE_VALID,
+ 0x6c00000 & ~PTE_VALID,
+ 0x6e00000 & ~PTE_VALID,
+ 0x7000000 & ~PTE_VALID,
+ 0x7200000 & ~PTE_VALID,
+ 0x7400000 & ~PTE_VALID,
+ 0x7600000 & ~PTE_VALID,
+ 0x7800000 & ~PTE_VALID,
+ 0x7a00000 & ~PTE_VALID,
+ 0x7c00000 & ~PTE_VALID,
+ 0x7e00000 & ~PTE_VALID,
+
+ // GIC
+ 0x8000000 | PTE_KERNEL_DEVICE,
+ 0x8200000 | PTE_KERNEL_DEVICE,
+ 0x8400000 | PTE_KERNEL_DEVICE,
+ 0x8600000 | PTE_KERNEL_DEVICE,
+ 0x8800000 | PTE_KERNEL_DEVICE,
+ 0x8a00000 | PTE_KERNEL_DEVICE,
+ 0x8c00000 | PTE_KERNEL_DEVICE,
+ 0x8e00000 | PTE_KERNEL_DEVICE,
+
+ // UART0
+ 0x9000000 | PTE_KERNEL_DEVICE,
+ 0x9200000 | PTE_KERNEL_DEVICE,
+ 0x9400000 | PTE_KERNEL_DEVICE,
+ 0x9600000 | PTE_KERNEL_DEVICE,
+ 0x9800000 | PTE_KERNEL_DEVICE,
+ 0x9a00000 | PTE_KERNEL_DEVICE,
+ 0x9c00000 | PTE_KERNEL_DEVICE,
+ 0x9e00000 | PTE_KERNEL_DEVICE,
+
+ // VIRTIO
+ 0xa000000 | PTE_KERNEL_DEVICE,
+ 0xa200000 | PTE_KERNEL_DEVICE,
+ 0xa400000 | PTE_KERNEL_DEVICE,
+ 0xa600000 | PTE_KERNEL_DEVICE,
+ 0xa800000 | PTE_KERNEL_DEVICE,
+ 0xaa00000 | PTE_KERNEL_DEVICE,
+ 0xac00000 | PTE_KERNEL_DEVICE,
+ 0xae00000 | PTE_KERNEL_DEVICE,
+};
+
+/**
+ * Bits used to index: 29:21
+ * Size per entry: 2MB
+ * Address Span: [0x40000000 (1GB), 0x80000000 (2GB)]
+ */
+__attribute__((__aligned__(PAGE_SIZE))) PTEntries _kernel_pt_lv2_ram = {
+ 0x40000000 | PTE_KERNEL_DATA, 0x40200000 | PTE_KERNEL_DATA,
+ 0x40400000 | PTE_KERNEL_DATA, 0x40600000 | PTE_KERNEL_DATA,
+ 0x40800000 | PTE_KERNEL_DATA, 0x40a00000 | PTE_KERNEL_DATA,
+ 0x40c00000 | PTE_KERNEL_DATA, 0x40e00000 | PTE_KERNEL_DATA,
+ 0x41000000 | PTE_KERNEL_DATA, 0x41200000 | PTE_KERNEL_DATA,
+ 0x41400000 | PTE_KERNEL_DATA, 0x41600000 | PTE_KERNEL_DATA,
+ 0x41800000 | PTE_KERNEL_DATA, 0x41a00000 | PTE_KERNEL_DATA,
+ 0x41c00000 | PTE_KERNEL_DATA, 0x41e00000 | PTE_KERNEL_DATA,
+ 0x42000000 | PTE_KERNEL_DATA, 0x42200000 | PTE_KERNEL_DATA,
+ 0x42400000 | PTE_KERNEL_DATA, 0x42600000 | PTE_KERNEL_DATA,
+ 0x42800000 | PTE_KERNEL_DATA, 0x42a00000 | PTE_KERNEL_DATA,
+ 0x42c00000 | PTE_KERNEL_DATA, 0x42e00000 | PTE_KERNEL_DATA,
+ 0x43000000 | PTE_KERNEL_DATA, 0x43200000 | PTE_KERNEL_DATA,
+ 0x43400000 | PTE_KERNEL_DATA, 0x43600000 | PTE_KERNEL_DATA,
+ 0x43800000 | PTE_KERNEL_DATA, 0x43a00000 | PTE_KERNEL_DATA,
+ 0x43c00000 | PTE_KERNEL_DATA, 0x43e00000 | PTE_KERNEL_DATA,
+ 0x44000000 | PTE_KERNEL_DATA, 0x44200000 | PTE_KERNEL_DATA,
+ 0x44400000 | PTE_KERNEL_DATA, 0x44600000 | PTE_KERNEL_DATA,
+ 0x44800000 | PTE_KERNEL_DATA, 0x44a00000 | PTE_KERNEL_DATA,
+ 0x44c00000 | PTE_KERNEL_DATA, 0x44e00000 | PTE_KERNEL_DATA,
+ 0x45000000 | PTE_KERNEL_DATA, 0x45200000 | PTE_KERNEL_DATA,
+ 0x45400000 | PTE_KERNEL_DATA, 0x45600000 | PTE_KERNEL_DATA,
+ 0x45800000 | PTE_KERNEL_DATA, 0x45a00000 | PTE_KERNEL_DATA,
+ 0x45c00000 | PTE_KERNEL_DATA, 0x45e00000 | PTE_KERNEL_DATA,
+ 0x46000000 | PTE_KERNEL_DATA, 0x46200000 | PTE_KERNEL_DATA,
+ 0x46400000 | PTE_KERNEL_DATA, 0x46600000 | PTE_KERNEL_DATA,
+ 0x46800000 | PTE_KERNEL_DATA, 0x46a00000 | PTE_KERNEL_DATA,
+ 0x46c00000 | PTE_KERNEL_DATA, 0x46e00000 | PTE_KERNEL_DATA,
+ 0x47000000 | PTE_KERNEL_DATA, 0x47200000 | PTE_KERNEL_DATA,
+ 0x47400000 | PTE_KERNEL_DATA, 0x47600000 | PTE_KERNEL_DATA,
+ 0x47800000 | PTE_KERNEL_DATA, 0x47a00000 | PTE_KERNEL_DATA,
+ 0x47c00000 | PTE_KERNEL_DATA, 0x47e00000 | PTE_KERNEL_DATA,
+ 0x48000000 | PTE_KERNEL_DATA, 0x48200000 | PTE_KERNEL_DATA,
+ 0x48400000 | PTE_KERNEL_DATA, 0x48600000 | PTE_KERNEL_DATA,
+ 0x48800000 | PTE_KERNEL_DATA, 0x48a00000 | PTE_KERNEL_DATA,
+ 0x48c00000 | PTE_KERNEL_DATA, 0x48e00000 | PTE_KERNEL_DATA,
+ 0x49000000 | PTE_KERNEL_DATA, 0x49200000 | PTE_KERNEL_DATA,
+ 0x49400000 | PTE_KERNEL_DATA, 0x49600000 | PTE_KERNEL_DATA,
+ 0x49800000 | PTE_KERNEL_DATA, 0x49a00000 | PTE_KERNEL_DATA,
+ 0x49c00000 | PTE_KERNEL_DATA, 0x49e00000 | PTE_KERNEL_DATA,
+ 0x4a000000 | PTE_KERNEL_DATA, 0x4a200000 | PTE_KERNEL_DATA,
+ 0x4a400000 | PTE_KERNEL_DATA, 0x4a600000 | PTE_KERNEL_DATA,
+ 0x4a800000 | PTE_KERNEL_DATA, 0x4aa00000 | PTE_KERNEL_DATA,
+ 0x4ac00000 | PTE_KERNEL_DATA, 0x4ae00000 | PTE_KERNEL_DATA,
+ 0x4b000000 | PTE_KERNEL_DATA, 0x4b200000 | PTE_KERNEL_DATA,
+ 0x4b400000 | PTE_KERNEL_DATA, 0x4b600000 | PTE_KERNEL_DATA,
+ 0x4b800000 | PTE_KERNEL_DATA, 0x4ba00000 | PTE_KERNEL_DATA,
+ 0x4bc00000 | PTE_KERNEL_DATA, 0x4be00000 | PTE_KERNEL_DATA,
+ 0x4c000000 | PTE_KERNEL_DATA, 0x4c200000 | PTE_KERNEL_DATA,
+ 0x4c400000 | PTE_KERNEL_DATA, 0x4c600000 | PTE_KERNEL_DATA,
+ 0x4c800000 | PTE_KERNEL_DATA, 0x4ca00000 | PTE_KERNEL_DATA,
+ 0x4cc00000 | PTE_KERNEL_DATA, 0x4ce00000 | PTE_KERNEL_DATA,
+ 0x4d000000 | PTE_KERNEL_DATA, 0x4d200000 | PTE_KERNEL_DATA,
+ 0x4d400000 | PTE_KERNEL_DATA, 0x4d600000 | PTE_KERNEL_DATA,
+ 0x4d800000 | PTE_KERNEL_DATA, 0x4da00000 | PTE_KERNEL_DATA,
+ 0x4dc00000 | PTE_KERNEL_DATA, 0x4de00000 | PTE_KERNEL_DATA,
+ 0x4e000000 | PTE_KERNEL_DATA, 0x4e200000 | PTE_KERNEL_DATA,
+ 0x4e400000 | PTE_KERNEL_DATA, 0x4e600000 | PTE_KERNEL_DATA,
+ 0x4e800000 | PTE_KERNEL_DATA, 0x4ea00000 | PTE_KERNEL_DATA,
+ 0x4ec00000 | PTE_KERNEL_DATA, 0x4ee00000 | PTE_KERNEL_DATA,
+ 0x4f000000 | PTE_KERNEL_DATA, 0x4f200000 | PTE_KERNEL_DATA,
+ 0x4f400000 | PTE_KERNEL_DATA, 0x4f600000 | PTE_KERNEL_DATA,
+ 0x4f800000 | PTE_KERNEL_DATA, 0x4fa00000 | PTE_KERNEL_DATA,
+ 0x4fc00000 | PTE_KERNEL_DATA, 0x4fe00000 | PTE_KERNEL_DATA,
+ 0x50000000 | PTE_KERNEL_DATA, 0x50200000 | PTE_KERNEL_DATA,
+ 0x50400000 | PTE_KERNEL_DATA, 0x50600000 | PTE_KERNEL_DATA,
+ 0x50800000 | PTE_KERNEL_DATA, 0x50a00000 | PTE_KERNEL_DATA,
+ 0x50c00000 | PTE_KERNEL_DATA, 0x50e00000 | PTE_KERNEL_DATA,
+ 0x51000000 | PTE_KERNEL_DATA, 0x51200000 | PTE_KERNEL_DATA,
+ 0x51400000 | PTE_KERNEL_DATA, 0x51600000 | PTE_KERNEL_DATA,
+ 0x51800000 | PTE_KERNEL_DATA, 0x51a00000 | PTE_KERNEL_DATA,
+ 0x51c00000 | PTE_KERNEL_DATA, 0x51e00000 | PTE_KERNEL_DATA,
+ 0x52000000 | PTE_KERNEL_DATA, 0x52200000 | PTE_KERNEL_DATA,
+ 0x52400000 | PTE_KERNEL_DATA, 0x52600000 | PTE_KERNEL_DATA,
+ 0x52800000 | PTE_KERNEL_DATA, 0x52a00000 | PTE_KERNEL_DATA,
+ 0x52c00000 | PTE_KERNEL_DATA, 0x52e00000 | PTE_KERNEL_DATA,
+ 0x53000000 | PTE_KERNEL_DATA, 0x53200000 | PTE_KERNEL_DATA,
+ 0x53400000 | PTE_KERNEL_DATA, 0x53600000 | PTE_KERNEL_DATA,
+ 0x53800000 | PTE_KERNEL_DATA, 0x53a00000 | PTE_KERNEL_DATA,
+ 0x53c00000 | PTE_KERNEL_DATA, 0x53e00000 | PTE_KERNEL_DATA,
+ 0x54000000 | PTE_KERNEL_DATA, 0x54200000 | PTE_KERNEL_DATA,
+ 0x54400000 | PTE_KERNEL_DATA, 0x54600000 | PTE_KERNEL_DATA,
+ 0x54800000 | PTE_KERNEL_DATA, 0x54a00000 | PTE_KERNEL_DATA,
+ 0x54c00000 | PTE_KERNEL_DATA, 0x54e00000 | PTE_KERNEL_DATA,
+ 0x55000000 | PTE_KERNEL_DATA, 0x55200000 | PTE_KERNEL_DATA,
+ 0x55400000 | PTE_KERNEL_DATA, 0x55600000 | PTE_KERNEL_DATA,
+ 0x55800000 | PTE_KERNEL_DATA, 0x55a00000 | PTE_KERNEL_DATA,
+ 0x55c00000 | PTE_KERNEL_DATA, 0x55e00000 | PTE_KERNEL_DATA,
+ 0x56000000 | PTE_KERNEL_DATA, 0x56200000 | PTE_KERNEL_DATA,
+ 0x56400000 | PTE_KERNEL_DATA, 0x56600000 | PTE_KERNEL_DATA,
+ 0x56800000 | PTE_KERNEL_DATA, 0x56a00000 | PTE_KERNEL_DATA,
+ 0x56c00000 | PTE_KERNEL_DATA, 0x56e00000 | PTE_KERNEL_DATA,
+ 0x57000000 | PTE_KERNEL_DATA, 0x57200000 | PTE_KERNEL_DATA,
+ 0x57400000 | PTE_KERNEL_DATA, 0x57600000 | PTE_KERNEL_DATA,
+ 0x57800000 | PTE_KERNEL_DATA, 0x57a00000 | PTE_KERNEL_DATA,
+ 0x57c00000 | PTE_KERNEL_DATA, 0x57e00000 | PTE_KERNEL_DATA,
+ 0x58000000 | PTE_KERNEL_DATA, 0x58200000 | PTE_KERNEL_DATA,
+ 0x58400000 | PTE_KERNEL_DATA, 0x58600000 | PTE_KERNEL_DATA,
+ 0x58800000 | PTE_KERNEL_DATA, 0x58a00000 | PTE_KERNEL_DATA,
+ 0x58c00000 | PTE_KERNEL_DATA, 0x58e00000 | PTE_KERNEL_DATA,
+ 0x59000000 | PTE_KERNEL_DATA, 0x59200000 | PTE_KERNEL_DATA,
+ 0x59400000 | PTE_KERNEL_DATA, 0x59600000 | PTE_KERNEL_DATA,
+ 0x59800000 | PTE_KERNEL_DATA, 0x59a00000 | PTE_KERNEL_DATA,
+ 0x59c00000 | PTE_KERNEL_DATA, 0x59e00000 | PTE_KERNEL_DATA,
+ 0x5a000000 | PTE_KERNEL_DATA, 0x5a200000 | PTE_KERNEL_DATA,
+ 0x5a400000 | PTE_KERNEL_DATA, 0x5a600000 | PTE_KERNEL_DATA,
+ 0x5a800000 | PTE_KERNEL_DATA, 0x5aa00000 | PTE_KERNEL_DATA,
+ 0x5ac00000 | PTE_KERNEL_DATA, 0x5ae00000 | PTE_KERNEL_DATA,
+ 0x5b000000 | PTE_KERNEL_DATA, 0x5b200000 | PTE_KERNEL_DATA,
+ 0x5b400000 | PTE_KERNEL_DATA, 0x5b600000 | PTE_KERNEL_DATA,
+ 0x5b800000 | PTE_KERNEL_DATA, 0x5ba00000 | PTE_KERNEL_DATA,
+ 0x5bc00000 | PTE_KERNEL_DATA, 0x5be00000 | PTE_KERNEL_DATA,
+ 0x5c000000 | PTE_KERNEL_DATA, 0x5c200000 | PTE_KERNEL_DATA,
+ 0x5c400000 | PTE_KERNEL_DATA, 0x5c600000 | PTE_KERNEL_DATA,
+ 0x5c800000 | PTE_KERNEL_DATA, 0x5ca00000 | PTE_KERNEL_DATA,
+ 0x5cc00000 | PTE_KERNEL_DATA, 0x5ce00000 | PTE_KERNEL_DATA,
+ 0x5d000000 | PTE_KERNEL_DATA, 0x5d200000 | PTE_KERNEL_DATA,
+ 0x5d400000 | PTE_KERNEL_DATA, 0x5d600000 | PTE_KERNEL_DATA,
+ 0x5d800000 | PTE_KERNEL_DATA, 0x5da00000 | PTE_KERNEL_DATA,
+ 0x5dc00000 | PTE_KERNEL_DATA, 0x5de00000 | PTE_KERNEL_DATA,
+ 0x5e000000 | PTE_KERNEL_DATA, 0x5e200000 | PTE_KERNEL_DATA,
+ 0x5e400000 | PTE_KERNEL_DATA, 0x5e600000 | PTE_KERNEL_DATA,
+ 0x5e800000 | PTE_KERNEL_DATA, 0x5ea00000 | PTE_KERNEL_DATA,
+ 0x5ec00000 | PTE_KERNEL_DATA, 0x5ee00000 | PTE_KERNEL_DATA,
+ 0x5f000000 | PTE_KERNEL_DATA, 0x5f200000 | PTE_KERNEL_DATA,
+ 0x5f400000 | PTE_KERNEL_DATA, 0x5f600000 | PTE_KERNEL_DATA,
+ 0x5f800000 | PTE_KERNEL_DATA, 0x5fa00000 | PTE_KERNEL_DATA,
+ 0x5fc00000 | PTE_KERNEL_DATA, 0x5fe00000 | PTE_KERNEL_DATA,
+ 0x60000000 | PTE_KERNEL_DATA, 0x60200000 | PTE_KERNEL_DATA,
+ 0x60400000 | PTE_KERNEL_DATA, 0x60600000 | PTE_KERNEL_DATA,
+ 0x60800000 | PTE_KERNEL_DATA, 0x60a00000 | PTE_KERNEL_DATA,
+ 0x60c00000 | PTE_KERNEL_DATA, 0x60e00000 | PTE_KERNEL_DATA,
+ 0x61000000 | PTE_KERNEL_DATA, 0x61200000 | PTE_KERNEL_DATA,
+ 0x61400000 | PTE_KERNEL_DATA, 0x61600000 | PTE_KERNEL_DATA,
+ 0x61800000 | PTE_KERNEL_DATA, 0x61a00000 | PTE_KERNEL_DATA,
+ 0x61c00000 | PTE_KERNEL_DATA, 0x61e00000 | PTE_KERNEL_DATA,
+ 0x62000000 | PTE_KERNEL_DATA, 0x62200000 | PTE_KERNEL_DATA,
+ 0x62400000 | PTE_KERNEL_DATA, 0x62600000 | PTE_KERNEL_DATA,
+ 0x62800000 | PTE_KERNEL_DATA, 0x62a00000 | PTE_KERNEL_DATA,
+ 0x62c00000 | PTE_KERNEL_DATA, 0x62e00000 | PTE_KERNEL_DATA,
+ 0x63000000 | PTE_KERNEL_DATA, 0x63200000 | PTE_KERNEL_DATA,
+ 0x63400000 | PTE_KERNEL_DATA, 0x63600000 | PTE_KERNEL_DATA,
+ 0x63800000 | PTE_KERNEL_DATA, 0x63a00000 | PTE_KERNEL_DATA,
+ 0x63c00000 | PTE_KERNEL_DATA, 0x63e00000 | PTE_KERNEL_DATA,
+ 0x64000000 | PTE_KERNEL_DATA, 0x64200000 | PTE_KERNEL_DATA,
+ 0x64400000 | PTE_KERNEL_DATA, 0x64600000 | PTE_KERNEL_DATA,
+ 0x64800000 | PTE_KERNEL_DATA, 0x64a00000 | PTE_KERNEL_DATA,
+ 0x64c00000 | PTE_KERNEL_DATA, 0x64e00000 | PTE_KERNEL_DATA,
+ 0x65000000 | PTE_KERNEL_DATA, 0x65200000 | PTE_KERNEL_DATA,
+ 0x65400000 | PTE_KERNEL_DATA, 0x65600000 | PTE_KERNEL_DATA,
+ 0x65800000 | PTE_KERNEL_DATA, 0x65a00000 | PTE_KERNEL_DATA,
+ 0x65c00000 | PTE_KERNEL_DATA, 0x65e00000 | PTE_KERNEL_DATA,
+ 0x66000000 | PTE_KERNEL_DATA, 0x66200000 | PTE_KERNEL_DATA,
+ 0x66400000 | PTE_KERNEL_DATA, 0x66600000 | PTE_KERNEL_DATA,
+ 0x66800000 | PTE_KERNEL_DATA, 0x66a00000 | PTE_KERNEL_DATA,
+ 0x66c00000 | PTE_KERNEL_DATA, 0x66e00000 | PTE_KERNEL_DATA,
+ 0x67000000 | PTE_KERNEL_DATA, 0x67200000 | PTE_KERNEL_DATA,
+ 0x67400000 | PTE_KERNEL_DATA, 0x67600000 | PTE_KERNEL_DATA,
+ 0x67800000 | PTE_KERNEL_DATA, 0x67a00000 | PTE_KERNEL_DATA,
+ 0x67c00000 | PTE_KERNEL_DATA, 0x67e00000 | PTE_KERNEL_DATA,
+ 0x68000000 | PTE_KERNEL_DATA, 0x68200000 | PTE_KERNEL_DATA,
+ 0x68400000 | PTE_KERNEL_DATA, 0x68600000 | PTE_KERNEL_DATA,
+ 0x68800000 | PTE_KERNEL_DATA, 0x68a00000 | PTE_KERNEL_DATA,
+ 0x68c00000 | PTE_KERNEL_DATA, 0x68e00000 | PTE_KERNEL_DATA,
+ 0x69000000 | PTE_KERNEL_DATA, 0x69200000 | PTE_KERNEL_DATA,
+ 0x69400000 | PTE_KERNEL_DATA, 0x69600000 | PTE_KERNEL_DATA,
+ 0x69800000 | PTE_KERNEL_DATA, 0x69a00000 | PTE_KERNEL_DATA,
+ 0x69c00000 | PTE_KERNEL_DATA, 0x69e00000 | PTE_KERNEL_DATA,
+ 0x6a000000 | PTE_KERNEL_DATA, 0x6a200000 | PTE_KERNEL_DATA,
+ 0x6a400000 | PTE_KERNEL_DATA, 0x6a600000 | PTE_KERNEL_DATA,
+ 0x6a800000 | PTE_KERNEL_DATA, 0x6aa00000 | PTE_KERNEL_DATA,
+ 0x6ac00000 | PTE_KERNEL_DATA, 0x6ae00000 | PTE_KERNEL_DATA,
+ 0x6b000000 | PTE_KERNEL_DATA, 0x6b200000 | PTE_KERNEL_DATA,
+ 0x6b400000 | PTE_KERNEL_DATA, 0x6b600000 | PTE_KERNEL_DATA,
+ 0x6b800000 | PTE_KERNEL_DATA, 0x6ba00000 | PTE_KERNEL_DATA,
+ 0x6bc00000 | PTE_KERNEL_DATA, 0x6be00000 | PTE_KERNEL_DATA,
+ 0x6c000000 | PTE_KERNEL_DATA, 0x6c200000 | PTE_KERNEL_DATA,
+ 0x6c400000 | PTE_KERNEL_DATA, 0x6c600000 | PTE_KERNEL_DATA,
+ 0x6c800000 | PTE_KERNEL_DATA, 0x6ca00000 | PTE_KERNEL_DATA,
+ 0x6cc00000 | PTE_KERNEL_DATA, 0x6ce00000 | PTE_KERNEL_DATA,
+ 0x6d000000 | PTE_KERNEL_DATA, 0x6d200000 | PTE_KERNEL_DATA,
+ 0x6d400000 | PTE_KERNEL_DATA, 0x6d600000 | PTE_KERNEL_DATA,
+ 0x6d800000 | PTE_KERNEL_DATA, 0x6da00000 | PTE_KERNEL_DATA,
+ 0x6dc00000 | PTE_KERNEL_DATA, 0x6de00000 | PTE_KERNEL_DATA,
+ 0x6e000000 | PTE_KERNEL_DATA, 0x6e200000 | PTE_KERNEL_DATA,
+ 0x6e400000 | PTE_KERNEL_DATA, 0x6e600000 | PTE_KERNEL_DATA,
+ 0x6e800000 | PTE_KERNEL_DATA, 0x6ea00000 | PTE_KERNEL_DATA,
+ 0x6ec00000 | PTE_KERNEL_DATA, 0x6ee00000 | PTE_KERNEL_DATA,
+ 0x6f000000 | PTE_KERNEL_DATA, 0x6f200000 | PTE_KERNEL_DATA,
+ 0x6f400000 | PTE_KERNEL_DATA, 0x6f600000 | PTE_KERNEL_DATA,
+ 0x6f800000 | PTE_KERNEL_DATA, 0x6fa00000 | PTE_KERNEL_DATA,
+ 0x6fc00000 | PTE_KERNEL_DATA, 0x6fe00000 | PTE_KERNEL_DATA,
+ 0x70000000 | PTE_KERNEL_DATA, 0x70200000 | PTE_KERNEL_DATA,
+ 0x70400000 | PTE_KERNEL_DATA, 0x70600000 | PTE_KERNEL_DATA,
+ 0x70800000 | PTE_KERNEL_DATA, 0x70a00000 | PTE_KERNEL_DATA,
+ 0x70c00000 | PTE_KERNEL_DATA, 0x70e00000 | PTE_KERNEL_DATA,
+ 0x71000000 | PTE_KERNEL_DATA, 0x71200000 | PTE_KERNEL_DATA,
+ 0x71400000 | PTE_KERNEL_DATA, 0x71600000 | PTE_KERNEL_DATA,
+ 0x71800000 | PTE_KERNEL_DATA, 0x71a00000 | PTE_KERNEL_DATA,
+ 0x71c00000 | PTE_KERNEL_DATA, 0x71e00000 | PTE_KERNEL_DATA,
+ 0x72000000 | PTE_KERNEL_DATA, 0x72200000 | PTE_KERNEL_DATA,
+ 0x72400000 | PTE_KERNEL_DATA, 0x72600000 | PTE_KERNEL_DATA,
+ 0x72800000 | PTE_KERNEL_DATA, 0x72a00000 | PTE_KERNEL_DATA,
+ 0x72c00000 | PTE_KERNEL_DATA, 0x72e00000 | PTE_KERNEL_DATA,
+ 0x73000000 | PTE_KERNEL_DATA, 0x73200000 | PTE_KERNEL_DATA,
+ 0x73400000 | PTE_KERNEL_DATA, 0x73600000 | PTE_KERNEL_DATA,
+ 0x73800000 | PTE_KERNEL_DATA, 0x73a00000 | PTE_KERNEL_DATA,
+ 0x73c00000 | PTE_KERNEL_DATA, 0x73e00000 | PTE_KERNEL_DATA,
+ 0x74000000 | PTE_KERNEL_DATA, 0x74200000 | PTE_KERNEL_DATA,
+ 0x74400000 | PTE_KERNEL_DATA, 0x74600000 | PTE_KERNEL_DATA,
+ 0x74800000 | PTE_KERNEL_DATA, 0x74a00000 | PTE_KERNEL_DATA,
+ 0x74c00000 | PTE_KERNEL_DATA, 0x74e00000 | PTE_KERNEL_DATA,
+ 0x75000000 | PTE_KERNEL_DATA, 0x75200000 | PTE_KERNEL_DATA,
+ 0x75400000 | PTE_KERNEL_DATA, 0x75600000 | PTE_KERNEL_DATA,
+ 0x75800000 | PTE_KERNEL_DATA, 0x75a00000 | PTE_KERNEL_DATA,
+ 0x75c00000 | PTE_KERNEL_DATA, 0x75e00000 | PTE_KERNEL_DATA,
+ 0x76000000 | PTE_KERNEL_DATA, 0x76200000 | PTE_KERNEL_DATA,
+ 0x76400000 | PTE_KERNEL_DATA, 0x76600000 | PTE_KERNEL_DATA,
+ 0x76800000 | PTE_KERNEL_DATA, 0x76a00000 | PTE_KERNEL_DATA,
+ 0x76c00000 | PTE_KERNEL_DATA, 0x76e00000 | PTE_KERNEL_DATA,
+ 0x77000000 | PTE_KERNEL_DATA, 0x77200000 | PTE_KERNEL_DATA,
+ 0x77400000 | PTE_KERNEL_DATA, 0x77600000 | PTE_KERNEL_DATA,
+ 0x77800000 | PTE_KERNEL_DATA, 0x77a00000 | PTE_KERNEL_DATA,
+ 0x77c00000 | PTE_KERNEL_DATA, 0x77e00000 | PTE_KERNEL_DATA,
+ 0x78000000 | PTE_KERNEL_DATA, 0x78200000 | PTE_KERNEL_DATA,
+ 0x78400000 | PTE_KERNEL_DATA, 0x78600000 | PTE_KERNEL_DATA,
+ 0x78800000 | PTE_KERNEL_DATA, 0x78a00000 | PTE_KERNEL_DATA,
+ 0x78c00000 | PTE_KERNEL_DATA, 0x78e00000 | PTE_KERNEL_DATA,
+ 0x79000000 | PTE_KERNEL_DATA, 0x79200000 | PTE_KERNEL_DATA,
+ 0x79400000 | PTE_KERNEL_DATA, 0x79600000 | PTE_KERNEL_DATA,
+ 0x79800000 | PTE_KERNEL_DATA, 0x79a00000 | PTE_KERNEL_DATA,
+ 0x79c00000 | PTE_KERNEL_DATA, 0x79e00000 | PTE_KERNEL_DATA,
+ 0x7a000000 | PTE_KERNEL_DATA, 0x7a200000 | PTE_KERNEL_DATA,
+ 0x7a400000 | PTE_KERNEL_DATA, 0x7a600000 | PTE_KERNEL_DATA,
+ 0x7a800000 | PTE_KERNEL_DATA, 0x7aa00000 | PTE_KERNEL_DATA,
+ 0x7ac00000 | PTE_KERNEL_DATA, 0x7ae00000 | PTE_KERNEL_DATA,
+ 0x7b000000 | PTE_KERNEL_DATA, 0x7b200000 | PTE_KERNEL_DATA,
+ 0x7b400000 | PTE_KERNEL_DATA, 0x7b600000 | PTE_KERNEL_DATA,
+ 0x7b800000 | PTE_KERNEL_DATA, 0x7ba00000 | PTE_KERNEL_DATA,
+ 0x7bc00000 | PTE_KERNEL_DATA, 0x7be00000 | PTE_KERNEL_DATA,
+ 0x7c000000 | PTE_KERNEL_DATA, 0x7c200000 | PTE_KERNEL_DATA,
+ 0x7c400000 | PTE_KERNEL_DATA, 0x7c600000 | PTE_KERNEL_DATA,
+ 0x7c800000 | PTE_KERNEL_DATA, 0x7ca00000 | PTE_KERNEL_DATA,
+ 0x7cc00000 | PTE_KERNEL_DATA, 0x7ce00000 | PTE_KERNEL_DATA,
+ 0x7d000000 | PTE_KERNEL_DATA, 0x7d200000 | PTE_KERNEL_DATA,
+ 0x7d400000 | PTE_KERNEL_DATA, 0x7d600000 | PTE_KERNEL_DATA,
+ 0x7d800000 | PTE_KERNEL_DATA, 0x7da00000 | PTE_KERNEL_DATA,
+ 0x7dc00000 | PTE_KERNEL_DATA, 0x7de00000 | PTE_KERNEL_DATA,
+ 0x7e000000 | PTE_KERNEL_DATA, 0x7e200000 | PTE_KERNEL_DATA,
+ 0x7e400000 | PTE_KERNEL_DATA, 0x7e600000 | PTE_KERNEL_DATA,
+ 0x7e800000 | PTE_KERNEL_DATA, 0x7ea00000 | PTE_KERNEL_DATA,
+ 0x7ec00000 | PTE_KERNEL_DATA, 0x7ee00000 | PTE_KERNEL_DATA,
+ 0x7f000000 | PTE_KERNEL_DATA, 0x7f200000 | PTE_KERNEL_DATA,
+ 0x7f400000 | PTE_KERNEL_DATA, 0x7f600000 | PTE_KERNEL_DATA,
+ 0x7f800000 | PTE_KERNEL_DATA, 0x7fa00000 | PTE_KERNEL_DATA,
+ 0x7fc00000 | PTE_KERNEL_DATA, 0x7fe00000 | PTE_KERNEL_DATA
+};
+
+__attribute__((__aligned__(PAGE_SIZE))) PTEntries _kernel_pt_level1 = {
+ K2P(_kernel_pt_lv2_dev) + PTE_TABLE,
+ K2P(_kernel_pt_lv2_ram) + PTE_TABLE,
+};
+
+__attribute__((__aligned__(PAGE_SIZE))) PTEntries kernel_pt_level0 = {
+ K2P(_kernel_pt_level1) + PTE_TABLE
+};
+
+__attribute__((__aligned__(PAGE_SIZE))) PTEntries invalid_pt = { 0 };
\ No newline at end of file
diff --git a/src/aarch64/mmu.h b/src/aarch64/mmu.h
new file mode 100644
index 0000000..9b78926
--- /dev/null
+++ b/src/aarch64/mmu.h
@@ -0,0 +1,71 @@
+#pragma once
+
+#include
+typedef unsigned long long u64;
+#define PAGE_SIZE 4096
+
+/* Memory region attributes */
+#define MT_DEVICE_nGnRnE 0x0
+#define MT_NORMAL 0x1
+#define MT_NORMAL_NC 0x2
+#define MT_DEVICE_nGnRnE_FLAGS 0x00
+#define MT_NORMAL_FLAGS \
+ 0xFF /* Inner/Outer Write-Back Non-Transient RW-Allocate */
+#define MT_NORMAL_NC_FLAGS 0x44 /* Inner/Outer Non-Cacheable */
+
+#define SH_OUTER (2 << 8)
+#define SH_INNER (3 << 8)
+
+#define AF_USED (1 << 10)
+
+#define PTE_NORMAL_NC ((MT_NORMAL_NC << 2) | AF_USED | SH_OUTER)
+#define PTE_NORMAL ((MT_NORMAL << 2) | AF_USED | SH_OUTER)
+#define PTE_DEVICE ((MT_DEVICE_nGnRnE << 2) | AF_USED)
+
+#define PTE_VALID 0x1
+
+#define PTE_TABLE 0x3
+#define PTE_BLOCK 0x1
+#define PTE_PAGE 0x3
+
+#define PTE_KERNEL (0 << 6)
+#define PTE_USER (1 << 6)
+#define PTE_RO (1 << 7)
+#define PTE_RW (0 << 7)
+
+#define PTE_KERNEL_DATA (PTE_KERNEL | PTE_NORMAL | PTE_BLOCK)
+#define PTE_KERNEL_DEVICE (PTE_KERNEL | PTE_DEVICE | PTE_BLOCK)
+#define PTE_USER_DATA (PTE_USER | PTE_NORMAL | PTE_PAGE)
+
+#define N_PTE_PER_TABLE 512
+
+#define PTE_HIGH_NX (1LL << 54)
+
+#define KSPACE_MASK 0xFFFF000000000000
+
+// convert kernel address into physical address.
+#define K2P(addr) ((u64)(addr) - (KSPACE_MASK))
+
+// convert physical address into kernel address.
+#define P2K(addr) ((u64)(addr) + (KSPACE_MASK))
+
+// convert any address into kernel address space.
+#define KSPACE(addr) ((u64)(addr) | (KSPACE_MASK))
+
// convert any address into physical address space.
+#define PSPACE(addr) ((u64)(addr) & (~KSPACE_MASK))
+
+typedef u64 PTEntry;
+typedef PTEntry PTEntries[N_PTE_PER_TABLE];
+typedef PTEntry *PTEntriesPtr;
+
+#define VA_OFFSET(va) ((u64)(va) & 0xFFF)
+#define PTE_ADDRESS(pte) ((pte) & ~0xFFFF000000000FFF)
+#define PTE_FLAGS(pte) ((pte) & 0xFFFF000000000FFF)
+#define P2N(addr) (addr >> 12)
+#define PAGE_BASE(addr) ((u64)addr & ~(PAGE_SIZE - 1))
+
+#define VA_PART0(va) (((u64)(va) & 0xFF8000000000) >> 39)
+#define VA_PART1(va) (((u64)(va) & 0x7FC0000000) >> 30)
+#define VA_PART2(va) (((u64)(va) & 0x3FE00000) >> 21)
+#define VA_PART3(va) (((u64)(va) & 0x1FF000) >> 12)
diff --git a/src/aarch64/swtch.S b/src/aarch64/swtch.S
new file mode 100644
index 0000000..c6a404f
--- /dev/null
+++ b/src/aarch64/swtch.S
@@ -0,0 +1,31 @@
+// Do kernel-mode context switch
+// x0 (first parameter): new context ptr
+// x1 (second parameter): addr to save old context ptr
+
+#define pushp(a, b) stp a, b, [sp, #-0x10]!
+#define popp(a, b) ldp a, b, [sp], #0x10
+
.globl swtch
swtch:
// TODO: save and restore KernelContext
// store the last callee-saved registers
// (x0, x1, and lr are saved as well, so a freshly crafted stack can seed
// the argument registers and return address of a new context -- confirm
// against how process setup builds its initial KernelContext)
pushp(x28,x29)
pushp(x26,x27)
pushp(x24,x25)
pushp(x22,x23)
pushp(x20,x21)
pushp(x1,x19)
pushp(lr,x0)
// change stack: publish the old context pointer (current sp) through x1,
// then adopt the new context's stack from x0
mov x2 , sp
str x2 , [x1]
mov sp , x0
// load the new callee-saved registers from the new stack
popp(lr,x0)
popp(x1,x19)
popp(x20,x21)
popp(x22,x23)
popp(x24,x25)
popp(x26,x27)
popp(x28,x29)
ret
diff --git a/src/aarch64/trap.S b/src/aarch64/trap.S
new file mode 100644
index 0000000..5dfad6f
--- /dev/null
+++ b/src/aarch64/trap.S
@@ -0,0 +1,50 @@
+#define pushp(a, b) stp a, b, [sp, #-0x10]!
+#define popp(a, b) ldp a, b, [sp], #0x10
+
/* `exception_vector.S` sends all traps here. Builds the UserContext frame
 * on the kernel stack (x0..x17 pairs, then spsr/elr, then sp_el0) and
 * calls the C dispatcher; execution falls through into trap_return. */
.global trap_entry
trap_entry:
// TODO: save UserContext
pushp(x16,x17)
pushp(x14,x15)
pushp(x12,x13)
pushp(x10,x11)
pushp(x8,x9)
pushp(x6,x7)
pushp(x4,x5)
pushp(x2,x3)
pushp(x0,x1)

// privileged state needed to resume the interrupted code
mrs x0, spsr_el1
mrs x1, elr_el1
pushp(x0,x1)

// NOTE(review): sp_el0 is read twice and pushed as a pair -- presumably a
// 16-byte-aligned placeholder pair in UserContext; confirm its layout.
mrs x0, sp_el0
mrs x1, sp_el0
pushp(x0,x1)

// pass the frame base as the UserContext* argument
mov x0, sp
bl trap_global_handler
+
.global trap_return
trap_return:
// TODO: restore UserContext

// NOTE(review): both halves of the pair are written to sp_el0; the second
// write overwrites the first with the duplicated saved value. This mirrors
// the double save in trap_entry -- confirm intentional.
popp(x0,x1)
msr sp_el0, x0
msr sp_el0, x1

// restore the state eret consumes
popp(x0,x1)
msr spsr_el1, x0
msr elr_el1, x1

// general-purpose registers, reverse order of trap_entry
popp(x0,x1)
popp(x2,x3)
popp(x4,x5)
popp(x6,x7)
popp(x8,x9)
popp(x10,x11)
popp(x12,x13)
popp(x14,x15)
popp(x16,x17)
eret
diff --git a/src/aarch64/trap.c b/src/aarch64/trap.c
new file mode 100644
index 0000000..6f83779
--- /dev/null
+++ b/src/aarch64/trap.c
@@ -0,0 +1,71 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+void trap_global_handler(UserContext *context)
+{
+ //printk("[TRAP] Trap occurred! ELR=0x%llx, ESR=0x%llx\n", context->elr, arch_get_esr());
+ thisproc()->ucontext = context;
+
+ u64 esr = arch_get_esr();
+ u64 ec = esr >> ESR_EC_SHIFT;
+ u64 iss = esr & ESR_ISS_MASK;
+ u64 ir = esr & ESR_IR_MASK;
+
+ (void)iss;
+
+ arch_reset_esr();
+
+ switch (ec) {
+ case ESR_EC_UNKNOWN: {
+ if (ir)
+ PANIC();
+ else
+ interrupt_global_handler();
+ } break;
+ case ESR_EC_SVC64: {
+ syscall_entry(context);
+ } break;
+ case ESR_EC_IABORT_EL0:
+ case ESR_EC_IABORT_EL1:
+ case ESR_EC_DABORT_EL0:
+ case ESR_EC_DABORT_EL1: {
+ printk("[ERROR] Page Fault Exception!\n");
+ u64 far = arch_get_far();
+ u64 elr = arch_get_elr();
+ u64 ttbr0 = arch_get_ttbr0();
+ int pid = thisproc() ? thisproc()->pid : -999;
+ // Decode ISS basic fields (DFSC/IFSC low 6 bits, WnR bit for Data Abort)
+ u64 fsc = iss & 0x3F; // Fault Status Code
+ u64 is_write = (iss >> 6) & 1; // WnR for DAbort
+ printk("[ERROR] pid=%d ec=0x%llx iss=0x%llx fsc=0x%llx wr=%llu FAR=0x%llx ELR=0x%llx TTBR0=0x%llx\n",
+ pid, ec, iss, fsc, is_write, far, elr, ttbr0);
+ // Extra diagnostics: current EL and SCTLR (to infer PAN/behavior)
+ u64 cur_el = arch_get_currentel();
+ u64 sctlr = arch_get_sctlr();
+ printk("[ERROR] CurrentEL=0x%llx SCTLR_EL1=0x%llx\n", cur_el, sctlr);
+ if (thisproc()) {
+ extern void pt_dump_va(struct pgdir*, u64);
+ pt_dump_va(&thisproc()->pgdir, far);
+ }
+ PANIC();
+ } break;
+ default: {
+ printk("[ERROR] Unknwon exception %llu\n", ec);
+ PANIC();
+ }
+ }
+
+ // TODO: stop killed process while returning to user space
+ if (thisproc()->killed == true && ((context->elr) & 0xffff000000000000) == 0) exit(-1);
+}
+
/* Landing pad for exception vectors that should never fire: log the
 * vector type and halt the kernel. */
NO_RETURN void trap_error_handler(u64 type)
{
        printk("[ERROR] Unknown trap type %llu\n", type);
        PANIC();
}
diff --git a/src/aarch64/trap.h b/src/aarch64/trap.h
new file mode 100644
index 0000000..83ffd71
--- /dev/null
+++ b/src/aarch64/trap.h
@@ -0,0 +1,14 @@
#pragma once

#include

/* Field extraction for ESR_EL1 (exception syndrome register). */
#define ESR_EC_SHIFT 26 /* EC: exception class, bits [31:26] */
#define ESR_ISS_MASK 0xFFFFFF /* low 24 syndrome bits */
#define ESR_IR_MASK (1 << 25) /* IL bit: 32-bit instruction length */

/* Exception-class values dispatched in trap.c. */
#define ESR_EC_UNKNOWN 0x00
#define ESR_EC_SVC64 0x15 /* SVC from AArch64 (syscall) */
#define ESR_EC_IABORT_EL0 0x20 /* instruction abort from EL0 */
#define ESR_EC_IABORT_EL1 0x21 /* instruction abort from EL1 */
#define ESR_EC_DABORT_EL0 0x24 /* data abort from EL0 */
#define ESR_EC_DABORT_EL1 0x25 /* data abort from EL1 */
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
new file mode 100644
index 0000000..2ea3d0c
--- /dev/null
+++ b/src/common/CMakeLists.txt
@@ -0,0 +1,3 @@
# Collect every C file in this directory into the static `common` library.
file(GLOB common_sources CONFIGURE_DEPENDS "*.c")

add_library(common STATIC ${common_sources})
diff --git a/src/common/bitmap.h b/src/common/bitmap.h
new file mode 100755
index 0000000..b9e3d6b
--- /dev/null
+++ b/src/common/bitmap.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include
+
+// bitmap is a compact representation of boolean array.
+// consecutive 64 bits are stored in one u64 (BitmapCell).
+typedef u64 BitmapCell;
+
+#define BITMAP_BITS_PER_CELL (sizeof(BitmapCell) * 8)
+#define BITMAP_TO_NUM_CELLS(size) \
+ (((size) + BITMAP_BITS_PER_CELL - 1) / BITMAP_BITS_PER_CELL)
+
+// calculate cell index `idx` and in-cell `offset` from `index`.
+#define BITMAP_PARSE_INDEX(index, idx, offset) \
+ do { \
+ idx = index / BITMAP_BITS_PER_CELL; \
+ offset = index % BITMAP_BITS_PER_CELL; \
+ } while (false)
+
+// declare a new bitmap with `size` bits.
+#define Bitmap(name, size) BitmapCell name[BITMAP_TO_NUM_CELLS(size)]
+
+// initialize a bitmap with `size` bits. All bits are cleared.
+void init_bitmap(BitmapCell *bitmap, usize size);
+
+// get the bit at `index`.
+static INLINE bool bitmap_get(BitmapCell *bitmap, usize index)
+{
+ usize idx, offset;
+ BITMAP_PARSE_INDEX(index, idx, offset);
+ return (bitmap[idx] >> offset) & 1;
+}
+
+// set the bit at `index` to 1.
+static INLINE void bitmap_set(BitmapCell *bitmap, usize index)
+{
+ usize idx, offset;
+ BITMAP_PARSE_INDEX(index, idx, offset);
+ bitmap[idx] |= BIT(offset);
+}
+
+// set the bit at `index` to 0.
+static INLINE void bitmap_clear(BitmapCell *bitmap, usize index)
+{
+ usize idx, offset;
+ BITMAP_PARSE_INDEX(index, idx, offset);
+ bitmap[idx] &= ~BIT(offset);
+}
diff --git a/src/common/buf.h b/src/common/buf.h
new file mode 100755
index 0000000..38792e9
--- /dev/null
+++ b/src/common/buf.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include
+#include
+
+#define BSIZE 512
+#define B_VALID 0x2 // Buffer has been read from disk.
+#define B_DIRTY 0x4 // Buffer needs to be written to disk.
+
/* A single cached disk block. */
typedef struct {
        int flags; /* B_VALID / B_DIRTY state bits */
        u8 data[BSIZE]; /* in-memory copy of the 512-byte block */
        u32 block_no; /* disk block number this buffer caches */

        /* @todo: It depends on you to add other necessary elements. */
        Semaphore sem; /* presumably signals I/O completion -- confirm */
} Buf;
diff --git a/src/common/checker.h b/src/common/checker.h
new file mode 100644
index 0000000..0451189
--- /dev/null
+++ b/src/common/checker.h
@@ -0,0 +1,16 @@
#pragma once

/* Compile-time context-balance checker. `setup_checker` declares a counter
 * that is checked at scope exit via the `cleanup` attribute; begin/end
 * bump it. If the optimizer cannot prove the counter is zero at cleanup,
 * the `error` attribute on __checker_fail turns the mismatch into a
 * build failure. */
typedef int Checker;

__attribute__((error("Checker: context mismatching"), unused)) void
__checker_fail();
/* Must inline so the counter value is visible to the optimizer. */
__attribute__((always_inline)) static inline void __checker_check(Checker *x)
{
        if (*x)
                __checker_fail();
}

#define setup_checker(id) \
        Checker __chkcounter_##id __attribute__((cleanup(__checker_check))) = 0
#define checker_begin_ctx(id) (__chkcounter_##id++)
#define checker_end_ctx(id) (__chkcounter_##id--)
diff --git a/src/common/defines.h b/src/common/defines.h
new file mode 100755
index 0000000..c37a9fe
--- /dev/null
+++ b/src/common/defines.h
@@ -0,0 +1,108 @@
+#pragma once
+
+#ifndef __cplusplus
+#define true 1
+#define false 0
+#define auto __auto_type
+typedef char bool;
+#endif
+
+#define TRUE true
+#define FALSE false
+#ifndef NULL
+#define NULL 0
+#endif
+
+typedef signed char i8;
+typedef unsigned char u8;
+typedef signed short i16;
+typedef unsigned short u16;
+typedef signed int i32;
+typedef unsigned int u32;
+typedef signed long long i64;
+typedef unsigned long long u64;
+
+typedef i64 isize;
+typedef u64 usize;
+
+/* Efficient min and max operations */
+#define MIN(_a, _b) \
+ ({ \
+ typeof(_a) __a = (_a); \
+ typeof(_b) __b = (_b); \
+ __a <= __b ? __a : __b; \
+ })
+
+#define MAX(_a, _b) \
+ ({ \
+ typeof(_a) __a = (_a); \
+ typeof(_b) __b = (_b); \
+ __a >= __b ? __a : __b; \
+ })
+
+#define BIT(i) (1ull << (i))
+
+#define NO_BSS __attribute__((section(".data")))
+#define NO_RETURN __attribute__((noreturn))
+#define INLINE inline __attribute__((unused))
+#define ALWAYS_INLINE inline __attribute__((unused, always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_IPA __attribute__((noipa))
+#define WARN_RESULT __attribute__((warn_unused_result))
+
+// NOTE: no_return will disable traps.
+// NO_RETURN NO_INLINE void no_return();
+
+/* Return the offset of `member` inside struct `type`. */
+#define offset_of(type, member) ((usize)(&((type *)NULL)->member))
+
+/**
+ * The following macro assumes that `mptr` is a pointer to a `member` within
+ * a struct of type `type`. It returns a pointer to the encompassing struct of
+ * type `type` that contains this `member`.
+ *
+ * This macro is particularly useful in scenarios involving lists. For instance,
+ * it is common practice to embed a `ListNode` within a struct, as demonstrated
+ * below:
+ *
+ * typedef struct {
+ * u64 data;
+ * ListNode node;
+ * } Container;
+ *
+ * Container a;
+ * ListNode *b = &a.node;
+ *
+ * In this example, the expression `container_of(b, Container, node)` will yield
+ * the same result as `&a`.
+ */
+#define container_of(mptr, type, member) \
+ ({ \
+ const typeof(((type *)NULL)->member) *_mptr = (mptr); \
+ (type *)((u8 *)_mptr - offset_of(type, member)); \
+ })
+
+/* Return the largest c that c is a multiple of b and c <= a. */
+static INLINE u64 round_down(u64 a, u64 b)
+{
+ return a - a % b;
+}
+
+/* Return the smallest c that c is a multiple of b and c >= a. */
+static INLINE u64 round_up(u64 a, u64 b)
+{
+ return round_down(a + b - 1, b);
+}
+
+void _panic(const char *, int);
+NO_INLINE NO_RETURN void _panic(const char *, int);
+#define PANIC() _panic(__FILE__, __LINE__)
+#define ASSERT(expr) \
+ ({ \
+ if (!(expr)) \
+ PANIC(); \
+ })
+
+#define LO(addr) (u32)((addr) & 0xffffffff)
+#define HI(addr) (u32)(((addr) >> 32) & 0xffffffff)
+#define REG(addr) (*(volatile u32 *)(u64)(addr))
diff --git a/src/common/format.c b/src/common/format.c
new file mode 100644
index 0000000..843a5c0
--- /dev/null
+++ b/src/common/format.c
@@ -0,0 +1,91 @@
+#include
+#include
+#include
+
+static void _print_int(PutCharFunc put_char, void *ctx, i64 u, int _base,
+ bool is_signed)
+{
+ static char digit[] = "0123456789abcdef";
+ static char buf[64];
+
+ u64 v = (u64)u, base = (u64)_base;
+ if (is_signed && u < 0) {
+ v = -v;
+ put_char(ctx, '-');
+ }
+
+ char *pos = buf;
+ do {
+ *pos++ = digit[v % base];
+ } while (v /= base);
+
+ do {
+ put_char(ctx, *(--pos));
+ } while (pos != buf);
+}
+
/*
 * Minimal printf-style engine: scans `fmt`, feeding literal characters
 * and formatted arguments to `put_char(ctx, c)`.
 * Supported: %% %c %s %u %llu %d %lld %x %llx %p %zu %zd.
 * An unrecognized specifier is emitted verbatim, '%' included.
 */
void vformat(PutCharFunc put_char, void *ctx, const char *fmt, va_list arg)
{
        const char *pos = fmt;

/* Match the literal specifier `ident` at `pos` and print the argument as
 * an integer. The chain below relies on prefix disambiguation: "llu" is
 * safe after "u" because their first characters differ. */
#define _INT_CASE(ident, type, base, sign) \
        else if (strncmp(pos, ident, sizeof(ident) - 1) == 0) \
        { \
                _print_int(put_char, ctx, (i64)va_arg(arg, type), base, sign); \
                pos += sizeof(ident) - 1; \
        }

        char c;
        while ((c = *pos++) != '\0') {
                bool special = false;

                if (c == '%') {
                        special = 1;

                        if (*pos == '%') {
                                // simple case: %% -> %
                                put_char(ctx, '%');
                                pos++;
                        } else if (*pos == 'c') {
                                /* char is promoted to int through varargs */
                                put_char(ctx, (char)va_arg(arg, int));
                                pos++;
                        } else if (*pos == 's') {
                                const char *s = va_arg(arg, const char *);

                                if (!s)
                                        s = "(null)";
                                while (*s != '\0') {
                                        put_char(ctx, *s++);
                                }

                                pos++;
                        }
                        /* NOTE(review): %p consumes a u64 and prints bare
                         * hex (no "0x" prefix) -- confirm callers pass
                         * pointer-sized values. */
                        _INT_CASE("u", u32, 10, 0)
                        _INT_CASE("llu", u64, 10, 0)
                        _INT_CASE("d", i32, 10, 1)
                        _INT_CASE("lld", i64, 10, 1)
                        _INT_CASE("x", u32, 16, 0)
                        _INT_CASE("llx", u64, 16, 0)
                        _INT_CASE("p", u64, 16, 0)
                        _INT_CASE("zu", usize, 10, 0)
                        _INT_CASE("zd", isize, 10, 1)
                        else
                        {
                                /* unknown specifier: fall through and emit
                                 * the '%' literally */
                                special = 0;
                        }
                }

                if (!special)
                        put_char(ctx, c);
        }

#undef _INT_CASE
}
+
/* Variadic convenience wrapper over vformat. */
void format(PutCharFunc put_char, void *ctx, const char *fmt, ...)
{
        va_list arg;
        va_start(arg, fmt);
        vformat(put_char, ctx, fmt, arg);
        va_end(arg);
}
diff --git a/src/common/format.h b/src/common/format.h
new file mode 100644
index 0000000..40c77b9
--- /dev/null
+++ b/src/common/format.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include
+
+typedef void (*PutCharFunc)(void *ctx, char c);
+
+void vformat(PutCharFunc put_char, void *ctx, const char *fmt, va_list arg);
+void format(PutCharFunc put_char, void *ctx, const char *fmt, ...);
diff --git a/src/common/list.c b/src/common/list.c
new file mode 100644
index 0000000..301e4b2
--- /dev/null
+++ b/src/common/list.c
@@ -0,0 +1,121 @@
+#include
+
+void init_list_node(ListNode *node)
+{
+ node->prev = node;
+ node->next = node;
+}
+
+ListNode *_merge_list(ListNode *node1, ListNode *node2)
+{
+ if (!node1)
+ return node2;
+ if (!node2)
+ return node1;
+
+ // before: (arrow is the next pointer)
+ // ... --> node1 --> node3 --> ...
+ // ... <-- node2 <-- node4 <-- ...
+ //
+ // after:
+ // ... --> node1 --+ +-> node3 --> ...
+ // | |
+ // ... <-- node2 <-+ +-- node4 <-- ...
+
+ ListNode *node3 = node1->next;
+ ListNode *node4 = node2->prev;
+
+ node1->next = node2;
+ node2->prev = node1;
+ node4->next = node3;
+ node3->prev = node4;
+
+ return node1;
+}
+
+ListNode *_detach_from_list(ListNode *node)
+{
+ ListNode *prev = node->prev;
+
+ node->prev->next = node->next;
+ node->next->prev = node->prev;
+ init_list_node(node);
+
+ if (prev == node)
+ return NULL;
+ return prev;
+}
+
/* Lock-free LIFO push: publish `node` as the new head via a CAS loop.
 * On CAS failure the builtin reloads *head into node->next, so the loop
 * body only needs the initial store. Returns `node`. */
QueueNode *add_to_queue(QueueNode **head, QueueNode *node)
{
        do
                node->next = *head;
        while (!__atomic_compare_exchange_n(head, &node->next, node, true,
                                            __ATOMIC_ACQ_REL, __ATOMIC_RELAXED));
        return node;
}
+
/* Lock-free LIFO pop: detach and return the most recently pushed node,
 * or NULL if the queue is empty.
 * NOTE(review): classic ABA / use-after-free hazard -- `node->next` is
 * read while another CPU may already have popped and freed `node`. Safe
 * only if nodes are never freed while a pop can still observe them;
 * confirm the usage guarantees this. */
QueueNode *fetch_from_queue(QueueNode **head)
{
        QueueNode *node;
        do
                node = *head;
        while (node &&
               !__atomic_compare_exchange_n(head, &node, node->next, true,
                                            __ATOMIC_ACQ_REL, __ATOMIC_RELAXED));
        return node;
}
+
/* Atomically take the whole queue: swap the head with NULL and return
 * the old chain (newest node first). */
QueueNode *fetch_all_from_queue(QueueNode **head)
{
        return __atomic_exchange_n(head, NULL, __ATOMIC_ACQ_REL);
}
+
+void queue_init(Queue *x)
+{
+ x->begin = x->end = 0;
+ x->sz = 0;
+ init_spinlock(&x->lk);
+}
/* Take the queue's spinlock; pair with queue_unlock. */
void queue_lock(Queue *x)
{
        acquire_spinlock(&x->lk);
}
/* Release the queue's spinlock taken by queue_lock. */
void queue_unlock(Queue *x)
{
        release_spinlock(&x->lk);
}
+void queue_push(Queue *x, ListNode *item)
+{
+ init_list_node(item);
+ if (x->sz == 0) {
+ x->begin = x->end = item;
+ } else {
+ _merge_list(x->end, item);
+ x->end = item;
+ }
+ x->sz++;
+}
+void queue_pop(Queue *x)
+{
+ if (x->sz == 0)
+ PANIC();
+ if (x->sz == 1) {
+ x->begin = x->end = 0;
+ } else {
+ auto t = x->begin;
+ x->begin = x->begin->next;
+ _detach_from_list(t);
+ }
+ x->sz--;
+}
+ListNode *queue_front(Queue *x)
+{
+ if (!x || !x->begin)
+ PANIC();
+ return x->begin;
+}
+bool queue_empty(Queue *x)
+{
+ return x->sz == 0;
+}
\ No newline at end of file
diff --git a/src/common/list.h b/src/common/list.h
new file mode 100644
index 0000000..bfa680b
--- /dev/null
+++ b/src/common/list.h
@@ -0,0 +1,79 @@
+#pragma once
+
+#include
+#include
+
+// ListNode represents one node on a circular list.
+typedef struct ListNode {
+ struct ListNode *prev, *next;
+} ListNode;
+
// initialize a single node circular list.
+void init_list_node(ListNode *node);
+
+// * List operations without locks: USE THEM CAREFULLY
+// - merge the list containing `node1` and the list containing `node2`
+// into one list. It guarantees `node1->next == node2`. Both lists can be
+// empty. This function will return the merged list.
+ListNode *_merge_list(ListNode *node1, ListNode *node2);
+// - syntax sugar: insert a single new node into the list
+#define _insert_into_list(list, node) \
+ (init_list_node(node), _merge_list(list, node))
+// - remove `node` from the list, and then `node` becomes a single
+// node list. It usually returns `node->prev`. If `node` is
+// the last one in the list, it will return NULL.
+ListNode *_detach_from_list(ListNode *node);
+// - walk through the list
+#define _for_in_list(valptr, list) \
+ for (ListNode *__flag = (list), *valptr = __flag->next; valptr; \
+ valptr = valptr == __flag ? (void *)0 : valptr->next)
+// - test if the list is empty
+#define _empty_list(list) ((list)->next == (list))
+
+// * List operations with locks
+#define merge_list(lock, node1, node2) \
+ ({ \
+ acquire_spinlock(lock); \
+ ListNode *__t = _merge_list(node1, node2); \
+ release_spinlock(lock); \
+ __t; \
+ })
+#define insert_into_list(lock, list, node) \
+ ({ \
+ acquire_spinlock(lock); \
+ ListNode *__t = _insert_into_list(list, node); \
+ release_spinlock(lock); \
+ __t; \
+ })
+#define detach_from_list(lock, node) \
+ ({ \
+ acquire_spinlock(lock); \
+ ListNode *__t = _detach_from_list(node); \
+ release_spinlock(lock); \
+ __t; \
+ })
+
+// Lockfree Queue: implemented as a lock-free single linked list.
+typedef struct QueueNode {
+ struct QueueNode *next;
+} QueueNode;
+// add a node to the queue and return the added node
+QueueNode *add_to_queue(QueueNode **head, QueueNode *node);
+// remove the last added node from the queue and return it
+QueueNode *fetch_from_queue(QueueNode **head);
+// remove all nodes from the queue and return them as a single list
+QueueNode *fetch_all_from_queue(QueueNode **head);
+
+typedef struct Queue {
+ ListNode *begin;
+ ListNode *end;
+ int sz;
+ SpinLock lk;
+} Queue;
+void queue_init(Queue *x);
+void queue_lock(Queue *x);
+void queue_unlock(Queue *x);
+void queue_push(Queue *x, ListNode *item);
+void queue_pop(Queue *x);
+ListNode *queue_front(Queue *x);
+bool queue_empty(Queue *x);
\ No newline at end of file
diff --git a/src/common/rbtree.c b/src/common/rbtree.c
new file mode 100644
index 0000000..4547944
--- /dev/null
+++ b/src/common/rbtree.c
@@ -0,0 +1,316 @@
+#include "rbtree.h"
+#define RB_RED 0
+#define RB_BLACK 1
+#define rb_parent(r) ((rb_node)((r)->__rb_parent_color & ~3))
+#define __rb_parent(pc) ((rb_node)(pc & ~3))
+
+#define __rb_color(pc) ((pc) & 1)
+#define __rb_is_black(pc) __rb_color(pc)
+#define __rb_is_red(pc) (!__rb_color(pc))
+#define rb_color(rb) __rb_color((rb)->__rb_parent_color)
+#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color)
+#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color)
/* Force a node's color to black without touching its parent pointer. */
static inline void rb_set_black(rb_node rb)
{
        rb->__rb_parent_color |= RB_BLACK;
}
/* Parent of a node known to be red: red means color bit 0, so the packed
 * parent+color word IS the parent pointer. */
static inline rb_node rb_red_parent(rb_node red)
{
        return (rb_node)red->__rb_parent_color;
}
/* Re-point the parent, preserving the node's current color bit. */
static inline void rb_set_parent(rb_node rb, rb_node p)
{
        rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
}
/* Store parent and color in one word (nodes are aligned, so the low bits
 * of the pointer are free for the color). */
static inline void rb_set_parent_color(rb_node rb, rb_node p, int color)
{
        rb->__rb_parent_color = (unsigned long)p | color;
}
/* Replace `old` with `new` in `parent`'s child slot, or as tree root. */
static inline void __rb_change_child(rb_node old, rb_node new, rb_node parent,
                                     rb_root root)
{
        if (parent) {
                if (parent->rb_left == old)
                        parent->rb_left = new;
                else
                        parent->rb_right = new;
        } else
                root->rb_node = new;
}
/* Finish a rotation: `new` inherits `old`'s parent/color word, `old` is
 * recolored to `color` under `new`, and the grandparent link is fixed. */
static inline void __rb_rotate_set_parents(rb_node old, rb_node new,
                                           rb_root root, int color)
{
        rb_node parent = rb_parent(old);
        new->__rb_parent_color = old->__rb_parent_color;
        rb_set_parent_color(old, new, color);
        __rb_change_child(old, new, parent, root);
}
/* Restore red-black invariants after inserting the red node `node`.
 * Bottom-up fixup in the style of Linux's rb_insert_color: recolor and
 * ascend while the uncle is red; otherwise at most two rotations finish. */
static void __rb_insert_fix(rb_node node, rb_root root)
{
        rb_node parent = rb_red_parent(node), gparent, tmp;
        while (1) {
                if (!parent) {
                        /* Reached the root: color it black and stop. */
                        rb_set_parent_color(node, NULL, RB_BLACK);
                        root->rb_node = node;
                        break;
                } else if (rb_is_black(parent))
                        break;

                gparent = rb_red_parent(parent);
                tmp = gparent->rb_right;
                if (parent != tmp) { /* parent == gparent->rb_left */
                        if (tmp && rb_is_red(tmp)) { /* Case 1: uncle red -> recolor, ascend */
                                rb_set_parent_color(tmp, gparent, RB_BLACK);
                                rb_set_parent_color(parent, gparent, RB_BLACK);
                                node = gparent;
                                parent = rb_parent(node);
                                rb_set_parent_color(node, parent, RB_RED);
                                continue;
                        }
                        // Uncle is black
                        tmp = parent->rb_right;
                        if (node == tmp) { /* Case 2: node is right child -> left rotate parent */
                                parent->rb_right = tmp = node->rb_left;
                                if (tmp)
                                        rb_set_parent_color(tmp, parent, RB_BLACK);
                                node->rb_left = parent;
                                rb_set_parent_color(parent, node, RB_RED);
                                parent = node;
                                tmp = node->rb_right;
                        }
                        /* Case 3: right rotate gparent; tree is valid after */
                        gparent->rb_left = tmp;
                        if (tmp)
                                rb_set_parent_color(tmp, gparent, RB_BLACK);
                        parent->rb_right = gparent;
                        __rb_rotate_set_parents(gparent, parent, root, RB_RED);
                        break;
                } else { /* mirror image: parent == gparent->rb_right */
                        tmp = gparent->rb_left;
                        if (tmp && rb_is_red(tmp)) { /* Case 1: uncle red -> recolor, ascend */
                                rb_set_parent_color(tmp, gparent, RB_BLACK);
                                rb_set_parent_color(parent, gparent, RB_BLACK);
                                node = gparent;
                                parent = rb_parent(node);
                                rb_set_parent_color(node, parent, RB_RED);
                                continue;
                        }
                        // Uncle is black
                        tmp = parent->rb_left;
                        if (node == tmp) { /* Case 2: node is left child -> right rotate parent */
                                parent->rb_left = tmp = node->rb_right;
                                if (tmp)
                                        rb_set_parent_color(tmp, parent, RB_BLACK);
                                node->rb_right = parent;
                                rb_set_parent_color(parent, node, RB_RED);
                                parent = node;
                                tmp = node->rb_left;
                        }
                        /* Case 3: left rotate gparent; tree is valid after */
                        gparent->rb_right = tmp;
                        if (tmp)
                                rb_set_parent_color(tmp, gparent, RB_BLACK);
                        parent->rb_left = gparent;
                        __rb_rotate_set_parents(gparent, parent, root, RB_RED);
                        break;
                }
        }
}
/* Unlink `node` from the tree without recoloring. Returns the parent
 * from which black-height rebalancing must start, or NULL if the tree
 * is already valid. */
static rb_node __rb_erase(rb_node node, rb_root root)
{
        rb_node child = node->rb_right, tmp = node->rb_left;
        rb_node parent, rebalance;
        unsigned long pc;
        if (!tmp) {
                /* No left child: splice the right child (possibly NULL)
                 * into node's place, reusing node's parent+color word. */
                pc = node->__rb_parent_color;
                parent = __rb_parent(pc);
                __rb_change_child(node, child, parent, root);
                if (child) {
                        child->__rb_parent_color = pc;
                        rebalance = NULL;
                } else
                        /* Removed a childless node: rebalance iff it was
                         * black. */
                        rebalance = __rb_is_black(pc) ? parent : NULL;
        } else if (!child) {
                /* Only a left child: splice it in; no rebalance needed. */
                tmp->__rb_parent_color = pc = node->__rb_parent_color;
                parent = __rb_parent(pc);
                __rb_change_child(node, tmp, parent, root);
                rebalance = NULL;
        } else {
                /* Two children: replace node by its in-order successor,
                 * the leftmost node of the right subtree. */
                rb_node successor = child, child2;
                tmp = child->rb_left;
                if (!tmp) {
                        /* Successor is node's right child itself. */
                        parent = successor;
                        child2 = successor->rb_right;
                } else {
                        do {
                                parent = successor;
                                successor = tmp;
                                tmp = tmp->rb_left;
                        } while (tmp);
                        parent->rb_left = child2 = successor->rb_right;
                        successor->rb_right = child;
                        rb_set_parent(child, successor);
                }
                successor->rb_left = tmp = node->rb_left;
                rb_set_parent(tmp, successor);
                pc = node->__rb_parent_color;
                tmp = __rb_parent(pc);
                __rb_change_child(node, successor, tmp, root);
                if (child2) {
                        /* Successor's right child fills its old slot and
                         * becomes black; no further rebalancing. */
                        successor->__rb_parent_color = pc;
                        rb_set_parent_color(child2, parent, RB_BLACK);
                        rebalance = NULL;
                } else {
                        /* Rebalance iff the successor was black. */
                        unsigned long pc2 = successor->__rb_parent_color;
                        successor->__rb_parent_color = pc;
                        rebalance = __rb_is_black(pc2) ? parent : NULL;
                }
        }
        return rebalance;
}
/* Rebalance after erasing a black node. `parent` is the parent of the
 * doubly-black position; `node` tracks that position (NULL on entry).
 * Same case structure as Linux's ____rb_erase_color: each half of the
 * if/else is the left/right mirror of the other. */
static void __rb_erase_fix(rb_node parent, rb_root root)
{
        rb_node node = NULL, sibling, tmp1, tmp2;
        while (1) {
                sibling = parent->rb_right;
                if (node != sibling) { /* deficit is in the left subtree */
                        if (rb_is_red(sibling)) { /* Case 1: red sibling -> left rotate parent */
                                parent->rb_right = tmp1 = sibling->rb_left;
                                rb_set_parent_color(tmp1, parent, RB_BLACK);
                                sibling->rb_left = parent;
                                __rb_rotate_set_parents(parent, sibling, root, RB_RED);
                                sibling = tmp1;
                        }
                        tmp1 = sibling->rb_right;
                        if (!tmp1 || rb_is_black(tmp1)) {
                                tmp2 = sibling->rb_left;
                                if (!tmp2 ||
                                    rb_is_black(tmp2)) { /* Case 2: sibling and both its children black -> recolor, ascend */
                                        rb_set_parent_color(sibling, parent, RB_RED);
                                        if (rb_is_red(parent)) {
                                                rb_set_black(parent);
                                        } else {
                                                node = parent;
                                                parent = rb_parent(node);
                                                if (parent)
                                                        continue;
                                        }
                                        break;
                                } else { /* Case 3: inner child red -> rotate sibling first */
                                        sibling->rb_left = tmp1 = tmp2->rb_right;
                                        if (tmp1)
                                                rb_set_parent_color(tmp1, sibling, RB_BLACK);
                                        tmp2->rb_right = sibling;
                                        parent->rb_right = tmp2;
                                        tmp1 = sibling;
                                        sibling = tmp2;
                                }
                        }
                        /* Case 4: outer child red -> final rotation ends it */
                        parent->rb_right = tmp2 = sibling->rb_left;
                        if (tmp2)
                                rb_set_parent(tmp2, parent);
                        sibling->rb_left = parent;
                        rb_set_parent_color(tmp1, sibling, RB_BLACK);
                        __rb_rotate_set_parents(parent, sibling, root, RB_BLACK);
                        break;
                } else { /* mirror: deficit is in the right subtree */
                        sibling = parent->rb_left;
                        if (rb_is_red(sibling)) { /* Case 1: red sibling -> right rotate parent */
                                parent->rb_left = tmp1 = sibling->rb_right;
                                rb_set_parent_color(tmp1, parent, RB_BLACK);
                                sibling->rb_right = parent;
                                __rb_rotate_set_parents(parent, sibling, root, RB_RED);
                                sibling = tmp1;
                        }
                        tmp1 = sibling->rb_left;
                        if (!tmp1 || rb_is_black(tmp1)) {
                                tmp2 = sibling->rb_right;
                                if (!tmp2 ||
                                    rb_is_black(tmp2)) { /* Case 2: sibling and both its children black -> recolor, ascend */
                                        rb_set_parent_color(sibling, parent, RB_RED);
                                        if (rb_is_red(parent)) {
                                                rb_set_black(parent);
                                        } else {
                                                node = parent;
                                                parent = rb_parent(node);
                                                if (parent)
                                                        continue;
                                        }
                                        break;
                                } else { /* Case 3: inner child red -> rotate sibling first */
                                        sibling->rb_right = tmp1 = tmp2->rb_left;
                                        if (tmp1)
                                                rb_set_parent_color(tmp1, sibling, RB_BLACK);
                                        tmp2->rb_left = sibling;
                                        parent->rb_left = tmp2;
                                        tmp1 = sibling;
                                        sibling = tmp2;
                                }
                        }
                        /* Case 4: outer child red -> final rotation ends it */
                        parent->rb_left = tmp2 = sibling->rb_right;
                        if (tmp2)
                                rb_set_parent(tmp2, parent);
                        sibling->rb_right = parent;
                        rb_set_parent_color(tmp1, sibling, RB_BLACK);
                        __rb_rotate_set_parents(parent, sibling, root, RB_BLACK);
                        break;
                }
        }
}
+int _rb_insert(rb_node node, rb_root rt,
+ bool (*cmp)(rb_node lnode, rb_node rnode))
+{
+ rb_node nw = rt->rb_node, parent = NULL;
+ node->rb_left = node->rb_right = NULL;
+ node->__rb_parent_color = 0;
+ while (nw) {
+ parent = nw;
+ if (cmp(node, nw)) {
+ nw = nw->rb_left;
+ if (nw == NULL) {
+ parent->rb_left = node;
+ node->__rb_parent_color = (unsigned long)parent;
+ }
+ } else if (cmp(nw, node)) {
+ nw = nw->rb_right;
+ if (nw == NULL) {
+ parent->rb_right = node;
+ node->__rb_parent_color = (unsigned long)parent;
+ }
+ } else
+ return -1;
+ }
+ __rb_insert_fix(node, rt);
+ return 0;
+}
+void _rb_erase(rb_node node, rb_root root)
+{
+ rb_node rebalance;
+ rebalance = __rb_erase(node, root);
+ if (rebalance)
+ __rb_erase_fix(rebalance, root);
+}
+rb_node _rb_lookup(rb_node node, rb_root rt,
+ bool (*cmp)(rb_node lnode, rb_node rnode))
+{
+ rb_node nw = rt->rb_node;
+ while (nw) {
+ if (cmp(node, nw)) {
+ nw = nw->rb_left;
+ } else if (cmp(nw, node)) {
+ nw = nw->rb_right;
+ } else
+ return nw;
+ }
+ return NULL;
+}
+rb_node _rb_first(rb_root root)
+{
+ rb_node n;
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_left)
+ n = n->rb_left;
+ return n;
+}
\ No newline at end of file
diff --git a/src/common/rbtree.h b/src/common/rbtree.h
new file mode 100755
index 0000000..535f356
--- /dev/null
+++ b/src/common/rbtree.h
@@ -0,0 +1,22 @@
+#pragma once
+#include "common/defines.h"
+
+struct rb_node_ {
+ unsigned long __rb_parent_color;
+ struct rb_node_ *rb_right;
+ struct rb_node_ *rb_left;
+} __attribute__((aligned(sizeof(long))));
+
+typedef struct rb_node_ *rb_node;
+struct rb_root_ {
+ rb_node rb_node;
+};
+typedef struct rb_root_ *rb_root;
+
+/* NOTE:You should add lock when use */
+WARN_RESULT int _rb_insert(rb_node node, rb_root root,
+ bool (*cmp)(rb_node lnode, rb_node rnode));
+void _rb_erase(rb_node node, rb_root root);
+rb_node _rb_lookup(rb_node node, rb_root rt,
+ bool (*cmp)(rb_node lnode, rb_node rnode));
+rb_node _rb_first(rb_root root);
diff --git a/src/common/rc.c b/src/common/rc.c
new file mode 100644
index 0000000..673ca8f
--- /dev/null
+++ b/src/common/rc.c
@@ -0,0 +1,17 @@
+#include
+
/* Start a reference counter at zero. */
void init_rc(RefCount *rc)
{
        rc->count = 0;
}
+
/* Atomically add one reference. */
void increment_rc(RefCount *rc)
{
        __atomic_fetch_add(&rc->count, 1, __ATOMIC_ACQ_REL);
}
+
/* Atomically drop one reference; true when the count has reached zero
 * (or below), i.e. the holder may reclaim the object. */
bool decrement_rc(RefCount *rc)
{
        i64 r = __atomic_sub_fetch(&rc->count, 1, __ATOMIC_ACQ_REL);
        return r <= 0;
}
diff --git a/src/common/rc.h b/src/common/rc.h
new file mode 100644
index 0000000..b7fbf40
--- /dev/null
+++ b/src/common/rc.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include
+
+typedef struct {
+ isize count;
+} RefCount;
+
+void init_rc(RefCount *);
+void increment_rc(RefCount *);
+bool decrement_rc(RefCount *);
diff --git a/src/common/sem.c b/src/common/sem.c
new file mode 100755
index 0000000..cf277bf
--- /dev/null
+++ b/src/common/sem.c
@@ -0,0 +1,96 @@
+#include
+#include
+#include
+#include
+#include
+
/* Initialize a counting semaphore with `val` tokens and no sleepers. */
void init_sem(Semaphore *sem, int val)
{
        sem->val = val;
        init_spinlock(&sem->lock);
        init_list_node(&sem->sleeplist);
}
+
/* Take the semaphore's internal spinlock (see wait_sem/post_sem macros). */
void _lock_sem(Semaphore *sem)
{
        acquire_spinlock(&sem->lock);
}
+
/* Release the semaphore's internal spinlock. */
void _unlock_sem(Semaphore *sem)
{
        release_spinlock(&sem->lock);
}
+
+bool _get_sem(Semaphore *sem)
+{
+ bool ret = false;
+ if (sem->val > 0) {
+ sem->val--;
+ ret = true;
+ }
+ return ret;
+}
+
/* Current token count; negative means -val processes are sleeping.
 * Caller should hold the semaphore lock for a stable answer. */
int _query_sem(Semaphore *sem)
{
        return sem->val;
}
+
+int get_all_sem(Semaphore *sem)
+{
+ int ret = 0;
+ _lock_sem(sem);
+ if (sem->val > 0) {
+ ret = sem->val;
+ sem->val = 0;
+ }
+ _unlock_sem(sem);
+ return ret;
+}
+
/* Wake every sleeper: keep posting until a token becomes available,
 * then immediately re-take that surplus token, leaving val == 0.
 * Returns the number of sleepers woken (`ret` starts at -1 to cancel
 * the final post that _get_sem takes back). */
int post_all_sem(Semaphore *sem)
{
        int ret = -1;
        _lock_sem(sem);
        do
                _post_sem(sem), ret++;
        while (!_get_sem(sem));
        _unlock_sem(sem);
        return ret;
}
+
/* Block until a token is available. The caller must already hold
 * sem->lock (see the wait_sem macro); the lock is released before this
 * returns or sleeps. Returns true if woken by _post_sem, false if an
 * alertable sleep was ended by some other source. */
bool _wait_sem(Semaphore *sem, bool alertable)
{
        /* Optimistic take: a non-negative result means a token was free. */
        if (--sem->val >= 0) {
                release_spinlock(&sem->lock);
                return true;
        }
        /* NOTE(review): kalloc result is not checked -- confirm it cannot
         * fail here. */
        WaitData *wait = kalloc(sizeof(WaitData));
        wait->proc = thisproc();
        wait->up = false;
        _insert_into_list(&sem->sleeplist, &wait->slnode);
        acquire_sched_lock();
        release_spinlock(&sem->lock);
        /* Alertable sleeps may be ended early; deep sleeps only by post. */
        sched(alertable ? SLEEPING : DEEPSLEEPING);
        acquire_spinlock(&sem->lock); // also the lock for waitdata
        if (!wait->up) // wakeup by other sources
        {
                /* Undo the token reservation and leave the sleep list. */
                ASSERT(++sem->val <= 0);
                _detach_from_list(&wait->slnode);
        }
        release_spinlock(&sem->lock);
        bool ret = wait->up;
        kfree(wait);
        return ret;
}
+
/* Release one token; caller must hold sem->lock. If anyone is sleeping
 * (val stays <= 0 after the increment), hand the token to the oldest
 * waiter: inserts go at the list head, so sleeplist.prev is FIFO order. */
void _post_sem(Semaphore *sem)
{
        if (++sem->val <= 0) {
                ASSERT(!_empty_list(&sem->sleeplist));
                auto wait = container_of(sem->sleeplist.prev, WaitData, slnode);
                wait->up = true;
                _detach_from_list(&wait->slnode);
                activate_proc(wait->proc);
        }
}
\ No newline at end of file
diff --git a/src/common/sem.h b/src/common/sem.h
new file mode 100755
index 0000000..656376d
--- /dev/null
+++ b/src/common/sem.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include
+
+struct Proc;
+
/* One sleeping waiter; heap-allocated by _wait_sem and protected by the
 * owning semaphore's spinlock. */
typedef struct {
        bool up; // set true by the poster that hands this waiter a unit
        struct Proc *proc; // the sleeping process to reactivate
        ListNode slnode; // link in Semaphore::sleeplist
} WaitData;

/* Counting semaphore. `val` may go negative: -val is then the number of
 * waiters queued on `sleeplist`. */
typedef struct {
        SpinLock lock;
        int val;
        ListNode sleeplist;
} Semaphore;

/* _-prefixed operations require the caller to hold the semaphore's lock;
 * use the macros below for self-locking variants. */
void init_sem(Semaphore *, int val);
void _post_sem(Semaphore *);
WARN_RESULT bool _wait_sem(Semaphore *, bool alertable);
bool _get_sem(Semaphore *);
WARN_RESULT int _query_sem(Semaphore *);
void _lock_sem(Semaphore *);
void _unlock_sem(Semaphore *);
int get_all_sem(Semaphore *);
int post_all_sem(Semaphore *);
// _wait_sem releases the lock itself, hence no _unlock_sem here.
#define wait_sem(sem) (_lock_sem(sem), _wait_sem(sem, true))
// An unalertable wait cannot be interrupted, so it must succeed.
#define unalertable_wait_sem(sem) \
        ASSERT((_lock_sem(sem), _wait_sem(sem, false)))
#define post_sem(sem) (_lock_sem(sem), _post_sem(sem), _unlock_sem(sem))
#define get_sem(sem) \
        ({ \
                _lock_sem(sem); \
                bool __ret = _get_sem(sem); \
                _unlock_sem(sem); \
                __ret; \
        })

// A sleep lock is just a binary semaphore initialized to 1.
#define SleepLock Semaphore
#define init_sleeplock(lock) init_sem(lock, 1)
#define acquire_sleeplock(lock) wait_sem(lock)
#define unalertable_acquire_sleeplock(lock) unalertable_wait_sem(lock)
#define release_sleeplock(lock) post_sem(lock)
diff --git a/src/common/spinlock.c b/src/common/spinlock.c
new file mode 100644
index 0000000..1bbb362
--- /dev/null
+++ b/src/common/spinlock.c
@@ -0,0 +1,28 @@
+#include
+#include
+
/* Mark the lock as free; must run before first use. */
void init_spinlock(SpinLock *lock)
{
        lock->locked = 0;
}
+
+bool try_acquire_spinlock(SpinLock *lock)
+{
+ if (!lock->locked &&
+ !__atomic_test_and_set(&lock->locked, __ATOMIC_ACQUIRE)) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
/* Spin until the lock is taken, yielding the core between attempts. */
void acquire_spinlock(SpinLock *lock)
{
        while (!try_acquire_spinlock(lock))
                arch_yield();
}
+
/* Release the lock with RELEASE ordering so writes made inside the
 * critical section become visible to the next acquirer. */
void release_spinlock(SpinLock *lock)
{
        __atomic_clear(&lock->locked, __ATOMIC_RELEASE);
}
diff --git a/src/common/spinlock.h b/src/common/spinlock.h
new file mode 100755
index 0000000..d8c265a
--- /dev/null
+++ b/src/common/spinlock.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include
+#include
+
+typedef struct {
+ volatile bool locked;
+} SpinLock;
+
+void init_spinlock(SpinLock *);
+WARN_RESULT bool try_acquire_spinlock(SpinLock *);
+void acquire_spinlock(SpinLock *);
+void release_spinlock(SpinLock *);
diff --git a/src/common/string.c b/src/common/string.c
new file mode 100644
index 0000000..8b5d906
--- /dev/null
+++ b/src/common/string.c
@@ -0,0 +1,93 @@
+#include
+
+void *memset(void *s, int c, usize n)
+{
+ for (usize i = 0; i < n; i++)
+ ((u8 *)s)[i] = (u8)(c & 0xff);
+
+ return s;
+}
+
+void *memcpy(void *restrict dest, const void *restrict src, usize n)
+{
+ for (usize i = 0; i < n; i++)
+ ((u8 *)dest)[i] = ((u8 *)src)[i];
+
+ return dest;
+}
+
+int memcmp(const void *s1, const void *s2, usize n)
+{
+ for (usize i = 0; i < n; i++) {
+ int c1 = ((u8 *)s1)[i];
+ int c2 = ((u8 *)s2)[i];
+
+ if (c1 != c2)
+ return c1 - c2;
+ }
+
+ return 0;
+}
+
+void *memmove(void *dest, const void *src, usize n)
+{
+ const char *s = (const char *)src;
+ char *d = (char *)dest;
+
+ if (s < d && (usize)(d - s) < n) {
+ s += n;
+ d += n;
+ while (n-- > 0) {
+ *--d = *--s;
+ }
+ } else {
+ while (n-- > 0) {
+ *d++ = *s++;
+ }
+ }
+
+ return dest;
+}
+
+char *strncpy(char *restrict dest, const char *restrict src, usize n)
+{
+ usize i = 0;
+ for (; i < n && src[i] != '\0'; i++)
+ dest[i] = src[i];
+ for (; i < n; i++)
+ dest[i] = '\0';
+
+ return dest;
+}
+
+char *strncpy_fast(char *restrict dest, const char *restrict src, usize n)
+{
+ usize i = 0;
+ for (; i < n && src[i] != '\0'; i++)
+ dest[i] = src[i];
+ if (i < n)
+ dest[i] = '\0';
+
+ return dest;
+}
+
+int strncmp(const char *s1, const char *s2, usize n)
+{
+ for (usize i = 0; i < n; i++) {
+ if (s1[i] != s2[i])
+ return s1[i] - s2[i];
+ if (s1[i] == '\0' || s2[i] == '\0')
+ break;
+ }
+
+ return 0;
+}
+
+usize strlen(const char *s)
+{
+ usize i = 0;
+ while (s[i] != '\0')
+ i++;
+
+ return i;
+}
diff --git a/src/common/string.h b/src/common/string.h
new file mode 100755
index 0000000..6248d79
--- /dev/null
+++ b/src/common/string.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include
+
+void *memset(void *s, int c, usize n);
+void *memcpy(void *restrict dest, const void *restrict src, usize n);
+WARN_RESULT int memcmp(const void *s1, const void *s2, usize n);
+
+/**
+ * Note that memmove does not allocate extra memory and handles overlapped memory
+ * regions correctly, but it does not take side effects into consideration
+ * (e.g. two virtual memory regions mapped to the same physical memory region).
+ */
+void *memmove(void *dest, const void *src, usize n);
+
+/**
+ * Note that for string functions, please specify `n` explicitly.
+ * strncpy will `dest` with zeroes if the length of `src` is less than `n`.
+ * strncpy_fast will not do that.
+ */
+char *strncpy(char *restrict dest, const char *restrict src, usize n);
+char *strncpy_fast(char *restrict dest, const char *restrict src, usize n);
+
+WARN_RESULT int strncmp(const char *s1, const char *s2, usize n);
+WARN_RESULT usize strlen(const char *s);
diff --git a/src/common/variadic.h b/src/common/variadic.h
new file mode 100644
index 0000000..751ceba
--- /dev/null
+++ b/src/common/variadic.h
@@ -0,0 +1,7 @@
+#pragma once
+
+typedef __builtin_va_list va_list;
+
+#define va_start(ap, param) __builtin_va_start(ap, param)
+#define va_end(ap) __builtin_va_end(ap)
+#define va_arg(ap, type) __builtin_va_arg(ap, type)
diff --git a/src/driver/CMakeLists.txt b/src/driver/CMakeLists.txt
new file mode 100644
index 0000000..ec32eef
--- /dev/null
+++ b/src/driver/CMakeLists.txt
@@ -0,0 +1,3 @@
+file(GLOB driver_sources CONFIGURE_DEPENDS "*.c")
+
+add_library(driver STATIC ${driver_sources})
diff --git a/src/driver/aux.h b/src/driver/aux.h
new file mode 100644
index 0000000..5c58a98
--- /dev/null
+++ b/src/driver/aux.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include
+
+#define AUX_BASE (MMIO_BASE + 0x215000)
+
+#define AUX_ENABLES (AUX_BASE + 0x04)
+#define AUX_MU_IO_REG (AUX_BASE + 0x40)
+#define AUX_MU_IER_REG (AUX_BASE + 0x44)
+#define AUX_MU_IIR_REG (AUX_BASE + 0x48)
+#define AUX_MU_LCR_REG (AUX_BASE + 0x4C)
+#define AUX_MU_MCR_REG (AUX_BASE + 0x50)
+#define AUX_MU_LSR_REG (AUX_BASE + 0x54)
+#define AUX_MU_MSR_REG (AUX_BASE + 0x58)
+#define AUX_MU_SCRATCH (AUX_BASE + 0x5C)
+#define AUX_MU_CNTL_REG (AUX_BASE + 0x60)
+#define AUX_MU_STAT_REG (AUX_BASE + 0x64)
+#define AUX_MU_BAUD_REG (AUX_BASE + 0x68)
+
+#define AUX_UART_CLOCK 250000000
+
+#define AUX_MU_BAUD(baudrate) ((AUX_UART_CLOCK / ((baudrate) * 8)) - 1)
diff --git a/src/driver/base.h b/src/driver/base.h
new file mode 100755
index 0000000..214cd83
--- /dev/null
+++ b/src/driver/base.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#define KERNEL_BASE 0xFFFF000000000000
+#define MMIO_BASE (KERNEL_BASE + 0xA000000)
+#define LOCAL_BASE (KERNEL_BASE + 0x40000000)
+
+#define V2P(v) ((u64)(v) - KERNEL_BASE)
+#define P2V(p) ((u64)(p) + KERNEL_BASE)
+
+#define PUARTBASE 0x9000000
+#define UARTBASE P2V(PUARTBASE)
+
+#define PGICBASE 0x08000000
+#define GICBASE P2V(PGICBASE)
+
+#define PVIRTIO0 0x0A000000
+#define VIRTIO0 P2V(PVIRTIO0)
\ No newline at end of file
diff --git a/src/driver/clock.c b/src/driver/clock.c
new file mode 100755
index 0000000..c8325d5
--- /dev/null
+++ b/src/driver/clock.c
@@ -0,0 +1,43 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static struct {
+ ClockHandler handler;
+} clock;
+
/* Per-cpu clock bring-up: enable the virtual timer and arm the first tick.
 * NOTE(review): the original comment said "reserve one second for the
 * first time", but reset_clock(10) arms a 10 ms interval — confirm which
 * was intended. */
void init_clock()
{
        enable_timer();
        reset_clock(10);
}
+
/* Arm the virtual timer to fire `interval_ms` milliseconds from now.
 * The ASSERT keeps the tick count within 31 bits, matching the signed
 * 32-bit CNTV_TVAL down-counter. */
void reset_clock(u64 interval_ms)
{
        u64 interval_clk = interval_ms * get_clock_frequency() / 1000;
        ASSERT(interval_clk <= 0x7fffffff);
        set_cntv_tval_el0(interval_clk);
}
+
/* Register the tick callback and route the timer IRQ to the trampoline
 * below. */
void set_clock_handler(ClockHandler handler)
{
        clock.handler = handler;
        set_interrupt_handler(TIMER_IRQ, invoke_clock_handler);
}
+
/* Timer IRQ trampoline: a tick before set_clock_handler() ran is a bug,
 * hence the panic. */
void invoke_clock_handler()
{
        if (!clock.handler)
                PANIC();
        clock.handler();
}
+
/* Current time in milliseconds derived from the architectural counter.
 * NOTE(review): get_timestamp() * 1000 can overflow u64 at extreme
 * uptime/frequency combinations — acceptable here, but worth knowing. */
u64 get_timestamp_ms()
{
        return get_timestamp() * 1000 / get_clock_frequency();
}
diff --git a/src/driver/clock.h b/src/driver/clock.h
new file mode 100755
index 0000000..677f232
--- /dev/null
+++ b/src/driver/clock.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include
+
+typedef void (*ClockHandler)(void);
+
+WARN_RESULT u64 get_timestamp_ms();
+void init_clock();
+void reset_clock(u64 interval_ms);
+void set_clock_handler(ClockHandler handler);
+void invoke_clock_handler();
diff --git a/src/driver/gicv3.c b/src/driver/gicv3.c
new file mode 100644
index 0000000..aa2be84
--- /dev/null
+++ b/src/driver/gicv3.c
@@ -0,0 +1,287 @@
+#include
+#include
+#include
+#include
+#include
+
+#define GICD_CTLR (0x0)
+#define GICD_TYPER (0x4)
+#define GICD_IGROUPR(n) (0x80 + (u64)(n) * 4)
+#define GICD_ISENABLER(n) (0x100 + (u64)(n) * 4)
+#define GICD_ICENABLER(n) (0x180 + (u64)(n) * 4)
+#define GICD_ISPENDR(n) (0x200 + (u64)(n) * 4)
+#define GICD_ICPENDR(n) (0x280 + (u64)(n) * 4)
+#define GICD_IPRIORITYR(n) (0x400 + (u64)(n) * 4)
+#define GICD_ITARGETSR(n) (0x800 + (u64)(n) * 4)
+#define GICD_ICFGR(n) (0xc00 + (u64)(n) * 4)
+
+#define GICC_CTLR (0x0)
+#define GICC_PMR (0x4)
+#define GICC_IAR (0xc)
+#define GICC_EOIR (0x10)
+#define GICC_HPPIR (0x18)
+#define GICC_AIAR (0x20)
+#define GICC_AEOIR (0x24)
+
+#define GICR_CTLR (0x0)
+#define GICR_WAKER (0x14)
+
+#define SGI_BASE 0x10000
+#define GICR_IGROUPR0 (SGI_BASE + 0x80)
+#define GICR_ISENABLER0 (SGI_BASE + 0x100)
+#define GICR_ICENABLER0 (SGI_BASE + 0x180)
+#define GICR_ICPENDR0 (SGI_BASE + 0x280)
+#define GICR_IPRIORITYR(n) (SGI_BASE + 0x400 + (n) * 4)
+#define GICR_ICFGR0 (SGI_BASE + 0xc00)
+#define GICR_ICFGR1 (SGI_BASE + 0xc04)
+#define GICR_IGRPMODR0 (SGI_BASE + 0xd00)
+
+static bool is_sgi_ppi(u32 id);
+
+static inline u32 icc_igrpen1_el1()
+{
+ u32 x;
+ asm volatile("mrs %0, S3_0_C12_C12_7" : "=r"(x));
+ return x;
+}
+
+static inline void w_icc_igrpen1_el1(u32 x)
+{
+ asm volatile("msr S3_0_C12_C12_7, %0" : : "r"(x));
+}
+
+static inline u32 icc_pmr_el1()
+{
+ u32 x;
+ asm volatile("mrs %0, S3_0_C4_C6_0" : "=r"(x));
+ return x;
+}
+
+static inline void w_icc_pmr_el1(u32 x)
+{
+ asm volatile("msr S3_0_C4_C6_0, %0" : : "r"(x));
+}
+
+static inline u32 icc_iar1_el1()
+{
+ u32 x;
+ asm volatile("mrs %0, S3_0_C12_C12_0" : "=r"(x));
+ return x;
+}
+
+static inline void w_icc_eoir1_el1(u32 x)
+{
+ asm volatile("msr S3_0_C12_C12_1, %0" : : "r"(x));
+}
+
+static inline u32 icc_sre_el1()
+{
+ u32 x;
+ asm volatile("mrs %0, S3_0_C12_C12_5" : "=r"(x));
+ return x;
+}
+
+static inline void w_icc_sre_el1(u32 x)
+{
+ asm volatile("msr S3_0_C12_C12_5, %0" : : "r"(x));
+}
+
+static struct {
+ char *gicd;
+ char *rdist_addrs[NCPU];
+} gicv3;
+
+static void wd32(u32 off, u32 val)
+{
+ *(volatile u32 *)(gicv3.gicd + off) = val;
+}
+
+static u32 rd32(u32 off)
+{
+ return *(volatile u32 *)(gicv3.gicd + off);
+}
+
+static void wr32(u32 cpuid, u32 off, u32 val)
+{
+ *(volatile u32 *)(gicv3.rdist_addrs[cpuid] + off) = val;
+}
+
+static u32 rr32(u32 cpuid, u32 off)
+{
+ return *(volatile u32 *)(gicv3.rdist_addrs[cpuid] + off);
+}
+
+static void gic_enable_int(u32 intid)
+{
+ u32 is = rd32(GICD_ISENABLER(intid / 32));
+ is |= 1 << (intid % 32);
+ wd32(GICD_ISENABLER(intid / 32), is);
+}
+
+static void gicr_enable_int(u32 cpuid, u32 intid)
+{
+ if (!is_sgi_ppi(intid)) {
+ PANIC();
+ }
+
+ u32 is = rr32(cpuid, GICR_ISENABLER0);
+ is |= 1 << (intid % 32);
+ wr32(cpuid, GICR_ISENABLER0, is);
+}
+
+static void gic_clear_pending(u32 intid)
+{
+ u32 ic = rd32(GICD_ICPENDR(intid / 32));
+ ic |= 1 << (intid % 32);
+ wd32(GICD_ICPENDR(intid / 32), ic);
+}
+
+static void gicr_clear_pending(u32 cpuid, u32 intid)
+{
+ if (!is_sgi_ppi(intid)) {
+ PANIC();
+ }
+
+ u32 ic = rr32(cpuid, GICR_ICPENDR0);
+ ic |= 1 << (intid % 32);
+ wr32(cpuid, GICR_ICPENDR0, ic);
+}
+
+static void gic_set_prio(u32 intid, u32 prio)
+{
+ (void)prio;
+ u32 p = rd32(GICD_IPRIORITYR(intid / 4));
+ p &= ~((u32)0xff << (intid % 4 * 8)); // set prio 0
+ wd32(GICD_IPRIORITYR(intid / 4), p);
+}
+
+static void gicr_set_prio(u32 cpuid, u32 intid, u32 prio)
+{
+ (void)prio;
+ if (!is_sgi_ppi(intid)) {
+ PANIC();
+ }
+
+ u32 p = rr32(cpuid, GICR_IPRIORITYR(intid / 4));
+ p &= ~((u32)0xff << (intid % 4 * 8)); // set prio 0
+ wr32(cpuid, GICR_IPRIORITYR(intid / 4), p);
+}
+
+static void gic_set_target(u32 intid, u32 cpuid)
+{
+ u32 itargetsr = rd32(GICD_ITARGETSR(intid / 4));
+ itargetsr &= ~((u32)0xff << (intid % 4 * 8));
+ wd32(GICD_ITARGETSR(intid / 4),
+ itargetsr | ((u32)(1 << cpuid) << (intid % 4 * 8)));
+}
+
+/*static void gicr_wait_rwp(u32 cpuid)
+{
+ u32 ctlr = rr32(cpuid, GICR_CTLR);
+ while ((ctlr >> 3) & 1) // RWP
+ ;
+}*/
+
+void gic_setup_ppi(u32 cpu, u32 intid, int prio)
+{
+ gicr_set_prio(cpu, intid, prio);
+ gicr_clear_pending(cpu, intid);
+ gicr_enable_int(cpu, intid);
+}
+
+void gic_setup_spi(u32 intid, int prio)
+{
+ gic_set_prio(intid, prio);
+ gic_set_target(intid, 0);
+ gic_clear_pending(intid);
+ gic_enable_int(intid);
+}
+
+static void gic_cpu_init()
+{
+ w_icc_igrpen1_el1(0);
+
+ w_icc_pmr_el1(0xff);
+}
+
+static void gic_dist_init()
+{
+ wd32(GICD_CTLR, 0);
+
+ for (int i = 0; i < 32; i++)
+ wd32(GICD_IGROUPR(i), ~0);
+}
+
+static void gic_redist_init(u32 cpuid)
+{
+ wr32(cpuid, GICR_CTLR, 0);
+
+ w_icc_sre_el1(icc_sre_el1() | 1);
+
+ /* Non-secure Group1 */
+ wr32(cpuid, GICR_IGROUPR0, ~0);
+ wr32(cpuid, GICR_IGRPMODR0, 0);
+
+ wr32(cpuid, GICR_ICFGR1, 0);
+
+ /* enable redist */
+ u32 waker = rr32(cpuid, GICR_WAKER);
+ wr32(cpuid, GICR_WAKER, waker & ~(1 << 1));
+ while (rr32(cpuid, GICR_WAKER) & (1 << 2))
+ ;
+}
+
+static void gic_enable()
+{
+ /* enable Group0/Non-secure Group1 */
+ wd32(GICD_CTLR, 3);
+
+ w_icc_igrpen1_el1(1);
+}
+
/* Per-cpu GIC bring-up: CPU interface, distributor, this cpu's
 * redistributor, the per-cpu timer PPI, then global enable.
 * NOTE(review): gic_dist_init() and gic_enable() touch the shared
 * distributor yet run once per cpu here — harmless if idempotent, but
 * confirm this is intended rather than boot-cpu-only work. */
void gicv3_init_percpu()
{
        u32 cpu = cpuid();

        gic_cpu_init();
        gic_dist_init();
        gic_redist_init(cpu);

        gic_setup_ppi(cpuid(), TIMER_IRQ, 0);

        gic_enable();
}
+
/* Global GIC init: record the distributor base and each cpu's
 * redistributor frame (0x20000 stride starting at GICBASE + 0xa0000),
 * then route the UART and virtio-blk SPIs to cpu 0. */
void gicv3_init()
{
        gicv3.gicd = (char *)GICBASE;
        for (int i = 0; i < NCPU; i++) {
                gicv3.rdist_addrs[i] = (char *)(GICBASE + 0xa0000 + (i) * 0x20000);
        }

        gic_setup_spi(UART_IRQ, 0);
        gic_setup_spi(VIRTIO_BLK_IRQ, 0);
}
+
+bool gic_enabled()
+{
+ return (icc_igrpen1_el1() & 0x1) && (rd32(GICD_CTLR) & 0x1);
+}
+
+u32 gic_iar()
+{
+ return icc_iar1_el1();
+}
+
+void gic_eoi(u32 iar)
+{
+ w_icc_eoir1_el1(iar);
+}
+
+static bool is_sgi_ppi(u32 id)
+{
+ if (id < 32)
+ return true;
+ else
+ return false;
+}
\ No newline at end of file
diff --git a/src/driver/gicv3.h b/src/driver/gicv3.h
new file mode 100644
index 0000000..fd41d42
--- /dev/null
+++ b/src/driver/gicv3.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include
+
+void gicv3_init(void);
+void gicv3_init_percpu(void);
+void gic_eoi(u32 iar);
+u32 gic_iar(void);
+bool gic_enabled(void);
diff --git a/src/driver/gpio.h b/src/driver/gpio.h
new file mode 100644
index 0000000..c1ede8b
--- /dev/null
+++ b/src/driver/gpio.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include
+
+#define GPIO_BASE (MMIO_BASE + 0x200000)
+
+#define GPFSEL0 (GPIO_BASE + 0x00)
+#define GPFSEL1 (GPIO_BASE + 0x04)
+#define GPFSEL2 (GPIO_BASE + 0x08)
+#define GPFSEL3 (GPIO_BASE + 0x0C)
+#define GPFSEL4 (GPIO_BASE + 0x10)
+#define GPFSEL5 (GPIO_BASE + 0x14)
+#define GPSET0 (GPIO_BASE + 0x1C)
+#define GPSET1 (GPIO_BASE + 0x20)
+#define GPCLR0 (GPIO_BASE + 0x28)
+#define GPLEV0 (GPIO_BASE + 0x34)
+#define GPLEV1 (GPIO_BASE + 0x38)
+#define GPEDS0 (GPIO_BASE + 0x40)
+#define GPEDS1 (GPIO_BASE + 0x44)
+#define GPHEN0 (GPIO_BASE + 0x64)
+#define GPHEN1 (GPIO_BASE + 0x68)
+#define GPPUD (GPIO_BASE + 0x94)
+#define GPPUDCLK0 (GPIO_BASE + 0x98)
+#define GPPUDCLK1 (GPIO_BASE + 0x9C)
diff --git a/src/driver/interrupt.c b/src/driver/interrupt.c
new file mode 100644
index 0000000..4b1f2fd
--- /dev/null
+++ b/src/driver/interrupt.c
@@ -0,0 +1,45 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static InterruptHandler int_handler[NUM_IRQ_TYPES];
+
/* Fallback IRQ handler: an interrupt without a registered handler is a
 * bug, so report which id fired on which cpu and panic. */
static void default_handler(u32 intid)
{
        printk("\033[1;31m[Error CPU %lld]: Interrupt %d not implemented.\033[0m\n", cpuid(), intid);
        PANIC();
}
+
+void init_interrupt()
+{
+ for (usize i = 0; i < NUM_IRQ_TYPES; i++) {
+ int_handler[i] = default_handler;
+ }
+}
+
/* Register `handler` for interrupt `type`, replacing the default.
 * NOTE(review): no locking here — presumably only called during
 * single-threaded boot; confirm against callers. */
void set_interrupt_handler(InterruptType type, InterruptHandler handler)
{
        int_handler[type] = handler;
}
+
+void interrupt_global_handler()
+{
+ //printk("[Interrupt] Interrupt occurred on CPU %lld.\n", cpuid());
+ u32 iar = gic_iar();
+ u32 intid = iar & 0x3ff;
+
+ if (intid == 1023) {
+
+ printk("\033[1;31m[Warning]: Spurious Interrupt.\033[0m\n");
+ return;
+ }
+
+ gic_eoi(iar);
+
+ if (int_handler[intid])
+ int_handler[intid](intid);
+}
diff --git a/src/driver/interrupt.h b/src/driver/interrupt.h
new file mode 100644
index 0000000..3372154
--- /dev/null
+++ b/src/driver/interrupt.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#define NUM_IRQ_TYPES 64
+
+typedef enum {
+ TIMER_IRQ = 27,
+ UART_IRQ = 33,
+ VIRTIO_BLK_IRQ = 48
+} InterruptType;
+
+typedef void (*InterruptHandler)();
+
+void init_interrupt();
+void interrupt_global_handler();
+void set_interrupt_handler(InterruptType type, InterruptHandler handler);
diff --git a/src/driver/irq.h b/src/driver/irq.h
new file mode 100644
index 0000000..532dc67
--- /dev/null
+++ b/src/driver/irq.h
@@ -0,0 +1,42 @@
+#include
+
+#define IRQ_BASIC_PENDING (MMIO_BASE + 0xB200)
+#define IRQ_PENDING_1 (MMIO_BASE + 0xB204)
+#define IRQ_PENDING_2 (MMIO_BASE + 0xB208)
+#define FIQ_CONTROL (MMIO_BASE + 0xB20C)
+#define ENABLE_IRQS_1 (MMIO_BASE + 0xB210)
+#define ENABLE_IRQS_2 (MMIO_BASE + 0xB214)
+#define ENABLE_BASIC_IRQS (MMIO_BASE + 0xB218)
+#define DISABLE_IRQS_1 (MMIO_BASE + 0xB21C)
+#define DISABLE_IRQS_2 (MMIO_BASE + 0xB220)
+#define DISABLE_BASIC_IRQS (MMIO_BASE + 0xB224)
+
+#define AUX_INT (1 << 29)
+#define VC_ARASANSDIO_INT (1 << 30)
+
+/* ARM Local Peripherals */
+#define GPU_INT_ROUTE (LOCAL_BASE + 0xC)
+#define GPU_IRQ2CORE(i) (i)
+
+#define IRQ_SRC_CORE(i) (LOCAL_BASE + 0x60 + 4 * (i))
+#define IRQ_SRC_TIMER (1 << 11) /* Local Timer */
+#define IRQ_SRC_GPU (1 << 8)
+#define IRQ_SRC_CNTPNSIRQ (1 << 1) /* Core Timer */
+#define FIQ_SRC_CORE(i) (LOCAL_BASE + 0x70 + 4 * (i))
+
+/* Local timer */
+#define TIMER_ROUTE (LOCAL_BASE + 0x24)
+#define TIMER_IRQ2CORE(i) (i)
+
+#define TIMER_CTRL (LOCAL_BASE + 0x34)
+#define TIMER_INTENA (1 << 29)
+#define TIMER_ENABLE (1 << 28)
+#define TIMER_RELOAD_SEC (38400000) /* 2 * 19.2 MHz */
+
+#define TIMER_CLR (LOCAL_BASE + 0x38)
+#define TIMER_CLR_INT (1 << 31)
+#define TIMER_RELOAD (1 << 30)
+
+/* Core Timer */
+#define CORE_TIMER_CTRL(i) (LOCAL_BASE + 0x40 + 4 * (i))
+#define CORE_TIMER_ENABLE (1 << 1) /* CNTPNSIRQ */
diff --git a/src/driver/memlayout.h b/src/driver/memlayout.h
new file mode 100644
index 0000000..00482e9
--- /dev/null
+++ b/src/driver/memlayout.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#define EXTMEM 0x40000000
+#define PHYSTOP 0x80000000
+
+#define KSPACE_MASK 0xFFFF000000000000
+#define KERNLINK (KSPACE_MASK + EXTMEM) /* Address where kernel is linked */
+
+#define K2P_WO(x) ((x) - (KSPACE_MASK)) /* Same as V2P, but without casts */
+#define P2K_WO(x) ((x) + (KSPACE_MASK)) /* Same as P2V, but without casts */
diff --git a/src/driver/timer.c b/src/driver/timer.c
new file mode 100644
index 0000000..31556b8
--- /dev/null
+++ b/src/driver/timer.c
@@ -0,0 +1,38 @@
+#include
+#include
+#include
+#include
+
+#define CNTV_CTL_ENABLE (1 << 0)
+#define CNTV_CTL_IMASK (1 << 1)
+#define CNTV_CTL_ISTATUS (1 << 2)
+
+void enable_timer()
+{
+ u64 c = get_cntv_ctl_el0();
+ c |= CNTV_CTL_ENABLE;
+ c &= ~CNTV_CTL_IMASK;
+ set_cntv_ctl_el0(c);
+}
+
+void disable_timer()
+{
+ u64 c = get_cntv_ctl_el0();
+ c &= ~CNTV_CTL_ENABLE;
+ c |= CNTV_CTL_IMASK;
+ set_cntv_ctl_el0(c);
+}
+
+bool timer_enabled()
+{
+ u64 c = get_cntv_ctl_el0();
+ return c & 1;
+}
+
+void reload_timer(u64 interval_ms)
+{
+ u64 interval_us = interval_ms * 1000;
+ u64 interval_clk = interval_us * (get_clock_frequency() / 1000000);
+
+ set_cntv_tval_el0(interval_clk);
+}
diff --git a/src/driver/timer.h b/src/driver/timer.h
new file mode 100644
index 0000000..395bed1
--- /dev/null
+++ b/src/driver/timer.h
@@ -0,0 +1,4 @@
+#include
+
+void enable_timer();
+void disable_timer();
\ No newline at end of file
diff --git a/src/driver/uart.c b/src/driver/uart.c
new file mode 100644
index 0000000..18adfe6
--- /dev/null
+++ b/src/driver/uart.c
@@ -0,0 +1,37 @@
+#include
+#include
+#include
+#include
+#include
+
/* UART IRQ handler: clears the two interrupt bits (4 and 5) that
 * uart_init() unmasks; actual input is consumed elsewhere by polling. */
static void uartintr()
{
        device_put_u32(UART_ICR, 1 << 4 | 1 << 5);
}
+
/* Bring up the UART: disable it while reconfiguring, register the IRQ
 * handler, enable FIFOs with 8-bit words, re-enable, then unmask the two
 * interrupts after a short settle delay.
 * NOTE(review): 0x301 presumably sets UARTEN | TXE | RXE — confirm
 * against the PL011 UART_CR register layout. */
void uart_init()
{
        device_put_u32(UART_CR, 0);
        set_interrupt_handler(UART_IRQ, uartintr);
        device_put_u32(UART_LCRH, LCRH_FEN | LCRH_WLEN_8BIT);
        device_put_u32(UART_CR, 0x301);
        device_put_u32(UART_IMSC, 0); // mask all while settling
        delay_us(5);
        device_put_u32(UART_IMSC, 1 << 4 | 1 << 5); // unmask bits 4 and 5
}
+
/* Non-blocking read: return the next RX byte, or -1 when the RX FIFO is
 * empty.
 * NOTE(review): the return type is char, which is unsigned on AArch64, so
 * (char)-1 becomes 255 and callers comparing against -1 may never match —
 * consider returning int (interface change, not made here). */
char uart_get_char()
{
        if (device_get_u32(UART_FR) & FR_RXFE)
                return -1;
        return device_get_u32(UART_DR);
}
+
/* Blocking write: spin while the TX FIFO is full, then enqueue the byte. */
void uart_put_char(char c)
{
        while (device_get_u32(UART_FR) & FR_TXFF)
                ;
        device_put_u32(UART_DR, c);
}
+
+__attribute__((weak, alias("uart_put_char"))) void putch(char);
diff --git a/src/driver/uart.h b/src/driver/uart.h
new file mode 100644
index 0000000..c923c23
--- /dev/null
+++ b/src/driver/uart.h
@@ -0,0 +1,24 @@
+#pragma once
+#include
+
+#define UART_DR (UARTBASE + 0x00)
+#define UART_FR (UARTBASE + 0x18)
+#define FR_RXFE (1 << 4) // Recieve fifo empty
+#define FR_TXFF (1 << 5) // Transmit fifo full
+#define FR_RXFF (1 << 6) // Recieve fifo full
+#define FR_TXFE (1 << 7) // Transmit fifo empty
+#define RXFE (device_get_u32(UART_FR) & FR_RXFE)
+#define TXFF (device_get_u32(UART_FR) & FR_TXFF)
+#define RXFF (device_get_u32(UART_FR) & FR_RXFF)
+#define TXFE (device_get_u32(UART_FR) & FR_TXFE)
+#define UART_IBRD (UARTBASE + 0x24)
+#define UART_FBRD (UARTBASE + 0x28)
+#define UART_LCRH (UARTBASE + 0x2c)
+#define LCRH_FEN (1 << 4)
+#define LCRH_WLEN_8BIT (3 << 5)
+#define UART_CR (UARTBASE + 0x30)
+#define UART_IMSC (UARTBASE + 0x38)
+#define UART_ICR (UARTBASE + 0x44)
+
+void uart_init();
+void uart_put_char(char c);
\ No newline at end of file
diff --git a/src/driver/virtio.h b/src/driver/virtio.h
new file mode 100755
index 0000000..65fa102
--- /dev/null
+++ b/src/driver/virtio.h
@@ -0,0 +1,140 @@
+#pragma once
+
+#include
+#include
+#include
+
+#define NQUEUE 8
+
+#define VIRTIO_REG_MAGICVALUE (VIRTIO0 + 0x00)
+#define VIRTIO_REG_VERSION (VIRTIO0 + 0x04)
+#define VIRTIO_REG_DEVICE_ID (VIRTIO0 + 0x08)
+#define VIRTIO_REG_VENDOR_ID (VIRTIO0 + 0x0c)
+#define VIRTIO_REG_DEVICE_FEATURES (VIRTIO0 + 0x10)
+#define VIRTIO_REG_DEVICE_FEATURES_SEL (VIRTIO0 + 0x14)
+#define VIRTIO_REG_DRIVER_FEATURES (VIRTIO0 + 0x20)
+#define VIRTIO_REG_DRIVER_FEATURES_SEL (VIRTIO0 + 0x24)
+#define VIRTIO_REG_QUEUE_SEL (VIRTIO0 + 0x30)
+#define VIRTIO_REG_QUEUE_NUM_MAX (VIRTIO0 + 0x34)
+#define VIRTIO_REG_QUEUE_NUM (VIRTIO0 + 0x38)
+#define VIRTIO_REG_QUEUE_READY (VIRTIO0 + 0x44)
+#define VIRTIO_REG_QUEUE_NOTIFY (VIRTIO0 + 0x50)
+#define VIRTIO_REG_INTERRUPT_STATUS (VIRTIO0 + 0x60)
+#define VIRTIO_REG_INTERRUPT_ACK (VIRTIO0 + 0x64)
+#define VIRTIO_REG_STATUS (VIRTIO0 + 0x70)
+#define VIRTIO_REG_QUEUE_DESC_LOW (VIRTIO0 + 0x80)
+#define VIRTIO_REG_QUEUE_DESC_HIGH (VIRTIO0 + 0x84)
+#define VIRTIO_REG_QUEUE_DRIVER_LOW (VIRTIO0 + 0x90)
+#define VIRTIO_REG_QUEUE_DRIVER_HIGH (VIRTIO0 + 0x94)
+#define VIRTIO_REG_QUEUE_DEVICE_LOW (VIRTIO0 + 0xa0)
+#define VIRTIO_REG_QUEUE_DEVICE_HIGH (VIRTIO0 + 0xa4)
+#define VIRTIO_REG_CONFIG_GENERATION (VIRTIO0 + 0xfc)
+#define VIRTIO_REG_CONFIG (VIRTIO0 + 0x100)
+
+#define DEV_STATUS_ACKNOWLEDGE 1
+#define DEV_STATUS_DRIVER 2
+#define DEV_STATUS_FAILED 128
+#define DEV_STATUS_FEATURES_OK 8
+#define DEV_STATUS_DRIVER_OK 4
+#define DEV_STATUS_NEEDS_RESET 64
+
+#define VIRTIO_BLK_F_SIZE_MAX 1
+#define VIRTIO_BLK_F_SEG_MAX 2
+#define VIRTIO_BLK_F_GEOMETRY 4
+#define VIRTIO_BLK_F_RO 5
+#define VIRTIO_BLK_F_BLK_SIZE 6
+#define VIRTIO_BLK_F_FLUSH 9
+#define VIRTIO_BLK_F_TOPOLOGY 10
+#define VIRTIO_BLK_F_CONFIG_WCE 11
+#define VIRTIO_BLK_F_DISCARD 13
+#define VIRTIO_BLK_F_WRITE_ZEROES 14
+#define VIRTIO_F_ANY_LAYOUT 27
+#define VIRTIO_RING_F_INDIRECT_DESC 28
+#define VIRTIO_RING_F_EVENT_IDX 29
+
+#define VIRTIO_BLK_S_OK 0
+#define VIRTIO_BLK_S_IOERR 1
+#define VIRTIO_BLK_S_UNSUPP 2
+
+#define VIRTQ_DESC_F_NEXT 1
+#define VIRTQ_DESC_F_WRITE 2
+#define VIRTQ_DESC_F_INDIRECT 4
+struct virtq_desc {
+ u64 addr;
+ u32 len;
+ u16 flags;
+ u16 next;
+} __attribute__((packed, aligned(16)));
+
+#define VIRTQ_AVAIL_F_NO_INTERRUPT 1
+struct virtq_avail {
+ u16 flags;
+ u16 idx;
+ u16 ring[NQUEUE];
+} __attribute__((packed, aligned(2)));
+
+struct virtq_used_elem {
+ u32 id;
+ u32 len;
+} __attribute__((packed));
+
+#define VIRTQ_USED_F_NO_NOTIFY 1
+struct virtq_used {
+ u16 flags;
+ u16 idx;
+ struct virtq_used_elem ring[NQUEUE];
+} __attribute__((packed, aligned(4)));
+
+struct virtq {
+ struct virtq_desc *desc;
+ struct virtq_avail *avail;
+ struct virtq_used *used;
+ u16 free_head;
+ u16 nfree;
+ u16 last_used_idx;
+
+ struct {
+ volatile u8 status;
+ volatile u8 done;
+ u8 *buf;
+ } info[NQUEUE];
+};
+
+#define VIRTIO_BLK_T_IN 0
+#define VIRTIO_BLK_T_OUT 1
+#define VIRTIO_BLK_T_FLUSH 4
+#define VIRTIO_BLK_T_DISCARD 11
+#define VIRTIO_BLK_T_WRITE_ZEROES 13
+struct virtio_blk_req_hdr {
+ u32 type;
+ u32 reserved;
+ u64 sector;
+} __attribute__((packed));
+
+enum diskop {
+ DREAD,
+ DWRITE,
+};
+
+int virtio_blk_rw(Buf *b);
+void virtio_init(void);
+
+
+typedef struct __attribute__((packed)) {
+ u8 boot_indicator; // 0x00
+ u8 chs_first[3]; // 0x01
+ u8 partition_type; // 0x04
+ u8 chs_last[3]; // 0x05
+ u32 lba_start; // 0x08 LBA of first absolute sector in the partition
+ u32 num_sectors; // 0x0C Number of sectors in partition
+} PartitionEntry;
+
+typedef struct __attribute__((packed)) {
+ u8 bootcode[446]; // 0x000
+ PartitionEntry pte[4]; // 0x1BE, 4 * 16B
+ u16 signature; // 0x1FE (0xAA55)
+} MBR;
+
+/* helpers to access partition 2 (index 1) */
+static inline u32 mbr_partition2_lba(const MBR *m) { return m->pte[1].lba_start; }
+static inline u32 mbr_partition2_sectors(const MBR *m) { return m->pte[1].num_sectors; }
\ No newline at end of file
diff --git a/src/driver/virtio_blk.c b/src/driver/virtio_blk.c
new file mode 100755
index 0000000..499c8a4
--- /dev/null
+++ b/src/driver/virtio_blk.c
@@ -0,0 +1,265 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define VIRTIO_MAGIC 0x74726976
+
+struct disk {
+ SpinLock lk;
+ struct virtq virtq;
+} disk;
+
+static void desc_init(struct virtq *virtq)
+{
+ for (int i = 0; i < NQUEUE; i++) {
+ if (i != NQUEUE - 1) {
+ virtq->desc[i].flags = VIRTQ_DESC_F_NEXT;
+ virtq->desc[i].next = i + 1;
+ }
+ }
+}
+
/* Pop one descriptor off the free list. Caller must hold disk.lk.
 * Panics on exhaustion, so the `< 0` checks at call sites are defensive.
 * The free-list tail has no NEXT flag; in that case free_head is left
 * as-is (the list is now empty, tracked by nfree). */
static int alloc_desc(struct virtq *virtq)
{
        if (virtq->nfree == 0) {
                PANIC();
        }

        u16 d = virtq->free_head;
        if (virtq->desc[d].flags & VIRTQ_DESC_F_NEXT)
                virtq->free_head = virtq->desc[d].next;

        virtq->nfree--;

        return d;
}
+
/* Return the descriptor chain starting at `n` to the free list.
 * Walks the chain via the NEXT flag, counting each descriptor back into
 * nfree, then splices the chain in front of the old free head and makes
 * `n` the new head. If the list was completely empty there is no old head
 * to link to — hence the `empty` special case. Caller holds disk.lk. */
static void free_desc(struct virtq *virtq, u16 n)
{
        u16 head = n;
        int empty = 0;

        if (virtq->nfree == 0)
                empty = 1;

        /* comma operator: count every visited descriptor, stop at the one
         * without a NEXT flag (the chain tail) */
        while (virtq->nfree++, (virtq->desc[n].flags & VIRTQ_DESC_F_NEXT)) {
                n = virtq->desc[n].next;
        }

        virtq->desc[n].flags = VIRTQ_DESC_F_NEXT;
        if (!empty)
                virtq->desc[n].next = virtq->free_head;
        virtq->free_head = head;
}
+
+int virtio_blk_rw(Buf *b)
+{
+ enum diskop op = DREAD;
+ if (b->flags & B_DIRTY)
+ op = DWRITE;
+
+ init_sem(&b->sem, 0);
+
+ u64 sector = b->block_no;
+ struct virtio_blk_req_hdr hdr;
+
+ if (op == DREAD)
+ hdr.type = VIRTIO_BLK_T_IN;
+ else if (op == DWRITE)
+ hdr.type = VIRTIO_BLK_T_OUT;
+ else
+ return -1;
+ hdr.reserved = 0;
+ hdr.sector = sector;
+
+ acquire_spinlock(&disk.lk);
+
+ int d0 = alloc_desc(&disk.virtq);
+ if (d0 < 0)
+ return -1;
+ disk.virtq.desc[d0].addr = (u64)V2P(&hdr);
+ disk.virtq.desc[d0].len = sizeof(hdr);
+ disk.virtq.desc[d0].flags = VIRTQ_DESC_F_NEXT;
+
+ int d1 = alloc_desc(&disk.virtq);
+ if (d1 < 0)
+ return -1;
+ disk.virtq.desc[d0].next = d1;
+ disk.virtq.desc[d1].addr = (u64)V2P(b->data);
+ disk.virtq.desc[d1].len = 512;
+ disk.virtq.desc[d1].flags = VIRTQ_DESC_F_NEXT;
+ if (op == DREAD)
+ disk.virtq.desc[d1].flags |= VIRTQ_DESC_F_WRITE;
+
+ int d2 = alloc_desc(&disk.virtq);
+ if (d2 < 0)
+ return -1;
+ disk.virtq.desc[d1].next = d2;
+ disk.virtq.desc[d2].addr = (u64)V2P(&disk.virtq.info[d0].status);
+ disk.virtq.desc[d2].len = sizeof(disk.virtq.info[d0].status);
+ disk.virtq.desc[d2].flags = VIRTQ_DESC_F_WRITE;
+ disk.virtq.desc[d2].next = 0;
+
+ disk.virtq.avail->ring[disk.virtq.avail->idx % NQUEUE] = d0;
+ disk.virtq.avail->idx++;
+
+ disk.virtq.info[d0].buf = b->data;
+
+ arch_fence();
+ REG(VIRTIO_REG_QUEUE_NOTIFY) = 0;
+ arch_fence();
+
+ /* LAB 4 TODO 1 BEGIN */
+
+ release_spinlock(&disk.lk);
+ _lock_sem(&b->sem);
+ bool ok = _wait_sem(&b->sem, true);
+ if (!ok) {
+ PANIC();
+ }
+ acquire_spinlock(&disk.lk);
+
+ /* LAB 4 TODO 1 END */
+
+ disk.virtq.info[d0].done = 0;
+ free_desc(&disk.virtq, d0);
+ release_spinlock(&disk.lk);
+ return 0;
+}
+
+static void virtio_blk_intr()
+{
+ acquire_spinlock(&disk.lk);
+
+ u32 intr_status = REG(VIRTIO_REG_INTERRUPT_STATUS);
+ REG(VIRTIO_REG_INTERRUPT_ACK) = intr_status & 0x3;
+
+ int d0;
+ while (disk.virtq.last_used_idx != disk.virtq.used->idx) {
+ d0 = disk.virtq.used->ring[disk.virtq.last_used_idx % NQUEUE].id;
+ if (disk.virtq.info[d0].status != 0) {
+ PANIC();
+ }
+
+ /* LAB 4 TODO 2 BEGIN */
+ Buf *b = container_of((void*)disk.virtq.info[d0].buf, Buf, data);
+ if (b != NULL) {
+ post_sem(&b->sem);
+ }
+ /* LAB 4 TODO 2 END */
+
+ disk.virtq.info[d0].buf = NULL;
+ disk.virtq.last_used_idx++;
+ }
+
+ release_spinlock(&disk.lk);
+}
+
+static int virtq_init(struct virtq *vq)
+{
+ memset(vq, 0, sizeof(*vq));
+
+ vq->desc = kalloc_page();
+ vq->avail = kalloc_page();
+ vq->used = kalloc_page();
+
+ memset(vq->desc, 0, 4096);
+ memset(vq->avail, 0, 4096);
+ memset(vq->used, 0, 4096);
+
+ if (!vq->desc || !vq->avail || !vq->used) {
+ PANIC();
+ }
+ vq->nfree = NQUEUE;
+ desc_init(vq);
+
+ return 0;
+}
+
+void virtio_init()
+{
+ if (REG(VIRTIO_REG_MAGICVALUE) != VIRTIO_MAGIC ||
+ REG(VIRTIO_REG_VERSION) != 2 || REG(VIRTIO_REG_DEVICE_ID) != 2) {
+ printk("[Virtio]: Device not found.");
+ PANIC();
+ }
+
+ /* Reset the device. */
+ REG(VIRTIO_REG_STATUS) = 0;
+
+ u32 status = 0;
+
+ /* Set the ACKNOWLEDGE status bit: the guest OS has noticed the device. */
+ status |= DEV_STATUS_ACKNOWLEDGE;
+ REG(VIRTIO_REG_STATUS) = status;
+
+ /* Set the DRIVER status bit: the guest OS knows how to drive the device. */
+ status |= DEV_STATUS_DRIVER;
+ REG(VIRTIO_REG_STATUS) = status;
+
+ /* Read device feature bits, and write the subset of feature bits understood by the OS and driver to the device. */
+ REG(VIRTIO_REG_DEVICE_FEATURES_SEL) = 0;
+ REG(VIRTIO_REG_DRIVER_FEATURES_SEL) = 0;
+
+ u32 features = REG(VIRTIO_REG_DEVICE_FEATURES);
+ features &= ~(1 << VIRTIO_BLK_F_SEG_MAX);
+ features &= ~(1 << VIRTIO_BLK_F_GEOMETRY);
+ features &= ~(1 << VIRTIO_BLK_F_RO);
+ features &= ~(1 << VIRTIO_BLK_F_BLK_SIZE);
+ features &= ~(1 << VIRTIO_BLK_F_FLUSH);
+ features &= ~(1 << VIRTIO_BLK_F_TOPOLOGY);
+ features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
+ features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
+ features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
+ features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
+ REG(VIRTIO_REG_DRIVER_FEATURES) = features;
+
+ status |= DEV_STATUS_FEATURES_OK;
+ REG(VIRTIO_REG_STATUS) = status;
+
+ arch_fence();
+ status = REG(VIRTIO_REG_STATUS);
+ arch_fence();
+ if (!(status & DEV_STATUS_FEATURES_OK)) {
+ PANIC();
+ }
+
+ virtq_init(&disk.virtq);
+
+ int qmax = REG(VIRTIO_REG_QUEUE_NUM_MAX);
+ if (qmax < NQUEUE) {
+ printk("[Virtio]: Too many queues.");
+ PANIC();
+ }
+
+ REG(VIRTIO_REG_QUEUE_SEL) = 0;
+ REG(VIRTIO_REG_QUEUE_NUM) = NQUEUE;
+
+ u64 phy_desc = V2P(disk.virtq.desc);
+ REG(VIRTIO_REG_QUEUE_DESC_LOW) = LO(phy_desc);
+ REG(VIRTIO_REG_QUEUE_DESC_HIGH) = HI(phy_desc);
+
+ u64 phy_avail = V2P(disk.virtq.avail);
+ REG(VIRTIO_REG_QUEUE_DRIVER_LOW) = LO(phy_avail);
+ REG(VIRTIO_REG_QUEUE_DRIVER_HIGH) = HI(phy_avail);
+ u64 phy_used = V2P(disk.virtq.used);
+
+ REG(VIRTIO_REG_QUEUE_DEVICE_LOW) = LO(phy_used);
+ REG(VIRTIO_REG_QUEUE_DEVICE_HIGH) = HI(phy_used);
+
+ arch_fence();
+
+ REG(VIRTIO_REG_QUEUE_READY) = 1;
+ status |= DEV_STATUS_DRIVER_OK;
+ REG(VIRTIO_REG_STATUS) = status;
+
+ arch_fence();
+
+ set_interrupt_handler(VIRTIO_BLK_IRQ, virtio_blk_intr);
+ init_spinlock(&disk.lk);
+}
diff --git a/src/fs/CMakeLists.txt b/src/fs/CMakeLists.txt
new file mode 100755
index 0000000..fb46d81
--- /dev/null
+++ b/src/fs/CMakeLists.txt
@@ -0,0 +1,3 @@
+file(GLOB fs_sources CONFIGURE_DEPENDS "*.c") # gather all C sources here; CONFIGURE_DEPENDS re-runs the glob at build time
+
+add_library(fs STATIC ${fs_sources}) # filesystem layer, linked statically into the kernel image
diff --git a/src/fs/block_device.c b/src/fs/block_device.c
new file mode 100755
index 0000000..9a3f075
--- /dev/null
+++ b/src/fs/block_device.c
@@ -0,0 +1,50 @@
+#include
+#include
+
+/**
+ @brief a simple implementation of reading a block from SD card.
+
+ @param[in] block_no the block number to read
+ @param[out] buffer the buffer to store the data (must hold BLOCK_SIZE bytes)
+ */
+static void sd_read(usize block_no, u8 *buffer) {
+    Buf b;
+    b.block_no = (u32)block_no;
+    b.flags = 0; // neither B_VALID nor B_DIRTY: the virtio layer treats this as a read request
+    virtio_blk_rw(&b); // assumes the request completes before returning (data is copied right after)
+    memcpy(buffer, b.data, BLOCK_SIZE); // copy out of the stack-allocated Buf
+}
+
+/**
+ @brief a simple implementation of writing a block to SD card.
+
+ @param[in] block_no the block number to write
+ @param[in] buffer the buffer to store the data (must hold BLOCK_SIZE bytes)
+ */
+static void sd_write(usize block_no, u8 *buffer) {
+    Buf b;
+    b.block_no = (u32)block_no;
+    b.flags = B_DIRTY | B_VALID; // dirty + valid: the virtio layer treats this as a write request
+    memcpy(b.data, buffer, BLOCK_SIZE); // stage the payload into the on-stack Buf first
+    virtio_blk_rw(&b);
+}
+
+/**
+ @brief the in-memory copy of the super block.
+
+ We may need to read the super block multiple times, so keep a copy of it in
+ memory.
+
+ @note the super block, in our lab, is always read-only, so we don't need to
+ write it back.
+ */
+static u8 sblock_data[BLOCK_SIZE]; // raw bytes; reinterpreted as SuperBlock by get_super_block()
+
+BlockDevice block_device; // the global device instance; function pointers wired up in init_block_device()
+
+void init_block_device() {
+    block_device.read = sd_read, block_device.write = sd_write; /* install the SD-card backend for the generic interface */
+    sd_read(1, sblock_data); /* fix: load the super block (disk block 1, right after the MBR) so get_super_block() returns real data instead of zeros */
+}
+
+const SuperBlock *get_super_block() { return (const SuperBlock *)sblock_data; } // NOTE(review): only meaningful once sblock_data has been filled from disk -- confirm the init path loads it
\ No newline at end of file
diff --git a/src/fs/block_device.h b/src/fs/block_device.h
new file mode 100755
index 0000000..ed9b361
--- /dev/null
+++ b/src/fs/block_device.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include
+
+/**
+ @brief interface for block devices.
+
+ @note yes, there is no OOP in C, but we can use function pointers to
+ simulate it. this is a common pattern in C, and you can find it in Linux
+ kernel too.
+
+ @see init_block_device
+ */
+typedef struct {
+    /**
+    read `BLOCK_SIZE` bytes in block at `block_no` to `buffer`.
+    caller must guarantee `buffer` is large enough.
+
+    @param[in] block_no the block number to read from.
+    @param[out] buffer the buffer to read into.
+    */
+    void (*read)(usize block_no, u8 *buffer);
+
+    /**
+    write `BLOCK_SIZE` bytes in `buffer` to block at `block_no`.
+    caller must guarantee `buffer` is large enough.
+
+    @param[in] block_no the block number to write to.
+    @param[in] buffer the buffer to write from.
+    */
+    void (*write)(usize block_no, u8 *buffer);
+} BlockDevice; // concrete backend (function pointers) is installed by init_block_device
+
+/**
+ @brief the global block device instance.
+ */
+extern BlockDevice block_device;
+
+/**
+ @brief initialize the block device.
+
+ This method must be called before any other block device methods,
+ and initializes the global block device and (if necessary) the
+ global super block.
+
+ e.g. for the SD card, this method is responsible for initializing
+ the SD card and reading the super block from the SD card.
+
+ @note You may want to put it into `*_init` method groups.
+ */
+void init_block_device();
+
+/**
+ * @brief get the global super block.
+ *
+ * @return const SuperBlock* the global super block.
+ */
+const SuperBlock *get_super_block(); // only valid after init_block_device has run
\ No newline at end of file
diff --git a/src/fs/cache.c b/src/fs/cache.c
new file mode 100755
index 0000000..5458803
--- /dev/null
+++ b/src/fs/cache.c
@@ -0,0 +1,358 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+/**
+ @brief the private reference to the super block.
+
+ @note we need these two variables because we allow the caller to
+ specify the block device and super block to use.
+ Correspondingly, you should NEVER use global instance of
+ them, e.g. `get_super_block`, `block_device`
+
+ @see init_bcache
+ */
+static const SuperBlock *sblock;
+
+/**
+ @brief the reference to the underlying block device.
+ */
+static const BlockDevice *device;
+
+/**
+ @brief global lock for block cache.
+
+ Use it to protect anything you need.
+
+ e.g. the list of allocated blocks, etc.
+ */
+static SpinLock lock;
+
+/**
+ @brief the list of all allocated in-memory block.
+
+ We use a linked list to manage all allocated cached blocks.
+
+ You can implement your own data structure if you like better performance.
+
+ @see Block
+ */
+static ListNode head; // MRU at head->next, LRU at head->prev (see cache_acquire)
+
+static LogHeader header; // in-memory copy of log header block.
+
+/**
+ @brief a struct to maintain other logging states.
+
+ You may wonder where we store some states, e.g.
+
+ * how many atomic operations are running?
+ * are we checkpointing?
+ * how to notify `end_op` that a checkpoint is done?
+
+ Put them here!
+
+ @see cache_begin_op, cache_end_op, cache_sync
+ */
+struct {
+    int waiting_ops; // number of atomic operations currently running (decremented by end_op)
+    bool committing; // is the log being committed?
+    Semaphore logsem; // semaphore to notify the completion of checkpointing
+} log;
+
+// read the content from disk.
+static INLINE void device_read(Block *block) {
+    device->read(block->block_no, block->data);
+}
+
+// write the content back to disk.
+static INLINE void device_write(Block *block) {
+    device->write(block->block_no, block->data);
+}
+
+// read log header from disk. (the header occupies the first block of the log area)
+static INLINE void read_header() {
+    device->read(sblock->log_start, (u8 *)&header);
+}
+
+// write log header back to disk. (this single write is the commit point of an atomic op)
+static INLINE void write_header() {
+    device->write(sblock->log_start, (u8 *)&header);
+}
+
+// initialize a block struct. (fields zeroed, sleeplock fresh; caller sets block_no)
+static void init_block(Block *block) {
+    block->block_no = 0;
+    init_list_node(&block->node);
+    block->acquired = false;
+    block->refcnt = 0;
+    block->pinned = false;
+
+    init_sleeplock(&block->lock);
+    block->valid = false; // content not loaded from disk yet
+    memset(block->data, 0, sizeof(block->data));
+}
+
+static usize cache_nums_blocks; // number of Blocks currently cached; protected by `lock`
+
+// see `cache.h`.
+static usize get_num_cached_blocks() {
+    return cache_nums_blocks; // NOTE(review): read without holding `lock`; acceptable for statistics only
+}
+
+// see `cache.h`.
+static Block *cache_acquire(usize block_no) {
+    //printk("[BCACHE] Acquiring block no %llu\n", block_no);
+    acquire_spinlock(&lock);
+    Block *block_to_acquire = NULL;
+    Block *block_current = NULL;
+    _for_in_list(nd, &head) {
+        if (nd == &head) continue; // skip the dummy head node
+        block_current = container_of(nd, Block, node);
+        if (block_current->block_no == block_no) {
+            block_to_acquire = block_current;
+            break;
+        }
+
+    }
+    if (block_to_acquire != NULL) {
+        //printk("[BCACHE] Block no %llu found in cache\n", block_no);
+        // found in cache
+        block_to_acquire->refcnt++; // bump before dropping the lock so eviction skips this block
+        block_to_acquire->acquired = true;
+        release_spinlock(&lock);
+        if (!wait_sem(&block_to_acquire->lock)) { // sleep until the current holder releases it
+            //printk("[BCACHE] Error: cannot acquire block no %llu\n", block_no);
+            PANIC();
+        }
+        acquire_spinlock(&lock);
+        _detach_from_list(&block_to_acquire->node); // move to front: keeps the list in LRU order
+        _insert_into_list(&head, &block_to_acquire->node);
+        release_spinlock(&lock);
+        //printk("[BCACHE] Acquired block no %llu\n", block_no);
+        return block_to_acquire;
+    }
+    //printk("[BCACHE] Block no %llu not found in cache\n", block_no);
+    // not found in cache
+    // need to allocate a new block, evict if necessary
+    ListNode *p = head.prev; // scan from the tail, i.e. least-recently-used first
+    while(cache_nums_blocks >= EVICTION_THRESHOLD){
+        ListNode* next_p = p->prev; // saved because p may be freed below
+        if(p == &head){
+            break; // walked the whole list; everything left is acquired or pinned
+        }
+        Block* block_current = container_of(p, Block, node);
+        if(!block_current->acquired && !block_current->pinned){
+            _detach_from_list(p);
+            cache_nums_blocks--;
+            kfree(block_current);
+        }
+        p = next_p;
+    }
+    block_to_acquire = kalloc(sizeof(Block));
+    init_block(block_to_acquire);
+    if (!wait_sem(&block_to_acquire->lock)) { // freshly-initialized sleeplock: succeeds immediately
+        //printk("[BCACHE] Error: cannot acquire newly allocated block\n");
+        PANIC();
+    }
+    block_to_acquire->block_no = block_no;
+    block_to_acquire->refcnt = 1;
+    block_to_acquire->acquired = true;
+    block_to_acquire->valid = true; // NOTE(review): set before the disk read below actually fills data
+    cache_nums_blocks++;
+    release_spinlock(&lock);
+    device_read(block_to_acquire); // NOTE(review): lock dropped while the block is not yet on `head`; a concurrent acquire of the same block_no can allocate a duplicate -- confirm, or insert before reading
+    acquire_spinlock(&lock);
+    _insert_into_list(&head, &block_to_acquire->node);
+    release_spinlock(&lock);
+    //printk("[BCACHE] Acquired block no %llu\n", block_no);
+    return block_to_acquire;
+}
+
+// see `cache.h`.
+static void cache_release(Block *block) {
+    acquire_spinlock(&lock);
+    if (block->refcnt <= 0) {
+        //printk("[BCACHE] Error: releasing a block that is not acquired\n");
+        PANIC(); // release without a matching acquire
+    }
+    block->refcnt--;
+    if (block->refcnt == 0) {
+        block->acquired = false; // now a candidate for eviction
+    }
+    post_sem(&block->lock); // wake one waiter blocked in cache_acquire
+    release_spinlock(&lock);
+}
+
+// see `cache.h`.
+static void cache_begin_op(OpContext *ctx) {
+    //printk("[BCACHE] Beginning atomic operation\n");
+    acquire_spinlock(&lock);
+    ctx->rm = OP_MAX_NUM_BLOCKS; // quota of distinct blocks this op may still log (ctx->ts left unset -- test-only field)
+    while (log.committing || LOG_MAX_SIZE <= header.num_blocks + (log.waiting_ops + 1)* OP_MAX_NUM_BLOCKS) {
+        release_spinlock(&lock); // a commit is running, or reserving worst-case space would overflow the log
+        if (!wait_sem(&log.logsem)) { // sleep until end_op posts after a checkpoint
+            //printk("[BCACHE] Error: cannot begin atomic operation due to log full\n");
+            PANIC();
+        }
+        acquire_spinlock(&lock); // re-check the admission condition under the lock
+    }
+    log.waiting_ops++; // counts running ops (name is historical)
+    release_spinlock(&lock);
+}
+
+// see `cache.h`. Record `block` in the running atomic operation's log.
+static void cache_sync(OpContext *ctx, Block *block) {
+    // ctx == NULL means unlogged write-through (dangerous; see cache.h).
+    if(ctx == NULL) {
+        device_write(block);
+        return;
+    }
+    acquire_spinlock(&lock);
+    block->pinned = true; // keep it cached until the log checkpoints it
+    bool already_in_log = false;
+    for(usize i = 0; i < header.num_blocks; i++) {
+        if(header.block_no[i] == block->block_no) {
+            already_in_log = true; // absorbed: re-syncing an already-logged block is free
+            break;
+        }
+    }
+    if(!already_in_log) {
+        // fix: bounds-check BEFORE storing; the old code wrote header.block_no[num_blocks] first, an out-of-bounds write whenever the log was already full.
+        if(ctx->rm <= 0 || header.num_blocks >= LOG_MAX_SIZE) {
+            PANIC();
+        }
+        header.block_no[header.num_blocks] = block->block_no;
+        header.num_blocks++;
+        ctx->rm--;
+    }
+    release_spinlock(&lock);
+}
+
+// see `cache.h`.
+static void cache_end_op(OpContext *ctx) {
+    //printk("[BCACHE] Ending atomic operation\n");
+    ctx = ctx; // self-assignment only silences the unused-parameter warning
+    acquire_spinlock(&lock);
+    log.waiting_ops--;
+    if(log.waiting_ops > 0) {
+        post_sem(&log.logsem); // not the last op: wake a waiter; the last op out performs the commit
+        release_spinlock(&lock);
+        return;
+    }
+
+    log.committing = true; // from here, begin_op blocks until the checkpoint finishes
+    release_spinlock(&lock);
+
+    for(usize i = 0; i < header.num_blocks; i++) { // phase 1: copy dirty blocks into the on-disk log area
+        usize from_block_no = header.block_no[i];
+        usize to_block_no = sblock->log_start + 1 + i; // +1 skips the log header block
+        Block *from_block = cache_acquire(from_block_no);
+        Block *to_block = cache_acquire(to_block_no);
+        for(int j = 0; j < BLOCK_SIZE; j++) {
+            to_block->data[j] = from_block->data[j];
+        }
+        device_write(to_block);
+        cache_release(from_block);
+        cache_release(to_block);
+    }
+    write_header(); // commit point: header now names the logged blocks on disk
+    for(usize i = 0; i < header.num_blocks; i++) { // phase 2: install blocks at their home locations
+        Block *block = cache_acquire(header.block_no[i]);
+        device_write(block);
+        acquire_spinlock(&lock);
+        block->pinned = false; // persisted; eviction may reclaim it again
+        release_spinlock(&lock);
+        cache_release(block);
+    }
+
+    acquire_spinlock(&lock);
+    header.num_blocks = 0;
+    release_spinlock(&lock);
+
+    write_header(); // checkpoint done: the empty header invalidates the log
+
+    acquire_spinlock(&lock);
+    log.committing = false;
+    post_all_sem(&log.logsem); // wake every op parked in begin_op
+    release_spinlock(&lock);
+}
+
+// see `cache.h`.
+static usize cache_alloc(OpContext *ctx) {
+    //printk("[BCACHE] Allocating block\n");
+    Block *bitmap_block = cache_acquire(sblock->bitmap_start); // NOTE(review): only the FIRST bitmap block is consulted; assumes num_blocks <= BIT_PER_BLOCK -- confirm for larger disks
+    for(usize i = 0; i < sblock->num_blocks; i++) {
+        if(!bitmap_get((BitmapCell*)bitmap_block->data, i)) {
+            bitmap_set((BitmapCell*)bitmap_block->data, i); // claim it: bit i == block number i
+
+            cache_sync(ctx, bitmap_block); // log the bitmap change under the caller's atomic op
+            cache_release(bitmap_block);
+
+            Block *new_block = cache_acquire(i);
+            memset(new_block->data, 0, BLOCK_SIZE); // contract: returned block is zero-initialized
+            cache_sync(ctx, new_block);
+            cache_release(new_block);
+            return i;
+        }
+    }
+    cache_release(bitmap_block);
+    //printk("[BCACHE] Error: no free block available for allocation\n");
+    PANIC(); // disk is full
+}
+
+// see `cache.h`.
+static void cache_free(OpContext *ctx, usize block_no) {
+    //printk("[BCACHE] Freeing block no %llu\n", block_no);
+    Block *bitmap_block = cache_acquire(sblock->bitmap_start); // NOTE(review): same single-bitmap-block assumption as cache_alloc
+
+    bitmap_clear((BitmapCell*)bitmap_block->data, block_no); // clearing an already-free bit is harmless (spec: no panic)
+
+    cache_sync(ctx, bitmap_block); // log the bitmap change under the caller's atomic op
+    cache_release(bitmap_block);
+}
+
+// see `cache.h`. Also replays any committed-but-unapplied log (crash recovery).
+void init_bcache(const SuperBlock *_sblock, const BlockDevice *_device) {
+    sblock = _sblock;
+    device = _device;
+    cache_nums_blocks = 0;
+    init_spinlock(&lock);
+
+    init_sem(&log.logsem, 0);
+    log.waiting_ops = 0;
+    log.committing = false;
+
+    init_list_node(&head);
+    read_header(); // on-disk header lists blocks committed before a possible crash
+    for (usize i = 0; i < header.num_blocks; i++) {
+        usize real_block_no = header.block_no[i];
+        usize log_block_no = sblock->log_start + 1 + i; // +1 skips the header block
+        Block *real_block = cache_acquire(real_block_no);
+        Block *log_block = cache_acquire(log_block_no);
+
+        memcpy(real_block->data, log_block->data, BLOCK_SIZE); // replay: log copy -> home location
+
+        device_write(real_block);
+        cache_release(real_block);
+        cache_release(log_block);
+    }
+    header.num_blocks = 0;
+    // fix: zero the whole array -- the old size LOG_MAX_SIZE was an element count used as bytes, so only 1/8 of the usize slots were cleared.
+    memset(header.block_no, 0, sizeof(header.block_no));
+    write_header(); // persist the empty header: log is now invalidated
+}
+
+BlockCache bcache = { // the global instance exported via cache.h; wires the interface to this file's implementation
+    .get_num_cached_blocks = get_num_cached_blocks,
+    .acquire = cache_acquire,
+    .release = cache_release,
+    .begin_op = cache_begin_op,
+    .sync = cache_sync,
+    .end_op = cache_end_op,
+    .alloc = cache_alloc,
+    .free = cache_free,
+};
\ No newline at end of file
diff --git a/src/fs/cache.h b/src/fs/cache.h
new file mode 100755
index 0000000..754df5d
--- /dev/null
+++ b/src/fs/cache.h
@@ -0,0 +1,262 @@
+#pragma once
+#include
+#include
+#include
+#include
+
+/**
+ @brief maximum number of distinct blocks that one atomic operation can hold.
+ */
+#define OP_MAX_NUM_BLOCKS 10
+
+/**
+ @brief the threshold of block cache to start eviction.
+
+ if the number of cached blocks is no less than this threshold, we can
+ evict some blocks in `acquire` to keep block cache small.
+ */
+#define EVICTION_THRESHOLD 20 // soft limit: eviction only reclaims unacquired, unpinned blocks
+
+/**
+ @brief a block in block cache.
+
+ @note you can add any member to this struct as you want.
+ */
+typedef struct {
+    /**
+    @brief the corresponding block number on disk.
+
+    @note should be protected by the global lock of the block cache.
+
+    @note required by our test. Do NOT remove it.
+    */
+    usize block_no;
+
+    /**
+    @brief list this block into a linked list.
+
+    @note should be protected by the global lock of the block cache.
+    */
+    ListNode node;
+
+    /**
+    @brief is the block already acquired by some thread or process?
+
+    @note should be protected by the global lock of the block cache.
+    */
+    bool acquired;
+
+    /**
+    @brief the reference count of the block.
+
+    @note should be protected by the global lock of the block cache.
+    */
+    int refcnt; // acquirers + waiters; eviction skips any block with acquired set
+
+    /**
+    @brief is the block pinned?
+
+    A pinned block should not be evicted from the cache.
+
+    e.g. it is dirty.
+
+    @note should be protected by the global lock of the block cache.
+    */
+    bool pinned; // set by sync(ctx, ...), cleared once the log checkpoints the block
+
+    /**
+    @brief the sleep lock protecting `valid` and `data`.
+    */
+    SleepLock lock;
+
+    /**
+    @brief is the content of block loaded from disk?
+
+    You may find it useless and it *is*. It is just a test flag read
+    by our test. In your code, you should:
+
+    * set `valid` to `false` when you allocate a new `Block` struct.
+    * set `valid` to `true` only after you load the content of block from
+    disk.
+
+    @note required by our test. Do NOT remove it.
+    */
+    bool valid;
+    /**
+    @brief the real in-memory content of the block on disk.
+    */
+    u8 data[BLOCK_SIZE];
+} Block;
+
+/**
+ @brief an atomic operation context.
+
+ @note add any member to this struct as you want.
+
+ @see begin_op, end_op
+ */
+typedef struct {
+    /**
+    @brief how many operation remains in this atomic operation?
+
+    If `rm` is 0, any **new** `sync` will panic.
+    */
+    usize rm; // decremented by sync for each new distinct block logged
+    /**
+    @brief a timestamp (i.e. an ID) to identify this atomic operation.
+
+    @note your implementation does NOT have to use this field, just ignoring
+    it is OK too.
+
+    @note only required by our test. Do NOT remove it.
+    */
+    usize ts;
+} OpContext;
+
+
+typedef struct { // vtable-style interface; the global instance is `bcache`, implemented in cache.c
+    /**
+    @return the number of cached blocks at this moment.
+
+    @note only required by our test to print statistics.
+    */
+    usize (*get_num_cached_blocks)();
+
+    /**
+    @brief declare a block as acquired by the caller.
+
+    It reads the content of block at `block_no` from disk, and locks the
+    block so that the caller can exclusively modify it.
+
+    @return the pointer to the locked block.
+
+    @see `release` - the counterpart of this function.
+    */
+    Block *(*acquire)(usize block_no);
+
+    /**
+    @brief declare an acquired block as released by the caller.
+
+    It unlocks the block so that other threads can acquire it again.
+
+    @note it does not need to write the block content back to disk.
+    */
+    void (*release)(Block *block);
+
+    // # NOTES FOR ATOMIC OPERATIONS
+    //
+    // atomic operation has three states:
+    // * running: this atomic operation may have more modifications.
+    // * committed: this atomic operation is ended. No more modifications.
+    // * checkpointed: all modifications have been already persisted to disk.
+    //
+    // `begin_op` creates a new running atomic operation.
+    // `end_op` commits an atomic operation, and waits for it to be
+    // checkpointed.
+
+    /**
+    @brief begin a new atomic operation and initialize `ctx`.
+
+    If there are too many running operations (i.e. our logging is
+    too small to hold all of them), `begin_op` should sleep until
+    we can start a new operation.
+
+    @param[out] ctx the context to be initialized.
+
+    @throw panic if `ctx` is NULL.
+
+    @see `end_op` - the counterpart of this function.
+    */
+    void (*begin_op)(OpContext *ctx);
+
+    /**
+    @brief synchronize the content of `block` to disk.
+
+    If `ctx` is NULL, it immediately writes the content of `block` to disk.
+
+    However this is very dangerous, since it may break atomicity of
+    concurrent atomic operations. YOU SHOULD USE THIS MODE WITH CARE.
+
+    @param ctx the atomic operation context to which this block belongs.
+
+    @note the caller must hold the lock of `block`.
+
+    @throw panic if the number of blocks associated with `ctx` is larger
+    than `OP_MAX_NUM_BLOCKS` after `sync`
+    */
+    void (*sync)(OpContext *ctx, Block *block);
+
+    /**
+    @brief end the atomic operation managed by `ctx`.
+
+    It sleeps until all associated blocks are written to disk.
+
+    @param ctx the atomic operation context to be ended.
+
+    @throw panic if `ctx` is NULL.
+    */
+    void (*end_op)(OpContext *ctx);
+
+    // # NOTES FOR BITMAP
+    //
+    // every block on disk has a bit in bitmap, including blocks inside bitmap!
+    //
+    // usually, MBR block, super block, inode blocks, log blocks and bitmap
+    // blocks are preallocated on disk, i.e. those bits for them are already set
+    // in bitmap. therefore when we allocate a new block, it usually returns a
+    // data block. however, nobody can prevent you freeing a non-data block :)
+
+    /**
+    @brief allocate a new zero-initialized block.
+
+    It searches bitmap for a free block, mark it allocated and
+    returns the block number.
+
+    @param ctx since this function may write on-disk bitmap, it must be
+    associated with an atomic operation.
+    The caller must ensure that `ctx` is **running**.
+
+    @return the block number of the allocated block.
+
+    @note you should use `acquire`, `sync` and `release` to do disk I/O
+    here.
+
+    @throw panic if there is no free block on disk.
+    */
+    usize (*alloc)(OpContext *ctx);
+
+    /**
+    @brief free the block at `block_no` in bitmap.
+
+    It will NOT panic if `block_no` is already free or invalid.
+
+    @param ctx since this function may write on-disk bitmap, it must be
+    associated with an atomic operation.
+    The caller must ensure that `ctx` is **running**.
+    @param block_no the block number to be freed.
+
+    @note you should use `acquire`, `sync` and `release` to do disk I/O
+    here.
+    */
+    void (*free)(OpContext *ctx, usize block_no);
+} BlockCache;
+
+/**
+ @brief the global block cache instance.
+ */
+extern BlockCache bcache;
+
+/**
+ @brief initialize the block cache.
+
+ This method is also responsible for restoring logs after system crash,
+
+ i.e. it should read the committed-but-not-yet-applied blocks from the log
+ section and write them back to their original positions.
+
+ @param sblock the loaded super block.
+ @param device the initialized block device.
+
+ @note You may want to put it into `*_init` method groups.
+ */
+void init_bcache(const SuperBlock *sblock, const BlockDevice *device); // call after init_block_device
\ No newline at end of file
diff --git a/src/fs/defines.h b/src/fs/defines.h
new file mode 100644
index 0000000..af690ea
--- /dev/null
+++ b/src/fs/defines.h
@@ -0,0 +1,78 @@
+#pragma once
+
+#include
+
+/**
+ * this file contains on-disk representations of primitives in our filesystem.
+ */
+
+#define BLOCK_SIZE 512
+
+// maximum number of distinct block numbers can be recorded in the log header.
+#define LOG_MAX_SIZE ((BLOCK_SIZE - sizeof(usize)) / sizeof(usize)) // sized so LogHeader fills exactly one block
+
+#define INODE_NUM_DIRECT 12
+#define INODE_NUM_INDIRECT (BLOCK_SIZE / sizeof(u32)) // block numbers per indirect block
+#define INODE_PER_BLOCK (BLOCK_SIZE / sizeof(InodeEntry))
+#define INODE_MAX_BLOCKS (INODE_NUM_DIRECT + INODE_NUM_INDIRECT)
+#define INODE_MAX_BYTES (INODE_MAX_BLOCKS * BLOCK_SIZE) // hard cap on file size
+
+// the maximum length of file names, including trailing '\0'.
+#define FILE_NAME_MAX_LENGTH 14
+
+// inode types:
+#define INODE_INVALID 0
+#define INODE_DIRECTORY 1
+#define INODE_REGULAR 2 // regular file
+#define INODE_DEVICE 3
+
+#define ROOT_INODE_NO 1
+
+typedef u16 InodeType;
+
+#define BIT_PER_BLOCK (BLOCK_SIZE * 8) // bits held by one bitmap block
+
+// disk layout:
+// [ MBR block | super block | log blocks | inode blocks | bitmap blocks | data blocks ]
+//
+// `mkfs` generates the super block and builds an initial filesystem. The
+// super block describes the disk layout.
+typedef struct {
+    u32 num_blocks; // total number of blocks in filesystem.
+    u32 num_data_blocks;
+    u32 num_inodes;
+    u32 num_log_blocks; // number of blocks for logging, including log header.
+    u32 log_start; // the first block of logging area.
+    u32 inode_start; // the first block of inode area.
+    u32 bitmap_start; // the first block of bitmap area.
+} SuperBlock;
+
+// `type == INODE_INVALID` implies this inode is free.
+typedef struct dinode {
+    InodeType type;
+    u16 major; // major device id, for INODE_DEVICE only.
+    u16 minor; // minor device id, for INODE_DEVICE only.
+    u16 num_links; // number of hard links to this inode in the filesystem.
+    u32 num_bytes; // number of bytes in the file, i.e. the size of file.
+    u32 addrs[INODE_NUM_DIRECT]; // direct addresses/block numbers.
+    u32 indirect; // the indirect address block.
+} InodeEntry;
+
+// the block pointed by `InodeEntry.indirect`.
+typedef struct {
+    u32 addrs[INODE_NUM_INDIRECT]; // a zero entry means "block not allocated"
+} IndirectBlock;
+
+// directory entry. `inode_no == 0` implies this entry is free.
+typedef struct dirent {
+    u16 inode_no;
+    char name[FILE_NAME_MAX_LENGTH]; // NUL-terminated
+} DirEntry;
+
+typedef struct {
+    usize num_blocks; // how many entries of block_no[] are in use
+    usize block_no[LOG_MAX_SIZE]; // home locations of the logged blocks
+} LogHeader;
+
+// mkfs only
+#define FSSIZE 1000 // Size of file system in blocks
\ No newline at end of file
diff --git a/src/fs/inode.c b/src/fs/inode.c
new file mode 100755
index 0000000..427839c
--- /dev/null
+++ b/src/fs/inode.c
@@ -0,0 +1,468 @@
+#include
+#include
+#include
+#include
+
+/**
+ @brief the private reference to the super block.
+
+ @note we need these two variables because we allow the caller to
+ specify the block cache and super block to use.
+ Correspondingly, you should NEVER use global instance of
+ them.
+
+ @see init_inodes
+ */
+static const SuperBlock* sblock;
+
+/**
+ @brief the reference to the underlying block cache.
+ */
+static const BlockCache* cache;
+
+/**
+ @brief global lock for inode layer.
+
+ Use it to protect anything you need.
+
+ e.g. the list of allocated blocks, ref counts, etc.
+ */
+static SpinLock lock;
+
+/**
+ @brief the list of all allocated in-memory inodes.
+
+ We use a linked list to manage all allocated inodes.
+
+ You can implement your own data structure if you want better performance.
+
+ @see Inode
+ */
+static ListNode head; // unordered; searched linearly by inode_get
+
+
+// return which block `inode_no` lives on.
+static INLINE usize to_block_no(usize inode_no) {
+    return sblock->inode_start + (inode_no / (INODE_PER_BLOCK));
+}
+
+// return the pointer to on-disk inode. (within the block returned by to_block_no)
+static INLINE InodeEntry* get_entry(Block* block, usize inode_no) {
+    return ((InodeEntry*)block->data) + (inode_no % INODE_PER_BLOCK);
+}
+
+// return address array in indirect block. (reinterprets the raw block data)
+static INLINE u32* get_addrs(Block* block) {
+    return ((IndirectBlock*)block->data)->addrs;
+}
+
+// initialize inode tree.
+void init_inodes(const SuperBlock* _sblock, const BlockCache* _cache) {
+    init_spinlock(&lock);
+    init_list_node(&head);
+    sblock = _sblock;
+    cache = _cache;
+
+    if (ROOT_INODE_NO < sblock->num_inodes)
+        inodes.root = inodes.get(ROOT_INODE_NO); // pin the root inode for the kernel's lifetime
+    else
+        printk("(warn) init_inodes: no root inode.\n"); // e.g. an empty/unformatted disk image
+}
+
+// initialize in-memory inode. (inode_no 0 means "not yet bound to an on-disk inode")
+static void init_inode(Inode* inode) {
+    init_sleeplock(&inode->lock);
+    init_rc(&inode->rc);
+    init_list_node(&inode->node);
+    inode->inode_no = 0;
+    inode->valid = false; // entry not loaded from disk yet
+}
+
+// see `inode.h`.
+static usize inode_alloc(OpContext* ctx, InodeType type) {
+    //printk("[INODE] Allocating inode of type %d\n", type);
+    ASSERT(type != INODE_INVALID);
+    acquire_spinlock(&lock); // NOTE(review): held across cache->acquire, which can sleep on a semaphore -- confirm sleeping under this spinlock is safe
+    for (usize i = 1; i < sblock->num_inodes; i++) { // inode 0 is reserved ("free" dirent marker)
+        // search for a free inode
+        Block* block = cache->acquire(to_block_no(i));
+        InodeEntry* entry = get_entry(block, i);
+        if (entry->type == INODE_INVALID) {
+            // found a free inode
+            memset(entry, 0, sizeof(InodeEntry)); // wipe links/size/addrs of the recycled entry
+            entry->type = type;
+            cache->sync(ctx, block); // logged under the caller's atomic op
+            cache->release(block);
+            release_spinlock(&lock);
+            return i;
+        }
+        cache->release(block);
+    }
+    release_spinlock(&lock);
+    PANIC(); // no free inode on disk
+    return 0; // unreachable; satisfies the compiler's return-path check
+}
+
+// see `inode.h`.
+static void inode_lock(Inode* inode) {
+    if(inode == NULL) {
+        PANIC();
+    }
+    //printk("[INODE] Locking inode no %llu\n", inode->inode_no);
+    //printk("[INODE] rc count: %d\n", inode->rc.count);
+    ASSERT(inode->rc.count > 0); // caller must already hold a reference (via get/share)
+    //printk("[INODE] Acquiring sleeplock for inode no %llu\n", inode->inode_no);
+    unalertable_wait_sem(&inode->lock); // may sleep; not interruptible
+    //printk("[INODE] Sleeplock acquired for inode no %llu\n", inode->inode_no);
+}
+
+// see `inode.h`.
+static void inode_unlock(Inode* inode) {
+    if(inode == NULL) {
+        PANIC();
+    }
+    //printk("[INODE] Unlocking inode no %llu\n", inode->inode_no);
+    ASSERT(inode->rc.count > 0);
+    // (no other state to update: the sleeplock alone guards entry/valid)
+    post_sem(&inode->lock); // wake one thread sleeping in inode_lock
+    //printk("[INODE] Sleeplock released for inode no %llu\n", inode->inode_no);
+}
+
+// see `inode.h`.
+static void inode_sync(OpContext* ctx, Inode* inode, bool do_write) {
+    // dispatch on (valid, do_write): panic, load, write back, or refresh.
+    //printk("[INODE] Syncing inode no %llu, do_write: %d\n", inode->inode_no, do_write);
+    if(!inode->valid && do_write) {
+        PANIC(); // writing back an entry that was never loaded would corrupt the disk
+    } else if (!inode->valid && !do_write) {
+        // Load inode from disk
+        Block* block = cache->acquire(to_block_no(inode->inode_no));
+        InodeEntry* entry = get_entry(block, inode->inode_no);
+        memcpy(&inode->entry, entry, sizeof(InodeEntry));
+        cache->release(block);
+        inode->valid = true; // Mark as valid after loading
+    } else if (inode->valid && do_write) {
+        Block* block = cache->acquire(to_block_no(inode->inode_no));
+        InodeEntry* entry = get_entry(block, inode->inode_no);
+        memcpy(entry, &inode->entry, sizeof(InodeEntry)); // in-memory -> on-disk copy
+        cache->sync(ctx, block); // logged under the caller's atomic op
+        cache->release(block);
+    } else if (inode->valid && !do_write) {
+        Block* block = cache->acquire(to_block_no(inode->inode_no));
+        InodeEntry* entry = get_entry(block, inode->inode_no);
+        memcpy(&inode->entry, entry, sizeof(InodeEntry)); // refresh from disk, discarding in-memory changes
+        cache->release(block);
+    }
+}
+
+// see `inode.h`. Return a referenced in-memory inode for `inode_no`, loading it on first use.
+static Inode* inode_get(usize inode_no) {
+    //printk("[INODE] Getting inode no %llu\n", inode_no);
+    ASSERT(inode_no > 0); // inode 0 is the "free dirent" marker, never a real inode
+    //printk("[INODE] Total inodes: %llu\n", sblock->num_inodes);
+    ASSERT(inode_no < sblock->num_inodes);
+    //printk("[INODE] Acquiring global inode lock\n");
+    acquire_spinlock(&lock);
+    _for_in_list(nd, &head) {
+        Inode* inode = container_of(nd, Inode, node);
+        if (nd != &head && inode->inode_no == inode_no) { // fix: skip the dummy head node (as cache_acquire does); container_of(&head) is a bogus Inode with garbage fields
+            // found in-memory inode
+            increment_rc(&inode->rc);
+            release_spinlock(&lock);
+            return inode;
+        }
+    }
+    release_spinlock(&lock);
+    // not found, create a new one
+    Inode* new_node = kalloc(sizeof(Inode));
+    init_inode(new_node);
+    new_node->inode_no = inode_no;
+    increment_rc(&new_node->rc); // the caller's reference
+    //printk("[INODE] Loading inode no %llu from disk\n", inode_no);
+    //printk("[INODE] rc count: %d\n", new_node->rc.count);
+    inode_lock(new_node);
+    inode_sync(NULL, new_node, false); // load the on-disk entry; sets valid = true
+    inode_unlock(new_node);
+    acquire_spinlock(&lock);
+    //printk("[INODE] Inode no %llu loaded from disk\n", inode_no);
+    _insert_into_list(&head, &new_node->node); // NOTE(review): the lock was dropped during the disk read, so two threads can insert duplicates for the same inode_no -- confirm callers serialize first-touch
+    //printk("[INODE] Inserting inode no %llu into global inode list\n", new_node->inode_no);
+    release_spinlock(&lock);
+    return new_node;
+}
+// see `inode.h`.
+static void inode_clear(OpContext* ctx, Inode* inode) {
+    //printk("[INODE] Clearing inode no %llu\n", inode->inode_no);
+    //acquire_spinlock(&lock);
+    // clear the inode's content (turn the file into an empty, zero-length file)
+    // first free the indirect block and every data block it references
+    if (inode->entry.indirect){
+        Block* indirect_block = cache->acquire(inode->entry.indirect);
+        u32* addrs = get_addrs(indirect_block);
+        for (usize i = 0; i < INODE_NUM_INDIRECT; i++) {
+            if (addrs[i]) {
+                cache->free(ctx, addrs[i]); // only the bitmap changes; addrs[i] left stale on purpose
+            }
+        }
+        cache->sync(ctx, indirect_block); // NOTE(review): syncing a block that is freed right below looks redundant -- confirm
+        cache->release(indirect_block);
+        cache->free(ctx, inode->entry.indirect);
+        inode->entry.indirect = 0;
+    }
+    // direct blocks
+    for (usize i = 0; i < INODE_NUM_DIRECT; i++) {
+        if (inode->entry.addrs[i]) {
+            cache->free(ctx, inode->entry.addrs[i]);
+            inode->entry.addrs[i] = 0;
+        }
+    }
+    inode->entry.num_bytes = 0;
+    inode_sync(ctx, inode, true); // persist the now-empty entry under ctx
+    //release_spinlock(&lock);
+}
+
+// see `inode.h`.
+static Inode* inode_share(Inode* inode) {
+    //printk("[INODE] Sharing inode no %llu\n", inode->inode_no);
+    // bump the reference count; the caller gets the same pointer back
+    increment_rc(&inode->rc);
+    return inode;
+}
+
+// see `inode.h`.
+static void inode_put(OpContext* ctx, Inode* inode) {
+    // drop one reference; the last dropper tears the in-memory inode down
+    //printk("[INODE] Putting inode no %llu\n", inode->inode_no);
+    acquire_spinlock(&lock);
+    decrement_rc(&inode->rc);
+    //printk("[INODE] rc count after decrement: %d\n", inode->rc.count);
+    // if no one needs the inode any more
+    if(inode->rc.count == 0) {
+        // Remove from list immediately so no one else gets it
+        _detach_from_list(&inode->node);
+        release_spinlock(&lock); // Release before IO
+
+        if (inode->entry.num_links == 0) {
+            inode->entry.type = INODE_INVALID; // marks the on-disk entry free for inode_alloc
+            inode_clear(ctx, inode); // free data blocks and write the entry back under ctx
+        }
+        kfree(inode);
+    } else {
+        release_spinlock(&lock);
+    }
+}
+
+/**
+ @brief get which block is the offset of the inode in.
+
+ e.g. `inode_map(ctx, my_inode, 2, &modified)` returns the disk block_no
+ backing block index 2 of the file, i.e. bytes [2*BLOCK_SIZE, 3*BLOCK_SIZE);
+ note `offset` is a BLOCK index (callers pass byte_offset / BLOCK_SIZE).
+
+ If a block has not been allocated for that byte, `inode_map` will
+ allocate a new block and update `my_inode`, at which time, `modified`
+ will be set to true.
+
+ HOWEVER, if `ctx == NULL`, `inode_map` will NOT try to allocate any new block,
+ and when it finds that the block has not been allocated, it will return 0.
+
+ @param[out] modified true if some new block is allocated and `inode`
+ has been changed.
+
+ @return usize the block number of that block, or 0 if `ctx == NULL` and
+ the required block has not been allocated.
+
+ @note the caller must hold the lock of `inode`.
+ */
+static usize inode_map(OpContext* ctx,
+                       Inode* inode,
+                       usize offset,
+                       bool* modified) {
+    //direct blocks
+    //printk("[INODE] Mapping offset %llu for inode no %llu\n", offset, inode->inode_no);
+    *modified = false;
+    if (offset < INODE_NUM_DIRECT){
+        if (inode->entry.addrs[offset] == 0){
+            if (ctx == NULL){
+                return 0; // read path: unallocated block, nothing to do
+            }
+            inode->entry.addrs[offset] = cache->alloc(ctx);
+            *modified = true; // caller must inode_sync to persist the new address
+        }
+        return inode->entry.addrs[offset];
+    }
+    //indirect blocks
+    offset -= INODE_NUM_DIRECT; // now an index into the indirect block's array
+    if (inode->entry.indirect == 0){
+        if (ctx == NULL) return 0;
+        inode->entry.indirect = cache->alloc(ctx);
+        *modified = true;
+        // Initialize new indirect block with zeros
+        Block* new_ind = cache->acquire(inode->entry.indirect);
+        memset(new_ind->data, 0, BLOCK_SIZE);
+        cache->sync(ctx, new_ind);
+        cache->release(new_ind);
+    }
+
+    Block* indirect_block = cache->acquire(inode->entry.indirect);
+    u32* addrs = get_addrs(indirect_block);
+
+    if (addrs[offset] == 0){
+        if (ctx == NULL){
+            cache->release(indirect_block);
+            return 0;
+        }
+        addrs[offset] = cache->alloc(ctx);
+        // Sync the indirect block because we updated an entry in it
+        cache->sync(ctx, indirect_block);
+    }
+    usize ret = addrs[offset];
+    cache->release(indirect_block);
+    return ret;
+}
+
+// see `inode.h`.
+// Reads `count` bytes starting at byte `offset` of the file into `dest`,
+// clamping the range to the current file size. Returns the clamped count.
+static usize inode_read(Inode* inode, u8* dest, usize offset, usize count) {
+ //printk("[INODE] Reading %llu bytes from offset %llu of inode no %llu\n", count, offset, inode->inode_no);
+ InodeEntry* entry = &inode->entry;
+ // clamp the tail of the request so we never read past end-of-file.
+ if (count + offset > entry->num_bytes)
+ count = entry->num_bytes - offset;
+ usize end = offset + count;
+ //printk("[INODE] [ASSERT] offset: %llu, end: %llu, entry->num_bytes: %llu\n", offset, end, entry->num_bytes);
+ ASSERT(offset <= entry->num_bytes);
+ //printk("[INODE] [ASSERT PASSED] offset: %llu, end: %llu, entry->num_bytes: %llu\n", offset, end, entry->num_bytes);
+ ASSERT(end <= entry->num_bytes);
+ //printk("[INODE] [ASSERT PASSED] offset: %llu, end: %llu, entry->num_bytes: %llu\n", offset, end, entry->num_bytes);
+ ASSERT(offset <= end);
+ bool modified = false;
+ // copy block-by-block; `size` is how many bytes the current block yields.
+ for (usize readed_block = 0, size = 0; readed_block < count; readed_block += size) {
+ Block* block = NULL;
+ // ctx == NULL: never allocate while reading.
+ // NOTE(review): if the range contained an unallocated hole, block_no
+ // would be 0 and we would acquire block 0. This assumes inode_write
+ // always allocates every block below num_bytes, so holes cannot
+ // occur — confirm.
+ usize block_no = inode_map(NULL, inode, offset / BLOCK_SIZE, &modified);
+ block = cache->acquire(block_no);
+ if (count - readed_block < BLOCK_SIZE - (offset % BLOCK_SIZE))
+ size = count - readed_block;
+ else
+ size = BLOCK_SIZE - (offset % BLOCK_SIZE);
+ memmove(dest, block->data + (offset % BLOCK_SIZE), size);
+ cache->release(block);
+ dest += size;
+ offset += size;
+ }
+ return count;
+}
+
+// see `inode.h`.
+// Writes `count` bytes from `src` at byte `offset`, growing the file (and
+// allocating blocks on demand) as needed. Returns the bytes written.
+static usize inode_write(OpContext* ctx,
+ Inode* inode,
+ u8* src,
+ usize offset,
+ usize count) {
+ //printk("[INODE] Writing %llu bytes to offset %llu of inode no %llu\n", count, offset, inode->inode_no);
+ InodeEntry* entry = &inode->entry;
+ usize end = offset + count;
+ ASSERT(offset <= entry->num_bytes);
+ ASSERT(end <= INODE_MAX_BYTES);
+ ASSERT(offset <= end);
+ // grow the file size first; all updates below belong to the same op `ctx`.
+ if (end > entry->num_bytes){
+ entry->num_bytes = end;
+ inode_sync(ctx, inode, true);
+ }
+ usize written_byte = 0;
+ while (offset < end) {
+ // stay within the current block: write up to its boundary (or `end`).
+ usize byte_to_write = MIN(BLOCK_SIZE - (offset % BLOCK_SIZE), end - offset);
+ bool modified = false;
+ usize block_no = inode_map(ctx, inode, offset / BLOCK_SIZE, &modified);
+ // with a non-NULL ctx, inode_map allocates instead of returning 0
+ // (and alloc panics on failure), so this check is purely defensive.
+ if (block_no == 0) {
+ PANIC();
+ }
+ // a new block was hooked into the inode entry: persist the entry.
+ if (modified) {
+ inode_sync(ctx, inode, true);
+ }
+ Block* block = cache->acquire(block_no);
+ memcpy(block->data + (offset % BLOCK_SIZE), src, byte_to_write);
+ cache->sync(ctx, block);
+ cache->release(block);
+ offset += byte_to_write;
+ src += byte_to_write;
+ written_byte += byte_to_write;
+ }
+ return written_byte;
+}
+
+// see `inode.h`.
+// Linearly scans the directory's fixed-size DirEntry slots for `name`.
+// Free slots (inode_no == 0) are skipped even if their stale name matches.
+static usize inode_lookup(Inode* inode, const char* name, usize* index) {
+ //printk("[INODE] Looking up name %s in directory inode no %llu\n", name, inode->inode_no);
+ InodeEntry* entry = &inode->entry;
+ ASSERT(entry->type == INODE_DIRECTORY);
+ DirEntry dir_entry;
+ for (usize off = 0; off < entry->num_bytes; off += sizeof(DirEntry)) {
+ inode_read(inode, (u8*)&dir_entry, off, sizeof(DirEntry));
+ if ((strncmp(dir_entry.name, name, FILE_NAME_MAX_LENGTH) == 0) && dir_entry.inode_no != 0) {
+ if (index)
+ *index = off; // byte offset of the matching slot.
+ return dir_entry.inode_no;
+ }
+ }
+ return 0;
+}
+
+// see `inode.h`.
+// Adds a DirEntry (`name` -> `inode_no`), reusing a free slot if one
+// exists, otherwise appending (which grows the directory via inode_write).
+static usize inode_insert(OpContext* ctx,
+ Inode* inode,
+ const char* name,
+ usize inode_no) {
+ InodeEntry* entry = &inode->entry;
+ ASSERT(entry->type == INODE_DIRECTORY);
+ //printk("[INODE] Inserting name %s with inode no %llu into directory inode no %llu\n", name, inode_no, inode->inode_no);
+ // reject duplicates; -1 wraps to all-ones usize, per the header contract.
+ if (inode_lookup(inode, name, NULL) != 0) {
+ return -1;
+ }
+ DirEntry dir_entry;
+ // default to appending at the end of the directory file.
+ usize idx = inode->entry.num_bytes;
+ // find a free entry
+ for (usize i = 0; i < inode->entry.num_bytes; i += sizeof(DirEntry)) {
+ inode_read(inode, (u8*)&dir_entry, i, sizeof(DirEntry));
+ if (dir_entry.inode_no == 0) {
+ idx = i;
+ break;
+ }
+ }
+ memset(&dir_entry, 0, sizeof(DirEntry));
+ strncpy(dir_entry.name, name, FILE_NAME_MAX_LENGTH);
+ dir_entry.inode_no = inode_no;
+ inode_write(ctx, inode, (u8*)&dir_entry, idx, sizeof(DirEntry));
+ return idx;
+}
+
+// see `inode.h`.
+// Frees the directory slot at byte offset `index` by zeroing its inode_no.
+static void inode_remove(OpContext* ctx, Inode* inode, usize index) {
+ //printk("[INODE] Removing entry at index %llu from directory inode no %llu\n", index, inode->inode_no);
+ InodeEntry* entry = &inode->entry;
+ ASSERT(entry->type == INODE_DIRECTORY);
+ ASSERT(index < entry->num_bytes);
+ DirEntry dir_entry;
+ inode_read(inode, (u8*)&dir_entry, index, sizeof(DirEntry));
+ // already free: nothing to do.
+ if (dir_entry.inode_no == 0) {
+ return;
+ }
+ usize last_index = entry->num_bytes - sizeof(DirEntry);
+ // NOTE(review): this read of the LAST entry looks vestigial (perhaps
+ // from an abandoned "move last entry into the hole" compaction): only
+ // its `name` survives, because `inode_no` is zeroed below. The freed
+ // slot is still correctly skipped by lookup/insert, but the directory
+ // never shrinks and the slot keeps the last entry's stale name.
+ inode_read(inode, (u8*)&dir_entry, last_index, sizeof(DirEntry));
+ dir_entry.inode_no = 0;
+ inode_write(ctx, inode, (u8*)&dir_entry, index, sizeof(DirEntry));
+}
+
+// The global inode-layer vtable (see `inode.h`).
+// `.root` is intentionally absent here (zero-initialized); per `inode.h`,
+// `init_inodes` is expected to set it — TODO confirm against init_inodes.
+InodeTree inodes = {
+ .alloc = inode_alloc,
+ .lock = inode_lock,
+ .unlock = inode_unlock,
+ .sync = inode_sync,
+ .get = inode_get,
+ .clear = inode_clear,
+ .share = inode_share,
+ .put = inode_put,
+ .read = inode_read,
+ .write = inode_write,
+ .lookup = inode_lookup,
+ .insert = inode_insert,
+ .remove = inode_remove,
+};
\ No newline at end of file
diff --git a/src/fs/inode.h b/src/fs/inode.h
new file mode 100755
index 0000000..78a4345
--- /dev/null
+++ b/src/fs/inode.h
@@ -0,0 +1,266 @@
+#pragma once
+#include
+#include
+#include
+#include
+#include
+
+/**
+ @brief the number of the root inode (i.e. the inode_no of `/`).
+ */
+#define ROOT_INODE_NO 1
+
+/**
+ @brief an inode in memory.
+
+ You can compare it to a `Block` because they have similar operating ways.
+
+ @see Block
+ */
+typedef struct {
+ /**
+ @brief the lock protecting the inode metadata and its content.
+
+ @note it does NOT protect `rc`, `node`, `valid`, etc, because they are
+ "runtime" variables, not "filesystem" metadata or data of the inode.
+ */
+ SleepLock lock;
+
+ /**
+ @brief the reference count of this inode.
+
+ Different from `Block`, an inode can be shared by multiple threads or
+ processes, so we need a reference count to track the number of
+ references to this inode.
+ */
+ RefCount rc;
+
+ /**
+ @brief link this inode into a linked list.
+ */
+ ListNode node;
+
+ /**
+ @brief the corresponding inode number on disk.
+
+ @note distinguish it from `block_no` in `Block`, which is the "block number".
+
+ `inode_no` should be the offset in block from the beginning of the inode area.
+ */
+ usize inode_no;
+
+ /**
+ @brief has the `entry` been loaded from disk?
+ */
+ bool valid;
+
+ /**
+ @brief the real in-memory copy of the inode on disk.
+ */
+ InodeEntry entry; /* guarded by `lock` above */
+} Inode;
+
+/**
+ @brief interface of inode layer.
+ */
+typedef struct {
+ /**
+ @brief the root inode of the file system.
+
+ @see `init_inodes` should initialize it to a valid inode.
+ */
+ Inode* root; /* set by `init_inodes` */
+
+ /**
+ @brief allocate a new zero-initialized inode on disk.
+
+ @param type the type of the inode to allocate.
+
+ @return the number of newly allocated inode.
+
+ @throw panic if allocation fails (e.g. no more free inode).
+ */
+ usize (*alloc)(OpContext* ctx, InodeType type);
+
+ /**
+ @brief acquire the sleep lock of `inode`.
+
+ This method should be called before any write operation to `inode` and its
+ file content.
+
+ If the inode has not been loaded, this method should load it from disk.
+
+ @see `unlock` - the counterpart of this method.
+ */
+ void (*lock)(Inode* inode);
+
+ /**
+ @brief release the sleep lock of `inode`.
+
+ @see `lock` - the counterpart of this method.
+ */
+ void (*unlock)(Inode* inode);
+
+ /**
+ @brief synchronize the content of `inode` between memory and disk.
+
+ Different from block cache, this method can either read or write the inode.
+
+ If `do_write` is true and the inode is valid, write the content of `inode` to disk.
+
+ If `do_write` is false and the inode is invalid, read the content of `inode` from disk.
+
+ If `do_write` is false and the inode is valid, do nothing.
+
+ @note here "write to disk" means "sync with block cache", not "directly
+ write to underneath SD card".
+
+ @note caller must hold the lock of `inode`.
+
+ @throw panic if `do_write` is true and `inode` is invalid.
+ */
+ void (*sync)(OpContext* ctx, Inode* inode, bool do_write);
+
+ /**
+ @brief get an inode by its inode number.
+
+ This method should increment the reference count of the inode by one.
+
+ @note it does NOT have to load the inode from disk!
+
+ @see `sync` will be responsible to load the content of inode.
+
+ @return the `inode` of `inode_no`. `inode->valid` can be false.
+
+ @see `put` - the counterpart of this method.
+ */
+ Inode* (*get)(usize inode_no);
+
+ /**
+ @brief truncate all contents of `inode`.
+
+ This method removes (i.e. "frees") all file blocks of `inode`.
+
+ @note do not forget to reset related metadata of `inode`, e.g. `inode->entry.num_bytes`.
+
+ @note caller must hold the lock of `inode`.
+ */
+ void (*clear)(OpContext* ctx, Inode* inode);
+
+ /**
+ @brief duplicate an inode.
+
+ Call this if you want to share an inode with others.
+
+ It should increment the reference count of `inode` by one.
+
+ @return the duplicated inode (i.e. may just return `inode`).
+ */
+ Inode* (*share)(Inode* inode);
+
+ /**
+ @brief notify that you no longer need `inode`.
+
+ This method is also responsible to free the inode if no one needs it:
+
+ "No one needs it" means it is useless BOTH in-memory (`inode->rc == 0`) and on-disk
+ (`inode->entry.num_links == 0`).
+
+ "Free the inode" means freeing all related file blocks and the inode itself.
+
+ @note do not forget `kfree(inode)` after you have done them all!
+
+ @note caller must NOT hold the lock of `inode`. i.e. caller should have `unlock`ed it.
+
+ @see `get` - the counterpart of this method.
+
+ @see `clear` can be used to free all file blocks of `inode`.
+ */
+ void (*put)(OpContext* ctx, Inode* inode);
+
+ /**
+ @brief read `count` bytes from `inode`, beginning at `offset`, to `dest`.
+
+ @return how many bytes you actually read.
+
+ @note caller must hold the lock of `inode`.
+ */
+ usize (*read)(Inode* inode, u8* dest, usize offset, usize count);
+
+ /**
+ @brief write `count` bytes from `src` to `inode`, beginning at `offset`.
+
+ @return how many bytes you actually write.
+
+ @note caller must hold the lock of `inode`.
+ */
+ usize (*write)(OpContext* ctx,
+ Inode* inode,
+ u8* src,
+ usize offset,
+ usize count);
+
+ /**
+ @brief look up an entry named `name` in directory `inode`.
+
+ @param[out] index the index of found entry in this directory.
+
+ @return the inode number of the corresponding inode, or 0 if not found.
+
+ @note caller must hold the lock of `inode`.
+
+ @throw panic if `inode` is not a directory.
+ */
+ usize (*lookup)(Inode* inode, const char* name, usize* index);
+
+ /**
+ @brief insert a new directory entry in directory `inode`.
+
+ Add a new directory entry in `inode` called `name`, which points to inode
+ with `inode_no`.
+
+ @return the index of new directory entry, or -1 if `name` already exists.
+
+ @note if the directory inode is full, you should grow the size of directory inode.
+
+ @note you do NOT need to change `inode->entry.num_links`. Another function
+ to be finished in our final lab will do this.
+
+ @note caller must hold the lock of `inode`.
+
+ @throw panic if `inode` is not a directory.
+ */
+ usize (*insert)(OpContext* ctx,
+ Inode* inode,
+ const char* name,
+ usize inode_no);
+
+ /**
+ @brief remove the directory entry at `index`.
+
+ If the corresponding entry is not used before, `remove` does nothing.
+
+ @note if the last entry is removed, you can shrink the size of directory inode.
+ If you like, you can also move entries to fill the hole.
+
+ @note caller must hold the lock of `inode`.
+
+ @throw panic if `inode` is not a directory.
+ */
+ void (*remove)(OpContext* ctx, Inode* inode, usize index);
+} InodeTree;
+
+/**
+ @brief the global inode layer instance.
+ */
+extern InodeTree inodes;
+
+/**
+ @brief initialize the inode layer.
+
+ @note do not forget to read the root inode from disk!
+
+ @param sblock the loaded super block.
+ @param cache the initialized block cache.
+ */
+void init_inodes(const SuperBlock* sblock, const BlockCache* cache);
\ No newline at end of file
diff --git a/src/fs/test/.gitignore b/src/fs/test/.gitignore
new file mode 100755
index 0000000..d5d6e41
--- /dev/null
+++ b/src/fs/test/.gitignore
@@ -0,0 +1 @@
+cache_test
\ No newline at end of file
diff --git a/src/fs/test/CMakeLists.txt b/src/fs/test/CMakeLists.txt
new file mode 100755
index 0000000..340f47c
--- /dev/null
+++ b/src/fs/test/CMakeLists.txt
@@ -0,0 +1,33 @@
+cmake_minimum_required(VERSION 3.16)
+
+project(fs-test VERSION 0.1.0 LANGUAGES C CXX)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS True)
+
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
+
+include_directories(../..)
+
+set(compiler_warnings "-Wall -Wextra")
+set(compiler_flags "${compiler_warnings} \
+ -O1 -ftree-pre -g \
+ -fno-omit-frame-pointer \
+ -fsanitize=undefined \
+ -fno-sanitize=alignment")
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${compiler_flags}")
+set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} ${compiler_flags}") # NOTE(review): CMAKE_C_FLAGS already contains ${compiler_flags}, so the flags are duplicated for C++ — harmless, but ${CMAKE_CXX_FLAGS} was likely intended here
+
+file(GLOB mock_sources CONFIGURE_DEPENDS "mock/*.cpp")
+add_library(mock STATIC ${mock_sources})
+
+file(GLOB fs_sources CONFIGURE_DEPENDS "../*.c")
+add_library(fs STATIC ${fs_sources} "instrument.c")
+target_compile_options(fs PUBLIC "-fno-builtin") # presumably so compiler builtins do not bypass the mock/instrumented versions — confirm
+
+add_executable(inode_test inode_test.cpp)
+target_link_libraries(inode_test fs mock pthread)
+
+add_executable(cache_test cache_test.cpp)
+target_link_libraries(cache_test fs mock pthread)
diff --git a/src/fs/test/assert.hpp b/src/fs/test/assert.hpp
new file mode 100755
index 0000000..fd8522d
--- /dev/null
+++ b/src/fs/test/assert.hpp
@@ -0,0 +1,55 @@
+#pragma once
+
+#include
+
+#include "exception.hpp"
+
+[[maybe_unused]] static inline auto
+get_source_location(size_t line, const char *file, const char *func) -> std::string {
+ std::stringstream buf;
+ buf << "'" << func << "' (" << file << ":L" << line << ")";
+ return buf.str();
+}
+
+template
+void _assert_eq(const X &actual, const Y &expect, const char *expr, const std::string &location) {
+ if (actual != static_cast(expect)) {
+ std::stringstream buf;
+ buf << location << ": ";
+ buf << "assert_eq failed: '" << expr << "': expect '" << expect << "', got '" << actual
+ << "'";
+ throw AssertionFailure(buf.str());
+ }
+}
+
+#define assert_eq(actual, expect) \
+ _assert_eq( \
+ (actual), (expect), #actual, get_source_location(__LINE__, __FILE__, __PRETTY_FUNCTION__))
+
+template
+void _assert_ne(const X &actual, const Y &expect, const char *expr, const std::string &location) {
+ if (actual == static_cast(expect)) {
+ std::stringstream buf;
+ buf << location << ": ";
+ buf << "assert_ne failed: '" << expr << "': expect ≠ '" << expect << "', got '" << actual
+ << "'";
+ throw AssertionFailure(buf.str());
+ }
+}
+
+#define assert_ne(actual, expect) \
+ _assert_ne( \
+ (actual), (expect), #actual, get_source_location(__LINE__, __FILE__, __PRETTY_FUNCTION__))
+
+[[maybe_unused]] static inline void
+_assert_true(bool predicate, const char *expr, const std::string &location) {
+ if (!predicate) {
+ std::stringstream buf;
+ buf << location << ": ";
+ buf << "assert_true failed: '" << expr << "'";
+ throw AssertionFailure(buf.str());
+ }
+}
+
+#define assert_true(expr) \
+ _assert_true((expr), #expr, get_source_location(__LINE__, __FILE__, __PRETTY_FUNCTION__))
diff --git a/src/fs/test/cache_test.cpp b/src/fs/test/cache_test.cpp
new file mode 100755
index 0000000..98cdff3
--- /dev/null
+++ b/src/fs/test/cache_test.cpp
@@ -0,0 +1,953 @@
+extern "C" {
+#include
+}
+
+#include "assert.hpp"
+#include "pause.hpp"
+#include "runner.hpp"
+
+#include "mock/block_device.hpp"
+
+#include
+#include
+#include
+#include
+
+namespace
+{
+
+constexpr int IN_CHILD = 0;
+
+static void wait_process(int pid) // join a forked child; abnormal exit raises Internal
+{
+ int wstatus;
+ waitpid(pid, &wstatus, 0);
+ if (!WIFEXITED(wstatus)) { // child crashed or was killed by a signal
+ std::stringstream buf;
+ buf << "process [" << pid << "] exited abnormally with code "
+ << wstatus;
+ throw Internal(buf.str());
+ }
+}
+
+} // namespace
+
+namespace basic
+{
+
+void test_init()
+{
+ initialize(1, 1); // presumably (log_size, num_data_blocks), mirroring initialize_mock — confirm
+}
+
+// targets: `acquire`, `release`, `sync(NULL, ...)`.
+
+void test_read_write()
+{
+ initialize(1, 1);
+
+ auto *b = bcache.acquire(1);
+ auto *d = mock.inspect(1); // raw view of the same block on the mock disk
+ assert_eq(b->block_no, 1);
+ assert_eq(b->valid, true);
+
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ assert_eq(b->data[i], d[i]); // cached contents must match disk after load
+ }
+
+ u8 value = b->data[128];
+ b->data[128] = ~value;
+ bcache.sync(NULL, b); // NULL ctx: change must reach the disk immediately
+ assert_eq(d[128], ~value);
+
+ bcache.release(b);
+ b = bcache.acquire(1);
+}
+
+void test_loop_read()
+{
+ initialize(1, 128);
+ constexpr usize num_rounds = 10;
+ for (usize round = 0; round < num_rounds; round++) {
+ std::vector p;
+ p.resize(sblock.num_blocks);
+
+ for (usize i = 0; i < sblock.num_blocks; i++) {
+ // PAUSE
+ p[i] = bcache.acquire(i);
+
+ assert_eq(p[i]->block_no, i);
+
+ auto *d = mock.inspect(i);
+ for (usize j = 0; j < BLOCK_SIZE; j++) {
+ assert_eq(p[i]->data[j], d[j]);
+ }
+ }
+
+ for (usize i = 0; i < sblock.num_blocks; i++) {
+ assert_eq(p[i]->valid, true);
+ bcache.release(p[i]);
+ }
+ }
+}
+
+void test_reuse()
+{
+ initialize(1, 500);
+
+ constexpr usize num_rounds = 200;
+ constexpr usize blocks[] = { 1, 123, 233, 399, 415 };
+
+ auto matched = [&](usize bno) {
+ for (usize b : blocks) {
+ if (bno == b)
+ return true;
+ }
+ return false;
+ };
+
+ usize rcnt = 0, wcnt = 0;
+ mock.on_read = [&](usize bno, auto) {
+ if (matched(bno))
+ rcnt++;
+ };
+ mock.on_write = [&](usize bno, auto) {
+ if (matched(bno))
+ wcnt++;
+ };
+
+ for (usize round = 0; round < num_rounds; round++) {
+ std::vector p;
+ for (usize block_no : blocks) {
+ p.push_back(bcache.acquire(block_no));
+ }
+ for (auto *b : p) {
+ assert_eq(b->valid, true);
+ bcache.release(b);
+ }
+ }
+
+ assert_true(rcnt < 10);
+ assert_eq(wcnt, 0);
+}
+
+void test_lru()
+{
+ std::mt19937 gen(0xdeadbeef);
+
+ usize cold_size = 1000;
+ usize hot_size = EVICTION_THRESHOLD * 0.8;
+ initialize(1, cold_size + hot_size);
+ for (int i = 0; i < 1000; i++) {
+ bool hot = (gen() % 100) <= 90;
+ usize bno = hot ? (gen() % hot_size) : (hot_size + gen() % cold_size);
+
+ auto *b = bcache.acquire(bno);
+ auto *d = mock.inspect(bno);
+ assert_eq(b->data[123], d[123]);
+ bcache.release(b);
+ }
+
+ printf("(debug) #cached = %zu, #read = %zu\n",
+ bcache.get_num_cached_blocks(), mock.read_count.load());
+ assert_true(bcache.get_num_cached_blocks() <= EVICTION_THRESHOLD);
+ assert_true(mock.read_count < 233);
+ assert_true(mock.write_count < 5);
+}
+
+// targets: `begin_op`, `end_op`, `sync`.
+
+void test_atomic_op()
+{
+ initialize(32, 64);
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ bcache.end_op(&ctx);
+
+ bcache.begin_op(&ctx);
+
+ usize t = sblock.num_blocks - 1;
+ auto *b = bcache.acquire(t);
+ assert_eq(b->block_no, t);
+ assert_eq(b->valid, true);
+ auto *d = mock.inspect(t);
+ u8 v = d[128];
+ assert_eq(b->data[128], v);
+
+ b->data[128] = ~v;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+
+ assert_eq(d[128], v);
+ bcache.end_op(&ctx);
+ assert_eq(d[128], ~v);
+
+ bcache.begin_op(&ctx);
+
+ auto *b1 = bcache.acquire(t - 1);
+ auto *b2 = bcache.acquire(t - 2);
+ assert_eq(b1->block_no, t - 1);
+ assert_eq(b2->block_no, t - 2);
+
+ auto *d1 = mock.inspect(t - 1);
+ auto *d2 = mock.inspect(t - 2);
+ u8 v1 = d1[500];
+ u8 v2 = d2[10];
+ assert_eq(b1->data[500], v1);
+ assert_eq(b2->data[10], v2);
+
+ b1->data[500] = ~v1;
+ b2->data[10] = ~v2;
+ bcache.sync(&ctx, b1);
+ bcache.release(b1);
+ bcache.sync(&ctx, b2);
+ bcache.release(b2);
+
+ assert_eq(d1[500], v1);
+ assert_eq(d2[10], v2);
+ bcache.end_op(&ctx);
+ assert_eq(d1[500], ~v1);
+ assert_eq(d2[10], ~v2);
+}
+
+void test_overflow()
+{
+ initialize(100, 100);
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+
+ usize t = sblock.num_blocks - 1;
+ for (usize i = 0; i < OP_MAX_NUM_BLOCKS; i++) {
+ auto *b = bcache.acquire(t - i);
+ b->data[0] = 0xaa;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ }
+
+ bool panicked = false;
+ auto *b = bcache.acquire(t - OP_MAX_NUM_BLOCKS);
+ b->data[128] = 0x88;
+ try {
+ bcache.sync(&ctx, b);
+ } catch (const Panic &) {
+ panicked = true;
+ }
+
+ assert_eq(panicked, true);
+}
+
+void test_resident()
+{
+ // NOTE: this test may be a little controversial.
+ // the main ideas are:
+ // 1. dirty blocks should be pinned in block cache before `end_op`.
+ // 2. logging should not pollute block cache in most of time.
+
+ initialize(OP_MAX_NUM_BLOCKS, 500);
+
+ constexpr usize num_rounds = 200;
+ constexpr usize blocks[] = { 1, 123, 233, 399, 415 };
+
+ auto matched = [&](usize bno) {
+ for (usize b : blocks) {
+ if (bno == b)
+ return true;
+ }
+ return false;
+ };
+
+ usize rcnt = 0;
+ mock.on_read = [&](usize bno, auto) {
+ if (matched(bno))
+ rcnt++;
+ };
+
+ for (usize round = 0; round < num_rounds; round++) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+
+ for (usize block_no : blocks) {
+ auto *b = bcache.acquire(block_no);
+ assert_eq(b->valid, true);
+ b->data[0] = 0;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ }
+
+ bcache.end_op(&ctx);
+ }
+
+ assert_true(rcnt < 10);
+}
+
+void test_local_absorption()
+{
+ constexpr usize num_rounds = 1000;
+
+ initialize(100, 100);
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ usize t = sblock.num_blocks - 1;
+ for (usize i = 0; i < num_rounds; i++) {
+ for (usize j = 0; j < OP_MAX_NUM_BLOCKS; j++) {
+ auto *b = bcache.acquire(t - j);
+ b->data[0] = 0xcd;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ }
+ }
+ bcache.end_op(&ctx);
+
+ assert_true(mock.read_count < OP_MAX_NUM_BLOCKS * 5);
+ assert_true(mock.write_count < OP_MAX_NUM_BLOCKS * 5);
+ for (usize j = 0; j < OP_MAX_NUM_BLOCKS; j++) {
+ auto *b = mock.inspect(t - j);
+ assert_eq(b[0], 0xcd);
+ }
+}
+
+void test_global_absorption()
+{
+ constexpr usize op_size = 3;
+ constexpr usize num_workers = 100;
+
+ initialize(2 * OP_MAX_NUM_BLOCKS + op_size, 100);
+ usize t = sblock.num_blocks - 1;
+
+ OpContext out;
+ bcache.begin_op(&out);
+
+ for (usize i = 0; i < OP_MAX_NUM_BLOCKS; i++) {
+ auto *b = bcache.acquire(t - i);
+ b->data[0] = 0xcc;
+ bcache.sync(&out, b);
+ bcache.release(b);
+ }
+
+ std::vector ctx;
+ std::vector workers;
+ ctx.resize(num_workers);
+ workers.reserve(num_workers);
+
+ for (usize i = 0; i < num_workers; i++) {
+ bcache.begin_op(&ctx[i]);
+ for (usize j = 0; j < op_size; j++) {
+ auto *b = bcache.acquire(t - j);
+ b->data[0] = 0xdd;
+ bcache.sync(&ctx[i], b);
+ bcache.release(b);
+ }
+ workers.emplace_back([&, i] { bcache.end_op(&ctx[i]); });
+ }
+
+ workers.emplace_back([&] { bcache.end_op(&out); });
+ for (auto &worker : workers) {
+ worker.join();
+ }
+ for (usize i = 0; i < op_size; i++) {
+ auto *b = mock.inspect(t - i);
+ assert_eq(b[0], 0xdd);
+ }
+
+ for (usize i = op_size; i < OP_MAX_NUM_BLOCKS; i++) {
+ auto *b = mock.inspect(t - i);
+ assert_eq(b[0], 0xcc);
+ }
+}
+
+// target: replay at initialization.
+
+void test_replay()
+{
+ initialize_mock(50, 1000);
+
+ auto *header = mock.inspect_log_header();
+ header->num_blocks = 5;
+ for (usize i = 0; i < 5; i++) {
+ usize v = 500 + i;
+ header->block_no[i] = v;
+ auto *b = mock.inspect_log(i);
+ for (usize j = 0; j < BLOCK_SIZE; j++) {
+ b[j] = v & 0xff;
+ }
+ }
+
+ init_bcache(&sblock, &device);
+
+ assert_eq(header->num_blocks, 0);
+ for (usize i = 0; i < 5; i++) {
+ usize v = 500 + i;
+ auto *b = mock.inspect(v);
+ for (usize j = 0; j < BLOCK_SIZE; j++) {
+ assert_eq(b[j], v & 0xff);
+ }
+ }
+}
+
+// targets: `alloc`, `free`.
+
+void test_alloc()
+{
+ initialize(100, 100);
+
+ std::vector bno;
+ bno.reserve(100);
+ for (int i = 0; i < 100; i++) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+
+ bno.push_back(bcache.alloc(&ctx));
+ assert_ne(bno[i], 0);
+ assert_true(bno[i] < sblock.num_blocks);
+
+ auto *b = bcache.acquire(bno[i]);
+ for (usize j = 0; j < BLOCK_SIZE; j++) {
+ assert_eq(b->data[j], 0);
+ }
+ bcache.release(b);
+
+ bcache.end_op(&ctx);
+ auto *d = mock.inspect(bno[i]);
+ for (usize j = 0; j < BLOCK_SIZE; j++) {
+ assert_eq(d[j], 0);
+ }
+ }
+
+ std::sort(bno.begin(), bno.end());
+ usize count = std::unique(bno.begin(), bno.end()) - bno.begin();
+ assert_eq(count, bno.size());
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+
+ bool panicked = false;
+ try {
+ usize b = bcache.alloc(&ctx);
+ assert_ne(b, 0);
+ } catch (const Panic &) {
+ panicked = true;
+ }
+
+ assert_eq(panicked, true);
+}
+
+void test_alloc_free()
+{
+ constexpr usize num_rounds = 5;
+ constexpr usize num_data_blocks = 1000;
+
+ initialize(100, num_data_blocks);
+
+ for (usize round = 0; round < num_rounds; round++) {
+ std::vector bno;
+ for (usize i = 0; i < num_data_blocks; i++) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ bno.push_back(bcache.alloc(&ctx));
+ bcache.end_op(&ctx);
+ }
+
+ for (usize b : bno) {
+ assert_true(b >= sblock.num_blocks - num_data_blocks);
+ }
+
+ for (usize i = 0; i < num_data_blocks; i += 2) {
+ usize no = bno[i];
+ assert_ne(no, 0);
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ bcache.free(&ctx, no);
+ bcache.end_op(&ctx);
+ }
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ usize no = bcache.alloc(&ctx);
+ assert_ne(no, 0);
+ for (usize i = 1; i < num_data_blocks; i += 2) {
+ assert_ne(bno[i], no);
+ }
+ bcache.free(&ctx, no);
+ bcache.end_op(&ctx);
+
+ for (usize i = 1; i < num_data_blocks; i += 2) {
+ bcache.begin_op(&ctx);
+ bcache.free(&ctx, bno[i]);
+ bcache.end_op(&ctx);
+ }
+ }
+}
+
+} // namespace basic
+
+namespace concurrent
+{
+
+void test_acquire()
+{
+ constexpr usize num_rounds = 100;
+ constexpr usize num_workers = 64;
+
+ for (usize round = 0; round < num_rounds; round++) {
+ int child;
+ if ((child = fork()) == IN_CHILD) {
+ initialize(1, num_workers);
+
+ std::atomic flag = false;
+ std::vector workers;
+ for (usize i = 0; i < num_workers; i++) {
+ workers.emplace_back([&, i] {
+ while (!flag) {
+ std::this_thread::yield();
+ }
+
+ usize t = sblock.num_blocks - 1 - i;
+ auto *b = bcache.acquire(t);
+ assert_eq(b->block_no, t);
+ assert_eq(b->valid, true);
+ bcache.release(b);
+ });
+ }
+
+ flag = true;
+ for (auto &worker : workers) {
+ worker.join();
+ }
+
+ exit(0);
+ } else {
+ wait_process(child);
+ }
+ }
+}
+
+void test_sync()
+{
+ constexpr int num_rounds = 100;
+
+ initialize(OP_MAX_NUM_BLOCKS * OP_MAX_NUM_BLOCKS, OP_MAX_NUM_BLOCKS);
+
+ std::mutex mtx;
+ std::condition_variable cv;
+ OpContext ctx;
+ int count = -1, round = -1;
+
+ auto cookie = [](int i, int j) { return (i + 1) * 1926 + j + 817; };
+
+ std::vector workers;
+ for (int i = 0; i < OP_MAX_NUM_BLOCKS; i++) {
+ workers.emplace_back([&, i] {
+ usize t = sblock.num_blocks - 1 - i;
+ for (int j = 0; j < num_rounds; j++) {
+ {
+ std::unique_lock lock(mtx);
+ cv.wait(lock, [&] { return j <= round; });
+ }
+
+ auto *b = bcache.acquire(t);
+ int *p = reinterpret_cast(b->data);
+ *p = cookie(i, j);
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+
+ {
+ std::unique_lock lock(mtx);
+ count++;
+ }
+
+ cv.notify_all();
+ }
+ });
+ }
+
+ auto check = [&](int j) {
+ for (int i = 0; i < OP_MAX_NUM_BLOCKS; i++) {
+ int *b = reinterpret_cast(
+ mock.inspect(sblock.num_blocks - 1 - i));
+ assert_eq(*b, cookie(i, j));
+ }
+ };
+
+ {
+ std::unique_lock lock(mtx);
+ for (int j = 0; j < num_rounds; j++) {
+ bcache.begin_op(&ctx);
+ round = j;
+ count = 0;
+ cv.notify_all();
+
+ cv.wait(lock, [&] { return count >= OP_MAX_NUM_BLOCKS; });
+
+ if (j > 0)
+ check(j - 1);
+ bcache.end_op(&ctx);
+ check(j);
+ }
+ }
+
+ for (auto &worker : workers) {
+ worker.join();
+ }
+}
+
+void test_alloc()
+{
+ initialize(100, 1000);
+
+ std::vector bno(1000);
+ std::vector workers;
+ for (usize i = 0; i < 4; i++) {
+ workers.emplace_back([&, i] {
+ usize t = 250 * i;
+ for (usize j = 0; j < 250; j++) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ bno[t + j] = bcache.alloc(&ctx);
+ bcache.end_op(&ctx);
+ }
+ });
+ }
+
+ for (auto &worker : workers) {
+ worker.join();
+ }
+ std::sort(bno.begin(), bno.end());
+ usize count = std::unique(bno.begin(), bno.end()) - bno.begin();
+ assert_eq(count, 1000);
+ assert_true(bno.front() >= sblock.num_blocks - 1000);
+ assert_true(bno.back() < sblock.num_blocks);
+}
+
+} // namespace concurrent
+
+namespace crash
+{
+
+void test_simple_crash()
+{
+ int child;
+ if ((child = fork()) == IN_CHILD) {
+ initialize(100, 100);
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ auto *b = bcache.acquire(150);
+ b->data[200] = 0x19;
+ b->data[201] = 0x26;
+ b->data[202] = 0x08;
+ b->data[203] = 0x17;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ bcache.end_op(&ctx);
+
+ bcache.begin_op(&ctx);
+ b = bcache.acquire(150);
+ b->data[200] = 0xcc;
+ b->data[201] = 0xcc;
+ b->data[202] = 0xcc;
+ b->data[203] = 0xcc;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+
+ mock.offline = true;
+
+ try {
+ bcache.end_op(&ctx);
+ } catch (const Offline &) {
+ }
+
+ mock.dump("sd.img");
+
+ exit(0);
+ } else {
+ wait_process(child);
+ initialize_mock(100, 100, "sd.img");
+
+ auto *b = mock.inspect(150);
+ assert_eq(b[200], 0x19);
+ assert_eq(b[201], 0x26);
+ assert_eq(b[202], 0x08);
+ assert_eq(b[203], 0x17);
+
+ init_bcache(&sblock, &device);
+ assert_eq(b[200], 0x19);
+ assert_eq(b[201], 0x26);
+ assert_eq(b[202], 0x08);
+ assert_eq(b[203], 0x17);
+ }
+}
+
+void test_parallel(usize num_rounds, usize num_workers, usize delay_ms,
+ usize log_cut)
+{
+ usize log_size = num_workers * OP_MAX_NUM_BLOCKS - log_cut;
+ usize num_data_blocks = 200 + num_workers * OP_MAX_NUM_BLOCKS;
+
+ printf("(trace) running: 0/%zu", num_rounds);
+ fflush(stdout);
+
+ usize replay_count = 0;
+ for (usize round = 0; round < num_rounds; round++) {
+ int child;
+ if ((child = fork()) == IN_CHILD) {
+ initialize_mock(log_size, num_data_blocks);
+ for (usize i = 0; i < num_workers * OP_MAX_NUM_BLOCKS; i++) {
+ auto *b = mock.inspect(200 + i);
+ std::fill(b, b + BLOCK_SIZE, 0);
+ }
+
+ init_bcache(&sblock, &device);
+
+ std::atomic started = false;
+ for (usize i = 0; i < num_workers; i++) {
+ std::thread([&, i] {
+ started = true;
+ usize t = 200 + i * OP_MAX_NUM_BLOCKS;
+ try {
+ u64 v = 0;
+ while (true) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ for (usize j = 0; j < OP_MAX_NUM_BLOCKS; j++) {
+ auto *b = bcache.acquire(t + j);
+ for (usize k = 0; k < BLOCK_SIZE;
+ k += sizeof(u64)) {
+ u64 *p = reinterpret_cast(b->data +
+ k);
+ *p = v;
+ }
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ }
+ bcache.end_op(&ctx);
+
+ v++;
+ }
+ } catch (const Offline &) {
+ }
+ }).detach();
+ }
+
+ // disk will power off after `delay_ms` ms.
+ std::thread aha([&] {
+ while (!started) {
+ }
+ std::this_thread::sleep_for(
+ std::chrono::milliseconds(delay_ms));
+ mock.offline = true;
+ });
+
+ aha.join();
+ mock.dump("sd.img");
+ _exit(0);
+ } else {
+ wait_process(child);
+ initialize_mock(log_size, num_data_blocks, "sd.img");
+ auto *header = mock.inspect_log_header();
+ if (header->num_blocks > 0)
+ replay_count++;
+
+ if ((child = fork()) == IN_CHILD) {
+ init_bcache(&sblock, &device);
+ assert_eq(header->num_blocks, 0);
+
+ for (usize i = 0; i < num_workers; i++) {
+ usize t = 200 + i * OP_MAX_NUM_BLOCKS;
+ u64 v = *reinterpret_cast(mock.inspect(t));
+
+ for (usize j = 0; j < OP_MAX_NUM_BLOCKS; j++) {
+ auto *b = mock.inspect(t + j);
+ for (usize k = 0; k < BLOCK_SIZE; k += sizeof(u64)) {
+ u64 u = *reinterpret_cast(b + k);
+ assert_eq(u, v);
+ }
+ }
+ }
+
+ exit(0);
+ } else
+ wait_process(child);
+ }
+
+ printf("\r(trace) running: %zu/%zu (%zu replayed)", round + 1,
+ num_rounds, replay_count);
+ fflush(stdout);
+ }
+
+ puts("");
+}
+
+void test_banker()
+{
+ using namespace std::chrono_literals;
+
+ constexpr i64 initial = 1000;
+ constexpr i64 bill = 200;
+ constexpr usize num_accounts = 10;
+ constexpr usize num_workers = 8;
+ constexpr usize num_rounds = 30;
+
+ constexpr usize log_size = 3 * num_workers + OP_MAX_NUM_BLOCKS;
+
+ printf("(trace) running: 0/%zu", num_rounds);
+ fflush(stdout);
+
+ usize replay_count = 0;
+ for (usize round = 0; round < num_rounds; round++) {
+ int child;
+ if ((child = fork()) == IN_CHILD) {
+ initialize(log_size, num_accounts);
+
+ auto begin_ts = std::chrono::steady_clock::now();
+
+ std::vector bno;
+ bno.reserve(num_accounts);
+ for (usize i = 0; i < num_accounts; i++) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ bno.push_back(bcache.alloc(&ctx));
+ auto *b = bcache.acquire(bno.back());
+ i64 *p = reinterpret_cast(b->data);
+ *p = initial;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ bcache.end_op(&ctx);
+ }
+
+ std::random_device rd;
+ std::atomic count = 0;
+ std::atomic started = false;
+ for (usize i = 0; i < num_workers; i++) {
+ std::thread([&] {
+ std::mt19937 gen(rd());
+
+ started = true;
+ try {
+ while (true) {
+ usize j = gen() % num_accounts,
+ k = gen() % num_accounts;
+ if (j == k)
+ k = (k + 1) % num_accounts;
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+
+ Block *bj, *bk;
+ if (j < k) {
+ bj = bcache.acquire(bno[j]);
+ bk = bcache.acquire(bno[k]);
+ } else {
+ bk = bcache.acquire(bno[k]);
+ bj = bcache.acquire(bno[j]);
+ }
+
+ i64 *vj = reinterpret_cast(bj->data);
+ i64 *vk = reinterpret_cast(bk->data);
+ i64 transfer = std::min(*vj, (i64)(gen() % bill));
+
+ *vj -= transfer;
+ bcache.sync(&ctx, bj);
+ bcache.release(bj);
+
+ *vk += transfer;
+ bcache.sync(&ctx, bk);
+ bcache.release(bk);
+
+ bcache.end_op(&ctx);
+ count++;
+ }
+ } catch (const Offline &) {
+ }
+ }).detach();
+ }
+
+ while (!started) {
+ }
+ std::this_thread::sleep_for(2s);
+ mock.offline = true;
+
+ auto end_ts = std::chrono::steady_clock::now();
+ auto duration =
+ std::chrono::duration_cast(
+ end_ts - begin_ts)
+ .count();
+ printf("\r\033[K(trace) throughput = %.2f txn/s\n",
+ static_cast(count) * 1000 / duration);
+ fflush(stdout);
+
+ mock.dump("sd.img");
+ _exit(0);
+ } else {
+ wait_process(child);
+ initialize_mock(log_size, num_accounts, "sd.img");
+ auto *header = mock.inspect_log_header();
+ if (header->num_blocks > 0)
+ replay_count++;
+
+ if ((child = fork()) == IN_CHILD) {
+ init_bcache(&sblock, &device);
+
+ i64 sum = 0;
+ usize t = sblock.num_blocks - num_accounts;
+ for (usize i = 0; i < num_accounts; i++) {
+ i64 value = *reinterpret_cast(mock.inspect(t + i));
+ assert_true(value >= 0);
+ sum += value;
+ }
+
+ assert_eq(sum, num_accounts * initial);
+ exit(0);
+ } else
+ wait_process(child);
+ }
+
+ printf("\r(trace) running: %zu/%zu (%zu replayed)", round + 1,
+ num_rounds, replay_count);
+ fflush(stdout);
+ }
+
+ puts("");
+}
+
+} // namespace crash
+
+int main()
+{
+ std::vector tests = {
+ { "init", basic::test_init },
+ { "read_write", basic::test_read_write },
+ { "loop_read", basic::test_loop_read },
+ { "reuse", basic::test_reuse },
+ { "lru", basic::test_lru },
+ { "atomic_op", basic::test_atomic_op },
+ { "overflow", basic::test_overflow },
+ { "resident", basic::test_resident },
+ { "local_absorption", basic::test_local_absorption },
+ { "global_absorption", basic::test_global_absorption },
+ { "replay", basic::test_replay },
+ { "alloc", basic::test_alloc },
+ { "alloc_free", basic::test_alloc_free },
+
+ { "concurrent_acquire", concurrent::test_acquire },
+ { "concurrent_sync", concurrent::test_sync },
+ { "concurrent_alloc", concurrent::test_alloc },
+
+ { "simple_crash", crash::test_simple_crash },
+ { "single", [] { crash::test_parallel(1000, 1, 5, 0); } },
+ { "parallel_1", [] { crash::test_parallel(1000, 2, 5, 0); } },
+ { "parallel_2", [] { crash::test_parallel(1000, 4, 5, 0); } },
+ { "parallel_3", [] { crash::test_parallel(500, 4, 10, 1); } },
+ { "parallel_4",
+ [] { crash::test_parallel(500, 4, 10, 2 * OP_MAX_NUM_BLOCKS); } },
+ { "banker", crash::test_banker },
+ };
+ Runner(tests).run();
+
+ printf("(info) OK: %zu tests passed.\n", tests.size());
+
+ return 0;
+}
diff --git a/src/fs/test/exception.hpp b/src/fs/test/exception.hpp
new file mode 100755
index 0000000..efd874b
--- /dev/null
+++ b/src/fs/test/exception.hpp
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <cstdio>
+
+#include <exception>
+#include <string>
+
+#include
+
+static inline void backtrace() {
+ printf("0\n%p\n", __builtin_return_address(0));
+ printf("1\n%p\n", __builtin_return_address(1));
+ printf("2\n%p\n", __builtin_return_address(2));
+ printf("3\n%p\n", __builtin_return_address(3));
+ printf("4\n%p\n", __builtin_return_address(4));
+}
+
+struct Exception : public std::exception {
+ std::string message;
+
+ Exception(const std::string &_message) : message(_message) {}
+
+ const char *what() const noexcept override {
+ return message.data();
+ }
+};
+
+struct Internal final : Exception {
+ using Exception::Exception;
+ virtual ~Internal() = default;
+};
+
+struct Panic final : Exception {
+ using Exception::Exception;
+ virtual ~Panic() = default;
+};
+
+struct AssertionFailure final : Exception {
+ using Exception::Exception;
+ virtual ~AssertionFailure() = default;
+};
+
+struct Offline final : Exception {
+ using Exception::Exception;
+ virtual ~Offline() = default;
+};
diff --git a/src/fs/test/inode_test.cpp b/src/fs/test/inode_test.cpp
new file mode 100755
index 0000000..c51464b
--- /dev/null
+++ b/src/fs/test/inode_test.cpp
@@ -0,0 +1,432 @@
+extern "C" {
+#include
+}
+
+#include "assert.hpp"
+#include "pause.hpp"
+#include "runner.hpp"
+
+#include "mock/cache.hpp"
+
+void test_init()
+{
+ init_inodes(&sblock, &cache);
+ assert_eq(mock.count_inodes(), 1);
+ assert_eq(mock.count_blocks(), 0);
+}
+
+namespace adhoc
+{
+
+static OpContext _ctx, *ctx = &_ctx;
+
+void test_alloc()
+{
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_REGULAR);
+
+ assert_eq(mock.count_inodes(), 1);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 2);
+
+ auto *p = inodes.get(ino);
+
+ inodes.lock(p);
+ // printf("hello\n");
+ inodes.unlock(p);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, p);
+
+ assert_eq(mock.count_inodes(), 2);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 1);
+}
+
+void test_sync()
+{
+ auto *p = inodes.get(1);
+
+ inodes.lock(p);
+ assert_eq(p->entry.type, INODE_DIRECTORY);
+ p->entry.major = 0x19;
+ p->entry.minor = 0x26;
+ p->entry.indirect = 0xa817;
+ inodes.unlock(p);
+
+ mock.begin_op(ctx);
+ inodes.lock(p);
+ inodes.sync(ctx, p, true);
+ inodes.unlock(p);
+ inodes.put(ctx, p);
+ mock.end_op(ctx);
+
+ auto *q = mock.inspect(1);
+ assert_eq(q->type, INODE_DIRECTORY);
+ assert_eq(q->major, 0x19);
+ assert_eq(q->minor, 0x26);
+ assert_eq(q->indirect, 0xa817);
+}
+
+void test_touch()
+{
+ auto *p = inodes.get(1);
+ inodes.lock(p);
+
+ for (usize i = 2; i < mock.num_inodes; i++) {
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_REGULAR);
+ inodes.insert(ctx, p, std::to_string(i).data(), ino);
+
+ auto *q = inodes.get(ino);
+ inodes.lock(q);
+ assert_eq(q->entry.type, INODE_REGULAR);
+ assert_eq(q->entry.major, 0);
+ assert_eq(q->entry.minor, 0);
+ assert_eq(q->entry.num_links, 0);
+ assert_eq(q->entry.num_bytes, 0);
+ assert_eq(q->entry.indirect, 0);
+ for (usize j = 0; j < INODE_NUM_DIRECT; j++) {
+ assert_eq(q->entry.addrs[j], 0);
+ }
+
+ q->entry.num_links++;
+
+ inodes.sync(ctx, q, true);
+
+ inodes.unlock(q);
+ inodes.put(ctx, q);
+
+ assert_eq(mock.count_inodes(), i - 1);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), i);
+ }
+
+ usize n = mock.num_inodes - 1;
+ for (usize i = 2; i < mock.num_inodes; i += 2, n--) {
+ mock.begin_op(ctx);
+ usize index = 10086;
+ assert_ne(inodes.lookup(p, std::to_string(i).data(), &index), 0);
+ assert_ne(index, 10086);
+ inodes.remove(ctx, p, index);
+
+ auto *q = inodes.get(i);
+ inodes.lock(q);
+ q->entry.num_links = 0;
+ inodes.sync(ctx, q, true);
+ inodes.unlock(q);
+ inodes.put(ctx, q);
+
+ assert_eq(mock.count_inodes(), n);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), n - 1);
+ }
+
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_DIRECTORY);
+ auto *q = inodes.get(ino);
+ inodes.lock(q);
+ assert_eq(q->entry.type, INODE_DIRECTORY);
+ inodes.unlock(q);
+ assert_eq(mock.count_inodes(), n);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), n + 1);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, q);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), n);
+
+ for (usize i = 3; i < mock.num_inodes; i += 2, n--) {
+ mock.begin_op(ctx);
+ q = inodes.get(i);
+ inodes.lock(q);
+ q->entry.num_links = 0;
+ inodes.sync(ctx, q, true);
+ inodes.unlock(q);
+ inodes.put(ctx, q);
+ assert_eq(mock.count_inodes(), n);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), n - 1);
+ }
+
+ inodes.unlock(p);
+}
+
+void test_share()
+{
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_REGULAR);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 2);
+
+ auto *p = inodes.get(ino);
+ auto *q = inodes.share(p);
+ auto *r = inodes.get(ino);
+
+ assert_eq(r->rc.count, 3);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, p);
+ assert_eq(q->rc.count, 2);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 2);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, q);
+ assert_eq(r->rc.count, 1);
+ assert_eq(mock.count_inodes(), 2);
+ inodes.put(ctx, r);
+ assert_eq(mock.count_inodes(), 2);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 1);
+}
+
+void test_small_file()
+{
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_REGULAR);
+ mock.end_op(ctx);
+
+ u8 buf[1];
+ auto *p = inodes.get(ino);
+ inodes.lock(p);
+
+ buf[0] = 0xcc;
+ inodes.read(p, buf, 0, 0);
+ assert_eq(buf[0], 0xcc);
+
+ mock.begin_op(ctx);
+ inodes.write(ctx, p, buf, 0, 1);
+ assert_eq(mock.count_blocks(), 0);
+ mock.end_op(ctx);
+
+ auto *q = mock.inspect(ino);
+ assert_eq(q->indirect, 0);
+ assert_ne(q->addrs[0], 0);
+ assert_eq(q->addrs[1], 0);
+ assert_eq(q->num_bytes, 1);
+ assert_eq(mock.count_blocks(), 1);
+
+ mock.fill_junk();
+ buf[0] = 0;
+ inodes.read(p, buf, 0, 1);
+ assert_eq(buf[0], 0xcc);
+
+ inodes.unlock(p);
+
+ inodes.lock(p);
+
+ mock.begin_op(ctx);
+ inodes.clear(ctx, p);
+ mock.end_op(ctx);
+
+ q = mock.inspect(ino);
+ assert_eq(q->indirect, 0);
+ assert_eq(q->addrs[0], 0);
+ assert_eq(q->num_bytes, 0);
+ assert_eq(mock.count_blocks(), 0);
+
+ inodes.unlock(p);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, p);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 1);
+}
+
+void test_large_file()
+{
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_REGULAR);
+ mock.end_op(ctx);
+
+ constexpr usize max_size = 65535;
+ u8 buf[max_size], copy[max_size];
+ std::mt19937 gen(0x12345678);
+ for (usize i = 0; i < max_size; i++) {
+ copy[i] = buf[i] = gen() & 0xff;
+ }
+
+ auto *p = inodes.get(ino);
+
+ inodes.lock(p);
+ for (usize i = 0, n = 0; i < max_size; i += n) {
+ n = std::min(static_cast<usize>(gen() % 10000), max_size - i);
+
+ mock.begin_op(ctx);
+ inodes.write(ctx, p, buf + i, i, n);
+ auto *q = mock.inspect(ino);
+ assert_eq(q->num_bytes, i);
+ mock.end_op(ctx);
+ assert_eq(q->num_bytes, i + n);
+ }
+ inodes.unlock(p);
+
+ for (usize i = 0; i < max_size; i++) {
+ buf[i] = 0;
+ }
+
+ inodes.lock(p);
+ inodes.read(p, buf, 0, max_size);
+ inodes.unlock(p);
+
+ for (usize i = 0; i < max_size; i++) {
+ assert_eq(buf[i], copy[i]);
+ }
+
+ inodes.lock(p);
+ mock.begin_op(ctx);
+ inodes.clear(ctx, p);
+ inodes.unlock(p);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 2);
+ assert_eq(mock.count_blocks(), 0);
+
+ for (usize i = 0; i < max_size; i++) {
+ copy[i] = buf[i] = gen() & 0xff;
+ }
+
+ inodes.lock(p);
+ mock.begin_op(ctx);
+ inodes.write(ctx, p, buf, 0, max_size);
+ mock.end_op(ctx);
+ inodes.unlock(p);
+
+ auto *q = mock.inspect(ino);
+ assert_eq(q->num_bytes, max_size);
+
+ for (usize i = 0; i < max_size; i++) {
+ buf[i] = 0;
+ }
+
+ inodes.lock(p);
+ for (usize i = 0, n = 0; i < max_size; i += n) {
+ n = std::min(static_cast<usize>(gen() % 10000), max_size - i);
+ inodes.read(p, buf + i, i, n);
+ for (usize j = 0; j < i + n; j++) {
+ assert_eq(buf[j], copy[j]);
+ }
+ }
+ inodes.unlock(p);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, p);
+ mock.end_op(ctx);
+
+ assert_eq(mock.count_inodes(), 1);
+ assert_eq(mock.count_blocks(), 0);
+}
+
+void test_dir()
+{
+ usize ino[5] = { 1 };
+
+ mock.begin_op(ctx);
+ ino[1] = inodes.alloc(ctx, INODE_DIRECTORY);
+ ino[2] = inodes.alloc(ctx, INODE_REGULAR);
+ ino[3] = inodes.alloc(ctx, INODE_REGULAR);
+ ino[4] = inodes.alloc(ctx, INODE_REGULAR);
+ assert_eq(mock.count_inodes(), 1);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 5);
+
+ Inode *p[5];
+ for (usize i = 0; i < 5; i++) {
+ p[i] = inodes.get(ino[i]);
+ inodes.lock(p[i]);
+ }
+
+ mock.begin_op(ctx);
+ inodes.insert(ctx, p[0], "fudan", ino[1]);
+ p[1]->entry.num_links++;
+ inodes.sync(ctx, p[1], true);
+
+ auto *q = mock.inspect(ino[0]);
+ assert_eq(q->addrs[0], 0);
+ assert_eq(inodes.lookup(p[0], "fudan", NULL), ino[1]);
+ mock.end_op(ctx);
+
+ assert_eq(inodes.lookup(p[0], "fudan", NULL), ino[1]);
+ assert_eq(inodes.lookup(p[0], "sjtu", NULL), 0);
+ assert_eq(inodes.lookup(p[0], "pku", NULL), 0);
+ assert_eq(inodes.lookup(p[0], "tsinghua", NULL), 0);
+
+ mock.begin_op(ctx);
+ inodes.insert(ctx, p[0], ".vimrc", ino[2]);
+ inodes.insert(ctx, p[1], "alice", ino[3]);
+ inodes.insert(ctx, p[1], "bob", ino[4]);
+ p[2]->entry.num_links++;
+ p[3]->entry.num_links++;
+ p[4]->entry.num_links++;
+ inodes.sync(ctx, p[2], true);
+ inodes.sync(ctx, p[3], true);
+ inodes.sync(ctx, p[4], true);
+ mock.end_op(ctx);
+
+ for (usize i = 1; i < 5; i++) {
+ q = mock.inspect(ino[i]);
+ assert_eq(q->num_links, 1);
+ }
+
+ usize index = 233;
+ assert_eq(inodes.lookup(p[0], "vimrc", &index), 0);
+ assert_eq(index, 233);
+ assert_eq(inodes.lookup(p[0], ".vimrc", &index), ino[2]);
+ assert_ne(index, 233);
+ index = 244;
+ assert_eq(inodes.lookup(p[1], "nano", &index), 0);
+ assert_eq(index, 244);
+ assert_eq(inodes.lookup(p[1], "alice", &index), ino[3]);
+ usize index2 = 255;
+ assert_eq(inodes.lookup(p[1], "bob", &index2), ino[4]);
+ assert_ne(index, 244);
+ assert_ne(index2, 255);
+ assert_ne(index, index2);
+
+ mock.begin_op(ctx);
+ inodes.clear(ctx, p[1]);
+ p[2]->entry.num_links = 0;
+ inodes.sync(ctx, p[2], true);
+
+ q = mock.inspect(ino[1]);
+ assert_ne(q->addrs[0], 0);
+ assert_eq(inodes.lookup(p[1], "alice", NULL), 0);
+ assert_eq(inodes.lookup(p[1], "bob", NULL), 0);
+ mock.end_op(ctx);
+
+ assert_eq(q->addrs[0], 0);
+ assert_eq(mock.count_inodes(), 5);
+ assert_ne(mock.count_blocks(), 0);
+
+ for (usize i = 0; i < 5; i++) {
+ mock.begin_op(ctx);
+ inodes.unlock(p[i]);
+ inodes.put(ctx, p[i]);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), (i < 2 ? 5 : 4));
+ }
+}
+
+} // namespace adhoc
+
+int main()
+{
+ if (Runner::run({ "init", test_init }))
+ init_inodes(&sblock, &cache);
+ else
+ return -1;
+
+ std::vector tests = {
+ { "alloc", adhoc::test_alloc },
+ { "sync", adhoc::test_sync },
+ { "touch", adhoc::test_touch },
+ { "share", adhoc::test_share },
+ { "small_file", adhoc::test_small_file },
+ { "large_file", adhoc::test_large_file },
+ { "dir", adhoc::test_dir },
+ };
+ Runner(tests).run();
+
+ return 0;
+}
diff --git a/src/fs/test/instrument.c b/src/fs/test/instrument.c
new file mode 100755
index 0000000..e7b87ec
--- /dev/null
+++ b/src/fs/test/instrument.c
@@ -0,0 +1,16 @@
+#include
+
+// this file is compiled with `fs` library.
+// some symbols may conflict with those in the standard libc, e.g. `sleep`, so we
+// have to replace them with other symbol names and instrument them here.
+
+extern void _fs_test_sleep(void *chan, SpinLock *lock);
+extern void _fs_test_wakeup(void *chan);
+
+void sleep(void *chan, SpinLock *lock) {
+ _fs_test_sleep(chan, lock);
+}
+
+void wakeup(void *chan) {
+ _fs_test_wakeup(chan);
+}
diff --git a/src/fs/test/mock/arena.cpp b/src/fs/test/mock/arena.cpp
new file mode 100755
index 0000000..f44f9c8
--- /dev/null
+++ b/src/fs/test/mock/arena.cpp
@@ -0,0 +1,24 @@
+extern "C" {
+#include
+}
+
+#include "map.hpp"
+
+namespace
+{
+Map map;
+Map ref;
+} // namespace
+
+extern "C" {
+
+void *kalloc(isize x)
+{
+ return malloc(x);
+}
+
+void kfree(void *object)
+{
+ free(object);
+}
+}
diff --git a/src/fs/test/mock/block_device.hpp b/src/fs/test/mock/block_device.hpp
new file mode 100755
index 0000000..ca0b283
--- /dev/null
+++ b/src/fs/test/mock/block_device.hpp
@@ -0,0 +1,181 @@
+#pragma once
+
+extern "C" {
+#include
+}
+
+#include <atomic>
+#include <fstream>
+#include <functional>
+#include <iomanip>
+#include <mutex>
+#include <random>
+#include <vector>
+
+#include "../exception.hpp"
+
+struct MockBlockDevice {
+ struct Block {
+ std::mutex mutex;
+ u8 data[BLOCK_SIZE];
+
+ void fill_junk() {
+ static std::mt19937 gen(0x19260817);
+
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ data[i] = gen() & 0xff;
+ }
+ }
+
+ void fill_zero() {
+ std::fill(std::begin(data), std::end(data), 0);
+ }
+ };
+
+ const SuperBlock *sblock;
+
+ std::atomic<bool> offline;
+ std::atomic<usize> read_count;
+ std::atomic<usize> write_count;
+ std::vector<Block> disk;
+
+ using Hook = std::function<void(usize block_no, u8 *buffer)>;
+
+ Hook on_read;
+ Hook on_write;
+
+ void initialize(const SuperBlock &_sblock) {
+ sblock = &_sblock;
+
+ offline = false;
+ read_count = 0;
+ write_count = 0;
+ {
+ std::vector<Block> new_disk(sblock->num_blocks);
+ std::swap(disk, new_disk);
+ }
+
+ for (auto &block : disk) {
+ block.fill_junk();
+ }
+
+ if (sblock->num_log_blocks < 2)
+ throw Internal("logging area is too small");
+ disk[sblock->log_start].fill_zero();
+
+ usize num_bitmap_blocks = (sblock->num_blocks + BIT_PER_BLOCK - 1) / BIT_PER_BLOCK;
+ for (usize i = 0; i < num_bitmap_blocks; i++) {
+ disk[sblock->bitmap_start + i].fill_zero();
+ }
+
+ usize num_preallocated = 1 + 1 + sblock->num_log_blocks +
+ ((sblock->num_inodes + INODE_PER_BLOCK - 1) / INODE_PER_BLOCK) +
+ num_bitmap_blocks;
+ if (num_preallocated + sblock->num_data_blocks > sblock->num_blocks)
+ throw Internal("invalid super block");
+ for (usize i = 0; i < num_preallocated; i++) {
+ usize j = i / BIT_PER_BLOCK, k = i % BIT_PER_BLOCK;
+ disk[sblock->bitmap_start + j].data[k / 8] |= (1 << (k % 8));
+ }
+ }
+
+ auto inspect(usize block_no) -> u8 * {
+ if (block_no >= disk.size())
+ throw Internal("block number is out of range");
+ return disk[block_no].data;
+ }
+
+ auto inspect_log(usize index) -> u8 * {
+ return inspect(sblock->log_start + 1 + index);
+ }
+
+ auto inspect_log_header() -> LogHeader * {
+ return reinterpret_cast<LogHeader *>(inspect(sblock->log_start));
+ }
+
+ void dump(std::ostream &stream) {
+ for (auto &block : disk) {
+ std::scoped_lock lock(block.mutex);
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ stream << std::setfill('0') << std::setw(2) << std::hex
+ << static_cast(block.data[i]) << " ";
+ }
+ stream << "\n";
+ }
+ }
+
+ void load(std::istream &stream) {
+ for (auto &block : disk) {
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ u64 value;
+ stream >> std::hex >> value;
+ block.data[i] = value & 0xff;
+ }
+ }
+ }
+
+ void dump(const std::string &path) {
+ std::ofstream file(path);
+ dump(file);
+ }
+
+ void load(const std::string &path) {
+ std::ifstream file(path);
+ load(file);
+ }
+
+ void check_offline() {
+ if (offline)
+ throw Offline("disk power failure");
+ }
+
+ void read(usize block_no, u8 *buffer) {
+ if (block_no >= disk.size())
+ throw AssertionFailure("block number is out of range");
+
+ check_offline();
+
+ auto &block = disk[block_no];
+ std::scoped_lock lock(block.mutex);
+
+ if (on_read)
+ on_read(block_no, buffer);
+
+ check_offline();
+
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ buffer[i] = block.data[i];
+ }
+
+ read_count++;
+
+ check_offline();
+ }
+
+ void write(usize block_no, u8 *buffer) {
+ if (block_no >= disk.size())
+ throw AssertionFailure("block number is out of range");
+
+ check_offline();
+
+ auto &block = disk[block_no];
+ std::scoped_lock lock(block.mutex);
+
+ if (on_write)
+ on_write(block_no, buffer);
+
+ check_offline();
+
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ block.data[i] = buffer[i];
+ }
+
+ write_count++;
+
+ check_offline();
+ }
+};
+
+namespace {
+#include "block_device.ipp"
+} // namespace
diff --git a/src/fs/test/mock/block_device.ipp b/src/fs/test/mock/block_device.ipp
new file mode 100755
index 0000000..8e3f39a
--- /dev/null
+++ b/src/fs/test/mock/block_device.ipp
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "block_device.hpp"
+
+static MockBlockDevice mock;
+static SuperBlock sblock;
+static BlockDevice device;
+
+static void stub_read(usize block_no, u8 *buffer) {
+ mock.read(block_no, buffer);
+}
+
+static void stub_write(usize block_no, u8 *buffer) {
+ mock.write(block_no, buffer);
+}
+
+static void initialize_mock( //
+ usize log_size,
+ usize num_data_blocks,
+ const std::string &image_path = "") {
+ sblock.log_start = 2;
+ sblock.inode_start = sblock.log_start + 1 + log_size;
+ sblock.bitmap_start = sblock.inode_start + 1;
+ sblock.num_inodes = 1;
+ sblock.num_log_blocks = 1 + log_size;
+ sblock.num_data_blocks = num_data_blocks;
+ sblock.num_blocks = 1 + 1 + 1 + log_size + 1 +
+ ((num_data_blocks + BIT_PER_BLOCK - 1) / BIT_PER_BLOCK) + num_data_blocks;
+
+ mock.initialize(sblock);
+
+ device.read = stub_read;
+ device.write = stub_write;
+
+ if (!image_path.empty())
+ mock.load(image_path);
+}
+
+[[maybe_unused]] static void initialize( //
+ usize log_size,
+ usize num_data_blocks,
+ const std::string &image_path = "") {
+ initialize_mock(log_size, num_data_blocks, image_path);
+ init_bcache(&sblock, &device);
+}
diff --git a/src/fs/test/mock/cache.hpp b/src/fs/test/mock/cache.hpp
new file mode 100755
index 0000000..d728690
--- /dev/null
+++ b/src/fs/test/mock/cache.hpp
@@ -0,0 +1,342 @@
+#pragma once
+
+extern "C" {
+#include
+}
+
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <random>
+#include <unordered_map>
+
+#include "../exception.hpp"
+
+struct MockBlockCache {
+ static constexpr usize num_blocks = 2000;
+ static constexpr usize inode_start = 200;
+ static constexpr usize block_start = 1000;
+ static constexpr usize num_inodes = 1000;
+
+ static auto get_sblock() -> SuperBlock {
+ SuperBlock sblock;
+ sblock.num_blocks = num_blocks;
+ sblock.num_data_blocks = num_blocks - block_start;
+ sblock.num_inodes = num_inodes;
+ sblock.num_log_blocks = 50;
+ sblock.log_start = 2;
+ sblock.inode_start = inode_start;
+ sblock.bitmap_start = 900;
+ return sblock;
+ }
+
+ struct Meta {
+ bool mark = false;
+ std::mutex mutex;
+ bool used;
+
+ auto operator=(const Meta &rhs) -> Meta & {
+ used = rhs.used;
+ return *this;
+ }
+ };
+
+ struct Cell {
+ bool mark = false;
+ usize index;
+ std::mutex mutex;
+ Block block;
+
+ auto operator=(const Cell &rhs) -> Cell & {
+ block = rhs.block;
+ return *this;
+ }
+
+ void zero() {
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ block.data[i] = 0;
+ }
+ }
+
+ void random(std::mt19937 &gen) {
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ block.data[i] = gen() & 0xff;
+ }
+ }
+ };
+
+ // board: record all uncommitted atomic operations. `board[i] = true` means
+ // atomic operation i has called `end_op` and waits for final commit.
+ // oracle: to allocate id for each atomic operation.
+ // top: the maximum id of committed atomic operation.
+ // mutex & cv: protects board.
+ std::mutex mutex;
+ std::condition_variable cv;
+ std::atomic<usize> oracle, top_oracle;
+ std::unordered_map<usize, bool> scoreboard;
+
+ // mbit: bitmap cached in memory, which is volatile
+ // sbit: bitmap on SD card, which is persistent
+ // mblk: data blocks cached in memory, volatile
+ // sblk: data blocks on SD card, persistent
+ Meta mbit[num_blocks], sbit[num_blocks];
+ Cell mblk[num_blocks], sblk[num_blocks];
+
+ MockBlockCache() {
+ std::mt19937 gen(0x19260817);
+
+ oracle.store(1);
+ top_oracle.store(0);
+
+ // fill disk with junk.
+ for (usize i = 0; i < num_blocks; i++) {
+ mbit[i].used = false;
+ mblk[i].index = i;
+ mblk[i].random(gen);
+ sbit[i].used = false;
+ sblk[i].index = i;
+ sblk[i].random(gen);
+ }
+
+ // mock superblock.
+ auto sblock = get_sblock();
+ u8 *buf = reinterpret_cast<u8 *>(&sblock);
+ for (usize i = 0; i < sizeof(sblock); i++) {
+ sblk[1].block.data[i] = buf[i];
+ }
+
+ // mock inodes.
+ InodeEntry node[num_inodes];
+ for (usize i = 0; i < num_inodes; i++) {
+ node[i].type = INODE_INVALID;
+ node[i].major = gen() & 0xffff;
+ node[i].minor = gen() & 0xffff;
+ node[i].num_links = gen() & 0xffff;
+ node[i].num_bytes = gen() & 0xffff;
+ for (usize j = 0; j < INODE_NUM_DIRECT; j++) {
+ node[i].addrs[j] = gen();
+ }
+ node[i].indirect = gen();
+ }
+
+ // mock root inode.
+ node[1].type = INODE_DIRECTORY;
+ node[1].major = 0;
+ node[1].minor = 0;
+ node[1].num_links = 1;
+ node[1].num_bytes = 0;
+ for (usize i = 0; i < INODE_NUM_DIRECT; i++) {
+ node[1].addrs[i] = 0;
+ }
+ node[1].indirect = 0;
+
+ usize step = 0;
+ for (usize i = 0, j = inode_start; i < num_inodes; i += step, j++) {
+ step = std::min(num_inodes - i, static_cast<usize>(INODE_PER_BLOCK));
+ buf = reinterpret_cast<u8 *>(&node[i]);
+ for (usize k = 0; k < step * sizeof(InodeEntry); k++) {
+ sblk[j].block.data[k] = buf[k];
+ }
+ }
+ }
+
+ // invalidate all cached blocks and fill them with random data.
+ void fill_junk() {
+ std::mt19937 gen(0xdeadbeef);
+
+ for (usize i = 0; i < num_blocks; i++) {
+ std::scoped_lock guard(mbit[i].mutex);
+ if (mbit[i].mark)
+ throw Internal("marked by others");
+ }
+
+ for (usize i = 0; i < num_blocks; i++) {
+ std::scoped_lock guard(mblk[i].mutex);
+ if (mblk[i].mark)
+ throw Internal("marked by others");
+ mblk[i].random(gen);
+ }
+ }
+
+ // count how many inodes on disk are valid.
+ auto count_inodes() -> usize {
+ std::unique_lock lock(mutex);
+
+ usize step = 0, count = 0;
+ for (usize i = 0, j = inode_start; i < num_inodes; i += step, j++) {
+ step = std::min(num_inodes - i, static_cast<usize>(INODE_PER_BLOCK));
+ auto *inodes = reinterpret_cast<InodeEntry *>(sblk[j].block.data);
+ for (usize k = 0; k < step; k++) {
+ if (inodes[k].type != INODE_INVALID)
+ count++;
+ }
+ }
+
+ return count;
+ }
+
+ // count how many blocks on disk are allocated.
+ auto count_blocks() -> usize {
+ std::unique_lock lock(mutex);
+
+ usize count = 0;
+ for (usize i = block_start; i < num_blocks; i++) {
+ std::scoped_lock guard(sbit[i].mutex);
+ if (sbit[i].used)
+ count++;
+ }
+
+ return count;
+ }
+
+ // inspect on disk inode at specified inode number.
+ auto inspect(usize i) -> InodeEntry * {
+ usize j = inode_start + i / INODE_PER_BLOCK;
+ usize k = i % INODE_PER_BLOCK;
+ auto *arr = reinterpret_cast<InodeEntry *>(sblk[j].block.data);
+ return &arr[k];
+ }
+
+ void check_block_no(usize i) {
+ if (i >= num_blocks)
+ throw AssertionFailure("block number out of range");
+ }
+
+ auto check_and_get_cell(Block *b) -> Cell * {
+ Cell *p = container_of(b, Cell, block);
+ isize offset = reinterpret_cast<isize>(p) - reinterpret_cast<isize>(mblk);
+ if (offset % sizeof(Cell) != 0)
+ throw AssertionFailure("pointer not aligned");
+
+ isize i = p - mblk;
+ if (i < 0 || static_cast(i) >= num_blocks)
+ throw AssertionFailure("block is not managed by cache");
+
+ return p;
+ }
+
+ template <typename T>
+ void load(T &a, T &b) {
+ if (!a.mark) {
+ a = b;
+ a.mark = true;
+ }
+ }
+
+ template <typename T>
+ void store(T &a, T &b) {
+ if (a.mark) {
+ b = a;
+ a.mark = false;
+ }
+ }
+
+ void begin_op(OpContext *ctx) {
+ std::unique_lock lock(mutex);
+ ctx->ts = oracle.fetch_add(1);
+ scoreboard[ctx->ts] = false;
+ }
+
+ void end_op(OpContext *ctx) {
+ std::unique_lock lock(mutex);
+ scoreboard[ctx->ts] = true;
+
+ // is it safe to checkpoint now?
+ bool do_checkpoint = true;
+ for (const auto &e : scoreboard) {
+ do_checkpoint &= e.second;
+ }
+
+ if (do_checkpoint) {
+ for (usize i = 0; i < num_blocks; i++) {
+ std::scoped_lock guard(mbit[i].mutex, sbit[i].mutex);
+ store(mbit[i], sbit[i]);
+ }
+
+ for (usize i = 0; i < num_blocks; i++) {
+ std::scoped_lock guard(mblk[i].mutex, sblk[i].mutex);
+ store(mblk[i], sblk[i]);
+ }
+
+ usize max_oracle = 0;
+ for (const auto &e : scoreboard) {
+ max_oracle = std::max(max_oracle, e.first);
+ }
+ top_oracle.store(max_oracle);
+ scoreboard.clear();
+
+ cv.notify_all();
+ } else {
+ // if there are other running atomic operations, just wait for them.
+ cv.wait(lock, [&] { return ctx->ts <= top_oracle.load(); });
+ }
+ }
+
+ auto alloc(OpContext *ctx) -> usize {
+ for (usize i = block_start; i < num_blocks; i++) {
+ std::scoped_lock guard(mbit[i].mutex, sbit[i].mutex);
+ load(mbit[i], sbit[i]);
+
+ if (!mbit[i].used) {
+ mbit[i].used = true;
+ if (!ctx)
+ store(mbit[i], sbit[i]);
+
+ std::scoped_lock guard(mblk[i].mutex, sblk[i].mutex);
+ load(mblk[i], sblk[i]);
+ mblk[i].zero();
+ if (!ctx)
+ store(mblk[i], sblk[i]);
+
+ return i;
+ }
+ }
+
+ throw AssertionFailure("no free block");
+ }
+
+ void free(OpContext *ctx, usize i) {
+ check_block_no(i);
+
+ std::scoped_lock guard(mbit[i].mutex, sbit[i].mutex);
+ load(mbit[i], sbit[i]);
+ if (!mbit[i].used)
+ throw AssertionFailure("free unused block");
+
+ mbit[i].used = false;
+ if (!ctx)
+ store(mbit[i], sbit[i]);
+ }
+
+ auto acquire(usize i) -> Block * {
+ check_block_no(i);
+
+ mblk[i].mutex.lock();
+
+ {
+ std::scoped_lock guard(sblk[i].mutex);
+ load(mblk[i], sblk[i]);
+ }
+
+ return &mblk[i].block;
+ }
+
+ void release(Block *b) {
+ auto *p = check_and_get_cell(b);
+ p->mutex.unlock();
+ }
+
+ void sync(OpContext *ctx, Block *b) {
+ auto *p = check_and_get_cell(b);
+ usize i = p->index;
+
+ if (!ctx) {
+ std::scoped_lock guard(sblk[i].mutex);
+ store(mblk[i], sblk[i]);
+ }
+ }
+};
+
+namespace {
+#include "cache.ipp"
+} // namespace
diff --git a/src/fs/test/mock/cache.ipp b/src/fs/test/mock/cache.ipp
new file mode 100755
index 0000000..5677245
--- /dev/null
+++ b/src/fs/test/mock/cache.ipp
@@ -0,0 +1,49 @@
+#pragma once
+
+#include "cache.hpp"
+
+static MockBlockCache mock;
+static SuperBlock sblock;
+static BlockCache cache;
+
+static void stub_begin_op(OpContext *ctx) {
+ mock.begin_op(ctx);
+}
+
+static void stub_end_op(OpContext *ctx) {
+ mock.end_op(ctx);
+}
+
+static usize stub_alloc(OpContext *ctx) {
+ return mock.alloc(ctx);
+}
+
+static void stub_free(OpContext *ctx, usize block_no) {
+ mock.free(ctx, block_no);
+}
+
+static Block *stub_acquire(usize block_no) {
+ return mock.acquire(block_no);
+}
+
+static void stub_release(Block *block) {
+ return mock.release(block);
+}
+
+static void stub_sync(OpContext *ctx, Block *block) {
+ mock.sync(ctx, block);
+}
+
+static struct _Loader {
+ _Loader() {
+ sblock = mock.get_sblock();
+
+ cache.begin_op = stub_begin_op;
+ cache.end_op = stub_end_op;
+ cache.alloc = stub_alloc;
+ cache.free = stub_free;
+ cache.acquire = stub_acquire;
+ cache.release = stub_release;
+ cache.sync = stub_sync;
+ }
+} _loader;
diff --git a/src/fs/test/mock/list.cpp b/src/fs/test/mock/list.cpp
new file mode 100755
index 0000000..34ab1e5
--- /dev/null
+++ b/src/fs/test/mock/list.cpp
@@ -0,0 +1,3 @@
+extern "C" {
+#include
+}
diff --git a/src/fs/test/mock/lock.cpp b/src/fs/test/mock/lock.cpp
new file mode 100755
index 0000000..0a3931a
--- /dev/null
+++ b/src/fs/test/mock/lock.cpp
@@ -0,0 +1,177 @@
+#include "lock_config.hpp"
+#include "map.hpp"
+#include "errno.h"
+
+#include
+#include
+#include
+#include
+#include