diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..a748b84
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,741 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# clang-format configuration file. Intended for clang-format >= 11.
+#
+# For more information, see:
+#
+# Documentation/dev-tools/clang-format.rst
+# https://clang.llvm.org/docs/ClangFormat.html
+# https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+#
+---
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Left
+AlignOperands: true
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+ AfterClass: false
+ AfterControlStatement: false
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ AfterExternBlock: false
+ BeforeCatch: false
+ BeforeElse: false
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+BreakBeforeInheritanceComma: false
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeComma
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: false
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 8
+ContinuationIndentWidth: 8
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: false
+
+# Taken from:
+# git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ tools/ \
+# | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$, - '\1'," \
+# | LC_ALL=C sort -u
+ForEachMacros:
+ - '__ata_qc_for_each'
+ - '__bio_for_each_bvec'
+ - '__bio_for_each_segment'
+ - '__evlist__for_each_entry'
+ - '__evlist__for_each_entry_continue'
+ - '__evlist__for_each_entry_from'
+ - '__evlist__for_each_entry_reverse'
+ - '__evlist__for_each_entry_safe'
+ - '__for_each_mem_range'
+ - '__for_each_mem_range_rev'
+ - '__for_each_thread'
+ - '__hlist_for_each_rcu'
+ - '__map__for_each_symbol_by_name'
+ - '__pci_bus_for_each_res0'
+ - '__pci_bus_for_each_res1'
+ - '__pci_dev_for_each_res0'
+ - '__pci_dev_for_each_res1'
+ - '__perf_evlist__for_each_entry'
+ - '__perf_evlist__for_each_entry_reverse'
+ - '__perf_evlist__for_each_entry_safe'
+ - '__rq_for_each_bio'
+ - '__shost_for_each_device'
+ - '__sym_for_each'
+ - 'apei_estatus_for_each_section'
+ - 'ata_for_each_dev'
+ - 'ata_for_each_link'
+ - 'ata_qc_for_each'
+ - 'ata_qc_for_each_raw'
+ - 'ata_qc_for_each_with_internal'
+ - 'ax25_for_each'
+ - 'ax25_uid_for_each'
+ - 'bio_for_each_bvec'
+ - 'bio_for_each_bvec_all'
+ - 'bio_for_each_folio_all'
+ - 'bio_for_each_integrity_vec'
+ - 'bio_for_each_segment'
+ - 'bio_for_each_segment_all'
+ - 'bio_list_for_each'
+ - 'bip_for_each_vec'
+ - 'bond_for_each_slave'
+ - 'bond_for_each_slave_rcu'
+ - 'bpf_for_each'
+ - 'bpf_for_each_reg_in_vstate'
+ - 'bpf_for_each_reg_in_vstate_mask'
+ - 'bpf_for_each_spilled_reg'
+ - 'bpf_object__for_each_map'
+ - 'bpf_object__for_each_program'
+ - 'btree_for_each_safe128'
+ - 'btree_for_each_safe32'
+ - 'btree_for_each_safe64'
+ - 'btree_for_each_safel'
+ - 'card_for_each_dev'
+ - 'cgroup_taskset_for_each'
+ - 'cgroup_taskset_for_each_leader'
+ - 'cpu_aggr_map__for_each_idx'
+ - 'cpufreq_for_each_efficient_entry_idx'
+ - 'cpufreq_for_each_entry'
+ - 'cpufreq_for_each_entry_idx'
+ - 'cpufreq_for_each_valid_entry'
+ - 'cpufreq_for_each_valid_entry_idx'
+ - 'css_for_each_child'
+ - 'css_for_each_descendant_post'
+ - 'css_for_each_descendant_pre'
+ - 'damon_for_each_region'
+ - 'damon_for_each_region_from'
+ - 'damon_for_each_region_safe'
+ - 'damon_for_each_scheme'
+ - 'damon_for_each_scheme_safe'
+ - 'damon_for_each_target'
+ - 'damon_for_each_target_safe'
+ - 'damos_for_each_filter'
+ - 'damos_for_each_filter_safe'
+ - 'data__for_each_file'
+ - 'data__for_each_file_new'
+ - 'data__for_each_file_start'
+ - 'device_for_each_child_node'
+ - 'displayid_iter_for_each'
+ - 'dma_fence_array_for_each'
+ - 'dma_fence_chain_for_each'
+ - 'dma_fence_unwrap_for_each'
+ - 'dma_resv_for_each_fence'
+ - 'dma_resv_for_each_fence_unlocked'
+ - 'do_for_each_ftrace_op'
+ - 'drm_atomic_crtc_for_each_plane'
+ - 'drm_atomic_crtc_state_for_each_plane'
+ - 'drm_atomic_crtc_state_for_each_plane_state'
+ - 'drm_atomic_for_each_plane_damage'
+ - 'drm_client_for_each_connector_iter'
+ - 'drm_client_for_each_modeset'
+ - 'drm_connector_for_each_possible_encoder'
+ - 'drm_exec_for_each_locked_object'
+ - 'drm_exec_for_each_locked_object_reverse'
+ - 'drm_for_each_bridge_in_chain'
+ - 'drm_for_each_connector_iter'
+ - 'drm_for_each_crtc'
+ - 'drm_for_each_crtc_reverse'
+ - 'drm_for_each_encoder'
+ - 'drm_for_each_encoder_mask'
+ - 'drm_for_each_fb'
+ - 'drm_for_each_legacy_plane'
+ - 'drm_for_each_plane'
+ - 'drm_for_each_plane_mask'
+ - 'drm_for_each_privobj'
+ - 'drm_gem_for_each_gpuva'
+ - 'drm_gem_for_each_gpuva_safe'
+ - 'drm_gpuva_for_each_op'
+ - 'drm_gpuva_for_each_op_from_reverse'
+ - 'drm_gpuva_for_each_op_safe'
+ - 'drm_gpuvm_for_each_va'
+ - 'drm_gpuvm_for_each_va_range'
+ - 'drm_gpuvm_for_each_va_range_safe'
+ - 'drm_gpuvm_for_each_va_safe'
+ - 'drm_mm_for_each_hole'
+ - 'drm_mm_for_each_node'
+ - 'drm_mm_for_each_node_in_range'
+ - 'drm_mm_for_each_node_safe'
+ - 'dsa_switch_for_each_available_port'
+ - 'dsa_switch_for_each_cpu_port'
+ - 'dsa_switch_for_each_cpu_port_continue_reverse'
+ - 'dsa_switch_for_each_port'
+ - 'dsa_switch_for_each_port_continue_reverse'
+ - 'dsa_switch_for_each_port_safe'
+ - 'dsa_switch_for_each_user_port'
+ - 'dsa_tree_for_each_cpu_port'
+ - 'dsa_tree_for_each_user_port'
+ - 'dsa_tree_for_each_user_port_continue_reverse'
+ - 'dso__for_each_symbol'
+ - 'dsos__for_each_with_build_id'
+ - 'elf_hash_for_each_possible'
+ - 'elf_symtab__for_each_symbol'
+ - 'evlist__for_each_cpu'
+ - 'evlist__for_each_entry'
+ - 'evlist__for_each_entry_continue'
+ - 'evlist__for_each_entry_from'
+ - 'evlist__for_each_entry_reverse'
+ - 'evlist__for_each_entry_safe'
+ - 'flow_action_for_each'
+ - 'for_each_acpi_consumer_dev'
+ - 'for_each_acpi_dev_match'
+ - 'for_each_active_dev_scope'
+ - 'for_each_active_drhd_unit'
+ - 'for_each_active_iommu'
+ - 'for_each_active_route'
+ - 'for_each_aggr_pgid'
+ - 'for_each_and_bit'
+ - 'for_each_andnot_bit'
+ - 'for_each_available_child_of_node'
+ - 'for_each_bench'
+ - 'for_each_bio'
+ - 'for_each_board_func_rsrc'
+ - 'for_each_btf_ext_rec'
+ - 'for_each_btf_ext_sec'
+ - 'for_each_bvec'
+ - 'for_each_card_auxs'
+ - 'for_each_card_auxs_safe'
+ - 'for_each_card_components'
+ - 'for_each_card_dapms'
+ - 'for_each_card_pre_auxs'
+ - 'for_each_card_prelinks'
+ - 'for_each_card_rtds'
+ - 'for_each_card_rtds_safe'
+ - 'for_each_card_widgets'
+ - 'for_each_card_widgets_safe'
+ - 'for_each_cgroup_storage_type'
+ - 'for_each_child_of_node'
+ - 'for_each_clear_bit'
+ - 'for_each_clear_bit_from'
+ - 'for_each_clear_bitrange'
+ - 'for_each_clear_bitrange_from'
+ - 'for_each_cmd'
+ - 'for_each_cmsghdr'
+ - 'for_each_collection'
+ - 'for_each_comp_order'
+ - 'for_each_compatible_node'
+ - 'for_each_component_dais'
+ - 'for_each_component_dais_safe'
+ - 'for_each_conduit'
+ - 'for_each_console'
+ - 'for_each_console_srcu'
+ - 'for_each_cpu'
+ - 'for_each_cpu_and'
+ - 'for_each_cpu_andnot'
+ - 'for_each_cpu_or'
+ - 'for_each_cpu_wrap'
+ - 'for_each_dapm_widgets'
+ - 'for_each_dedup_cand'
+ - 'for_each_dev_addr'
+ - 'for_each_dev_scope'
+ - 'for_each_dma_cap_mask'
+ - 'for_each_dpcm_be'
+ - 'for_each_dpcm_be_rollback'
+ - 'for_each_dpcm_be_safe'
+ - 'for_each_dpcm_fe'
+ - 'for_each_drhd_unit'
+ - 'for_each_dss_dev'
+ - 'for_each_efi_memory_desc'
+ - 'for_each_efi_memory_desc_in_map'
+ - 'for_each_element'
+ - 'for_each_element_extid'
+ - 'for_each_element_id'
+ - 'for_each_endpoint_of_node'
+ - 'for_each_event'
+ - 'for_each_event_tps'
+ - 'for_each_evictable_lru'
+ - 'for_each_fib6_node_rt_rcu'
+ - 'for_each_fib6_walker_rt'
+ - 'for_each_free_mem_pfn_range_in_zone'
+ - 'for_each_free_mem_pfn_range_in_zone_from'
+ - 'for_each_free_mem_range'
+ - 'for_each_free_mem_range_reverse'
+ - 'for_each_func_rsrc'
+ - 'for_each_gpiochip_node'
+ - 'for_each_group_evsel'
+ - 'for_each_group_evsel_head'
+ - 'for_each_group_member'
+ - 'for_each_group_member_head'
+ - 'for_each_hstate'
+ - 'for_each_if'
+ - 'for_each_inject_fn'
+ - 'for_each_insn'
+ - 'for_each_insn_prefix'
+ - 'for_each_intid'
+ - 'for_each_iommu'
+ - 'for_each_ip_tunnel_rcu'
+ - 'for_each_irq_nr'
+ - 'for_each_lang'
+ - 'for_each_link_codecs'
+ - 'for_each_link_cpus'
+ - 'for_each_link_platforms'
+ - 'for_each_lru'
+ - 'for_each_matching_node'
+ - 'for_each_matching_node_and_match'
+ - 'for_each_media_entity_data_link'
+ - 'for_each_mem_pfn_range'
+ - 'for_each_mem_range'
+ - 'for_each_mem_range_rev'
+ - 'for_each_mem_region'
+ - 'for_each_member'
+ - 'for_each_memory'
+ - 'for_each_migratetype_order'
+ - 'for_each_missing_reg'
+ - 'for_each_mle_subelement'
+ - 'for_each_mod_mem_type'
+ - 'for_each_net'
+ - 'for_each_net_continue_reverse'
+ - 'for_each_net_rcu'
+ - 'for_each_netdev'
+ - 'for_each_netdev_continue'
+ - 'for_each_netdev_continue_rcu'
+ - 'for_each_netdev_continue_reverse'
+ - 'for_each_netdev_dump'
+ - 'for_each_netdev_feature'
+ - 'for_each_netdev_in_bond_rcu'
+ - 'for_each_netdev_rcu'
+ - 'for_each_netdev_reverse'
+ - 'for_each_netdev_safe'
+ - 'for_each_new_connector_in_state'
+ - 'for_each_new_crtc_in_state'
+ - 'for_each_new_mst_mgr_in_state'
+ - 'for_each_new_plane_in_state'
+ - 'for_each_new_plane_in_state_reverse'
+ - 'for_each_new_private_obj_in_state'
+ - 'for_each_new_reg'
+ - 'for_each_node'
+ - 'for_each_node_by_name'
+ - 'for_each_node_by_type'
+ - 'for_each_node_mask'
+ - 'for_each_node_state'
+ - 'for_each_node_with_cpus'
+ - 'for_each_node_with_property'
+ - 'for_each_nonreserved_multicast_dest_pgid'
+ - 'for_each_numa_hop_mask'
+ - 'for_each_of_allnodes'
+ - 'for_each_of_allnodes_from'
+ - 'for_each_of_cpu_node'
+ - 'for_each_of_pci_range'
+ - 'for_each_old_connector_in_state'
+ - 'for_each_old_crtc_in_state'
+ - 'for_each_old_mst_mgr_in_state'
+ - 'for_each_old_plane_in_state'
+ - 'for_each_old_private_obj_in_state'
+ - 'for_each_oldnew_connector_in_state'
+ - 'for_each_oldnew_crtc_in_state'
+ - 'for_each_oldnew_mst_mgr_in_state'
+ - 'for_each_oldnew_plane_in_state'
+ - 'for_each_oldnew_plane_in_state_reverse'
+ - 'for_each_oldnew_private_obj_in_state'
+ - 'for_each_online_cpu'
+ - 'for_each_online_node'
+ - 'for_each_online_pgdat'
+ - 'for_each_or_bit'
+ - 'for_each_path'
+ - 'for_each_pci_bridge'
+ - 'for_each_pci_dev'
+ - 'for_each_pcm_streams'
+ - 'for_each_physmem_range'
+ - 'for_each_populated_zone'
+ - 'for_each_possible_cpu'
+ - 'for_each_present_blessed_reg'
+ - 'for_each_present_cpu'
+ - 'for_each_prime_number'
+ - 'for_each_prime_number_from'
+ - 'for_each_probe_cache_entry'
+ - 'for_each_process'
+ - 'for_each_process_thread'
+ - 'for_each_prop_codec_conf'
+ - 'for_each_prop_dai_codec'
+ - 'for_each_prop_dai_cpu'
+ - 'for_each_prop_dlc_codecs'
+ - 'for_each_prop_dlc_cpus'
+ - 'for_each_prop_dlc_platforms'
+ - 'for_each_property_of_node'
+ - 'for_each_reg'
+ - 'for_each_reg_filtered'
+ - 'for_each_reloc'
+ - 'for_each_reloc_from'
+ - 'for_each_requested_gpio'
+ - 'for_each_requested_gpio_in_range'
+ - 'for_each_reserved_mem_range'
+ - 'for_each_reserved_mem_region'
+ - 'for_each_rtd_codec_dais'
+ - 'for_each_rtd_components'
+ - 'for_each_rtd_cpu_dais'
+ - 'for_each_rtd_dais'
+ - 'for_each_sband_iftype_data'
+ - 'for_each_script'
+ - 'for_each_sec'
+ - 'for_each_set_bit'
+ - 'for_each_set_bit_from'
+ - 'for_each_set_bit_wrap'
+ - 'for_each_set_bitrange'
+ - 'for_each_set_bitrange_from'
+ - 'for_each_set_clump8'
+ - 'for_each_sg'
+ - 'for_each_sg_dma_page'
+ - 'for_each_sg_page'
+ - 'for_each_sgtable_dma_page'
+ - 'for_each_sgtable_dma_sg'
+ - 'for_each_sgtable_page'
+ - 'for_each_sgtable_sg'
+ - 'for_each_sibling_event'
+ - 'for_each_sta_active_link'
+ - 'for_each_subelement'
+ - 'for_each_subelement_extid'
+ - 'for_each_subelement_id'
+ - 'for_each_sublist'
+ - 'for_each_subsystem'
+ - 'for_each_supported_activate_fn'
+ - 'for_each_supported_inject_fn'
+ - 'for_each_sym'
+ - 'for_each_test'
+ - 'for_each_thread'
+ - 'for_each_token'
+ - 'for_each_unicast_dest_pgid'
+ - 'for_each_valid_link'
+ - 'for_each_vif_active_link'
+ - 'for_each_vma'
+ - 'for_each_vma_range'
+ - 'for_each_vsi'
+ - 'for_each_wakeup_source'
+ - 'for_each_zone'
+ - 'for_each_zone_zonelist'
+ - 'for_each_zone_zonelist_nodemask'
+ - 'func_for_each_insn'
+ - 'fwnode_for_each_available_child_node'
+ - 'fwnode_for_each_child_node'
+ - 'fwnode_for_each_parent_node'
+ - 'fwnode_graph_for_each_endpoint'
+ - 'gadget_for_each_ep'
+ - 'genradix_for_each'
+ - 'genradix_for_each_from'
+ - 'genradix_for_each_reverse'
+ - 'hash_for_each'
+ - 'hash_for_each_possible'
+ - 'hash_for_each_possible_rcu'
+ - 'hash_for_each_possible_rcu_notrace'
+ - 'hash_for_each_possible_safe'
+ - 'hash_for_each_rcu'
+ - 'hash_for_each_safe'
+ - 'hashmap__for_each_entry'
+ - 'hashmap__for_each_entry_safe'
+ - 'hashmap__for_each_key_entry'
+ - 'hashmap__for_each_key_entry_safe'
+ - 'hctx_for_each_ctx'
+ - 'hists__for_each_format'
+ - 'hists__for_each_sort_list'
+ - 'hlist_bl_for_each_entry'
+ - 'hlist_bl_for_each_entry_rcu'
+ - 'hlist_bl_for_each_entry_safe'
+ - 'hlist_for_each'
+ - 'hlist_for_each_entry'
+ - 'hlist_for_each_entry_continue'
+ - 'hlist_for_each_entry_continue_rcu'
+ - 'hlist_for_each_entry_continue_rcu_bh'
+ - 'hlist_for_each_entry_from'
+ - 'hlist_for_each_entry_from_rcu'
+ - 'hlist_for_each_entry_rcu'
+ - 'hlist_for_each_entry_rcu_bh'
+ - 'hlist_for_each_entry_rcu_notrace'
+ - 'hlist_for_each_entry_safe'
+ - 'hlist_for_each_entry_srcu'
+ - 'hlist_for_each_safe'
+ - 'hlist_nulls_for_each_entry'
+ - 'hlist_nulls_for_each_entry_from'
+ - 'hlist_nulls_for_each_entry_rcu'
+ - 'hlist_nulls_for_each_entry_safe'
+ - 'i3c_bus_for_each_i2cdev'
+ - 'i3c_bus_for_each_i3cdev'
+ - 'idr_for_each_entry'
+ - 'idr_for_each_entry_continue'
+ - 'idr_for_each_entry_continue_ul'
+ - 'idr_for_each_entry_ul'
+ - 'in_dev_for_each_ifa_rcu'
+ - 'in_dev_for_each_ifa_rtnl'
+ - 'inet_bind_bucket_for_each'
+ - 'interval_tree_for_each_span'
+ - 'intlist__for_each_entry'
+ - 'intlist__for_each_entry_safe'
+ - 'kcore_copy__for_each_phdr'
+ - 'key_for_each'
+ - 'key_for_each_safe'
+ - 'klp_for_each_func'
+ - 'klp_for_each_func_safe'
+ - 'klp_for_each_func_static'
+ - 'klp_for_each_object'
+ - 'klp_for_each_object_safe'
+ - 'klp_for_each_object_static'
+ - 'kunit_suite_for_each_test_case'
+ - 'kvm_for_each_memslot'
+ - 'kvm_for_each_memslot_in_gfn_range'
+ - 'kvm_for_each_vcpu'
+ - 'libbpf_nla_for_each_attr'
+ - 'list_for_each'
+ - 'list_for_each_codec'
+ - 'list_for_each_codec_safe'
+ - 'list_for_each_continue'
+ - 'list_for_each_entry'
+ - 'list_for_each_entry_continue'
+ - 'list_for_each_entry_continue_rcu'
+ - 'list_for_each_entry_continue_reverse'
+ - 'list_for_each_entry_from'
+ - 'list_for_each_entry_from_rcu'
+ - 'list_for_each_entry_from_reverse'
+ - 'list_for_each_entry_lockless'
+ - 'list_for_each_entry_rcu'
+ - 'list_for_each_entry_reverse'
+ - 'list_for_each_entry_safe'
+ - 'list_for_each_entry_safe_continue'
+ - 'list_for_each_entry_safe_from'
+ - 'list_for_each_entry_safe_reverse'
+ - 'list_for_each_entry_srcu'
+ - 'list_for_each_from'
+ - 'list_for_each_prev'
+ - 'list_for_each_prev_safe'
+ - 'list_for_each_rcu'
+ - 'list_for_each_reverse'
+ - 'list_for_each_safe'
+ - 'llist_for_each'
+ - 'llist_for_each_entry'
+ - 'llist_for_each_entry_safe'
+ - 'llist_for_each_safe'
+ - 'lwq_for_each_safe'
+ - 'map__for_each_symbol'
+ - 'map__for_each_symbol_by_name'
+ - 'maps__for_each_entry'
+ - 'maps__for_each_entry_safe'
+ - 'mas_for_each'
+ - 'mci_for_each_dimm'
+ - 'media_device_for_each_entity'
+ - 'media_device_for_each_intf'
+ - 'media_device_for_each_link'
+ - 'media_device_for_each_pad'
+ - 'media_entity_for_each_pad'
+ - 'media_pipeline_for_each_entity'
+ - 'media_pipeline_for_each_pad'
+ - 'mlx5_lag_for_each_peer_mdev'
+ - 'msi_domain_for_each_desc'
+ - 'msi_for_each_desc'
+ - 'mt_for_each'
+ - 'nanddev_io_for_each_page'
+ - 'netdev_for_each_lower_dev'
+ - 'netdev_for_each_lower_private'
+ - 'netdev_for_each_lower_private_rcu'
+ - 'netdev_for_each_mc_addr'
+ - 'netdev_for_each_synced_mc_addr'
+ - 'netdev_for_each_synced_uc_addr'
+ - 'netdev_for_each_uc_addr'
+ - 'netdev_for_each_upper_dev_rcu'
+ - 'netdev_hw_addr_list_for_each'
+ - 'nft_rule_for_each_expr'
+ - 'nla_for_each_attr'
+ - 'nla_for_each_nested'
+ - 'nlmsg_for_each_attr'
+ - 'nlmsg_for_each_msg'
+ - 'nr_neigh_for_each'
+ - 'nr_neigh_for_each_safe'
+ - 'nr_node_for_each'
+ - 'nr_node_for_each_safe'
+ - 'of_for_each_phandle'
+ - 'of_property_for_each_string'
+ - 'of_property_for_each_u32'
+ - 'pci_bus_for_each_resource'
+ - 'pci_dev_for_each_resource'
+ - 'pcl_for_each_chunk'
+ - 'pcl_for_each_segment'
+ - 'pcm_for_each_format'
+ - 'perf_config_items__for_each_entry'
+ - 'perf_config_sections__for_each_entry'
+ - 'perf_config_set__for_each_entry'
+ - 'perf_cpu_map__for_each_cpu'
+ - 'perf_cpu_map__for_each_idx'
+ - 'perf_evlist__for_each_entry'
+ - 'perf_evlist__for_each_entry_reverse'
+ - 'perf_evlist__for_each_entry_safe'
+ - 'perf_evlist__for_each_evsel'
+ - 'perf_evlist__for_each_mmap'
+ - 'perf_hpp_list__for_each_format'
+ - 'perf_hpp_list__for_each_format_safe'
+ - 'perf_hpp_list__for_each_sort_list'
+ - 'perf_hpp_list__for_each_sort_list_safe'
+ - 'perf_tool_event__for_each_event'
+ - 'plist_for_each'
+ - 'plist_for_each_continue'
+ - 'plist_for_each_entry'
+ - 'plist_for_each_entry_continue'
+ - 'plist_for_each_entry_safe'
+ - 'plist_for_each_safe'
+ - 'pnp_for_each_card'
+ - 'pnp_for_each_dev'
+ - 'protocol_for_each_card'
+ - 'protocol_for_each_dev'
+ - 'queue_for_each_hw_ctx'
+ - 'radix_tree_for_each_slot'
+ - 'radix_tree_for_each_tagged'
+ - 'rb_for_each'
+ - 'rbtree_postorder_for_each_entry_safe'
+ - 'rdma_for_each_block'
+ - 'rdma_for_each_port'
+ - 'rdma_umem_for_each_dma_block'
+ - 'resort_rb__for_each_entry'
+ - 'resource_list_for_each_entry'
+ - 'resource_list_for_each_entry_safe'
+ - 'rhl_for_each_entry_rcu'
+ - 'rhl_for_each_rcu'
+ - 'rht_for_each'
+ - 'rht_for_each_entry'
+ - 'rht_for_each_entry_from'
+ - 'rht_for_each_entry_rcu'
+ - 'rht_for_each_entry_rcu_from'
+ - 'rht_for_each_entry_safe'
+ - 'rht_for_each_from'
+ - 'rht_for_each_rcu'
+ - 'rht_for_each_rcu_from'
+ - 'rq_for_each_bvec'
+ - 'rq_for_each_segment'
+ - 'rq_list_for_each'
+ - 'rq_list_for_each_safe'
+ - 'sample_read_group__for_each'
+ - 'scsi_for_each_prot_sg'
+ - 'scsi_for_each_sg'
+ - 'sctp_for_each_hentry'
+ - 'sctp_skb_for_each'
+ - 'sec_for_each_insn'
+ - 'sec_for_each_insn_continue'
+ - 'sec_for_each_insn_from'
+ - 'sec_for_each_sym'
+ - 'shdma_for_each_chan'
+ - 'shost_for_each_device'
+ - 'sk_for_each'
+ - 'sk_for_each_bound'
+ - 'sk_for_each_bound_bhash2'
+ - 'sk_for_each_entry_offset_rcu'
+ - 'sk_for_each_from'
+ - 'sk_for_each_rcu'
+ - 'sk_for_each_safe'
+ - 'sk_nulls_for_each'
+ - 'sk_nulls_for_each_from'
+ - 'sk_nulls_for_each_rcu'
+ - 'snd_array_for_each'
+ - 'snd_pcm_group_for_each_entry'
+ - 'snd_soc_dapm_widget_for_each_path'
+ - 'snd_soc_dapm_widget_for_each_path_safe'
+ - 'snd_soc_dapm_widget_for_each_sink_path'
+ - 'snd_soc_dapm_widget_for_each_source_path'
+ - 'strlist__for_each_entry'
+ - 'strlist__for_each_entry_safe'
+ - 'sym_for_each_insn'
+ - 'sym_for_each_insn_continue_reverse'
+ - 'symbols__for_each_entry'
+ - 'tb_property_for_each'
+ - 'tcf_act_for_each_action'
+ - 'tcf_exts_for_each_action'
+ - 'ttm_resource_manager_for_each_res'
+ - 'twsk_for_each_bound_bhash2'
+ - 'udp_portaddr_for_each_entry'
+ - 'udp_portaddr_for_each_entry_rcu'
+ - 'usb_hub_for_each_child'
+ - 'v4l2_device_for_each_subdev'
+ - 'v4l2_m2m_for_each_dst_buf'
+ - 'v4l2_m2m_for_each_dst_buf_safe'
+ - 'v4l2_m2m_for_each_src_buf'
+ - 'v4l2_m2m_for_each_src_buf_safe'
+ - 'virtio_device_for_each_vq'
+ - 'while_for_each_ftrace_op'
+ - 'xa_for_each'
+ - 'xa_for_each_marked'
+ - 'xa_for_each_range'
+ - 'xa_for_each_start'
+ - 'xas_for_each'
+ - 'xas_for_each_conflict'
+ - 'xas_for_each_marked'
+ - 'xbc_array_for_each_value'
+ - 'xbc_for_each_key_value'
+ - 'xbc_node_for_each_array_value'
+ - 'xbc_node_for_each_child'
+ - 'xbc_node_for_each_key_value'
+ - 'xbc_node_for_each_subkey'
+ - 'zorro_for_each_dev'
+
+IncludeBlocks: Preserve
+IncludeCategories:
+ - Regex: '.*'
+ Priority: 1
+IncludeIsMainRegex: '(Test)?$'
+IndentCaseLabels: false
+IndentGotoLabels: false
+IndentPPDirectives: None
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 8
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+
+# Taken from git's rules
+PenaltyBreakAssignment: 10
+PenaltyBreakBeforeFirstCallParameter: 30
+PenaltyBreakComment: 10
+PenaltyBreakFirstLessLess: 0
+PenaltyBreakString: 10
+PenaltyExcessCharacter: 100
+PenaltyReturnTypeOnItsOwnLine: 60
+
+PointerAlignment: Right
+ReflowComments: false
+SortIncludes: false
+SortUsingDeclarations: false
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp03
+TabWidth: 4
+UseTab: Never
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..538cddb
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,68 @@
+# Prerequisites
+*.d
+
+# Object files
+*.o
+*.ko
+*.obj
+kernel8.*
+*.img
+build/
+
+# Linker output
+*.ilk
+*.map
+*.exp
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+
+# Debug files
+*.dSYM/
+*.su
+*.idb
+*.pdb
+
+# Kernel Module Compile Results
+*.mod*
+*.cmd
+.tmp_versions/
+modules.order
+Module.symvers
+Mkfile.old
+dkms.conf
+
+# Local files
+.gdb_history
+.vscode/
+.cache/
+.mypy_cache/
+
+# CMake output
+compile_flags.txt
+compile_commands.json
+CMakeFiles/
+cmake_install.cmake
+CMakeCache.txt
+Makefile
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..f80450c
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "musl"]
+ path = musl
+	url = https://git.musl-libc.org/musl
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100755
index 0000000..8e8e304
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,77 @@
+cmake_minimum_required(VERSION 3.16)
+
+project(rpi-os VERSION 0.1.0 LANGUAGES C ASM)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS True)
+
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+ set(aarch64_prefix "")
+ set(aarch64_gdb "gdb")
+else()
+ set(aarch64_prefix "aarch64-linux-gnu-")
+ set(aarch64_gdb "gdb-multiarch")
+endif()
+
+set(aarch64_gcc "${aarch64_prefix}gcc")
+set(aarch64_ld "${aarch64_prefix}ld")
+set(aarch64_objdump "${aarch64_prefix}objdump")
+set(aarch64_objcopy "${aarch64_prefix}objcopy")
+
+set(aarch64_qemu "qemu-system-aarch64")
+
+add_subdirectory(src)
+add_subdirectory(boot)
+
+get_property(kernel_elf GLOBAL PROPERTY kernel_elf_path)
+get_property(kernel_image GLOBAL PROPERTY kernel_image_path)
+get_property(sd_image GLOBAL PROPERTY sd_image_path)
+
+set(qemu_flags
+ -machine virt,gic-version=3
+ -cpu cortex-a72
+ -smp 4
+ -m 4096
+ -nographic
+ -monitor none
+ -serial "mon:stdio"
+ -global virtio-mmio.force-legacy=false
+ -drive file=${sd_image},if=none,format=raw,id=d0
+ -device virtio-blk-device,drive=d0,bus=virtio-mmio-bus.0
+ -kernel "${kernel_elf}")
+
+add_custom_target(qemu
+ COMMAND ${aarch64_qemu} ${qemu_flags} -gdb tcp::1234
+ DEPENDS image)
+add_custom_target(qemu-debug
+ COMMAND ${aarch64_qemu} ${qemu_flags} -gdb tcp::1234 -S
+ DEPENDS image)
+add_custom_target(debug
+ COMMAND ${aarch64_gdb} --nx --quiet
+ -ex "set architecture aarch64"
+ -ex "file ${kernel_elf}"
+ -ex "target remote localhost:1234"
+ DEPENDS kernel)
+add_custom_target(pwn
+ COMMAND pwndbg-dev --nx --quiet
+ -ex "set architecture aarch64"
+ -ex "file ${kernel_elf}"
+ -ex "target remote localhost:1234"
+ DEPENDS kernel)
+
+
+# if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+# add_custom_target(init_libc
+# COMMAND git submodule update --init && cd ../libc &&
+# ./configure)
+# else()
+# add_custom_target(init_libc
+# COMMAND git submodule update --init && cd ../libc &&
+# export CROSS_COMPILE=${aarch64_prefix} &&
+# ./configure --target=aarch64)
+# endif()
+
+# set(LIBC_SPEC ${CMAKE_CURRENT_SOURCE_DIR}/libc/lib/musl-gcc.specs)
+# set(LIBC_SPEC_OUT musl-gcc.specs)
+# add_custom_target(libc
+# COMMAND make -C ../libc -j12 &&
+# sed -e \"s/\\/usr\\/local\\/musl/..\\/..\\/..\\/libc/g\" ${LIBC_SPEC} > ${LIBC_SPEC_OUT})
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f288702
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/README.md b/README.md
index 58c6102..11a9d7c 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ Lab repository for OS Fall 2025 at Fudan University
这是复旦大学 2025 年秋季学期《操作系统(H)》课程的配套实验内容。我们将建立一个基于 ARM 架构的简易教学操作系统。
-[实验文档](https://osh.fducslg.com)
+[实验文档](https://osh2025.fducslg.com/)
暂定的实验内容将包括:
diff --git a/boot/CMakeLists.txt b/boot/CMakeLists.txt
new file mode 100644
index 0000000..8705e14
--- /dev/null
+++ b/boot/CMakeLists.txt
@@ -0,0 +1,36 @@
+get_property(kernel_image GLOBAL PROPERTY kernel_image_path)
+
+set(boot_files
+ "${kernel_image}"
+ "armstub8-rpi4.bin"
+ "bootcode.bin"
+ "config.txt"
+ "COPYING.linux"
+ "fixup_cd.dat"
+ "fixup.dat"
+ "fixup4.dat"
+ "fixup4cd.dat"
+ "LICENCE.broadcom"
+ "start_cd.elf"
+ "start.elf"
+ "start4.elf"
+ "start4cd.elf")
+
+# set(user_files "init"
+# "cat"
+# "sh"
+# "echo"
+# "ls"
+# "mkfs"
+# "mkdir")
+
+add_custom_command(
+ OUTPUT sd.img
+ BYPRODUCTS boot.img
+ COMMAND ./generate-image.py ${CMAKE_CURRENT_BINARY_DIR} ${boot_files} # ${user_files}
+ DEPENDS kernel generate-image.py ${boot_files} # user_bin
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+
+add_custom_target(image ALL DEPENDS sd.img)
+
+set_property(GLOBAL PROPERTY sd_image_path ${CMAKE_CURRENT_BINARY_DIR}/sd.img)
diff --git a/boot/COPYING.linux b/boot/COPYING.linux
new file mode 100644
index 0000000..ca442d3
--- /dev/null
+++ b/boot/COPYING.linux
@@ -0,0 +1,356 @@
+
+ NOTE! This copyright does *not* cover user programs that use kernel
+ services by normal system calls - this is merely considered normal use
+ of the kernel, and does *not* fall under the heading of "derived work".
+ Also note that the GPL below is copyrighted by the Free Software
+ Foundation, but the instance of code that it refers to (the Linux
+ kernel) is copyrighted by me and others who actually wrote it.
+
+ Also note that the only valid version of the GPL as far as the kernel
+ is concerned is _this_ particular version of the license (ie v2, not
+ v2.2 or v3.x or whatever), unless explicitly otherwise stated.
+
+ Linus Torvalds
+
+----------------------------------------
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/boot/LICENCE.broadcom b/boot/LICENCE.broadcom
new file mode 100644
index 0000000..d5793b5
--- /dev/null
+++ b/boot/LICENCE.broadcom
@@ -0,0 +1,31 @@
+Copyright (c) 2006, Broadcom Corporation.
+Copyright (c) 2015, Raspberry Pi (Trading) Ltd
+All rights reserved.
+
+Redistribution. Redistribution and use in binary form, without
+modification, are permitted provided that the following conditions are
+met:
+
+* This software may only be used for the purposes of developing for,
+ running or using a Raspberry Pi device, or authorised derivative
+ device manufactured via the element14 Raspberry Pi Customization Service
+* Redistributions must reproduce the above copyright notice and the
+ following disclaimer in the documentation and/or other materials
+ provided with the distribution.
+* Neither the name of Broadcom Corporation nor the names of its suppliers
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+DISCLAIMER. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+
diff --git a/boot/README.md b/boot/README.md
new file mode 100644
index 0000000..43aa0ea
--- /dev/null
+++ b/boot/README.md
@@ -0,0 +1,17 @@
+This folder contains necessary files to boot rpi-os up. They are downloaded from <https://github.com/raspberrypi/firmware/tree/master/boot>.
+
+`armstub8-rpi4.bin` is compiled from `armstub8.S` by following make rules:
+
+```makefile
+%8-rpi4.o: %8.S
+ $(CC) -DBCM2711=1 -c $< -o $@
+
+%8-rpi4.elf: %8-rpi4.o
+ $(LD) --section-start=.text=0 $< -o $@
+
+%8-rpi4.tmp: %8-rpi4.elf
+ $(OBJCOPY) $< -O binary $@
+
+%8-rpi4.bin: %8-rpi4.tmp
+ dd if=$< ibs=256 of=$@ conv=sync
+```
diff --git a/boot/armstub8-rpi4.bin b/boot/armstub8-rpi4.bin
new file mode 100644
index 0000000..2635844
Binary files /dev/null and b/boot/armstub8-rpi4.bin differ
diff --git a/boot/armstub8.S b/boot/armstub8.S
new file mode 100644
index 0000000..7c5b100
--- /dev/null
+++ b/boot/armstub8.S
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2016-2019 Raspberry Pi (Trading) Ltd.
+ * Copyright (c) 2016 Stephen Warren
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * * Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define BIT(x) (1 << (x))
+
+#if BCM2711
+#ifdef HIGH_PERI
+#define LOCAL_CONTROL 0x4c0000000
+#define LOCAL_PRESCALER 0x4c0000008
+#else
+#define LOCAL_CONTROL 0xff800000
+#define LOCAL_PRESCALER 0xff800008
+#endif
+#else
+#define LOCAL_CONTROL 0x40000000
+#define LOCAL_PRESCALER 0x40000008
+#endif
+
+#ifdef HIGH_PERI
+#define GIC_DISTB 0x4c0041000
+#define GIC_CPUB 0x4c0042000
+#else
+#define GIC_DISTB 0xff841000
+#define GIC_CPUB 0xff842000
+#endif
+
+#if BCM2711
+#define OSC_FREQ 54000000
+#else
+#define OSC_FREQ 19200000
+#endif
+
+#define SCR_RW BIT(10)
+#define SCR_HCE BIT(8)
+#define SCR_SMD BIT(7)
+#define SCR_RES1_5 BIT(5)
+#define SCR_RES1_4 BIT(4)
+#define SCR_NS BIT(0)
+#define SCR_VAL \
+ (SCR_RW | SCR_HCE | SCR_SMD | SCR_RES1_5 | SCR_RES1_4 | SCR_NS)
+
+#define ACTLR_VAL \
+ (BIT(0) | BIT(1) | BIT(4) | BIT(5) | BIT(6))
+
+#define CPUECTLR_EL1 S3_1_C15_C2_1
+#define CPUECTLR_EL1_SMPEN BIT(6)
+
+#define SPSR_EL3_D BIT(9)
+#define SPSR_EL3_A BIT(8)
+#define SPSR_EL3_I BIT(7)
+#define SPSR_EL3_F BIT(6)
+#define SPSR_EL3_MODE_EL2H 9
+#define SPSR_EL3_VAL \
+ (SPSR_EL3_D | SPSR_EL3_A | SPSR_EL3_I | SPSR_EL3_F | SPSR_EL3_MODE_EL2H)
+
+#define L2CTLR_EL1 S3_1_C11_C0_2
+
+
+#define GICC_CTRLR 0x0
+#define GICC_PMR 0x4
+#define IT_NR 0x8 // Number of interrupt enable registers (256 total irqs)
+#define GICD_CTRLR 0x0
+#define GICD_IGROUPR 0x80
+
+.globl _start
+_start:
+ /*
+ * LOCAL_CONTROL:
+ * Bit 9 clear: Increment by 1 (vs. 2).
+ * Bit 8 clear: Timer source is 19.2MHz crystal (vs. APB).
+ */
+ ldr x0, =LOCAL_CONTROL
+ str wzr, [x0]
+ /* LOCAL_PRESCALER; divide-by (0x80000000 / register_val) == 1 */
+ mov w1, 0x80000000
+ str w1, [x0, #(LOCAL_PRESCALER - LOCAL_CONTROL)]
+
+ /* Set L2 read/write cache latency to 3 */
+ mrs x0, L2CTLR_EL1
+ mov x1, #0x22
+ orr x0, x0, x1
+ msr L2CTLR_EL1, x0
+
+ /* Set up CNTFRQ_EL0 */
+ ldr x0, =OSC_FREQ
+ msr CNTFRQ_EL0, x0
+
+ /* Set up CNTVOFF_EL2 */
+ msr CNTVOFF_EL2, xzr
+
+ /* Enable FP/SIMD */
+ /* All set bits below are res1; bit 10 (TFP) is set to 0 */
+ mov x0, #0x33ff
+ msr CPTR_EL3, x0
+
+ /* Set up SCR */
+ mov x0, #SCR_VAL
+ msr SCR_EL3, x0
+
+ /* Set up ACTLR */
+ mov x0, #ACTLR_VAL
+ msr ACTLR_EL3, x0
+
+ /* Set SMPEN */
+ mov x0, #CPUECTLR_EL1_SMPEN
+ msr CPUECTLR_EL1, x0
+
+#ifdef GIC
+ bl setup_gic
+#endif
+ /*
+ * Set up SCTLR_EL2
+ * All set bits below are res1. LE, no WXN/I/SA/C/A/M
+ */
+ ldr x0, =0x30c50830
+ msr SCTLR_EL2, x0
+
+ /* Switch to EL2 */
+ mov x0, #SPSR_EL3_VAL
+ msr spsr_el3, x0
+ adr x0, in_el2
+ msr elr_el3, x0
+ eret
+in_el2:
+
+ mrs x6, MPIDR_EL1
+ and x6, x6, #0x3
+ cbz x6, primary_cpu
+
+ adr x5, spin_cpu0
+secondary_spin:
+ wfe
+ ldr x4, [x5, x6, lsl #3]
+ cbz x4, secondary_spin
+ mov x0, #0
+ b boot_kernel
+
+primary_cpu:
+ ldr w4, kernel_entry32
+ ldr w0, dtb_ptr32
+
+boot_kernel:
+ mov x1, #0
+ mov x2, #0
+ mov x3, #0
+ br x4
+
+.ltorg
+
+.org 0xd8
+.globl spin_cpu0
+spin_cpu0:
+ .quad 0
+.org 0xe0
+.globl spin_cpu1
+spin_cpu1:
+ .quad 0
+.org 0xe8
+.globl spin_cpu2
+spin_cpu2:
+ .quad 0
+.org 0xf0
+.globl spin_cpu3
+spin_cpu3:
+ # Shared with next two symbols/.word
+ # FW clears the next 8 bytes after reading the initial value, leaving
+ # the location suitable for use as spin_cpu3
+.org 0xf0
+.globl stub_magic
+stub_magic:
+ .word 0x5afe570b
+.org 0xf4
+.globl stub_version
+stub_version:
+ .word 0
+.org 0xf8
+.globl dtb_ptr32
+dtb_ptr32:
+ .word 0x0
+.org 0xfc
+.globl kernel_entry32
+kernel_entry32:
+ .word 0x0
+
+// Leave space for the ATAGS, which are loaded at 0x100
+// See https://www.raspberrypi.org/forums/viewtopic.php?f=72&t=293320
+.org 0x400
+
+#ifdef GIC
+
+setup_gic: // Called from secure mode - set all interrupts to group 1 and enable.
+ mrs x0, MPIDR_EL1
+ ldr x2, =GIC_DISTB
+ tst x0, #0x3
+ b.ne 2f // secondary cores
+
+ mov w0, #3 // Enable group 0 and 1 IRQs from distributor
+ str w0, [x2, #GICD_CTRLR]
+2:
+ add x1, x2, #(GIC_CPUB - GIC_DISTB)
+ mov w0, #0x1e7
+ str w0, [x1, #GICC_CTRLR] // Enable group 1 IRQs from CPU interface
+ mov w0, #0xff
+ str w0, [x1, #GICC_PMR] // priority mask
+ add x2, x2, #GICD_IGROUPR
+ mov x0, #(IT_NR * 4)
+ mov w1, #~0 // group 1 all the things
+3:
+ subs x0, x0, #4
+ str w1, [x2, x0]
+ b.ne 3b
+ ret
+
+#endif
+
+.globl dtb_space
+dtb_space:
diff --git a/boot/bootcode.bin b/boot/bootcode.bin
new file mode 100644
index 0000000..c7ec95e
Binary files /dev/null and b/boot/bootcode.bin differ
diff --git a/boot/config.txt b/boot/config.txt
new file mode 100644
index 0000000..9a1ce2d
--- /dev/null
+++ b/boot/config.txt
@@ -0,0 +1,9 @@
+arm_64bit=1
+enable_uart=1
+# disable_l2cache=1
+
+[pi4]
+device_tree=
+enable_gic=0
+core_freq_min=250
+armstub=armstub8-rpi4.bin
diff --git a/boot/fixup.dat b/boot/fixup.dat
new file mode 100644
index 0000000..5018e69
Binary files /dev/null and b/boot/fixup.dat differ
diff --git a/boot/fixup4.dat b/boot/fixup4.dat
new file mode 100644
index 0000000..d5fdacd
Binary files /dev/null and b/boot/fixup4.dat differ
diff --git a/boot/fixup4cd.dat b/boot/fixup4cd.dat
new file mode 100644
index 0000000..b55ac8d
Binary files /dev/null and b/boot/fixup4cd.dat differ
diff --git a/boot/fixup_cd.dat b/boot/fixup_cd.dat
new file mode 100644
index 0000000..b55ac8d
Binary files /dev/null and b/boot/fixup_cd.dat differ
diff --git a/boot/generate-image.py b/boot/generate-image.py
new file mode 100755
index 0000000..6870d63
--- /dev/null
+++ b/boot/generate-image.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+
+# TODO: generate filesystem image.
+
+from os import system
+from pathlib import Path
+from argparse import ArgumentParser
+
+def sh(command):
+ print(f'> {command}')
+ assert system(command) == 0
+
+sector_size = 512
+n_sectors = 256 * 1024
+boot_offset = 2048
+n_boot_sectors = 128 * 1024
+filesystem_offset = boot_offset + n_boot_sectors
+n_filesystem_sectors = n_sectors - filesystem_offset
+
+def generate_boot_image(target, files):
+ sh(f'dd if=/dev/zero of={target} seek={n_boot_sectors - 1} bs={sector_size} count=1')
+
+ # "-F 32" specifies FAT32.
+ # "-s 1" specifies one sector per cluster so that we can create a smaller one.
+ sh(f'mkfs.vfat -F 32 -s 1 {target}')
+
+ # copy files into boot partition.
+ for file in files:
+ sh(f'mcopy -i {target} {file} ::{Path(file).name};')
+
+def generate_fs_image(target, files):
+ sh(f'cc ../src/user/mkfs/main.c -o ../build/mkfs -I../src/')
+ file_list=""
+ for file in files:
+ file_list = file_list + "../build/src/user/" + str(file) + ' '
+ print(file_list)
+ sh(f'../build/mkfs {target} {file_list}')
+
+def generate_sd_image(target, boot_image, fs_image):
+ sh(f'dd if=/dev/zero of={target} seek={n_sectors - 1} bs={sector_size} count=1')
+
+ boot_line = f'{boot_offset}, {n_boot_sectors * sector_size // 1024}K, c,'
+ filesystem_line = f'{filesystem_offset}, {n_filesystem_sectors * sector_size // 1024}K, L,'
+ sh(f'printf "{boot_line}\\n{filesystem_line}\\n" | sfdisk {target}')
+
+ sh(f'dd if={boot_image} of={target} seek={boot_offset} conv=notrunc')
+ sh(f'dd if={fs_image} of={target} seek={filesystem_offset} conv=notrunc')
+
+if __name__ == '__main__':
+ parser = ArgumentParser()
+ parser.add_argument('root')
+ parser.add_argument('files', nargs=14)
+ parser.add_argument('user_files', nargs='*')
+
+ args = parser.parse_args()
+
+ boot_image = f'{args.root}/boot.img'
+ sd_image = f'{args.root}/sd.img'
+ fs_image = f'{args.root}/fs.img'
+
+ generate_boot_image(boot_image, args.files)
+ generate_fs_image(fs_image, args.user_files)
+ generate_sd_image(sd_image, boot_image, fs_image)
diff --git a/boot/start.elf b/boot/start.elf
new file mode 100644
index 0000000..3477828
Binary files /dev/null and b/boot/start.elf differ
diff --git a/boot/start4.elf b/boot/start4.elf
new file mode 100644
index 0000000..2a19058
Binary files /dev/null and b/boot/start4.elf differ
diff --git a/boot/start4cd.elf b/boot/start4cd.elf
new file mode 100644
index 0000000..151e8cf
Binary files /dev/null and b/boot/start4cd.elf differ
diff --git a/boot/start_cd.elf b/boot/start_cd.elf
new file mode 100644
index 0000000..7652305
Binary files /dev/null and b/boot/start_cd.elf differ
diff --git a/musl/.gitignore b/musl/.gitignore
new file mode 100644
index 0000000..8043b6b
--- /dev/null
+++ b/musl/.gitignore
@@ -0,0 +1,8 @@
+*.o
+*.lo
+*.a
+*.so
+*.so.1
+config.mak
+lib/musl-gcc.specs
+/obj/
diff --git a/musl/.mailmap b/musl/.mailmap
new file mode 100644
index 0000000..aede9ec
--- /dev/null
+++ b/musl/.mailmap
@@ -0,0 +1 @@
+Ada Worcester
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..1471efa
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,69 @@
+# Kernel build configuration: cross-compile everything under src/ with
+# the aarch64 bare-metal toolchain and produce the kernel8 artifacts.
+set(CMAKE_C_STANDARD 11)
+
+# The same cross-compiler driver handles both C and assembly sources.
+set(CMAKE_C_COMPILER ${aarch64_gcc})
+set(CMAKE_ASM_COMPILER ${aarch64_gcc})
+
+# Project headers plus the in-tree musl libc headers (generated,
+# generic, and aarch64-specific include directories).
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../musl/obj/include)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../musl/include)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../musl/arch/aarch64)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../musl/arch/generic)
+
+# Freestanding kernel flags: no PIC, no stack protector, no host libc or
+# startup files; -mgeneral-regs-only keeps FP/SIMD registers out of
+# kernel code so trap handlers need not save them.
+set(compiler_warnings "-Werror -Wall -Wextra")
+set(compiler_flags "${compiler_warnings} \
+ -fno-pie -fno-pic -fno-stack-protector \
+ -fno-zero-initialized-in-bss \
+ -Og -g -static -fno-builtin -nostdlib -nostdinc -ffreestanding -nostartfiles \
+ -Wl,--whole-archive \
+ -mgeneral-regs-only \
+ -MMD -MP \
+ -mlittle-endian -mcmodel=small -mno-outline-atomics \
+ -mcpu=cortex-a72+nofp -mtune=cortex-a72 -DUSE_ARMVIRT -Wno-error=unused-parameter")
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${compiler_flags}")
+set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${compiler_flags}")
+
+# Link with the custom kernel layout script.
+set(linker_script "${CMAKE_CURRENT_SOURCE_DIR}/linker.ld")
+set(LINK_DEPENDS "${LINK_DEPENDS} ${linker_script}")
+
+# "--build-id=none": remove ".note.gnu.build-id" section.
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} \
+ -T ${linker_script} \
+ -Wl,--build-id=none")
+
+# Component libraries linked into the kernel below.
+add_subdirectory(aarch64)
+add_subdirectory(common)
+add_subdirectory(kernel)
+add_subdirectory(driver)
+add_subdirectory(user)
+add_subdirectory(test)
+
+set(kernel_name kernel8)
+add_executable(${kernel_name}.elf main.c start.S)
+target_link_libraries(${kernel_name}.elf test kernelx driver common aarch64 user)
+
+# Derived artifact paths: disassembly (.asm), section/symbol dump
+# (.hdr), and the raw bootable binary (.img).
+set(kernel_prefix "${CMAKE_CURRENT_BINARY_DIR}/${kernel_name}")
+set(kernel_elf "${kernel_prefix}.elf")
+set(kernel_asm "${kernel_prefix}.asm")
+set(kernel_hdr "${kernel_prefix}.hdr")
+set(kernel_img "${kernel_prefix}.img")
+
+# Annotated disassembly for debugging.
+add_custom_command(
+ OUTPUT ${kernel_asm}
+ COMMAND ${aarch64_objdump} -S -d ${kernel_elf} > ${kernel_asm}
+ DEPENDS ${kernel_elf})
+
+# ELF headers, sections, and symbol table dump.
+add_custom_command(
+ OUTPUT ${kernel_hdr}
+ COMMAND ${aarch64_objdump} -x ${kernel_elf} > ${kernel_hdr}
+ DEPENDS ${kernel_elf})
+
+# Strip ELF metadata to get the raw image the loader boots.
+add_custom_command(
+ OUTPUT ${kernel_img}
+ COMMAND ${aarch64_objcopy} -O binary ${kernel_elf} ${kernel_img}
+ DEPENDS ${kernel_elf})
+
+add_custom_target(kernel ALL DEPENDS ${kernel_asm} ${kernel_hdr} ${kernel_img})
+
+# Export artifact paths for other CMake files in the project
+# (presumably consumed by the image/packaging steps -- TODO confirm).
+set_property(GLOBAL PROPERTY kernel_elf_path ${kernel_elf})
+set_property(GLOBAL PROPERTY kernel_image_path ${kernel_img})
diff --git a/src/aarch64/CMakeLists.txt b/src/aarch64/CMakeLists.txt
new file mode 100644
index 0000000..51675be
--- /dev/null
+++ b/src/aarch64/CMakeLists.txt
@@ -0,0 +1,3 @@
+# Collect every C and assembly source (*.c, *.S) in this directory and
+# build them into the `aarch64` static library.
+file(GLOB aarch64_sources CONFIGURE_DEPENDS "*.[Sc]")
+
+add_library(aarch64 STATIC ${aarch64_sources})
diff --git a/src/aarch64/exception_vector.S b/src/aarch64/exception_vector.S
new file mode 100644
index 0000000..d95ea07
--- /dev/null
+++ b/src/aarch64/exception_vector.S
@@ -0,0 +1,33 @@
+/* Each vector slot is 128 bytes wide (.align 7). Slots we expect to be
+ * taken branch to the common trap entry; every other slot reports a
+ * fatal error with a slot-unique code via trap_error_handler. */
+#define enter_trap .align 7; b trap_entry
+#define trap_error(type) .align 7; mov x0, #(type); b trap_error_handler
+
+.globl exception_vector
+
+/* Base address loaded into VBAR_EL1; aligned to 4KB (.align 12). */
+.align 12
+exception_vector:
+/* Exceptions from EL1 while on SP_EL0 -- never expected, all fatal. */
+el1_sp0:
+ trap_error(0)
+ trap_error(1)
+ trap_error(2)
+ trap_error(3)
+
+/* Exceptions from EL1 on SP_EL1: in-kernel synchronous traps and IRQs
+ * go through the normal trap path; FIQ (6) and SError (7) are fatal. */
+el1_spx:
+ /* if you want to disable in-kernel traps, just replace `enter_trap` with `trap_error` */
+ //trap_error(4)
+ //trap_error(5)
+ enter_trap
+ enter_trap
+ trap_error(6)
+ trap_error(7)
+
+/* Exceptions from EL0 in AArch64 state (syscalls, faults, IRQs). */
+el0_aarch64:
+ enter_trap
+ enter_trap
+ trap_error(10)
+ trap_error(11)
+
+/* Exceptions from EL0 in AArch32 state -- unsupported, all fatal. */
+el0_aarch32:
+ trap_error(12)
+ trap_error(13)
+ trap_error(14)
+ trap_error(15)
diff --git a/src/aarch64/intrinsic.c b/src/aarch64/intrinsic.c
new file mode 100644
index 0000000..21a469d
--- /dev/null
+++ b/src/aarch64/intrinsic.c
@@ -0,0 +1,19 @@
+#include
+
+/**
+ * Busy-wait for at least `n` microseconds.
+ *
+ * Converts `n` to timer ticks using the architectural counter frequency
+ * and spins on the counter until the deadline has passed.
+ *
+ * NOTE(review): `freq / 1000000 * n` divides first, so sub-MHz
+ * remainder of the counter frequency is dropped -- assumes cntfrq is a
+ * multiple of 1 MHz; TODO confirm for the target platform.
+ */
+void delay_us(u64 n)
+{
+ u64 freq = get_clock_frequency();
+ u64 end = get_timestamp(), now;
+ end += freq / 1000000 * n;
+
+ do {
+ now = get_timestamp();
+ } while (now <= end);
+}
+
+/**
+ * Power on secondary cores 1-3 via PSCI CPU_ON, each starting
+ * execution at SECONDARY_CORE_ENTRY. Core 0 is the boot core and is
+ * already running.
+ */
+void smp_init()
+{
+ psci_cpu_on(1, SECONDARY_CORE_ENTRY);
+ psci_cpu_on(2, SECONDARY_CORE_ENTRY);
+ psci_cpu_on(3, SECONDARY_CORE_ENTRY);
+}
diff --git a/src/aarch64/intrinsic.h b/src/aarch64/intrinsic.h
new file mode 100755
index 0000000..7affd57
--- /dev/null
+++ b/src/aarch64/intrinsic.h
@@ -0,0 +1,333 @@
+#pragma once
+
+#include
+
+#define SECONDARY_CORE_ENTRY 0x40000000
+#define PSCI_SYSTEM_OFF 0x84000008
+#define PSCI_SYSTEM_RESET 0x84000009
+#define PSCI_SYSTEM_CPUON 0xC4000003
+
+/**
+ * PSCI (Power State Coordination Interface) function on QEMU's virt platform
+ * -------------------------------------------------------------------------
+ * This function provides an interface to interact with the PSCI (Power State
+ * Coordination Interface) on ARM architectures, which is particularly useful
+ * in virtualized environments like QEMU's virt platform.
+ *
+ * Background:
+ * PSCI is an ARM-defined interface that allows software running at the highest
+ * privilege level (typically a hypervisor or OS kernel) to manage power states
+ * of CPUs. It includes operations to turn CPUs on or off, put them into a low
+ * power state, or reset them.
+ *
+ * In a virtualized environment, such as when using QEMU with the virt machine
+ * type, the PSCI interface can be used to control the power states of virtual
+ * CPUs (vCPUs). This is essential for operations like starting a secondary
+ * vCPU or putting a vCPU into a suspend state.
+ */
+/* Issue a PSCI call: function id in x0, arguments in x1-x3, result
+ * returned in x0 (per the SMC calling convention). `hvc #0` traps to
+ * the hypervisor-level PSCI implementation provided by QEMU. */
+static ALWAYS_INLINE u64 psci_fn(u64 id, u64 arg1, u64 arg2, u64 arg3)
+{
+ u64 result;
+
+ asm volatile("mov x0, %1\n"
+ "mov x1, %2\n"
+ "mov x2, %3\n"
+ "mov x3, %4\n"
+ "hvc #0\n"
+ "mov %0, x0\n"
+ : "=r"(result)
+ : "r"(id), "r"(arg1), "r"(arg2), "r"(arg3)
+ : "x0", "x1", "x2", "x3");
+
+ return result;
+}
+
+static ALWAYS_INLINE u64 psci_cpu_on(u64 cpuid, u64 ep)
+{
+ return psci_fn(PSCI_SYSTEM_CPUON, cpuid, ep, 0);
+}
+
+/* Index of the current core: the low byte (affinity level 0) of
+ * MPIDR_EL1. */
+static WARN_RESULT ALWAYS_INLINE usize cpuid()
+{
+ u64 id;
+ asm volatile("mrs %[x], mpidr_el1" : [x] "=r"(id));
+ return id & 0xff;
+}
+
+/* Instruct compiler not to reorder instructions around the fence. */
+static ALWAYS_INLINE void compiler_fence()
+{
+ asm volatile("" ::: "memory");
+}
+
+static WARN_RESULT ALWAYS_INLINE u64 get_clock_frequency()
+{
+ u64 result;
+ asm volatile("mrs %[freq], cntfrq_el0" : [freq] "=r"(result));
+ return result;
+}
+
+static WARN_RESULT ALWAYS_INLINE u64 get_timestamp()
+{
+ u64 result;
+ compiler_fence();
+ asm volatile("mrs %[cnt], cntpct_el0" : [cnt] "=r"(result));
+ compiler_fence();
+ return result;
+}
+
+/* Instruction synchronization barrier. */
+static ALWAYS_INLINE void arch_isb()
+{
+ asm volatile("isb" ::: "memory");
+}
+
+/* Data synchronization barrier. */
+static ALWAYS_INLINE void arch_dsb_sy()
+{
+ asm volatile("dsb sy" ::: "memory");
+}
+
+static ALWAYS_INLINE void arch_fence()
+{
+ arch_dsb_sy();
+ arch_isb();
+}
+
+/**
+ * The `device_get/put_*` functions do not require protection using
+ * architectural barriers. This is because they are specifically
+ * designed to access device memory regions, which are already marked as
+ * nGnRnE (Non-Gathering, Non-Reordering, No Early Write Acknowledgement)
+ * in the `kernel_pt_level0`.
+ */
+static ALWAYS_INLINE void device_put_u32(u64 addr, u32 value)
+{
+ compiler_fence();
+ *(volatile u32 *)addr = value;
+ compiler_fence();
+}
+
+static WARN_RESULT ALWAYS_INLINE u32 device_get_u32(u64 addr)
+{
+ compiler_fence();
+ u32 value = *(volatile u32 *)addr;
+ compiler_fence();
+ return value;
+}
+
+/* Read Exception Syndrome Register (EL1). */
+static WARN_RESULT ALWAYS_INLINE u64 arch_get_esr()
+{
+ u64 result;
+ arch_fence();
+ asm volatile("mrs %[x], esr_el1" : [x] "=r"(result));
+ arch_fence();
+ return result;
+}
+
+/* Reset Exception Syndrome Register (EL1) to zero. */
+static ALWAYS_INLINE void arch_reset_esr()
+{
+ arch_fence();
+ asm volatile("msr esr_el1, %[x]" : : [x] "r"(0ll));
+ arch_fence();
+}
+
+/* Read Exception Link Register (EL1). */
+static WARN_RESULT ALWAYS_INLINE u64 arch_get_elr()
+{
+ u64 result;
+ arch_fence();
+ asm volatile("mrs %[x], elr_el1" : [x] "=r"(result));
+ arch_fence();
+ return result;
+}
+
+/* Set vector base (virtual) address register (EL1). */
+static ALWAYS_INLINE void arch_set_vbar(void *ptr)
+{
+ arch_fence();
+ asm volatile("msr vbar_el1, %[x]" : : [x] "r"(ptr));
+ arch_fence();
+}
+
+/* Flush TLB entries. */
+static ALWAYS_INLINE void arch_tlbi_vmalle1is()
+{
+ arch_fence();
+ asm volatile("tlbi vmalle1is");
+ arch_fence();
+}
+
+/* Set Translation Table Base Register 0 (EL1). */
+static ALWAYS_INLINE void arch_set_ttbr0(u64 addr)
+{
+ arch_fence();
+ asm volatile("msr ttbr0_el1, %[x]" : : [x] "r"(addr));
+ arch_tlbi_vmalle1is();
+}
+
+/* Get Translation Table Base Register 0 (EL1). */
+static inline WARN_RESULT u64 arch_get_ttbr0()
+{
+ u64 result;
+ arch_fence();
+ asm volatile("mrs %[x], ttbr0_el1" : [x] "=r"(result));
+ arch_fence();
+ return result;
+}
+
+/* Set Translation Table Base Register 1 (EL1). */
+static ALWAYS_INLINE void arch_set_ttbr1(u64 addr)
+{
+ arch_fence();
+ asm volatile("msr ttbr1_el1, %[x]" : : [x] "r"(addr));
+ arch_tlbi_vmalle1is();
+}
+
+/* Read Fault Address Register. */
+static inline u64 arch_get_far()
+{
+ u64 result;
+ arch_fence();
+ asm volatile("mrs %[x], far_el1" : [x] "=r"(result));
+ arch_fence();
+ return result;
+}
+
+static inline WARN_RESULT u64 arch_get_tid()
+{
+ u64 tid;
+ asm volatile("mrs %[x], tpidr_el1" : [x] "=r"(tid));
+ return tid;
+}
+
+static inline void arch_set_tid(u64 tid)
+{
+ arch_fence();
+ asm volatile("msr tpidr_el1, %[x]" : : [x] "r"(tid));
+ arch_fence();
+}
+
+/* Get User Stack Pointer. */
+static inline WARN_RESULT u64 arch_get_usp()
+{
+ u64 usp;
+ arch_fence();
+ asm volatile("mrs %[x], sp_el0" : [x] "=r"(usp));
+ arch_fence();
+ return usp;
+}
+
+/* Set User Stack Pointer. */
+static inline void arch_set_usp(u64 usp)
+{
+ arch_fence();
+ asm volatile("msr sp_el0, %[x]" : : [x] "r"(usp));
+ arch_fence();
+}
+
+static inline WARN_RESULT u64 arch_get_tid0()
+{
+ u64 tid;
+ asm volatile("mrs %[x], tpidr_el0" : [x] "=r"(tid));
+ return tid;
+}
+
+static inline void arch_set_tid0(u64 tid)
+{
+ arch_fence();
+ asm volatile("msr tpidr_el0, %[x]" : : [x] "r"(tid));
+ arch_fence();
+}
+
+static ALWAYS_INLINE void arch_sev()
+{
+ asm volatile("sev" ::: "memory");
+}
+
+static ALWAYS_INLINE void arch_wfe()
+{
+ asm volatile("wfe" ::: "memory");
+}
+
+static ALWAYS_INLINE void arch_wfi()
+{
+ asm volatile("wfi" ::: "memory");
+}
+
+static ALWAYS_INLINE void arch_yield()
+{
+ asm volatile("yield" ::: "memory");
+}
+
+static ALWAYS_INLINE u64 get_cntv_ctl_el0()
+{
+ u64 c;
+ asm volatile("mrs %0, cntv_ctl_el0" : "=r"(c));
+ return c;
+}
+
+static ALWAYS_INLINE void set_cntv_ctl_el0(u64 c)
+{
+ asm volatile("msr cntv_ctl_el0, %0" : : "r"(c));
+}
+
+static ALWAYS_INLINE void set_cntv_tval_el0(u64 t)
+{
+ asm volatile("msr cntv_tval_el0, %0" : : "r"(t));
+}
+
+/**
+ * Unmask all exceptions by clearing the DAIF bits.
+ *
+ * Returns true if traps were already enabled (DAIF == 0) and nothing
+ * was written; false if this call actually unmasked them. Callers use
+ * the return value to restore the previous mask state afterwards
+ * (see arch_with_trap).
+ */
+static inline WARN_RESULT bool _arch_enable_trap()
+{
+ u64 t;
+ asm volatile("mrs %[x], daif" : [x] "=r"(t));
+ if (t == 0)
+ return true;
+ asm volatile("msr daif, %[x]" ::[x] "r"(0ll));
+ return false;
+}
+
+/**
+ * Mask all exceptions by setting the D, A, I and F bits of DAIF
+ * (bits 9:6, written as 0xf << 6).
+ *
+ * Returns false if traps were already masked (DAIF != 0) and nothing
+ * was written; true if this call actually masked them -- the mirror of
+ * _arch_enable_trap's contract.
+ */
+static inline WARN_RESULT bool _arch_disable_trap()
+{
+ u64 t;
+ asm volatile("mrs %[x], daif" : [x] "=r"(t));
+ if (t != 0)
+ return false;
+ asm volatile("msr daif, %[x]" ::[x] "r"(0xfll << 6));
+ return true;
+}
+
+/* Run the following statement/block with traps enabled, then restore
+ * the previous mask state: the loop body executes exactly once; on
+ * exit, traps are re-disabled only if they were disabled on entry
+ * (i.e. _arch_enable_trap returned false). */
+#define arch_with_trap \
+ for (int __t_e = _arch_enable_trap(), __t_i = 0; __t_i < 1; \
+ __t_i++, __t_e || _arch_disable_trap())
+
+static ALWAYS_INLINE NO_RETURN void arch_stop_cpu()
+{
+ while (1)
+ arch_wfe();
+}
+
+/* Overwrite the saved return address in the current frame record
+ * (slot [fp + 8]) so the enclosing function "returns" to `addr`.
+ * NOTE(review): relies on the AArch64 frame-record layout {fp, lr}
+ * being present -- presumably the build keeps frame pointers; verify
+ * against the compiler flags. */
+#define set_return_addr(addr) \
+ (compiler_fence(), \
+ ((volatile u64 *)__builtin_frame_address(0))[1] = (u64)(addr), \
+ compiler_fence())
+
+void delay_us(u64 n);
+u64 psci_cpu_on(u64 cpuid, u64 ep);
+void smp_init();
+
+static inline u64 arch_get_currentel()
+{
+ u64 result;
+ asm volatile("mrs %[x], CurrentEL" : [x] "=r"(result));
+ return result; // bits[3:2] indicate EL (e.g., 0x4 for EL1, 0x0 for EL0)
+}
+
+static inline u64 arch_get_sctlr()
+{
+ u64 result;
+ asm volatile("mrs %[x], sctlr_el1" : [x] "=r"(result));
+ return result;
+}
\ No newline at end of file
diff --git a/src/aarch64/kernel_pt.c b/src/aarch64/kernel_pt.c
new file mode 100644
index 0000000..4e164a9
--- /dev/null
+++ b/src/aarch64/kernel_pt.c
@@ -0,0 +1,394 @@
+#include
+
+/**
+ * The layout of physical memory space of virt:
+ *
+ * 0..128MB is space for a flash device so we can run bootrom code such as UEFI.
+ * 128MB..256MB is used for miscellaneous device I/O.
+ * 256MB..1GB is reserved for possible future PCI support (ie where the
+ * PCI memory window will go if we add a PCI host controller).
+ * 1GB and up is RAM (which may happily spill over into the
+ * high memory region beyond 4GB).
+ * This represents a compromise between how much RAM can be given to
+ * a 32 bit VM and leaving space for expansion and in particular for PCI.
+ * Note that devices should generally be placed at multiples of 0x10000,
+ * to accommodate guests using 64K pages.
+ */
+
+/**
+ * Bits used to index: 29:21
+ * Size per entry: 2MB
+ * Address Span: [0x0, 0x40000000 (1GB)]
+ */
+__attribute__((__aligned__(PAGE_SIZE))) PTEntries _kernel_pt_lv2_dev = {
+ // Space up to 0x8000000 is reserved for a boot ROM
+ 0x0 & ~PTE_VALID,
+ 0x200000 & ~PTE_VALID,
+ 0x400000 & ~PTE_VALID,
+ 0x600000 & ~PTE_VALID,
+ 0x800000 & ~PTE_VALID,
+ 0xa00000 & ~PTE_VALID,
+ 0xc00000 & ~PTE_VALID,
+ 0xe00000 & ~PTE_VALID,
+ 0x1000000 & ~PTE_VALID,
+ 0x1200000 & ~PTE_VALID,
+ 0x1400000 & ~PTE_VALID,
+ 0x1600000 & ~PTE_VALID,
+ 0x1800000 & ~PTE_VALID,
+ 0x1a00000 & ~PTE_VALID,
+ 0x1c00000 & ~PTE_VALID,
+ 0x1e00000 & ~PTE_VALID,
+ 0x2000000 & ~PTE_VALID,
+ 0x2200000 & ~PTE_VALID,
+ 0x2400000 & ~PTE_VALID,
+ 0x2600000 & ~PTE_VALID,
+ 0x2800000 & ~PTE_VALID,
+ 0x2a00000 & ~PTE_VALID,
+ 0x2c00000 & ~PTE_VALID,
+ 0x2e00000 & ~PTE_VALID,
+ 0x3000000 & ~PTE_VALID,
+ 0x3200000 & ~PTE_VALID,
+ 0x3400000 & ~PTE_VALID,
+ 0x3600000 & ~PTE_VALID,
+ 0x3800000 & ~PTE_VALID,
+ 0x3a00000 & ~PTE_VALID,
+ 0x3c00000 & ~PTE_VALID,
+ 0x3e00000 & ~PTE_VALID,
+ 0x4000000 & ~PTE_VALID,
+ 0x4200000 & ~PTE_VALID,
+ 0x4400000 & ~PTE_VALID,
+ 0x4600000 & ~PTE_VALID,
+ 0x4800000 & ~PTE_VALID,
+ 0x4a00000 & ~PTE_VALID,
+ 0x4c00000 & ~PTE_VALID,
+ 0x4e00000 & ~PTE_VALID,
+ 0x5000000 & ~PTE_VALID,
+ 0x5200000 & ~PTE_VALID,
+ 0x5400000 & ~PTE_VALID,
+ 0x5600000 & ~PTE_VALID,
+ 0x5800000 & ~PTE_VALID,
+ 0x5a00000 & ~PTE_VALID,
+ 0x5c00000 & ~PTE_VALID,
+ 0x5e00000 & ~PTE_VALID,
+ 0x6000000 & ~PTE_VALID,
+ 0x6200000 & ~PTE_VALID,
+ 0x6400000 & ~PTE_VALID,
+ 0x6600000 & ~PTE_VALID,
+ 0x6800000 & ~PTE_VALID,
+ 0x6a00000 & ~PTE_VALID,
+ 0x6c00000 & ~PTE_VALID,
+ 0x6e00000 & ~PTE_VALID,
+ 0x7000000 & ~PTE_VALID,
+ 0x7200000 & ~PTE_VALID,
+ 0x7400000 & ~PTE_VALID,
+ 0x7600000 & ~PTE_VALID,
+ 0x7800000 & ~PTE_VALID,
+ 0x7a00000 & ~PTE_VALID,
+ 0x7c00000 & ~PTE_VALID,
+ 0x7e00000 & ~PTE_VALID,
+
+ // GIC
+ 0x8000000 | PTE_KERNEL_DEVICE,
+ 0x8200000 | PTE_KERNEL_DEVICE,
+ 0x8400000 | PTE_KERNEL_DEVICE,
+ 0x8600000 | PTE_KERNEL_DEVICE,
+ 0x8800000 | PTE_KERNEL_DEVICE,
+ 0x8a00000 | PTE_KERNEL_DEVICE,
+ 0x8c00000 | PTE_KERNEL_DEVICE,
+ 0x8e00000 | PTE_KERNEL_DEVICE,
+
+ // UART0
+ 0x9000000 | PTE_KERNEL_DEVICE,
+ 0x9200000 | PTE_KERNEL_DEVICE,
+ 0x9400000 | PTE_KERNEL_DEVICE,
+ 0x9600000 | PTE_KERNEL_DEVICE,
+ 0x9800000 | PTE_KERNEL_DEVICE,
+ 0x9a00000 | PTE_KERNEL_DEVICE,
+ 0x9c00000 | PTE_KERNEL_DEVICE,
+ 0x9e00000 | PTE_KERNEL_DEVICE,
+
+ // VIRTIO
+ 0xa000000 | PTE_KERNEL_DEVICE,
+ 0xa200000 | PTE_KERNEL_DEVICE,
+ 0xa400000 | PTE_KERNEL_DEVICE,
+ 0xa600000 | PTE_KERNEL_DEVICE,
+ 0xa800000 | PTE_KERNEL_DEVICE,
+ 0xaa00000 | PTE_KERNEL_DEVICE,
+ 0xac00000 | PTE_KERNEL_DEVICE,
+ 0xae00000 | PTE_KERNEL_DEVICE,
+};
+
+/**
+ * Bits used to index: 29:21
+ * Size per entry: 2MB
+ * Address Span: [0x40000000 (1GB), 0x80000000 (2GB)]
+ */
+__attribute__((__aligned__(PAGE_SIZE))) PTEntries _kernel_pt_lv2_ram = {
+ 0x40000000 | PTE_KERNEL_DATA, 0x40200000 | PTE_KERNEL_DATA,
+ 0x40400000 | PTE_KERNEL_DATA, 0x40600000 | PTE_KERNEL_DATA,
+ 0x40800000 | PTE_KERNEL_DATA, 0x40a00000 | PTE_KERNEL_DATA,
+ 0x40c00000 | PTE_KERNEL_DATA, 0x40e00000 | PTE_KERNEL_DATA,
+ 0x41000000 | PTE_KERNEL_DATA, 0x41200000 | PTE_KERNEL_DATA,
+ 0x41400000 | PTE_KERNEL_DATA, 0x41600000 | PTE_KERNEL_DATA,
+ 0x41800000 | PTE_KERNEL_DATA, 0x41a00000 | PTE_KERNEL_DATA,
+ 0x41c00000 | PTE_KERNEL_DATA, 0x41e00000 | PTE_KERNEL_DATA,
+ 0x42000000 | PTE_KERNEL_DATA, 0x42200000 | PTE_KERNEL_DATA,
+ 0x42400000 | PTE_KERNEL_DATA, 0x42600000 | PTE_KERNEL_DATA,
+ 0x42800000 | PTE_KERNEL_DATA, 0x42a00000 | PTE_KERNEL_DATA,
+ 0x42c00000 | PTE_KERNEL_DATA, 0x42e00000 | PTE_KERNEL_DATA,
+ 0x43000000 | PTE_KERNEL_DATA, 0x43200000 | PTE_KERNEL_DATA,
+ 0x43400000 | PTE_KERNEL_DATA, 0x43600000 | PTE_KERNEL_DATA,
+ 0x43800000 | PTE_KERNEL_DATA, 0x43a00000 | PTE_KERNEL_DATA,
+ 0x43c00000 | PTE_KERNEL_DATA, 0x43e00000 | PTE_KERNEL_DATA,
+ 0x44000000 | PTE_KERNEL_DATA, 0x44200000 | PTE_KERNEL_DATA,
+ 0x44400000 | PTE_KERNEL_DATA, 0x44600000 | PTE_KERNEL_DATA,
+ 0x44800000 | PTE_KERNEL_DATA, 0x44a00000 | PTE_KERNEL_DATA,
+ 0x44c00000 | PTE_KERNEL_DATA, 0x44e00000 | PTE_KERNEL_DATA,
+ 0x45000000 | PTE_KERNEL_DATA, 0x45200000 | PTE_KERNEL_DATA,
+ 0x45400000 | PTE_KERNEL_DATA, 0x45600000 | PTE_KERNEL_DATA,
+ 0x45800000 | PTE_KERNEL_DATA, 0x45a00000 | PTE_KERNEL_DATA,
+ 0x45c00000 | PTE_KERNEL_DATA, 0x45e00000 | PTE_KERNEL_DATA,
+ 0x46000000 | PTE_KERNEL_DATA, 0x46200000 | PTE_KERNEL_DATA,
+ 0x46400000 | PTE_KERNEL_DATA, 0x46600000 | PTE_KERNEL_DATA,
+ 0x46800000 | PTE_KERNEL_DATA, 0x46a00000 | PTE_KERNEL_DATA,
+ 0x46c00000 | PTE_KERNEL_DATA, 0x46e00000 | PTE_KERNEL_DATA,
+ 0x47000000 | PTE_KERNEL_DATA, 0x47200000 | PTE_KERNEL_DATA,
+ 0x47400000 | PTE_KERNEL_DATA, 0x47600000 | PTE_KERNEL_DATA,
+ 0x47800000 | PTE_KERNEL_DATA, 0x47a00000 | PTE_KERNEL_DATA,
+ 0x47c00000 | PTE_KERNEL_DATA, 0x47e00000 | PTE_KERNEL_DATA,
+ 0x48000000 | PTE_KERNEL_DATA, 0x48200000 | PTE_KERNEL_DATA,
+ 0x48400000 | PTE_KERNEL_DATA, 0x48600000 | PTE_KERNEL_DATA,
+ 0x48800000 | PTE_KERNEL_DATA, 0x48a00000 | PTE_KERNEL_DATA,
+ 0x48c00000 | PTE_KERNEL_DATA, 0x48e00000 | PTE_KERNEL_DATA,
+ 0x49000000 | PTE_KERNEL_DATA, 0x49200000 | PTE_KERNEL_DATA,
+ 0x49400000 | PTE_KERNEL_DATA, 0x49600000 | PTE_KERNEL_DATA,
+ 0x49800000 | PTE_KERNEL_DATA, 0x49a00000 | PTE_KERNEL_DATA,
+ 0x49c00000 | PTE_KERNEL_DATA, 0x49e00000 | PTE_KERNEL_DATA,
+ 0x4a000000 | PTE_KERNEL_DATA, 0x4a200000 | PTE_KERNEL_DATA,
+ 0x4a400000 | PTE_KERNEL_DATA, 0x4a600000 | PTE_KERNEL_DATA,
+ 0x4a800000 | PTE_KERNEL_DATA, 0x4aa00000 | PTE_KERNEL_DATA,
+ 0x4ac00000 | PTE_KERNEL_DATA, 0x4ae00000 | PTE_KERNEL_DATA,
+ 0x4b000000 | PTE_KERNEL_DATA, 0x4b200000 | PTE_KERNEL_DATA,
+ 0x4b400000 | PTE_KERNEL_DATA, 0x4b600000 | PTE_KERNEL_DATA,
+ 0x4b800000 | PTE_KERNEL_DATA, 0x4ba00000 | PTE_KERNEL_DATA,
+ 0x4bc00000 | PTE_KERNEL_DATA, 0x4be00000 | PTE_KERNEL_DATA,
+ 0x4c000000 | PTE_KERNEL_DATA, 0x4c200000 | PTE_KERNEL_DATA,
+ 0x4c400000 | PTE_KERNEL_DATA, 0x4c600000 | PTE_KERNEL_DATA,
+ 0x4c800000 | PTE_KERNEL_DATA, 0x4ca00000 | PTE_KERNEL_DATA,
+ 0x4cc00000 | PTE_KERNEL_DATA, 0x4ce00000 | PTE_KERNEL_DATA,
+ 0x4d000000 | PTE_KERNEL_DATA, 0x4d200000 | PTE_KERNEL_DATA,
+ 0x4d400000 | PTE_KERNEL_DATA, 0x4d600000 | PTE_KERNEL_DATA,
+ 0x4d800000 | PTE_KERNEL_DATA, 0x4da00000 | PTE_KERNEL_DATA,
+ 0x4dc00000 | PTE_KERNEL_DATA, 0x4de00000 | PTE_KERNEL_DATA,
+ 0x4e000000 | PTE_KERNEL_DATA, 0x4e200000 | PTE_KERNEL_DATA,
+ 0x4e400000 | PTE_KERNEL_DATA, 0x4e600000 | PTE_KERNEL_DATA,
+ 0x4e800000 | PTE_KERNEL_DATA, 0x4ea00000 | PTE_KERNEL_DATA,
+ 0x4ec00000 | PTE_KERNEL_DATA, 0x4ee00000 | PTE_KERNEL_DATA,
+ 0x4f000000 | PTE_KERNEL_DATA, 0x4f200000 | PTE_KERNEL_DATA,
+ 0x4f400000 | PTE_KERNEL_DATA, 0x4f600000 | PTE_KERNEL_DATA,
+ 0x4f800000 | PTE_KERNEL_DATA, 0x4fa00000 | PTE_KERNEL_DATA,
+ 0x4fc00000 | PTE_KERNEL_DATA, 0x4fe00000 | PTE_KERNEL_DATA,
+ 0x50000000 | PTE_KERNEL_DATA, 0x50200000 | PTE_KERNEL_DATA,
+ 0x50400000 | PTE_KERNEL_DATA, 0x50600000 | PTE_KERNEL_DATA,
+ 0x50800000 | PTE_KERNEL_DATA, 0x50a00000 | PTE_KERNEL_DATA,
+ 0x50c00000 | PTE_KERNEL_DATA, 0x50e00000 | PTE_KERNEL_DATA,
+ 0x51000000 | PTE_KERNEL_DATA, 0x51200000 | PTE_KERNEL_DATA,
+ 0x51400000 | PTE_KERNEL_DATA, 0x51600000 | PTE_KERNEL_DATA,
+ 0x51800000 | PTE_KERNEL_DATA, 0x51a00000 | PTE_KERNEL_DATA,
+ 0x51c00000 | PTE_KERNEL_DATA, 0x51e00000 | PTE_KERNEL_DATA,
+ 0x52000000 | PTE_KERNEL_DATA, 0x52200000 | PTE_KERNEL_DATA,
+ 0x52400000 | PTE_KERNEL_DATA, 0x52600000 | PTE_KERNEL_DATA,
+ 0x52800000 | PTE_KERNEL_DATA, 0x52a00000 | PTE_KERNEL_DATA,
+ 0x52c00000 | PTE_KERNEL_DATA, 0x52e00000 | PTE_KERNEL_DATA,
+ 0x53000000 | PTE_KERNEL_DATA, 0x53200000 | PTE_KERNEL_DATA,
+ 0x53400000 | PTE_KERNEL_DATA, 0x53600000 | PTE_KERNEL_DATA,
+ 0x53800000 | PTE_KERNEL_DATA, 0x53a00000 | PTE_KERNEL_DATA,
+ 0x53c00000 | PTE_KERNEL_DATA, 0x53e00000 | PTE_KERNEL_DATA,
+ 0x54000000 | PTE_KERNEL_DATA, 0x54200000 | PTE_KERNEL_DATA,
+ 0x54400000 | PTE_KERNEL_DATA, 0x54600000 | PTE_KERNEL_DATA,
+ 0x54800000 | PTE_KERNEL_DATA, 0x54a00000 | PTE_KERNEL_DATA,
+ 0x54c00000 | PTE_KERNEL_DATA, 0x54e00000 | PTE_KERNEL_DATA,
+ 0x55000000 | PTE_KERNEL_DATA, 0x55200000 | PTE_KERNEL_DATA,
+ 0x55400000 | PTE_KERNEL_DATA, 0x55600000 | PTE_KERNEL_DATA,
+ 0x55800000 | PTE_KERNEL_DATA, 0x55a00000 | PTE_KERNEL_DATA,
+ 0x55c00000 | PTE_KERNEL_DATA, 0x55e00000 | PTE_KERNEL_DATA,
+ 0x56000000 | PTE_KERNEL_DATA, 0x56200000 | PTE_KERNEL_DATA,
+ 0x56400000 | PTE_KERNEL_DATA, 0x56600000 | PTE_KERNEL_DATA,
+ 0x56800000 | PTE_KERNEL_DATA, 0x56a00000 | PTE_KERNEL_DATA,
+ 0x56c00000 | PTE_KERNEL_DATA, 0x56e00000 | PTE_KERNEL_DATA,
+ 0x57000000 | PTE_KERNEL_DATA, 0x57200000 | PTE_KERNEL_DATA,
+ 0x57400000 | PTE_KERNEL_DATA, 0x57600000 | PTE_KERNEL_DATA,
+ 0x57800000 | PTE_KERNEL_DATA, 0x57a00000 | PTE_KERNEL_DATA,
+ 0x57c00000 | PTE_KERNEL_DATA, 0x57e00000 | PTE_KERNEL_DATA,
+ 0x58000000 | PTE_KERNEL_DATA, 0x58200000 | PTE_KERNEL_DATA,
+ 0x58400000 | PTE_KERNEL_DATA, 0x58600000 | PTE_KERNEL_DATA,
+ 0x58800000 | PTE_KERNEL_DATA, 0x58a00000 | PTE_KERNEL_DATA,
+ 0x58c00000 | PTE_KERNEL_DATA, 0x58e00000 | PTE_KERNEL_DATA,
+ 0x59000000 | PTE_KERNEL_DATA, 0x59200000 | PTE_KERNEL_DATA,
+ 0x59400000 | PTE_KERNEL_DATA, 0x59600000 | PTE_KERNEL_DATA,
+ 0x59800000 | PTE_KERNEL_DATA, 0x59a00000 | PTE_KERNEL_DATA,
+ 0x59c00000 | PTE_KERNEL_DATA, 0x59e00000 | PTE_KERNEL_DATA,
+ 0x5a000000 | PTE_KERNEL_DATA, 0x5a200000 | PTE_KERNEL_DATA,
+ 0x5a400000 | PTE_KERNEL_DATA, 0x5a600000 | PTE_KERNEL_DATA,
+ 0x5a800000 | PTE_KERNEL_DATA, 0x5aa00000 | PTE_KERNEL_DATA,
+ 0x5ac00000 | PTE_KERNEL_DATA, 0x5ae00000 | PTE_KERNEL_DATA,
+ 0x5b000000 | PTE_KERNEL_DATA, 0x5b200000 | PTE_KERNEL_DATA,
+ 0x5b400000 | PTE_KERNEL_DATA, 0x5b600000 | PTE_KERNEL_DATA,
+ 0x5b800000 | PTE_KERNEL_DATA, 0x5ba00000 | PTE_KERNEL_DATA,
+ 0x5bc00000 | PTE_KERNEL_DATA, 0x5be00000 | PTE_KERNEL_DATA,
+ 0x5c000000 | PTE_KERNEL_DATA, 0x5c200000 | PTE_KERNEL_DATA,
+ 0x5c400000 | PTE_KERNEL_DATA, 0x5c600000 | PTE_KERNEL_DATA,
+ 0x5c800000 | PTE_KERNEL_DATA, 0x5ca00000 | PTE_KERNEL_DATA,
+ 0x5cc00000 | PTE_KERNEL_DATA, 0x5ce00000 | PTE_KERNEL_DATA,
+ 0x5d000000 | PTE_KERNEL_DATA, 0x5d200000 | PTE_KERNEL_DATA,
+ 0x5d400000 | PTE_KERNEL_DATA, 0x5d600000 | PTE_KERNEL_DATA,
+ 0x5d800000 | PTE_KERNEL_DATA, 0x5da00000 | PTE_KERNEL_DATA,
+ 0x5dc00000 | PTE_KERNEL_DATA, 0x5de00000 | PTE_KERNEL_DATA,
+ 0x5e000000 | PTE_KERNEL_DATA, 0x5e200000 | PTE_KERNEL_DATA,
+ 0x5e400000 | PTE_KERNEL_DATA, 0x5e600000 | PTE_KERNEL_DATA,
+ 0x5e800000 | PTE_KERNEL_DATA, 0x5ea00000 | PTE_KERNEL_DATA,
+ 0x5ec00000 | PTE_KERNEL_DATA, 0x5ee00000 | PTE_KERNEL_DATA,
+ 0x5f000000 | PTE_KERNEL_DATA, 0x5f200000 | PTE_KERNEL_DATA,
+ 0x5f400000 | PTE_KERNEL_DATA, 0x5f600000 | PTE_KERNEL_DATA,
+ 0x5f800000 | PTE_KERNEL_DATA, 0x5fa00000 | PTE_KERNEL_DATA,
+ 0x5fc00000 | PTE_KERNEL_DATA, 0x5fe00000 | PTE_KERNEL_DATA,
+ 0x60000000 | PTE_KERNEL_DATA, 0x60200000 | PTE_KERNEL_DATA,
+ 0x60400000 | PTE_KERNEL_DATA, 0x60600000 | PTE_KERNEL_DATA,
+ 0x60800000 | PTE_KERNEL_DATA, 0x60a00000 | PTE_KERNEL_DATA,
+ 0x60c00000 | PTE_KERNEL_DATA, 0x60e00000 | PTE_KERNEL_DATA,
+ 0x61000000 | PTE_KERNEL_DATA, 0x61200000 | PTE_KERNEL_DATA,
+ 0x61400000 | PTE_KERNEL_DATA, 0x61600000 | PTE_KERNEL_DATA,
+ 0x61800000 | PTE_KERNEL_DATA, 0x61a00000 | PTE_KERNEL_DATA,
+ 0x61c00000 | PTE_KERNEL_DATA, 0x61e00000 | PTE_KERNEL_DATA,
+ 0x62000000 | PTE_KERNEL_DATA, 0x62200000 | PTE_KERNEL_DATA,
+ 0x62400000 | PTE_KERNEL_DATA, 0x62600000 | PTE_KERNEL_DATA,
+ 0x62800000 | PTE_KERNEL_DATA, 0x62a00000 | PTE_KERNEL_DATA,
+ 0x62c00000 | PTE_KERNEL_DATA, 0x62e00000 | PTE_KERNEL_DATA,
+ 0x63000000 | PTE_KERNEL_DATA, 0x63200000 | PTE_KERNEL_DATA,
+ 0x63400000 | PTE_KERNEL_DATA, 0x63600000 | PTE_KERNEL_DATA,
+ 0x63800000 | PTE_KERNEL_DATA, 0x63a00000 | PTE_KERNEL_DATA,
+ 0x63c00000 | PTE_KERNEL_DATA, 0x63e00000 | PTE_KERNEL_DATA,
+ 0x64000000 | PTE_KERNEL_DATA, 0x64200000 | PTE_KERNEL_DATA,
+ 0x64400000 | PTE_KERNEL_DATA, 0x64600000 | PTE_KERNEL_DATA,
+ 0x64800000 | PTE_KERNEL_DATA, 0x64a00000 | PTE_KERNEL_DATA,
+ 0x64c00000 | PTE_KERNEL_DATA, 0x64e00000 | PTE_KERNEL_DATA,
+ 0x65000000 | PTE_KERNEL_DATA, 0x65200000 | PTE_KERNEL_DATA,
+ 0x65400000 | PTE_KERNEL_DATA, 0x65600000 | PTE_KERNEL_DATA,
+ 0x65800000 | PTE_KERNEL_DATA, 0x65a00000 | PTE_KERNEL_DATA,
+ 0x65c00000 | PTE_KERNEL_DATA, 0x65e00000 | PTE_KERNEL_DATA,
+ 0x66000000 | PTE_KERNEL_DATA, 0x66200000 | PTE_KERNEL_DATA,
+ 0x66400000 | PTE_KERNEL_DATA, 0x66600000 | PTE_KERNEL_DATA,
+ 0x66800000 | PTE_KERNEL_DATA, 0x66a00000 | PTE_KERNEL_DATA,
+ 0x66c00000 | PTE_KERNEL_DATA, 0x66e00000 | PTE_KERNEL_DATA,
+ 0x67000000 | PTE_KERNEL_DATA, 0x67200000 | PTE_KERNEL_DATA,
+ 0x67400000 | PTE_KERNEL_DATA, 0x67600000 | PTE_KERNEL_DATA,
+ 0x67800000 | PTE_KERNEL_DATA, 0x67a00000 | PTE_KERNEL_DATA,
+ 0x67c00000 | PTE_KERNEL_DATA, 0x67e00000 | PTE_KERNEL_DATA,
+ 0x68000000 | PTE_KERNEL_DATA, 0x68200000 | PTE_KERNEL_DATA,
+ 0x68400000 | PTE_KERNEL_DATA, 0x68600000 | PTE_KERNEL_DATA,
+ 0x68800000 | PTE_KERNEL_DATA, 0x68a00000 | PTE_KERNEL_DATA,
+ 0x68c00000 | PTE_KERNEL_DATA, 0x68e00000 | PTE_KERNEL_DATA,
+ 0x69000000 | PTE_KERNEL_DATA, 0x69200000 | PTE_KERNEL_DATA,
+ 0x69400000 | PTE_KERNEL_DATA, 0x69600000 | PTE_KERNEL_DATA,
+ 0x69800000 | PTE_KERNEL_DATA, 0x69a00000 | PTE_KERNEL_DATA,
+ 0x69c00000 | PTE_KERNEL_DATA, 0x69e00000 | PTE_KERNEL_DATA,
+ 0x6a000000 | PTE_KERNEL_DATA, 0x6a200000 | PTE_KERNEL_DATA,
+ 0x6a400000 | PTE_KERNEL_DATA, 0x6a600000 | PTE_KERNEL_DATA,
+ 0x6a800000 | PTE_KERNEL_DATA, 0x6aa00000 | PTE_KERNEL_DATA,
+ 0x6ac00000 | PTE_KERNEL_DATA, 0x6ae00000 | PTE_KERNEL_DATA,
+ 0x6b000000 | PTE_KERNEL_DATA, 0x6b200000 | PTE_KERNEL_DATA,
+ 0x6b400000 | PTE_KERNEL_DATA, 0x6b600000 | PTE_KERNEL_DATA,
+ 0x6b800000 | PTE_KERNEL_DATA, 0x6ba00000 | PTE_KERNEL_DATA,
+ 0x6bc00000 | PTE_KERNEL_DATA, 0x6be00000 | PTE_KERNEL_DATA,
+ 0x6c000000 | PTE_KERNEL_DATA, 0x6c200000 | PTE_KERNEL_DATA,
+ 0x6c400000 | PTE_KERNEL_DATA, 0x6c600000 | PTE_KERNEL_DATA,
+ 0x6c800000 | PTE_KERNEL_DATA, 0x6ca00000 | PTE_KERNEL_DATA,
+ 0x6cc00000 | PTE_KERNEL_DATA, 0x6ce00000 | PTE_KERNEL_DATA,
+ 0x6d000000 | PTE_KERNEL_DATA, 0x6d200000 | PTE_KERNEL_DATA,
+ 0x6d400000 | PTE_KERNEL_DATA, 0x6d600000 | PTE_KERNEL_DATA,
+ 0x6d800000 | PTE_KERNEL_DATA, 0x6da00000 | PTE_KERNEL_DATA,
+ 0x6dc00000 | PTE_KERNEL_DATA, 0x6de00000 | PTE_KERNEL_DATA,
+ 0x6e000000 | PTE_KERNEL_DATA, 0x6e200000 | PTE_KERNEL_DATA,
+ 0x6e400000 | PTE_KERNEL_DATA, 0x6e600000 | PTE_KERNEL_DATA,
+ 0x6e800000 | PTE_KERNEL_DATA, 0x6ea00000 | PTE_KERNEL_DATA,
+ 0x6ec00000 | PTE_KERNEL_DATA, 0x6ee00000 | PTE_KERNEL_DATA,
+ 0x6f000000 | PTE_KERNEL_DATA, 0x6f200000 | PTE_KERNEL_DATA,
+ 0x6f400000 | PTE_KERNEL_DATA, 0x6f600000 | PTE_KERNEL_DATA,
+ 0x6f800000 | PTE_KERNEL_DATA, 0x6fa00000 | PTE_KERNEL_DATA,
+ 0x6fc00000 | PTE_KERNEL_DATA, 0x6fe00000 | PTE_KERNEL_DATA,
+ 0x70000000 | PTE_KERNEL_DATA, 0x70200000 | PTE_KERNEL_DATA,
+ 0x70400000 | PTE_KERNEL_DATA, 0x70600000 | PTE_KERNEL_DATA,
+ 0x70800000 | PTE_KERNEL_DATA, 0x70a00000 | PTE_KERNEL_DATA,
+ 0x70c00000 | PTE_KERNEL_DATA, 0x70e00000 | PTE_KERNEL_DATA,
+ 0x71000000 | PTE_KERNEL_DATA, 0x71200000 | PTE_KERNEL_DATA,
+ 0x71400000 | PTE_KERNEL_DATA, 0x71600000 | PTE_KERNEL_DATA,
+ 0x71800000 | PTE_KERNEL_DATA, 0x71a00000 | PTE_KERNEL_DATA,
+ 0x71c00000 | PTE_KERNEL_DATA, 0x71e00000 | PTE_KERNEL_DATA,
+ 0x72000000 | PTE_KERNEL_DATA, 0x72200000 | PTE_KERNEL_DATA,
+ 0x72400000 | PTE_KERNEL_DATA, 0x72600000 | PTE_KERNEL_DATA,
+ 0x72800000 | PTE_KERNEL_DATA, 0x72a00000 | PTE_KERNEL_DATA,
+ 0x72c00000 | PTE_KERNEL_DATA, 0x72e00000 | PTE_KERNEL_DATA,
+ 0x73000000 | PTE_KERNEL_DATA, 0x73200000 | PTE_KERNEL_DATA,
+ 0x73400000 | PTE_KERNEL_DATA, 0x73600000 | PTE_KERNEL_DATA,
+ 0x73800000 | PTE_KERNEL_DATA, 0x73a00000 | PTE_KERNEL_DATA,
+ 0x73c00000 | PTE_KERNEL_DATA, 0x73e00000 | PTE_KERNEL_DATA,
+ 0x74000000 | PTE_KERNEL_DATA, 0x74200000 | PTE_KERNEL_DATA,
+ 0x74400000 | PTE_KERNEL_DATA, 0x74600000 | PTE_KERNEL_DATA,
+ 0x74800000 | PTE_KERNEL_DATA, 0x74a00000 | PTE_KERNEL_DATA,
+ 0x74c00000 | PTE_KERNEL_DATA, 0x74e00000 | PTE_KERNEL_DATA,
+ 0x75000000 | PTE_KERNEL_DATA, 0x75200000 | PTE_KERNEL_DATA,
+ 0x75400000 | PTE_KERNEL_DATA, 0x75600000 | PTE_KERNEL_DATA,
+ 0x75800000 | PTE_KERNEL_DATA, 0x75a00000 | PTE_KERNEL_DATA,
+ 0x75c00000 | PTE_KERNEL_DATA, 0x75e00000 | PTE_KERNEL_DATA,
+ 0x76000000 | PTE_KERNEL_DATA, 0x76200000 | PTE_KERNEL_DATA,
+ 0x76400000 | PTE_KERNEL_DATA, 0x76600000 | PTE_KERNEL_DATA,
+ 0x76800000 | PTE_KERNEL_DATA, 0x76a00000 | PTE_KERNEL_DATA,
+ 0x76c00000 | PTE_KERNEL_DATA, 0x76e00000 | PTE_KERNEL_DATA,
+ 0x77000000 | PTE_KERNEL_DATA, 0x77200000 | PTE_KERNEL_DATA,
+ 0x77400000 | PTE_KERNEL_DATA, 0x77600000 | PTE_KERNEL_DATA,
+ 0x77800000 | PTE_KERNEL_DATA, 0x77a00000 | PTE_KERNEL_DATA,
+ 0x77c00000 | PTE_KERNEL_DATA, 0x77e00000 | PTE_KERNEL_DATA,
+ 0x78000000 | PTE_KERNEL_DATA, 0x78200000 | PTE_KERNEL_DATA,
+ 0x78400000 | PTE_KERNEL_DATA, 0x78600000 | PTE_KERNEL_DATA,
+ 0x78800000 | PTE_KERNEL_DATA, 0x78a00000 | PTE_KERNEL_DATA,
+ 0x78c00000 | PTE_KERNEL_DATA, 0x78e00000 | PTE_KERNEL_DATA,
+ 0x79000000 | PTE_KERNEL_DATA, 0x79200000 | PTE_KERNEL_DATA,
+ 0x79400000 | PTE_KERNEL_DATA, 0x79600000 | PTE_KERNEL_DATA,
+ 0x79800000 | PTE_KERNEL_DATA, 0x79a00000 | PTE_KERNEL_DATA,
+ 0x79c00000 | PTE_KERNEL_DATA, 0x79e00000 | PTE_KERNEL_DATA,
+ 0x7a000000 | PTE_KERNEL_DATA, 0x7a200000 | PTE_KERNEL_DATA,
+ 0x7a400000 | PTE_KERNEL_DATA, 0x7a600000 | PTE_KERNEL_DATA,
+ 0x7a800000 | PTE_KERNEL_DATA, 0x7aa00000 | PTE_KERNEL_DATA,
+ 0x7ac00000 | PTE_KERNEL_DATA, 0x7ae00000 | PTE_KERNEL_DATA,
+ 0x7b000000 | PTE_KERNEL_DATA, 0x7b200000 | PTE_KERNEL_DATA,
+ 0x7b400000 | PTE_KERNEL_DATA, 0x7b600000 | PTE_KERNEL_DATA,
+ 0x7b800000 | PTE_KERNEL_DATA, 0x7ba00000 | PTE_KERNEL_DATA,
+ 0x7bc00000 | PTE_KERNEL_DATA, 0x7be00000 | PTE_KERNEL_DATA,
+ 0x7c000000 | PTE_KERNEL_DATA, 0x7c200000 | PTE_KERNEL_DATA,
+ 0x7c400000 | PTE_KERNEL_DATA, 0x7c600000 | PTE_KERNEL_DATA,
+ 0x7c800000 | PTE_KERNEL_DATA, 0x7ca00000 | PTE_KERNEL_DATA,
+ 0x7cc00000 | PTE_KERNEL_DATA, 0x7ce00000 | PTE_KERNEL_DATA,
+ 0x7d000000 | PTE_KERNEL_DATA, 0x7d200000 | PTE_KERNEL_DATA,
+ 0x7d400000 | PTE_KERNEL_DATA, 0x7d600000 | PTE_KERNEL_DATA,
+ 0x7d800000 | PTE_KERNEL_DATA, 0x7da00000 | PTE_KERNEL_DATA,
+ 0x7dc00000 | PTE_KERNEL_DATA, 0x7de00000 | PTE_KERNEL_DATA,
+ 0x7e000000 | PTE_KERNEL_DATA, 0x7e200000 | PTE_KERNEL_DATA,
+ 0x7e400000 | PTE_KERNEL_DATA, 0x7e600000 | PTE_KERNEL_DATA,
+ 0x7e800000 | PTE_KERNEL_DATA, 0x7ea00000 | PTE_KERNEL_DATA,
+ 0x7ec00000 | PTE_KERNEL_DATA, 0x7ee00000 | PTE_KERNEL_DATA,
+ 0x7f000000 | PTE_KERNEL_DATA, 0x7f200000 | PTE_KERNEL_DATA,
+ 0x7f400000 | PTE_KERNEL_DATA, 0x7f600000 | PTE_KERNEL_DATA,
+ 0x7f800000 | PTE_KERNEL_DATA, 0x7fa00000 | PTE_KERNEL_DATA,
+ 0x7fc00000 | PTE_KERNEL_DATA, 0x7fe00000 | PTE_KERNEL_DATA
+};
+
+__attribute__((__aligned__(PAGE_SIZE))) PTEntries _kernel_pt_level1 = {
+ K2P(_kernel_pt_lv2_dev) + PTE_TABLE,
+ K2P(_kernel_pt_lv2_ram) + PTE_TABLE,
+};
+
+__attribute__((__aligned__(PAGE_SIZE))) PTEntries kernel_pt_level0 = {
+ K2P(_kernel_pt_level1) + PTE_TABLE
+};
+
+__attribute__((__aligned__(PAGE_SIZE))) PTEntries invalid_pt = { 0 };
\ No newline at end of file
diff --git a/src/aarch64/mmu.h b/src/aarch64/mmu.h
new file mode 100644
index 0000000..9b78926
--- /dev/null
+++ b/src/aarch64/mmu.h
@@ -0,0 +1,71 @@
+#pragma once
+
+#include
+typedef unsigned long long u64;
+#define PAGE_SIZE 4096
+
+/* Memory region attributes */
+#define MT_DEVICE_nGnRnE 0x0
+#define MT_NORMAL 0x1
+#define MT_NORMAL_NC 0x2
+#define MT_DEVICE_nGnRnE_FLAGS 0x00
+#define MT_NORMAL_FLAGS \
+ 0xFF /* Inner/Outer Write-Back Non-Transient RW-Allocate */
+#define MT_NORMAL_NC_FLAGS 0x44 /* Inner/Outer Non-Cacheable */
+
+#define SH_OUTER (2 << 8)
+#define SH_INNER (3 << 8)
+
+#define AF_USED (1 << 10)
+
+#define PTE_NORMAL_NC ((MT_NORMAL_NC << 2) | AF_USED | SH_OUTER)
+#define PTE_NORMAL ((MT_NORMAL << 2) | AF_USED | SH_OUTER)
+#define PTE_DEVICE ((MT_DEVICE_nGnRnE << 2) | AF_USED)
+
+#define PTE_VALID 0x1
+
+#define PTE_TABLE 0x3
+#define PTE_BLOCK 0x1
+#define PTE_PAGE 0x3
+
+#define PTE_KERNEL (0 << 6)
+#define PTE_USER (1 << 6)
+#define PTE_RO (1 << 7)
+#define PTE_RW (0 << 7)
+
+#define PTE_KERNEL_DATA (PTE_KERNEL | PTE_NORMAL | PTE_BLOCK)
+#define PTE_KERNEL_DEVICE (PTE_KERNEL | PTE_DEVICE | PTE_BLOCK)
+#define PTE_USER_DATA (PTE_USER | PTE_NORMAL | PTE_PAGE)
+
+#define N_PTE_PER_TABLE 512
+
+#define PTE_HIGH_NX (1LL << 54)
+
+#define KSPACE_MASK 0xFFFF000000000000
+
+// convert kernel address into physical address.
+#define K2P(addr) ((u64)(addr) - (KSPACE_MASK))
+
+// convert physical address into kernel address.
+#define P2K(addr) ((u64)(addr) + (KSPACE_MASK))
+
+// convert any address into kernel address space.
+#define KSPACE(addr) ((u64)(addr) | (KSPACE_MASK))
+
// convert any address into physical address space.
+#define PSPACE(addr) ((u64)(addr) & (~KSPACE_MASK))
+
+typedef u64 PTEntry;
+typedef PTEntry PTEntries[N_PTE_PER_TABLE];
+typedef PTEntry *PTEntriesPtr;
+
+#define VA_OFFSET(va) ((u64)(va) & 0xFFF)
+#define PTE_ADDRESS(pte) ((pte) & ~0xFFFF000000000FFF)
+#define PTE_FLAGS(pte) ((pte) & 0xFFFF000000000FFF)
+#define P2N(addr) (addr >> 12)
+#define PAGE_BASE(addr) ((u64)addr & ~(PAGE_SIZE - 1))
+
+#define VA_PART0(va) (((u64)(va) & 0xFF8000000000) >> 39)
+#define VA_PART1(va) (((u64)(va) & 0x7FC0000000) >> 30)
+#define VA_PART2(va) (((u64)(va) & 0x3FE00000) >> 21)
+#define VA_PART3(va) (((u64)(va) & 0x1FF000) >> 12)
diff --git a/src/aarch64/swtch.S b/src/aarch64/swtch.S
new file mode 100644
index 0000000..c6a404f
--- /dev/null
+++ b/src/aarch64/swtch.S
@@ -0,0 +1,31 @@
+// Do kernel-mode context switch
+// x0 (first parameter): new context ptr
+// x1 (second parameter): addr to save old context ptr
+
+#define pushp(a, b) stp a, b, [sp, #-0x10]!
+#define popp(a, b) ldp a, b, [sp], #0x10
+
.globl swtch
swtch:
// TODO: save and restore KernelContext
// store the last callee-saved registers
// (x0, x1, and lr are saved as well, so a freshly crafted stack can seed
// the argument registers and return address of a new context -- confirm
// against how process setup builds its initial KernelContext)
pushp(x28,x29)
pushp(x26,x27)
pushp(x24,x25)
pushp(x22,x23)
pushp(x20,x21)
pushp(x1,x19)
pushp(lr,x0)
// change stack: publish the old context pointer (current sp) through x1,
// then adopt the new context's stack from x0
mov x2 , sp
str x2 , [x1]
mov sp , x0
// load the new callee-saved registers from the new stack
popp(lr,x0)
popp(x1,x19)
popp(x20,x21)
popp(x22,x23)
popp(x24,x25)
popp(x26,x27)
popp(x28,x29)
ret
diff --git a/src/aarch64/trap.S b/src/aarch64/trap.S
new file mode 100644
index 0000000..5dfad6f
--- /dev/null
+++ b/src/aarch64/trap.S
@@ -0,0 +1,50 @@
+#define pushp(a, b) stp a, b, [sp, #-0x10]!
+#define popp(a, b) ldp a, b, [sp], #0x10
+
/* `exception_vector.S` sends all traps here. Builds the UserContext frame
 * on the kernel stack (x0..x17 pairs, then spsr/elr, then sp_el0) and
 * calls the C dispatcher; execution falls through into trap_return. */
.global trap_entry
trap_entry:
// TODO: save UserContext
pushp(x16,x17)
pushp(x14,x15)
pushp(x12,x13)
pushp(x10,x11)
pushp(x8,x9)
pushp(x6,x7)
pushp(x4,x5)
pushp(x2,x3)
pushp(x0,x1)

// privileged state needed to resume the interrupted code
mrs x0, spsr_el1
mrs x1, elr_el1
pushp(x0,x1)

// NOTE(review): sp_el0 is read twice and pushed as a pair -- presumably a
// 16-byte-aligned placeholder pair in UserContext; confirm its layout.
mrs x0, sp_el0
mrs x1, sp_el0
pushp(x0,x1)

// pass the frame base as the UserContext* argument
mov x0, sp
bl trap_global_handler
+
.global trap_return
trap_return:
// TODO: restore UserContext

// NOTE(review): both halves of the pair are written to sp_el0; the second
// write overwrites the first with the duplicated saved value. This mirrors
// the double save in trap_entry -- confirm intentional.
popp(x0,x1)
msr sp_el0, x0
msr sp_el0, x1

// restore the state eret consumes
popp(x0,x1)
msr spsr_el1, x0
msr elr_el1, x1

// general-purpose registers, reverse order of trap_entry
popp(x0,x1)
popp(x2,x3)
popp(x4,x5)
popp(x6,x7)
popp(x8,x9)
popp(x10,x11)
popp(x12,x13)
popp(x14,x15)
popp(x16,x17)
eret
diff --git a/src/aarch64/trap.c b/src/aarch64/trap.c
new file mode 100644
index 0000000..6f83779
--- /dev/null
+++ b/src/aarch64/trap.c
@@ -0,0 +1,71 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+void trap_global_handler(UserContext *context)
+{
+ //printk("[TRAP] Trap occurred! ELR=0x%llx, ESR=0x%llx\n", context->elr, arch_get_esr());
+ thisproc()->ucontext = context;
+
+ u64 esr = arch_get_esr();
+ u64 ec = esr >> ESR_EC_SHIFT;
+ u64 iss = esr & ESR_ISS_MASK;
+ u64 ir = esr & ESR_IR_MASK;
+
+ (void)iss;
+
+ arch_reset_esr();
+
+ switch (ec) {
+ case ESR_EC_UNKNOWN: {
+ if (ir)
+ PANIC();
+ else
+ interrupt_global_handler();
+ } break;
+ case ESR_EC_SVC64: {
+ syscall_entry(context);
+ } break;
+ case ESR_EC_IABORT_EL0:
+ case ESR_EC_IABORT_EL1:
+ case ESR_EC_DABORT_EL0:
+ case ESR_EC_DABORT_EL1: {
+ printk("[ERROR] Page Fault Exception!\n");
+ u64 far = arch_get_far();
+ u64 elr = arch_get_elr();
+ u64 ttbr0 = arch_get_ttbr0();
+ int pid = thisproc() ? thisproc()->pid : -999;
+ // Decode ISS basic fields (DFSC/IFSC low 6 bits, WnR bit for Data Abort)
+ u64 fsc = iss & 0x3F; // Fault Status Code
+ u64 is_write = (iss >> 6) & 1; // WnR for DAbort
+ printk("[ERROR] pid=%d ec=0x%llx iss=0x%llx fsc=0x%llx wr=%llu FAR=0x%llx ELR=0x%llx TTBR0=0x%llx\n",
+ pid, ec, iss, fsc, is_write, far, elr, ttbr0);
+ // Extra diagnostics: current EL and SCTLR (to infer PAN/behavior)
+ u64 cur_el = arch_get_currentel();
+ u64 sctlr = arch_get_sctlr();
+ printk("[ERROR] CurrentEL=0x%llx SCTLR_EL1=0x%llx\n", cur_el, sctlr);
+ if (thisproc()) {
+ extern void pt_dump_va(struct pgdir*, u64);
+ pt_dump_va(&thisproc()->pgdir, far);
+ }
+ PANIC();
+ } break;
+ default: {
+ printk("[ERROR] Unknwon exception %llu\n", ec);
+ PANIC();
+ }
+ }
+
+ // TODO: stop killed process while returning to user space
+ if (thisproc()->killed == true && ((context->elr) & 0xffff000000000000) == 0) exit(-1);
+}
+
/* Landing pad for exception vectors that should never fire: log the
 * vector type and halt the kernel. */
NO_RETURN void trap_error_handler(u64 type)
{
        printk("[ERROR] Unknown trap type %llu\n", type);
        PANIC();
}
diff --git a/src/aarch64/trap.h b/src/aarch64/trap.h
new file mode 100644
index 0000000..83ffd71
--- /dev/null
+++ b/src/aarch64/trap.h
@@ -0,0 +1,14 @@
#pragma once

#include

/* Field extraction for ESR_EL1 (exception syndrome register). */
#define ESR_EC_SHIFT 26 /* EC: exception class, bits [31:26] */
#define ESR_ISS_MASK 0xFFFFFF /* low 24 syndrome bits */
#define ESR_IR_MASK (1 << 25) /* IL bit: 32-bit instruction length */

/* Exception-class values dispatched in trap.c. */
#define ESR_EC_UNKNOWN 0x00
#define ESR_EC_SVC64 0x15 /* SVC from AArch64 (syscall) */
#define ESR_EC_IABORT_EL0 0x20 /* instruction abort from EL0 */
#define ESR_EC_IABORT_EL1 0x21 /* instruction abort from EL1 */
#define ESR_EC_DABORT_EL0 0x24 /* data abort from EL0 */
#define ESR_EC_DABORT_EL1 0x25 /* data abort from EL1 */
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
new file mode 100644
index 0000000..2ea3d0c
--- /dev/null
+++ b/src/common/CMakeLists.txt
@@ -0,0 +1,3 @@
# Collect every C file in this directory into the static `common` library.
file(GLOB common_sources CONFIGURE_DEPENDS "*.c")

add_library(common STATIC ${common_sources})
diff --git a/src/common/bitmap.h b/src/common/bitmap.h
new file mode 100755
index 0000000..b9e3d6b
--- /dev/null
+++ b/src/common/bitmap.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include
+
+// bitmap is a compact representation of boolean array.
+// consecutive 64 bits are stored in one u64 (BitmapCell).
+typedef u64 BitmapCell;
+
+#define BITMAP_BITS_PER_CELL (sizeof(BitmapCell) * 8)
+#define BITMAP_TO_NUM_CELLS(size) \
+ (((size) + BITMAP_BITS_PER_CELL - 1) / BITMAP_BITS_PER_CELL)
+
+// calculate cell index `idx` and in-cell `offset` from `index`.
+#define BITMAP_PARSE_INDEX(index, idx, offset) \
+ do { \
+ idx = index / BITMAP_BITS_PER_CELL; \
+ offset = index % BITMAP_BITS_PER_CELL; \
+ } while (false)
+
+// declare a new bitmap with `size` bits.
+#define Bitmap(name, size) BitmapCell name[BITMAP_TO_NUM_CELLS(size)]
+
+// initialize a bitmap with `size` bits. All bits are cleared.
+void init_bitmap(BitmapCell *bitmap, usize size);
+
+// get the bit at `index`.
+static INLINE bool bitmap_get(BitmapCell *bitmap, usize index)
+{
+ usize idx, offset;
+ BITMAP_PARSE_INDEX(index, idx, offset);
+ return (bitmap[idx] >> offset) & 1;
+}
+
+// set the bit at `index` to 1.
+static INLINE void bitmap_set(BitmapCell *bitmap, usize index)
+{
+ usize idx, offset;
+ BITMAP_PARSE_INDEX(index, idx, offset);
+ bitmap[idx] |= BIT(offset);
+}
+
+// set the bit at `index` to 0.
+static INLINE void bitmap_clear(BitmapCell *bitmap, usize index)
+{
+ usize idx, offset;
+ BITMAP_PARSE_INDEX(index, idx, offset);
+ bitmap[idx] &= ~BIT(offset);
+}
diff --git a/src/common/buf.h b/src/common/buf.h
new file mode 100755
index 0000000..38792e9
--- /dev/null
+++ b/src/common/buf.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include
+#include
+
+#define BSIZE 512
+#define B_VALID 0x2 // Buffer has been read from disk.
+#define B_DIRTY 0x4 // Buffer needs to be written to disk.
+
/* A single cached disk block. */
typedef struct {
        int flags; /* B_VALID / B_DIRTY state bits */
        u8 data[BSIZE]; /* in-memory copy of the 512-byte block */
        u32 block_no; /* disk block number this buffer caches */

        /* @todo: It depends on you to add other necessary elements. */
        Semaphore sem; /* presumably signals I/O completion -- confirm */
} Buf;
diff --git a/src/common/checker.h b/src/common/checker.h
new file mode 100644
index 0000000..0451189
--- /dev/null
+++ b/src/common/checker.h
@@ -0,0 +1,16 @@
#pragma once

/* Compile-time context-balance checker. `setup_checker` declares a counter
 * that is checked at scope exit via the `cleanup` attribute; begin/end
 * bump it. If the optimizer cannot prove the counter is zero at cleanup,
 * the `error` attribute on __checker_fail turns the mismatch into a
 * build failure. */
typedef int Checker;

__attribute__((error("Checker: context mismatching"), unused)) void
__checker_fail();
/* Must inline so the counter value is visible to the optimizer. */
__attribute__((always_inline)) static inline void __checker_check(Checker *x)
{
        if (*x)
                __checker_fail();
}

#define setup_checker(id) \
        Checker __chkcounter_##id __attribute__((cleanup(__checker_check))) = 0
#define checker_begin_ctx(id) (__chkcounter_##id++)
#define checker_end_ctx(id) (__chkcounter_##id--)
diff --git a/src/common/defines.h b/src/common/defines.h
new file mode 100755
index 0000000..c37a9fe
--- /dev/null
+++ b/src/common/defines.h
@@ -0,0 +1,108 @@
+#pragma once
+
+#ifndef __cplusplus
+#define true 1
+#define false 0
+#define auto __auto_type
+typedef char bool;
+#endif
+
+#define TRUE true
+#define FALSE false
+#ifndef NULL
+#define NULL 0
+#endif
+
+typedef signed char i8;
+typedef unsigned char u8;
+typedef signed short i16;
+typedef unsigned short u16;
+typedef signed int i32;
+typedef unsigned int u32;
+typedef signed long long i64;
+typedef unsigned long long u64;
+
+typedef i64 isize;
+typedef u64 usize;
+
+/* Efficient min and max operations */
+#define MIN(_a, _b) \
+ ({ \
+ typeof(_a) __a = (_a); \
+ typeof(_b) __b = (_b); \
+ __a <= __b ? __a : __b; \
+ })
+
+#define MAX(_a, _b) \
+ ({ \
+ typeof(_a) __a = (_a); \
+ typeof(_b) __b = (_b); \
+ __a >= __b ? __a : __b; \
+ })
+
+#define BIT(i) (1ull << (i))
+
+#define NO_BSS __attribute__((section(".data")))
+#define NO_RETURN __attribute__((noreturn))
+#define INLINE inline __attribute__((unused))
+#define ALWAYS_INLINE inline __attribute__((unused, always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_IPA __attribute__((noipa))
+#define WARN_RESULT __attribute__((warn_unused_result))
+
+// NOTE: no_return will disable traps.
+// NO_RETURN NO_INLINE void no_return();
+
+/* Return the offset of `member` inside struct `type`. */
+#define offset_of(type, member) ((usize)(&((type *)NULL)->member))
+
+/**
+ * The following macro assumes that `mptr` is a pointer to a `member` within
+ * a struct of type `type`. It returns a pointer to the encompassing struct of
+ * type `type` that contains this `member`.
+ *
+ * This macro is particularly useful in scenarios involving lists. For instance,
+ * it is common practice to embed a `ListNode` within a struct, as demonstrated
+ * below:
+ *
+ * typedef struct {
+ * u64 data;
+ * ListNode node;
+ * } Container;
+ *
+ * Container a;
+ * ListNode *b = &a.node;
+ *
+ * In this example, the expression `container_of(b, Container, node)` will yield
+ * the same result as `&a`.
+ */
+#define container_of(mptr, type, member) \
+ ({ \
+ const typeof(((type *)NULL)->member) *_mptr = (mptr); \
+ (type *)((u8 *)_mptr - offset_of(type, member)); \
+ })
+
+/* Return the largest c that c is a multiple of b and c <= a. */
+static INLINE u64 round_down(u64 a, u64 b)
+{
+ return a - a % b;
+}
+
+/* Return the smallest c that c is a multiple of b and c >= a. */
+static INLINE u64 round_up(u64 a, u64 b)
+{
+ return round_down(a + b - 1, b);
+}
+
+void _panic(const char *, int);
+NO_INLINE NO_RETURN void _panic(const char *, int);
+#define PANIC() _panic(__FILE__, __LINE__)
+#define ASSERT(expr) \
+ ({ \
+ if (!(expr)) \
+ PANIC(); \
+ })
+
+#define LO(addr) (u32)((addr) & 0xffffffff)
+#define HI(addr) (u32)(((addr) >> 32) & 0xffffffff)
+#define REG(addr) (*(volatile u32 *)(u64)(addr))
diff --git a/src/common/format.c b/src/common/format.c
new file mode 100644
index 0000000..843a5c0
--- /dev/null
+++ b/src/common/format.c
@@ -0,0 +1,91 @@
+#include
+#include
+#include
+
+static void _print_int(PutCharFunc put_char, void *ctx, i64 u, int _base,
+ bool is_signed)
+{
+ static char digit[] = "0123456789abcdef";
+ static char buf[64];
+
+ u64 v = (u64)u, base = (u64)_base;
+ if (is_signed && u < 0) {
+ v = -v;
+ put_char(ctx, '-');
+ }
+
+ char *pos = buf;
+ do {
+ *pos++ = digit[v % base];
+ } while (v /= base);
+
+ do {
+ put_char(ctx, *(--pos));
+ } while (pos != buf);
+}
+
/*
 * Minimal printf-style engine: scans `fmt`, feeding literal characters
 * and formatted arguments to `put_char(ctx, c)`.
 * Supported: %% %c %s %u %llu %d %lld %x %llx %p %zu %zd.
 * An unrecognized specifier is emitted verbatim, '%' included.
 */
void vformat(PutCharFunc put_char, void *ctx, const char *fmt, va_list arg)
{
        const char *pos = fmt;

/* Match the literal specifier `ident` at `pos` and print the argument as
 * an integer. The chain below relies on prefix disambiguation: "llu" is
 * safe after "u" because their first characters differ. */
#define _INT_CASE(ident, type, base, sign) \
        else if (strncmp(pos, ident, sizeof(ident) - 1) == 0) \
        { \
                _print_int(put_char, ctx, (i64)va_arg(arg, type), base, sign); \
                pos += sizeof(ident) - 1; \
        }

        char c;
        while ((c = *pos++) != '\0') {
                bool special = false;

                if (c == '%') {
                        special = 1;

                        if (*pos == '%') {
                                // simple case: %% -> %
                                put_char(ctx, '%');
                                pos++;
                        } else if (*pos == 'c') {
                                /* char is promoted to int through varargs */
                                put_char(ctx, (char)va_arg(arg, int));
                                pos++;
                        } else if (*pos == 's') {
                                const char *s = va_arg(arg, const char *);

                                if (!s)
                                        s = "(null)";
                                while (*s != '\0') {
                                        put_char(ctx, *s++);
                                }

                                pos++;
                        }
                        /* NOTE(review): %p consumes a u64 and prints bare
                         * hex (no "0x" prefix) -- confirm callers pass
                         * pointer-sized values. */
                        _INT_CASE("u", u32, 10, 0)
                        _INT_CASE("llu", u64, 10, 0)
                        _INT_CASE("d", i32, 10, 1)
                        _INT_CASE("lld", i64, 10, 1)
                        _INT_CASE("x", u32, 16, 0)
                        _INT_CASE("llx", u64, 16, 0)
                        _INT_CASE("p", u64, 16, 0)
                        _INT_CASE("zu", usize, 10, 0)
                        _INT_CASE("zd", isize, 10, 1)
                        else
                        {
                                /* unknown specifier: fall through and emit
                                 * the '%' literally */
                                special = 0;
                        }
                }

                if (!special)
                        put_char(ctx, c);
        }

#undef _INT_CASE
}
+
/* Variadic convenience wrapper over vformat. */
void format(PutCharFunc put_char, void *ctx, const char *fmt, ...)
{
        va_list arg;
        va_start(arg, fmt);
        vformat(put_char, ctx, fmt, arg);
        va_end(arg);
}
diff --git a/src/common/format.h b/src/common/format.h
new file mode 100644
index 0000000..40c77b9
--- /dev/null
+++ b/src/common/format.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include
+
+typedef void (*PutCharFunc)(void *ctx, char c);
+
+void vformat(PutCharFunc put_char, void *ctx, const char *fmt, va_list arg);
+void format(PutCharFunc put_char, void *ctx, const char *fmt, ...);
diff --git a/src/common/list.c b/src/common/list.c
new file mode 100644
index 0000000..301e4b2
--- /dev/null
+++ b/src/common/list.c
@@ -0,0 +1,121 @@
+#include
+
+void init_list_node(ListNode *node)
+{
+ node->prev = node;
+ node->next = node;
+}
+
+ListNode *_merge_list(ListNode *node1, ListNode *node2)
+{
+ if (!node1)
+ return node2;
+ if (!node2)
+ return node1;
+
+ // before: (arrow is the next pointer)
+ // ... --> node1 --> node3 --> ...
+ // ... <-- node2 <-- node4 <-- ...
+ //
+ // after:
+ // ... --> node1 --+ +-> node3 --> ...
+ // | |
+ // ... <-- node2 <-+ +-- node4 <-- ...
+
+ ListNode *node3 = node1->next;
+ ListNode *node4 = node2->prev;
+
+ node1->next = node2;
+ node2->prev = node1;
+ node4->next = node3;
+ node3->prev = node4;
+
+ return node1;
+}
+
+ListNode *_detach_from_list(ListNode *node)
+{
+ ListNode *prev = node->prev;
+
+ node->prev->next = node->next;
+ node->next->prev = node->prev;
+ init_list_node(node);
+
+ if (prev == node)
+ return NULL;
+ return prev;
+}
+
/* Lock-free LIFO push: publish `node` as the new head via a CAS loop.
 * On CAS failure the builtin reloads *head into node->next, so the loop
 * body only needs the initial store. Returns `node`. */
QueueNode *add_to_queue(QueueNode **head, QueueNode *node)
{
        do
                node->next = *head;
        while (!__atomic_compare_exchange_n(head, &node->next, node, true,
                                            __ATOMIC_ACQ_REL, __ATOMIC_RELAXED));
        return node;
}
+
/* Lock-free LIFO pop: detach and return the most recently pushed node,
 * or NULL if the queue is empty.
 * NOTE(review): classic ABA / use-after-free hazard -- `node->next` is
 * read while another CPU may already have popped and freed `node`. Safe
 * only if nodes are never freed while a pop can still observe them;
 * confirm the usage guarantees this. */
QueueNode *fetch_from_queue(QueueNode **head)
{
        QueueNode *node;
        do
                node = *head;
        while (node &&
               !__atomic_compare_exchange_n(head, &node, node->next, true,
                                            __ATOMIC_ACQ_REL, __ATOMIC_RELAXED));
        return node;
}
+
/* Atomically take the whole queue: swap the head with NULL and return
 * the old chain (newest node first). */
QueueNode *fetch_all_from_queue(QueueNode **head)
{
        return __atomic_exchange_n(head, NULL, __ATOMIC_ACQ_REL);
}
+
+void queue_init(Queue *x)
+{
+ x->begin = x->end = 0;
+ x->sz = 0;
+ init_spinlock(&x->lk);
+}
/* Take the queue's spinlock; pair with queue_unlock. */
void queue_lock(Queue *x)
{
        acquire_spinlock(&x->lk);
}
/* Release the queue's spinlock taken by queue_lock. */
void queue_unlock(Queue *x)
{
        release_spinlock(&x->lk);
}
+void queue_push(Queue *x, ListNode *item)
+{
+ init_list_node(item);
+ if (x->sz == 0) {
+ x->begin = x->end = item;
+ } else {
+ _merge_list(x->end, item);
+ x->end = item;
+ }
+ x->sz++;
+}
+void queue_pop(Queue *x)
+{
+ if (x->sz == 0)
+ PANIC();
+ if (x->sz == 1) {
+ x->begin = x->end = 0;
+ } else {
+ auto t = x->begin;
+ x->begin = x->begin->next;
+ _detach_from_list(t);
+ }
+ x->sz--;
+}
+ListNode *queue_front(Queue *x)
+{
+ if (!x || !x->begin)
+ PANIC();
+ return x->begin;
+}
+bool queue_empty(Queue *x)
+{
+ return x->sz == 0;
+}
\ No newline at end of file
diff --git a/src/common/list.h b/src/common/list.h
new file mode 100644
index 0000000..bfa680b
--- /dev/null
+++ b/src/common/list.h
@@ -0,0 +1,79 @@
+#pragma once
+
+#include
+#include
+
+// ListNode represents one node on a circular list.
+typedef struct ListNode {
+ struct ListNode *prev, *next;
+} ListNode;
+
// initialize a single node circular list.
+void init_list_node(ListNode *node);
+
+// * List operations without locks: USE THEM CAREFULLY
+// - merge the list containing `node1` and the list containing `node2`
+// into one list. It guarantees `node1->next == node2`. Both lists can be
+// empty. This function will return the merged list.
+ListNode *_merge_list(ListNode *node1, ListNode *node2);
+// - syntax sugar: insert a single new node into the list
+#define _insert_into_list(list, node) \
+ (init_list_node(node), _merge_list(list, node))
+// - remove `node` from the list, and then `node` becomes a single
+// node list. It usually returns `node->prev`. If `node` is
+// the last one in the list, it will return NULL.
+ListNode *_detach_from_list(ListNode *node);
+// - walk through the list
+#define _for_in_list(valptr, list) \
+ for (ListNode *__flag = (list), *valptr = __flag->next; valptr; \
+ valptr = valptr == __flag ? (void *)0 : valptr->next)
+// - test if the list is empty
+#define _empty_list(list) ((list)->next == (list))
+
+// * List operations with locks
+#define merge_list(lock, node1, node2) \
+ ({ \
+ acquire_spinlock(lock); \
+ ListNode *__t = _merge_list(node1, node2); \
+ release_spinlock(lock); \
+ __t; \
+ })
+#define insert_into_list(lock, list, node) \
+ ({ \
+ acquire_spinlock(lock); \
+ ListNode *__t = _insert_into_list(list, node); \
+ release_spinlock(lock); \
+ __t; \
+ })
+#define detach_from_list(lock, node) \
+ ({ \
+ acquire_spinlock(lock); \
+ ListNode *__t = _detach_from_list(node); \
+ release_spinlock(lock); \
+ __t; \
+ })
+
+// Lockfree Queue: implemented as a lock-free single linked list.
+typedef struct QueueNode {
+ struct QueueNode *next;
+} QueueNode;
+// add a node to the queue and return the added node
+QueueNode *add_to_queue(QueueNode **head, QueueNode *node);
+// remove the last added node from the queue and return it
+QueueNode *fetch_from_queue(QueueNode **head);
+// remove all nodes from the queue and return them as a single list
+QueueNode *fetch_all_from_queue(QueueNode **head);
+
+typedef struct Queue {
+ ListNode *begin;
+ ListNode *end;
+ int sz;
+ SpinLock lk;
+} Queue;
+void queue_init(Queue *x);
+void queue_lock(Queue *x);
+void queue_unlock(Queue *x);
+void queue_push(Queue *x, ListNode *item);
+void queue_pop(Queue *x);
+ListNode *queue_front(Queue *x);
+bool queue_empty(Queue *x);
\ No newline at end of file
diff --git a/src/common/rbtree.c b/src/common/rbtree.c
new file mode 100644
index 0000000..4547944
--- /dev/null
+++ b/src/common/rbtree.c
@@ -0,0 +1,316 @@
+#include "rbtree.h"
+#define RB_RED 0
+#define RB_BLACK 1
+#define rb_parent(r) ((rb_node)((r)->__rb_parent_color & ~3))
+#define __rb_parent(pc) ((rb_node)(pc & ~3))
+
+#define __rb_color(pc) ((pc) & 1)
+#define __rb_is_black(pc) __rb_color(pc)
+#define __rb_is_red(pc) (!__rb_color(pc))
+#define rb_color(rb) __rb_color((rb)->__rb_parent_color)
+#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color)
+#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color)
/* Force a node's color to black without touching its parent pointer. */
static inline void rb_set_black(rb_node rb)
{
        rb->__rb_parent_color |= RB_BLACK;
}
/* Parent of a node known to be red: red means color bit 0, so the packed
 * parent+color word IS the parent pointer. */
static inline rb_node rb_red_parent(rb_node red)
{
        return (rb_node)red->__rb_parent_color;
}
/* Re-point the parent, preserving the node's current color bit. */
static inline void rb_set_parent(rb_node rb, rb_node p)
{
        rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
}
/* Store parent and color in one word (nodes are aligned, so the low bits
 * of the pointer are free for the color). */
static inline void rb_set_parent_color(rb_node rb, rb_node p, int color)
{
        rb->__rb_parent_color = (unsigned long)p | color;
}
/* Replace `old` with `new` in `parent`'s child slot, or as tree root. */
static inline void __rb_change_child(rb_node old, rb_node new, rb_node parent,
                                     rb_root root)
{
        if (parent) {
                if (parent->rb_left == old)
                        parent->rb_left = new;
                else
                        parent->rb_right = new;
        } else
                root->rb_node = new;
}
/* Finish a rotation: `new` inherits `old`'s parent/color word, `old` is
 * recolored to `color` under `new`, and the grandparent link is fixed. */
static inline void __rb_rotate_set_parents(rb_node old, rb_node new,
                                           rb_root root, int color)
{
        rb_node parent = rb_parent(old);
        new->__rb_parent_color = old->__rb_parent_color;
        rb_set_parent_color(old, new, color);
        __rb_change_child(old, new, parent, root);
}
/* Restore red-black invariants after inserting the red node `node`.
 * Bottom-up fixup in the style of Linux's rb_insert_color: recolor and
 * ascend while the uncle is red; otherwise at most two rotations finish. */
static void __rb_insert_fix(rb_node node, rb_root root)
{
        rb_node parent = rb_red_parent(node), gparent, tmp;
        while (1) {
                if (!parent) {
                        /* Reached the root: color it black and stop. */
                        rb_set_parent_color(node, NULL, RB_BLACK);
                        root->rb_node = node;
                        break;
                } else if (rb_is_black(parent))
                        break;

                gparent = rb_red_parent(parent);
                tmp = gparent->rb_right;
                if (parent != tmp) { /* parent == gparent->rb_left */
                        if (tmp && rb_is_red(tmp)) { /* Case 1: uncle red -> recolor, ascend */
                                rb_set_parent_color(tmp, gparent, RB_BLACK);
                                rb_set_parent_color(parent, gparent, RB_BLACK);
                                node = gparent;
                                parent = rb_parent(node);
                                rb_set_parent_color(node, parent, RB_RED);
                                continue;
                        }
                        // Uncle is black
                        tmp = parent->rb_right;
                        if (node == tmp) { /* Case 2: node is right child -> left rotate parent */
                                parent->rb_right = tmp = node->rb_left;
                                if (tmp)
                                        rb_set_parent_color(tmp, parent, RB_BLACK);
                                node->rb_left = parent;
                                rb_set_parent_color(parent, node, RB_RED);
                                parent = node;
                                tmp = node->rb_right;
                        }
                        /* Case 3: right rotate gparent; tree is valid after */
                        gparent->rb_left = tmp;
                        if (tmp)
                                rb_set_parent_color(tmp, gparent, RB_BLACK);
                        parent->rb_right = gparent;
                        __rb_rotate_set_parents(gparent, parent, root, RB_RED);
                        break;
                } else { /* mirror image: parent == gparent->rb_right */
                        tmp = gparent->rb_left;
                        if (tmp && rb_is_red(tmp)) { /* Case 1: uncle red -> recolor, ascend */
                                rb_set_parent_color(tmp, gparent, RB_BLACK);
                                rb_set_parent_color(parent, gparent, RB_BLACK);
                                node = gparent;
                                parent = rb_parent(node);
                                rb_set_parent_color(node, parent, RB_RED);
                                continue;
                        }
                        // Uncle is black
                        tmp = parent->rb_left;
                        if (node == tmp) { /* Case 2: node is left child -> right rotate parent */
                                parent->rb_left = tmp = node->rb_right;
                                if (tmp)
                                        rb_set_parent_color(tmp, parent, RB_BLACK);
                                node->rb_right = parent;
                                rb_set_parent_color(parent, node, RB_RED);
                                parent = node;
                                tmp = node->rb_left;
                        }
                        /* Case 3: left rotate gparent; tree is valid after */
                        gparent->rb_right = tmp;
                        if (tmp)
                                rb_set_parent_color(tmp, gparent, RB_BLACK);
                        parent->rb_left = gparent;
                        __rb_rotate_set_parents(gparent, parent, root, RB_RED);
                        break;
                }
        }
}
/* Unlink `node` from the tree without recoloring. Returns the parent
 * from which black-height rebalancing must start, or NULL if the tree
 * is already valid. */
static rb_node __rb_erase(rb_node node, rb_root root)
{
        rb_node child = node->rb_right, tmp = node->rb_left;
        rb_node parent, rebalance;
        unsigned long pc;
        if (!tmp) {
                /* No left child: splice the right child (possibly NULL)
                 * into node's place, reusing node's parent+color word. */
                pc = node->__rb_parent_color;
                parent = __rb_parent(pc);
                __rb_change_child(node, child, parent, root);
                if (child) {
                        child->__rb_parent_color = pc;
                        rebalance = NULL;
                } else
                        /* Removed a childless node: rebalance iff it was
                         * black. */
                        rebalance = __rb_is_black(pc) ? parent : NULL;
        } else if (!child) {
                /* Only a left child: splice it in; no rebalance needed. */
                tmp->__rb_parent_color = pc = node->__rb_parent_color;
                parent = __rb_parent(pc);
                __rb_change_child(node, tmp, parent, root);
                rebalance = NULL;
        } else {
                /* Two children: replace node by its in-order successor,
                 * the leftmost node of the right subtree. */
                rb_node successor = child, child2;
                tmp = child->rb_left;
                if (!tmp) {
                        /* Successor is node's right child itself. */
                        parent = successor;
                        child2 = successor->rb_right;
                } else {
                        do {
                                parent = successor;
                                successor = tmp;
                                tmp = tmp->rb_left;
                        } while (tmp);
                        parent->rb_left = child2 = successor->rb_right;
                        successor->rb_right = child;
                        rb_set_parent(child, successor);
                }
                successor->rb_left = tmp = node->rb_left;
                rb_set_parent(tmp, successor);
                pc = node->__rb_parent_color;
                tmp = __rb_parent(pc);
                __rb_change_child(node, successor, tmp, root);
                if (child2) {
                        /* Successor's right child fills its old slot and
                         * becomes black; no further rebalancing. */
                        successor->__rb_parent_color = pc;
                        rb_set_parent_color(child2, parent, RB_BLACK);
                        rebalance = NULL;
                } else {
                        /* Rebalance iff the successor was black. */
                        unsigned long pc2 = successor->__rb_parent_color;
                        successor->__rb_parent_color = pc;
                        rebalance = __rb_is_black(pc2) ? parent : NULL;
                }
        }
        return rebalance;
}
/* Rebalance after erasing a black node. `parent` is the parent of the
 * doubly-black position; `node` tracks that position (NULL on entry).
 * Same case structure as Linux's ____rb_erase_color: each half of the
 * if/else is the left/right mirror of the other. */
static void __rb_erase_fix(rb_node parent, rb_root root)
{
        rb_node node = NULL, sibling, tmp1, tmp2;
        while (1) {
                sibling = parent->rb_right;
                if (node != sibling) { /* deficit is in the left subtree */
                        if (rb_is_red(sibling)) { /* Case 1: red sibling -> left rotate parent */
                                parent->rb_right = tmp1 = sibling->rb_left;
                                rb_set_parent_color(tmp1, parent, RB_BLACK);
                                sibling->rb_left = parent;
                                __rb_rotate_set_parents(parent, sibling, root, RB_RED);
                                sibling = tmp1;
                        }
                        tmp1 = sibling->rb_right;
                        if (!tmp1 || rb_is_black(tmp1)) {
                                tmp2 = sibling->rb_left;
                                if (!tmp2 ||
                                    rb_is_black(tmp2)) { /* Case 2: sibling and both its children black -> recolor, ascend */
                                        rb_set_parent_color(sibling, parent, RB_RED);
                                        if (rb_is_red(parent)) {
                                                rb_set_black(parent);
                                        } else {
                                                node = parent;
                                                parent = rb_parent(node);
                                                if (parent)
                                                        continue;
                                        }
                                        break;
                                } else { /* Case 3: inner child red -> rotate sibling first */
                                        sibling->rb_left = tmp1 = tmp2->rb_right;
                                        if (tmp1)
                                                rb_set_parent_color(tmp1, sibling, RB_BLACK);
                                        tmp2->rb_right = sibling;
                                        parent->rb_right = tmp2;
                                        tmp1 = sibling;
                                        sibling = tmp2;
                                }
                        }
                        /* Case 4: outer child red -> final rotation ends it */
                        parent->rb_right = tmp2 = sibling->rb_left;
                        if (tmp2)
                                rb_set_parent(tmp2, parent);
                        sibling->rb_left = parent;
                        rb_set_parent_color(tmp1, sibling, RB_BLACK);
                        __rb_rotate_set_parents(parent, sibling, root, RB_BLACK);
                        break;
                } else { /* mirror: deficit is in the right subtree */
                        sibling = parent->rb_left;
                        if (rb_is_red(sibling)) { /* Case 1: red sibling -> right rotate parent */
                                parent->rb_left = tmp1 = sibling->rb_right;
                                rb_set_parent_color(tmp1, parent, RB_BLACK);
                                sibling->rb_right = parent;
                                __rb_rotate_set_parents(parent, sibling, root, RB_RED);
                                sibling = tmp1;
                        }
                        tmp1 = sibling->rb_left;
                        if (!tmp1 || rb_is_black(tmp1)) {
                                tmp2 = sibling->rb_right;
                                if (!tmp2 ||
                                    rb_is_black(tmp2)) { /* Case 2: sibling and both its children black -> recolor, ascend */
                                        rb_set_parent_color(sibling, parent, RB_RED);
                                        if (rb_is_red(parent)) {
                                                rb_set_black(parent);
                                        } else {
                                                node = parent;
                                                parent = rb_parent(node);
                                                if (parent)
                                                        continue;
                                        }
                                        break;
                                } else { /* Case 3: inner child red -> rotate sibling first */
                                        sibling->rb_right = tmp1 = tmp2->rb_left;
                                        if (tmp1)
                                                rb_set_parent_color(tmp1, sibling, RB_BLACK);
                                        tmp2->rb_left = sibling;
                                        parent->rb_left = tmp2;
                                        tmp1 = sibling;
                                        sibling = tmp2;
                                }
                        }
                        /* Case 4: outer child red -> final rotation ends it */
                        parent->rb_left = tmp2 = sibling->rb_right;
                        if (tmp2)
                                rb_set_parent(tmp2, parent);
                        sibling->rb_right = parent;
                        rb_set_parent_color(tmp1, sibling, RB_BLACK);
                        __rb_rotate_set_parents(parent, sibling, root, RB_BLACK);
                        break;
                }
        }
}
+int _rb_insert(rb_node node, rb_root rt,
+ bool (*cmp)(rb_node lnode, rb_node rnode))
+{
+ rb_node nw = rt->rb_node, parent = NULL;
+ node->rb_left = node->rb_right = NULL;
+ node->__rb_parent_color = 0;
+ while (nw) {
+ parent = nw;
+ if (cmp(node, nw)) {
+ nw = nw->rb_left;
+ if (nw == NULL) {
+ parent->rb_left = node;
+ node->__rb_parent_color = (unsigned long)parent;
+ }
+ } else if (cmp(nw, node)) {
+ nw = nw->rb_right;
+ if (nw == NULL) {
+ parent->rb_right = node;
+ node->__rb_parent_color = (unsigned long)parent;
+ }
+ } else
+ return -1;
+ }
+ __rb_insert_fix(node, rt);
+ return 0;
+}
+void _rb_erase(rb_node node, rb_root root)
+{
+ rb_node rebalance;
+ rebalance = __rb_erase(node, root);
+ if (rebalance)
+ __rb_erase_fix(rebalance, root);
+}
+rb_node _rb_lookup(rb_node node, rb_root rt,
+ bool (*cmp)(rb_node lnode, rb_node rnode))
+{
+ rb_node nw = rt->rb_node;
+ while (nw) {
+ if (cmp(node, nw)) {
+ nw = nw->rb_left;
+ } else if (cmp(nw, node)) {
+ nw = nw->rb_right;
+ } else
+ return nw;
+ }
+ return NULL;
+}
+rb_node _rb_first(rb_root root)
+{
+ rb_node n;
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_left)
+ n = n->rb_left;
+ return n;
+}
\ No newline at end of file
diff --git a/src/common/rbtree.h b/src/common/rbtree.h
new file mode 100755
index 0000000..535f356
--- /dev/null
+++ b/src/common/rbtree.h
@@ -0,0 +1,22 @@
+#pragma once
+#include "common/defines.h"
+
+struct rb_node_ {
+ unsigned long __rb_parent_color;
+ struct rb_node_ *rb_right;
+ struct rb_node_ *rb_left;
+} __attribute__((aligned(sizeof(long))));
+
+typedef struct rb_node_ *rb_node;
+struct rb_root_ {
+ rb_node rb_node;
+};
+typedef struct rb_root_ *rb_root;
+
+/* NOTE:You should add lock when use */
+WARN_RESULT int _rb_insert(rb_node node, rb_root root,
+ bool (*cmp)(rb_node lnode, rb_node rnode));
+void _rb_erase(rb_node node, rb_root root);
+rb_node _rb_lookup(rb_node node, rb_root rt,
+ bool (*cmp)(rb_node lnode, rb_node rnode));
+rb_node _rb_first(rb_root root);
diff --git a/src/common/rc.c b/src/common/rc.c
new file mode 100644
index 0000000..673ca8f
--- /dev/null
+++ b/src/common/rc.c
@@ -0,0 +1,17 @@
+#include
+
/* Start a reference counter at zero. */
void init_rc(RefCount *rc)
{
        rc->count = 0;
}
+
/* Atomically add one reference. */
void increment_rc(RefCount *rc)
{
        __atomic_fetch_add(&rc->count, 1, __ATOMIC_ACQ_REL);
}
+
/* Atomically drop one reference; true when the count has reached zero
 * (or below), i.e. the holder may reclaim the object. */
bool decrement_rc(RefCount *rc)
{
        i64 r = __atomic_sub_fetch(&rc->count, 1, __ATOMIC_ACQ_REL);
        return r <= 0;
}
diff --git a/src/common/rc.h b/src/common/rc.h
new file mode 100644
index 0000000..b7fbf40
--- /dev/null
+++ b/src/common/rc.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include
+
+typedef struct {
+ isize count;
+} RefCount;
+
+void init_rc(RefCount *);
+void increment_rc(RefCount *);
+bool decrement_rc(RefCount *);
diff --git a/src/common/sem.c b/src/common/sem.c
new file mode 100755
index 0000000..cf277bf
--- /dev/null
+++ b/src/common/sem.c
@@ -0,0 +1,96 @@
+#include
+#include
+#include
+#include
+#include
+
/* Initialize a counting semaphore with `val` tokens and no sleepers. */
void init_sem(Semaphore *sem, int val)
{
        sem->val = val;
        init_spinlock(&sem->lock);
        init_list_node(&sem->sleeplist);
}
+
/* Take the semaphore's internal spinlock (see wait_sem/post_sem macros). */
void _lock_sem(Semaphore *sem)
{
        acquire_spinlock(&sem->lock);
}
+
/* Release the semaphore's internal spinlock. */
void _unlock_sem(Semaphore *sem)
{
        release_spinlock(&sem->lock);
}
+
+bool _get_sem(Semaphore *sem)
+{
+ bool ret = false;
+ if (sem->val > 0) {
+ sem->val--;
+ ret = true;
+ }
+ return ret;
+}
+
/* Current token count; negative means -val processes are sleeping.
 * Caller should hold the semaphore lock for a stable answer. */
int _query_sem(Semaphore *sem)
{
        return sem->val;
}
+
+int get_all_sem(Semaphore *sem)
+{
+ int ret = 0;
+ _lock_sem(sem);
+ if (sem->val > 0) {
+ ret = sem->val;
+ sem->val = 0;
+ }
+ _unlock_sem(sem);
+ return ret;
+}
+
/* Wake every sleeper: keep posting until a token becomes available,
 * then immediately re-take that surplus token, leaving val == 0.
 * Returns the number of sleepers woken (`ret` starts at -1 to cancel
 * the final post that _get_sem takes back). */
int post_all_sem(Semaphore *sem)
{
        int ret = -1;
        _lock_sem(sem);
        do
                _post_sem(sem), ret++;
        while (!_get_sem(sem));
        _unlock_sem(sem);
        return ret;
}
+
/* Block until a token is available. The caller must already hold
 * sem->lock (see the wait_sem macro); the lock is released before this
 * returns or sleeps. Returns true if woken by _post_sem, false if an
 * alertable sleep was ended by some other source. */
bool _wait_sem(Semaphore *sem, bool alertable)
{
        /* Optimistic take: a non-negative result means a token was free. */
        if (--sem->val >= 0) {
                release_spinlock(&sem->lock);
                return true;
        }
        /* NOTE(review): kalloc result is not checked -- confirm it cannot
         * fail here. */
        WaitData *wait = kalloc(sizeof(WaitData));
        wait->proc = thisproc();
        wait->up = false;
        _insert_into_list(&sem->sleeplist, &wait->slnode);
        acquire_sched_lock();
        release_spinlock(&sem->lock);
        /* Alertable sleeps may be ended early; deep sleeps only by post. */
        sched(alertable ? SLEEPING : DEEPSLEEPING);
        acquire_spinlock(&sem->lock); // also the lock for waitdata
        if (!wait->up) // wakeup by other sources
        {
                /* Undo the token reservation and leave the sleep list. */
                ASSERT(++sem->val <= 0);
                _detach_from_list(&wait->slnode);
        }
        release_spinlock(&sem->lock);
        bool ret = wait->up;
        kfree(wait);
        return ret;
}
+
/* Release one token; caller must hold sem->lock. If anyone is sleeping
 * (val stays <= 0 after the increment), hand the token to the oldest
 * waiter: inserts go at the list head, so sleeplist.prev is FIFO order. */
void _post_sem(Semaphore *sem)
{
        if (++sem->val <= 0) {
                ASSERT(!_empty_list(&sem->sleeplist));
                auto wait = container_of(sem->sleeplist.prev, WaitData, slnode);
                wait->up = true;
                _detach_from_list(&wait->slnode);
                activate_proc(wait->proc);
        }
}
\ No newline at end of file
diff --git a/src/common/sem.h b/src/common/sem.h
new file mode 100755
index 0000000..656376d
--- /dev/null
+++ b/src/common/sem.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include
+
+struct Proc;
+
/* One sleeping waiter; heap-allocated by _wait_sem and protected by the
 * owning semaphore's spinlock. */
typedef struct {
        bool up; // set true by the poster that hands this waiter a unit
        struct Proc *proc; // the sleeping process to reactivate
        ListNode slnode; // link in Semaphore::sleeplist
} WaitData;

/* Counting semaphore. `val` may go negative: -val is then the number of
 * waiters queued on `sleeplist`. */
typedef struct {
        SpinLock lock;
        int val;
        ListNode sleeplist;
} Semaphore;

/* _-prefixed operations require the caller to hold the semaphore's lock;
 * use the macros below for self-locking variants. */
void init_sem(Semaphore *, int val);
void _post_sem(Semaphore *);
WARN_RESULT bool _wait_sem(Semaphore *, bool alertable);
bool _get_sem(Semaphore *);
WARN_RESULT int _query_sem(Semaphore *);
void _lock_sem(Semaphore *);
void _unlock_sem(Semaphore *);
int get_all_sem(Semaphore *);
int post_all_sem(Semaphore *);
// _wait_sem releases the lock itself, hence no _unlock_sem here.
#define wait_sem(sem) (_lock_sem(sem), _wait_sem(sem, true))
// An unalertable wait cannot be interrupted, so it must succeed.
#define unalertable_wait_sem(sem) \
        ASSERT((_lock_sem(sem), _wait_sem(sem, false)))
#define post_sem(sem) (_lock_sem(sem), _post_sem(sem), _unlock_sem(sem))
#define get_sem(sem) \
        ({ \
                _lock_sem(sem); \
                bool __ret = _get_sem(sem); \
                _unlock_sem(sem); \
                __ret; \
        })

// A sleep lock is just a binary semaphore initialized to 1.
#define SleepLock Semaphore
#define init_sleeplock(lock) init_sem(lock, 1)
#define acquire_sleeplock(lock) wait_sem(lock)
#define unalertable_acquire_sleeplock(lock) unalertable_wait_sem(lock)
#define release_sleeplock(lock) post_sem(lock)
diff --git a/src/common/spinlock.c b/src/common/spinlock.c
new file mode 100644
index 0000000..1bbb362
--- /dev/null
+++ b/src/common/spinlock.c
@@ -0,0 +1,28 @@
+#include
+#include
+
/* Mark the lock as free; must run before first use. */
void init_spinlock(SpinLock *lock)
{
        lock->locked = 0;
}
+
+bool try_acquire_spinlock(SpinLock *lock)
+{
+ if (!lock->locked &&
+ !__atomic_test_and_set(&lock->locked, __ATOMIC_ACQUIRE)) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
/* Spin until the lock is taken, yielding the core between attempts. */
void acquire_spinlock(SpinLock *lock)
{
        while (!try_acquire_spinlock(lock))
                arch_yield();
}
+
/* Release the lock with RELEASE ordering so writes made inside the
 * critical section become visible to the next acquirer. */
void release_spinlock(SpinLock *lock)
{
        __atomic_clear(&lock->locked, __ATOMIC_RELEASE);
}
diff --git a/src/common/spinlock.h b/src/common/spinlock.h
new file mode 100755
index 0000000..d8c265a
--- /dev/null
+++ b/src/common/spinlock.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include
+#include
+
+typedef struct {
+ volatile bool locked;
+} SpinLock;
+
+void init_spinlock(SpinLock *);
+WARN_RESULT bool try_acquire_spinlock(SpinLock *);
+void acquire_spinlock(SpinLock *);
+void release_spinlock(SpinLock *);
diff --git a/src/common/string.c b/src/common/string.c
new file mode 100644
index 0000000..8b5d906
--- /dev/null
+++ b/src/common/string.c
@@ -0,0 +1,93 @@
+#include
+
+void *memset(void *s, int c, usize n)
+{
+ for (usize i = 0; i < n; i++)
+ ((u8 *)s)[i] = (u8)(c & 0xff);
+
+ return s;
+}
+
+void *memcpy(void *restrict dest, const void *restrict src, usize n)
+{
+ for (usize i = 0; i < n; i++)
+ ((u8 *)dest)[i] = ((u8 *)src)[i];
+
+ return dest;
+}
+
+int memcmp(const void *s1, const void *s2, usize n)
+{
+ for (usize i = 0; i < n; i++) {
+ int c1 = ((u8 *)s1)[i];
+ int c2 = ((u8 *)s2)[i];
+
+ if (c1 != c2)
+ return c1 - c2;
+ }
+
+ return 0;
+}
+
+void *memmove(void *dest, const void *src, usize n)
+{
+ const char *s = (const char *)src;
+ char *d = (char *)dest;
+
+ if (s < d && (usize)(d - s) < n) {
+ s += n;
+ d += n;
+ while (n-- > 0) {
+ *--d = *--s;
+ }
+ } else {
+ while (n-- > 0) {
+ *d++ = *s++;
+ }
+ }
+
+ return dest;
+}
+
+char *strncpy(char *restrict dest, const char *restrict src, usize n)
+{
+ usize i = 0;
+ for (; i < n && src[i] != '\0'; i++)
+ dest[i] = src[i];
+ for (; i < n; i++)
+ dest[i] = '\0';
+
+ return dest;
+}
+
+char *strncpy_fast(char *restrict dest, const char *restrict src, usize n)
+{
+ usize i = 0;
+ for (; i < n && src[i] != '\0'; i++)
+ dest[i] = src[i];
+ if (i < n)
+ dest[i] = '\0';
+
+ return dest;
+}
+
+int strncmp(const char *s1, const char *s2, usize n)
+{
+ for (usize i = 0; i < n; i++) {
+ if (s1[i] != s2[i])
+ return s1[i] - s2[i];
+ if (s1[i] == '\0' || s2[i] == '\0')
+ break;
+ }
+
+ return 0;
+}
+
+usize strlen(const char *s)
+{
+ usize i = 0;
+ while (s[i] != '\0')
+ i++;
+
+ return i;
+}
diff --git a/src/common/string.h b/src/common/string.h
new file mode 100755
index 0000000..6248d79
--- /dev/null
+++ b/src/common/string.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include
+
+void *memset(void *s, int c, usize n);
+void *memcpy(void *restrict dest, const void *restrict src, usize n);
+WARN_RESULT int memcmp(const void *s1, const void *s2, usize n);
+
+/**
+ * Note that memmove does not allocate extra memory and handles overlapped memory
+ * regions correctly, but it does not take side effects into consideration
+ * (e.g. two virtual memory regions mapped to the same physical memory region).
+ */
+void *memmove(void *dest, const void *src, usize n);
+
+/**
+ * Note that for string functions, please specify `n` explicitly.
+ * strncpy will `dest` with zeroes if the length of `src` is less than `n`.
+ * strncpy_fast will not do that.
+ */
+char *strncpy(char *restrict dest, const char *restrict src, usize n);
+char *strncpy_fast(char *restrict dest, const char *restrict src, usize n);
+
+WARN_RESULT int strncmp(const char *s1, const char *s2, usize n);
+WARN_RESULT usize strlen(const char *s);
diff --git a/src/common/variadic.h b/src/common/variadic.h
new file mode 100644
index 0000000..751ceba
--- /dev/null
+++ b/src/common/variadic.h
@@ -0,0 +1,7 @@
+#pragma once
+
+typedef __builtin_va_list va_list;
+
+#define va_start(ap, param) __builtin_va_start(ap, param)
+#define va_end(ap) __builtin_va_end(ap)
+#define va_arg(ap, type) __builtin_va_arg(ap, type)
diff --git a/src/driver/CMakeLists.txt b/src/driver/CMakeLists.txt
new file mode 100644
index 0000000..ec32eef
--- /dev/null
+++ b/src/driver/CMakeLists.txt
@@ -0,0 +1,3 @@
+file(GLOB driver_sources CONFIGURE_DEPENDS "*.c")
+
+add_library(driver STATIC ${driver_sources})
diff --git a/src/driver/aux.h b/src/driver/aux.h
new file mode 100644
index 0000000..5c58a98
--- /dev/null
+++ b/src/driver/aux.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include
+
+#define AUX_BASE (MMIO_BASE + 0x215000)
+
+#define AUX_ENABLES (AUX_BASE + 0x04)
+#define AUX_MU_IO_REG (AUX_BASE + 0x40)
+#define AUX_MU_IER_REG (AUX_BASE + 0x44)
+#define AUX_MU_IIR_REG (AUX_BASE + 0x48)
+#define AUX_MU_LCR_REG (AUX_BASE + 0x4C)
+#define AUX_MU_MCR_REG (AUX_BASE + 0x50)
+#define AUX_MU_LSR_REG (AUX_BASE + 0x54)
+#define AUX_MU_MSR_REG (AUX_BASE + 0x58)
+#define AUX_MU_SCRATCH (AUX_BASE + 0x5C)
+#define AUX_MU_CNTL_REG (AUX_BASE + 0x60)
+#define AUX_MU_STAT_REG (AUX_BASE + 0x64)
+#define AUX_MU_BAUD_REG (AUX_BASE + 0x68)
+
+#define AUX_UART_CLOCK 250000000
+
+#define AUX_MU_BAUD(baudrate) ((AUX_UART_CLOCK / ((baudrate) * 8)) - 1)
diff --git a/src/driver/base.h b/src/driver/base.h
new file mode 100755
index 0000000..214cd83
--- /dev/null
+++ b/src/driver/base.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#define KERNEL_BASE 0xFFFF000000000000
+#define MMIO_BASE (KERNEL_BASE + 0xA000000)
+#define LOCAL_BASE (KERNEL_BASE + 0x40000000)
+
+#define V2P(v) ((u64)(v) - KERNEL_BASE)
+#define P2V(p) ((u64)(p) + KERNEL_BASE)
+
+#define PUARTBASE 0x9000000
+#define UARTBASE P2V(PUARTBASE)
+
+#define PGICBASE 0x08000000
+#define GICBASE P2V(PGICBASE)
+
+#define PVIRTIO0 0x0A000000
+#define VIRTIO0 P2V(PVIRTIO0)
\ No newline at end of file
diff --git a/src/driver/clock.c b/src/driver/clock.c
new file mode 100755
index 0000000..c8325d5
--- /dev/null
+++ b/src/driver/clock.c
@@ -0,0 +1,43 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static struct {
+ ClockHandler handler;
+} clock;
+
/* Per-cpu clock bring-up: enable the virtual timer and arm the first tick.
 * NOTE(review): the original comment said "reserve one second for the
 * first time", but reset_clock(10) arms a 10 ms interval — confirm which
 * was intended. */
void init_clock()
{
        enable_timer();
        reset_clock(10);
}
+
/* Arm the virtual timer to fire `interval_ms` milliseconds from now.
 * The ASSERT keeps the tick count within 31 bits, matching the signed
 * 32-bit CNTV_TVAL down-counter. */
void reset_clock(u64 interval_ms)
{
        u64 interval_clk = interval_ms * get_clock_frequency() / 1000;
        ASSERT(interval_clk <= 0x7fffffff);
        set_cntv_tval_el0(interval_clk);
}
+
/* Register the tick callback and route the timer IRQ to the trampoline
 * below. */
void set_clock_handler(ClockHandler handler)
{
        clock.handler = handler;
        set_interrupt_handler(TIMER_IRQ, invoke_clock_handler);
}
+
/* Timer IRQ trampoline: a tick before set_clock_handler() ran is a bug,
 * hence the panic. */
void invoke_clock_handler()
{
        if (!clock.handler)
                PANIC();
        clock.handler();
}
+
/* Current time in milliseconds derived from the architectural counter.
 * NOTE(review): get_timestamp() * 1000 can overflow u64 at extreme
 * uptime/frequency combinations — acceptable here, but worth knowing. */
u64 get_timestamp_ms()
{
        return get_timestamp() * 1000 / get_clock_frequency();
}
diff --git a/src/driver/clock.h b/src/driver/clock.h
new file mode 100755
index 0000000..677f232
--- /dev/null
+++ b/src/driver/clock.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include
+
+typedef void (*ClockHandler)(void);
+
+WARN_RESULT u64 get_timestamp_ms();
+void init_clock();
+void reset_clock(u64 interval_ms);
+void set_clock_handler(ClockHandler handler);
+void invoke_clock_handler();
diff --git a/src/driver/gicv3.c b/src/driver/gicv3.c
new file mode 100644
index 0000000..aa2be84
--- /dev/null
+++ b/src/driver/gicv3.c
@@ -0,0 +1,287 @@
+#include
+#include
+#include
+#include
+#include
+
+#define GICD_CTLR (0x0)
+#define GICD_TYPER (0x4)
+#define GICD_IGROUPR(n) (0x80 + (u64)(n) * 4)
+#define GICD_ISENABLER(n) (0x100 + (u64)(n) * 4)
+#define GICD_ICENABLER(n) (0x180 + (u64)(n) * 4)
+#define GICD_ISPENDR(n) (0x200 + (u64)(n) * 4)
+#define GICD_ICPENDR(n) (0x280 + (u64)(n) * 4)
+#define GICD_IPRIORITYR(n) (0x400 + (u64)(n) * 4)
+#define GICD_ITARGETSR(n) (0x800 + (u64)(n) * 4)
+#define GICD_ICFGR(n) (0xc00 + (u64)(n) * 4)
+
+#define GICC_CTLR (0x0)
+#define GICC_PMR (0x4)
+#define GICC_IAR (0xc)
+#define GICC_EOIR (0x10)
+#define GICC_HPPIR (0x18)
+#define GICC_AIAR (0x20)
+#define GICC_AEOIR (0x24)
+
+#define GICR_CTLR (0x0)
+#define GICR_WAKER (0x14)
+
+#define SGI_BASE 0x10000
+#define GICR_IGROUPR0 (SGI_BASE + 0x80)
+#define GICR_ISENABLER0 (SGI_BASE + 0x100)
+#define GICR_ICENABLER0 (SGI_BASE + 0x180)
+#define GICR_ICPENDR0 (SGI_BASE + 0x280)
+#define GICR_IPRIORITYR(n) (SGI_BASE + 0x400 + (n) * 4)
+#define GICR_ICFGR0 (SGI_BASE + 0xc00)
+#define GICR_ICFGR1 (SGI_BASE + 0xc04)
+#define GICR_IGRPMODR0 (SGI_BASE + 0xd00)
+
+static bool is_sgi_ppi(u32 id);
+
+static inline u32 icc_igrpen1_el1()
+{
+ u32 x;
+ asm volatile("mrs %0, S3_0_C12_C12_7" : "=r"(x));
+ return x;
+}
+
+static inline void w_icc_igrpen1_el1(u32 x)
+{
+ asm volatile("msr S3_0_C12_C12_7, %0" : : "r"(x));
+}
+
+static inline u32 icc_pmr_el1()
+{
+ u32 x;
+ asm volatile("mrs %0, S3_0_C4_C6_0" : "=r"(x));
+ return x;
+}
+
+static inline void w_icc_pmr_el1(u32 x)
+{
+ asm volatile("msr S3_0_C4_C6_0, %0" : : "r"(x));
+}
+
+static inline u32 icc_iar1_el1()
+{
+ u32 x;
+ asm volatile("mrs %0, S3_0_C12_C12_0" : "=r"(x));
+ return x;
+}
+
+static inline void w_icc_eoir1_el1(u32 x)
+{
+ asm volatile("msr S3_0_C12_C12_1, %0" : : "r"(x));
+}
+
+static inline u32 icc_sre_el1()
+{
+ u32 x;
+ asm volatile("mrs %0, S3_0_C12_C12_5" : "=r"(x));
+ return x;
+}
+
+static inline void w_icc_sre_el1(u32 x)
+{
+ asm volatile("msr S3_0_C12_C12_5, %0" : : "r"(x));
+}
+
+static struct {
+ char *gicd;
+ char *rdist_addrs[NCPU];
+} gicv3;
+
+static void wd32(u32 off, u32 val)
+{
+ *(volatile u32 *)(gicv3.gicd + off) = val;
+}
+
+static u32 rd32(u32 off)
+{
+ return *(volatile u32 *)(gicv3.gicd + off);
+}
+
+static void wr32(u32 cpuid, u32 off, u32 val)
+{
+ *(volatile u32 *)(gicv3.rdist_addrs[cpuid] + off) = val;
+}
+
+static u32 rr32(u32 cpuid, u32 off)
+{
+ return *(volatile u32 *)(gicv3.rdist_addrs[cpuid] + off);
+}
+
+static void gic_enable_int(u32 intid)
+{
+ u32 is = rd32(GICD_ISENABLER(intid / 32));
+ is |= 1 << (intid % 32);
+ wd32(GICD_ISENABLER(intid / 32), is);
+}
+
+static void gicr_enable_int(u32 cpuid, u32 intid)
+{
+ if (!is_sgi_ppi(intid)) {
+ PANIC();
+ }
+
+ u32 is = rr32(cpuid, GICR_ISENABLER0);
+ is |= 1 << (intid % 32);
+ wr32(cpuid, GICR_ISENABLER0, is);
+}
+
+static void gic_clear_pending(u32 intid)
+{
+ u32 ic = rd32(GICD_ICPENDR(intid / 32));
+ ic |= 1 << (intid % 32);
+ wd32(GICD_ICPENDR(intid / 32), ic);
+}
+
+static void gicr_clear_pending(u32 cpuid, u32 intid)
+{
+ if (!is_sgi_ppi(intid)) {
+ PANIC();
+ }
+
+ u32 ic = rr32(cpuid, GICR_ICPENDR0);
+ ic |= 1 << (intid % 32);
+ wr32(cpuid, GICR_ICPENDR0, ic);
+}
+
+static void gic_set_prio(u32 intid, u32 prio)
+{
+ (void)prio;
+ u32 p = rd32(GICD_IPRIORITYR(intid / 4));
+ p &= ~((u32)0xff << (intid % 4 * 8)); // set prio 0
+ wd32(GICD_IPRIORITYR(intid / 4), p);
+}
+
+static void gicr_set_prio(u32 cpuid, u32 intid, u32 prio)
+{
+ (void)prio;
+ if (!is_sgi_ppi(intid)) {
+ PANIC();
+ }
+
+ u32 p = rr32(cpuid, GICR_IPRIORITYR(intid / 4));
+ p &= ~((u32)0xff << (intid % 4 * 8)); // set prio 0
+ wr32(cpuid, GICR_IPRIORITYR(intid / 4), p);
+}
+
+static void gic_set_target(u32 intid, u32 cpuid)
+{
+ u32 itargetsr = rd32(GICD_ITARGETSR(intid / 4));
+ itargetsr &= ~((u32)0xff << (intid % 4 * 8));
+ wd32(GICD_ITARGETSR(intid / 4),
+ itargetsr | ((u32)(1 << cpuid) << (intid % 4 * 8)));
+}
+
+/*static void gicr_wait_rwp(u32 cpuid)
+{
+ u32 ctlr = rr32(cpuid, GICR_CTLR);
+ while ((ctlr >> 3) & 1) // RWP
+ ;
+}*/
+
+void gic_setup_ppi(u32 cpu, u32 intid, int prio)
+{
+ gicr_set_prio(cpu, intid, prio);
+ gicr_clear_pending(cpu, intid);
+ gicr_enable_int(cpu, intid);
+}
+
+void gic_setup_spi(u32 intid, int prio)
+{
+ gic_set_prio(intid, prio);
+ gic_set_target(intid, 0);
+ gic_clear_pending(intid);
+ gic_enable_int(intid);
+}
+
+static void gic_cpu_init()
+{
+ w_icc_igrpen1_el1(0);
+
+ w_icc_pmr_el1(0xff);
+}
+
+static void gic_dist_init()
+{
+ wd32(GICD_CTLR, 0);
+
+ for (int i = 0; i < 32; i++)
+ wd32(GICD_IGROUPR(i), ~0);
+}
+
+static void gic_redist_init(u32 cpuid)
+{
+ wr32(cpuid, GICR_CTLR, 0);
+
+ w_icc_sre_el1(icc_sre_el1() | 1);
+
+ /* Non-secure Group1 */
+ wr32(cpuid, GICR_IGROUPR0, ~0);
+ wr32(cpuid, GICR_IGRPMODR0, 0);
+
+ wr32(cpuid, GICR_ICFGR1, 0);
+
+ /* enable redist */
+ u32 waker = rr32(cpuid, GICR_WAKER);
+ wr32(cpuid, GICR_WAKER, waker & ~(1 << 1));
+ while (rr32(cpuid, GICR_WAKER) & (1 << 2))
+ ;
+}
+
+static void gic_enable()
+{
+ /* enable Group0/Non-secure Group1 */
+ wd32(GICD_CTLR, 3);
+
+ w_icc_igrpen1_el1(1);
+}
+
/* Per-cpu GIC bring-up: CPU interface, distributor, this cpu's
 * redistributor, the per-cpu timer PPI, then global enable.
 * NOTE(review): gic_dist_init() and gic_enable() touch the shared
 * distributor yet run once per cpu here — harmless if idempotent, but
 * confirm this is intended rather than boot-cpu-only work. */
void gicv3_init_percpu()
{
        u32 cpu = cpuid();

        gic_cpu_init();
        gic_dist_init();
        gic_redist_init(cpu);

        gic_setup_ppi(cpuid(), TIMER_IRQ, 0);

        gic_enable();
}
+
/* Global GIC init: record the distributor base and each cpu's
 * redistributor frame (0x20000 stride starting at GICBASE + 0xa0000),
 * then route the UART and virtio-blk SPIs to cpu 0. */
void gicv3_init()
{
        gicv3.gicd = (char *)GICBASE;
        for (int i = 0; i < NCPU; i++) {
                gicv3.rdist_addrs[i] = (char *)(GICBASE + 0xa0000 + (i) * 0x20000);
        }

        gic_setup_spi(UART_IRQ, 0);
        gic_setup_spi(VIRTIO_BLK_IRQ, 0);
}
+
+bool gic_enabled()
+{
+ return (icc_igrpen1_el1() & 0x1) && (rd32(GICD_CTLR) & 0x1);
+}
+
+u32 gic_iar()
+{
+ return icc_iar1_el1();
+}
+
+void gic_eoi(u32 iar)
+{
+ w_icc_eoir1_el1(iar);
+}
+
+static bool is_sgi_ppi(u32 id)
+{
+ if (id < 32)
+ return true;
+ else
+ return false;
+}
\ No newline at end of file
diff --git a/src/driver/gicv3.h b/src/driver/gicv3.h
new file mode 100644
index 0000000..fd41d42
--- /dev/null
+++ b/src/driver/gicv3.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include
+
+void gicv3_init(void);
+void gicv3_init_percpu(void);
+void gic_eoi(u32 iar);
+u32 gic_iar(void);
+bool gic_enabled(void);
diff --git a/src/driver/gpio.h b/src/driver/gpio.h
new file mode 100644
index 0000000..c1ede8b
--- /dev/null
+++ b/src/driver/gpio.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include
+
+#define GPIO_BASE (MMIO_BASE + 0x200000)
+
+#define GPFSEL0 (GPIO_BASE + 0x00)
+#define GPFSEL1 (GPIO_BASE + 0x04)
+#define GPFSEL2 (GPIO_BASE + 0x08)
+#define GPFSEL3 (GPIO_BASE + 0x0C)
+#define GPFSEL4 (GPIO_BASE + 0x10)
+#define GPFSEL5 (GPIO_BASE + 0x14)
+#define GPSET0 (GPIO_BASE + 0x1C)
+#define GPSET1 (GPIO_BASE + 0x20)
+#define GPCLR0 (GPIO_BASE + 0x28)
+#define GPLEV0 (GPIO_BASE + 0x34)
+#define GPLEV1 (GPIO_BASE + 0x38)
+#define GPEDS0 (GPIO_BASE + 0x40)
+#define GPEDS1 (GPIO_BASE + 0x44)
+#define GPHEN0 (GPIO_BASE + 0x64)
+#define GPHEN1 (GPIO_BASE + 0x68)
+#define GPPUD (GPIO_BASE + 0x94)
+#define GPPUDCLK0 (GPIO_BASE + 0x98)
+#define GPPUDCLK1 (GPIO_BASE + 0x9C)
diff --git a/src/driver/interrupt.c b/src/driver/interrupt.c
new file mode 100644
index 0000000..4b1f2fd
--- /dev/null
+++ b/src/driver/interrupt.c
@@ -0,0 +1,45 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static InterruptHandler int_handler[NUM_IRQ_TYPES];
+
/* Fallback IRQ handler: an interrupt without a registered handler is a
 * bug, so report which id fired on which cpu and panic. */
static void default_handler(u32 intid)
{
        printk("\033[1;31m[Error CPU %lld]: Interrupt %d not implemented.\033[0m\n", cpuid(), intid);
        PANIC();
}
+
+void init_interrupt()
+{
+ for (usize i = 0; i < NUM_IRQ_TYPES; i++) {
+ int_handler[i] = default_handler;
+ }
+}
+
/* Register `handler` for interrupt `type`, replacing the default.
 * NOTE(review): no locking here — presumably only called during
 * single-threaded boot; confirm against callers. */
void set_interrupt_handler(InterruptType type, InterruptHandler handler)
{
        int_handler[type] = handler;
}
+
+void interrupt_global_handler()
+{
+ //printk("[Interrupt] Interrupt occurred on CPU %lld.\n", cpuid());
+ u32 iar = gic_iar();
+ u32 intid = iar & 0x3ff;
+
+ if (intid == 1023) {
+
+ printk("\033[1;31m[Warning]: Spurious Interrupt.\033[0m\n");
+ return;
+ }
+
+ gic_eoi(iar);
+
+ if (int_handler[intid])
+ int_handler[intid](intid);
+}
diff --git a/src/driver/interrupt.h b/src/driver/interrupt.h
new file mode 100644
index 0000000..3372154
--- /dev/null
+++ b/src/driver/interrupt.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#define NUM_IRQ_TYPES 64
+
+typedef enum {
+ TIMER_IRQ = 27,
+ UART_IRQ = 33,
+ VIRTIO_BLK_IRQ = 48
+} InterruptType;
+
+typedef void (*InterruptHandler)();
+
+void init_interrupt();
+void interrupt_global_handler();
+void set_interrupt_handler(InterruptType type, InterruptHandler handler);
diff --git a/src/driver/irq.h b/src/driver/irq.h
new file mode 100644
index 0000000..532dc67
--- /dev/null
+++ b/src/driver/irq.h
@@ -0,0 +1,42 @@
+#include
+
+#define IRQ_BASIC_PENDING (MMIO_BASE + 0xB200)
+#define IRQ_PENDING_1 (MMIO_BASE + 0xB204)
+#define IRQ_PENDING_2 (MMIO_BASE + 0xB208)
+#define FIQ_CONTROL (MMIO_BASE + 0xB20C)
+#define ENABLE_IRQS_1 (MMIO_BASE + 0xB210)
+#define ENABLE_IRQS_2 (MMIO_BASE + 0xB214)
+#define ENABLE_BASIC_IRQS (MMIO_BASE + 0xB218)
+#define DISABLE_IRQS_1 (MMIO_BASE + 0xB21C)
+#define DISABLE_IRQS_2 (MMIO_BASE + 0xB220)
+#define DISABLE_BASIC_IRQS (MMIO_BASE + 0xB224)
+
+#define AUX_INT (1 << 29)
+#define VC_ARASANSDIO_INT (1 << 30)
+
+/* ARM Local Peripherals */
+#define GPU_INT_ROUTE (LOCAL_BASE + 0xC)
+#define GPU_IRQ2CORE(i) (i)
+
+#define IRQ_SRC_CORE(i) (LOCAL_BASE + 0x60 + 4 * (i))
+#define IRQ_SRC_TIMER (1 << 11) /* Local Timer */
+#define IRQ_SRC_GPU (1 << 8)
+#define IRQ_SRC_CNTPNSIRQ (1 << 1) /* Core Timer */
+#define FIQ_SRC_CORE(i) (LOCAL_BASE + 0x70 + 4 * (i))
+
+/* Local timer */
+#define TIMER_ROUTE (LOCAL_BASE + 0x24)
+#define TIMER_IRQ2CORE(i) (i)
+
+#define TIMER_CTRL (LOCAL_BASE + 0x34)
+#define TIMER_INTENA (1 << 29)
+#define TIMER_ENABLE (1 << 28)
+#define TIMER_RELOAD_SEC (38400000) /* 2 * 19.2 MHz */
+
+#define TIMER_CLR (LOCAL_BASE + 0x38)
+#define TIMER_CLR_INT (1 << 31)
+#define TIMER_RELOAD (1 << 30)
+
+/* Core Timer */
+#define CORE_TIMER_CTRL(i) (LOCAL_BASE + 0x40 + 4 * (i))
+#define CORE_TIMER_ENABLE (1 << 1) /* CNTPNSIRQ */
diff --git a/src/driver/memlayout.h b/src/driver/memlayout.h
new file mode 100644
index 0000000..00482e9
--- /dev/null
+++ b/src/driver/memlayout.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#define EXTMEM 0x40000000
+#define PHYSTOP 0x80000000
+
+#define KSPACE_MASK 0xFFFF000000000000
+#define KERNLINK (KSPACE_MASK + EXTMEM) /* Address where kernel is linked */
+
+#define K2P_WO(x) ((x) - (KSPACE_MASK)) /* Same as V2P, but without casts */
+#define P2K_WO(x) ((x) + (KSPACE_MASK)) /* Same as P2V, but without casts */
diff --git a/src/driver/timer.c b/src/driver/timer.c
new file mode 100644
index 0000000..31556b8
--- /dev/null
+++ b/src/driver/timer.c
@@ -0,0 +1,38 @@
+#include
+#include
+#include
+#include
+
+#define CNTV_CTL_ENABLE (1 << 0)
+#define CNTV_CTL_IMASK (1 << 1)
+#define CNTV_CTL_ISTATUS (1 << 2)
+
+void enable_timer()
+{
+ u64 c = get_cntv_ctl_el0();
+ c |= CNTV_CTL_ENABLE;
+ c &= ~CNTV_CTL_IMASK;
+ set_cntv_ctl_el0(c);
+}
+
+void disable_timer()
+{
+ u64 c = get_cntv_ctl_el0();
+ c &= ~CNTV_CTL_ENABLE;
+ c |= CNTV_CTL_IMASK;
+ set_cntv_ctl_el0(c);
+}
+
+bool timer_enabled()
+{
+ u64 c = get_cntv_ctl_el0();
+ return c & 1;
+}
+
+void reload_timer(u64 interval_ms)
+{
+ u64 interval_us = interval_ms * 1000;
+ u64 interval_clk = interval_us * (get_clock_frequency() / 1000000);
+
+ set_cntv_tval_el0(interval_clk);
+}
diff --git a/src/driver/timer.h b/src/driver/timer.h
new file mode 100644
index 0000000..395bed1
--- /dev/null
+++ b/src/driver/timer.h
@@ -0,0 +1,4 @@
+#include
+
+void enable_timer();
+void disable_timer();
\ No newline at end of file
diff --git a/src/driver/uart.c b/src/driver/uart.c
new file mode 100644
index 0000000..18adfe6
--- /dev/null
+++ b/src/driver/uart.c
@@ -0,0 +1,37 @@
+#include
+#include
+#include
+#include
+#include
+
/* UART IRQ handler: clears the two interrupt bits (4 and 5) that
 * uart_init() unmasks; actual input is consumed elsewhere by polling. */
static void uartintr()
{
        device_put_u32(UART_ICR, 1 << 4 | 1 << 5);
}
+
/* Bring up the UART: disable it while reconfiguring, register the IRQ
 * handler, enable FIFOs with 8-bit words, re-enable, then unmask the two
 * interrupts after a short settle delay.
 * NOTE(review): 0x301 presumably sets UARTEN | TXE | RXE — confirm
 * against the PL011 UART_CR register layout. */
void uart_init()
{
        device_put_u32(UART_CR, 0);
        set_interrupt_handler(UART_IRQ, uartintr);
        device_put_u32(UART_LCRH, LCRH_FEN | LCRH_WLEN_8BIT);
        device_put_u32(UART_CR, 0x301);
        device_put_u32(UART_IMSC, 0); // mask all while settling
        delay_us(5);
        device_put_u32(UART_IMSC, 1 << 4 | 1 << 5); // unmask bits 4 and 5
}
+
/* Non-blocking read: return the next RX byte, or -1 when the RX FIFO is
 * empty.
 * NOTE(review): the return type is char, which is unsigned on AArch64, so
 * (char)-1 becomes 255 and callers comparing against -1 may never match —
 * consider returning int (interface change, not made here). */
char uart_get_char()
{
        if (device_get_u32(UART_FR) & FR_RXFE)
                return -1;
        return device_get_u32(UART_DR);
}
+
/* Blocking write: spin while the TX FIFO is full, then enqueue the byte. */
void uart_put_char(char c)
{
        while (device_get_u32(UART_FR) & FR_TXFF)
                ;
        device_put_u32(UART_DR, c);
}
+
+__attribute__((weak, alias("uart_put_char"))) void putch(char);
diff --git a/src/driver/uart.h b/src/driver/uart.h
new file mode 100644
index 0000000..c923c23
--- /dev/null
+++ b/src/driver/uart.h
@@ -0,0 +1,24 @@
+#pragma once
+#include
+
+#define UART_DR (UARTBASE + 0x00)
+#define UART_FR (UARTBASE + 0x18)
+#define FR_RXFE (1 << 4) // Recieve fifo empty
+#define FR_TXFF (1 << 5) // Transmit fifo full
+#define FR_RXFF (1 << 6) // Recieve fifo full
+#define FR_TXFE (1 << 7) // Transmit fifo empty
+#define RXFE (device_get_u32(UART_FR) & FR_RXFE)
+#define TXFF (device_get_u32(UART_FR) & FR_TXFF)
+#define RXFF (device_get_u32(UART_FR) & FR_RXFF)
+#define TXFE (device_get_u32(UART_FR) & FR_TXFE)
+#define UART_IBRD (UARTBASE + 0x24)
+#define UART_FBRD (UARTBASE + 0x28)
+#define UART_LCRH (UARTBASE + 0x2c)
+#define LCRH_FEN (1 << 4)
+#define LCRH_WLEN_8BIT (3 << 5)
+#define UART_CR (UARTBASE + 0x30)
+#define UART_IMSC (UARTBASE + 0x38)
+#define UART_ICR (UARTBASE + 0x44)
+
+void uart_init();
+void uart_put_char(char c);
\ No newline at end of file
diff --git a/src/driver/virtio.h b/src/driver/virtio.h
new file mode 100755
index 0000000..65fa102
--- /dev/null
+++ b/src/driver/virtio.h
@@ -0,0 +1,140 @@
+#pragma once
+
+#include
+#include
+#include
+
+#define NQUEUE 8
+
+#define VIRTIO_REG_MAGICVALUE (VIRTIO0 + 0x00)
+#define VIRTIO_REG_VERSION (VIRTIO0 + 0x04)
+#define VIRTIO_REG_DEVICE_ID (VIRTIO0 + 0x08)
+#define VIRTIO_REG_VENDOR_ID (VIRTIO0 + 0x0c)
+#define VIRTIO_REG_DEVICE_FEATURES (VIRTIO0 + 0x10)
+#define VIRTIO_REG_DEVICE_FEATURES_SEL (VIRTIO0 + 0x14)
+#define VIRTIO_REG_DRIVER_FEATURES (VIRTIO0 + 0x20)
+#define VIRTIO_REG_DRIVER_FEATURES_SEL (VIRTIO0 + 0x24)
+#define VIRTIO_REG_QUEUE_SEL (VIRTIO0 + 0x30)
+#define VIRTIO_REG_QUEUE_NUM_MAX (VIRTIO0 + 0x34)
+#define VIRTIO_REG_QUEUE_NUM (VIRTIO0 + 0x38)
+#define VIRTIO_REG_QUEUE_READY (VIRTIO0 + 0x44)
+#define VIRTIO_REG_QUEUE_NOTIFY (VIRTIO0 + 0x50)
+#define VIRTIO_REG_INTERRUPT_STATUS (VIRTIO0 + 0x60)
+#define VIRTIO_REG_INTERRUPT_ACK (VIRTIO0 + 0x64)
+#define VIRTIO_REG_STATUS (VIRTIO0 + 0x70)
+#define VIRTIO_REG_QUEUE_DESC_LOW (VIRTIO0 + 0x80)
+#define VIRTIO_REG_QUEUE_DESC_HIGH (VIRTIO0 + 0x84)
+#define VIRTIO_REG_QUEUE_DRIVER_LOW (VIRTIO0 + 0x90)
+#define VIRTIO_REG_QUEUE_DRIVER_HIGH (VIRTIO0 + 0x94)
+#define VIRTIO_REG_QUEUE_DEVICE_LOW (VIRTIO0 + 0xa0)
+#define VIRTIO_REG_QUEUE_DEVICE_HIGH (VIRTIO0 + 0xa4)
+#define VIRTIO_REG_CONFIG_GENERATION (VIRTIO0 + 0xfc)
+#define VIRTIO_REG_CONFIG (VIRTIO0 + 0x100)
+
+#define DEV_STATUS_ACKNOWLEDGE 1
+#define DEV_STATUS_DRIVER 2
+#define DEV_STATUS_FAILED 128
+#define DEV_STATUS_FEATURES_OK 8
+#define DEV_STATUS_DRIVER_OK 4
+#define DEV_STATUS_NEEDS_RESET 64
+
+#define VIRTIO_BLK_F_SIZE_MAX 1
+#define VIRTIO_BLK_F_SEG_MAX 2
+#define VIRTIO_BLK_F_GEOMETRY 4
+#define VIRTIO_BLK_F_RO 5
+#define VIRTIO_BLK_F_BLK_SIZE 6
+#define VIRTIO_BLK_F_FLUSH 9
+#define VIRTIO_BLK_F_TOPOLOGY 10
+#define VIRTIO_BLK_F_CONFIG_WCE 11
+#define VIRTIO_BLK_F_DISCARD 13
+#define VIRTIO_BLK_F_WRITE_ZEROES 14
+#define VIRTIO_F_ANY_LAYOUT 27
+#define VIRTIO_RING_F_INDIRECT_DESC 28
+#define VIRTIO_RING_F_EVENT_IDX 29
+
+#define VIRTIO_BLK_S_OK 0
+#define VIRTIO_BLK_S_IOERR 1
+#define VIRTIO_BLK_S_UNSUPP 2
+
+#define VIRTQ_DESC_F_NEXT 1
+#define VIRTQ_DESC_F_WRITE 2
+#define VIRTQ_DESC_F_INDIRECT 4
+struct virtq_desc {
+ u64 addr;
+ u32 len;
+ u16 flags;
+ u16 next;
+} __attribute__((packed, aligned(16)));
+
+#define VIRTQ_AVAIL_F_NO_INTERRUPT 1
+struct virtq_avail {
+ u16 flags;
+ u16 idx;
+ u16 ring[NQUEUE];
+} __attribute__((packed, aligned(2)));
+
+struct virtq_used_elem {
+ u32 id;
+ u32 len;
+} __attribute__((packed));
+
+#define VIRTQ_USED_F_NO_NOTIFY 1
+struct virtq_used {
+ u16 flags;
+ u16 idx;
+ struct virtq_used_elem ring[NQUEUE];
+} __attribute__((packed, aligned(4)));
+
+struct virtq {
+ struct virtq_desc *desc;
+ struct virtq_avail *avail;
+ struct virtq_used *used;
+ u16 free_head;
+ u16 nfree;
+ u16 last_used_idx;
+
+ struct {
+ volatile u8 status;
+ volatile u8 done;
+ u8 *buf;
+ } info[NQUEUE];
+};
+
+#define VIRTIO_BLK_T_IN 0
+#define VIRTIO_BLK_T_OUT 1
+#define VIRTIO_BLK_T_FLUSH 4
+#define VIRTIO_BLK_T_DISCARD 11
+#define VIRTIO_BLK_T_WRITE_ZEROES 13
+struct virtio_blk_req_hdr {
+ u32 type;
+ u32 reserved;
+ u64 sector;
+} __attribute__((packed));
+
+enum diskop {
+ DREAD,
+ DWRITE,
+};
+
+int virtio_blk_rw(Buf *b);
+void virtio_init(void);
+
+
+typedef struct __attribute__((packed)) {
+ u8 boot_indicator; // 0x00
+ u8 chs_first[3]; // 0x01
+ u8 partition_type; // 0x04
+ u8 chs_last[3]; // 0x05
+ u32 lba_start; // 0x08 LBA of first absolute sector in the partition
+ u32 num_sectors; // 0x0C Number of sectors in partition
+} PartitionEntry;
+
+typedef struct __attribute__((packed)) {
+ u8 bootcode[446]; // 0x000
+ PartitionEntry pte[4]; // 0x1BE, 4 * 16B
+ u16 signature; // 0x1FE (0xAA55)
+} MBR;
+
+/* helpers to access partition 2 (index 1) */
+static inline u32 mbr_partition2_lba(const MBR *m) { return m->pte[1].lba_start; }
+static inline u32 mbr_partition2_sectors(const MBR *m) { return m->pte[1].num_sectors; }
\ No newline at end of file
diff --git a/src/driver/virtio_blk.c b/src/driver/virtio_blk.c
new file mode 100755
index 0000000..499c8a4
--- /dev/null
+++ b/src/driver/virtio_blk.c
@@ -0,0 +1,265 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define VIRTIO_MAGIC 0x74726976
+
+struct disk {
+ SpinLock lk;
+ struct virtq virtq;
+} disk;
+
+static void desc_init(struct virtq *virtq)
+{
+ for (int i = 0; i < NQUEUE; i++) {
+ if (i != NQUEUE - 1) {
+ virtq->desc[i].flags = VIRTQ_DESC_F_NEXT;
+ virtq->desc[i].next = i + 1;
+ }
+ }
+}
+
/* Pop one descriptor off the free list. Caller must hold disk.lk.
 * Panics on exhaustion, so the `< 0` checks at call sites are defensive.
 * The free-list tail has no NEXT flag; in that case free_head is left
 * as-is (the list is now empty, tracked by nfree). */
static int alloc_desc(struct virtq *virtq)
{
        if (virtq->nfree == 0) {
                PANIC();
        }

        u16 d = virtq->free_head;
        if (virtq->desc[d].flags & VIRTQ_DESC_F_NEXT)
                virtq->free_head = virtq->desc[d].next;

        virtq->nfree--;

        return d;
}
+
/* Return the descriptor chain starting at `n` to the free list.
 * Walks the chain via the NEXT flag, counting each descriptor back into
 * nfree, then splices the chain in front of the old free head and makes
 * `n` the new head. If the list was completely empty there is no old head
 * to link to — hence the `empty` special case. Caller holds disk.lk. */
static void free_desc(struct virtq *virtq, u16 n)
{
        u16 head = n;
        int empty = 0;

        if (virtq->nfree == 0)
                empty = 1;

        /* comma operator: count every visited descriptor, stop at the one
         * without a NEXT flag (the chain tail) */
        while (virtq->nfree++, (virtq->desc[n].flags & VIRTQ_DESC_F_NEXT)) {
                n = virtq->desc[n].next;
        }

        virtq->desc[n].flags = VIRTQ_DESC_F_NEXT;
        if (!empty)
                virtq->desc[n].next = virtq->free_head;
        virtq->free_head = head;
}
+
+int virtio_blk_rw(Buf *b)
+{
+ enum diskop op = DREAD;
+ if (b->flags & B_DIRTY)
+ op = DWRITE;
+
+ init_sem(&b->sem, 0);
+
+ u64 sector = b->block_no;
+ struct virtio_blk_req_hdr hdr;
+
+ if (op == DREAD)
+ hdr.type = VIRTIO_BLK_T_IN;
+ else if (op == DWRITE)
+ hdr.type = VIRTIO_BLK_T_OUT;
+ else
+ return -1;
+ hdr.reserved = 0;
+ hdr.sector = sector;
+
+ acquire_spinlock(&disk.lk);
+
+ int d0 = alloc_desc(&disk.virtq);
+ if (d0 < 0)
+ return -1;
+ disk.virtq.desc[d0].addr = (u64)V2P(&hdr);
+ disk.virtq.desc[d0].len = sizeof(hdr);
+ disk.virtq.desc[d0].flags = VIRTQ_DESC_F_NEXT;
+
+ int d1 = alloc_desc(&disk.virtq);
+ if (d1 < 0)
+ return -1;
+ disk.virtq.desc[d0].next = d1;
+ disk.virtq.desc[d1].addr = (u64)V2P(b->data);
+ disk.virtq.desc[d1].len = 512;
+ disk.virtq.desc[d1].flags = VIRTQ_DESC_F_NEXT;
+ if (op == DREAD)
+ disk.virtq.desc[d1].flags |= VIRTQ_DESC_F_WRITE;
+
+ int d2 = alloc_desc(&disk.virtq);
+ if (d2 < 0)
+ return -1;
+ disk.virtq.desc[d1].next = d2;
+ disk.virtq.desc[d2].addr = (u64)V2P(&disk.virtq.info[d0].status);
+ disk.virtq.desc[d2].len = sizeof(disk.virtq.info[d0].status);
+ disk.virtq.desc[d2].flags = VIRTQ_DESC_F_WRITE;
+ disk.virtq.desc[d2].next = 0;
+
+ disk.virtq.avail->ring[disk.virtq.avail->idx % NQUEUE] = d0;
+ disk.virtq.avail->idx++;
+
+ disk.virtq.info[d0].buf = b->data;
+
+ arch_fence();
+ REG(VIRTIO_REG_QUEUE_NOTIFY) = 0;
+ arch_fence();
+
+ /* LAB 4 TODO 1 BEGIN */
+
+ release_spinlock(&disk.lk);
+ _lock_sem(&b->sem);
+ bool ok = _wait_sem(&b->sem, true);
+ if (!ok) {
+ PANIC();
+ }
+ acquire_spinlock(&disk.lk);
+
+ /* LAB 4 TODO 1 END */
+
+ disk.virtq.info[d0].done = 0;
+ free_desc(&disk.virtq, d0);
+ release_spinlock(&disk.lk);
+ return 0;
+}
+
+static void virtio_blk_intr()
+{
+ acquire_spinlock(&disk.lk);
+
+ u32 intr_status = REG(VIRTIO_REG_INTERRUPT_STATUS);
+ REG(VIRTIO_REG_INTERRUPT_ACK) = intr_status & 0x3;
+
+ int d0;
+ while (disk.virtq.last_used_idx != disk.virtq.used->idx) {
+ d0 = disk.virtq.used->ring[disk.virtq.last_used_idx % NQUEUE].id;
+ if (disk.virtq.info[d0].status != 0) {
+ PANIC();
+ }
+
+ /* LAB 4 TODO 2 BEGIN */
+ Buf *b = container_of((void*)disk.virtq.info[d0].buf, Buf, data);
+ if (b != NULL) {
+ post_sem(&b->sem);
+ }
+ /* LAB 4 TODO 2 END */
+
+ disk.virtq.info[d0].buf = NULL;
+ disk.virtq.last_used_idx++;
+ }
+
+ release_spinlock(&disk.lk);
+}
+
+static int virtq_init(struct virtq *vq)
+{
+ memset(vq, 0, sizeof(*vq));
+
+ vq->desc = kalloc_page();
+ vq->avail = kalloc_page();
+ vq->used = kalloc_page();
+
+ memset(vq->desc, 0, 4096);
+ memset(vq->avail, 0, 4096);
+ memset(vq->used, 0, 4096);
+
+ if (!vq->desc || !vq->avail || !vq->used) {
+ PANIC();
+ }
+ vq->nfree = NQUEUE;
+ desc_init(vq);
+
+ return 0;
+}
+
+void virtio_init()
+{
+ if (REG(VIRTIO_REG_MAGICVALUE) != VIRTIO_MAGIC ||
+ REG(VIRTIO_REG_VERSION) != 2 || REG(VIRTIO_REG_DEVICE_ID) != 2) {
+ printk("[Virtio]: Device not found.");
+ PANIC();
+ }
+
+ /* Reset the device. */
+ REG(VIRTIO_REG_STATUS) = 0;
+
+ u32 status = 0;
+
+ /* Set the ACKNOWLEDGE status bit: the guest OS has noticed the device. */
+ status |= DEV_STATUS_ACKNOWLEDGE;
+ REG(VIRTIO_REG_STATUS) = status;
+
+ /* Set the DRIVER status bit: the guest OS knows how to drive the device. */
+ status |= DEV_STATUS_DRIVER;
+ REG(VIRTIO_REG_STATUS) = status;
+
+ /* Read device feature bits, and write the subset of feature bits understood by the OS and driver to the device. */
+ REG(VIRTIO_REG_DEVICE_FEATURES_SEL) = 0;
+ REG(VIRTIO_REG_DRIVER_FEATURES_SEL) = 0;
+
+ u32 features = REG(VIRTIO_REG_DEVICE_FEATURES);
+ features &= ~(1 << VIRTIO_BLK_F_SEG_MAX);
+ features &= ~(1 << VIRTIO_BLK_F_GEOMETRY);
+ features &= ~(1 << VIRTIO_BLK_F_RO);
+ features &= ~(1 << VIRTIO_BLK_F_BLK_SIZE);
+ features &= ~(1 << VIRTIO_BLK_F_FLUSH);
+ features &= ~(1 << VIRTIO_BLK_F_TOPOLOGY);
+ features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
+ features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
+ features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
+ features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
+ REG(VIRTIO_REG_DRIVER_FEATURES) = features;
+
+ status |= DEV_STATUS_FEATURES_OK;
+ REG(VIRTIO_REG_STATUS) = status;
+
+ arch_fence();
+ status = REG(VIRTIO_REG_STATUS);
+ arch_fence();
+ if (!(status & DEV_STATUS_FEATURES_OK)) {
+ PANIC();
+ }
+
+ virtq_init(&disk.virtq);
+
+ int qmax = REG(VIRTIO_REG_QUEUE_NUM_MAX);
+ if (qmax < NQUEUE) {
+ printk("[Virtio]: Too many queues.");
+ PANIC();
+ }
+
+ REG(VIRTIO_REG_QUEUE_SEL) = 0;
+ REG(VIRTIO_REG_QUEUE_NUM) = NQUEUE;
+
+ u64 phy_desc = V2P(disk.virtq.desc);
+ REG(VIRTIO_REG_QUEUE_DESC_LOW) = LO(phy_desc);
+ REG(VIRTIO_REG_QUEUE_DESC_HIGH) = HI(phy_desc);
+
+ u64 phy_avail = V2P(disk.virtq.avail);
+ REG(VIRTIO_REG_QUEUE_DRIVER_LOW) = LO(phy_avail);
+ REG(VIRTIO_REG_QUEUE_DRIVER_HIGH) = HI(phy_avail);
+ u64 phy_used = V2P(disk.virtq.used);
+
+ REG(VIRTIO_REG_QUEUE_DEVICE_LOW) = LO(phy_used);
+ REG(VIRTIO_REG_QUEUE_DEVICE_HIGH) = HI(phy_used);
+
+ arch_fence();
+
+ REG(VIRTIO_REG_QUEUE_READY) = 1;
+ status |= DEV_STATUS_DRIVER_OK;
+ REG(VIRTIO_REG_STATUS) = status;
+
+ arch_fence();
+
+ set_interrupt_handler(VIRTIO_BLK_IRQ, virtio_blk_intr);
+ init_spinlock(&disk.lk);
+}
diff --git a/src/fs/CMakeLists.txt b/src/fs/CMakeLists.txt
new file mode 100755
index 0000000..fb46d81
--- /dev/null
+++ b/src/fs/CMakeLists.txt
@@ -0,0 +1,3 @@
+file(GLOB fs_sources CONFIGURE_DEPENDS "*.c") # gather all C sources here; CONFIGURE_DEPENDS re-runs the glob at build time
+
+add_library(fs STATIC ${fs_sources}) # filesystem layer, linked statically into the kernel image
diff --git a/src/fs/block_device.c b/src/fs/block_device.c
new file mode 100755
index 0000000..9a3f075
--- /dev/null
+++ b/src/fs/block_device.c
@@ -0,0 +1,50 @@
+#include
+#include
+
+/**
+ @brief a simple implementation of reading a block from SD card.
+
+ @param[in] block_no the block number to read
+ @param[out] buffer the buffer to store the data (must hold BLOCK_SIZE bytes)
+ */
+static void sd_read(usize block_no, u8 *buffer) {
+    Buf b;
+    b.block_no = (u32)block_no;
+    b.flags = 0; // neither B_VALID nor B_DIRTY: the virtio layer treats this as a read request
+    virtio_blk_rw(&b); // assumes the request completes before returning (data is copied right after)
+    memcpy(buffer, b.data, BLOCK_SIZE); // copy out of the stack-allocated Buf
+}
+
+/**
+ @brief a simple implementation of writing a block to SD card.
+
+ @param[in] block_no the block number to write
+ @param[in] buffer the buffer to store the data (must hold BLOCK_SIZE bytes)
+ */
+static void sd_write(usize block_no, u8 *buffer) {
+    Buf b;
+    b.block_no = (u32)block_no;
+    b.flags = B_DIRTY | B_VALID; // dirty + valid: the virtio layer treats this as a write request
+    memcpy(b.data, buffer, BLOCK_SIZE); // stage the payload into the on-stack Buf first
+    virtio_blk_rw(&b);
+}
+
+/**
+ @brief the in-memory copy of the super block.
+
+ We may need to read the super block multiple times, so keep a copy of it in
+ memory.
+
+ @note the super block, in our lab, is always read-only, so we don't need to
+ write it back.
+ */
+static u8 sblock_data[BLOCK_SIZE]; // raw bytes; reinterpreted as SuperBlock by get_super_block()
+
+BlockDevice block_device; // the global device instance; function pointers wired up in init_block_device()
+
+void init_block_device() {
+    block_device.read = sd_read, block_device.write = sd_write; /* install the SD-card backend for the generic interface */
+    sd_read(1, sblock_data); /* fix: load the super block (disk block 1, right after the MBR) so get_super_block() returns real data instead of zeros */
+}
+
+const SuperBlock *get_super_block() { return (const SuperBlock *)sblock_data; } // NOTE(review): only meaningful once sblock_data has been filled from disk -- confirm the init path loads it
\ No newline at end of file
diff --git a/src/fs/block_device.h b/src/fs/block_device.h
new file mode 100755
index 0000000..ed9b361
--- /dev/null
+++ b/src/fs/block_device.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include
+
+/**
+ @brief interface for block devices.
+
+ @note yes, there is no OOP in C, but we can use function pointers to
+ simulate it. this is a common pattern in C, and you can find it in Linux
+ kernel too.
+
+ @see init_block_device
+ */
+typedef struct {
+    /**
+    read `BLOCK_SIZE` bytes in block at `block_no` to `buffer`.
+    caller must guarantee `buffer` is large enough.
+
+    @param[in] block_no the block number to read from.
+    @param[out] buffer the buffer to read into.
+    */
+    void (*read)(usize block_no, u8 *buffer);
+
+    /**
+    write `BLOCK_SIZE` bytes in `buffer` to block at `block_no`.
+    caller must guarantee `buffer` is large enough.
+
+    @param[in] block_no the block number to write to.
+    @param[in] buffer the buffer to write from.
+    */
+    void (*write)(usize block_no, u8 *buffer);
+} BlockDevice; // concrete backend (function pointers) is installed by init_block_device
+
+/**
+ @brief the global block device instance.
+ */
+extern BlockDevice block_device;
+
+/**
+ @brief initialize the block device.
+
+ This method must be called before any other block device methods,
+ and initializes the global block device and (if necessary) the
+ global super block.
+
+ e.g. for the SD card, this method is responsible for initializing
+ the SD card and reading the super block from the SD card.
+
+ @note You may want to put it into `*_init` method groups.
+ */
+void init_block_device();
+
+/**
+ * @brief get the global super block.
+ *
+ * @return const SuperBlock* the global super block.
+ */
+const SuperBlock *get_super_block(); // only valid after init_block_device has run
\ No newline at end of file
diff --git a/src/fs/cache.c b/src/fs/cache.c
new file mode 100755
index 0000000..5458803
--- /dev/null
+++ b/src/fs/cache.c
@@ -0,0 +1,358 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+/**
+ @brief the private reference to the super block.
+
+ @note we need these two variables because we allow the caller to
+ specify the block device and super block to use.
+ Correspondingly, you should NEVER use global instance of
+ them, e.g. `get_super_block`, `block_device`
+
+ @see init_bcache
+ */
+static const SuperBlock *sblock;
+
+/**
+ @brief the reference to the underlying block device.
+ */
+static const BlockDevice *device;
+
+/**
+ @brief global lock for block cache.
+
+ Use it to protect anything you need.
+
+ e.g. the list of allocated blocks, etc.
+ */
+static SpinLock lock;
+
+/**
+ @brief the list of all allocated in-memory block.
+
+ We use a linked list to manage all allocated cached blocks.
+
+ You can implement your own data structure if you like better performance.
+
+ @see Block
+ */
+static ListNode head; // MRU at head->next, LRU at head->prev (see cache_acquire)
+
+static LogHeader header; // in-memory copy of log header block.
+
+/**
+ @brief a struct to maintain other logging states.
+
+ You may wonder where we store some states, e.g.
+
+ * how many atomic operations are running?
+ * are we checkpointing?
+ * how to notify `end_op` that a checkpoint is done?
+
+ Put them here!
+
+ @see cache_begin_op, cache_end_op, cache_sync
+ */
+struct {
+    int waiting_ops; // number of atomic operations currently running (decremented by end_op)
+    bool committing; // is the log being committed?
+    Semaphore logsem; // semaphore to notify the completion of checkpointing
+} log;
+
+// read the content from disk.
+static INLINE void device_read(Block *block) {
+    device->read(block->block_no, block->data);
+}
+
+// write the content back to disk.
+static INLINE void device_write(Block *block) {
+    device->write(block->block_no, block->data);
+}
+
+// read log header from disk. (the header occupies the first block of the log area)
+static INLINE void read_header() {
+    device->read(sblock->log_start, (u8 *)&header);
+}
+
+// write log header back to disk. (this single write is the commit point of an atomic op)
+static INLINE void write_header() {
+    device->write(sblock->log_start, (u8 *)&header);
+}
+
+// initialize a block struct. (fields zeroed, sleeplock fresh; caller sets block_no)
+static void init_block(Block *block) {
+    block->block_no = 0;
+    init_list_node(&block->node);
+    block->acquired = false;
+    block->refcnt = 0;
+    block->pinned = false;
+
+    init_sleeplock(&block->lock);
+    block->valid = false; // content not loaded from disk yet
+    memset(block->data, 0, sizeof(block->data));
+}
+
+static usize cache_nums_blocks; // number of Blocks currently cached; protected by `lock`
+
+// see `cache.h`.
+static usize get_num_cached_blocks() {
+    return cache_nums_blocks; // NOTE(review): read without holding `lock`; acceptable for statistics only
+}
+
+// see `cache.h`.
+static Block *cache_acquire(usize block_no) {
+    //printk("[BCACHE] Acquiring block no %llu\n", block_no);
+    acquire_spinlock(&lock);
+    Block *block_to_acquire = NULL;
+    Block *block_current = NULL;
+    _for_in_list(nd, &head) {
+        if (nd == &head) continue; // skip the dummy head node
+        block_current = container_of(nd, Block, node);
+        if (block_current->block_no == block_no) {
+            block_to_acquire = block_current;
+            break;
+        }
+
+    }
+    if (block_to_acquire != NULL) {
+        //printk("[BCACHE] Block no %llu found in cache\n", block_no);
+        // found in cache
+        block_to_acquire->refcnt++; // bump before dropping the lock so eviction skips this block
+        block_to_acquire->acquired = true;
+        release_spinlock(&lock);
+        if (!wait_sem(&block_to_acquire->lock)) { // sleep until the current holder releases it
+            //printk("[BCACHE] Error: cannot acquire block no %llu\n", block_no);
+            PANIC();
+        }
+        acquire_spinlock(&lock);
+        _detach_from_list(&block_to_acquire->node); // move to front: keeps the list in LRU order
+        _insert_into_list(&head, &block_to_acquire->node);
+        release_spinlock(&lock);
+        //printk("[BCACHE] Acquired block no %llu\n", block_no);
+        return block_to_acquire;
+    }
+    //printk("[BCACHE] Block no %llu not found in cache\n", block_no);
+    // not found in cache
+    // need to allocate a new block, evict if necessary
+    ListNode *p = head.prev; // scan from the tail, i.e. least-recently-used first
+    while(cache_nums_blocks >= EVICTION_THRESHOLD){
+        ListNode* next_p = p->prev; // saved because p may be freed below
+        if(p == &head){
+            break; // walked the whole list; everything left is acquired or pinned
+        }
+        Block* block_current = container_of(p, Block, node);
+        if(!block_current->acquired && !block_current->pinned){
+            _detach_from_list(p);
+            cache_nums_blocks--;
+            kfree(block_current);
+        }
+        p = next_p;
+    }
+    block_to_acquire = kalloc(sizeof(Block));
+    init_block(block_to_acquire);
+    if (!wait_sem(&block_to_acquire->lock)) { // freshly-initialized sleeplock: succeeds immediately
+        //printk("[BCACHE] Error: cannot acquire newly allocated block\n");
+        PANIC();
+    }
+    block_to_acquire->block_no = block_no;
+    block_to_acquire->refcnt = 1;
+    block_to_acquire->acquired = true;
+    block_to_acquire->valid = true; // NOTE(review): set before the disk read below actually fills data
+    cache_nums_blocks++;
+    release_spinlock(&lock);
+    device_read(block_to_acquire); // NOTE(review): lock dropped while the block is not yet on `head`; a concurrent acquire of the same block_no can allocate a duplicate -- confirm, or insert before reading
+    acquire_spinlock(&lock);
+    _insert_into_list(&head, &block_to_acquire->node);
+    release_spinlock(&lock);
+    //printk("[BCACHE] Acquired block no %llu\n", block_no);
+    return block_to_acquire;
+}
+
+// see `cache.h`.
+static void cache_release(Block *block) {
+    acquire_spinlock(&lock);
+    if (block->refcnt <= 0) {
+        //printk("[BCACHE] Error: releasing a block that is not acquired\n");
+        PANIC(); // release without a matching acquire
+    }
+    block->refcnt--;
+    if (block->refcnt == 0) {
+        block->acquired = false; // now a candidate for eviction
+    }
+    post_sem(&block->lock); // wake one waiter blocked in cache_acquire
+    release_spinlock(&lock);
+}
+
+// see `cache.h`.
+static void cache_begin_op(OpContext *ctx) {
+    //printk("[BCACHE] Beginning atomic operation\n");
+    acquire_spinlock(&lock);
+    ctx->rm = OP_MAX_NUM_BLOCKS; // quota of distinct blocks this op may still log (ctx->ts left unset -- test-only field)
+    while (log.committing || LOG_MAX_SIZE <= header.num_blocks + (log.waiting_ops + 1)* OP_MAX_NUM_BLOCKS) {
+        release_spinlock(&lock); // a commit is running, or reserving worst-case space would overflow the log
+        if (!wait_sem(&log.logsem)) { // sleep until end_op posts after a checkpoint
+            //printk("[BCACHE] Error: cannot begin atomic operation due to log full\n");
+            PANIC();
+        }
+        acquire_spinlock(&lock); // re-check the admission condition under the lock
+    }
+    log.waiting_ops++; // counts running ops (name is historical)
+    release_spinlock(&lock);
+}
+
+// see `cache.h`. Record `block` in the running atomic operation's log.
+static void cache_sync(OpContext *ctx, Block *block) {
+    // ctx == NULL means unlogged write-through (dangerous; see cache.h).
+    if(ctx == NULL) {
+        device_write(block);
+        return;
+    }
+    acquire_spinlock(&lock);
+    block->pinned = true; // keep it cached until the log checkpoints it
+    bool already_in_log = false;
+    for(usize i = 0; i < header.num_blocks; i++) {
+        if(header.block_no[i] == block->block_no) {
+            already_in_log = true; // absorbed: re-syncing an already-logged block is free
+            break;
+        }
+    }
+    if(!already_in_log) {
+        // fix: bounds-check BEFORE storing; the old code wrote header.block_no[num_blocks] first, an out-of-bounds write whenever the log was already full.
+        if(ctx->rm <= 0 || header.num_blocks >= LOG_MAX_SIZE) {
+            PANIC();
+        }
+        header.block_no[header.num_blocks] = block->block_no;
+        header.num_blocks++;
+        ctx->rm--;
+    }
+    release_spinlock(&lock);
+}
+
+// see `cache.h`.
+static void cache_end_op(OpContext *ctx) {
+    //printk("[BCACHE] Ending atomic operation\n");
+    ctx = ctx; // self-assignment only silences the unused-parameter warning
+    acquire_spinlock(&lock);
+    log.waiting_ops--;
+    if(log.waiting_ops > 0) {
+        post_sem(&log.logsem); // not the last op: wake a waiter; the last op out performs the commit
+        release_spinlock(&lock);
+        return;
+    }
+
+    log.committing = true; // from here, begin_op blocks until the checkpoint finishes
+    release_spinlock(&lock);
+
+    for(usize i = 0; i < header.num_blocks; i++) { // phase 1: copy dirty blocks into the on-disk log area
+        usize from_block_no = header.block_no[i];
+        usize to_block_no = sblock->log_start + 1 + i; // +1 skips the log header block
+        Block *from_block = cache_acquire(from_block_no);
+        Block *to_block = cache_acquire(to_block_no);
+        for(int j = 0; j < BLOCK_SIZE; j++) {
+            to_block->data[j] = from_block->data[j];
+        }
+        device_write(to_block);
+        cache_release(from_block);
+        cache_release(to_block);
+    }
+    write_header(); // commit point: header now names the logged blocks on disk
+    for(usize i = 0; i < header.num_blocks; i++) { // phase 2: install blocks at their home locations
+        Block *block = cache_acquire(header.block_no[i]);
+        device_write(block);
+        acquire_spinlock(&lock);
+        block->pinned = false; // persisted; eviction may reclaim it again
+        release_spinlock(&lock);
+        cache_release(block);
+    }
+
+    acquire_spinlock(&lock);
+    header.num_blocks = 0;
+    release_spinlock(&lock);
+
+    write_header(); // checkpoint done: the empty header invalidates the log
+
+    acquire_spinlock(&lock);
+    log.committing = false;
+    post_all_sem(&log.logsem); // wake every op parked in begin_op
+    release_spinlock(&lock);
+}
+
+// see `cache.h`.
+static usize cache_alloc(OpContext *ctx) {
+    //printk("[BCACHE] Allocating block\n");
+    Block *bitmap_block = cache_acquire(sblock->bitmap_start); // NOTE(review): only the FIRST bitmap block is consulted; assumes num_blocks <= BIT_PER_BLOCK -- confirm for larger disks
+    for(usize i = 0; i < sblock->num_blocks; i++) {
+        if(!bitmap_get((BitmapCell*)bitmap_block->data, i)) {
+            bitmap_set((BitmapCell*)bitmap_block->data, i); // claim it: bit i == block number i
+
+            cache_sync(ctx, bitmap_block); // log the bitmap change under the caller's atomic op
+            cache_release(bitmap_block);
+
+            Block *new_block = cache_acquire(i);
+            memset(new_block->data, 0, BLOCK_SIZE); // contract: returned block is zero-initialized
+            cache_sync(ctx, new_block);
+            cache_release(new_block);
+            return i;
+        }
+    }
+    cache_release(bitmap_block);
+    //printk("[BCACHE] Error: no free block available for allocation\n");
+    PANIC(); // disk is full
+}
+
+// see `cache.h`.
+static void cache_free(OpContext *ctx, usize block_no) {
+    //printk("[BCACHE] Freeing block no %llu\n", block_no);
+    Block *bitmap_block = cache_acquire(sblock->bitmap_start); // NOTE(review): same single-bitmap-block assumption as cache_alloc
+
+    bitmap_clear((BitmapCell*)bitmap_block->data, block_no); // clearing an already-free bit is harmless (spec: no panic)
+
+    cache_sync(ctx, bitmap_block); // log the bitmap change under the caller's atomic op
+    cache_release(bitmap_block);
+}
+
+// see `cache.h`. Also replays any committed-but-unapplied log (crash recovery).
+void init_bcache(const SuperBlock *_sblock, const BlockDevice *_device) {
+    sblock = _sblock;
+    device = _device;
+    cache_nums_blocks = 0;
+    init_spinlock(&lock);
+
+    init_sem(&log.logsem, 0);
+    log.waiting_ops = 0;
+    log.committing = false;
+
+    init_list_node(&head);
+    read_header(); // on-disk header lists blocks committed before a possible crash
+    for (usize i = 0; i < header.num_blocks; i++) {
+        usize real_block_no = header.block_no[i];
+        usize log_block_no = sblock->log_start + 1 + i; // +1 skips the header block
+        Block *real_block = cache_acquire(real_block_no);
+        Block *log_block = cache_acquire(log_block_no);
+
+        memcpy(real_block->data, log_block->data, BLOCK_SIZE); // replay: log copy -> home location
+
+        device_write(real_block);
+        cache_release(real_block);
+        cache_release(log_block);
+    }
+    header.num_blocks = 0;
+    // fix: zero the whole array -- the old size LOG_MAX_SIZE was an element count used as bytes, so only 1/8 of the usize slots were cleared.
+    memset(header.block_no, 0, sizeof(header.block_no));
+    write_header(); // persist the empty header: log is now invalidated
+}
+
+BlockCache bcache = { // the global instance exported via cache.h; wires the interface to this file's implementation
+    .get_num_cached_blocks = get_num_cached_blocks,
+    .acquire = cache_acquire,
+    .release = cache_release,
+    .begin_op = cache_begin_op,
+    .sync = cache_sync,
+    .end_op = cache_end_op,
+    .alloc = cache_alloc,
+    .free = cache_free,
+};
\ No newline at end of file
diff --git a/src/fs/cache.h b/src/fs/cache.h
new file mode 100755
index 0000000..754df5d
--- /dev/null
+++ b/src/fs/cache.h
@@ -0,0 +1,262 @@
+#pragma once
+#include
+#include
+#include
+#include
+
+/**
+ @brief maximum number of distinct blocks that one atomic operation can hold.
+ */
+#define OP_MAX_NUM_BLOCKS 10
+
+/**
+ @brief the threshold of block cache to start eviction.
+
+ if the number of cached blocks is no less than this threshold, we can
+ evict some blocks in `acquire` to keep block cache small.
+ */
+#define EVICTION_THRESHOLD 20 // soft limit: eviction only reclaims unacquired, unpinned blocks
+
+/**
+ @brief a block in block cache.
+
+ @note you can add any member to this struct as you want.
+ */
+typedef struct {
+    /**
+    @brief the corresponding block number on disk.
+
+    @note should be protected by the global lock of the block cache.
+
+    @note required by our test. Do NOT remove it.
+    */
+    usize block_no;
+
+    /**
+    @brief list this block into a linked list.
+
+    @note should be protected by the global lock of the block cache.
+    */
+    ListNode node;
+
+    /**
+    @brief is the block already acquired by some thread or process?
+
+    @note should be protected by the global lock of the block cache.
+    */
+    bool acquired;
+
+    /**
+    @brief the reference count of the block.
+
+    @note should be protected by the global lock of the block cache.
+    */
+    int refcnt; // acquirers + waiters; eviction skips any block with acquired set
+
+    /**
+    @brief is the block pinned?
+
+    A pinned block should not be evicted from the cache.
+
+    e.g. it is dirty.
+
+    @note should be protected by the global lock of the block cache.
+    */
+    bool pinned; // set by sync(ctx, ...), cleared once the log checkpoints the block
+
+    /**
+    @brief the sleep lock protecting `valid` and `data`.
+    */
+    SleepLock lock;
+
+    /**
+    @brief is the content of block loaded from disk?
+
+    You may find it useless and it *is*. It is just a test flag read
+    by our test. In your code, you should:
+
+    * set `valid` to `false` when you allocate a new `Block` struct.
+    * set `valid` to `true` only after you load the content of block from
+    disk.
+
+    @note required by our test. Do NOT remove it.
+    */
+    bool valid;
+    /**
+    @brief the real in-memory content of the block on disk.
+    */
+    u8 data[BLOCK_SIZE];
+} Block;
+
+/**
+ @brief an atomic operation context.
+
+ @note add any member to this struct as you want.
+
+ @see begin_op, end_op
+ */
+typedef struct {
+    /**
+    @brief how many operation remains in this atomic operation?
+
+    If `rm` is 0, any **new** `sync` will panic.
+    */
+    usize rm; // decremented by sync for each new distinct block logged
+    /**
+    @brief a timestamp (i.e. an ID) to identify this atomic operation.
+
+    @note your implementation does NOT have to use this field, just ignoring
+    it is OK too.
+
+    @note only required by our test. Do NOT remove it.
+    */
+    usize ts;
+} OpContext;
+
+
+typedef struct { // vtable-style interface; the global instance is `bcache`, implemented in cache.c
+    /**
+    @return the number of cached blocks at this moment.
+
+    @note only required by our test to print statistics.
+    */
+    usize (*get_num_cached_blocks)();
+
+    /**
+    @brief declare a block as acquired by the caller.
+
+    It reads the content of block at `block_no` from disk, and locks the
+    block so that the caller can exclusively modify it.
+
+    @return the pointer to the locked block.
+
+    @see `release` - the counterpart of this function.
+    */
+    Block *(*acquire)(usize block_no);
+
+    /**
+    @brief declare an acquired block as released by the caller.
+
+    It unlocks the block so that other threads can acquire it again.
+
+    @note it does not need to write the block content back to disk.
+    */
+    void (*release)(Block *block);
+
+    // # NOTES FOR ATOMIC OPERATIONS
+    //
+    // atomic operation has three states:
+    // * running: this atomic operation may have more modifications.
+    // * committed: this atomic operation is ended. No more modifications.
+    // * checkpointed: all modifications have been already persisted to disk.
+    //
+    // `begin_op` creates a new running atomic operation.
+    // `end_op` commits an atomic operation, and waits for it to be
+    // checkpointed.
+
+    /**
+    @brief begin a new atomic operation and initialize `ctx`.
+
+    If there are too many running operations (i.e. our logging is
+    too small to hold all of them), `begin_op` should sleep until
+    we can start a new operation.
+
+    @param[out] ctx the context to be initialized.
+
+    @throw panic if `ctx` is NULL.
+
+    @see `end_op` - the counterpart of this function.
+    */
+    void (*begin_op)(OpContext *ctx);
+
+    /**
+    @brief synchronize the content of `block` to disk.
+
+    If `ctx` is NULL, it immediately writes the content of `block` to disk.
+
+    However this is very dangerous, since it may break atomicity of
+    concurrent atomic operations. YOU SHOULD USE THIS MODE WITH CARE.
+
+    @param ctx the atomic operation context to which this block belongs.
+
+    @note the caller must hold the lock of `block`.
+
+    @throw panic if the number of blocks associated with `ctx` is larger
+    than `OP_MAX_NUM_BLOCKS` after `sync`
+    */
+    void (*sync)(OpContext *ctx, Block *block);
+
+    /**
+    @brief end the atomic operation managed by `ctx`.
+
+    It sleeps until all associated blocks are written to disk.
+
+    @param ctx the atomic operation context to be ended.
+
+    @throw panic if `ctx` is NULL.
+    */
+    void (*end_op)(OpContext *ctx);
+
+    // # NOTES FOR BITMAP
+    //
+    // every block on disk has a bit in bitmap, including blocks inside bitmap!
+    //
+    // usually, MBR block, super block, inode blocks, log blocks and bitmap
+    // blocks are preallocated on disk, i.e. those bits for them are already set
+    // in bitmap. therefore when we allocate a new block, it usually returns a
+    // data block. however, nobody can prevent you freeing a non-data block :)
+
+    /**
+    @brief allocate a new zero-initialized block.
+
+    It searches bitmap for a free block, mark it allocated and
+    returns the block number.
+
+    @param ctx since this function may write on-disk bitmap, it must be
+    associated with an atomic operation.
+    The caller must ensure that `ctx` is **running**.
+
+    @return the block number of the allocated block.
+
+    @note you should use `acquire`, `sync` and `release` to do disk I/O
+    here.
+
+    @throw panic if there is no free block on disk.
+    */
+    usize (*alloc)(OpContext *ctx);
+
+    /**
+    @brief free the block at `block_no` in bitmap.
+
+    It will NOT panic if `block_no` is already free or invalid.
+
+    @param ctx since this function may write on-disk bitmap, it must be
+    associated with an atomic operation.
+    The caller must ensure that `ctx` is **running**.
+    @param block_no the block number to be freed.
+
+    @note you should use `acquire`, `sync` and `release` to do disk I/O
+    here.
+    */
+    void (*free)(OpContext *ctx, usize block_no);
+} BlockCache;
+
+/**
+ @brief the global block cache instance.
+ */
+extern BlockCache bcache;
+
+/**
+ @brief initialize the block cache.
+
+ This method is also responsible for restoring logs after system crash,
+
+ i.e. it should read the committed-but-not-yet-applied blocks from the log
+ section and write them back to their original positions.
+
+ @param sblock the loaded super block.
+ @param device the initialized block device.
+
+ @note You may want to put it into `*_init` method groups.
+ */
+void init_bcache(const SuperBlock *sblock, const BlockDevice *device); // call after init_block_device
\ No newline at end of file
diff --git a/src/fs/defines.h b/src/fs/defines.h
new file mode 100644
index 0000000..af690ea
--- /dev/null
+++ b/src/fs/defines.h
@@ -0,0 +1,78 @@
+#pragma once
+
+#include
+
+/**
+ * this file contains on-disk representations of primitives in our filesystem.
+ */
+
+#define BLOCK_SIZE 512
+
+// maximum number of distinct block numbers can be recorded in the log header.
+#define LOG_MAX_SIZE ((BLOCK_SIZE - sizeof(usize)) / sizeof(usize)) // sized so LogHeader fills exactly one block
+
+#define INODE_NUM_DIRECT 12
+#define INODE_NUM_INDIRECT (BLOCK_SIZE / sizeof(u32)) // block numbers per indirect block
+#define INODE_PER_BLOCK (BLOCK_SIZE / sizeof(InodeEntry))
+#define INODE_MAX_BLOCKS (INODE_NUM_DIRECT + INODE_NUM_INDIRECT)
+#define INODE_MAX_BYTES (INODE_MAX_BLOCKS * BLOCK_SIZE) // hard cap on file size
+
+// the maximum length of file names, including trailing '\0'.
+#define FILE_NAME_MAX_LENGTH 14
+
+// inode types:
+#define INODE_INVALID 0
+#define INODE_DIRECTORY 1
+#define INODE_REGULAR 2 // regular file
+#define INODE_DEVICE 3
+
+#define ROOT_INODE_NO 1
+
+typedef u16 InodeType;
+
+#define BIT_PER_BLOCK (BLOCK_SIZE * 8) // bits held by one bitmap block
+
+// disk layout:
+// [ MBR block | super block | log blocks | inode blocks | bitmap blocks | data blocks ]
+//
+// `mkfs` generates the super block and builds an initial filesystem. The
+// super block describes the disk layout.
+typedef struct {
+    u32 num_blocks; // total number of blocks in filesystem.
+    u32 num_data_blocks;
+    u32 num_inodes;
+    u32 num_log_blocks; // number of blocks for logging, including log header.
+    u32 log_start; // the first block of logging area.
+    u32 inode_start; // the first block of inode area.
+    u32 bitmap_start; // the first block of bitmap area.
+} SuperBlock;
+
+// `type == INODE_INVALID` implies this inode is free.
+typedef struct dinode {
+    InodeType type;
+    u16 major; // major device id, for INODE_DEVICE only.
+    u16 minor; // minor device id, for INODE_DEVICE only.
+    u16 num_links; // number of hard links to this inode in the filesystem.
+    u32 num_bytes; // number of bytes in the file, i.e. the size of file.
+    u32 addrs[INODE_NUM_DIRECT]; // direct addresses/block numbers.
+    u32 indirect; // the indirect address block.
+} InodeEntry;
+
+// the block pointed by `InodeEntry.indirect`.
+typedef struct {
+    u32 addrs[INODE_NUM_INDIRECT]; // a zero entry means "block not allocated"
+} IndirectBlock;
+
+// directory entry. `inode_no == 0` implies this entry is free.
+typedef struct dirent {
+    u16 inode_no;
+    char name[FILE_NAME_MAX_LENGTH]; // NUL-terminated
+} DirEntry;
+
+typedef struct {
+    usize num_blocks; // how many entries of block_no[] are in use
+    usize block_no[LOG_MAX_SIZE]; // home locations of the logged blocks
+} LogHeader;
+
+// mkfs only
+#define FSSIZE 1000 // Size of file system in blocks
\ No newline at end of file
diff --git a/src/fs/inode.c b/src/fs/inode.c
new file mode 100755
index 0000000..427839c
--- /dev/null
+++ b/src/fs/inode.c
@@ -0,0 +1,468 @@
+#include
+#include
+#include
+#include
+
+/**
+ @brief the private reference to the super block.
+
+ @note we need these two variables because we allow the caller to
+ specify the block cache and super block to use.
+ Correspondingly, you should NEVER use global instance of
+ them.
+
+ @see init_inodes
+ */
+static const SuperBlock* sblock;
+
+/**
+ @brief the reference to the underlying block cache.
+ */
+static const BlockCache* cache;
+
+/**
+ @brief global lock for inode layer.
+
+ Use it to protect anything you need.
+
+ e.g. the list of allocated blocks, ref counts, etc.
+ */
+static SpinLock lock;
+
+/**
+ @brief the list of all allocated in-memory inodes.
+
+ We use a linked list to manage all allocated inodes.
+
+ You can implement your own data structure if you want better performance.
+
+ @see Inode
+ */
+static ListNode head; // unordered; searched linearly by inode_get
+
+
+// return which block `inode_no` lives on.
+static INLINE usize to_block_no(usize inode_no) {
+    return sblock->inode_start + (inode_no / (INODE_PER_BLOCK));
+}
+
+// return the pointer to on-disk inode. (within the block returned by to_block_no)
+static INLINE InodeEntry* get_entry(Block* block, usize inode_no) {
+    return ((InodeEntry*)block->data) + (inode_no % INODE_PER_BLOCK);
+}
+
+// return address array in indirect block. (reinterprets the raw block data)
+static INLINE u32* get_addrs(Block* block) {
+    return ((IndirectBlock*)block->data)->addrs;
+}
+
+// initialize inode tree.
+void init_inodes(const SuperBlock* _sblock, const BlockCache* _cache) {
+    init_spinlock(&lock);
+    init_list_node(&head);
+    sblock = _sblock;
+    cache = _cache;
+
+    if (ROOT_INODE_NO < sblock->num_inodes)
+        inodes.root = inodes.get(ROOT_INODE_NO); // pin the root inode for the kernel's lifetime
+    else
+        printk("(warn) init_inodes: no root inode.\n"); // e.g. an empty/unformatted disk image
+}
+
+// initialize in-memory inode. (inode_no 0 means "not yet bound to an on-disk inode")
+static void init_inode(Inode* inode) {
+    init_sleeplock(&inode->lock);
+    init_rc(&inode->rc);
+    init_list_node(&inode->node);
+    inode->inode_no = 0;
+    inode->valid = false; // entry not loaded from disk yet
+}
+
+// see `inode.h`.
+static usize inode_alloc(OpContext* ctx, InodeType type) {
+    //printk("[INODE] Allocating inode of type %d\n", type);
+    ASSERT(type != INODE_INVALID);
+    acquire_spinlock(&lock); // NOTE(review): held across cache->acquire, which can sleep on a semaphore -- confirm sleeping under this spinlock is safe
+    for (usize i = 1; i < sblock->num_inodes; i++) { // inode 0 is reserved ("free" dirent marker)
+        // search for a free inode
+        Block* block = cache->acquire(to_block_no(i));
+        InodeEntry* entry = get_entry(block, i);
+        if (entry->type == INODE_INVALID) {
+            // found a free inode
+            memset(entry, 0, sizeof(InodeEntry)); // wipe links/size/addrs of the recycled entry
+            entry->type = type;
+            cache->sync(ctx, block); // logged under the caller's atomic op
+            cache->release(block);
+            release_spinlock(&lock);
+            return i;
+        }
+        cache->release(block);
+    }
+    release_spinlock(&lock);
+    PANIC(); // no free inode on disk
+    return 0; // unreachable; satisfies the compiler's return-path check
+}
+
+// see `inode.h`.
+static void inode_lock(Inode* inode) {
+    if(inode == NULL) {
+        PANIC();
+    }
+    //printk("[INODE] Locking inode no %llu\n", inode->inode_no);
+    //printk("[INODE] rc count: %d\n", inode->rc.count);
+    ASSERT(inode->rc.count > 0); // caller must already hold a reference (via get/share)
+    //printk("[INODE] Acquiring sleeplock for inode no %llu\n", inode->inode_no);
+    unalertable_wait_sem(&inode->lock); // may sleep; not interruptible
+    //printk("[INODE] Sleeplock acquired for inode no %llu\n", inode->inode_no);
+}
+
+// see `inode.h`.
+static void inode_unlock(Inode* inode) {
+    if(inode == NULL) {
+        PANIC();
+    }
+    //printk("[INODE] Unlocking inode no %llu\n", inode->inode_no);
+    ASSERT(inode->rc.count > 0);
+    // (no other state to update: the sleeplock alone guards entry/valid)
+    post_sem(&inode->lock); // wake one thread sleeping in inode_lock
+    //printk("[INODE] Sleeplock released for inode no %llu\n", inode->inode_no);
+}
+
+// see `inode.h`.
+static void inode_sync(OpContext* ctx, Inode* inode, bool do_write) {
+    // dispatch on (valid, do_write): panic, load, write back, or refresh.
+    //printk("[INODE] Syncing inode no %llu, do_write: %d\n", inode->inode_no, do_write);
+    if(!inode->valid && do_write) {
+        PANIC(); // writing back an entry that was never loaded would corrupt the disk
+    } else if (!inode->valid && !do_write) {
+        // Load inode from disk
+        Block* block = cache->acquire(to_block_no(inode->inode_no));
+        InodeEntry* entry = get_entry(block, inode->inode_no);
+        memcpy(&inode->entry, entry, sizeof(InodeEntry));
+        cache->release(block);
+        inode->valid = true; // Mark as valid after loading
+    } else if (inode->valid && do_write) {
+        Block* block = cache->acquire(to_block_no(inode->inode_no));
+        InodeEntry* entry = get_entry(block, inode->inode_no);
+        memcpy(entry, &inode->entry, sizeof(InodeEntry)); // in-memory -> on-disk copy
+        cache->sync(ctx, block); // logged under the caller's atomic op
+        cache->release(block);
+    } else if (inode->valid && !do_write) {
+        Block* block = cache->acquire(to_block_no(inode->inode_no));
+        InodeEntry* entry = get_entry(block, inode->inode_no);
+        memcpy(&inode->entry, entry, sizeof(InodeEntry)); // refresh from disk, discarding in-memory changes
+        cache->release(block);
+    }
+}
+
+// see `inode.h`. Return a referenced in-memory inode for `inode_no`, loading it on first use.
+static Inode* inode_get(usize inode_no) {
+    //printk("[INODE] Getting inode no %llu\n", inode_no);
+    ASSERT(inode_no > 0); // inode 0 is the "free dirent" marker, never a real inode
+    //printk("[INODE] Total inodes: %llu\n", sblock->num_inodes);
+    ASSERT(inode_no < sblock->num_inodes);
+    //printk("[INODE] Acquiring global inode lock\n");
+    acquire_spinlock(&lock);
+    _for_in_list(nd, &head) {
+        Inode* inode = container_of(nd, Inode, node);
+        if (nd != &head && inode->inode_no == inode_no) { // fix: skip the dummy head node (as cache_acquire does); container_of(&head) is a bogus Inode with garbage fields
+            // found in-memory inode
+            increment_rc(&inode->rc);
+            release_spinlock(&lock);
+            return inode;
+        }
+    }
+    release_spinlock(&lock);
+    // not found, create a new one
+    Inode* new_node = kalloc(sizeof(Inode));
+    init_inode(new_node);
+    new_node->inode_no = inode_no;
+    increment_rc(&new_node->rc); // the caller's reference
+    //printk("[INODE] Loading inode no %llu from disk\n", inode_no);
+    //printk("[INODE] rc count: %d\n", new_node->rc.count);
+    inode_lock(new_node);
+    inode_sync(NULL, new_node, false); // load the on-disk entry; sets valid = true
+    inode_unlock(new_node);
+    acquire_spinlock(&lock);
+    //printk("[INODE] Inode no %llu loaded from disk\n", inode_no);
+    _insert_into_list(&head, &new_node->node); // NOTE(review): the lock was dropped during the disk read, so two threads can insert duplicates for the same inode_no -- confirm callers serialize first-touch
+    //printk("[INODE] Inserting inode no %llu into global inode list\n", new_node->inode_no);
+    release_spinlock(&lock);
+    return new_node;
+}
+// see `inode.h`.
+static void inode_clear(OpContext* ctx, Inode* inode) {
+    //printk("[INODE] Clearing inode no %llu\n", inode->inode_no);
+    //acquire_spinlock(&lock);
+    // clear the inode's content (turn the file into an empty, zero-length file)
+    // first free the indirect block and every data block it references
+    if (inode->entry.indirect){
+        Block* indirect_block = cache->acquire(inode->entry.indirect);
+        u32* addrs = get_addrs(indirect_block);
+        for (usize i = 0; i < INODE_NUM_INDIRECT; i++) {
+            if (addrs[i]) {
+                cache->free(ctx, addrs[i]); // only the bitmap changes; addrs[i] left stale on purpose
+            }
+        }
+        cache->sync(ctx, indirect_block); // NOTE(review): syncing a block that is freed right below looks redundant -- confirm
+        cache->release(indirect_block);
+        cache->free(ctx, inode->entry.indirect);
+        inode->entry.indirect = 0;
+    }
+    // direct blocks
+    for (usize i = 0; i < INODE_NUM_DIRECT; i++) {
+        if (inode->entry.addrs[i]) {
+            cache->free(ctx, inode->entry.addrs[i]);
+            inode->entry.addrs[i] = 0;
+        }
+    }
+    inode->entry.num_bytes = 0;
+    inode_sync(ctx, inode, true); // persist the now-empty entry under ctx
+    //release_spinlock(&lock);
+}
+
+// see `inode.h`.
+static Inode* inode_share(Inode* inode) {
+    //printk("[INODE] Sharing inode no %llu\n", inode->inode_no);
+    // bump the reference count; the caller gets the same pointer back
+    increment_rc(&inode->rc);
+    return inode;
+}
+
+// see `inode.h`.
+static void inode_put(OpContext* ctx, Inode* inode) {
+    // drop one reference; the last dropper tears the in-memory inode down
+    //printk("[INODE] Putting inode no %llu\n", inode->inode_no);
+    acquire_spinlock(&lock);
+    decrement_rc(&inode->rc);
+    //printk("[INODE] rc count after decrement: %d\n", inode->rc.count);
+    // if no one needs the inode any more
+    if(inode->rc.count == 0) {
+        // Remove from list immediately so no one else gets it
+        _detach_from_list(&inode->node);
+        release_spinlock(&lock); // Release before IO
+
+        if (inode->entry.num_links == 0) {
+            inode->entry.type = INODE_INVALID; // marks the on-disk entry free for inode_alloc
+            inode_clear(ctx, inode); // free data blocks and write the entry back under ctx
+        }
+        kfree(inode);
+    } else {
+        release_spinlock(&lock);
+    }
+}
+
+/**
+ @brief get which block is the offset of the inode in.
+
+ e.g. `inode_map(ctx, my_inode, 2, &modified)` returns the disk block_no
+ backing block index 2 of the file, i.e. bytes [2*BLOCK_SIZE, 3*BLOCK_SIZE);
+ note `offset` is a BLOCK index (callers pass byte_offset / BLOCK_SIZE).
+
+ If a block has not been allocated for that byte, `inode_map` will
+ allocate a new block and update `my_inode`, at which time, `modified`
+ will be set to true.
+
+ HOWEVER, if `ctx == NULL`, `inode_map` will NOT try to allocate any new block,
+ and when it finds that the block has not been allocated, it will return 0.
+
+ @param[out] modified true if some new block is allocated and `inode`
+ has been changed.
+
+ @return usize the block number of that block, or 0 if `ctx == NULL` and
+ the required block has not been allocated.
+
+ @note the caller must hold the lock of `inode`.
+ */
+static usize inode_map(OpContext* ctx,
+                       Inode* inode,
+                       usize offset,
+                       bool* modified) {
+    //direct blocks
+    //printk("[INODE] Mapping offset %llu for inode no %llu\n", offset, inode->inode_no);
+    *modified = false;
+    if (offset < INODE_NUM_DIRECT){
+        if (inode->entry.addrs[offset] == 0){
+            if (ctx == NULL){
+                return 0; // read path: unallocated block, nothing to do
+            }
+            inode->entry.addrs[offset] = cache->alloc(ctx);
+            *modified = true; // caller must inode_sync to persist the new address
+        }
+        return inode->entry.addrs[offset];
+    }
+    //indirect blocks
+    offset -= INODE_NUM_DIRECT; // now an index into the indirect block's array
+    if (inode->entry.indirect == 0){
+        if (ctx == NULL) return 0;
+        inode->entry.indirect = cache->alloc(ctx);
+        *modified = true;
+        // Initialize new indirect block with zeros
+        Block* new_ind = cache->acquire(inode->entry.indirect);
+        memset(new_ind->data, 0, BLOCK_SIZE);
+        cache->sync(ctx, new_ind);
+        cache->release(new_ind);
+    }
+
+    Block* indirect_block = cache->acquire(inode->entry.indirect);
+    u32* addrs = get_addrs(indirect_block);
+
+    if (addrs[offset] == 0){
+        if (ctx == NULL){
+            cache->release(indirect_block);
+            return 0;
+        }
+        addrs[offset] = cache->alloc(ctx);
+        // Sync the indirect block because we updated an entry in it
+        cache->sync(ctx, indirect_block);
+    }
+    usize ret = addrs[offset];
+    cache->release(indirect_block);
+    return ret;
+}
+
+// see `inode.h`.
+// Reads `count` bytes starting at byte `offset` of the file into `dest`,
+// clamping the range to the current file size. Returns the clamped count.
+static usize inode_read(Inode* inode, u8* dest, usize offset, usize count) {
+ //printk("[INODE] Reading %llu bytes from offset %llu of inode no %llu\n", count, offset, inode->inode_no);
+ InodeEntry* entry = &inode->entry;
+ // clamp the tail of the request so we never read past end-of-file.
+ if (count + offset > entry->num_bytes)
+ count = entry->num_bytes - offset;
+ usize end = offset + count;
+ //printk("[INODE] [ASSERT] offset: %llu, end: %llu, entry->num_bytes: %llu\n", offset, end, entry->num_bytes);
+ ASSERT(offset <= entry->num_bytes);
+ //printk("[INODE] [ASSERT PASSED] offset: %llu, end: %llu, entry->num_bytes: %llu\n", offset, end, entry->num_bytes);
+ ASSERT(end <= entry->num_bytes);
+ //printk("[INODE] [ASSERT PASSED] offset: %llu, end: %llu, entry->num_bytes: %llu\n", offset, end, entry->num_bytes);
+ ASSERT(offset <= end);
+ bool modified = false;
+ // copy block-by-block; `size` is how many bytes the current block yields.
+ for (usize readed_block = 0, size = 0; readed_block < count; readed_block += size) {
+ Block* block = NULL;
+ // ctx == NULL: never allocate while reading.
+ // NOTE(review): if the range contained an unallocated hole, block_no
+ // would be 0 and we would acquire block 0. This assumes inode_write
+ // always allocates every block below num_bytes, so holes cannot
+ // occur — confirm.
+ usize block_no = inode_map(NULL, inode, offset / BLOCK_SIZE, &modified);
+ block = cache->acquire(block_no);
+ if (count - readed_block < BLOCK_SIZE - (offset % BLOCK_SIZE))
+ size = count - readed_block;
+ else
+ size = BLOCK_SIZE - (offset % BLOCK_SIZE);
+ memmove(dest, block->data + (offset % BLOCK_SIZE), size);
+ cache->release(block);
+ dest += size;
+ offset += size;
+ }
+ return count;
+}
+
+// see `inode.h`.
+// Writes `count` bytes from `src` at byte `offset`, growing the file (and
+// allocating blocks on demand) as needed. Returns the bytes written.
+static usize inode_write(OpContext* ctx,
+ Inode* inode,
+ u8* src,
+ usize offset,
+ usize count) {
+ //printk("[INODE] Writing %llu bytes to offset %llu of inode no %llu\n", count, offset, inode->inode_no);
+ InodeEntry* entry = &inode->entry;
+ usize end = offset + count;
+ ASSERT(offset <= entry->num_bytes);
+ ASSERT(end <= INODE_MAX_BYTES);
+ ASSERT(offset <= end);
+ // grow the file size first; all updates below belong to the same op `ctx`.
+ if (end > entry->num_bytes){
+ entry->num_bytes = end;
+ inode_sync(ctx, inode, true);
+ }
+ usize written_byte = 0;
+ while (offset < end) {
+ // stay within the current block: write up to its boundary (or `end`).
+ usize byte_to_write = MIN(BLOCK_SIZE - (offset % BLOCK_SIZE), end - offset);
+ bool modified = false;
+ usize block_no = inode_map(ctx, inode, offset / BLOCK_SIZE, &modified);
+ // with a non-NULL ctx, inode_map allocates instead of returning 0
+ // (and alloc panics on failure), so this check is purely defensive.
+ if (block_no == 0) {
+ PANIC();
+ }
+ // a new block was hooked into the inode entry: persist the entry.
+ if (modified) {
+ inode_sync(ctx, inode, true);
+ }
+ Block* block = cache->acquire(block_no);
+ memcpy(block->data + (offset % BLOCK_SIZE), src, byte_to_write);
+ cache->sync(ctx, block);
+ cache->release(block);
+ offset += byte_to_write;
+ src += byte_to_write;
+ written_byte += byte_to_write;
+ }
+ return written_byte;
+}
+
+// see `inode.h`.
+// Linearly scans the directory's fixed-size DirEntry slots for `name`.
+// Free slots (inode_no == 0) are skipped even if their stale name matches.
+static usize inode_lookup(Inode* inode, const char* name, usize* index) {
+ //printk("[INODE] Looking up name %s in directory inode no %llu\n", name, inode->inode_no);
+ InodeEntry* entry = &inode->entry;
+ ASSERT(entry->type == INODE_DIRECTORY);
+ DirEntry dir_entry;
+ for (usize off = 0; off < entry->num_bytes; off += sizeof(DirEntry)) {
+ inode_read(inode, (u8*)&dir_entry, off, sizeof(DirEntry));
+ if ((strncmp(dir_entry.name, name, FILE_NAME_MAX_LENGTH) == 0) && dir_entry.inode_no != 0) {
+ if (index)
+ *index = off; // byte offset of the matching slot.
+ return dir_entry.inode_no;
+ }
+ }
+ return 0;
+}
+
+// see `inode.h`.
+// Adds a DirEntry (`name` -> `inode_no`), reusing a free slot if one
+// exists, otherwise appending (which grows the directory via inode_write).
+static usize inode_insert(OpContext* ctx,
+ Inode* inode,
+ const char* name,
+ usize inode_no) {
+ InodeEntry* entry = &inode->entry;
+ ASSERT(entry->type == INODE_DIRECTORY);
+ //printk("[INODE] Inserting name %s with inode no %llu into directory inode no %llu\n", name, inode_no, inode->inode_no);
+ // reject duplicates; -1 wraps to all-ones usize, per the header contract.
+ if (inode_lookup(inode, name, NULL) != 0) {
+ return -1;
+ }
+ DirEntry dir_entry;
+ // default to appending at the end of the directory file.
+ usize idx = inode->entry.num_bytes;
+ // find a free entry
+ for (usize i = 0; i < inode->entry.num_bytes; i += sizeof(DirEntry)) {
+ inode_read(inode, (u8*)&dir_entry, i, sizeof(DirEntry));
+ if (dir_entry.inode_no == 0) {
+ idx = i;
+ break;
+ }
+ }
+ memset(&dir_entry, 0, sizeof(DirEntry));
+ strncpy(dir_entry.name, name, FILE_NAME_MAX_LENGTH);
+ dir_entry.inode_no = inode_no;
+ inode_write(ctx, inode, (u8*)&dir_entry, idx, sizeof(DirEntry));
+ return idx;
+}
+
+// see `inode.h`.
+// Frees the directory slot at byte offset `index` by zeroing its inode_no.
+static void inode_remove(OpContext* ctx, Inode* inode, usize index) {
+ //printk("[INODE] Removing entry at index %llu from directory inode no %llu\n", index, inode->inode_no);
+ InodeEntry* entry = &inode->entry;
+ ASSERT(entry->type == INODE_DIRECTORY);
+ ASSERT(index < entry->num_bytes);
+ DirEntry dir_entry;
+ inode_read(inode, (u8*)&dir_entry, index, sizeof(DirEntry));
+ // already free: nothing to do.
+ if (dir_entry.inode_no == 0) {
+ return;
+ }
+ usize last_index = entry->num_bytes - sizeof(DirEntry);
+ // NOTE(review): this read of the LAST entry looks vestigial (perhaps
+ // from an abandoned "move last entry into the hole" compaction): only
+ // its `name` survives, because `inode_no` is zeroed below. The freed
+ // slot is still correctly skipped by lookup/insert, but the directory
+ // never shrinks and the slot keeps the last entry's stale name.
+ inode_read(inode, (u8*)&dir_entry, last_index, sizeof(DirEntry));
+ dir_entry.inode_no = 0;
+ inode_write(ctx, inode, (u8*)&dir_entry, index, sizeof(DirEntry));
+}
+
+// The global inode-layer vtable (see `inode.h`).
+// `.root` is intentionally absent here (zero-initialized); per `inode.h`,
+// `init_inodes` is expected to set it — TODO confirm against init_inodes.
+InodeTree inodes = {
+ .alloc = inode_alloc,
+ .lock = inode_lock,
+ .unlock = inode_unlock,
+ .sync = inode_sync,
+ .get = inode_get,
+ .clear = inode_clear,
+ .share = inode_share,
+ .put = inode_put,
+ .read = inode_read,
+ .write = inode_write,
+ .lookup = inode_lookup,
+ .insert = inode_insert,
+ .remove = inode_remove,
+};
\ No newline at end of file
diff --git a/src/fs/inode.h b/src/fs/inode.h
new file mode 100755
index 0000000..78a4345
--- /dev/null
+++ b/src/fs/inode.h
@@ -0,0 +1,266 @@
+#pragma once
+#include
+#include
+#include
+#include
+#include
+
+/**
+ @brief the number of the root inode (i.e. the inode_no of `/`).
+ */
+#define ROOT_INODE_NO 1
+
+/**
+ @brief an inode in memory.
+
+ You can compare it to a `Block` because they have similar operating ways.
+
+ @see Block
+ */
+typedef struct {
+ /**
+ @brief the lock protecting the inode metadata and its content.
+
+ @note it does NOT protect `rc`, `node`, `valid`, etc, because they are
+ "runtime" variables, not "filesystem" metadata or data of the inode.
+ */
+ SleepLock lock;
+
+ /**
+ @brief the reference count of this inode.
+
+ Different from `Block`, an inode can be shared by multiple threads or
+ processes, so we need a reference count to track the number of
+ references to this inode.
+ */
+ RefCount rc;
+
+ /**
+ @brief link this inode into a linked list.
+ */
+ ListNode node;
+
+ /**
+ @brief the corresponding inode number on disk.
+
+ @note distinguish it from `block_no` in `Block`, which is the "block number".
+
+ `inode_no` should be the offset in block from the beginning of the inode area.
+ */
+ usize inode_no;
+
+ /**
+ @brief has the `entry` been loaded from disk?
+ */
+ bool valid;
+
+ /**
+ @brief the real in-memory copy of the inode on disk.
+ */
+ InodeEntry entry; /* guarded by `lock` above */
+} Inode;
+
+/**
+ @brief interface of inode layer.
+ */
+typedef struct {
+ /**
+ @brief the root inode of the file system.
+
+ @see `init_inodes` should initialize it to a valid inode.
+ */
+ Inode* root; /* set by `init_inodes` */
+
+ /**
+ @brief allocate a new zero-initialized inode on disk.
+
+ @param type the type of the inode to allocate.
+
+ @return the number of newly allocated inode.
+
+ @throw panic if allocation fails (e.g. no more free inode).
+ */
+ usize (*alloc)(OpContext* ctx, InodeType type);
+
+ /**
+ @brief acquire the sleep lock of `inode`.
+
+ This method should be called before any write operation to `inode` and its
+ file content.
+
+ If the inode has not been loaded, this method should load it from disk.
+
+ @see `unlock` - the counterpart of this method.
+ */
+ void (*lock)(Inode* inode);
+
+ /**
+ @brief release the sleep lock of `inode`.
+
+ @see `lock` - the counterpart of this method.
+ */
+ void (*unlock)(Inode* inode);
+
+ /**
+ @brief synchronize the content of `inode` between memory and disk.
+
+ Different from block cache, this method can either read or write the inode.
+
+ If `do_write` is true and the inode is valid, write the content of `inode` to disk.
+
+ If `do_write` is false and the inode is invalid, read the content of `inode` from disk.
+
+ If `do_write` is false and the inode is valid, do nothing.
+
+ @note here "write to disk" means "sync with block cache", not "directly
+ write to underneath SD card".
+
+ @note caller must hold the lock of `inode`.
+
+ @throw panic if `do_write` is true and `inode` is invalid.
+ */
+ void (*sync)(OpContext* ctx, Inode* inode, bool do_write);
+
+ /**
+ @brief get an inode by its inode number.
+
+ This method should increment the reference count of the inode by one.
+
+ @note it does NOT have to load the inode from disk!
+
+ @see `sync` will be responsible to load the content of inode.
+
+ @return the `inode` of `inode_no`. `inode->valid` can be false.
+
+ @see `put` - the counterpart of this method.
+ */
+ Inode* (*get)(usize inode_no);
+
+ /**
+ @brief truncate all contents of `inode`.
+
+ This method removes (i.e. "frees") all file blocks of `inode`.
+
+ @note do not forget to reset related metadata of `inode`, e.g. `inode->entry.num_bytes`.
+
+ @note caller must hold the lock of `inode`.
+ */
+ void (*clear)(OpContext* ctx, Inode* inode);
+
+ /**
+ @brief duplicate an inode.
+
+ Call this if you want to share an inode with others.
+
+ It should increment the reference count of `inode` by one.
+
+ @return the duplicated inode (i.e. may just return `inode`).
+ */
+ Inode* (*share)(Inode* inode);
+
+ /**
+ @brief notify that you no longer need `inode`.
+
+ This method is also responsible to free the inode if no one needs it:
+
+ "No one needs it" means it is useless BOTH in-memory (`inode->rc == 0`) and on-disk
+ (`inode->entry.num_links == 0`).
+
+ "Free the inode" means freeing all related file blocks and the inode itself.
+
+ @note do not forget `kfree(inode)` after you have done them all!
+
+ @note caller must NOT hold the lock of `inode`. i.e. caller should have `unlock`ed it.
+
+ @see `get` - the counterpart of this method.
+
+ @see `clear` can be used to free all file blocks of `inode`.
+ */
+ void (*put)(OpContext* ctx, Inode* inode);
+
+ /**
+ @brief read `count` bytes from `inode`, beginning at `offset`, to `dest`.
+
+ @return how many bytes you actually read.
+
+ @note caller must hold the lock of `inode`.
+ */
+ usize (*read)(Inode* inode, u8* dest, usize offset, usize count);
+
+ /**
+ @brief write `count` bytes from `src` to `inode`, beginning at `offset`.
+
+ @return how many bytes you actually write.
+
+ @note caller must hold the lock of `inode`.
+ */
+ usize (*write)(OpContext* ctx,
+ Inode* inode,
+ u8* src,
+ usize offset,
+ usize count);
+
+ /**
+ @brief look up an entry named `name` in directory `inode`.
+
+ @param[out] index the index of found entry in this directory.
+
+ @return the inode number of the corresponding inode, or 0 if not found.
+
+ @note caller must hold the lock of `inode`.
+
+ @throw panic if `inode` is not a directory.
+ */
+ usize (*lookup)(Inode* inode, const char* name, usize* index);
+
+ /**
+ @brief insert a new directory entry in directory `inode`.
+
+ Add a new directory entry in `inode` called `name`, which points to inode
+ with `inode_no`.
+
+ @return the index of new directory entry, or -1 if `name` already exists.
+
+ @note if the directory inode is full, you should grow the size of directory inode.
+
+ @note you do NOT need to change `inode->entry.num_links`. Another function
+ to be finished in our final lab will do this.
+
+ @note caller must hold the lock of `inode`.
+
+ @throw panic if `inode` is not a directory.
+ */
+ usize (*insert)(OpContext* ctx,
+ Inode* inode,
+ const char* name,
+ usize inode_no);
+
+ /**
+ @brief remove the directory entry at `index`.
+
+ If the corresponding entry is not used before, `remove` does nothing.
+
+ @note if the last entry is removed, you can shrink the size of directory inode.
+ If you like, you can also move entries to fill the hole.
+
+ @note caller must hold the lock of `inode`.
+
+ @throw panic if `inode` is not a directory.
+ */
+ void (*remove)(OpContext* ctx, Inode* inode, usize index);
+} InodeTree;
+
+/**
+ @brief the global inode layer instance.
+ */
+extern InodeTree inodes;
+
+/**
+ @brief initialize the inode layer.
+
+ @note do not forget to read the root inode from disk!
+
+ @param sblock the loaded super block.
+ @param cache the initialized block cache.
+ */
+void init_inodes(const SuperBlock* sblock, const BlockCache* cache);
\ No newline at end of file
diff --git a/src/fs/test/.gitignore b/src/fs/test/.gitignore
new file mode 100755
index 0000000..d5d6e41
--- /dev/null
+++ b/src/fs/test/.gitignore
@@ -0,0 +1 @@
+cache_test
\ No newline at end of file
diff --git a/src/fs/test/CMakeLists.txt b/src/fs/test/CMakeLists.txt
new file mode 100755
index 0000000..340f47c
--- /dev/null
+++ b/src/fs/test/CMakeLists.txt
@@ -0,0 +1,33 @@
+cmake_minimum_required(VERSION 3.16)
+
+project(fs-test VERSION 0.1.0 LANGUAGES C CXX)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS True)
+
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
+
+include_directories(../..)
+
+set(compiler_warnings "-Wall -Wextra")
+set(compiler_flags "${compiler_warnings} \
+ -O1 -ftree-pre -g \
+ -fno-omit-frame-pointer \
+ -fsanitize=undefined \
+ -fno-sanitize=alignment")
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${compiler_flags}")
+set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} ${compiler_flags}") # NOTE(review): CMAKE_C_FLAGS already contains ${compiler_flags}, so the flags are duplicated for C++ — harmless, but ${CMAKE_CXX_FLAGS} was likely intended here
+
+file(GLOB mock_sources CONFIGURE_DEPENDS "mock/*.cpp")
+add_library(mock STATIC ${mock_sources})
+
+file(GLOB fs_sources CONFIGURE_DEPENDS "../*.c")
+add_library(fs STATIC ${fs_sources} "instrument.c")
+target_compile_options(fs PUBLIC "-fno-builtin") # presumably so compiler builtins do not bypass the mock/instrumented versions — confirm
+
+add_executable(inode_test inode_test.cpp)
+target_link_libraries(inode_test fs mock pthread)
+
+add_executable(cache_test cache_test.cpp)
+target_link_libraries(cache_test fs mock pthread)
diff --git a/src/fs/test/assert.hpp b/src/fs/test/assert.hpp
new file mode 100755
index 0000000..fd8522d
--- /dev/null
+++ b/src/fs/test/assert.hpp
@@ -0,0 +1,55 @@
+#pragma once
+
+#include
+
+#include "exception.hpp"
+
+[[maybe_unused]] static inline auto
+get_source_location(size_t line, const char *file, const char *func) -> std::string {
+ std::stringstream buf;
+ buf << "'" << func << "' (" << file << ":L" << line << ")";
+ return buf.str();
+}
+
+template
+void _assert_eq(const X &actual, const Y &expect, const char *expr, const std::string &location) {
+ if (actual != static_cast(expect)) {
+ std::stringstream buf;
+ buf << location << ": ";
+ buf << "assert_eq failed: '" << expr << "': expect '" << expect << "', got '" << actual
+ << "'";
+ throw AssertionFailure(buf.str());
+ }
+}
+
+#define assert_eq(actual, expect) \
+ _assert_eq( \
+ (actual), (expect), #actual, get_source_location(__LINE__, __FILE__, __PRETTY_FUNCTION__))
+
+template
+void _assert_ne(const X &actual, const Y &expect, const char *expr, const std::string &location) {
+ if (actual == static_cast(expect)) {
+ std::stringstream buf;
+ buf << location << ": ";
+ buf << "assert_ne failed: '" << expr << "': expect ≠ '" << expect << "', got '" << actual
+ << "'";
+ throw AssertionFailure(buf.str());
+ }
+}
+
+#define assert_ne(actual, expect) \
+ _assert_ne( \
+ (actual), (expect), #actual, get_source_location(__LINE__, __FILE__, __PRETTY_FUNCTION__))
+
+[[maybe_unused]] static inline void
+_assert_true(bool predicate, const char *expr, const std::string &location) {
+ if (!predicate) {
+ std::stringstream buf;
+ buf << location << ": ";
+ buf << "assert_true failed: '" << expr << "'";
+ throw AssertionFailure(buf.str());
+ }
+}
+
+#define assert_true(expr) \
+ _assert_true((expr), #expr, get_source_location(__LINE__, __FILE__, __PRETTY_FUNCTION__))
diff --git a/src/fs/test/cache_test.cpp b/src/fs/test/cache_test.cpp
new file mode 100755
index 0000000..98cdff3
--- /dev/null
+++ b/src/fs/test/cache_test.cpp
@@ -0,0 +1,953 @@
+extern "C" {
+#include
+}
+
+#include "assert.hpp"
+#include "pause.hpp"
+#include "runner.hpp"
+
+#include "mock/block_device.hpp"
+
+#include
+#include
+#include
+#include
+
+namespace
+{
+
+constexpr int IN_CHILD = 0;
+
+static void wait_process(int pid) // join a forked child; abnormal exit raises Internal
+{
+ int wstatus;
+ waitpid(pid, &wstatus, 0);
+ if (!WIFEXITED(wstatus)) { // child crashed or was killed by a signal
+ std::stringstream buf;
+ buf << "process [" << pid << "] exited abnormally with code "
+ << wstatus;
+ throw Internal(buf.str());
+ }
+}
+
+} // namespace
+
+namespace basic
+{
+
+void test_init()
+{
+ initialize(1, 1); // presumably (log_size, num_data_blocks), mirroring initialize_mock — confirm
+}
+
+// targets: `acquire`, `release`, `sync(NULL, ...)`.
+
+void test_read_write()
+{
+ initialize(1, 1);
+
+ auto *b = bcache.acquire(1);
+ auto *d = mock.inspect(1); // raw view of the same block on the mock disk
+ assert_eq(b->block_no, 1);
+ assert_eq(b->valid, true);
+
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ assert_eq(b->data[i], d[i]); // cached contents must match disk after load
+ }
+
+ u8 value = b->data[128];
+ b->data[128] = ~value;
+ bcache.sync(NULL, b); // NULL ctx: change must reach the disk immediately
+ assert_eq(d[128], ~value);
+
+ bcache.release(b);
+ b = bcache.acquire(1);
+}
+
+void test_loop_read()
+{
+ initialize(1, 128);
+ constexpr usize num_rounds = 10;
+ for (usize round = 0; round < num_rounds; round++) {
+ std::vector p;
+ p.resize(sblock.num_blocks);
+
+ for (usize i = 0; i < sblock.num_blocks; i++) {
+ // PAUSE
+ p[i] = bcache.acquire(i);
+
+ assert_eq(p[i]->block_no, i);
+
+ auto *d = mock.inspect(i);
+ for (usize j = 0; j < BLOCK_SIZE; j++) {
+ assert_eq(p[i]->data[j], d[j]);
+ }
+ }
+
+ for (usize i = 0; i < sblock.num_blocks; i++) {
+ assert_eq(p[i]->valid, true);
+ bcache.release(p[i]);
+ }
+ }
+}
+
+void test_reuse()
+{
+ initialize(1, 500);
+
+ constexpr usize num_rounds = 200;
+ constexpr usize blocks[] = { 1, 123, 233, 399, 415 };
+
+ auto matched = [&](usize bno) {
+ for (usize b : blocks) {
+ if (bno == b)
+ return true;
+ }
+ return false;
+ };
+
+ usize rcnt = 0, wcnt = 0;
+ mock.on_read = [&](usize bno, auto) {
+ if (matched(bno))
+ rcnt++;
+ };
+ mock.on_write = [&](usize bno, auto) {
+ if (matched(bno))
+ wcnt++;
+ };
+
+ for (usize round = 0; round < num_rounds; round++) {
+ std::vector p;
+ for (usize block_no : blocks) {
+ p.push_back(bcache.acquire(block_no));
+ }
+ for (auto *b : p) {
+ assert_eq(b->valid, true);
+ bcache.release(b);
+ }
+ }
+
+ assert_true(rcnt < 10);
+ assert_eq(wcnt, 0);
+}
+
+void test_lru()
+{
+ std::mt19937 gen(0xdeadbeef);
+
+ usize cold_size = 1000;
+ usize hot_size = EVICTION_THRESHOLD * 0.8;
+ initialize(1, cold_size + hot_size);
+ for (int i = 0; i < 1000; i++) {
+ bool hot = (gen() % 100) <= 90;
+ usize bno = hot ? (gen() % hot_size) : (hot_size + gen() % cold_size);
+
+ auto *b = bcache.acquire(bno);
+ auto *d = mock.inspect(bno);
+ assert_eq(b->data[123], d[123]);
+ bcache.release(b);
+ }
+
+ printf("(debug) #cached = %zu, #read = %zu\n",
+ bcache.get_num_cached_blocks(), mock.read_count.load());
+ assert_true(bcache.get_num_cached_blocks() <= EVICTION_THRESHOLD);
+ assert_true(mock.read_count < 233);
+ assert_true(mock.write_count < 5);
+}
+
+// targets: `begin_op`, `end_op`, `sync`.
+
+void test_atomic_op()
+{
+ initialize(32, 64);
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ bcache.end_op(&ctx);
+
+ bcache.begin_op(&ctx);
+
+ usize t = sblock.num_blocks - 1;
+ auto *b = bcache.acquire(t);
+ assert_eq(b->block_no, t);
+ assert_eq(b->valid, true);
+ auto *d = mock.inspect(t);
+ u8 v = d[128];
+ assert_eq(b->data[128], v);
+
+ b->data[128] = ~v;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+
+ assert_eq(d[128], v);
+ bcache.end_op(&ctx);
+ assert_eq(d[128], ~v);
+
+ bcache.begin_op(&ctx);
+
+ auto *b1 = bcache.acquire(t - 1);
+ auto *b2 = bcache.acquire(t - 2);
+ assert_eq(b1->block_no, t - 1);
+ assert_eq(b2->block_no, t - 2);
+
+ auto *d1 = mock.inspect(t - 1);
+ auto *d2 = mock.inspect(t - 2);
+ u8 v1 = d1[500];
+ u8 v2 = d2[10];
+ assert_eq(b1->data[500], v1);
+ assert_eq(b2->data[10], v2);
+
+ b1->data[500] = ~v1;
+ b2->data[10] = ~v2;
+ bcache.sync(&ctx, b1);
+ bcache.release(b1);
+ bcache.sync(&ctx, b2);
+ bcache.release(b2);
+
+ assert_eq(d1[500], v1);
+ assert_eq(d2[10], v2);
+ bcache.end_op(&ctx);
+ assert_eq(d1[500], ~v1);
+ assert_eq(d2[10], ~v2);
+}
+
+void test_overflow()
+{
+ initialize(100, 100);
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+
+ usize t = sblock.num_blocks - 1;
+ for (usize i = 0; i < OP_MAX_NUM_BLOCKS; i++) {
+ auto *b = bcache.acquire(t - i);
+ b->data[0] = 0xaa;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ }
+
+ bool panicked = false;
+ auto *b = bcache.acquire(t - OP_MAX_NUM_BLOCKS);
+ b->data[128] = 0x88;
+ try {
+ bcache.sync(&ctx, b);
+ } catch (const Panic &) {
+ panicked = true;
+ }
+
+ assert_eq(panicked, true);
+}
+
+void test_resident()
+{
+ // NOTE: this test may be a little controversial.
+ // the main ideas are:
+ // 1. dirty blocks should be pinned in block cache before `end_op`.
+ // 2. logging should not pollute block cache in most of time.
+
+ initialize(OP_MAX_NUM_BLOCKS, 500);
+
+ constexpr usize num_rounds = 200;
+ constexpr usize blocks[] = { 1, 123, 233, 399, 415 };
+
+ auto matched = [&](usize bno) {
+ for (usize b : blocks) {
+ if (bno == b)
+ return true;
+ }
+ return false;
+ };
+
+ usize rcnt = 0;
+ mock.on_read = [&](usize bno, auto) {
+ if (matched(bno))
+ rcnt++;
+ };
+
+ for (usize round = 0; round < num_rounds; round++) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+
+ for (usize block_no : blocks) {
+ auto *b = bcache.acquire(block_no);
+ assert_eq(b->valid, true);
+ b->data[0] = 0;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ }
+
+ bcache.end_op(&ctx);
+ }
+
+ assert_true(rcnt < 10);
+}
+
+void test_local_absorption()
+{
+ constexpr usize num_rounds = 1000;
+
+ initialize(100, 100);
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ usize t = sblock.num_blocks - 1;
+ for (usize i = 0; i < num_rounds; i++) {
+ for (usize j = 0; j < OP_MAX_NUM_BLOCKS; j++) {
+ auto *b = bcache.acquire(t - j);
+ b->data[0] = 0xcd;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ }
+ }
+ bcache.end_op(&ctx);
+
+ assert_true(mock.read_count < OP_MAX_NUM_BLOCKS * 5);
+ assert_true(mock.write_count < OP_MAX_NUM_BLOCKS * 5);
+ for (usize j = 0; j < OP_MAX_NUM_BLOCKS; j++) {
+ auto *b = mock.inspect(t - j);
+ assert_eq(b[0], 0xcd);
+ }
+}
+
+void test_global_absorption()
+{
+ constexpr usize op_size = 3;
+ constexpr usize num_workers = 100;
+
+ initialize(2 * OP_MAX_NUM_BLOCKS + op_size, 100);
+ usize t = sblock.num_blocks - 1;
+
+ OpContext out;
+ bcache.begin_op(&out);
+
+ for (usize i = 0; i < OP_MAX_NUM_BLOCKS; i++) {
+ auto *b = bcache.acquire(t - i);
+ b->data[0] = 0xcc;
+ bcache.sync(&out, b);
+ bcache.release(b);
+ }
+
+ std::vector ctx;
+ std::vector workers;
+ ctx.resize(num_workers);
+ workers.reserve(num_workers);
+
+ for (usize i = 0; i < num_workers; i++) {
+ bcache.begin_op(&ctx[i]);
+ for (usize j = 0; j < op_size; j++) {
+ auto *b = bcache.acquire(t - j);
+ b->data[0] = 0xdd;
+ bcache.sync(&ctx[i], b);
+ bcache.release(b);
+ }
+ workers.emplace_back([&, i] { bcache.end_op(&ctx[i]); });
+ }
+
+ workers.emplace_back([&] { bcache.end_op(&out); });
+ for (auto &worker : workers) {
+ worker.join();
+ }
+ for (usize i = 0; i < op_size; i++) {
+ auto *b = mock.inspect(t - i);
+ assert_eq(b[0], 0xdd);
+ }
+
+ for (usize i = op_size; i < OP_MAX_NUM_BLOCKS; i++) {
+ auto *b = mock.inspect(t - i);
+ assert_eq(b[0], 0xcc);
+ }
+}
+
+// target: replay at initialization.
+
+void test_replay()
+{
+ initialize_mock(50, 1000);
+
+ auto *header = mock.inspect_log_header();
+ header->num_blocks = 5;
+ for (usize i = 0; i < 5; i++) {
+ usize v = 500 + i;
+ header->block_no[i] = v;
+ auto *b = mock.inspect_log(i);
+ for (usize j = 0; j < BLOCK_SIZE; j++) {
+ b[j] = v & 0xff;
+ }
+ }
+
+ init_bcache(&sblock, &device);
+
+ assert_eq(header->num_blocks, 0);
+ for (usize i = 0; i < 5; i++) {
+ usize v = 500 + i;
+ auto *b = mock.inspect(v);
+ for (usize j = 0; j < BLOCK_SIZE; j++) {
+ assert_eq(b[j], v & 0xff);
+ }
+ }
+}
+
+// targets: `alloc`, `free`.
+
+void test_alloc()
+{
+ initialize(100, 100);
+
+ std::vector bno;
+ bno.reserve(100);
+ for (int i = 0; i < 100; i++) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+
+ bno.push_back(bcache.alloc(&ctx));
+ assert_ne(bno[i], 0);
+ assert_true(bno[i] < sblock.num_blocks);
+
+ auto *b = bcache.acquire(bno[i]);
+ for (usize j = 0; j < BLOCK_SIZE; j++) {
+ assert_eq(b->data[j], 0);
+ }
+ bcache.release(b);
+
+ bcache.end_op(&ctx);
+ auto *d = mock.inspect(bno[i]);
+ for (usize j = 0; j < BLOCK_SIZE; j++) {
+ assert_eq(d[j], 0);
+ }
+ }
+
+ std::sort(bno.begin(), bno.end());
+ usize count = std::unique(bno.begin(), bno.end()) - bno.begin();
+ assert_eq(count, bno.size());
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+
+ bool panicked = false;
+ try {
+ usize b = bcache.alloc(&ctx);
+ assert_ne(b, 0);
+ } catch (const Panic &) {
+ panicked = true;
+ }
+
+ assert_eq(panicked, true);
+}
+
+void test_alloc_free()
+{
+ constexpr usize num_rounds = 5;
+ constexpr usize num_data_blocks = 1000;
+
+ initialize(100, num_data_blocks);
+
+ for (usize round = 0; round < num_rounds; round++) {
+ std::vector bno;
+ for (usize i = 0; i < num_data_blocks; i++) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ bno.push_back(bcache.alloc(&ctx));
+ bcache.end_op(&ctx);
+ }
+
+ for (usize b : bno) {
+ assert_true(b >= sblock.num_blocks - num_data_blocks);
+ }
+
+ for (usize i = 0; i < num_data_blocks; i += 2) {
+ usize no = bno[i];
+ assert_ne(no, 0);
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ bcache.free(&ctx, no);
+ bcache.end_op(&ctx);
+ }
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ usize no = bcache.alloc(&ctx);
+ assert_ne(no, 0);
+ for (usize i = 1; i < num_data_blocks; i += 2) {
+ assert_ne(bno[i], no);
+ }
+ bcache.free(&ctx, no);
+ bcache.end_op(&ctx);
+
+ for (usize i = 1; i < num_data_blocks; i += 2) {
+ bcache.begin_op(&ctx);
+ bcache.free(&ctx, bno[i]);
+ bcache.end_op(&ctx);
+ }
+ }
+}
+
+} // namespace basic
+
+namespace concurrent
+{
+
+void test_acquire()
+{
+ constexpr usize num_rounds = 100;
+ constexpr usize num_workers = 64;
+
+ for (usize round = 0; round < num_rounds; round++) {
+ int child;
+ if ((child = fork()) == IN_CHILD) {
+ initialize(1, num_workers);
+
+ std::atomic flag = false;
+ std::vector workers;
+ for (usize i = 0; i < num_workers; i++) {
+ workers.emplace_back([&, i] {
+ while (!flag) {
+ std::this_thread::yield();
+ }
+
+ usize t = sblock.num_blocks - 1 - i;
+ auto *b = bcache.acquire(t);
+ assert_eq(b->block_no, t);
+ assert_eq(b->valid, true);
+ bcache.release(b);
+ });
+ }
+
+ flag = true;
+ for (auto &worker : workers) {
+ worker.join();
+ }
+
+ exit(0);
+ } else {
+ wait_process(child);
+ }
+ }
+}
+
+void test_sync()
+{
+ constexpr int num_rounds = 100;
+
+ initialize(OP_MAX_NUM_BLOCKS * OP_MAX_NUM_BLOCKS, OP_MAX_NUM_BLOCKS);
+
+ std::mutex mtx;
+ std::condition_variable cv;
+ OpContext ctx;
+ int count = -1, round = -1;
+
+ auto cookie = [](int i, int j) { return (i + 1) * 1926 + j + 817; };
+
+ std::vector workers;
+ for (int i = 0; i < OP_MAX_NUM_BLOCKS; i++) {
+ workers.emplace_back([&, i] {
+ usize t = sblock.num_blocks - 1 - i;
+ for (int j = 0; j < num_rounds; j++) {
+ {
+ std::unique_lock lock(mtx);
+ cv.wait(lock, [&] { return j <= round; });
+ }
+
+ auto *b = bcache.acquire(t);
+ int *p = reinterpret_cast(b->data);
+ *p = cookie(i, j);
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+
+ {
+ std::unique_lock lock(mtx);
+ count++;
+ }
+
+ cv.notify_all();
+ }
+ });
+ }
+
+ auto check = [&](int j) {
+ for (int i = 0; i < OP_MAX_NUM_BLOCKS; i++) {
+ int *b = reinterpret_cast(
+ mock.inspect(sblock.num_blocks - 1 - i));
+ assert_eq(*b, cookie(i, j));
+ }
+ };
+
+ {
+ std::unique_lock lock(mtx);
+ for (int j = 0; j < num_rounds; j++) {
+ bcache.begin_op(&ctx);
+ round = j;
+ count = 0;
+ cv.notify_all();
+
+ cv.wait(lock, [&] { return count >= OP_MAX_NUM_BLOCKS; });
+
+ if (j > 0)
+ check(j - 1);
+ bcache.end_op(&ctx);
+ check(j);
+ }
+ }
+
+ for (auto &worker : workers) {
+ worker.join();
+ }
+}
+
+void test_alloc()
+{
+ initialize(100, 1000);
+
+ std::vector bno(1000);
+ std::vector workers;
+ for (usize i = 0; i < 4; i++) {
+ workers.emplace_back([&, i] {
+ usize t = 250 * i;
+ for (usize j = 0; j < 250; j++) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ bno[t + j] = bcache.alloc(&ctx);
+ bcache.end_op(&ctx);
+ }
+ });
+ }
+
+ for (auto &worker : workers) {
+ worker.join();
+ }
+ std::sort(bno.begin(), bno.end());
+ usize count = std::unique(bno.begin(), bno.end()) - bno.begin();
+ assert_eq(count, 1000);
+ assert_true(bno.front() >= sblock.num_blocks - 1000);
+ assert_true(bno.back() < sblock.num_blocks);
+}
+
+} // namespace concurrent
+
+namespace crash
+{
+
+void test_simple_crash()
+{
+ int child;
+ if ((child = fork()) == IN_CHILD) {
+ initialize(100, 100);
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ auto *b = bcache.acquire(150);
+ b->data[200] = 0x19;
+ b->data[201] = 0x26;
+ b->data[202] = 0x08;
+ b->data[203] = 0x17;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ bcache.end_op(&ctx);
+
+ bcache.begin_op(&ctx);
+ b = bcache.acquire(150);
+ b->data[200] = 0xcc;
+ b->data[201] = 0xcc;
+ b->data[202] = 0xcc;
+ b->data[203] = 0xcc;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+
+ mock.offline = true;
+
+ try {
+ bcache.end_op(&ctx);
+ } catch (const Offline &) {
+ }
+
+ mock.dump("sd.img");
+
+ exit(0);
+ } else {
+ wait_process(child);
+ initialize_mock(100, 100, "sd.img");
+
+ auto *b = mock.inspect(150);
+ assert_eq(b[200], 0x19);
+ assert_eq(b[201], 0x26);
+ assert_eq(b[202], 0x08);
+ assert_eq(b[203], 0x17);
+
+ init_bcache(&sblock, &device);
+ assert_eq(b[200], 0x19);
+ assert_eq(b[201], 0x26);
+ assert_eq(b[202], 0x08);
+ assert_eq(b[203], 0x17);
+ }
+}
+
+void test_parallel(usize num_rounds, usize num_workers, usize delay_ms,
+ usize log_cut)
+{
+ usize log_size = num_workers * OP_MAX_NUM_BLOCKS - log_cut;
+ usize num_data_blocks = 200 + num_workers * OP_MAX_NUM_BLOCKS;
+
+ printf("(trace) running: 0/%zu", num_rounds);
+ fflush(stdout);
+
+ usize replay_count = 0;
+ for (usize round = 0; round < num_rounds; round++) {
+ int child;
+ if ((child = fork()) == IN_CHILD) {
+ initialize_mock(log_size, num_data_blocks);
+ for (usize i = 0; i < num_workers * OP_MAX_NUM_BLOCKS; i++) {
+ auto *b = mock.inspect(200 + i);
+ std::fill(b, b + BLOCK_SIZE, 0);
+ }
+
+ init_bcache(&sblock, &device);
+
+ std::atomic started = false;
+ for (usize i = 0; i < num_workers; i++) {
+ std::thread([&, i] {
+ started = true;
+ usize t = 200 + i * OP_MAX_NUM_BLOCKS;
+ try {
+ u64 v = 0;
+ while (true) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ for (usize j = 0; j < OP_MAX_NUM_BLOCKS; j++) {
+ auto *b = bcache.acquire(t + j);
+ for (usize k = 0; k < BLOCK_SIZE;
+ k += sizeof(u64)) {
+ u64 *p = reinterpret_cast(b->data +
+ k);
+ *p = v;
+ }
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ }
+ bcache.end_op(&ctx);
+
+ v++;
+ }
+ } catch (const Offline &) {
+ }
+ }).detach();
+ }
+
+ // disk will power off after `delay_ms` ms.
+ std::thread aha([&] {
+ while (!started) {
+ }
+ std::this_thread::sleep_for(
+ std::chrono::milliseconds(delay_ms));
+ mock.offline = true;
+ });
+
+ aha.join();
+ mock.dump("sd.img");
+ _exit(0);
+ } else {
+ wait_process(child);
+ initialize_mock(log_size, num_data_blocks, "sd.img");
+ auto *header = mock.inspect_log_header();
+ if (header->num_blocks > 0)
+ replay_count++;
+
+ if ((child = fork()) == IN_CHILD) {
+ init_bcache(&sblock, &device);
+ assert_eq(header->num_blocks, 0);
+
+ for (usize i = 0; i < num_workers; i++) {
+ usize t = 200 + i * OP_MAX_NUM_BLOCKS;
+ u64 v = *reinterpret_cast(mock.inspect(t));
+
+ for (usize j = 0; j < OP_MAX_NUM_BLOCKS; j++) {
+ auto *b = mock.inspect(t + j);
+ for (usize k = 0; k < BLOCK_SIZE; k += sizeof(u64)) {
+ u64 u = *reinterpret_cast(b + k);
+ assert_eq(u, v);
+ }
+ }
+ }
+
+ exit(0);
+ } else
+ wait_process(child);
+ }
+
+ printf("\r(trace) running: %zu/%zu (%zu replayed)", round + 1,
+ num_rounds, replay_count);
+ fflush(stdout);
+ }
+
+ puts("");
+}
+
+void test_banker()
+{
+ using namespace std::chrono_literals;
+
+ constexpr i64 initial = 1000;
+ constexpr i64 bill = 200;
+ constexpr usize num_accounts = 10;
+ constexpr usize num_workers = 8;
+ constexpr usize num_rounds = 30;
+
+ constexpr usize log_size = 3 * num_workers + OP_MAX_NUM_BLOCKS;
+
+ printf("(trace) running: 0/%zu", num_rounds);
+ fflush(stdout);
+
+ usize replay_count = 0;
+ for (usize round = 0; round < num_rounds; round++) {
+ int child;
+ if ((child = fork()) == IN_CHILD) {
+ initialize(log_size, num_accounts);
+
+ auto begin_ts = std::chrono::steady_clock::now();
+
+ std::vector bno;
+ bno.reserve(num_accounts);
+ for (usize i = 0; i < num_accounts; i++) {
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+ bno.push_back(bcache.alloc(&ctx));
+ auto *b = bcache.acquire(bno.back());
+ i64 *p = reinterpret_cast(b->data);
+ *p = initial;
+ bcache.sync(&ctx, b);
+ bcache.release(b);
+ bcache.end_op(&ctx);
+ }
+
+ std::random_device rd;
+ std::atomic count = 0;
+ std::atomic started = false;
+ for (usize i = 0; i < num_workers; i++) {
+ std::thread([&] {
+ std::mt19937 gen(rd());
+
+ started = true;
+ try {
+ while (true) {
+ usize j = gen() % num_accounts,
+ k = gen() % num_accounts;
+ if (j == k)
+ k = (k + 1) % num_accounts;
+
+ OpContext ctx;
+ bcache.begin_op(&ctx);
+
+ Block *bj, *bk;
+ if (j < k) {
+ bj = bcache.acquire(bno[j]);
+ bk = bcache.acquire(bno[k]);
+ } else {
+ bk = bcache.acquire(bno[k]);
+ bj = bcache.acquire(bno[j]);
+ }
+
+ i64 *vj = reinterpret_cast(bj->data);
+ i64 *vk = reinterpret_cast(bk->data);
+ i64 transfer = std::min(*vj, (i64)(gen() % bill));
+
+ *vj -= transfer;
+ bcache.sync(&ctx, bj);
+ bcache.release(bj);
+
+ *vk += transfer;
+ bcache.sync(&ctx, bk);
+ bcache.release(bk);
+
+ bcache.end_op(&ctx);
+ count++;
+ }
+ } catch (const Offline &) {
+ }
+ }).detach();
+ }
+
+ while (!started) {
+ }
+ std::this_thread::sleep_for(2s);
+ mock.offline = true;
+
+ auto end_ts = std::chrono::steady_clock::now();
+ auto duration =
+ std::chrono::duration_cast(
+ end_ts - begin_ts)
+ .count();
+ printf("\r\033[K(trace) throughput = %.2f txn/s\n",
+ static_cast(count) * 1000 / duration);
+ fflush(stdout);
+
+ mock.dump("sd.img");
+ _exit(0);
+ } else {
+ wait_process(child);
+ initialize_mock(log_size, num_accounts, "sd.img");
+ auto *header = mock.inspect_log_header();
+ if (header->num_blocks > 0)
+ replay_count++;
+
+ if ((child = fork()) == IN_CHILD) {
+ init_bcache(&sblock, &device);
+
+ i64 sum = 0;
+ usize t = sblock.num_blocks - num_accounts;
+ for (usize i = 0; i < num_accounts; i++) {
+ i64 value = *reinterpret_cast(mock.inspect(t + i));
+ assert_true(value >= 0);
+ sum += value;
+ }
+
+ assert_eq(sum, num_accounts * initial);
+ exit(0);
+ } else
+ wait_process(child);
+ }
+
+ printf("\r(trace) running: %zu/%zu (%zu replayed)", round + 1,
+ num_rounds, replay_count);
+ fflush(stdout);
+ }
+
+ puts("");
+}
+
+} // namespace crash
+
+int main()
+{
+ std::vector tests = {
+ { "init", basic::test_init },
+ { "read_write", basic::test_read_write },
+ { "loop_read", basic::test_loop_read },
+ { "reuse", basic::test_reuse },
+ { "lru", basic::test_lru },
+ { "atomic_op", basic::test_atomic_op },
+ { "overflow", basic::test_overflow },
+ { "resident", basic::test_resident },
+ { "local_absorption", basic::test_local_absorption },
+ { "global_absorption", basic::test_global_absorption },
+ { "replay", basic::test_replay },
+ { "alloc", basic::test_alloc },
+ { "alloc_free", basic::test_alloc_free },
+
+ { "concurrent_acquire", concurrent::test_acquire },
+ { "concurrent_sync", concurrent::test_sync },
+ { "concurrent_alloc", concurrent::test_alloc },
+
+ { "simple_crash", crash::test_simple_crash },
+ { "single", [] { crash::test_parallel(1000, 1, 5, 0); } },
+ { "parallel_1", [] { crash::test_parallel(1000, 2, 5, 0); } },
+ { "parallel_2", [] { crash::test_parallel(1000, 4, 5, 0); } },
+ { "parallel_3", [] { crash::test_parallel(500, 4, 10, 1); } },
+ { "parallel_4",
+ [] { crash::test_parallel(500, 4, 10, 2 * OP_MAX_NUM_BLOCKS); } },
+ { "banker", crash::test_banker },
+ };
+ Runner(tests).run();
+
+ printf("(info) OK: %zu tests passed.\n", tests.size());
+
+ return 0;
+}
diff --git a/src/fs/test/exception.hpp b/src/fs/test/exception.hpp
new file mode 100755
index 0000000..efd874b
--- /dev/null
+++ b/src/fs/test/exception.hpp
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <cstdio>
+
+#include <exception>
+#include <string>
+
+#include
+
+static inline void backtrace() {
+ printf("0\n%p\n", __builtin_return_address(0));
+ printf("1\n%p\n", __builtin_return_address(1));
+ printf("2\n%p\n", __builtin_return_address(2));
+ printf("3\n%p\n", __builtin_return_address(3));
+ printf("4\n%p\n", __builtin_return_address(4));
+}
+
+struct Exception : public std::exception {
+ std::string message;
+
+ Exception(const std::string &_message) : message(_message) {}
+
+ const char *what() const noexcept override {
+ return message.data();
+ }
+};
+
+struct Internal final : Exception {
+ using Exception::Exception;
+ virtual ~Internal() = default;
+};
+
+struct Panic final : Exception {
+ using Exception::Exception;
+ virtual ~Panic() = default;
+};
+
+struct AssertionFailure final : Exception {
+ using Exception::Exception;
+ virtual ~AssertionFailure() = default;
+};
+
+struct Offline final : Exception {
+ using Exception::Exception;
+ virtual ~Offline() = default;
+};
diff --git a/src/fs/test/inode_test.cpp b/src/fs/test/inode_test.cpp
new file mode 100755
index 0000000..c51464b
--- /dev/null
+++ b/src/fs/test/inode_test.cpp
@@ -0,0 +1,432 @@
+extern "C" {
+#include
+}
+
+#include "assert.hpp"
+#include "pause.hpp"
+#include "runner.hpp"
+
+#include "mock/cache.hpp"
+
+void test_init()
+{
+ init_inodes(&sblock, &cache);
+ assert_eq(mock.count_inodes(), 1);
+ assert_eq(mock.count_blocks(), 0);
+}
+
+namespace adhoc
+{
+
+static OpContext _ctx, *ctx = &_ctx;
+
+void test_alloc()
+{
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_REGULAR);
+
+ assert_eq(mock.count_inodes(), 1);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 2);
+
+ auto *p = inodes.get(ino);
+
+ inodes.lock(p);
+ // printf("hello\n");
+ inodes.unlock(p);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, p);
+
+ assert_eq(mock.count_inodes(), 2);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 1);
+}
+
+void test_sync()
+{
+ auto *p = inodes.get(1);
+
+ inodes.lock(p);
+ assert_eq(p->entry.type, INODE_DIRECTORY);
+ p->entry.major = 0x19;
+ p->entry.minor = 0x26;
+ p->entry.indirect = 0xa817;
+ inodes.unlock(p);
+
+ mock.begin_op(ctx);
+ inodes.lock(p);
+ inodes.sync(ctx, p, true);
+ inodes.unlock(p);
+ inodes.put(ctx, p);
+ mock.end_op(ctx);
+
+ auto *q = mock.inspect(1);
+ assert_eq(q->type, INODE_DIRECTORY);
+ assert_eq(q->major, 0x19);
+ assert_eq(q->minor, 0x26);
+ assert_eq(q->indirect, 0xa817);
+}
+
+void test_touch()
+{
+ auto *p = inodes.get(1);
+ inodes.lock(p);
+
+ for (usize i = 2; i < mock.num_inodes; i++) {
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_REGULAR);
+ inodes.insert(ctx, p, std::to_string(i).data(), ino);
+
+ auto *q = inodes.get(ino);
+ inodes.lock(q);
+ assert_eq(q->entry.type, INODE_REGULAR);
+ assert_eq(q->entry.major, 0);
+ assert_eq(q->entry.minor, 0);
+ assert_eq(q->entry.num_links, 0);
+ assert_eq(q->entry.num_bytes, 0);
+ assert_eq(q->entry.indirect, 0);
+ for (usize j = 0; j < INODE_NUM_DIRECT; j++) {
+ assert_eq(q->entry.addrs[j], 0);
+ }
+
+ q->entry.num_links++;
+
+ inodes.sync(ctx, q, true);
+
+ inodes.unlock(q);
+ inodes.put(ctx, q);
+
+ assert_eq(mock.count_inodes(), i - 1);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), i);
+ }
+
+ usize n = mock.num_inodes - 1;
+ for (usize i = 2; i < mock.num_inodes; i += 2, n--) {
+ mock.begin_op(ctx);
+ usize index = 10086;
+ assert_ne(inodes.lookup(p, std::to_string(i).data(), &index), 0);
+ assert_ne(index, 10086);
+ inodes.remove(ctx, p, index);
+
+ auto *q = inodes.get(i);
+ inodes.lock(q);
+ q->entry.num_links = 0;
+ inodes.sync(ctx, q, true);
+ inodes.unlock(q);
+ inodes.put(ctx, q);
+
+ assert_eq(mock.count_inodes(), n);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), n - 1);
+ }
+
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_DIRECTORY);
+ auto *q = inodes.get(ino);
+ inodes.lock(q);
+ assert_eq(q->entry.type, INODE_DIRECTORY);
+ inodes.unlock(q);
+ assert_eq(mock.count_inodes(), n);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), n + 1);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, q);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), n);
+
+ for (usize i = 3; i < mock.num_inodes; i += 2, n--) {
+ mock.begin_op(ctx);
+ q = inodes.get(i);
+ inodes.lock(q);
+ q->entry.num_links = 0;
+ inodes.sync(ctx, q, true);
+ inodes.unlock(q);
+ inodes.put(ctx, q);
+ assert_eq(mock.count_inodes(), n);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), n - 1);
+ }
+
+ inodes.unlock(p);
+}
+
+void test_share()
+{
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_REGULAR);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 2);
+
+ auto *p = inodes.get(ino);
+ auto *q = inodes.share(p);
+ auto *r = inodes.get(ino);
+
+ assert_eq(r->rc.count, 3);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, p);
+ assert_eq(q->rc.count, 2);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 2);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, q);
+ assert_eq(r->rc.count, 1);
+ assert_eq(mock.count_inodes(), 2);
+ inodes.put(ctx, r);
+ assert_eq(mock.count_inodes(), 2);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 1);
+}
+
+void test_small_file()
+{
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_REGULAR);
+ mock.end_op(ctx);
+
+ u8 buf[1];
+ auto *p = inodes.get(ino);
+ inodes.lock(p);
+
+ buf[0] = 0xcc;
+ inodes.read(p, buf, 0, 0);
+ assert_eq(buf[0], 0xcc);
+
+ mock.begin_op(ctx);
+ inodes.write(ctx, p, buf, 0, 1);
+ assert_eq(mock.count_blocks(), 0);
+ mock.end_op(ctx);
+
+ auto *q = mock.inspect(ino);
+ assert_eq(q->indirect, 0);
+ assert_ne(q->addrs[0], 0);
+ assert_eq(q->addrs[1], 0);
+ assert_eq(q->num_bytes, 1);
+ assert_eq(mock.count_blocks(), 1);
+
+ mock.fill_junk();
+ buf[0] = 0;
+ inodes.read(p, buf, 0, 1);
+ assert_eq(buf[0], 0xcc);
+
+ inodes.unlock(p);
+
+ inodes.lock(p);
+
+ mock.begin_op(ctx);
+ inodes.clear(ctx, p);
+ mock.end_op(ctx);
+
+ q = mock.inspect(ino);
+ assert_eq(q->indirect, 0);
+ assert_eq(q->addrs[0], 0);
+ assert_eq(q->num_bytes, 0);
+ assert_eq(mock.count_blocks(), 0);
+
+ inodes.unlock(p);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, p);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 1);
+}
+
+void test_large_file()
+{
+ mock.begin_op(ctx);
+ usize ino = inodes.alloc(ctx, INODE_REGULAR);
+ mock.end_op(ctx);
+
+ constexpr usize max_size = 65535;
+ u8 buf[max_size], copy[max_size];
+ std::mt19937 gen(0x12345678);
+ for (usize i = 0; i < max_size; i++) {
+ copy[i] = buf[i] = gen() & 0xff;
+ }
+
+ auto *p = inodes.get(ino);
+
+ inodes.lock(p);
+ for (usize i = 0, n = 0; i < max_size; i += n) {
+ n = std::min(static_cast<usize>(gen() % 10000), max_size - i);
+
+ mock.begin_op(ctx);
+ inodes.write(ctx, p, buf + i, i, n);
+ auto *q = mock.inspect(ino);
+ assert_eq(q->num_bytes, i);
+ mock.end_op(ctx);
+ assert_eq(q->num_bytes, i + n);
+ }
+ inodes.unlock(p);
+
+ for (usize i = 0; i < max_size; i++) {
+ buf[i] = 0;
+ }
+
+ inodes.lock(p);
+ inodes.read(p, buf, 0, max_size);
+ inodes.unlock(p);
+
+ for (usize i = 0; i < max_size; i++) {
+ assert_eq(buf[i], copy[i]);
+ }
+
+ inodes.lock(p);
+ mock.begin_op(ctx);
+ inodes.clear(ctx, p);
+ inodes.unlock(p);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 2);
+ assert_eq(mock.count_blocks(), 0);
+
+ for (usize i = 0; i < max_size; i++) {
+ copy[i] = buf[i] = gen() & 0xff;
+ }
+
+ inodes.lock(p);
+ mock.begin_op(ctx);
+ inodes.write(ctx, p, buf, 0, max_size);
+ mock.end_op(ctx);
+ inodes.unlock(p);
+
+ auto *q = mock.inspect(ino);
+ assert_eq(q->num_bytes, max_size);
+
+ for (usize i = 0; i < max_size; i++) {
+ buf[i] = 0;
+ }
+
+ inodes.lock(p);
+ for (usize i = 0, n = 0; i < max_size; i += n) {
+ n = std::min(static_cast<usize>(gen() % 10000), max_size - i);
+ inodes.read(p, buf + i, i, n);
+ for (usize j = 0; j < i + n; j++) {
+ assert_eq(buf[j], copy[j]);
+ }
+ }
+ inodes.unlock(p);
+
+ mock.begin_op(ctx);
+ inodes.put(ctx, p);
+ mock.end_op(ctx);
+
+ assert_eq(mock.count_inodes(), 1);
+ assert_eq(mock.count_blocks(), 0);
+}
+
+void test_dir()
+{
+ usize ino[5] = { 1 };
+
+ mock.begin_op(ctx);
+ ino[1] = inodes.alloc(ctx, INODE_DIRECTORY);
+ ino[2] = inodes.alloc(ctx, INODE_REGULAR);
+ ino[3] = inodes.alloc(ctx, INODE_REGULAR);
+ ino[4] = inodes.alloc(ctx, INODE_REGULAR);
+ assert_eq(mock.count_inodes(), 1);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), 5);
+
+ Inode *p[5];
+ for (usize i = 0; i < 5; i++) {
+ p[i] = inodes.get(ino[i]);
+ inodes.lock(p[i]);
+ }
+
+ mock.begin_op(ctx);
+ inodes.insert(ctx, p[0], "fudan", ino[1]);
+ p[1]->entry.num_links++;
+ inodes.sync(ctx, p[1], true);
+
+ auto *q = mock.inspect(ino[0]);
+ assert_eq(q->addrs[0], 0);
+ assert_eq(inodes.lookup(p[0], "fudan", NULL), ino[1]);
+ mock.end_op(ctx);
+
+ assert_eq(inodes.lookup(p[0], "fudan", NULL), ino[1]);
+ assert_eq(inodes.lookup(p[0], "sjtu", NULL), 0);
+ assert_eq(inodes.lookup(p[0], "pku", NULL), 0);
+ assert_eq(inodes.lookup(p[0], "tsinghua", NULL), 0);
+
+ mock.begin_op(ctx);
+ inodes.insert(ctx, p[0], ".vimrc", ino[2]);
+ inodes.insert(ctx, p[1], "alice", ino[3]);
+ inodes.insert(ctx, p[1], "bob", ino[4]);
+ p[2]->entry.num_links++;
+ p[3]->entry.num_links++;
+ p[4]->entry.num_links++;
+ inodes.sync(ctx, p[2], true);
+ inodes.sync(ctx, p[3], true);
+ inodes.sync(ctx, p[4], true);
+ mock.end_op(ctx);
+
+ for (usize i = 1; i < 5; i++) {
+ q = mock.inspect(ino[i]);
+ assert_eq(q->num_links, 1);
+ }
+
+ usize index = 233;
+ assert_eq(inodes.lookup(p[0], "vimrc", &index), 0);
+ assert_eq(index, 233);
+ assert_eq(inodes.lookup(p[0], ".vimrc", &index), ino[2]);
+ assert_ne(index, 233);
+ index = 244;
+ assert_eq(inodes.lookup(p[1], "nano", &index), 0);
+ assert_eq(index, 244);
+ assert_eq(inodes.lookup(p[1], "alice", &index), ino[3]);
+ usize index2 = 255;
+ assert_eq(inodes.lookup(p[1], "bob", &index2), ino[4]);
+ assert_ne(index, 244);
+ assert_ne(index2, 255);
+ assert_ne(index, index2);
+
+ mock.begin_op(ctx);
+ inodes.clear(ctx, p[1]);
+ p[2]->entry.num_links = 0;
+ inodes.sync(ctx, p[2], true);
+
+ q = mock.inspect(ino[1]);
+ assert_ne(q->addrs[0], 0);
+ assert_eq(inodes.lookup(p[1], "alice", NULL), 0);
+ assert_eq(inodes.lookup(p[1], "bob", NULL), 0);
+ mock.end_op(ctx);
+
+ assert_eq(q->addrs[0], 0);
+ assert_eq(mock.count_inodes(), 5);
+ assert_ne(mock.count_blocks(), 0);
+
+ for (usize i = 0; i < 5; i++) {
+ mock.begin_op(ctx);
+ inodes.unlock(p[i]);
+ inodes.put(ctx, p[i]);
+ mock.end_op(ctx);
+ assert_eq(mock.count_inodes(), (i < 2 ? 5 : 4));
+ }
+}
+
+} // namespace adhoc
+
+int main()
+{
+ if (Runner::run({ "init", test_init }))
+ init_inodes(&sblock, &cache);
+ else
+ return -1;
+
+ std::vector tests = {
+ { "alloc", adhoc::test_alloc },
+ { "sync", adhoc::test_sync },
+ { "touch", adhoc::test_touch },
+ { "share", adhoc::test_share },
+ { "small_file", adhoc::test_small_file },
+ { "large_file", adhoc::test_large_file },
+ { "dir", adhoc::test_dir },
+ };
+ Runner(tests).run();
+
+ return 0;
+}
diff --git a/src/fs/test/instrument.c b/src/fs/test/instrument.c
new file mode 100755
index 0000000..e7b87ec
--- /dev/null
+++ b/src/fs/test/instrument.c
@@ -0,0 +1,16 @@
+#include
+
+// this file is compiled with `fs` library.
+// some symbols may conflict with those in the standard libc, e.g. `sleep`, so we
+// have to replace them with other symbol names and instrument them here.
+
+extern void _fs_test_sleep(void *chan, SpinLock *lock);
+extern void _fs_test_wakeup(void *chan);
+
+void sleep(void *chan, SpinLock *lock) {
+ _fs_test_sleep(chan, lock);
+}
+
+void wakeup(void *chan) {
+ _fs_test_wakeup(chan);
+}
diff --git a/src/fs/test/mock/arena.cpp b/src/fs/test/mock/arena.cpp
new file mode 100755
index 0000000..f44f9c8
--- /dev/null
+++ b/src/fs/test/mock/arena.cpp
@@ -0,0 +1,24 @@
+extern "C" {
+#include
+}
+
+#include "map.hpp"
+
+namespace
+{
+Map map;
+Map ref;
+} // namespace
+
+extern "C" {
+
+void *kalloc(isize x)
+{
+ return malloc(x);
+}
+
+void kfree(void *object)
+{
+ free(object);
+}
+}
diff --git a/src/fs/test/mock/block_device.hpp b/src/fs/test/mock/block_device.hpp
new file mode 100755
index 0000000..ca0b283
--- /dev/null
+++ b/src/fs/test/mock/block_device.hpp
@@ -0,0 +1,181 @@
+#pragma once
+
+extern "C" {
+#include
+}
+
+#include <atomic>
+#include <fstream>
+#include <functional>
+#include <iomanip>
+#include <mutex>
+#include <random>
+#include <vector>
+
+#include "../exception.hpp"
+
+struct MockBlockDevice {
+ struct Block {
+ std::mutex mutex;
+ u8 data[BLOCK_SIZE];
+
+ void fill_junk() {
+ static std::mt19937 gen(0x19260817);
+
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ data[i] = gen() & 0xff;
+ }
+ }
+
+ void fill_zero() {
+ std::fill(std::begin(data), std::end(data), 0);
+ }
+ };
+
+ const SuperBlock *sblock;
+
+ std::atomic<bool> offline;
+ std::atomic<usize> read_count;
+ std::atomic<usize> write_count;
+ std::vector<Block> disk;
+
+ using Hook = std::function<void(usize block_no, u8 *buffer)>;
+
+ Hook on_read;
+ Hook on_write;
+
+ void initialize(const SuperBlock &_sblock) {
+ sblock = &_sblock;
+
+ offline = false;
+ read_count = 0;
+ write_count = 0;
+ {
+ std::vector<Block> new_disk(sblock->num_blocks);
+ std::swap(disk, new_disk);
+ }
+
+ for (auto &block : disk) {
+ block.fill_junk();
+ }
+
+ if (sblock->num_log_blocks < 2)
+ throw Internal("logging area is too small");
+ disk[sblock->log_start].fill_zero();
+
+ usize num_bitmap_blocks = (sblock->num_blocks + BIT_PER_BLOCK - 1) / BIT_PER_BLOCK;
+ for (usize i = 0; i < num_bitmap_blocks; i++) {
+ disk[sblock->bitmap_start + i].fill_zero();
+ }
+
+ usize num_preallocated = 1 + 1 + sblock->num_log_blocks +
+ ((sblock->num_inodes + INODE_PER_BLOCK - 1) / INODE_PER_BLOCK) +
+ num_bitmap_blocks;
+ if (num_preallocated + sblock->num_data_blocks > sblock->num_blocks)
+ throw Internal("invalid super block");
+ for (usize i = 0; i < num_preallocated; i++) {
+ usize j = i / BIT_PER_BLOCK, k = i % BIT_PER_BLOCK;
+ disk[sblock->bitmap_start + j].data[k / 8] |= (1 << (k % 8));
+ }
+ }
+
+ auto inspect(usize block_no) -> u8 * {
+ if (block_no >= disk.size())
+ throw Internal("block number is out of range");
+ return disk[block_no].data;
+ }
+
+ auto inspect_log(usize index) -> u8 * {
+ return inspect(sblock->log_start + 1 + index);
+ }
+
+ auto inspect_log_header() -> LogHeader * {
+ return reinterpret_cast<LogHeader *>(inspect(sblock->log_start));
+ }
+
+ void dump(std::ostream &stream) {
+ for (auto &block : disk) {
+ std::scoped_lock lock(block.mutex);
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ stream << std::setfill('0') << std::setw(2) << std::hex
+ << static_cast(block.data[i]) << " ";
+ }
+ stream << "\n";
+ }
+ }
+
+ void load(std::istream &stream) {
+ for (auto &block : disk) {
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ u64 value;
+ stream >> std::hex >> value;
+ block.data[i] = value & 0xff;
+ }
+ }
+ }
+
+ void dump(const std::string &path) {
+ std::ofstream file(path);
+ dump(file);
+ }
+
+ void load(const std::string &path) {
+ std::ifstream file(path);
+ load(file);
+ }
+
+ void check_offline() {
+ if (offline)
+ throw Offline("disk power failure");
+ }
+
+ void read(usize block_no, u8 *buffer) {
+ if (block_no >= disk.size())
+ throw AssertionFailure("block number is out of range");
+
+ check_offline();
+
+ auto &block = disk[block_no];
+ std::scoped_lock lock(block.mutex);
+
+ if (on_read)
+ on_read(block_no, buffer);
+
+ check_offline();
+
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ buffer[i] = block.data[i];
+ }
+
+ read_count++;
+
+ check_offline();
+ }
+
+ void write(usize block_no, u8 *buffer) {
+ if (block_no >= disk.size())
+ throw AssertionFailure("block number is out of range");
+
+ check_offline();
+
+ auto &block = disk[block_no];
+ std::scoped_lock lock(block.mutex);
+
+ if (on_write)
+ on_write(block_no, buffer);
+
+ check_offline();
+
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ block.data[i] = buffer[i];
+ }
+
+ write_count++;
+
+ check_offline();
+ }
+};
+
+namespace {
+#include "block_device.ipp"
+} // namespace
diff --git a/src/fs/test/mock/block_device.ipp b/src/fs/test/mock/block_device.ipp
new file mode 100755
index 0000000..8e3f39a
--- /dev/null
+++ b/src/fs/test/mock/block_device.ipp
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "block_device.hpp"
+
+static MockBlockDevice mock;
+static SuperBlock sblock;
+static BlockDevice device;
+
+static void stub_read(usize block_no, u8 *buffer) {
+ mock.read(block_no, buffer);
+}
+
+static void stub_write(usize block_no, u8 *buffer) {
+ mock.write(block_no, buffer);
+}
+
+static void initialize_mock( //
+ usize log_size,
+ usize num_data_blocks,
+ const std::string &image_path = "") {
+ sblock.log_start = 2;
+ sblock.inode_start = sblock.log_start + 1 + log_size;
+ sblock.bitmap_start = sblock.inode_start + 1;
+ sblock.num_inodes = 1;
+ sblock.num_log_blocks = 1 + log_size;
+ sblock.num_data_blocks = num_data_blocks;
+ sblock.num_blocks = 1 + 1 + 1 + log_size + 1 +
+ ((num_data_blocks + BIT_PER_BLOCK - 1) / BIT_PER_BLOCK) + num_data_blocks;
+
+ mock.initialize(sblock);
+
+ device.read = stub_read;
+ device.write = stub_write;
+
+ if (!image_path.empty())
+ mock.load(image_path);
+}
+
+[[maybe_unused]] static void initialize( //
+ usize log_size,
+ usize num_data_blocks,
+ const std::string &image_path = "") {
+ initialize_mock(log_size, num_data_blocks, image_path);
+ init_bcache(&sblock, &device);
+}
diff --git a/src/fs/test/mock/cache.hpp b/src/fs/test/mock/cache.hpp
new file mode 100755
index 0000000..d728690
--- /dev/null
+++ b/src/fs/test/mock/cache.hpp
@@ -0,0 +1,342 @@
+#pragma once
+
+extern "C" {
+#include
+}
+
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <random>
+#include <unordered_map>
+
+#include "../exception.hpp"
+
+struct MockBlockCache {
+ static constexpr usize num_blocks = 2000;
+ static constexpr usize inode_start = 200;
+ static constexpr usize block_start = 1000;
+ static constexpr usize num_inodes = 1000;
+
+ static auto get_sblock() -> SuperBlock {
+ SuperBlock sblock;
+ sblock.num_blocks = num_blocks;
+ sblock.num_data_blocks = num_blocks - block_start;
+ sblock.num_inodes = num_inodes;
+ sblock.num_log_blocks = 50;
+ sblock.log_start = 2;
+ sblock.inode_start = inode_start;
+ sblock.bitmap_start = 900;
+ return sblock;
+ }
+
+ struct Meta {
+ bool mark = false;
+ std::mutex mutex;
+ bool used;
+
+ auto operator=(const Meta &rhs) -> Meta & {
+ used = rhs.used;
+ return *this;
+ }
+ };
+
+ struct Cell {
+ bool mark = false;
+ usize index;
+ std::mutex mutex;
+ Block block;
+
+ auto operator=(const Cell &rhs) -> Cell & {
+ block = rhs.block;
+ return *this;
+ }
+
+ void zero() {
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ block.data[i] = 0;
+ }
+ }
+
+ void random(std::mt19937 &gen) {
+ for (usize i = 0; i < BLOCK_SIZE; i++) {
+ block.data[i] = gen() & 0xff;
+ }
+ }
+ };
+
+ // board: record all uncommitted atomic operations. `board[i] = true` means
+ // atomic operation i has called `end_op` and waits for final commit.
+ // oracle: to allocate id for each atomic operation.
+ // top: the maximum id of committed atomic operation.
+ // mutex & cv: protects board.
+ std::mutex mutex;
+ std::condition_variable cv;
+ std::atomic<usize> oracle, top_oracle;
+ std::unordered_map<usize, bool> scoreboard;
+
+ // mbit: bitmap cached in memory, which is volatile
+ // sbit: bitmap on SD card, which is persistent
+ // mblk: data blocks cached in memory, volatile
+ // sblk: data blocks on SD card, persistent
+ Meta mbit[num_blocks], sbit[num_blocks];
+ Cell mblk[num_blocks], sblk[num_blocks];
+
+ MockBlockCache() {
+ std::mt19937 gen(0x19260817);
+
+ oracle.store(1);
+ top_oracle.store(0);
+
+ // fill disk with junk.
+ for (usize i = 0; i < num_blocks; i++) {
+ mbit[i].used = false;
+ mblk[i].index = i;
+ mblk[i].random(gen);
+ sbit[i].used = false;
+ sblk[i].index = i;
+ sblk[i].random(gen);
+ }
+
+ // mock superblock.
+ auto sblock = get_sblock();
+ u8 *buf = reinterpret_cast<u8 *>(&sblock);
+ for (usize i = 0; i < sizeof(sblock); i++) {
+ sblk[1].block.data[i] = buf[i];
+ }
+
+ // mock inodes.
+ InodeEntry node[num_inodes];
+ for (usize i = 0; i < num_inodes; i++) {
+ node[i].type = INODE_INVALID;
+ node[i].major = gen() & 0xffff;
+ node[i].minor = gen() & 0xffff;
+ node[i].num_links = gen() & 0xffff;
+ node[i].num_bytes = gen() & 0xffff;
+ for (usize j = 0; j < INODE_NUM_DIRECT; j++) {
+ node[i].addrs[j] = gen();
+ }
+ node[i].indirect = gen();
+ }
+
+ // mock root inode.
+ node[1].type = INODE_DIRECTORY;
+ node[1].major = 0;
+ node[1].minor = 0;
+ node[1].num_links = 1;
+ node[1].num_bytes = 0;
+ for (usize i = 0; i < INODE_NUM_DIRECT; i++) {
+ node[1].addrs[i] = 0;
+ }
+ node[1].indirect = 0;
+
+ usize step = 0;
+ for (usize i = 0, j = inode_start; i < num_inodes; i += step, j++) {
+ step = std::min(num_inodes - i, static_cast<usize>(INODE_PER_BLOCK));
+ buf = reinterpret_cast<u8 *>(&node[i]);
+ for (usize k = 0; k < step * sizeof(InodeEntry); k++) {
+ sblk[j].block.data[k] = buf[k];
+ }
+ }
+ }
+
+ // invalidate all cached blocks and fill them with random data.
+ void fill_junk() {
+ std::mt19937 gen(0xdeadbeef);
+
+ for (usize i = 0; i < num_blocks; i++) {
+ std::scoped_lock guard(mbit[i].mutex);
+ if (mbit[i].mark)
+ throw Internal("marked by others");
+ }
+
+ for (usize i = 0; i < num_blocks; i++) {
+ std::scoped_lock guard(mblk[i].mutex);
+ if (mblk[i].mark)
+ throw Internal("marked by others");
+ mblk[i].random(gen);
+ }
+ }
+
+ // count how many inodes on disk are valid.
+ auto count_inodes() -> usize {
+ std::unique_lock lock(mutex);
+
+ usize step = 0, count = 0;
+ for (usize i = 0, j = inode_start; i < num_inodes; i += step, j++) {
+ step = std::min(num_inodes - i, static_cast<usize>(INODE_PER_BLOCK));
+ auto *inodes = reinterpret_cast<InodeEntry *>(sblk[j].block.data);
+ for (usize k = 0; k < step; k++) {
+ if (inodes[k].type != INODE_INVALID)
+ count++;
+ }
+ }
+
+ return count;
+ }
+
+ // count how many blocks on disk are allocated.
+ auto count_blocks() -> usize {
+ std::unique_lock lock(mutex);
+
+ usize count = 0;
+ for (usize i = block_start; i < num_blocks; i++) {
+ std::scoped_lock guard(sbit[i].mutex);
+ if (sbit[i].used)
+ count++;
+ }
+
+ return count;
+ }
+
+ // inspect on disk inode at specified inode number.
+ auto inspect(usize i) -> InodeEntry * {
+ usize j = inode_start + i / INODE_PER_BLOCK;
+ usize k = i % INODE_PER_BLOCK;
+ auto *arr = reinterpret_cast<InodeEntry *>(sblk[j].block.data);
+ return &arr[k];
+ }
+
+ void check_block_no(usize i) {
+ if (i >= num_blocks)
+ throw AssertionFailure("block number out of range");
+ }
+
+ auto check_and_get_cell(Block *b) -> Cell * {
+ Cell *p = container_of(b, Cell, block);
+ isize offset = reinterpret_cast<isize>(p) - reinterpret_cast<isize>(mblk);
+ if (offset % sizeof(Cell) != 0)
+ throw AssertionFailure("pointer not aligned");
+
+ isize i = p - mblk;
+ if (i < 0 || static_cast(i) >= num_blocks)
+ throw AssertionFailure("block is not managed by cache");
+
+ return p;
+ }
+
+ template <typename T>
+ void load(T &a, T &b) {
+ if (!a.mark) {
+ a = b;
+ a.mark = true;
+ }
+ }
+
+ template <typename T>
+ void store(T &a, T &b) {
+ if (a.mark) {
+ b = a;
+ a.mark = false;
+ }
+ }
+
+ void begin_op(OpContext *ctx) {
+ std::unique_lock lock(mutex);
+ ctx->ts = oracle.fetch_add(1);
+ scoreboard[ctx->ts] = false;
+ }
+
+ void end_op(OpContext *ctx) {
+ std::unique_lock lock(mutex);
+ scoreboard[ctx->ts] = true;
+
+ // is it safe to checkpoint now?
+ bool do_checkpoint = true;
+ for (const auto &e : scoreboard) {
+ do_checkpoint &= e.second;
+ }
+
+ if (do_checkpoint) {
+ for (usize i = 0; i < num_blocks; i++) {
+ std::scoped_lock guard(mbit[i].mutex, sbit[i].mutex);
+ store(mbit[i], sbit[i]);
+ }
+
+ for (usize i = 0; i < num_blocks; i++) {
+ std::scoped_lock guard(mblk[i].mutex, sblk[i].mutex);
+ store(mblk[i], sblk[i]);
+ }
+
+ usize max_oracle = 0;
+ for (const auto &e : scoreboard) {
+ max_oracle = std::max(max_oracle, e.first);
+ }
+ top_oracle.store(max_oracle);
+ scoreboard.clear();
+
+ cv.notify_all();
+ } else {
+ // if there are other running atomic operations, just wait for them.
+ cv.wait(lock, [&] { return ctx->ts <= top_oracle.load(); });
+ }
+ }
+
+ auto alloc(OpContext *ctx) -> usize {
+ for (usize i = block_start; i < num_blocks; i++) {
+ std::scoped_lock guard(mbit[i].mutex, sbit[i].mutex);
+ load(mbit[i], sbit[i]);
+
+ if (!mbit[i].used) {
+ mbit[i].used = true;
+ if (!ctx)
+ store(mbit[i], sbit[i]);
+
+ std::scoped_lock guard(mblk[i].mutex, sblk[i].mutex);
+ load(mblk[i], sblk[i]);
+ mblk[i].zero();
+ if (!ctx)
+ store(mblk[i], sblk[i]);
+
+ return i;
+ }
+ }
+
+ throw AssertionFailure("no free block");
+ }
+
+ void free(OpContext *ctx, usize i) {
+ check_block_no(i);
+
+ std::scoped_lock guard(mbit[i].mutex, sbit[i].mutex);
+ load(mbit[i], sbit[i]);
+ if (!mbit[i].used)
+ throw AssertionFailure("free unused block");
+
+ mbit[i].used = false;
+ if (!ctx)
+ store(mbit[i], sbit[i]);
+ }
+
+ auto acquire(usize i) -> Block * {
+ check_block_no(i);
+
+ mblk[i].mutex.lock();
+
+ {
+ std::scoped_lock guard(sblk[i].mutex);
+ load(mblk[i], sblk[i]);
+ }
+
+ return &mblk[i].block;
+ }
+
+ void release(Block *b) {
+ auto *p = check_and_get_cell(b);
+ p->mutex.unlock();
+ }
+
+ void sync(OpContext *ctx, Block *b) {
+ auto *p = check_and_get_cell(b);
+ usize i = p->index;
+
+ if (!ctx) {
+ std::scoped_lock guard(sblk[i].mutex);
+ store(mblk[i], sblk[i]);
+ }
+ }
+};
+
+namespace {
+#include "cache.ipp"
+} // namespace
diff --git a/src/fs/test/mock/cache.ipp b/src/fs/test/mock/cache.ipp
new file mode 100755
index 0000000..5677245
--- /dev/null
+++ b/src/fs/test/mock/cache.ipp
@@ -0,0 +1,49 @@
+#pragma once
+
+#include "cache.hpp"
+
+static MockBlockCache mock;
+static SuperBlock sblock;
+static BlockCache cache;
+
+static void stub_begin_op(OpContext *ctx) {
+ mock.begin_op(ctx);
+}
+
+static void stub_end_op(OpContext *ctx) {
+ mock.end_op(ctx);
+}
+
+static usize stub_alloc(OpContext *ctx) {
+ return mock.alloc(ctx);
+}
+
+static void stub_free(OpContext *ctx, usize block_no) {
+ mock.free(ctx, block_no);
+}
+
+static Block *stub_acquire(usize block_no) {
+ return mock.acquire(block_no);
+}
+
+static void stub_release(Block *block) {
+ return mock.release(block);
+}
+
+static void stub_sync(OpContext *ctx, Block *block) {
+ mock.sync(ctx, block);
+}
+
+static struct _Loader {
+ _Loader() {
+ sblock = mock.get_sblock();
+
+ cache.begin_op = stub_begin_op;
+ cache.end_op = stub_end_op;
+ cache.alloc = stub_alloc;
+ cache.free = stub_free;
+ cache.acquire = stub_acquire;
+ cache.release = stub_release;
+ cache.sync = stub_sync;
+ }
+} _loader;
diff --git a/src/fs/test/mock/list.cpp b/src/fs/test/mock/list.cpp
new file mode 100755
index 0000000..34ab1e5
--- /dev/null
+++ b/src/fs/test/mock/list.cpp
@@ -0,0 +1,3 @@
+extern "C" {
+#include
+}
diff --git a/src/fs/test/mock/lock.cpp b/src/fs/test/mock/lock.cpp
new file mode 100755
index 0000000..0a3931a
--- /dev/null
+++ b/src/fs/test/mock/lock.cpp
@@ -0,0 +1,177 @@
+#include "lock_config.hpp"
+#include "map.hpp"
+#include "errno.h"
+
+#include
+#include
+#include
+#include
+#include