diff --git a/.gitmodules b/.gitmodules
index c532fbc7..e05419d6 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -21,3 +21,11 @@
 [submodule "mllm/ffi/vendors/tvm-ffi"]
 	path = mllm/ffi/vendors/tvm-ffi
 	url = https://github.com/apache/tvm-ffi
+[submodule "mllm/ext/vendors/llvm-project"]
+	path = mllm/ext/vendors/llvm-project
+	url = https://github.com/llvm/llvm-project
+	update = none
+[submodule "mllm/ext/vendors/tokenizers"]
+	path = mllm/ext/vendors/tokenizers
+	url = https://github.com/meta-pytorch/tokenizers.git
+	update = none
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b3c939de..b8e20945 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,6 +22,11 @@ option(MLLM_BUILD_QNN_BACKEND "Enable MLLM QNN backend" OFF)
 option(MLLM_BUILD_SDK_C_BINDING "Enable MLLM C SDK binding" OFF)
 option(MLLM_BUILD_EXPERIMENTS "Enable MLLM experiments" OFF)
 
+# Extension Enable
+option(MLLM_EXT_ENABLE "Enable MLLM extensions" OFF)
+option(MLLM_EXT_ENABLE_LLVM_PROJECT "Enable the llvm-project extension" OFF)
+option(MLLM_EXT_ENABLE_META_TORCH_TOKENIZERS "Enable the meta-pytorch tokenizers extension" OFF)
+
 # CPU Backend: BLAS
 option(MLLM_USE_BLAS "Enable BLAS" OFF)
 option(MLLM_BLAS_VENDOR_ACCELERATE "Enable Accelerate BLAS on OSX" OFF)
@@ -35,6 +40,7 @@ option(MLLM_KERNEL_THREADS_VENDOR_APPLE_GCD "Enable Apple GCD Threads" OFF)
 
 # Performance components
 option(MLLM_PERFETTO_ENABLE "Enable perfetto" OFF)
+option(MLLM_TRACY_ENABLE "Enable Tracy, a more advanced profiler" OFF)
 
 message(STATUS "CXX Compiler=${CMAKE_CXX_COMPILER_ID}")
 message(STATUS "CXX Compiler version=${CMAKE_CXX_COMPILER_VERSION}")
@@ -206,6 +212,7 @@ set(MLLM_INCLUDE_DIR
     $
     $
     $
+    $
     $
     $)
 set(MLLM_JSON_INCLUDE_DIR
@@ -314,6 +321,13 @@ install(
   PATTERN "*.h"
   PATTERN "*.hpp")
 
+install(
+  DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/third_party/utfcpp/include/utfcpp/
+  DESTINATION include/utfcpp/
+  FILES_MATCHING
+  PATTERN "*.h"
+  PATTERN "*.hpp")
+
 install(
   DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/third_party/xxHash/include/xxHash/
   DESTINATION include/xxHash/
diff --git a/docs/api/functional.rst b/docs/api/functional.rst
index 5fca137d..080d2091 100644
--- a/docs/api/functional.rst
+++ b/docs/api/functional.rst
@@ -78,6 +78,37 @@ Shape Operations
    :param dim: Dimension along which to concatenate
    :return: Concatenated tensor
 
+.. cpp:function:: Tensor mllm::nn::functional::pad(const Tensor& x, const std::vector<int32_t>& pad, aops::PadMode mode = aops::PadMode::kConstant, float value = 0.0f)
+
+   Pad a tensor along the last N dimensions as specified.
+
+   :param x: Input tensor
+   :param pad: Padding sizes ordered from the last dimension to the first, e.g. [last_left, last_right, ..., first_left, first_right]
+   :param mode: Padding mode (kConstant, kReflect, kReplicate, kCircular). Default: kConstant
+   :param value: Constant value used when mode is kConstant. Default: 0.0
+   :return: Padded tensor
+
+.. cpp:function:: Tensor mllm::nn::functional::interpolate(const Tensor& x, const std::vector<int32_t>& size, aops::InterpolateOpMode mode = aops::InterpolateOpMode::kNearest, bool align_corners = false, bool antialias = false)
+
+   Resize a tensor to the target spatial size.
+
+   :param x: Input tensor (supports 1D/2D/3D spatial resizing depending on mode)
+   :param size: Target spatial size (e.g., [H_out, W_out] for 2D)
+   :param mode: Interpolation mode (kNearest, kLinear, kBilinear, kBicubic, kTrilinear). Default: kNearest
+   :param align_corners: Align corners for linear/bilinear/trilinear interpolation. Default: false
+   :param antialias: Apply anti-aliasing when downsampling. Default: false
+   :return: Resized tensor
+
+.. cpp:function:: Tensor mllm::nn::functional::interpolate(const Tensor& x, const std::vector<float>& scale_factor, aops::InterpolateOpMode mode = aops::InterpolateOpMode::kNearest, bool align_corners = false)
+
+   Resize a tensor by scale factors per spatial dimension.
+
+   :param x: Input tensor (supports 1D/2D/3D spatial resizing depending on mode)
+   :param scale_factor: Scale factors per spatial dimension (e.g., [sh, sw] for 2D)
+   :param mode: Interpolation mode (kNearest, kLinear, kBilinear, kBicubic, kTrilinear). Default: kNearest
+   :param align_corners: Align corners for linear/bilinear/trilinear interpolation. Default: false
+   :return: Resized tensor
+
 
 Attention Operations
 --------------------
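For reviewers, a minimal usage sketch of the two functional ops documented above. Only the `pad`/`interpolate` signatures come from this diff; the umbrella header, the `std::vector<int32_t>`/`std::vector<float>` element types, and the way `x` is produced are assumptions:

```cpp
// Sketch only: exercises nn::functional::pad and both interpolate overloads.
#include <cstdint>
#include <vector>
#include <mllm/mllm.hpp>  // assumed umbrella header

using namespace mllm;

Tensor resize_example(const Tensor& x) {  // e.g. a [1, 3, 32, 32] image tensor
  // Pad the last two dims by 1 on each side: [w_left, w_right, h_top, h_bottom].
  Tensor padded = nn::functional::pad(x, std::vector<int32_t>{1, 1, 1, 1},
                                      aops::PadMode::kConstant, /*value=*/0.0f);
  // Resize to a fixed 64x64 spatial size.
  Tensor resized = nn::functional::interpolate(padded, std::vector<int32_t>{64, 64},
                                               aops::InterpolateOpMode::kBilinear,
                                               /*align_corners=*/false);
  // Or scale by per-dimension factors (2x height, 2x width).
  return nn::functional::interpolate(resized, std::vector<float>{2.0f, 2.0f},
                                     aops::InterpolateOpMode::kNearest);
}
```

The explicit `std::vector<...>` spellings avoid overload ambiguity between the size-based and scale-factor-based `interpolate` variants when passing braced lists.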
diff --git a/docs/contribute/guidelines.md b/docs/contribute/guidelines.rst
similarity index 100%
rename from docs/contribute/guidelines.md
rename to docs/contribute/guidelines.rst
diff --git a/docs/contribute/index.rst b/docs/contribute/index.rst
index 4e969b0b..b26dc366 100644
--- a/docs/contribute/index.rst
+++ b/docs/contribute/index.rst
@@ -6,3 +6,4 @@ Contribute
 
    roadmap
    guidelines
+   model_supports
diff --git a/docs/contribute/model_supports.rst b/docs/contribute/model_supports.rst
new file mode 100644
index 00000000..fda32d04
--- /dev/null
+++ b/docs/contribute/model_supports.rst
@@ -0,0 +1,2 @@
+Model Supports
+=================
diff --git a/docs/cpu_backend/fa2_radix_paged.rst b/docs/cpu_backend/fa2_radix_paged.rst
new file mode 100644
index 00000000..a678ad26
--- /dev/null
+++ b/docs/cpu_backend/fa2_radix_paged.rst
@@ -0,0 +1,2 @@
+FA2, Radix, Paged
+====================
diff --git a/docs/cpu_backend/index.rst b/docs/cpu_backend/index.rst
index 3527029b..6aa13800 100644
--- a/docs/cpu_backend/index.rst
+++ b/docs/cpu_backend/index.rst
@@ -5,5 +5,6 @@ CPU Backend
    :maxdepth: 2
 
    threads
+   fa2_radix_paged
    arm/index
    x86/index
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index f193f34c..51dfd2e1 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -6,3 +6,4 @@ add_subdirectory(llama)
 add_subdirectory(minicpm_o)
 add_subdirectory(qwen3)
 add_subdirectory(qwen3_service)
+add_subdirectory(deepseek_ocr)
diff --git a/examples/deepseek_ocr/CMakeLists.txt b/examples/deepseek_ocr/CMakeLists.txt
new file mode 100644
index 00000000..8a80e34d
--- /dev/null
+++ b/examples/deepseek_ocr/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_executable(mllm-deepseek-ocr-runner main.cpp)
+target_link_libraries(mllm-deepseek-ocr-runner PRIVATE MllmRT MllmCPUBackend)
+target_include_directories(mllm-deepseek-ocr-runner PRIVATE ${MLLM_INCLUDE_DIR})
diff --git a/examples/deepseek_ocr/main.cpp b/examples/deepseek_ocr/main.cpp
new file mode 100644
index 00000000..cf819be0
--- /dev/null
+++ b/examples/deepseek_ocr/main.cpp
@@ -0,0 +1,21 @@
+#include <mllm/mllm.hpp>
+#include "mllm/models/deepseek_ocr/modeling_deepseek_ocr.hpp"
+#include "mllm/models/deepseek_ocr/tokenization_deepseek_ocr.hpp"
+
+using mllm::Argparse;
+
+MLLM_MAIN({
+  // auto config = mllm::models::deepseek_ocr::DpskOcrConfig("/Volumes/D/mllm-models/DeepSeek-OCR-w32a32/config.json");
+  // auto model = mllm::models::deepseek_ocr::DeepseekOCRForCausalLM(config);
+  // auto tokenizer = mllm::models::deepseek_ocr::DpskOcrTokenizer("/Volumes/D/mllm-models/DeepSeek-OCR-w32a32/tokenizer.json");
+  // model.load(mllm::load("/Volumes/D/mllm-models/DeepSeek-OCR-w32a32/model.mllm", mllm::ModelFileVersion::kV2));
+  mllm::setLogLevel(mllm::LogLevel::kError);
+  auto config = mllm::models::deepseek_ocr::DpskOcrConfig("/Volumes/D/mllm-models/DeepSeek-OCR-w4a8-i8mm-kai/config.json");
+  auto model = mllm::models::deepseek_ocr::DeepseekOCRForCausalLM(config);
+  auto tokenizer =
+      mllm::models::deepseek_ocr::DpskOcrTokenizer("/Volumes/D/mllm-models/DeepSeek-OCR-w4a8-i8mm-kai/tokenizer.json");
+  model.load(mllm::load("/Volumes/D/mllm-models/DeepSeek-OCR-w4a8-i8mm-kai/model.mllm", mllm::ModelFileVersion::kV2));
+
+  model.infer(tokenizer, "<image>\n<|grounding|>Convert the document to markdown. ", "/Volumes/D/mllm/.tmp/dpsk-ocr-pr.png",
+              "/Volumes/D/mllm/.tmp/dpsk-ocr");
+});
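The runner above hard-codes developer-local `/Volumes/D/...` paths even though it pulls in `mllm::Argparse`. A hedged sketch of the same flow with paths taken from plain `argv` instead (assuming the `MLLM_MAIN` macro exposes `argc`/`argv` like a regular `main`; `Argparse`'s exact API is not shown in this diff, so it is avoided here):

```cpp
// Sketch only: replaces the hard-coded paths with command-line arguments.
// Model/tokenizer class names come from this PR; the argv plumbing is assumed.
#include <string>
#include <mllm/mllm.hpp>  // umbrella header, assumed
#include "mllm/models/deepseek_ocr/modeling_deepseek_ocr.hpp"
#include "mllm/models/deepseek_ocr/tokenization_deepseek_ocr.hpp"

MLLM_MAIN({
  // Usage: mllm-deepseek-ocr-runner <model-dir> <input-image> <output-dir>
  const std::string model_dir = argv[1];
  auto config = mllm::models::deepseek_ocr::DpskOcrConfig(model_dir + "/config.json");
  auto model = mllm::models::deepseek_ocr::DeepseekOCRForCausalLM(config);
  auto tokenizer = mllm::models::deepseek_ocr::DpskOcrTokenizer(model_dir + "/tokenizer.json");
  model.load(mllm::load(model_dir + "/model.mllm", mllm::ModelFileVersion::kV2));
  model.infer(tokenizer, "<image>\n<|grounding|>Convert the document to markdown. ",
              /*image=*/argv[2], /*output_dir=*/argv[3]);
});
```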
diff --git a/examples/deepseek_ocr/net_info.text b/examples/deepseek_ocr/net_info.text
new file mode 100644
index 00000000..e1ec22dc
--- /dev/null
+++ b/examples/deepseek_ocr/net_info.text
@@ -0,0 +1,4159 @@
+Module: , device: CPU + Module: model, device: CPU + model.embed_tokens, device: CPU + Module: model.layers, device: CPU + Module: model.layers.0, device: CPU + Module: model.layers.0.self_attn, device: CPU + model.layers.0.self_attn.q_proj, device: CPU + model.layers.0.self_attn.k_proj, device: CPU + model.layers.0.self_attn.v_proj, device: CPU + model.layers.0.self_attn.o_proj, device: CPU + model.layers.0.self_attn.q_rope, device: CPU + model.layers.0.self_attn.k_rope, device: CPU + Module: model.layers.0.mlp, device: CPU + model.layers.0.mlp.gate_proj, device: CPU + model.layers.0.mlp.up_proj, device: CPU + model.layers.0.mlp.down_proj, device: CPU + model.layers.0.mlp.act, device: CPU + model.layers.0.input_layernorm, device: CPU + model.layers.0.post_attention_layernorm, device: CPU + Module: model.layers.1, device: CPU + Module: model.layers.1.self_attn, device: CPU + model.layers.1.self_attn.q_proj, device: CPU + model.layers.1.self_attn.k_proj, device: CPU + model.layers.1.self_attn.v_proj, device: CPU + model.layers.1.self_attn.o_proj, device: CPU + model.layers.1.self_attn.q_rope, device: CPU + model.layers.1.self_attn.k_rope, device: CPU + Module: model.layers.1.mlp, device: CPU + Module: model.layers.1.mlp.experts, device: CPU + Module: model.layers.1.mlp.experts.0, device: CPU + model.layers.1.mlp.experts.0.gate_proj, device: CPU + model.layers.1.mlp.experts.0.up_proj, device: CPU + model.layers.1.mlp.experts.0.down_proj, device: CPU + model.layers.1.mlp.experts.0.act, device: CPU + Module: model.layers.1.mlp.experts.1, device: CPU + model.layers.1.mlp.experts.1.gate_proj, device: CPU + model.layers.1.mlp.experts.1.up_proj, device: CPU + model.layers.1.mlp.experts.1.down_proj, device: CPU + model.layers.1.mlp.experts.1.act, device: CPU + Module: model.layers.1.mlp.experts.2, device: CPU + model.layers.1.mlp.experts.2.gate_proj, device: CPU + model.layers.1.mlp.experts.2.up_proj, device: CPU + model.layers.1.mlp.experts.2.down_proj, device: CPU + model.layers.1.mlp.experts.2.act, device: CPU + Module: model.layers.1.mlp.experts.3, device: CPU + model.layers.1.mlp.experts.3.gate_proj, device: CPU + model.layers.1.mlp.experts.3.up_proj, device: CPU + model.layers.1.mlp.experts.3.down_proj, device: CPU + model.layers.1.mlp.experts.3.act, device: CPU + Module: model.layers.1.mlp.experts.4, device: CPU + model.layers.1.mlp.experts.4.gate_proj, device: CPU + model.layers.1.mlp.experts.4.up_proj, device: CPU + model.layers.1.mlp.experts.4.down_proj, device: CPU + model.layers.1.mlp.experts.4.act, device: CPU + Module: model.layers.1.mlp.experts.5, device: CPU + model.layers.1.mlp.experts.5.gate_proj, device: CPU + model.layers.1.mlp.experts.5.up_proj, device: CPU +
model.layers.1.mlp.experts.5.down_proj, device: CPU + model.layers.1.mlp.experts.5.act, device: CPU + Module: model.layers.1.mlp.experts.6, device: CPU + model.layers.1.mlp.experts.6.gate_proj, device: CPU + model.layers.1.mlp.experts.6.up_proj, device: CPU + model.layers.1.mlp.experts.6.down_proj, device: CPU + model.layers.1.mlp.experts.6.act, device: CPU + Module: model.layers.1.mlp.experts.7, device: CPU + model.layers.1.mlp.experts.7.gate_proj, device: CPU + model.layers.1.mlp.experts.7.up_proj, device: CPU + model.layers.1.mlp.experts.7.down_proj, device: CPU + model.layers.1.mlp.experts.7.act, device: CPU + Module: model.layers.1.mlp.experts.8, device: CPU + model.layers.1.mlp.experts.8.gate_proj, device: CPU + model.layers.1.mlp.experts.8.up_proj, device: CPU + model.layers.1.mlp.experts.8.down_proj, device: CPU + model.layers.1.mlp.experts.8.act, device: CPU + Module: model.layers.1.mlp.experts.9, device: CPU + model.layers.1.mlp.experts.9.gate_proj, device: CPU + model.layers.1.mlp.experts.9.up_proj, device: CPU + model.layers.1.mlp.experts.9.down_proj, device: CPU + model.layers.1.mlp.experts.9.act, device: CPU + Module: model.layers.1.mlp.experts.10, device: CPU + model.layers.1.mlp.experts.10.gate_proj, device: CPU + model.layers.1.mlp.experts.10.up_proj, device: CPU + model.layers.1.mlp.experts.10.down_proj, device: CPU + model.layers.1.mlp.experts.10.act, device: CPU + Module: model.layers.1.mlp.experts.11, device: CPU + model.layers.1.mlp.experts.11.gate_proj, device: CPU + model.layers.1.mlp.experts.11.up_proj, device: CPU + model.layers.1.mlp.experts.11.down_proj, device: CPU + model.layers.1.mlp.experts.11.act, device: CPU + Module: model.layers.1.mlp.experts.12, device: CPU + model.layers.1.mlp.experts.12.gate_proj, device: CPU + model.layers.1.mlp.experts.12.up_proj, device: CPU + model.layers.1.mlp.experts.12.down_proj, device: CPU + model.layers.1.mlp.experts.12.act, device: CPU + Module: model.layers.1.mlp.experts.13, device: CPU + model.layers.1.mlp.experts.13.gate_proj, device: CPU + model.layers.1.mlp.experts.13.up_proj, device: CPU + model.layers.1.mlp.experts.13.down_proj, device: CPU + model.layers.1.mlp.experts.13.act, device: CPU + Module: model.layers.1.mlp.experts.14, device: CPU + model.layers.1.mlp.experts.14.gate_proj, device: CPU + model.layers.1.mlp.experts.14.up_proj, device: CPU + model.layers.1.mlp.experts.14.down_proj, device: CPU + model.layers.1.mlp.experts.14.act, device: CPU + Module: model.layers.1.mlp.experts.15, device: CPU + model.layers.1.mlp.experts.15.gate_proj, device: CPU + model.layers.1.mlp.experts.15.up_proj, device: CPU + model.layers.1.mlp.experts.15.down_proj, device: CPU + model.layers.1.mlp.experts.15.act, device: CPU + Module: model.layers.1.mlp.experts.16, device: CPU + model.layers.1.mlp.experts.16.gate_proj, device: CPU + model.layers.1.mlp.experts.16.up_proj, device: CPU + model.layers.1.mlp.experts.16.down_proj, device: CPU + model.layers.1.mlp.experts.16.act, device: CPU + Module: model.layers.1.mlp.experts.17, device: CPU + model.layers.1.mlp.experts.17.gate_proj, device: CPU + model.layers.1.mlp.experts.17.up_proj, device: CPU + model.layers.1.mlp.experts.17.down_proj, device: CPU + model.layers.1.mlp.experts.17.act, device: CPU + Module: model.layers.1.mlp.experts.18, device: CPU + model.layers.1.mlp.experts.18.gate_proj, device: CPU + model.layers.1.mlp.experts.18.up_proj, device: CPU + model.layers.1.mlp.experts.18.down_proj, device: CPU + model.layers.1.mlp.experts.18.act, device: CPU + Module: 
model.layers.1.mlp.experts.19, device: CPU + model.layers.1.mlp.experts.19.gate_proj, device: CPU + model.layers.1.mlp.experts.19.up_proj, device: CPU + model.layers.1.mlp.experts.19.down_proj, device: CPU + model.layers.1.mlp.experts.19.act, device: CPU + Module: model.layers.1.mlp.experts.20, device: CPU + model.layers.1.mlp.experts.20.gate_proj, device: CPU + model.layers.1.mlp.experts.20.up_proj, device: CPU + model.layers.1.mlp.experts.20.down_proj, device: CPU + model.layers.1.mlp.experts.20.act, device: CPU + Module: model.layers.1.mlp.experts.21, device: CPU + model.layers.1.mlp.experts.21.gate_proj, device: CPU + model.layers.1.mlp.experts.21.up_proj, device: CPU + model.layers.1.mlp.experts.21.down_proj, device: CPU + model.layers.1.mlp.experts.21.act, device: CPU + Module: model.layers.1.mlp.experts.22, device: CPU + model.layers.1.mlp.experts.22.gate_proj, device: CPU + model.layers.1.mlp.experts.22.up_proj, device: CPU + model.layers.1.mlp.experts.22.down_proj, device: CPU + model.layers.1.mlp.experts.22.act, device: CPU + Module: model.layers.1.mlp.experts.23, device: CPU + model.layers.1.mlp.experts.23.gate_proj, device: CPU + model.layers.1.mlp.experts.23.up_proj, device: CPU + model.layers.1.mlp.experts.23.down_proj, device: CPU + model.layers.1.mlp.experts.23.act, device: CPU + Module: model.layers.1.mlp.experts.24, device: CPU + model.layers.1.mlp.experts.24.gate_proj, device: CPU + model.layers.1.mlp.experts.24.up_proj, device: CPU + model.layers.1.mlp.experts.24.down_proj, device: CPU + model.layers.1.mlp.experts.24.act, device: CPU + Module: model.layers.1.mlp.experts.25, device: CPU + model.layers.1.mlp.experts.25.gate_proj, device: CPU + model.layers.1.mlp.experts.25.up_proj, device: CPU + model.layers.1.mlp.experts.25.down_proj, device: CPU + model.layers.1.mlp.experts.25.act, device: CPU + Module: model.layers.1.mlp.experts.26, device: CPU + model.layers.1.mlp.experts.26.gate_proj, device: CPU + model.layers.1.mlp.experts.26.up_proj, device: CPU + model.layers.1.mlp.experts.26.down_proj, device: CPU + model.layers.1.mlp.experts.26.act, device: CPU + Module: model.layers.1.mlp.experts.27, device: CPU + model.layers.1.mlp.experts.27.gate_proj, device: CPU + model.layers.1.mlp.experts.27.up_proj, device: CPU + model.layers.1.mlp.experts.27.down_proj, device: CPU + model.layers.1.mlp.experts.27.act, device: CPU + Module: model.layers.1.mlp.experts.28, device: CPU + model.layers.1.mlp.experts.28.gate_proj, device: CPU + model.layers.1.mlp.experts.28.up_proj, device: CPU + model.layers.1.mlp.experts.28.down_proj, device: CPU + model.layers.1.mlp.experts.28.act, device: CPU + Module: model.layers.1.mlp.experts.29, device: CPU + model.layers.1.mlp.experts.29.gate_proj, device: CPU + model.layers.1.mlp.experts.29.up_proj, device: CPU + model.layers.1.mlp.experts.29.down_proj, device: CPU + model.layers.1.mlp.experts.29.act, device: CPU + Module: model.layers.1.mlp.experts.30, device: CPU + model.layers.1.mlp.experts.30.gate_proj, device: CPU + model.layers.1.mlp.experts.30.up_proj, device: CPU + model.layers.1.mlp.experts.30.down_proj, device: CPU + model.layers.1.mlp.experts.30.act, device: CPU + Module: model.layers.1.mlp.experts.31, device: CPU + model.layers.1.mlp.experts.31.gate_proj, device: CPU + model.layers.1.mlp.experts.31.up_proj, device: CPU + model.layers.1.mlp.experts.31.down_proj, device: CPU + model.layers.1.mlp.experts.31.act, device: CPU + Module: model.layers.1.mlp.experts.32, device: CPU + model.layers.1.mlp.experts.32.gate_proj, device: CPU + 
model.layers.1.mlp.experts.32.up_proj, device: CPU + model.layers.1.mlp.experts.32.down_proj, device: CPU + model.layers.1.mlp.experts.32.act, device: CPU + Module: model.layers.1.mlp.experts.33, device: CPU + model.layers.1.mlp.experts.33.gate_proj, device: CPU + model.layers.1.mlp.experts.33.up_proj, device: CPU + model.layers.1.mlp.experts.33.down_proj, device: CPU + model.layers.1.mlp.experts.33.act, device: CPU + Module: model.layers.1.mlp.experts.34, device: CPU + model.layers.1.mlp.experts.34.gate_proj, device: CPU + model.layers.1.mlp.experts.34.up_proj, device: CPU + model.layers.1.mlp.experts.34.down_proj, device: CPU + model.layers.1.mlp.experts.34.act, device: CPU + Module: model.layers.1.mlp.experts.35, device: CPU + model.layers.1.mlp.experts.35.gate_proj, device: CPU + model.layers.1.mlp.experts.35.up_proj, device: CPU + model.layers.1.mlp.experts.35.down_proj, device: CPU + model.layers.1.mlp.experts.35.act, device: CPU + Module: model.layers.1.mlp.experts.36, device: CPU + model.layers.1.mlp.experts.36.gate_proj, device: CPU + model.layers.1.mlp.experts.36.up_proj, device: CPU + model.layers.1.mlp.experts.36.down_proj, device: CPU + model.layers.1.mlp.experts.36.act, device: CPU + Module: model.layers.1.mlp.experts.37, device: CPU + model.layers.1.mlp.experts.37.gate_proj, device: CPU + model.layers.1.mlp.experts.37.up_proj, device: CPU + model.layers.1.mlp.experts.37.down_proj, device: CPU + model.layers.1.mlp.experts.37.act, device: CPU + Module: model.layers.1.mlp.experts.38, device: CPU + model.layers.1.mlp.experts.38.gate_proj, device: CPU + model.layers.1.mlp.experts.38.up_proj, device: CPU + model.layers.1.mlp.experts.38.down_proj, device: CPU + model.layers.1.mlp.experts.38.act, device: CPU + Module: model.layers.1.mlp.experts.39, device: CPU + model.layers.1.mlp.experts.39.gate_proj, device: CPU + model.layers.1.mlp.experts.39.up_proj, device: CPU + model.layers.1.mlp.experts.39.down_proj, device: CPU + model.layers.1.mlp.experts.39.act, device: CPU + Module: model.layers.1.mlp.experts.40, device: CPU + model.layers.1.mlp.experts.40.gate_proj, device: CPU + model.layers.1.mlp.experts.40.up_proj, device: CPU + model.layers.1.mlp.experts.40.down_proj, device: CPU + model.layers.1.mlp.experts.40.act, device: CPU + Module: model.layers.1.mlp.experts.41, device: CPU + model.layers.1.mlp.experts.41.gate_proj, device: CPU + model.layers.1.mlp.experts.41.up_proj, device: CPU + model.layers.1.mlp.experts.41.down_proj, device: CPU + model.layers.1.mlp.experts.41.act, device: CPU + Module: model.layers.1.mlp.experts.42, device: CPU + model.layers.1.mlp.experts.42.gate_proj, device: CPU + model.layers.1.mlp.experts.42.up_proj, device: CPU + model.layers.1.mlp.experts.42.down_proj, device: CPU + model.layers.1.mlp.experts.42.act, device: CPU + Module: model.layers.1.mlp.experts.43, device: CPU + model.layers.1.mlp.experts.43.gate_proj, device: CPU + model.layers.1.mlp.experts.43.up_proj, device: CPU + model.layers.1.mlp.experts.43.down_proj, device: CPU + model.layers.1.mlp.experts.43.act, device: CPU + Module: model.layers.1.mlp.experts.44, device: CPU + model.layers.1.mlp.experts.44.gate_proj, device: CPU + model.layers.1.mlp.experts.44.up_proj, device: CPU + model.layers.1.mlp.experts.44.down_proj, device: CPU + model.layers.1.mlp.experts.44.act, device: CPU + Module: model.layers.1.mlp.experts.45, device: CPU + model.layers.1.mlp.experts.45.gate_proj, device: CPU + model.layers.1.mlp.experts.45.up_proj, device: CPU + model.layers.1.mlp.experts.45.down_proj, device: CPU + 
model.layers.1.mlp.experts.45.act, device: CPU + Module: model.layers.1.mlp.experts.46, device: CPU + model.layers.1.mlp.experts.46.gate_proj, device: CPU + model.layers.1.mlp.experts.46.up_proj, device: CPU + model.layers.1.mlp.experts.46.down_proj, device: CPU + model.layers.1.mlp.experts.46.act, device: CPU + Module: model.layers.1.mlp.experts.47, device: CPU + model.layers.1.mlp.experts.47.gate_proj, device: CPU + model.layers.1.mlp.experts.47.up_proj, device: CPU + model.layers.1.mlp.experts.47.down_proj, device: CPU + model.layers.1.mlp.experts.47.act, device: CPU + Module: model.layers.1.mlp.experts.48, device: CPU + model.layers.1.mlp.experts.48.gate_proj, device: CPU + model.layers.1.mlp.experts.48.up_proj, device: CPU + model.layers.1.mlp.experts.48.down_proj, device: CPU + model.layers.1.mlp.experts.48.act, device: CPU + Module: model.layers.1.mlp.experts.49, device: CPU + model.layers.1.mlp.experts.49.gate_proj, device: CPU + model.layers.1.mlp.experts.49.up_proj, device: CPU + model.layers.1.mlp.experts.49.down_proj, device: CPU + model.layers.1.mlp.experts.49.act, device: CPU + Module: model.layers.1.mlp.experts.50, device: CPU + model.layers.1.mlp.experts.50.gate_proj, device: CPU + model.layers.1.mlp.experts.50.up_proj, device: CPU + model.layers.1.mlp.experts.50.down_proj, device: CPU + model.layers.1.mlp.experts.50.act, device: CPU + Module: model.layers.1.mlp.experts.51, device: CPU + model.layers.1.mlp.experts.51.gate_proj, device: CPU + model.layers.1.mlp.experts.51.up_proj, device: CPU + model.layers.1.mlp.experts.51.down_proj, device: CPU + model.layers.1.mlp.experts.51.act, device: CPU + Module: model.layers.1.mlp.experts.52, device: CPU + model.layers.1.mlp.experts.52.gate_proj, device: CPU + model.layers.1.mlp.experts.52.up_proj, device: CPU + model.layers.1.mlp.experts.52.down_proj, device: CPU + model.layers.1.mlp.experts.52.act, device: CPU + Module: model.layers.1.mlp.experts.53, device: CPU + model.layers.1.mlp.experts.53.gate_proj, device: CPU + model.layers.1.mlp.experts.53.up_proj, device: CPU + model.layers.1.mlp.experts.53.down_proj, device: CPU + model.layers.1.mlp.experts.53.act, device: CPU + Module: model.layers.1.mlp.experts.54, device: CPU + model.layers.1.mlp.experts.54.gate_proj, device: CPU + model.layers.1.mlp.experts.54.up_proj, device: CPU + model.layers.1.mlp.experts.54.down_proj, device: CPU + model.layers.1.mlp.experts.54.act, device: CPU + Module: model.layers.1.mlp.experts.55, device: CPU + model.layers.1.mlp.experts.55.gate_proj, device: CPU + model.layers.1.mlp.experts.55.up_proj, device: CPU + model.layers.1.mlp.experts.55.down_proj, device: CPU + model.layers.1.mlp.experts.55.act, device: CPU + Module: model.layers.1.mlp.experts.56, device: CPU + model.layers.1.mlp.experts.56.gate_proj, device: CPU + model.layers.1.mlp.experts.56.up_proj, device: CPU + model.layers.1.mlp.experts.56.down_proj, device: CPU + model.layers.1.mlp.experts.56.act, device: CPU + Module: model.layers.1.mlp.experts.57, device: CPU + model.layers.1.mlp.experts.57.gate_proj, device: CPU + model.layers.1.mlp.experts.57.up_proj, device: CPU + model.layers.1.mlp.experts.57.down_proj, device: CPU + model.layers.1.mlp.experts.57.act, device: CPU + Module: model.layers.1.mlp.experts.58, device: CPU + model.layers.1.mlp.experts.58.gate_proj, device: CPU + model.layers.1.mlp.experts.58.up_proj, device: CPU + model.layers.1.mlp.experts.58.down_proj, device: CPU + model.layers.1.mlp.experts.58.act, device: CPU + Module: model.layers.1.mlp.experts.59, device: CPU + 
model.layers.1.mlp.experts.59.gate_proj, device: CPU + model.layers.1.mlp.experts.59.up_proj, device: CPU + model.layers.1.mlp.experts.59.down_proj, device: CPU + model.layers.1.mlp.experts.59.act, device: CPU + Module: model.layers.1.mlp.experts.60, device: CPU + model.layers.1.mlp.experts.60.gate_proj, device: CPU + model.layers.1.mlp.experts.60.up_proj, device: CPU + model.layers.1.mlp.experts.60.down_proj, device: CPU + model.layers.1.mlp.experts.60.act, device: CPU + Module: model.layers.1.mlp.experts.61, device: CPU + model.layers.1.mlp.experts.61.gate_proj, device: CPU + model.layers.1.mlp.experts.61.up_proj, device: CPU + model.layers.1.mlp.experts.61.down_proj, device: CPU + model.layers.1.mlp.experts.61.act, device: CPU + Module: model.layers.1.mlp.experts.62, device: CPU + model.layers.1.mlp.experts.62.gate_proj, device: CPU + model.layers.1.mlp.experts.62.up_proj, device: CPU + model.layers.1.mlp.experts.62.down_proj, device: CPU + model.layers.1.mlp.experts.62.act, device: CPU + Module: model.layers.1.mlp.experts.63, device: CPU + model.layers.1.mlp.experts.63.gate_proj, device: CPU + model.layers.1.mlp.experts.63.up_proj, device: CPU + model.layers.1.mlp.experts.63.down_proj, device: CPU + model.layers.1.mlp.experts.63.act, device: CPU + Module: model.layers.1.mlp.gate, device: CPU + model.layers.1.mlp.gate.weight, device: CPU + Module: model.layers.1.mlp.shared_experts, device: CPU + model.layers.1.mlp.shared_experts.gate_proj, device: CPU + model.layers.1.mlp.shared_experts.up_proj, device: CPU + model.layers.1.mlp.shared_experts.down_proj, device: CPU + model.layers.1.mlp.shared_experts.act, device: CPU + model.layers.1.input_layernorm, device: CPU + model.layers.1.post_attention_layernorm, device: CPU + Module: model.layers.2, device: CPU + Module: model.layers.2.self_attn, device: CPU + model.layers.2.self_attn.q_proj, device: CPU + model.layers.2.self_attn.k_proj, device: CPU + model.layers.2.self_attn.v_proj, device: CPU + model.layers.2.self_attn.o_proj, device: CPU + model.layers.2.self_attn.q_rope, device: CPU + model.layers.2.self_attn.k_rope, device: CPU + Module: model.layers.2.mlp, device: CPU + Module: model.layers.2.mlp.experts, device: CPU + Module: model.layers.2.mlp.experts.0, device: CPU + model.layers.2.mlp.experts.0.gate_proj, device: CPU + model.layers.2.mlp.experts.0.up_proj, device: CPU + model.layers.2.mlp.experts.0.down_proj, device: CPU + model.layers.2.mlp.experts.0.act, device: CPU + Module: model.layers.2.mlp.experts.1, device: CPU + model.layers.2.mlp.experts.1.gate_proj, device: CPU + model.layers.2.mlp.experts.1.up_proj, device: CPU + model.layers.2.mlp.experts.1.down_proj, device: CPU + model.layers.2.mlp.experts.1.act, device: CPU + Module: model.layers.2.mlp.experts.2, device: CPU + model.layers.2.mlp.experts.2.gate_proj, device: CPU + model.layers.2.mlp.experts.2.up_proj, device: CPU + model.layers.2.mlp.experts.2.down_proj, device: CPU + model.layers.2.mlp.experts.2.act, device: CPU + Module: model.layers.2.mlp.experts.3, device: CPU + model.layers.2.mlp.experts.3.gate_proj, device: CPU + model.layers.2.mlp.experts.3.up_proj, device: CPU + model.layers.2.mlp.experts.3.down_proj, device: CPU + model.layers.2.mlp.experts.3.act, device: CPU + Module: model.layers.2.mlp.experts.4, device: CPU + model.layers.2.mlp.experts.4.gate_proj, device: CPU + model.layers.2.mlp.experts.4.up_proj, device: CPU + model.layers.2.mlp.experts.4.down_proj, device: CPU + model.layers.2.mlp.experts.4.act, device: CPU + Module: model.layers.2.mlp.experts.5, 
device: CPU + model.layers.2.mlp.experts.5.gate_proj, device: CPU + model.layers.2.mlp.experts.5.up_proj, device: CPU + model.layers.2.mlp.experts.5.down_proj, device: CPU + model.layers.2.mlp.experts.5.act, device: CPU + Module: model.layers.2.mlp.experts.6, device: CPU + model.layers.2.mlp.experts.6.gate_proj, device: CPU + model.layers.2.mlp.experts.6.up_proj, device: CPU + model.layers.2.mlp.experts.6.down_proj, device: CPU + model.layers.2.mlp.experts.6.act, device: CPU + Module: model.layers.2.mlp.experts.7, device: CPU + model.layers.2.mlp.experts.7.gate_proj, device: CPU + model.layers.2.mlp.experts.7.up_proj, device: CPU + model.layers.2.mlp.experts.7.down_proj, device: CPU + model.layers.2.mlp.experts.7.act, device: CPU + Module: model.layers.2.mlp.experts.8, device: CPU + model.layers.2.mlp.experts.8.gate_proj, device: CPU + model.layers.2.mlp.experts.8.up_proj, device: CPU + model.layers.2.mlp.experts.8.down_proj, device: CPU + model.layers.2.mlp.experts.8.act, device: CPU + Module: model.layers.2.mlp.experts.9, device: CPU + model.layers.2.mlp.experts.9.gate_proj, device: CPU + model.layers.2.mlp.experts.9.up_proj, device: CPU + model.layers.2.mlp.experts.9.down_proj, device: CPU + model.layers.2.mlp.experts.9.act, device: CPU + Module: model.layers.2.mlp.experts.10, device: CPU + model.layers.2.mlp.experts.10.gate_proj, device: CPU + model.layers.2.mlp.experts.10.up_proj, device: CPU + model.layers.2.mlp.experts.10.down_proj, device: CPU + model.layers.2.mlp.experts.10.act, device: CPU + Module: model.layers.2.mlp.experts.11, device: CPU + model.layers.2.mlp.experts.11.gate_proj, device: CPU + model.layers.2.mlp.experts.11.up_proj, device: CPU + model.layers.2.mlp.experts.11.down_proj, device: CPU + model.layers.2.mlp.experts.11.act, device: CPU + Module: model.layers.2.mlp.experts.12, device: CPU + model.layers.2.mlp.experts.12.gate_proj, device: CPU + model.layers.2.mlp.experts.12.up_proj, device: CPU + model.layers.2.mlp.experts.12.down_proj, device: CPU + model.layers.2.mlp.experts.12.act, device: CPU + Module: model.layers.2.mlp.experts.13, device: CPU + model.layers.2.mlp.experts.13.gate_proj, device: CPU + model.layers.2.mlp.experts.13.up_proj, device: CPU + model.layers.2.mlp.experts.13.down_proj, device: CPU + model.layers.2.mlp.experts.13.act, device: CPU + Module: model.layers.2.mlp.experts.14, device: CPU + model.layers.2.mlp.experts.14.gate_proj, device: CPU + model.layers.2.mlp.experts.14.up_proj, device: CPU + model.layers.2.mlp.experts.14.down_proj, device: CPU + model.layers.2.mlp.experts.14.act, device: CPU + Module: model.layers.2.mlp.experts.15, device: CPU + model.layers.2.mlp.experts.15.gate_proj, device: CPU + model.layers.2.mlp.experts.15.up_proj, device: CPU + model.layers.2.mlp.experts.15.down_proj, device: CPU + model.layers.2.mlp.experts.15.act, device: CPU + Module: model.layers.2.mlp.experts.16, device: CPU + model.layers.2.mlp.experts.16.gate_proj, device: CPU + model.layers.2.mlp.experts.16.up_proj, device: CPU + model.layers.2.mlp.experts.16.down_proj, device: CPU + model.layers.2.mlp.experts.16.act, device: CPU + Module: model.layers.2.mlp.experts.17, device: CPU + model.layers.2.mlp.experts.17.gate_proj, device: CPU + model.layers.2.mlp.experts.17.up_proj, device: CPU + model.layers.2.mlp.experts.17.down_proj, device: CPU + model.layers.2.mlp.experts.17.act, device: CPU + Module: model.layers.2.mlp.experts.18, device: CPU + model.layers.2.mlp.experts.18.gate_proj, device: CPU + model.layers.2.mlp.experts.18.up_proj, device: CPU + 
model.layers.2.mlp.experts.18.down_proj, device: CPU + model.layers.2.mlp.experts.18.act, device: CPU + Module: model.layers.2.mlp.experts.19, device: CPU + model.layers.2.mlp.experts.19.gate_proj, device: CPU + model.layers.2.mlp.experts.19.up_proj, device: CPU + model.layers.2.mlp.experts.19.down_proj, device: CPU + model.layers.2.mlp.experts.19.act, device: CPU + Module: model.layers.2.mlp.experts.20, device: CPU + model.layers.2.mlp.experts.20.gate_proj, device: CPU + model.layers.2.mlp.experts.20.up_proj, device: CPU + model.layers.2.mlp.experts.20.down_proj, device: CPU + model.layers.2.mlp.experts.20.act, device: CPU + Module: model.layers.2.mlp.experts.21, device: CPU + model.layers.2.mlp.experts.21.gate_proj, device: CPU + model.layers.2.mlp.experts.21.up_proj, device: CPU + model.layers.2.mlp.experts.21.down_proj, device: CPU + model.layers.2.mlp.experts.21.act, device: CPU + Module: model.layers.2.mlp.experts.22, device: CPU + model.layers.2.mlp.experts.22.gate_proj, device: CPU + model.layers.2.mlp.experts.22.up_proj, device: CPU + model.layers.2.mlp.experts.22.down_proj, device: CPU + model.layers.2.mlp.experts.22.act, device: CPU + Module: model.layers.2.mlp.experts.23, device: CPU + model.layers.2.mlp.experts.23.gate_proj, device: CPU + model.layers.2.mlp.experts.23.up_proj, device: CPU + model.layers.2.mlp.experts.23.down_proj, device: CPU + model.layers.2.mlp.experts.23.act, device: CPU + Module: model.layers.2.mlp.experts.24, device: CPU + model.layers.2.mlp.experts.24.gate_proj, device: CPU + model.layers.2.mlp.experts.24.up_proj, device: CPU + model.layers.2.mlp.experts.24.down_proj, device: CPU + model.layers.2.mlp.experts.24.act, device: CPU + Module: model.layers.2.mlp.experts.25, device: CPU + model.layers.2.mlp.experts.25.gate_proj, device: CPU + model.layers.2.mlp.experts.25.up_proj, device: CPU + model.layers.2.mlp.experts.25.down_proj, device: CPU + model.layers.2.mlp.experts.25.act, device: CPU + Module: model.layers.2.mlp.experts.26, device: CPU + model.layers.2.mlp.experts.26.gate_proj, device: CPU + model.layers.2.mlp.experts.26.up_proj, device: CPU + model.layers.2.mlp.experts.26.down_proj, device: CPU + model.layers.2.mlp.experts.26.act, device: CPU + Module: model.layers.2.mlp.experts.27, device: CPU + model.layers.2.mlp.experts.27.gate_proj, device: CPU + model.layers.2.mlp.experts.27.up_proj, device: CPU + model.layers.2.mlp.experts.27.down_proj, device: CPU + model.layers.2.mlp.experts.27.act, device: CPU + Module: model.layers.2.mlp.experts.28, device: CPU + model.layers.2.mlp.experts.28.gate_proj, device: CPU + model.layers.2.mlp.experts.28.up_proj, device: CPU + model.layers.2.mlp.experts.28.down_proj, device: CPU + model.layers.2.mlp.experts.28.act, device: CPU + Module: model.layers.2.mlp.experts.29, device: CPU + model.layers.2.mlp.experts.29.gate_proj, device: CPU + model.layers.2.mlp.experts.29.up_proj, device: CPU + model.layers.2.mlp.experts.29.down_proj, device: CPU + model.layers.2.mlp.experts.29.act, device: CPU + Module: model.layers.2.mlp.experts.30, device: CPU + model.layers.2.mlp.experts.30.gate_proj, device: CPU + model.layers.2.mlp.experts.30.up_proj, device: CPU + model.layers.2.mlp.experts.30.down_proj, device: CPU + model.layers.2.mlp.experts.30.act, device: CPU + Module: model.layers.2.mlp.experts.31, device: CPU + model.layers.2.mlp.experts.31.gate_proj, device: CPU + model.layers.2.mlp.experts.31.up_proj, device: CPU + model.layers.2.mlp.experts.31.down_proj, device: CPU + model.layers.2.mlp.experts.31.act, device: CPU + 
Module: model.layers.2.mlp.experts.32, device: CPU + model.layers.2.mlp.experts.32.gate_proj, device: CPU + model.layers.2.mlp.experts.32.up_proj, device: CPU + model.layers.2.mlp.experts.32.down_proj, device: CPU + model.layers.2.mlp.experts.32.act, device: CPU + Module: model.layers.2.mlp.experts.33, device: CPU + model.layers.2.mlp.experts.33.gate_proj, device: CPU + model.layers.2.mlp.experts.33.up_proj, device: CPU + model.layers.2.mlp.experts.33.down_proj, device: CPU + model.layers.2.mlp.experts.33.act, device: CPU + Module: model.layers.2.mlp.experts.34, device: CPU + model.layers.2.mlp.experts.34.gate_proj, device: CPU + model.layers.2.mlp.experts.34.up_proj, device: CPU + model.layers.2.mlp.experts.34.down_proj, device: CPU + model.layers.2.mlp.experts.34.act, device: CPU + Module: model.layers.2.mlp.experts.35, device: CPU + model.layers.2.mlp.experts.35.gate_proj, device: CPU + model.layers.2.mlp.experts.35.up_proj, device: CPU + model.layers.2.mlp.experts.35.down_proj, device: CPU + model.layers.2.mlp.experts.35.act, device: CPU + Module: model.layers.2.mlp.experts.36, device: CPU + model.layers.2.mlp.experts.36.gate_proj, device: CPU + model.layers.2.mlp.experts.36.up_proj, device: CPU + model.layers.2.mlp.experts.36.down_proj, device: CPU + model.layers.2.mlp.experts.36.act, device: CPU + Module: model.layers.2.mlp.experts.37, device: CPU + model.layers.2.mlp.experts.37.gate_proj, device: CPU + model.layers.2.mlp.experts.37.up_proj, device: CPU + model.layers.2.mlp.experts.37.down_proj, device: CPU + model.layers.2.mlp.experts.37.act, device: CPU + Module: model.layers.2.mlp.experts.38, device: CPU + model.layers.2.mlp.experts.38.gate_proj, device: CPU + model.layers.2.mlp.experts.38.up_proj, device: CPU + model.layers.2.mlp.experts.38.down_proj, device: CPU + model.layers.2.mlp.experts.38.act, device: CPU + Module: model.layers.2.mlp.experts.39, device: CPU + model.layers.2.mlp.experts.39.gate_proj, device: CPU + model.layers.2.mlp.experts.39.up_proj, device: CPU + model.layers.2.mlp.experts.39.down_proj, device: CPU + model.layers.2.mlp.experts.39.act, device: CPU + Module: model.layers.2.mlp.experts.40, device: CPU + model.layers.2.mlp.experts.40.gate_proj, device: CPU + model.layers.2.mlp.experts.40.up_proj, device: CPU + model.layers.2.mlp.experts.40.down_proj, device: CPU + model.layers.2.mlp.experts.40.act, device: CPU + Module: model.layers.2.mlp.experts.41, device: CPU + model.layers.2.mlp.experts.41.gate_proj, device: CPU + model.layers.2.mlp.experts.41.up_proj, device: CPU + model.layers.2.mlp.experts.41.down_proj, device: CPU + model.layers.2.mlp.experts.41.act, device: CPU + Module: model.layers.2.mlp.experts.42, device: CPU + model.layers.2.mlp.experts.42.gate_proj, device: CPU + model.layers.2.mlp.experts.42.up_proj, device: CPU + model.layers.2.mlp.experts.42.down_proj, device: CPU + model.layers.2.mlp.experts.42.act, device: CPU + Module: model.layers.2.mlp.experts.43, device: CPU + model.layers.2.mlp.experts.43.gate_proj, device: CPU + model.layers.2.mlp.experts.43.up_proj, device: CPU + model.layers.2.mlp.experts.43.down_proj, device: CPU + model.layers.2.mlp.experts.43.act, device: CPU + Module: model.layers.2.mlp.experts.44, device: CPU + model.layers.2.mlp.experts.44.gate_proj, device: CPU + model.layers.2.mlp.experts.44.up_proj, device: CPU + model.layers.2.mlp.experts.44.down_proj, device: CPU + model.layers.2.mlp.experts.44.act, device: CPU + Module: model.layers.2.mlp.experts.45, device: CPU + model.layers.2.mlp.experts.45.gate_proj, device: CPU + 
model.layers.2.mlp.experts.45.up_proj, device: CPU + model.layers.2.mlp.experts.45.down_proj, device: CPU + model.layers.2.mlp.experts.45.act, device: CPU + Module: model.layers.2.mlp.experts.46, device: CPU + model.layers.2.mlp.experts.46.gate_proj, device: CPU + model.layers.2.mlp.experts.46.up_proj, device: CPU + model.layers.2.mlp.experts.46.down_proj, device: CPU + model.layers.2.mlp.experts.46.act, device: CPU + Module: model.layers.2.mlp.experts.47, device: CPU + model.layers.2.mlp.experts.47.gate_proj, device: CPU + model.layers.2.mlp.experts.47.up_proj, device: CPU + model.layers.2.mlp.experts.47.down_proj, device: CPU + model.layers.2.mlp.experts.47.act, device: CPU + Module: model.layers.2.mlp.experts.48, device: CPU + model.layers.2.mlp.experts.48.gate_proj, device: CPU + model.layers.2.mlp.experts.48.up_proj, device: CPU + model.layers.2.mlp.experts.48.down_proj, device: CPU + model.layers.2.mlp.experts.48.act, device: CPU + Module: model.layers.2.mlp.experts.49, device: CPU + model.layers.2.mlp.experts.49.gate_proj, device: CPU + model.layers.2.mlp.experts.49.up_proj, device: CPU + model.layers.2.mlp.experts.49.down_proj, device: CPU + model.layers.2.mlp.experts.49.act, device: CPU + Module: model.layers.2.mlp.experts.50, device: CPU + model.layers.2.mlp.experts.50.gate_proj, device: CPU + model.layers.2.mlp.experts.50.up_proj, device: CPU + model.layers.2.mlp.experts.50.down_proj, device: CPU + model.layers.2.mlp.experts.50.act, device: CPU + Module: model.layers.2.mlp.experts.51, device: CPU + model.layers.2.mlp.experts.51.gate_proj, device: CPU + model.layers.2.mlp.experts.51.up_proj, device: CPU + model.layers.2.mlp.experts.51.down_proj, device: CPU + model.layers.2.mlp.experts.51.act, device: CPU + Module: model.layers.2.mlp.experts.52, device: CPU + model.layers.2.mlp.experts.52.gate_proj, device: CPU + model.layers.2.mlp.experts.52.up_proj, device: CPU + model.layers.2.mlp.experts.52.down_proj, device: CPU + model.layers.2.mlp.experts.52.act, device: CPU + Module: model.layers.2.mlp.experts.53, device: CPU + model.layers.2.mlp.experts.53.gate_proj, device: CPU + model.layers.2.mlp.experts.53.up_proj, device: CPU + model.layers.2.mlp.experts.53.down_proj, device: CPU + model.layers.2.mlp.experts.53.act, device: CPU + Module: model.layers.2.mlp.experts.54, device: CPU + model.layers.2.mlp.experts.54.gate_proj, device: CPU + model.layers.2.mlp.experts.54.up_proj, device: CPU + model.layers.2.mlp.experts.54.down_proj, device: CPU + model.layers.2.mlp.experts.54.act, device: CPU + Module: model.layers.2.mlp.experts.55, device: CPU + model.layers.2.mlp.experts.55.gate_proj, device: CPU + model.layers.2.mlp.experts.55.up_proj, device: CPU + model.layers.2.mlp.experts.55.down_proj, device: CPU + model.layers.2.mlp.experts.55.act, device: CPU + Module: model.layers.2.mlp.experts.56, device: CPU + model.layers.2.mlp.experts.56.gate_proj, device: CPU + model.layers.2.mlp.experts.56.up_proj, device: CPU + model.layers.2.mlp.experts.56.down_proj, device: CPU + model.layers.2.mlp.experts.56.act, device: CPU + Module: model.layers.2.mlp.experts.57, device: CPU + model.layers.2.mlp.experts.57.gate_proj, device: CPU + model.layers.2.mlp.experts.57.up_proj, device: CPU + model.layers.2.mlp.experts.57.down_proj, device: CPU + model.layers.2.mlp.experts.57.act, device: CPU + Module: model.layers.2.mlp.experts.58, device: CPU + model.layers.2.mlp.experts.58.gate_proj, device: CPU + model.layers.2.mlp.experts.58.up_proj, device: CPU + model.layers.2.mlp.experts.58.down_proj, device: CPU + 
model.layers.2.mlp.experts.58.act, device: CPU + Module: model.layers.2.mlp.experts.59, device: CPU + model.layers.2.mlp.experts.59.gate_proj, device: CPU + model.layers.2.mlp.experts.59.up_proj, device: CPU + model.layers.2.mlp.experts.59.down_proj, device: CPU + model.layers.2.mlp.experts.59.act, device: CPU + Module: model.layers.2.mlp.experts.60, device: CPU + model.layers.2.mlp.experts.60.gate_proj, device: CPU + model.layers.2.mlp.experts.60.up_proj, device: CPU + model.layers.2.mlp.experts.60.down_proj, device: CPU + model.layers.2.mlp.experts.60.act, device: CPU + Module: model.layers.2.mlp.experts.61, device: CPU + model.layers.2.mlp.experts.61.gate_proj, device: CPU + model.layers.2.mlp.experts.61.up_proj, device: CPU + model.layers.2.mlp.experts.61.down_proj, device: CPU + model.layers.2.mlp.experts.61.act, device: CPU + Module: model.layers.2.mlp.experts.62, device: CPU + model.layers.2.mlp.experts.62.gate_proj, device: CPU + model.layers.2.mlp.experts.62.up_proj, device: CPU + model.layers.2.mlp.experts.62.down_proj, device: CPU + model.layers.2.mlp.experts.62.act, device: CPU + Module: model.layers.2.mlp.experts.63, device: CPU + model.layers.2.mlp.experts.63.gate_proj, device: CPU + model.layers.2.mlp.experts.63.up_proj, device: CPU + model.layers.2.mlp.experts.63.down_proj, device: CPU + model.layers.2.mlp.experts.63.act, device: CPU + Module: model.layers.2.mlp.gate, device: CPU + model.layers.2.mlp.gate.weight, device: CPU + Module: model.layers.2.mlp.shared_experts, device: CPU + model.layers.2.mlp.shared_experts.gate_proj, device: CPU + model.layers.2.mlp.shared_experts.up_proj, device: CPU + model.layers.2.mlp.shared_experts.down_proj, device: CPU + model.layers.2.mlp.shared_experts.act, device: CPU + model.layers.2.input_layernorm, device: CPU + model.layers.2.post_attention_layernorm, device: CPU + Module: model.layers.3, device: CPU + Module: model.layers.3.self_attn, device: CPU + model.layers.3.self_attn.q_proj, device: CPU + model.layers.3.self_attn.k_proj, device: CPU + model.layers.3.self_attn.v_proj, device: CPU + model.layers.3.self_attn.o_proj, device: CPU + model.layers.3.self_attn.q_rope, device: CPU + model.layers.3.self_attn.k_rope, device: CPU + Module: model.layers.3.mlp, device: CPU + Module: model.layers.3.mlp.experts, device: CPU + Module: model.layers.3.mlp.experts.0, device: CPU + model.layers.3.mlp.experts.0.gate_proj, device: CPU + model.layers.3.mlp.experts.0.up_proj, device: CPU + model.layers.3.mlp.experts.0.down_proj, device: CPU + model.layers.3.mlp.experts.0.act, device: CPU + Module: model.layers.3.mlp.experts.1, device: CPU + model.layers.3.mlp.experts.1.gate_proj, device: CPU + model.layers.3.mlp.experts.1.up_proj, device: CPU + model.layers.3.mlp.experts.1.down_proj, device: CPU + model.layers.3.mlp.experts.1.act, device: CPU + Module: model.layers.3.mlp.experts.2, device: CPU + model.layers.3.mlp.experts.2.gate_proj, device: CPU + model.layers.3.mlp.experts.2.up_proj, device: CPU + model.layers.3.mlp.experts.2.down_proj, device: CPU + model.layers.3.mlp.experts.2.act, device: CPU + Module: model.layers.3.mlp.experts.3, device: CPU + model.layers.3.mlp.experts.3.gate_proj, device: CPU + model.layers.3.mlp.experts.3.up_proj, device: CPU + model.layers.3.mlp.experts.3.down_proj, device: CPU + model.layers.3.mlp.experts.3.act, device: CPU + Module: model.layers.3.mlp.experts.4, device: CPU + model.layers.3.mlp.experts.4.gate_proj, device: CPU + model.layers.3.mlp.experts.4.up_proj, device: CPU + model.layers.3.mlp.experts.4.down_proj, 
device: CPU + model.layers.3.mlp.experts.4.act, device: CPU + Module: model.layers.3.mlp.experts.5, device: CPU + model.layers.3.mlp.experts.5.gate_proj, device: CPU + model.layers.3.mlp.experts.5.up_proj, device: CPU + model.layers.3.mlp.experts.5.down_proj, device: CPU + model.layers.3.mlp.experts.5.act, device: CPU + Module: model.layers.3.mlp.experts.6, device: CPU + model.layers.3.mlp.experts.6.gate_proj, device: CPU + model.layers.3.mlp.experts.6.up_proj, device: CPU + model.layers.3.mlp.experts.6.down_proj, device: CPU + model.layers.3.mlp.experts.6.act, device: CPU + Module: model.layers.3.mlp.experts.7, device: CPU + model.layers.3.mlp.experts.7.gate_proj, device: CPU + model.layers.3.mlp.experts.7.up_proj, device: CPU + model.layers.3.mlp.experts.7.down_proj, device: CPU + model.layers.3.mlp.experts.7.act, device: CPU + Module: model.layers.3.mlp.experts.8, device: CPU + model.layers.3.mlp.experts.8.gate_proj, device: CPU + model.layers.3.mlp.experts.8.up_proj, device: CPU + model.layers.3.mlp.experts.8.down_proj, device: CPU + model.layers.3.mlp.experts.8.act, device: CPU + Module: model.layers.3.mlp.experts.9, device: CPU + model.layers.3.mlp.experts.9.gate_proj, device: CPU + model.layers.3.mlp.experts.9.up_proj, device: CPU + model.layers.3.mlp.experts.9.down_proj, device: CPU + model.layers.3.mlp.experts.9.act, device: CPU + Module: model.layers.3.mlp.experts.10, device: CPU + model.layers.3.mlp.experts.10.gate_proj, device: CPU + model.layers.3.mlp.experts.10.up_proj, device: CPU + model.layers.3.mlp.experts.10.down_proj, device: CPU + model.layers.3.mlp.experts.10.act, device: CPU + Module: model.layers.3.mlp.experts.11, device: CPU + model.layers.3.mlp.experts.11.gate_proj, device: CPU + model.layers.3.mlp.experts.11.up_proj, device: CPU + model.layers.3.mlp.experts.11.down_proj, device: CPU + model.layers.3.mlp.experts.11.act, device: CPU + Module: model.layers.3.mlp.experts.12, device: CPU + model.layers.3.mlp.experts.12.gate_proj, device: CPU + model.layers.3.mlp.experts.12.up_proj, device: CPU + model.layers.3.mlp.experts.12.down_proj, device: CPU + model.layers.3.mlp.experts.12.act, device: CPU + Module: model.layers.3.mlp.experts.13, device: CPU + model.layers.3.mlp.experts.13.gate_proj, device: CPU + model.layers.3.mlp.experts.13.up_proj, device: CPU + model.layers.3.mlp.experts.13.down_proj, device: CPU + model.layers.3.mlp.experts.13.act, device: CPU + Module: model.layers.3.mlp.experts.14, device: CPU + model.layers.3.mlp.experts.14.gate_proj, device: CPU + model.layers.3.mlp.experts.14.up_proj, device: CPU + model.layers.3.mlp.experts.14.down_proj, device: CPU + model.layers.3.mlp.experts.14.act, device: CPU + Module: model.layers.3.mlp.experts.15, device: CPU + model.layers.3.mlp.experts.15.gate_proj, device: CPU + model.layers.3.mlp.experts.15.up_proj, device: CPU + model.layers.3.mlp.experts.15.down_proj, device: CPU + model.layers.3.mlp.experts.15.act, device: CPU + Module: model.layers.3.mlp.experts.16, device: CPU + model.layers.3.mlp.experts.16.gate_proj, device: CPU + model.layers.3.mlp.experts.16.up_proj, device: CPU + model.layers.3.mlp.experts.16.down_proj, device: CPU + model.layers.3.mlp.experts.16.act, device: CPU + Module: model.layers.3.mlp.experts.17, device: CPU + model.layers.3.mlp.experts.17.gate_proj, device: CPU + model.layers.3.mlp.experts.17.up_proj, device: CPU + model.layers.3.mlp.experts.17.down_proj, device: CPU + model.layers.3.mlp.experts.17.act, device: CPU + Module: model.layers.3.mlp.experts.18, device: CPU + 
model.layers.3.mlp.experts.18.gate_proj, device: CPU + model.layers.3.mlp.experts.18.up_proj, device: CPU + model.layers.3.mlp.experts.18.down_proj, device: CPU + model.layers.3.mlp.experts.18.act, device: CPU + Module: model.layers.3.mlp.experts.19, device: CPU + model.layers.3.mlp.experts.19.gate_proj, device: CPU + model.layers.3.mlp.experts.19.up_proj, device: CPU + model.layers.3.mlp.experts.19.down_proj, device: CPU + model.layers.3.mlp.experts.19.act, device: CPU + Module: model.layers.3.mlp.experts.20, device: CPU + model.layers.3.mlp.experts.20.gate_proj, device: CPU + model.layers.3.mlp.experts.20.up_proj, device: CPU + model.layers.3.mlp.experts.20.down_proj, device: CPU + model.layers.3.mlp.experts.20.act, device: CPU + Module: model.layers.3.mlp.experts.21, device: CPU + model.layers.3.mlp.experts.21.gate_proj, device: CPU + model.layers.3.mlp.experts.21.up_proj, device: CPU + model.layers.3.mlp.experts.21.down_proj, device: CPU + model.layers.3.mlp.experts.21.act, device: CPU + Module: model.layers.3.mlp.experts.22, device: CPU + model.layers.3.mlp.experts.22.gate_proj, device: CPU + model.layers.3.mlp.experts.22.up_proj, device: CPU + model.layers.3.mlp.experts.22.down_proj, device: CPU + model.layers.3.mlp.experts.22.act, device: CPU + Module: model.layers.3.mlp.experts.23, device: CPU + model.layers.3.mlp.experts.23.gate_proj, device: CPU + model.layers.3.mlp.experts.23.up_proj, device: CPU + model.layers.3.mlp.experts.23.down_proj, device: CPU + model.layers.3.mlp.experts.23.act, device: CPU + Module: model.layers.3.mlp.experts.24, device: CPU + model.layers.3.mlp.experts.24.gate_proj, device: CPU + model.layers.3.mlp.experts.24.up_proj, device: CPU + model.layers.3.mlp.experts.24.down_proj, device: CPU + model.layers.3.mlp.experts.24.act, device: CPU + Module: model.layers.3.mlp.experts.25, device: CPU + model.layers.3.mlp.experts.25.gate_proj, device: CPU + model.layers.3.mlp.experts.25.up_proj, device: CPU + model.layers.3.mlp.experts.25.down_proj, device: CPU + model.layers.3.mlp.experts.25.act, device: CPU + Module: model.layers.3.mlp.experts.26, device: CPU + model.layers.3.mlp.experts.26.gate_proj, device: CPU + model.layers.3.mlp.experts.26.up_proj, device: CPU + model.layers.3.mlp.experts.26.down_proj, device: CPU + model.layers.3.mlp.experts.26.act, device: CPU + Module: model.layers.3.mlp.experts.27, device: CPU + model.layers.3.mlp.experts.27.gate_proj, device: CPU + model.layers.3.mlp.experts.27.up_proj, device: CPU + model.layers.3.mlp.experts.27.down_proj, device: CPU + model.layers.3.mlp.experts.27.act, device: CPU + Module: model.layers.3.mlp.experts.28, device: CPU + model.layers.3.mlp.experts.28.gate_proj, device: CPU + model.layers.3.mlp.experts.28.up_proj, device: CPU + model.layers.3.mlp.experts.28.down_proj, device: CPU + model.layers.3.mlp.experts.28.act, device: CPU + Module: model.layers.3.mlp.experts.29, device: CPU + model.layers.3.mlp.experts.29.gate_proj, device: CPU + model.layers.3.mlp.experts.29.up_proj, device: CPU + model.layers.3.mlp.experts.29.down_proj, device: CPU + model.layers.3.mlp.experts.29.act, device: CPU + Module: model.layers.3.mlp.experts.30, device: CPU + model.layers.3.mlp.experts.30.gate_proj, device: CPU + model.layers.3.mlp.experts.30.up_proj, device: CPU + model.layers.3.mlp.experts.30.down_proj, device: CPU + model.layers.3.mlp.experts.30.act, device: CPU + Module: model.layers.3.mlp.experts.31, device: CPU + model.layers.3.mlp.experts.31.gate_proj, device: CPU + model.layers.3.mlp.experts.31.up_proj, device: CPU + 
model.layers.3.mlp.experts.31.down_proj, device: CPU
+    model.layers.3.mlp.experts.31.act, device: CPU
+    [... experts 32-63 of model.layers.3.mlp repeat the same gate_proj / up_proj / down_proj / act submodules, all on device CPU; listing truncated ...]
+    Module: model.layers.3.mlp.gate, device: CPU
+      model.layers.3.mlp.gate.weight, device: CPU
+    Module: model.layers.3.mlp.shared_experts, device: CPU
+      model.layers.3.mlp.shared_experts.gate_proj, device: CPU
+      model.layers.3.mlp.shared_experts.up_proj, device: CPU
+      model.layers.3.mlp.shared_experts.down_proj, device: CPU
+      model.layers.3.mlp.shared_experts.act, device: CPU
+    model.layers.3.input_layernorm, device: CPU
+    model.layers.3.post_attention_layernorm, device: CPU
+  [... model.layers.4 through model.layers.6 repeat the identical per-layer structure, every submodule on device CPU: a self_attn block (q_proj, k_proj, v_proj, o_proj, q_rope, k_rope); an mlp block with 64 routed experts (gate_proj / up_proj / down_proj / act each), a gate (with weight), and shared_experts (gate_proj / up_proj / down_proj / act); plus input_layernorm and post_attention_layernorm ...]
+  Module: model.layers.7, device: CPU
+    Module: model.layers.7.self_attn, device: CPU
+      model.layers.7.self_attn.q_proj, device: CPU
+      model.layers.7.self_attn.k_proj, device: CPU
+      model.layers.7.self_attn.v_proj, device: CPU
+      model.layers.7.self_attn.o_proj, device: CPU
+      model.layers.7.self_attn.q_rope, device: CPU
+      model.layers.7.self_attn.k_rope, device: CPU
+    Module: model.layers.7.mlp, device: CPU
+      Module: model.layers.7.mlp.experts, device: CPU
+      [... experts 0-42 of model.layers.7.mlp repeat the same four submodules ...]
+      Module: model.layers.7.mlp.experts.43, device: CPU
+        model.layers.7.mlp.experts.43.gate_proj,
device: CPU + model.layers.7.mlp.experts.43.up_proj, device: CPU + model.layers.7.mlp.experts.43.down_proj, device: CPU + model.layers.7.mlp.experts.43.act, device: CPU + Module: model.layers.7.mlp.experts.44, device: CPU + model.layers.7.mlp.experts.44.gate_proj, device: CPU + model.layers.7.mlp.experts.44.up_proj, device: CPU + model.layers.7.mlp.experts.44.down_proj, device: CPU + model.layers.7.mlp.experts.44.act, device: CPU + Module: model.layers.7.mlp.experts.45, device: CPU + model.layers.7.mlp.experts.45.gate_proj, device: CPU + model.layers.7.mlp.experts.45.up_proj, device: CPU + model.layers.7.mlp.experts.45.down_proj, device: CPU + model.layers.7.mlp.experts.45.act, device: CPU + Module: model.layers.7.mlp.experts.46, device: CPU + model.layers.7.mlp.experts.46.gate_proj, device: CPU + model.layers.7.mlp.experts.46.up_proj, device: CPU + model.layers.7.mlp.experts.46.down_proj, device: CPU + model.layers.7.mlp.experts.46.act, device: CPU + Module: model.layers.7.mlp.experts.47, device: CPU + model.layers.7.mlp.experts.47.gate_proj, device: CPU + model.layers.7.mlp.experts.47.up_proj, device: CPU + model.layers.7.mlp.experts.47.down_proj, device: CPU + model.layers.7.mlp.experts.47.act, device: CPU + Module: model.layers.7.mlp.experts.48, device: CPU + model.layers.7.mlp.experts.48.gate_proj, device: CPU + model.layers.7.mlp.experts.48.up_proj, device: CPU + model.layers.7.mlp.experts.48.down_proj, device: CPU + model.layers.7.mlp.experts.48.act, device: CPU + Module: model.layers.7.mlp.experts.49, device: CPU + model.layers.7.mlp.experts.49.gate_proj, device: CPU + model.layers.7.mlp.experts.49.up_proj, device: CPU + model.layers.7.mlp.experts.49.down_proj, device: CPU + model.layers.7.mlp.experts.49.act, device: CPU + Module: model.layers.7.mlp.experts.50, device: CPU + model.layers.7.mlp.experts.50.gate_proj, device: CPU + model.layers.7.mlp.experts.50.up_proj, device: CPU + model.layers.7.mlp.experts.50.down_proj, device: CPU + model.layers.7.mlp.experts.50.act, device: CPU + Module: model.layers.7.mlp.experts.51, device: CPU + model.layers.7.mlp.experts.51.gate_proj, device: CPU + model.layers.7.mlp.experts.51.up_proj, device: CPU + model.layers.7.mlp.experts.51.down_proj, device: CPU + model.layers.7.mlp.experts.51.act, device: CPU + Module: model.layers.7.mlp.experts.52, device: CPU + model.layers.7.mlp.experts.52.gate_proj, device: CPU + model.layers.7.mlp.experts.52.up_proj, device: CPU + model.layers.7.mlp.experts.52.down_proj, device: CPU + model.layers.7.mlp.experts.52.act, device: CPU + Module: model.layers.7.mlp.experts.53, device: CPU + model.layers.7.mlp.experts.53.gate_proj, device: CPU + model.layers.7.mlp.experts.53.up_proj, device: CPU + model.layers.7.mlp.experts.53.down_proj, device: CPU + model.layers.7.mlp.experts.53.act, device: CPU + Module: model.layers.7.mlp.experts.54, device: CPU + model.layers.7.mlp.experts.54.gate_proj, device: CPU + model.layers.7.mlp.experts.54.up_proj, device: CPU + model.layers.7.mlp.experts.54.down_proj, device: CPU + model.layers.7.mlp.experts.54.act, device: CPU + Module: model.layers.7.mlp.experts.55, device: CPU + model.layers.7.mlp.experts.55.gate_proj, device: CPU + model.layers.7.mlp.experts.55.up_proj, device: CPU + model.layers.7.mlp.experts.55.down_proj, device: CPU + model.layers.7.mlp.experts.55.act, device: CPU + Module: model.layers.7.mlp.experts.56, device: CPU + model.layers.7.mlp.experts.56.gate_proj, device: CPU + model.layers.7.mlp.experts.56.up_proj, device: CPU + model.layers.7.mlp.experts.56.down_proj, 
device: CPU + model.layers.7.mlp.experts.56.act, device: CPU + Module: model.layers.7.mlp.experts.57, device: CPU + model.layers.7.mlp.experts.57.gate_proj, device: CPU + model.layers.7.mlp.experts.57.up_proj, device: CPU + model.layers.7.mlp.experts.57.down_proj, device: CPU + model.layers.7.mlp.experts.57.act, device: CPU + Module: model.layers.7.mlp.experts.58, device: CPU + model.layers.7.mlp.experts.58.gate_proj, device: CPU + model.layers.7.mlp.experts.58.up_proj, device: CPU + model.layers.7.mlp.experts.58.down_proj, device: CPU + model.layers.7.mlp.experts.58.act, device: CPU + Module: model.layers.7.mlp.experts.59, device: CPU + model.layers.7.mlp.experts.59.gate_proj, device: CPU + model.layers.7.mlp.experts.59.up_proj, device: CPU + model.layers.7.mlp.experts.59.down_proj, device: CPU + model.layers.7.mlp.experts.59.act, device: CPU + Module: model.layers.7.mlp.experts.60, device: CPU + model.layers.7.mlp.experts.60.gate_proj, device: CPU + model.layers.7.mlp.experts.60.up_proj, device: CPU + model.layers.7.mlp.experts.60.down_proj, device: CPU + model.layers.7.mlp.experts.60.act, device: CPU + Module: model.layers.7.mlp.experts.61, device: CPU + model.layers.7.mlp.experts.61.gate_proj, device: CPU + model.layers.7.mlp.experts.61.up_proj, device: CPU + model.layers.7.mlp.experts.61.down_proj, device: CPU + model.layers.7.mlp.experts.61.act, device: CPU + Module: model.layers.7.mlp.experts.62, device: CPU + model.layers.7.mlp.experts.62.gate_proj, device: CPU + model.layers.7.mlp.experts.62.up_proj, device: CPU + model.layers.7.mlp.experts.62.down_proj, device: CPU + model.layers.7.mlp.experts.62.act, device: CPU + Module: model.layers.7.mlp.experts.63, device: CPU + model.layers.7.mlp.experts.63.gate_proj, device: CPU + model.layers.7.mlp.experts.63.up_proj, device: CPU + model.layers.7.mlp.experts.63.down_proj, device: CPU + model.layers.7.mlp.experts.63.act, device: CPU + Module: model.layers.7.mlp.gate, device: CPU + model.layers.7.mlp.gate.weight, device: CPU + Module: model.layers.7.mlp.shared_experts, device: CPU + model.layers.7.mlp.shared_experts.gate_proj, device: CPU + model.layers.7.mlp.shared_experts.up_proj, device: CPU + model.layers.7.mlp.shared_experts.down_proj, device: CPU + model.layers.7.mlp.shared_experts.act, device: CPU + model.layers.7.input_layernorm, device: CPU + model.layers.7.post_attention_layernorm, device: CPU + Module: model.layers.8, device: CPU + Module: model.layers.8.self_attn, device: CPU + model.layers.8.self_attn.q_proj, device: CPU + model.layers.8.self_attn.k_proj, device: CPU + model.layers.8.self_attn.v_proj, device: CPU + model.layers.8.self_attn.o_proj, device: CPU + model.layers.8.self_attn.q_rope, device: CPU + model.layers.8.self_attn.k_rope, device: CPU + Module: model.layers.8.mlp, device: CPU + Module: model.layers.8.mlp.experts, device: CPU + Module: model.layers.8.mlp.experts.0, device: CPU + model.layers.8.mlp.experts.0.gate_proj, device: CPU + model.layers.8.mlp.experts.0.up_proj, device: CPU + model.layers.8.mlp.experts.0.down_proj, device: CPU + model.layers.8.mlp.experts.0.act, device: CPU + Module: model.layers.8.mlp.experts.1, device: CPU + model.layers.8.mlp.experts.1.gate_proj, device: CPU + model.layers.8.mlp.experts.1.up_proj, device: CPU + model.layers.8.mlp.experts.1.down_proj, device: CPU + model.layers.8.mlp.experts.1.act, device: CPU + Module: model.layers.8.mlp.experts.2, device: CPU + model.layers.8.mlp.experts.2.gate_proj, device: CPU + model.layers.8.mlp.experts.2.up_proj, device: CPU + 
model.layers.8.mlp.experts.2.down_proj, device: CPU + model.layers.8.mlp.experts.2.act, device: CPU + Module: model.layers.8.mlp.experts.3, device: CPU + model.layers.8.mlp.experts.3.gate_proj, device: CPU + model.layers.8.mlp.experts.3.up_proj, device: CPU + model.layers.8.mlp.experts.3.down_proj, device: CPU + model.layers.8.mlp.experts.3.act, device: CPU + Module: model.layers.8.mlp.experts.4, device: CPU + model.layers.8.mlp.experts.4.gate_proj, device: CPU + model.layers.8.mlp.experts.4.up_proj, device: CPU + model.layers.8.mlp.experts.4.down_proj, device: CPU + model.layers.8.mlp.experts.4.act, device: CPU + Module: model.layers.8.mlp.experts.5, device: CPU + model.layers.8.mlp.experts.5.gate_proj, device: CPU + model.layers.8.mlp.experts.5.up_proj, device: CPU + model.layers.8.mlp.experts.5.down_proj, device: CPU + model.layers.8.mlp.experts.5.act, device: CPU + Module: model.layers.8.mlp.experts.6, device: CPU + model.layers.8.mlp.experts.6.gate_proj, device: CPU + model.layers.8.mlp.experts.6.up_proj, device: CPU + model.layers.8.mlp.experts.6.down_proj, device: CPU + model.layers.8.mlp.experts.6.act, device: CPU + Module: model.layers.8.mlp.experts.7, device: CPU + model.layers.8.mlp.experts.7.gate_proj, device: CPU + model.layers.8.mlp.experts.7.up_proj, device: CPU + model.layers.8.mlp.experts.7.down_proj, device: CPU + model.layers.8.mlp.experts.7.act, device: CPU + Module: model.layers.8.mlp.experts.8, device: CPU + model.layers.8.mlp.experts.8.gate_proj, device: CPU + model.layers.8.mlp.experts.8.up_proj, device: CPU + model.layers.8.mlp.experts.8.down_proj, device: CPU + model.layers.8.mlp.experts.8.act, device: CPU + Module: model.layers.8.mlp.experts.9, device: CPU + model.layers.8.mlp.experts.9.gate_proj, device: CPU + model.layers.8.mlp.experts.9.up_proj, device: CPU + model.layers.8.mlp.experts.9.down_proj, device: CPU + model.layers.8.mlp.experts.9.act, device: CPU + Module: model.layers.8.mlp.experts.10, device: CPU + model.layers.8.mlp.experts.10.gate_proj, device: CPU + model.layers.8.mlp.experts.10.up_proj, device: CPU + model.layers.8.mlp.experts.10.down_proj, device: CPU + model.layers.8.mlp.experts.10.act, device: CPU + Module: model.layers.8.mlp.experts.11, device: CPU + model.layers.8.mlp.experts.11.gate_proj, device: CPU + model.layers.8.mlp.experts.11.up_proj, device: CPU + model.layers.8.mlp.experts.11.down_proj, device: CPU + model.layers.8.mlp.experts.11.act, device: CPU + Module: model.layers.8.mlp.experts.12, device: CPU + model.layers.8.mlp.experts.12.gate_proj, device: CPU + model.layers.8.mlp.experts.12.up_proj, device: CPU + model.layers.8.mlp.experts.12.down_proj, device: CPU + model.layers.8.mlp.experts.12.act, device: CPU + Module: model.layers.8.mlp.experts.13, device: CPU + model.layers.8.mlp.experts.13.gate_proj, device: CPU + model.layers.8.mlp.experts.13.up_proj, device: CPU + model.layers.8.mlp.experts.13.down_proj, device: CPU + model.layers.8.mlp.experts.13.act, device: CPU + Module: model.layers.8.mlp.experts.14, device: CPU + model.layers.8.mlp.experts.14.gate_proj, device: CPU + model.layers.8.mlp.experts.14.up_proj, device: CPU + model.layers.8.mlp.experts.14.down_proj, device: CPU + model.layers.8.mlp.experts.14.act, device: CPU + Module: model.layers.8.mlp.experts.15, device: CPU + model.layers.8.mlp.experts.15.gate_proj, device: CPU + model.layers.8.mlp.experts.15.up_proj, device: CPU + model.layers.8.mlp.experts.15.down_proj, device: CPU + model.layers.8.mlp.experts.15.act, device: CPU + Module: model.layers.8.mlp.experts.16, 
device: CPU + model.layers.8.mlp.experts.16.gate_proj, device: CPU + model.layers.8.mlp.experts.16.up_proj, device: CPU + model.layers.8.mlp.experts.16.down_proj, device: CPU + model.layers.8.mlp.experts.16.act, device: CPU + Module: model.layers.8.mlp.experts.17, device: CPU + model.layers.8.mlp.experts.17.gate_proj, device: CPU + model.layers.8.mlp.experts.17.up_proj, device: CPU + model.layers.8.mlp.experts.17.down_proj, device: CPU + model.layers.8.mlp.experts.17.act, device: CPU + Module: model.layers.8.mlp.experts.18, device: CPU + model.layers.8.mlp.experts.18.gate_proj, device: CPU + model.layers.8.mlp.experts.18.up_proj, device: CPU + model.layers.8.mlp.experts.18.down_proj, device: CPU + model.layers.8.mlp.experts.18.act, device: CPU + Module: model.layers.8.mlp.experts.19, device: CPU + model.layers.8.mlp.experts.19.gate_proj, device: CPU + model.layers.8.mlp.experts.19.up_proj, device: CPU + model.layers.8.mlp.experts.19.down_proj, device: CPU + model.layers.8.mlp.experts.19.act, device: CPU + Module: model.layers.8.mlp.experts.20, device: CPU + model.layers.8.mlp.experts.20.gate_proj, device: CPU + model.layers.8.mlp.experts.20.up_proj, device: CPU + model.layers.8.mlp.experts.20.down_proj, device: CPU + model.layers.8.mlp.experts.20.act, device: CPU + Module: model.layers.8.mlp.experts.21, device: CPU + model.layers.8.mlp.experts.21.gate_proj, device: CPU + model.layers.8.mlp.experts.21.up_proj, device: CPU + model.layers.8.mlp.experts.21.down_proj, device: CPU + model.layers.8.mlp.experts.21.act, device: CPU + Module: model.layers.8.mlp.experts.22, device: CPU + model.layers.8.mlp.experts.22.gate_proj, device: CPU + model.layers.8.mlp.experts.22.up_proj, device: CPU + model.layers.8.mlp.experts.22.down_proj, device: CPU + model.layers.8.mlp.experts.22.act, device: CPU + Module: model.layers.8.mlp.experts.23, device: CPU + model.layers.8.mlp.experts.23.gate_proj, device: CPU + model.layers.8.mlp.experts.23.up_proj, device: CPU + model.layers.8.mlp.experts.23.down_proj, device: CPU + model.layers.8.mlp.experts.23.act, device: CPU + Module: model.layers.8.mlp.experts.24, device: CPU + model.layers.8.mlp.experts.24.gate_proj, device: CPU + model.layers.8.mlp.experts.24.up_proj, device: CPU + model.layers.8.mlp.experts.24.down_proj, device: CPU + model.layers.8.mlp.experts.24.act, device: CPU + Module: model.layers.8.mlp.experts.25, device: CPU + model.layers.8.mlp.experts.25.gate_proj, device: CPU + model.layers.8.mlp.experts.25.up_proj, device: CPU + model.layers.8.mlp.experts.25.down_proj, device: CPU + model.layers.8.mlp.experts.25.act, device: CPU + Module: model.layers.8.mlp.experts.26, device: CPU + model.layers.8.mlp.experts.26.gate_proj, device: CPU + model.layers.8.mlp.experts.26.up_proj, device: CPU + model.layers.8.mlp.experts.26.down_proj, device: CPU + model.layers.8.mlp.experts.26.act, device: CPU + Module: model.layers.8.mlp.experts.27, device: CPU + model.layers.8.mlp.experts.27.gate_proj, device: CPU + model.layers.8.mlp.experts.27.up_proj, device: CPU + model.layers.8.mlp.experts.27.down_proj, device: CPU + model.layers.8.mlp.experts.27.act, device: CPU + Module: model.layers.8.mlp.experts.28, device: CPU + model.layers.8.mlp.experts.28.gate_proj, device: CPU + model.layers.8.mlp.experts.28.up_proj, device: CPU + model.layers.8.mlp.experts.28.down_proj, device: CPU + model.layers.8.mlp.experts.28.act, device: CPU + Module: model.layers.8.mlp.experts.29, device: CPU + model.layers.8.mlp.experts.29.gate_proj, device: CPU + model.layers.8.mlp.experts.29.up_proj, 
device: CPU + model.layers.8.mlp.experts.29.down_proj, device: CPU + model.layers.8.mlp.experts.29.act, device: CPU + Module: model.layers.8.mlp.experts.30, device: CPU + model.layers.8.mlp.experts.30.gate_proj, device: CPU + model.layers.8.mlp.experts.30.up_proj, device: CPU + model.layers.8.mlp.experts.30.down_proj, device: CPU + model.layers.8.mlp.experts.30.act, device: CPU + Module: model.layers.8.mlp.experts.31, device: CPU + model.layers.8.mlp.experts.31.gate_proj, device: CPU + model.layers.8.mlp.experts.31.up_proj, device: CPU + model.layers.8.mlp.experts.31.down_proj, device: CPU + model.layers.8.mlp.experts.31.act, device: CPU + Module: model.layers.8.mlp.experts.32, device: CPU + model.layers.8.mlp.experts.32.gate_proj, device: CPU + model.layers.8.mlp.experts.32.up_proj, device: CPU + model.layers.8.mlp.experts.32.down_proj, device: CPU + model.layers.8.mlp.experts.32.act, device: CPU + Module: model.layers.8.mlp.experts.33, device: CPU + model.layers.8.mlp.experts.33.gate_proj, device: CPU + model.layers.8.mlp.experts.33.up_proj, device: CPU + model.layers.8.mlp.experts.33.down_proj, device: CPU + model.layers.8.mlp.experts.33.act, device: CPU + Module: model.layers.8.mlp.experts.34, device: CPU + model.layers.8.mlp.experts.34.gate_proj, device: CPU + model.layers.8.mlp.experts.34.up_proj, device: CPU + model.layers.8.mlp.experts.34.down_proj, device: CPU + model.layers.8.mlp.experts.34.act, device: CPU + Module: model.layers.8.mlp.experts.35, device: CPU + model.layers.8.mlp.experts.35.gate_proj, device: CPU + model.layers.8.mlp.experts.35.up_proj, device: CPU + model.layers.8.mlp.experts.35.down_proj, device: CPU + model.layers.8.mlp.experts.35.act, device: CPU + Module: model.layers.8.mlp.experts.36, device: CPU + model.layers.8.mlp.experts.36.gate_proj, device: CPU + model.layers.8.mlp.experts.36.up_proj, device: CPU + model.layers.8.mlp.experts.36.down_proj, device: CPU + model.layers.8.mlp.experts.36.act, device: CPU + Module: model.layers.8.mlp.experts.37, device: CPU + model.layers.8.mlp.experts.37.gate_proj, device: CPU + model.layers.8.mlp.experts.37.up_proj, device: CPU + model.layers.8.mlp.experts.37.down_proj, device: CPU + model.layers.8.mlp.experts.37.act, device: CPU + Module: model.layers.8.mlp.experts.38, device: CPU + model.layers.8.mlp.experts.38.gate_proj, device: CPU + model.layers.8.mlp.experts.38.up_proj, device: CPU + model.layers.8.mlp.experts.38.down_proj, device: CPU + model.layers.8.mlp.experts.38.act, device: CPU + Module: model.layers.8.mlp.experts.39, device: CPU + model.layers.8.mlp.experts.39.gate_proj, device: CPU + model.layers.8.mlp.experts.39.up_proj, device: CPU + model.layers.8.mlp.experts.39.down_proj, device: CPU + model.layers.8.mlp.experts.39.act, device: CPU + Module: model.layers.8.mlp.experts.40, device: CPU + model.layers.8.mlp.experts.40.gate_proj, device: CPU + model.layers.8.mlp.experts.40.up_proj, device: CPU + model.layers.8.mlp.experts.40.down_proj, device: CPU + model.layers.8.mlp.experts.40.act, device: CPU + Module: model.layers.8.mlp.experts.41, device: CPU + model.layers.8.mlp.experts.41.gate_proj, device: CPU + model.layers.8.mlp.experts.41.up_proj, device: CPU + model.layers.8.mlp.experts.41.down_proj, device: CPU + model.layers.8.mlp.experts.41.act, device: CPU + Module: model.layers.8.mlp.experts.42, device: CPU + model.layers.8.mlp.experts.42.gate_proj, device: CPU + model.layers.8.mlp.experts.42.up_proj, device: CPU + model.layers.8.mlp.experts.42.down_proj, device: CPU + model.layers.8.mlp.experts.42.act, 
device: CPU + Module: model.layers.8.mlp.experts.43, device: CPU + model.layers.8.mlp.experts.43.gate_proj, device: CPU + model.layers.8.mlp.experts.43.up_proj, device: CPU + model.layers.8.mlp.experts.43.down_proj, device: CPU + model.layers.8.mlp.experts.43.act, device: CPU + Module: model.layers.8.mlp.experts.44, device: CPU + model.layers.8.mlp.experts.44.gate_proj, device: CPU + model.layers.8.mlp.experts.44.up_proj, device: CPU + model.layers.8.mlp.experts.44.down_proj, device: CPU + model.layers.8.mlp.experts.44.act, device: CPU + Module: model.layers.8.mlp.experts.45, device: CPU + model.layers.8.mlp.experts.45.gate_proj, device: CPU + model.layers.8.mlp.experts.45.up_proj, device: CPU + model.layers.8.mlp.experts.45.down_proj, device: CPU + model.layers.8.mlp.experts.45.act, device: CPU + Module: model.layers.8.mlp.experts.46, device: CPU + model.layers.8.mlp.experts.46.gate_proj, device: CPU + model.layers.8.mlp.experts.46.up_proj, device: CPU + model.layers.8.mlp.experts.46.down_proj, device: CPU + model.layers.8.mlp.experts.46.act, device: CPU + Module: model.layers.8.mlp.experts.47, device: CPU + model.layers.8.mlp.experts.47.gate_proj, device: CPU + model.layers.8.mlp.experts.47.up_proj, device: CPU + model.layers.8.mlp.experts.47.down_proj, device: CPU + model.layers.8.mlp.experts.47.act, device: CPU + Module: model.layers.8.mlp.experts.48, device: CPU + model.layers.8.mlp.experts.48.gate_proj, device: CPU + model.layers.8.mlp.experts.48.up_proj, device: CPU + model.layers.8.mlp.experts.48.down_proj, device: CPU + model.layers.8.mlp.experts.48.act, device: CPU + Module: model.layers.8.mlp.experts.49, device: CPU + model.layers.8.mlp.experts.49.gate_proj, device: CPU + model.layers.8.mlp.experts.49.up_proj, device: CPU + model.layers.8.mlp.experts.49.down_proj, device: CPU + model.layers.8.mlp.experts.49.act, device: CPU + Module: model.layers.8.mlp.experts.50, device: CPU + model.layers.8.mlp.experts.50.gate_proj, device: CPU + model.layers.8.mlp.experts.50.up_proj, device: CPU + model.layers.8.mlp.experts.50.down_proj, device: CPU + model.layers.8.mlp.experts.50.act, device: CPU + Module: model.layers.8.mlp.experts.51, device: CPU + model.layers.8.mlp.experts.51.gate_proj, device: CPU + model.layers.8.mlp.experts.51.up_proj, device: CPU + model.layers.8.mlp.experts.51.down_proj, device: CPU + model.layers.8.mlp.experts.51.act, device: CPU + Module: model.layers.8.mlp.experts.52, device: CPU + model.layers.8.mlp.experts.52.gate_proj, device: CPU + model.layers.8.mlp.experts.52.up_proj, device: CPU + model.layers.8.mlp.experts.52.down_proj, device: CPU + model.layers.8.mlp.experts.52.act, device: CPU + Module: model.layers.8.mlp.experts.53, device: CPU + model.layers.8.mlp.experts.53.gate_proj, device: CPU + model.layers.8.mlp.experts.53.up_proj, device: CPU + model.layers.8.mlp.experts.53.down_proj, device: CPU + model.layers.8.mlp.experts.53.act, device: CPU + Module: model.layers.8.mlp.experts.54, device: CPU + model.layers.8.mlp.experts.54.gate_proj, device: CPU + model.layers.8.mlp.experts.54.up_proj, device: CPU + model.layers.8.mlp.experts.54.down_proj, device: CPU + model.layers.8.mlp.experts.54.act, device: CPU + Module: model.layers.8.mlp.experts.55, device: CPU + model.layers.8.mlp.experts.55.gate_proj, device: CPU + model.layers.8.mlp.experts.55.up_proj, device: CPU + model.layers.8.mlp.experts.55.down_proj, device: CPU + model.layers.8.mlp.experts.55.act, device: CPU + Module: model.layers.8.mlp.experts.56, device: CPU + model.layers.8.mlp.experts.56.gate_proj, 
device: CPU + model.layers.8.mlp.experts.56.up_proj, device: CPU + model.layers.8.mlp.experts.56.down_proj, device: CPU + model.layers.8.mlp.experts.56.act, device: CPU + Module: model.layers.8.mlp.experts.57, device: CPU + model.layers.8.mlp.experts.57.gate_proj, device: CPU + model.layers.8.mlp.experts.57.up_proj, device: CPU + model.layers.8.mlp.experts.57.down_proj, device: CPU + model.layers.8.mlp.experts.57.act, device: CPU + Module: model.layers.8.mlp.experts.58, device: CPU + model.layers.8.mlp.experts.58.gate_proj, device: CPU + model.layers.8.mlp.experts.58.up_proj, device: CPU + model.layers.8.mlp.experts.58.down_proj, device: CPU + model.layers.8.mlp.experts.58.act, device: CPU + Module: model.layers.8.mlp.experts.59, device: CPU + model.layers.8.mlp.experts.59.gate_proj, device: CPU + model.layers.8.mlp.experts.59.up_proj, device: CPU + model.layers.8.mlp.experts.59.down_proj, device: CPU + model.layers.8.mlp.experts.59.act, device: CPU + Module: model.layers.8.mlp.experts.60, device: CPU + model.layers.8.mlp.experts.60.gate_proj, device: CPU + model.layers.8.mlp.experts.60.up_proj, device: CPU + model.layers.8.mlp.experts.60.down_proj, device: CPU + model.layers.8.mlp.experts.60.act, device: CPU + Module: model.layers.8.mlp.experts.61, device: CPU + model.layers.8.mlp.experts.61.gate_proj, device: CPU + model.layers.8.mlp.experts.61.up_proj, device: CPU + model.layers.8.mlp.experts.61.down_proj, device: CPU + model.layers.8.mlp.experts.61.act, device: CPU + Module: model.layers.8.mlp.experts.62, device: CPU + model.layers.8.mlp.experts.62.gate_proj, device: CPU + model.layers.8.mlp.experts.62.up_proj, device: CPU + model.layers.8.mlp.experts.62.down_proj, device: CPU + model.layers.8.mlp.experts.62.act, device: CPU + Module: model.layers.8.mlp.experts.63, device: CPU + model.layers.8.mlp.experts.63.gate_proj, device: CPU + model.layers.8.mlp.experts.63.up_proj, device: CPU + model.layers.8.mlp.experts.63.down_proj, device: CPU + model.layers.8.mlp.experts.63.act, device: CPU + Module: model.layers.8.mlp.gate, device: CPU + model.layers.8.mlp.gate.weight, device: CPU + Module: model.layers.8.mlp.shared_experts, device: CPU + model.layers.8.mlp.shared_experts.gate_proj, device: CPU + model.layers.8.mlp.shared_experts.up_proj, device: CPU + model.layers.8.mlp.shared_experts.down_proj, device: CPU + model.layers.8.mlp.shared_experts.act, device: CPU + model.layers.8.input_layernorm, device: CPU + model.layers.8.post_attention_layernorm, device: CPU + Module: model.layers.9, device: CPU + Module: model.layers.9.self_attn, device: CPU + model.layers.9.self_attn.q_proj, device: CPU + model.layers.9.self_attn.k_proj, device: CPU + model.layers.9.self_attn.v_proj, device: CPU + model.layers.9.self_attn.o_proj, device: CPU + model.layers.9.self_attn.q_rope, device: CPU + model.layers.9.self_attn.k_rope, device: CPU + Module: model.layers.9.mlp, device: CPU + Module: model.layers.9.mlp.experts, device: CPU + Module: model.layers.9.mlp.experts.0, device: CPU + model.layers.9.mlp.experts.0.gate_proj, device: CPU + model.layers.9.mlp.experts.0.up_proj, device: CPU + model.layers.9.mlp.experts.0.down_proj, device: CPU + model.layers.9.mlp.experts.0.act, device: CPU + Module: model.layers.9.mlp.experts.1, device: CPU + model.layers.9.mlp.experts.1.gate_proj, device: CPU + model.layers.9.mlp.experts.1.up_proj, device: CPU + model.layers.9.mlp.experts.1.down_proj, device: CPU + model.layers.9.mlp.experts.1.act, device: CPU + Module: model.layers.9.mlp.experts.2, device: CPU + 
model.layers.9.mlp.experts.2.gate_proj, device: CPU + model.layers.9.mlp.experts.2.up_proj, device: CPU + model.layers.9.mlp.experts.2.down_proj, device: CPU + model.layers.9.mlp.experts.2.act, device: CPU + Module: model.layers.9.mlp.experts.3, device: CPU + model.layers.9.mlp.experts.3.gate_proj, device: CPU + model.layers.9.mlp.experts.3.up_proj, device: CPU + model.layers.9.mlp.experts.3.down_proj, device: CPU + model.layers.9.mlp.experts.3.act, device: CPU + Module: model.layers.9.mlp.experts.4, device: CPU + model.layers.9.mlp.experts.4.gate_proj, device: CPU + model.layers.9.mlp.experts.4.up_proj, device: CPU + model.layers.9.mlp.experts.4.down_proj, device: CPU + model.layers.9.mlp.experts.4.act, device: CPU + Module: model.layers.9.mlp.experts.5, device: CPU + model.layers.9.mlp.experts.5.gate_proj, device: CPU + model.layers.9.mlp.experts.5.up_proj, device: CPU + model.layers.9.mlp.experts.5.down_proj, device: CPU + model.layers.9.mlp.experts.5.act, device: CPU + Module: model.layers.9.mlp.experts.6, device: CPU + model.layers.9.mlp.experts.6.gate_proj, device: CPU + model.layers.9.mlp.experts.6.up_proj, device: CPU + model.layers.9.mlp.experts.6.down_proj, device: CPU + model.layers.9.mlp.experts.6.act, device: CPU + Module: model.layers.9.mlp.experts.7, device: CPU + model.layers.9.mlp.experts.7.gate_proj, device: CPU + model.layers.9.mlp.experts.7.up_proj, device: CPU + model.layers.9.mlp.experts.7.down_proj, device: CPU + model.layers.9.mlp.experts.7.act, device: CPU + Module: model.layers.9.mlp.experts.8, device: CPU + model.layers.9.mlp.experts.8.gate_proj, device: CPU + model.layers.9.mlp.experts.8.up_proj, device: CPU + model.layers.9.mlp.experts.8.down_proj, device: CPU + model.layers.9.mlp.experts.8.act, device: CPU + Module: model.layers.9.mlp.experts.9, device: CPU + model.layers.9.mlp.experts.9.gate_proj, device: CPU + model.layers.9.mlp.experts.9.up_proj, device: CPU + model.layers.9.mlp.experts.9.down_proj, device: CPU + model.layers.9.mlp.experts.9.act, device: CPU + Module: model.layers.9.mlp.experts.10, device: CPU + model.layers.9.mlp.experts.10.gate_proj, device: CPU + model.layers.9.mlp.experts.10.up_proj, device: CPU + model.layers.9.mlp.experts.10.down_proj, device: CPU + model.layers.9.mlp.experts.10.act, device: CPU + Module: model.layers.9.mlp.experts.11, device: CPU + model.layers.9.mlp.experts.11.gate_proj, device: CPU + model.layers.9.mlp.experts.11.up_proj, device: CPU + model.layers.9.mlp.experts.11.down_proj, device: CPU + model.layers.9.mlp.experts.11.act, device: CPU + Module: model.layers.9.mlp.experts.12, device: CPU + model.layers.9.mlp.experts.12.gate_proj, device: CPU + model.layers.9.mlp.experts.12.up_proj, device: CPU + model.layers.9.mlp.experts.12.down_proj, device: CPU + model.layers.9.mlp.experts.12.act, device: CPU + Module: model.layers.9.mlp.experts.13, device: CPU + model.layers.9.mlp.experts.13.gate_proj, device: CPU + model.layers.9.mlp.experts.13.up_proj, device: CPU + model.layers.9.mlp.experts.13.down_proj, device: CPU + model.layers.9.mlp.experts.13.act, device: CPU + Module: model.layers.9.mlp.experts.14, device: CPU + model.layers.9.mlp.experts.14.gate_proj, device: CPU + model.layers.9.mlp.experts.14.up_proj, device: CPU + model.layers.9.mlp.experts.14.down_proj, device: CPU + model.layers.9.mlp.experts.14.act, device: CPU + Module: model.layers.9.mlp.experts.15, device: CPU + model.layers.9.mlp.experts.15.gate_proj, device: CPU + model.layers.9.mlp.experts.15.up_proj, device: CPU + model.layers.9.mlp.experts.15.down_proj, 
device: CPU + model.layers.9.mlp.experts.15.act, device: CPU + Module: model.layers.9.mlp.experts.16, device: CPU + model.layers.9.mlp.experts.16.gate_proj, device: CPU + model.layers.9.mlp.experts.16.up_proj, device: CPU + model.layers.9.mlp.experts.16.down_proj, device: CPU + model.layers.9.mlp.experts.16.act, device: CPU + Module: model.layers.9.mlp.experts.17, device: CPU + model.layers.9.mlp.experts.17.gate_proj, device: CPU + model.layers.9.mlp.experts.17.up_proj, device: CPU + model.layers.9.mlp.experts.17.down_proj, device: CPU + model.layers.9.mlp.experts.17.act, device: CPU + Module: model.layers.9.mlp.experts.18, device: CPU + model.layers.9.mlp.experts.18.gate_proj, device: CPU + model.layers.9.mlp.experts.18.up_proj, device: CPU + model.layers.9.mlp.experts.18.down_proj, device: CPU + model.layers.9.mlp.experts.18.act, device: CPU + Module: model.layers.9.mlp.experts.19, device: CPU + model.layers.9.mlp.experts.19.gate_proj, device: CPU + model.layers.9.mlp.experts.19.up_proj, device: CPU + model.layers.9.mlp.experts.19.down_proj, device: CPU + model.layers.9.mlp.experts.19.act, device: CPU + Module: model.layers.9.mlp.experts.20, device: CPU + model.layers.9.mlp.experts.20.gate_proj, device: CPU + model.layers.9.mlp.experts.20.up_proj, device: CPU + model.layers.9.mlp.experts.20.down_proj, device: CPU + model.layers.9.mlp.experts.20.act, device: CPU + Module: model.layers.9.mlp.experts.21, device: CPU + model.layers.9.mlp.experts.21.gate_proj, device: CPU + model.layers.9.mlp.experts.21.up_proj, device: CPU + model.layers.9.mlp.experts.21.down_proj, device: CPU + model.layers.9.mlp.experts.21.act, device: CPU + Module: model.layers.9.mlp.experts.22, device: CPU + model.layers.9.mlp.experts.22.gate_proj, device: CPU + model.layers.9.mlp.experts.22.up_proj, device: CPU + model.layers.9.mlp.experts.22.down_proj, device: CPU + model.layers.9.mlp.experts.22.act, device: CPU + Module: model.layers.9.mlp.experts.23, device: CPU + model.layers.9.mlp.experts.23.gate_proj, device: CPU + model.layers.9.mlp.experts.23.up_proj, device: CPU + model.layers.9.mlp.experts.23.down_proj, device: CPU + model.layers.9.mlp.experts.23.act, device: CPU + Module: model.layers.9.mlp.experts.24, device: CPU + model.layers.9.mlp.experts.24.gate_proj, device: CPU + model.layers.9.mlp.experts.24.up_proj, device: CPU + model.layers.9.mlp.experts.24.down_proj, device: CPU + model.layers.9.mlp.experts.24.act, device: CPU + Module: model.layers.9.mlp.experts.25, device: CPU + model.layers.9.mlp.experts.25.gate_proj, device: CPU + model.layers.9.mlp.experts.25.up_proj, device: CPU + model.layers.9.mlp.experts.25.down_proj, device: CPU + model.layers.9.mlp.experts.25.act, device: CPU + Module: model.layers.9.mlp.experts.26, device: CPU + model.layers.9.mlp.experts.26.gate_proj, device: CPU + model.layers.9.mlp.experts.26.up_proj, device: CPU + model.layers.9.mlp.experts.26.down_proj, device: CPU + model.layers.9.mlp.experts.26.act, device: CPU + Module: model.layers.9.mlp.experts.27, device: CPU + model.layers.9.mlp.experts.27.gate_proj, device: CPU + model.layers.9.mlp.experts.27.up_proj, device: CPU + model.layers.9.mlp.experts.27.down_proj, device: CPU + model.layers.9.mlp.experts.27.act, device: CPU + Module: model.layers.9.mlp.experts.28, device: CPU + model.layers.9.mlp.experts.28.gate_proj, device: CPU + model.layers.9.mlp.experts.28.up_proj, device: CPU + model.layers.9.mlp.experts.28.down_proj, device: CPU + model.layers.9.mlp.experts.28.act, device: CPU + Module: model.layers.9.mlp.experts.29, device: 
CPU + model.layers.9.mlp.experts.29.gate_proj, device: CPU + model.layers.9.mlp.experts.29.up_proj, device: CPU + model.layers.9.mlp.experts.29.down_proj, device: CPU + model.layers.9.mlp.experts.29.act, device: CPU + Module: model.layers.9.mlp.experts.30, device: CPU + model.layers.9.mlp.experts.30.gate_proj, device: CPU + model.layers.9.mlp.experts.30.up_proj, device: CPU + model.layers.9.mlp.experts.30.down_proj, device: CPU + model.layers.9.mlp.experts.30.act, device: CPU + Module: model.layers.9.mlp.experts.31, device: CPU + model.layers.9.mlp.experts.31.gate_proj, device: CPU + model.layers.9.mlp.experts.31.up_proj, device: CPU + model.layers.9.mlp.experts.31.down_proj, device: CPU + model.layers.9.mlp.experts.31.act, device: CPU + Module: model.layers.9.mlp.experts.32, device: CPU + model.layers.9.mlp.experts.32.gate_proj, device: CPU + model.layers.9.mlp.experts.32.up_proj, device: CPU + model.layers.9.mlp.experts.32.down_proj, device: CPU + model.layers.9.mlp.experts.32.act, device: CPU + Module: model.layers.9.mlp.experts.33, device: CPU + model.layers.9.mlp.experts.33.gate_proj, device: CPU + model.layers.9.mlp.experts.33.up_proj, device: CPU + model.layers.9.mlp.experts.33.down_proj, device: CPU + model.layers.9.mlp.experts.33.act, device: CPU + Module: model.layers.9.mlp.experts.34, device: CPU + model.layers.9.mlp.experts.34.gate_proj, device: CPU + model.layers.9.mlp.experts.34.up_proj, device: CPU + model.layers.9.mlp.experts.34.down_proj, device: CPU + model.layers.9.mlp.experts.34.act, device: CPU + Module: model.layers.9.mlp.experts.35, device: CPU + model.layers.9.mlp.experts.35.gate_proj, device: CPU + model.layers.9.mlp.experts.35.up_proj, device: CPU + model.layers.9.mlp.experts.35.down_proj, device: CPU + model.layers.9.mlp.experts.35.act, device: CPU + Module: model.layers.9.mlp.experts.36, device: CPU + model.layers.9.mlp.experts.36.gate_proj, device: CPU + model.layers.9.mlp.experts.36.up_proj, device: CPU + model.layers.9.mlp.experts.36.down_proj, device: CPU + model.layers.9.mlp.experts.36.act, device: CPU + Module: model.layers.9.mlp.experts.37, device: CPU + model.layers.9.mlp.experts.37.gate_proj, device: CPU + model.layers.9.mlp.experts.37.up_proj, device: CPU + model.layers.9.mlp.experts.37.down_proj, device: CPU + model.layers.9.mlp.experts.37.act, device: CPU + Module: model.layers.9.mlp.experts.38, device: CPU + model.layers.9.mlp.experts.38.gate_proj, device: CPU + model.layers.9.mlp.experts.38.up_proj, device: CPU + model.layers.9.mlp.experts.38.down_proj, device: CPU + model.layers.9.mlp.experts.38.act, device: CPU + Module: model.layers.9.mlp.experts.39, device: CPU + model.layers.9.mlp.experts.39.gate_proj, device: CPU + model.layers.9.mlp.experts.39.up_proj, device: CPU + model.layers.9.mlp.experts.39.down_proj, device: CPU + model.layers.9.mlp.experts.39.act, device: CPU + Module: model.layers.9.mlp.experts.40, device: CPU + model.layers.9.mlp.experts.40.gate_proj, device: CPU + model.layers.9.mlp.experts.40.up_proj, device: CPU + model.layers.9.mlp.experts.40.down_proj, device: CPU + model.layers.9.mlp.experts.40.act, device: CPU + Module: model.layers.9.mlp.experts.41, device: CPU + model.layers.9.mlp.experts.41.gate_proj, device: CPU + model.layers.9.mlp.experts.41.up_proj, device: CPU + model.layers.9.mlp.experts.41.down_proj, device: CPU + model.layers.9.mlp.experts.41.act, device: CPU + Module: model.layers.9.mlp.experts.42, device: CPU + model.layers.9.mlp.experts.42.gate_proj, device: CPU + model.layers.9.mlp.experts.42.up_proj, device: 
CPU + model.layers.9.mlp.experts.42.down_proj, device: CPU + model.layers.9.mlp.experts.42.act, device: CPU + Module: model.layers.9.mlp.experts.43, device: CPU + model.layers.9.mlp.experts.43.gate_proj, device: CPU + model.layers.9.mlp.experts.43.up_proj, device: CPU + model.layers.9.mlp.experts.43.down_proj, device: CPU + model.layers.9.mlp.experts.43.act, device: CPU + Module: model.layers.9.mlp.experts.44, device: CPU + model.layers.9.mlp.experts.44.gate_proj, device: CPU + model.layers.9.mlp.experts.44.up_proj, device: CPU + model.layers.9.mlp.experts.44.down_proj, device: CPU + model.layers.9.mlp.experts.44.act, device: CPU + Module: model.layers.9.mlp.experts.45, device: CPU + model.layers.9.mlp.experts.45.gate_proj, device: CPU + model.layers.9.mlp.experts.45.up_proj, device: CPU + model.layers.9.mlp.experts.45.down_proj, device: CPU + model.layers.9.mlp.experts.45.act, device: CPU + Module: model.layers.9.mlp.experts.46, device: CPU + model.layers.9.mlp.experts.46.gate_proj, device: CPU + model.layers.9.mlp.experts.46.up_proj, device: CPU + model.layers.9.mlp.experts.46.down_proj, device: CPU + model.layers.9.mlp.experts.46.act, device: CPU + Module: model.layers.9.mlp.experts.47, device: CPU + model.layers.9.mlp.experts.47.gate_proj, device: CPU + model.layers.9.mlp.experts.47.up_proj, device: CPU + model.layers.9.mlp.experts.47.down_proj, device: CPU + model.layers.9.mlp.experts.47.act, device: CPU + Module: model.layers.9.mlp.experts.48, device: CPU + model.layers.9.mlp.experts.48.gate_proj, device: CPU + model.layers.9.mlp.experts.48.up_proj, device: CPU + model.layers.9.mlp.experts.48.down_proj, device: CPU + model.layers.9.mlp.experts.48.act, device: CPU + Module: model.layers.9.mlp.experts.49, device: CPU + model.layers.9.mlp.experts.49.gate_proj, device: CPU + model.layers.9.mlp.experts.49.up_proj, device: CPU + model.layers.9.mlp.experts.49.down_proj, device: CPU + model.layers.9.mlp.experts.49.act, device: CPU + Module: model.layers.9.mlp.experts.50, device: CPU + model.layers.9.mlp.experts.50.gate_proj, device: CPU + model.layers.9.mlp.experts.50.up_proj, device: CPU + model.layers.9.mlp.experts.50.down_proj, device: CPU + model.layers.9.mlp.experts.50.act, device: CPU + Module: model.layers.9.mlp.experts.51, device: CPU + model.layers.9.mlp.experts.51.gate_proj, device: CPU + model.layers.9.mlp.experts.51.up_proj, device: CPU + model.layers.9.mlp.experts.51.down_proj, device: CPU + model.layers.9.mlp.experts.51.act, device: CPU + Module: model.layers.9.mlp.experts.52, device: CPU + model.layers.9.mlp.experts.52.gate_proj, device: CPU + model.layers.9.mlp.experts.52.up_proj, device: CPU + model.layers.9.mlp.experts.52.down_proj, device: CPU + model.layers.9.mlp.experts.52.act, device: CPU + Module: model.layers.9.mlp.experts.53, device: CPU + model.layers.9.mlp.experts.53.gate_proj, device: CPU + model.layers.9.mlp.experts.53.up_proj, device: CPU + model.layers.9.mlp.experts.53.down_proj, device: CPU + model.layers.9.mlp.experts.53.act, device: CPU + Module: model.layers.9.mlp.experts.54, device: CPU + model.layers.9.mlp.experts.54.gate_proj, device: CPU + model.layers.9.mlp.experts.54.up_proj, device: CPU + model.layers.9.mlp.experts.54.down_proj, device: CPU + model.layers.9.mlp.experts.54.act, device: CPU + Module: model.layers.9.mlp.experts.55, device: CPU + model.layers.9.mlp.experts.55.gate_proj, device: CPU + model.layers.9.mlp.experts.55.up_proj, device: CPU + model.layers.9.mlp.experts.55.down_proj, device: CPU + model.layers.9.mlp.experts.55.act, device: CPU + 
Module: model.layers.9.mlp.experts.56, device: CPU + model.layers.9.mlp.experts.56.gate_proj, device: CPU + model.layers.9.mlp.experts.56.up_proj, device: CPU + model.layers.9.mlp.experts.56.down_proj, device: CPU + model.layers.9.mlp.experts.56.act, device: CPU + Module: model.layers.9.mlp.experts.57, device: CPU + model.layers.9.mlp.experts.57.gate_proj, device: CPU + model.layers.9.mlp.experts.57.up_proj, device: CPU + model.layers.9.mlp.experts.57.down_proj, device: CPU + model.layers.9.mlp.experts.57.act, device: CPU + Module: model.layers.9.mlp.experts.58, device: CPU + model.layers.9.mlp.experts.58.gate_proj, device: CPU + model.layers.9.mlp.experts.58.up_proj, device: CPU + model.layers.9.mlp.experts.58.down_proj, device: CPU + model.layers.9.mlp.experts.58.act, device: CPU + Module: model.layers.9.mlp.experts.59, device: CPU + model.layers.9.mlp.experts.59.gate_proj, device: CPU + model.layers.9.mlp.experts.59.up_proj, device: CPU + model.layers.9.mlp.experts.59.down_proj, device: CPU + model.layers.9.mlp.experts.59.act, device: CPU + Module: model.layers.9.mlp.experts.60, device: CPU + model.layers.9.mlp.experts.60.gate_proj, device: CPU + model.layers.9.mlp.experts.60.up_proj, device: CPU + model.layers.9.mlp.experts.60.down_proj, device: CPU + model.layers.9.mlp.experts.60.act, device: CPU + Module: model.layers.9.mlp.experts.61, device: CPU + model.layers.9.mlp.experts.61.gate_proj, device: CPU + model.layers.9.mlp.experts.61.up_proj, device: CPU + model.layers.9.mlp.experts.61.down_proj, device: CPU + model.layers.9.mlp.experts.61.act, device: CPU + Module: model.layers.9.mlp.experts.62, device: CPU + model.layers.9.mlp.experts.62.gate_proj, device: CPU + model.layers.9.mlp.experts.62.up_proj, device: CPU + model.layers.9.mlp.experts.62.down_proj, device: CPU + model.layers.9.mlp.experts.62.act, device: CPU + Module: model.layers.9.mlp.experts.63, device: CPU + model.layers.9.mlp.experts.63.gate_proj, device: CPU + model.layers.9.mlp.experts.63.up_proj, device: CPU + model.layers.9.mlp.experts.63.down_proj, device: CPU + model.layers.9.mlp.experts.63.act, device: CPU + Module: model.layers.9.mlp.gate, device: CPU + model.layers.9.mlp.gate.weight, device: CPU + Module: model.layers.9.mlp.shared_experts, device: CPU + model.layers.9.mlp.shared_experts.gate_proj, device: CPU + model.layers.9.mlp.shared_experts.up_proj, device: CPU + model.layers.9.mlp.shared_experts.down_proj, device: CPU + model.layers.9.mlp.shared_experts.act, device: CPU + model.layers.9.input_layernorm, device: CPU + model.layers.9.post_attention_layernorm, device: CPU + Module: model.layers.10, device: CPU + Module: model.layers.10.self_attn, device: CPU + model.layers.10.self_attn.q_proj, device: CPU + model.layers.10.self_attn.k_proj, device: CPU + model.layers.10.self_attn.v_proj, device: CPU + model.layers.10.self_attn.o_proj, device: CPU + model.layers.10.self_attn.q_rope, device: CPU + model.layers.10.self_attn.k_rope, device: CPU + Module: model.layers.10.mlp, device: CPU + Module: model.layers.10.mlp.experts, device: CPU + Module: model.layers.10.mlp.experts.0, device: CPU + model.layers.10.mlp.experts.0.gate_proj, device: CPU + model.layers.10.mlp.experts.0.up_proj, device: CPU + model.layers.10.mlp.experts.0.down_proj, device: CPU + model.layers.10.mlp.experts.0.act, device: CPU + Module: model.layers.10.mlp.experts.1, device: CPU + model.layers.10.mlp.experts.1.gate_proj, device: CPU + model.layers.10.mlp.experts.1.up_proj, device: CPU + model.layers.10.mlp.experts.1.down_proj, device: CPU + 
model.layers.10.mlp.experts.1.act, device: CPU + Module: model.layers.10.mlp.experts.2, device: CPU + model.layers.10.mlp.experts.2.gate_proj, device: CPU + model.layers.10.mlp.experts.2.up_proj, device: CPU + model.layers.10.mlp.experts.2.down_proj, device: CPU + model.layers.10.mlp.experts.2.act, device: CPU + Module: model.layers.10.mlp.experts.3, device: CPU + model.layers.10.mlp.experts.3.gate_proj, device: CPU + model.layers.10.mlp.experts.3.up_proj, device: CPU + model.layers.10.mlp.experts.3.down_proj, device: CPU + model.layers.10.mlp.experts.3.act, device: CPU + Module: model.layers.10.mlp.experts.4, device: CPU + model.layers.10.mlp.experts.4.gate_proj, device: CPU + model.layers.10.mlp.experts.4.up_proj, device: CPU + model.layers.10.mlp.experts.4.down_proj, device: CPU + model.layers.10.mlp.experts.4.act, device: CPU + Module: model.layers.10.mlp.experts.5, device: CPU + model.layers.10.mlp.experts.5.gate_proj, device: CPU + model.layers.10.mlp.experts.5.up_proj, device: CPU + model.layers.10.mlp.experts.5.down_proj, device: CPU + model.layers.10.mlp.experts.5.act, device: CPU + Module: model.layers.10.mlp.experts.6, device: CPU + model.layers.10.mlp.experts.6.gate_proj, device: CPU + model.layers.10.mlp.experts.6.up_proj, device: CPU + model.layers.10.mlp.experts.6.down_proj, device: CPU + model.layers.10.mlp.experts.6.act, device: CPU + Module: model.layers.10.mlp.experts.7, device: CPU + model.layers.10.mlp.experts.7.gate_proj, device: CPU + model.layers.10.mlp.experts.7.up_proj, device: CPU + model.layers.10.mlp.experts.7.down_proj, device: CPU + model.layers.10.mlp.experts.7.act, device: CPU + Module: model.layers.10.mlp.experts.8, device: CPU + model.layers.10.mlp.experts.8.gate_proj, device: CPU + model.layers.10.mlp.experts.8.up_proj, device: CPU + model.layers.10.mlp.experts.8.down_proj, device: CPU + model.layers.10.mlp.experts.8.act, device: CPU + Module: model.layers.10.mlp.experts.9, device: CPU + model.layers.10.mlp.experts.9.gate_proj, device: CPU + model.layers.10.mlp.experts.9.up_proj, device: CPU + model.layers.10.mlp.experts.9.down_proj, device: CPU + model.layers.10.mlp.experts.9.act, device: CPU + Module: model.layers.10.mlp.experts.10, device: CPU + model.layers.10.mlp.experts.10.gate_proj, device: CPU + model.layers.10.mlp.experts.10.up_proj, device: CPU + model.layers.10.mlp.experts.10.down_proj, device: CPU + model.layers.10.mlp.experts.10.act, device: CPU + Module: model.layers.10.mlp.experts.11, device: CPU + model.layers.10.mlp.experts.11.gate_proj, device: CPU + model.layers.10.mlp.experts.11.up_proj, device: CPU + model.layers.10.mlp.experts.11.down_proj, device: CPU + model.layers.10.mlp.experts.11.act, device: CPU + Module: model.layers.10.mlp.experts.12, device: CPU + model.layers.10.mlp.experts.12.gate_proj, device: CPU + model.layers.10.mlp.experts.12.up_proj, device: CPU + model.layers.10.mlp.experts.12.down_proj, device: CPU + model.layers.10.mlp.experts.12.act, device: CPU + Module: model.layers.10.mlp.experts.13, device: CPU + model.layers.10.mlp.experts.13.gate_proj, device: CPU + model.layers.10.mlp.experts.13.up_proj, device: CPU + model.layers.10.mlp.experts.13.down_proj, device: CPU + model.layers.10.mlp.experts.13.act, device: CPU + Module: model.layers.10.mlp.experts.14, device: CPU + model.layers.10.mlp.experts.14.gate_proj, device: CPU + model.layers.10.mlp.experts.14.up_proj, device: CPU + model.layers.10.mlp.experts.14.down_proj, device: CPU + model.layers.10.mlp.experts.14.act, device: CPU + Module: 
model.layers.10.mlp.experts.15, device: CPU + model.layers.10.mlp.experts.15.gate_proj, device: CPU + model.layers.10.mlp.experts.15.up_proj, device: CPU + model.layers.10.mlp.experts.15.down_proj, device: CPU + model.layers.10.mlp.experts.15.act, device: CPU + Module: model.layers.10.mlp.experts.16, device: CPU + model.layers.10.mlp.experts.16.gate_proj, device: CPU + model.layers.10.mlp.experts.16.up_proj, device: CPU + model.layers.10.mlp.experts.16.down_proj, device: CPU + model.layers.10.mlp.experts.16.act, device: CPU + Module: model.layers.10.mlp.experts.17, device: CPU + model.layers.10.mlp.experts.17.gate_proj, device: CPU + model.layers.10.mlp.experts.17.up_proj, device: CPU + model.layers.10.mlp.experts.17.down_proj, device: CPU + model.layers.10.mlp.experts.17.act, device: CPU + Module: model.layers.10.mlp.experts.18, device: CPU + model.layers.10.mlp.experts.18.gate_proj, device: CPU + model.layers.10.mlp.experts.18.up_proj, device: CPU + model.layers.10.mlp.experts.18.down_proj, device: CPU + model.layers.10.mlp.experts.18.act, device: CPU + Module: model.layers.10.mlp.experts.19, device: CPU + model.layers.10.mlp.experts.19.gate_proj, device: CPU + model.layers.10.mlp.experts.19.up_proj, device: CPU + model.layers.10.mlp.experts.19.down_proj, device: CPU + model.layers.10.mlp.experts.19.act, device: CPU + Module: model.layers.10.mlp.experts.20, device: CPU + model.layers.10.mlp.experts.20.gate_proj, device: CPU + model.layers.10.mlp.experts.20.up_proj, device: CPU + model.layers.10.mlp.experts.20.down_proj, device: CPU + model.layers.10.mlp.experts.20.act, device: CPU + Module: model.layers.10.mlp.experts.21, device: CPU + model.layers.10.mlp.experts.21.gate_proj, device: CPU + model.layers.10.mlp.experts.21.up_proj, device: CPU + model.layers.10.mlp.experts.21.down_proj, device: CPU + model.layers.10.mlp.experts.21.act, device: CPU + Module: model.layers.10.mlp.experts.22, device: CPU + model.layers.10.mlp.experts.22.gate_proj, device: CPU + model.layers.10.mlp.experts.22.up_proj, device: CPU + model.layers.10.mlp.experts.22.down_proj, device: CPU + model.layers.10.mlp.experts.22.act, device: CPU + Module: model.layers.10.mlp.experts.23, device: CPU + model.layers.10.mlp.experts.23.gate_proj, device: CPU + model.layers.10.mlp.experts.23.up_proj, device: CPU + model.layers.10.mlp.experts.23.down_proj, device: CPU + model.layers.10.mlp.experts.23.act, device: CPU + Module: model.layers.10.mlp.experts.24, device: CPU + model.layers.10.mlp.experts.24.gate_proj, device: CPU + model.layers.10.mlp.experts.24.up_proj, device: CPU + model.layers.10.mlp.experts.24.down_proj, device: CPU + model.layers.10.mlp.experts.24.act, device: CPU + Module: model.layers.10.mlp.experts.25, device: CPU + model.layers.10.mlp.experts.25.gate_proj, device: CPU + model.layers.10.mlp.experts.25.up_proj, device: CPU + model.layers.10.mlp.experts.25.down_proj, device: CPU + model.layers.10.mlp.experts.25.act, device: CPU + Module: model.layers.10.mlp.experts.26, device: CPU + model.layers.10.mlp.experts.26.gate_proj, device: CPU + model.layers.10.mlp.experts.26.up_proj, device: CPU + model.layers.10.mlp.experts.26.down_proj, device: CPU + model.layers.10.mlp.experts.26.act, device: CPU + Module: model.layers.10.mlp.experts.27, device: CPU + model.layers.10.mlp.experts.27.gate_proj, device: CPU + model.layers.10.mlp.experts.27.up_proj, device: CPU + model.layers.10.mlp.experts.27.down_proj, device: CPU + model.layers.10.mlp.experts.27.act, device: CPU + Module: model.layers.10.mlp.experts.28, device: CPU 
+ model.layers.10.mlp.experts.28.gate_proj, device: CPU + model.layers.10.mlp.experts.28.up_proj, device: CPU + model.layers.10.mlp.experts.28.down_proj, device: CPU + model.layers.10.mlp.experts.28.act, device: CPU + Module: model.layers.10.mlp.experts.29, device: CPU + model.layers.10.mlp.experts.29.gate_proj, device: CPU + model.layers.10.mlp.experts.29.up_proj, device: CPU + model.layers.10.mlp.experts.29.down_proj, device: CPU + model.layers.10.mlp.experts.29.act, device: CPU + Module: model.layers.10.mlp.experts.30, device: CPU + model.layers.10.mlp.experts.30.gate_proj, device: CPU + model.layers.10.mlp.experts.30.up_proj, device: CPU + model.layers.10.mlp.experts.30.down_proj, device: CPU + model.layers.10.mlp.experts.30.act, device: CPU + Module: model.layers.10.mlp.experts.31, device: CPU + model.layers.10.mlp.experts.31.gate_proj, device: CPU + model.layers.10.mlp.experts.31.up_proj, device: CPU + model.layers.10.mlp.experts.31.down_proj, device: CPU + model.layers.10.mlp.experts.31.act, device: CPU + Module: model.layers.10.mlp.experts.32, device: CPU + model.layers.10.mlp.experts.32.gate_proj, device: CPU + model.layers.10.mlp.experts.32.up_proj, device: CPU + model.layers.10.mlp.experts.32.down_proj, device: CPU + model.layers.10.mlp.experts.32.act, device: CPU + Module: model.layers.10.mlp.experts.33, device: CPU + model.layers.10.mlp.experts.33.gate_proj, device: CPU + model.layers.10.mlp.experts.33.up_proj, device: CPU + model.layers.10.mlp.experts.33.down_proj, device: CPU + model.layers.10.mlp.experts.33.act, device: CPU + Module: model.layers.10.mlp.experts.34, device: CPU + model.layers.10.mlp.experts.34.gate_proj, device: CPU + model.layers.10.mlp.experts.34.up_proj, device: CPU + model.layers.10.mlp.experts.34.down_proj, device: CPU + model.layers.10.mlp.experts.34.act, device: CPU + Module: model.layers.10.mlp.experts.35, device: CPU + model.layers.10.mlp.experts.35.gate_proj, device: CPU + model.layers.10.mlp.experts.35.up_proj, device: CPU + model.layers.10.mlp.experts.35.down_proj, device: CPU + model.layers.10.mlp.experts.35.act, device: CPU + Module: model.layers.10.mlp.experts.36, device: CPU + model.layers.10.mlp.experts.36.gate_proj, device: CPU + model.layers.10.mlp.experts.36.up_proj, device: CPU + model.layers.10.mlp.experts.36.down_proj, device: CPU + model.layers.10.mlp.experts.36.act, device: CPU + Module: model.layers.10.mlp.experts.37, device: CPU + model.layers.10.mlp.experts.37.gate_proj, device: CPU + model.layers.10.mlp.experts.37.up_proj, device: CPU + model.layers.10.mlp.experts.37.down_proj, device: CPU + model.layers.10.mlp.experts.37.act, device: CPU + Module: model.layers.10.mlp.experts.38, device: CPU + model.layers.10.mlp.experts.38.gate_proj, device: CPU + model.layers.10.mlp.experts.38.up_proj, device: CPU + model.layers.10.mlp.experts.38.down_proj, device: CPU + model.layers.10.mlp.experts.38.act, device: CPU + Module: model.layers.10.mlp.experts.39, device: CPU + model.layers.10.mlp.experts.39.gate_proj, device: CPU + model.layers.10.mlp.experts.39.up_proj, device: CPU + model.layers.10.mlp.experts.39.down_proj, device: CPU + model.layers.10.mlp.experts.39.act, device: CPU + Module: model.layers.10.mlp.experts.40, device: CPU + model.layers.10.mlp.experts.40.gate_proj, device: CPU + model.layers.10.mlp.experts.40.up_proj, device: CPU + model.layers.10.mlp.experts.40.down_proj, device: CPU + model.layers.10.mlp.experts.40.act, device: CPU + Module: model.layers.10.mlp.experts.41, device: CPU + model.layers.10.mlp.experts.41.gate_proj, 
device: CPU + model.layers.10.mlp.experts.41.up_proj, device: CPU + model.layers.10.mlp.experts.41.down_proj, device: CPU + model.layers.10.mlp.experts.41.act, device: CPU + Module: model.layers.10.mlp.experts.42, device: CPU + model.layers.10.mlp.experts.42.gate_proj, device: CPU + model.layers.10.mlp.experts.42.up_proj, device: CPU + model.layers.10.mlp.experts.42.down_proj, device: CPU + model.layers.10.mlp.experts.42.act, device: CPU + Module: model.layers.10.mlp.experts.43, device: CPU + model.layers.10.mlp.experts.43.gate_proj, device: CPU + model.layers.10.mlp.experts.43.up_proj, device: CPU + model.layers.10.mlp.experts.43.down_proj, device: CPU + model.layers.10.mlp.experts.43.act, device: CPU + Module: model.layers.10.mlp.experts.44, device: CPU + model.layers.10.mlp.experts.44.gate_proj, device: CPU + model.layers.10.mlp.experts.44.up_proj, device: CPU + model.layers.10.mlp.experts.44.down_proj, device: CPU + model.layers.10.mlp.experts.44.act, device: CPU + Module: model.layers.10.mlp.experts.45, device: CPU + model.layers.10.mlp.experts.45.gate_proj, device: CPU + model.layers.10.mlp.experts.45.up_proj, device: CPU + model.layers.10.mlp.experts.45.down_proj, device: CPU + model.layers.10.mlp.experts.45.act, device: CPU + Module: model.layers.10.mlp.experts.46, device: CPU + model.layers.10.mlp.experts.46.gate_proj, device: CPU + model.layers.10.mlp.experts.46.up_proj, device: CPU + model.layers.10.mlp.experts.46.down_proj, device: CPU + model.layers.10.mlp.experts.46.act, device: CPU + Module: model.layers.10.mlp.experts.47, device: CPU + model.layers.10.mlp.experts.47.gate_proj, device: CPU + model.layers.10.mlp.experts.47.up_proj, device: CPU + model.layers.10.mlp.experts.47.down_proj, device: CPU + model.layers.10.mlp.experts.47.act, device: CPU + Module: model.layers.10.mlp.experts.48, device: CPU + model.layers.10.mlp.experts.48.gate_proj, device: CPU + model.layers.10.mlp.experts.48.up_proj, device: CPU + model.layers.10.mlp.experts.48.down_proj, device: CPU + model.layers.10.mlp.experts.48.act, device: CPU + Module: model.layers.10.mlp.experts.49, device: CPU + model.layers.10.mlp.experts.49.gate_proj, device: CPU + model.layers.10.mlp.experts.49.up_proj, device: CPU + model.layers.10.mlp.experts.49.down_proj, device: CPU + model.layers.10.mlp.experts.49.act, device: CPU + Module: model.layers.10.mlp.experts.50, device: CPU + model.layers.10.mlp.experts.50.gate_proj, device: CPU + model.layers.10.mlp.experts.50.up_proj, device: CPU + model.layers.10.mlp.experts.50.down_proj, device: CPU + model.layers.10.mlp.experts.50.act, device: CPU + Module: model.layers.10.mlp.experts.51, device: CPU + model.layers.10.mlp.experts.51.gate_proj, device: CPU + model.layers.10.mlp.experts.51.up_proj, device: CPU + model.layers.10.mlp.experts.51.down_proj, device: CPU + model.layers.10.mlp.experts.51.act, device: CPU + Module: model.layers.10.mlp.experts.52, device: CPU + model.layers.10.mlp.experts.52.gate_proj, device: CPU + model.layers.10.mlp.experts.52.up_proj, device: CPU + model.layers.10.mlp.experts.52.down_proj, device: CPU + model.layers.10.mlp.experts.52.act, device: CPU + Module: model.layers.10.mlp.experts.53, device: CPU + model.layers.10.mlp.experts.53.gate_proj, device: CPU + model.layers.10.mlp.experts.53.up_proj, device: CPU + model.layers.10.mlp.experts.53.down_proj, device: CPU + model.layers.10.mlp.experts.53.act, device: CPU + Module: model.layers.10.mlp.experts.54, device: CPU + model.layers.10.mlp.experts.54.gate_proj, device: CPU + 
model.layers.10.mlp.experts.54.up_proj, device: CPU + model.layers.10.mlp.experts.54.down_proj, device: CPU + model.layers.10.mlp.experts.54.act, device: CPU + Module: model.layers.10.mlp.experts.55, device: CPU + model.layers.10.mlp.experts.55.gate_proj, device: CPU + model.layers.10.mlp.experts.55.up_proj, device: CPU + model.layers.10.mlp.experts.55.down_proj, device: CPU + model.layers.10.mlp.experts.55.act, device: CPU + Module: model.layers.10.mlp.experts.56, device: CPU + model.layers.10.mlp.experts.56.gate_proj, device: CPU + model.layers.10.mlp.experts.56.up_proj, device: CPU + model.layers.10.mlp.experts.56.down_proj, device: CPU + model.layers.10.mlp.experts.56.act, device: CPU + Module: model.layers.10.mlp.experts.57, device: CPU + model.layers.10.mlp.experts.57.gate_proj, device: CPU + model.layers.10.mlp.experts.57.up_proj, device: CPU + model.layers.10.mlp.experts.57.down_proj, device: CPU + model.layers.10.mlp.experts.57.act, device: CPU + Module: model.layers.10.mlp.experts.58, device: CPU + model.layers.10.mlp.experts.58.gate_proj, device: CPU + model.layers.10.mlp.experts.58.up_proj, device: CPU + model.layers.10.mlp.experts.58.down_proj, device: CPU + model.layers.10.mlp.experts.58.act, device: CPU + Module: model.layers.10.mlp.experts.59, device: CPU + model.layers.10.mlp.experts.59.gate_proj, device: CPU + model.layers.10.mlp.experts.59.up_proj, device: CPU + model.layers.10.mlp.experts.59.down_proj, device: CPU + model.layers.10.mlp.experts.59.act, device: CPU + Module: model.layers.10.mlp.experts.60, device: CPU + model.layers.10.mlp.experts.60.gate_proj, device: CPU + model.layers.10.mlp.experts.60.up_proj, device: CPU + model.layers.10.mlp.experts.60.down_proj, device: CPU + model.layers.10.mlp.experts.60.act, device: CPU + Module: model.layers.10.mlp.experts.61, device: CPU + model.layers.10.mlp.experts.61.gate_proj, device: CPU + model.layers.10.mlp.experts.61.up_proj, device: CPU + model.layers.10.mlp.experts.61.down_proj, device: CPU + model.layers.10.mlp.experts.61.act, device: CPU + Module: model.layers.10.mlp.experts.62, device: CPU + model.layers.10.mlp.experts.62.gate_proj, device: CPU + model.layers.10.mlp.experts.62.up_proj, device: CPU + model.layers.10.mlp.experts.62.down_proj, device: CPU + model.layers.10.mlp.experts.62.act, device: CPU + Module: model.layers.10.mlp.experts.63, device: CPU + model.layers.10.mlp.experts.63.gate_proj, device: CPU + model.layers.10.mlp.experts.63.up_proj, device: CPU + model.layers.10.mlp.experts.63.down_proj, device: CPU + model.layers.10.mlp.experts.63.act, device: CPU + Module: model.layers.10.mlp.gate, device: CPU + model.layers.10.mlp.gate.weight, device: CPU + Module: model.layers.10.mlp.shared_experts, device: CPU + model.layers.10.mlp.shared_experts.gate_proj, device: CPU + model.layers.10.mlp.shared_experts.up_proj, device: CPU + model.layers.10.mlp.shared_experts.down_proj, device: CPU + model.layers.10.mlp.shared_experts.act, device: CPU + model.layers.10.input_layernorm, device: CPU + model.layers.10.post_attention_layernorm, device: CPU + Module: model.layers.11, device: CPU + Module: model.layers.11.self_attn, device: CPU + model.layers.11.self_attn.q_proj, device: CPU + model.layers.11.self_attn.k_proj, device: CPU + model.layers.11.self_attn.v_proj, device: CPU + model.layers.11.self_attn.o_proj, device: CPU + model.layers.11.self_attn.q_rope, device: CPU + model.layers.11.self_attn.k_rope, device: CPU + Module: model.layers.11.mlp, device: CPU + Module: model.layers.11.mlp.experts, device: CPU + 
Module: model.layers.11.mlp.experts.0, device: CPU + model.layers.11.mlp.experts.0.gate_proj, device: CPU + model.layers.11.mlp.experts.0.up_proj, device: CPU + model.layers.11.mlp.experts.0.down_proj, device: CPU + model.layers.11.mlp.experts.0.act, device: CPU + Module: model.layers.11.mlp.experts.1, device: CPU + model.layers.11.mlp.experts.1.gate_proj, device: CPU + model.layers.11.mlp.experts.1.up_proj, device: CPU + model.layers.11.mlp.experts.1.down_proj, device: CPU + model.layers.11.mlp.experts.1.act, device: CPU + Module: model.layers.11.mlp.experts.2, device: CPU + model.layers.11.mlp.experts.2.gate_proj, device: CPU + model.layers.11.mlp.experts.2.up_proj, device: CPU + model.layers.11.mlp.experts.2.down_proj, device: CPU + model.layers.11.mlp.experts.2.act, device: CPU + Module: model.layers.11.mlp.experts.3, device: CPU + model.layers.11.mlp.experts.3.gate_proj, device: CPU + model.layers.11.mlp.experts.3.up_proj, device: CPU + model.layers.11.mlp.experts.3.down_proj, device: CPU + model.layers.11.mlp.experts.3.act, device: CPU + Module: model.layers.11.mlp.experts.4, device: CPU + model.layers.11.mlp.experts.4.gate_proj, device: CPU + model.layers.11.mlp.experts.4.up_proj, device: CPU + model.layers.11.mlp.experts.4.down_proj, device: CPU + model.layers.11.mlp.experts.4.act, device: CPU + Module: model.layers.11.mlp.experts.5, device: CPU + model.layers.11.mlp.experts.5.gate_proj, device: CPU + model.layers.11.mlp.experts.5.up_proj, device: CPU + model.layers.11.mlp.experts.5.down_proj, device: CPU + model.layers.11.mlp.experts.5.act, device: CPU + Module: model.layers.11.mlp.experts.6, device: CPU + model.layers.11.mlp.experts.6.gate_proj, device: CPU + model.layers.11.mlp.experts.6.up_proj, device: CPU + model.layers.11.mlp.experts.6.down_proj, device: CPU + model.layers.11.mlp.experts.6.act, device: CPU + Module: model.layers.11.mlp.experts.7, device: CPU + model.layers.11.mlp.experts.7.gate_proj, device: CPU + model.layers.11.mlp.experts.7.up_proj, device: CPU + model.layers.11.mlp.experts.7.down_proj, device: CPU + model.layers.11.mlp.experts.7.act, device: CPU + Module: model.layers.11.mlp.experts.8, device: CPU + model.layers.11.mlp.experts.8.gate_proj, device: CPU + model.layers.11.mlp.experts.8.up_proj, device: CPU + model.layers.11.mlp.experts.8.down_proj, device: CPU + model.layers.11.mlp.experts.8.act, device: CPU + Module: model.layers.11.mlp.experts.9, device: CPU + model.layers.11.mlp.experts.9.gate_proj, device: CPU + model.layers.11.mlp.experts.9.up_proj, device: CPU + model.layers.11.mlp.experts.9.down_proj, device: CPU + model.layers.11.mlp.experts.9.act, device: CPU + Module: model.layers.11.mlp.experts.10, device: CPU + model.layers.11.mlp.experts.10.gate_proj, device: CPU + model.layers.11.mlp.experts.10.up_proj, device: CPU + model.layers.11.mlp.experts.10.down_proj, device: CPU + model.layers.11.mlp.experts.10.act, device: CPU + Module: model.layers.11.mlp.experts.11, device: CPU + model.layers.11.mlp.experts.11.gate_proj, device: CPU + model.layers.11.mlp.experts.11.up_proj, device: CPU + model.layers.11.mlp.experts.11.down_proj, device: CPU + model.layers.11.mlp.experts.11.act, device: CPU + Module: model.layers.11.mlp.experts.12, device: CPU + model.layers.11.mlp.experts.12.gate_proj, device: CPU + model.layers.11.mlp.experts.12.up_proj, device: CPU + model.layers.11.mlp.experts.12.down_proj, device: CPU + model.layers.11.mlp.experts.12.act, device: CPU + Module: model.layers.11.mlp.experts.13, device: CPU + 
model.layers.11.mlp.experts.13.gate_proj, device: CPU + model.layers.11.mlp.experts.13.up_proj, device: CPU + model.layers.11.mlp.experts.13.down_proj, device: CPU + model.layers.11.mlp.experts.13.act, device: CPU + Module: model.layers.11.mlp.experts.14, device: CPU + model.layers.11.mlp.experts.14.gate_proj, device: CPU + model.layers.11.mlp.experts.14.up_proj, device: CPU + model.layers.11.mlp.experts.14.down_proj, device: CPU + model.layers.11.mlp.experts.14.act, device: CPU + Module: model.layers.11.mlp.experts.15, device: CPU + model.layers.11.mlp.experts.15.gate_proj, device: CPU + model.layers.11.mlp.experts.15.up_proj, device: CPU + model.layers.11.mlp.experts.15.down_proj, device: CPU + model.layers.11.mlp.experts.15.act, device: CPU + Module: model.layers.11.mlp.experts.16, device: CPU + model.layers.11.mlp.experts.16.gate_proj, device: CPU + model.layers.11.mlp.experts.16.up_proj, device: CPU + model.layers.11.mlp.experts.16.down_proj, device: CPU + model.layers.11.mlp.experts.16.act, device: CPU + Module: model.layers.11.mlp.experts.17, device: CPU + model.layers.11.mlp.experts.17.gate_proj, device: CPU + model.layers.11.mlp.experts.17.up_proj, device: CPU + model.layers.11.mlp.experts.17.down_proj, device: CPU + model.layers.11.mlp.experts.17.act, device: CPU + Module: model.layers.11.mlp.experts.18, device: CPU + model.layers.11.mlp.experts.18.gate_proj, device: CPU + model.layers.11.mlp.experts.18.up_proj, device: CPU + model.layers.11.mlp.experts.18.down_proj, device: CPU + model.layers.11.mlp.experts.18.act, device: CPU + Module: model.layers.11.mlp.experts.19, device: CPU + model.layers.11.mlp.experts.19.gate_proj, device: CPU + model.layers.11.mlp.experts.19.up_proj, device: CPU + model.layers.11.mlp.experts.19.down_proj, device: CPU + model.layers.11.mlp.experts.19.act, device: CPU + Module: model.layers.11.mlp.experts.20, device: CPU + model.layers.11.mlp.experts.20.gate_proj, device: CPU + model.layers.11.mlp.experts.20.up_proj, device: CPU + model.layers.11.mlp.experts.20.down_proj, device: CPU + model.layers.11.mlp.experts.20.act, device: CPU + Module: model.layers.11.mlp.experts.21, device: CPU + model.layers.11.mlp.experts.21.gate_proj, device: CPU + model.layers.11.mlp.experts.21.up_proj, device: CPU + model.layers.11.mlp.experts.21.down_proj, device: CPU + model.layers.11.mlp.experts.21.act, device: CPU + Module: model.layers.11.mlp.experts.22, device: CPU + model.layers.11.mlp.experts.22.gate_proj, device: CPU + model.layers.11.mlp.experts.22.up_proj, device: CPU + model.layers.11.mlp.experts.22.down_proj, device: CPU + model.layers.11.mlp.experts.22.act, device: CPU + Module: model.layers.11.mlp.experts.23, device: CPU + model.layers.11.mlp.experts.23.gate_proj, device: CPU + model.layers.11.mlp.experts.23.up_proj, device: CPU + model.layers.11.mlp.experts.23.down_proj, device: CPU + model.layers.11.mlp.experts.23.act, device: CPU + Module: model.layers.11.mlp.experts.24, device: CPU + model.layers.11.mlp.experts.24.gate_proj, device: CPU + model.layers.11.mlp.experts.24.up_proj, device: CPU + model.layers.11.mlp.experts.24.down_proj, device: CPU + model.layers.11.mlp.experts.24.act, device: CPU + Module: model.layers.11.mlp.experts.25, device: CPU + model.layers.11.mlp.experts.25.gate_proj, device: CPU + model.layers.11.mlp.experts.25.up_proj, device: CPU + model.layers.11.mlp.experts.25.down_proj, device: CPU + model.layers.11.mlp.experts.25.act, device: CPU + Module: model.layers.11.mlp.experts.26, device: CPU + model.layers.11.mlp.experts.26.gate_proj, 
device: CPU + model.layers.11.mlp.experts.26.up_proj, device: CPU + model.layers.11.mlp.experts.26.down_proj, device: CPU + model.layers.11.mlp.experts.26.act, device: CPU + Module: model.layers.11.mlp.experts.27, device: CPU + model.layers.11.mlp.experts.27.gate_proj, device: CPU + model.layers.11.mlp.experts.27.up_proj, device: CPU + model.layers.11.mlp.experts.27.down_proj, device: CPU + model.layers.11.mlp.experts.27.act, device: CPU + Module: model.layers.11.mlp.experts.28, device: CPU + model.layers.11.mlp.experts.28.gate_proj, device: CPU + model.layers.11.mlp.experts.28.up_proj, device: CPU + model.layers.11.mlp.experts.28.down_proj, device: CPU + model.layers.11.mlp.experts.28.act, device: CPU + Module: model.layers.11.mlp.experts.29, device: CPU + model.layers.11.mlp.experts.29.gate_proj, device: CPU + model.layers.11.mlp.experts.29.up_proj, device: CPU + model.layers.11.mlp.experts.29.down_proj, device: CPU + model.layers.11.mlp.experts.29.act, device: CPU + Module: model.layers.11.mlp.experts.30, device: CPU + model.layers.11.mlp.experts.30.gate_proj, device: CPU + model.layers.11.mlp.experts.30.up_proj, device: CPU + model.layers.11.mlp.experts.30.down_proj, device: CPU + model.layers.11.mlp.experts.30.act, device: CPU + Module: model.layers.11.mlp.experts.31, device: CPU + model.layers.11.mlp.experts.31.gate_proj, device: CPU + model.layers.11.mlp.experts.31.up_proj, device: CPU + model.layers.11.mlp.experts.31.down_proj, device: CPU + model.layers.11.mlp.experts.31.act, device: CPU + Module: model.layers.11.mlp.experts.32, device: CPU + model.layers.11.mlp.experts.32.gate_proj, device: CPU + model.layers.11.mlp.experts.32.up_proj, device: CPU + model.layers.11.mlp.experts.32.down_proj, device: CPU + model.layers.11.mlp.experts.32.act, device: CPU + Module: model.layers.11.mlp.experts.33, device: CPU + model.layers.11.mlp.experts.33.gate_proj, device: CPU + model.layers.11.mlp.experts.33.up_proj, device: CPU + model.layers.11.mlp.experts.33.down_proj, device: CPU + model.layers.11.mlp.experts.33.act, device: CPU + Module: model.layers.11.mlp.experts.34, device: CPU + model.layers.11.mlp.experts.34.gate_proj, device: CPU + model.layers.11.mlp.experts.34.up_proj, device: CPU + model.layers.11.mlp.experts.34.down_proj, device: CPU + model.layers.11.mlp.experts.34.act, device: CPU + Module: model.layers.11.mlp.experts.35, device: CPU + model.layers.11.mlp.experts.35.gate_proj, device: CPU + model.layers.11.mlp.experts.35.up_proj, device: CPU + model.layers.11.mlp.experts.35.down_proj, device: CPU + model.layers.11.mlp.experts.35.act, device: CPU + Module: model.layers.11.mlp.experts.36, device: CPU + model.layers.11.mlp.experts.36.gate_proj, device: CPU + model.layers.11.mlp.experts.36.up_proj, device: CPU + model.layers.11.mlp.experts.36.down_proj, device: CPU + model.layers.11.mlp.experts.36.act, device: CPU + Module: model.layers.11.mlp.experts.37, device: CPU + model.layers.11.mlp.experts.37.gate_proj, device: CPU + model.layers.11.mlp.experts.37.up_proj, device: CPU + model.layers.11.mlp.experts.37.down_proj, device: CPU + model.layers.11.mlp.experts.37.act, device: CPU + Module: model.layers.11.mlp.experts.38, device: CPU + model.layers.11.mlp.experts.38.gate_proj, device: CPU + model.layers.11.mlp.experts.38.up_proj, device: CPU + model.layers.11.mlp.experts.38.down_proj, device: CPU + model.layers.11.mlp.experts.38.act, device: CPU + Module: model.layers.11.mlp.experts.39, device: CPU + model.layers.11.mlp.experts.39.gate_proj, device: CPU + 
model.layers.11.mlp.experts.39.up_proj, device: CPU + model.layers.11.mlp.experts.39.down_proj, device: CPU + model.layers.11.mlp.experts.39.act, device: CPU + Module: model.layers.11.mlp.experts.40, device: CPU + model.layers.11.mlp.experts.40.gate_proj, device: CPU + model.layers.11.mlp.experts.40.up_proj, device: CPU + model.layers.11.mlp.experts.40.down_proj, device: CPU + model.layers.11.mlp.experts.40.act, device: CPU + Module: model.layers.11.mlp.experts.41, device: CPU + model.layers.11.mlp.experts.41.gate_proj, device: CPU + model.layers.11.mlp.experts.41.up_proj, device: CPU + model.layers.11.mlp.experts.41.down_proj, device: CPU + model.layers.11.mlp.experts.41.act, device: CPU + Module: model.layers.11.mlp.experts.42, device: CPU + model.layers.11.mlp.experts.42.gate_proj, device: CPU + model.layers.11.mlp.experts.42.up_proj, device: CPU + model.layers.11.mlp.experts.42.down_proj, device: CPU + model.layers.11.mlp.experts.42.act, device: CPU + Module: model.layers.11.mlp.experts.43, device: CPU + model.layers.11.mlp.experts.43.gate_proj, device: CPU + model.layers.11.mlp.experts.43.up_proj, device: CPU + model.layers.11.mlp.experts.43.down_proj, device: CPU + model.layers.11.mlp.experts.43.act, device: CPU + Module: model.layers.11.mlp.experts.44, device: CPU + model.layers.11.mlp.experts.44.gate_proj, device: CPU + model.layers.11.mlp.experts.44.up_proj, device: CPU + model.layers.11.mlp.experts.44.down_proj, device: CPU + model.layers.11.mlp.experts.44.act, device: CPU + Module: model.layers.11.mlp.experts.45, device: CPU + model.layers.11.mlp.experts.45.gate_proj, device: CPU + model.layers.11.mlp.experts.45.up_proj, device: CPU + model.layers.11.mlp.experts.45.down_proj, device: CPU + model.layers.11.mlp.experts.45.act, device: CPU + Module: model.layers.11.mlp.experts.46, device: CPU + model.layers.11.mlp.experts.46.gate_proj, device: CPU + model.layers.11.mlp.experts.46.up_proj, device: CPU + model.layers.11.mlp.experts.46.down_proj, device: CPU + model.layers.11.mlp.experts.46.act, device: CPU + Module: model.layers.11.mlp.experts.47, device: CPU + model.layers.11.mlp.experts.47.gate_proj, device: CPU + model.layers.11.mlp.experts.47.up_proj, device: CPU + model.layers.11.mlp.experts.47.down_proj, device: CPU + model.layers.11.mlp.experts.47.act, device: CPU + Module: model.layers.11.mlp.experts.48, device: CPU + model.layers.11.mlp.experts.48.gate_proj, device: CPU + model.layers.11.mlp.experts.48.up_proj, device: CPU + model.layers.11.mlp.experts.48.down_proj, device: CPU + model.layers.11.mlp.experts.48.act, device: CPU + Module: model.layers.11.mlp.experts.49, device: CPU + model.layers.11.mlp.experts.49.gate_proj, device: CPU + model.layers.11.mlp.experts.49.up_proj, device: CPU + model.layers.11.mlp.experts.49.down_proj, device: CPU + model.layers.11.mlp.experts.49.act, device: CPU + Module: model.layers.11.mlp.experts.50, device: CPU + model.layers.11.mlp.experts.50.gate_proj, device: CPU + model.layers.11.mlp.experts.50.up_proj, device: CPU + model.layers.11.mlp.experts.50.down_proj, device: CPU + model.layers.11.mlp.experts.50.act, device: CPU + Module: model.layers.11.mlp.experts.51, device: CPU + model.layers.11.mlp.experts.51.gate_proj, device: CPU + model.layers.11.mlp.experts.51.up_proj, device: CPU + model.layers.11.mlp.experts.51.down_proj, device: CPU + model.layers.11.mlp.experts.51.act, device: CPU + Module: model.layers.11.mlp.experts.52, device: CPU + model.layers.11.mlp.experts.52.gate_proj, device: CPU + model.layers.11.mlp.experts.52.up_proj, 
device: CPU + model.layers.11.mlp.experts.52.down_proj, device: CPU + model.layers.11.mlp.experts.52.act, device: CPU + Module: model.layers.11.mlp.experts.53, device: CPU + model.layers.11.mlp.experts.53.gate_proj, device: CPU + model.layers.11.mlp.experts.53.up_proj, device: CPU + model.layers.11.mlp.experts.53.down_proj, device: CPU + model.layers.11.mlp.experts.53.act, device: CPU + Module: model.layers.11.mlp.experts.54, device: CPU + model.layers.11.mlp.experts.54.gate_proj, device: CPU + model.layers.11.mlp.experts.54.up_proj, device: CPU + model.layers.11.mlp.experts.54.down_proj, device: CPU + model.layers.11.mlp.experts.54.act, device: CPU + Module: model.layers.11.mlp.experts.55, device: CPU + model.layers.11.mlp.experts.55.gate_proj, device: CPU + model.layers.11.mlp.experts.55.up_proj, device: CPU + model.layers.11.mlp.experts.55.down_proj, device: CPU + model.layers.11.mlp.experts.55.act, device: CPU + Module: model.layers.11.mlp.experts.56, device: CPU + model.layers.11.mlp.experts.56.gate_proj, device: CPU + model.layers.11.mlp.experts.56.up_proj, device: CPU + model.layers.11.mlp.experts.56.down_proj, device: CPU + model.layers.11.mlp.experts.56.act, device: CPU + Module: model.layers.11.mlp.experts.57, device: CPU + model.layers.11.mlp.experts.57.gate_proj, device: CPU + model.layers.11.mlp.experts.57.up_proj, device: CPU + model.layers.11.mlp.experts.57.down_proj, device: CPU + model.layers.11.mlp.experts.57.act, device: CPU + Module: model.layers.11.mlp.experts.58, device: CPU + model.layers.11.mlp.experts.58.gate_proj, device: CPU + model.layers.11.mlp.experts.58.up_proj, device: CPU + model.layers.11.mlp.experts.58.down_proj, device: CPU + model.layers.11.mlp.experts.58.act, device: CPU + Module: model.layers.11.mlp.experts.59, device: CPU + model.layers.11.mlp.experts.59.gate_proj, device: CPU + model.layers.11.mlp.experts.59.up_proj, device: CPU + model.layers.11.mlp.experts.59.down_proj, device: CPU + model.layers.11.mlp.experts.59.act, device: CPU + Module: model.layers.11.mlp.experts.60, device: CPU + model.layers.11.mlp.experts.60.gate_proj, device: CPU + model.layers.11.mlp.experts.60.up_proj, device: CPU + model.layers.11.mlp.experts.60.down_proj, device: CPU + model.layers.11.mlp.experts.60.act, device: CPU + Module: model.layers.11.mlp.experts.61, device: CPU + model.layers.11.mlp.experts.61.gate_proj, device: CPU + model.layers.11.mlp.experts.61.up_proj, device: CPU + model.layers.11.mlp.experts.61.down_proj, device: CPU + model.layers.11.mlp.experts.61.act, device: CPU + Module: model.layers.11.mlp.experts.62, device: CPU + model.layers.11.mlp.experts.62.gate_proj, device: CPU + model.layers.11.mlp.experts.62.up_proj, device: CPU + model.layers.11.mlp.experts.62.down_proj, device: CPU + model.layers.11.mlp.experts.62.act, device: CPU + Module: model.layers.11.mlp.experts.63, device: CPU + model.layers.11.mlp.experts.63.gate_proj, device: CPU + model.layers.11.mlp.experts.63.up_proj, device: CPU + model.layers.11.mlp.experts.63.down_proj, device: CPU + model.layers.11.mlp.experts.63.act, device: CPU + Module: model.layers.11.mlp.gate, device: CPU + model.layers.11.mlp.gate.weight, device: CPU + Module: model.layers.11.mlp.shared_experts, device: CPU + model.layers.11.mlp.shared_experts.gate_proj, device: CPU + model.layers.11.mlp.shared_experts.up_proj, device: CPU + model.layers.11.mlp.shared_experts.down_proj, device: CPU + model.layers.11.mlp.shared_experts.act, device: CPU + model.layers.11.input_layernorm, device: CPU + 
model.layers.11.post_attention_layernorm, device: CPU + model.norm, device: CPU + Module: model.sam_model, device: CPU + Module: model.sam_model.patch_embed, device: CPU + model.sam_model.patch_embed.proj, device: CPU + model.sam_model.pos_embed, device: CPU + Module: model.sam_model.blocks, device: CPU + Module: model.sam_model.blocks.0, device: CPU + model.sam_model.blocks.0.norm1, device: CPU + Module: model.sam_model.blocks.0.attn, device: CPU + model.sam_model.blocks.0.attn.qkv, device: CPU + model.sam_model.blocks.0.attn.proj, device: CPU + model.sam_model.blocks.0.attn.rel_pos_h, device: CPU + model.sam_model.blocks.0.attn.rel_pos_w, device: CPU + model.sam_model.blocks.0.norm2, device: CPU + Module: model.sam_model.blocks.0.mlp, device: CPU + model.sam_model.blocks.0.mlp.lin1, device: CPU + model.sam_model.blocks.0.mlp.lin2, device: CPU + model.sam_model.blocks.0.mlp.act, device: CPU + Module: model.sam_model.blocks.1, device: CPU + model.sam_model.blocks.1.norm1, device: CPU + Module: model.sam_model.blocks.1.attn, device: CPU + model.sam_model.blocks.1.attn.qkv, device: CPU + model.sam_model.blocks.1.attn.proj, device: CPU + model.sam_model.blocks.1.attn.rel_pos_h, device: CPU + model.sam_model.blocks.1.attn.rel_pos_w, device: CPU + model.sam_model.blocks.1.norm2, device: CPU + Module: model.sam_model.blocks.1.mlp, device: CPU + model.sam_model.blocks.1.mlp.lin1, device: CPU + model.sam_model.blocks.1.mlp.lin2, device: CPU + model.sam_model.blocks.1.mlp.act, device: CPU + Module: model.sam_model.blocks.2, device: CPU + model.sam_model.blocks.2.norm1, device: CPU + Module: model.sam_model.blocks.2.attn, device: CPU + model.sam_model.blocks.2.attn.qkv, device: CPU + model.sam_model.blocks.2.attn.proj, device: CPU + model.sam_model.blocks.2.attn.rel_pos_h, device: CPU + model.sam_model.blocks.2.attn.rel_pos_w, device: CPU + model.sam_model.blocks.2.norm2, device: CPU + Module: model.sam_model.blocks.2.mlp, device: CPU + model.sam_model.blocks.2.mlp.lin1, device: CPU + model.sam_model.blocks.2.mlp.lin2, device: CPU + model.sam_model.blocks.2.mlp.act, device: CPU + Module: model.sam_model.blocks.3, device: CPU + model.sam_model.blocks.3.norm1, device: CPU + Module: model.sam_model.blocks.3.attn, device: CPU + model.sam_model.blocks.3.attn.qkv, device: CPU + model.sam_model.blocks.3.attn.proj, device: CPU + model.sam_model.blocks.3.attn.rel_pos_h, device: CPU + model.sam_model.blocks.3.attn.rel_pos_w, device: CPU + model.sam_model.blocks.3.norm2, device: CPU + Module: model.sam_model.blocks.3.mlp, device: CPU + model.sam_model.blocks.3.mlp.lin1, device: CPU + model.sam_model.blocks.3.mlp.lin2, device: CPU + model.sam_model.blocks.3.mlp.act, device: CPU + Module: model.sam_model.blocks.4, device: CPU + model.sam_model.blocks.4.norm1, device: CPU + Module: model.sam_model.blocks.4.attn, device: CPU + model.sam_model.blocks.4.attn.qkv, device: CPU + model.sam_model.blocks.4.attn.proj, device: CPU + model.sam_model.blocks.4.attn.rel_pos_h, device: CPU + model.sam_model.blocks.4.attn.rel_pos_w, device: CPU + model.sam_model.blocks.4.norm2, device: CPU + Module: model.sam_model.blocks.4.mlp, device: CPU + model.sam_model.blocks.4.mlp.lin1, device: CPU + model.sam_model.blocks.4.mlp.lin2, device: CPU + model.sam_model.blocks.4.mlp.act, device: CPU + Module: model.sam_model.blocks.5, device: CPU + model.sam_model.blocks.5.norm1, device: CPU + Module: model.sam_model.blocks.5.attn, device: CPU + model.sam_model.blocks.5.attn.qkv, device: CPU + model.sam_model.blocks.5.attn.proj, device: CPU + 
model.sam_model.blocks.5.attn.rel_pos_h, device: CPU + model.sam_model.blocks.5.attn.rel_pos_w, device: CPU + model.sam_model.blocks.5.norm2, device: CPU + Module: model.sam_model.blocks.5.mlp, device: CPU + model.sam_model.blocks.5.mlp.lin1, device: CPU + model.sam_model.blocks.5.mlp.lin2, device: CPU + model.sam_model.blocks.5.mlp.act, device: CPU + Module: model.sam_model.blocks.6, device: CPU + model.sam_model.blocks.6.norm1, device: CPU + Module: model.sam_model.blocks.6.attn, device: CPU + model.sam_model.blocks.6.attn.qkv, device: CPU + model.sam_model.blocks.6.attn.proj, device: CPU + model.sam_model.blocks.6.attn.rel_pos_h, device: CPU + model.sam_model.blocks.6.attn.rel_pos_w, device: CPU + model.sam_model.blocks.6.norm2, device: CPU + Module: model.sam_model.blocks.6.mlp, device: CPU + model.sam_model.blocks.6.mlp.lin1, device: CPU + model.sam_model.blocks.6.mlp.lin2, device: CPU + model.sam_model.blocks.6.mlp.act, device: CPU + Module: model.sam_model.blocks.7, device: CPU + model.sam_model.blocks.7.norm1, device: CPU + Module: model.sam_model.blocks.7.attn, device: CPU + model.sam_model.blocks.7.attn.qkv, device: CPU + model.sam_model.blocks.7.attn.proj, device: CPU + model.sam_model.blocks.7.attn.rel_pos_h, device: CPU + model.sam_model.blocks.7.attn.rel_pos_w, device: CPU + model.sam_model.blocks.7.norm2, device: CPU + Module: model.sam_model.blocks.7.mlp, device: CPU + model.sam_model.blocks.7.mlp.lin1, device: CPU + model.sam_model.blocks.7.mlp.lin2, device: CPU + model.sam_model.blocks.7.mlp.act, device: CPU + Module: model.sam_model.blocks.8, device: CPU + model.sam_model.blocks.8.norm1, device: CPU + Module: model.sam_model.blocks.8.attn, device: CPU + model.sam_model.blocks.8.attn.qkv, device: CPU + model.sam_model.blocks.8.attn.proj, device: CPU + model.sam_model.blocks.8.attn.rel_pos_h, device: CPU + model.sam_model.blocks.8.attn.rel_pos_w, device: CPU + model.sam_model.blocks.8.norm2, device: CPU + Module: model.sam_model.blocks.8.mlp, device: CPU + model.sam_model.blocks.8.mlp.lin1, device: CPU + model.sam_model.blocks.8.mlp.lin2, device: CPU + model.sam_model.blocks.8.mlp.act, device: CPU + Module: model.sam_model.blocks.9, device: CPU + model.sam_model.blocks.9.norm1, device: CPU + Module: model.sam_model.blocks.9.attn, device: CPU + model.sam_model.blocks.9.attn.qkv, device: CPU + model.sam_model.blocks.9.attn.proj, device: CPU + model.sam_model.blocks.9.attn.rel_pos_h, device: CPU + model.sam_model.blocks.9.attn.rel_pos_w, device: CPU + model.sam_model.blocks.9.norm2, device: CPU + Module: model.sam_model.blocks.9.mlp, device: CPU + model.sam_model.blocks.9.mlp.lin1, device: CPU + model.sam_model.blocks.9.mlp.lin2, device: CPU + model.sam_model.blocks.9.mlp.act, device: CPU + Module: model.sam_model.blocks.10, device: CPU + model.sam_model.blocks.10.norm1, device: CPU + Module: model.sam_model.blocks.10.attn, device: CPU + model.sam_model.blocks.10.attn.qkv, device: CPU + model.sam_model.blocks.10.attn.proj, device: CPU + model.sam_model.blocks.10.attn.rel_pos_h, device: CPU + model.sam_model.blocks.10.attn.rel_pos_w, device: CPU + model.sam_model.blocks.10.norm2, device: CPU + Module: model.sam_model.blocks.10.mlp, device: CPU + model.sam_model.blocks.10.mlp.lin1, device: CPU + model.sam_model.blocks.10.mlp.lin2, device: CPU + model.sam_model.blocks.10.mlp.act, device: CPU + Module: model.sam_model.blocks.11, device: CPU + model.sam_model.blocks.11.norm1, device: CPU + Module: model.sam_model.blocks.11.attn, device: CPU + model.sam_model.blocks.11.attn.qkv, 
device: CPU + model.sam_model.blocks.11.attn.proj, device: CPU + model.sam_model.blocks.11.attn.rel_pos_h, device: CPU + model.sam_model.blocks.11.attn.rel_pos_w, device: CPU + model.sam_model.blocks.11.norm2, device: CPU + Module: model.sam_model.blocks.11.mlp, device: CPU + model.sam_model.blocks.11.mlp.lin1, device: CPU + model.sam_model.blocks.11.mlp.lin2, device: CPU + model.sam_model.blocks.11.mlp.act, device: CPU + Module: model.sam_model.neck, device: CPU + model.sam_model.neck.0, device: CPU + model.sam_model.neck.1, device: CPU + model.sam_model.neck.2, device: CPU + model.sam_model.neck.3, device: CPU + model.sam_model.net_2, device: CPU + model.sam_model.net_3, device: CPU + Module: model.vision_model, device: CPU + Module: model.vision_model.embeddings, device: CPU + model.vision_model.embeddings.class_embedding, device: CPU + model.vision_model.embeddings.patch_embedding, device: CPU + model.vision_model.embeddings.position_embedding, device: CPU + Module: model.vision_model.transformer, device: CPU + Module: model.vision_model.transformer.layers, device: CPU + Module: model.vision_model.transformer.layers.0, device: CPU + Module: model.vision_model.transformer.layers.0.self_attn, device: CPU + model.vision_model.transformer.layers.0.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.0.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.0.mlp, device: CPU + model.vision_model.transformer.layers.0.mlp.fc1, device: CPU + model.vision_model.transformer.layers.0.mlp.fc2, device: CPU + model.vision_model.transformer.layers.0.mlp.act, device: CPU + model.vision_model.transformer.layers.0.layer_norm1, device: CPU + model.vision_model.transformer.layers.0.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.1, device: CPU + Module: model.vision_model.transformer.layers.1.self_attn, device: CPU + model.vision_model.transformer.layers.1.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.1.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.1.mlp, device: CPU + model.vision_model.transformer.layers.1.mlp.fc1, device: CPU + model.vision_model.transformer.layers.1.mlp.fc2, device: CPU + model.vision_model.transformer.layers.1.mlp.act, device: CPU + model.vision_model.transformer.layers.1.layer_norm1, device: CPU + model.vision_model.transformer.layers.1.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.2, device: CPU + Module: model.vision_model.transformer.layers.2.self_attn, device: CPU + model.vision_model.transformer.layers.2.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.2.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.2.mlp, device: CPU + model.vision_model.transformer.layers.2.mlp.fc1, device: CPU + model.vision_model.transformer.layers.2.mlp.fc2, device: CPU + model.vision_model.transformer.layers.2.mlp.act, device: CPU + model.vision_model.transformer.layers.2.layer_norm1, device: CPU + model.vision_model.transformer.layers.2.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.3, device: CPU + Module: model.vision_model.transformer.layers.3.self_attn, device: CPU + model.vision_model.transformer.layers.3.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.3.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.3.mlp, device: CPU + model.vision_model.transformer.layers.3.mlp.fc1, device: CPU + 
model.vision_model.transformer.layers.3.mlp.fc2, device: CPU + model.vision_model.transformer.layers.3.mlp.act, device: CPU + model.vision_model.transformer.layers.3.layer_norm1, device: CPU + model.vision_model.transformer.layers.3.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.4, device: CPU + Module: model.vision_model.transformer.layers.4.self_attn, device: CPU + model.vision_model.transformer.layers.4.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.4.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.4.mlp, device: CPU + model.vision_model.transformer.layers.4.mlp.fc1, device: CPU + model.vision_model.transformer.layers.4.mlp.fc2, device: CPU + model.vision_model.transformer.layers.4.mlp.act, device: CPU + model.vision_model.transformer.layers.4.layer_norm1, device: CPU + model.vision_model.transformer.layers.4.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.5, device: CPU + Module: model.vision_model.transformer.layers.5.self_attn, device: CPU + model.vision_model.transformer.layers.5.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.5.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.5.mlp, device: CPU + model.vision_model.transformer.layers.5.mlp.fc1, device: CPU + model.vision_model.transformer.layers.5.mlp.fc2, device: CPU + model.vision_model.transformer.layers.5.mlp.act, device: CPU + model.vision_model.transformer.layers.5.layer_norm1, device: CPU + model.vision_model.transformer.layers.5.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.6, device: CPU + Module: model.vision_model.transformer.layers.6.self_attn, device: CPU + model.vision_model.transformer.layers.6.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.6.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.6.mlp, device: CPU + model.vision_model.transformer.layers.6.mlp.fc1, device: CPU + model.vision_model.transformer.layers.6.mlp.fc2, device: CPU + model.vision_model.transformer.layers.6.mlp.act, device: CPU + model.vision_model.transformer.layers.6.layer_norm1, device: CPU + model.vision_model.transformer.layers.6.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.7, device: CPU + Module: model.vision_model.transformer.layers.7.self_attn, device: CPU + model.vision_model.transformer.layers.7.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.7.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.7.mlp, device: CPU + model.vision_model.transformer.layers.7.mlp.fc1, device: CPU + model.vision_model.transformer.layers.7.mlp.fc2, device: CPU + model.vision_model.transformer.layers.7.mlp.act, device: CPU + model.vision_model.transformer.layers.7.layer_norm1, device: CPU + model.vision_model.transformer.layers.7.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.8, device: CPU + Module: model.vision_model.transformer.layers.8.self_attn, device: CPU + model.vision_model.transformer.layers.8.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.8.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.8.mlp, device: CPU + model.vision_model.transformer.layers.8.mlp.fc1, device: CPU + model.vision_model.transformer.layers.8.mlp.fc2, device: CPU + model.vision_model.transformer.layers.8.mlp.act, device: CPU + model.vision_model.transformer.layers.8.layer_norm1, 
device: CPU + model.vision_model.transformer.layers.8.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.9, device: CPU + Module: model.vision_model.transformer.layers.9.self_attn, device: CPU + model.vision_model.transformer.layers.9.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.9.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.9.mlp, device: CPU + model.vision_model.transformer.layers.9.mlp.fc1, device: CPU + model.vision_model.transformer.layers.9.mlp.fc2, device: CPU + model.vision_model.transformer.layers.9.mlp.act, device: CPU + model.vision_model.transformer.layers.9.layer_norm1, device: CPU + model.vision_model.transformer.layers.9.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.10, device: CPU + Module: model.vision_model.transformer.layers.10.self_attn, device: CPU + model.vision_model.transformer.layers.10.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.10.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.10.mlp, device: CPU + model.vision_model.transformer.layers.10.mlp.fc1, device: CPU + model.vision_model.transformer.layers.10.mlp.fc2, device: CPU + model.vision_model.transformer.layers.10.mlp.act, device: CPU + model.vision_model.transformer.layers.10.layer_norm1, device: CPU + model.vision_model.transformer.layers.10.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.11, device: CPU + Module: model.vision_model.transformer.layers.11.self_attn, device: CPU + model.vision_model.transformer.layers.11.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.11.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.11.mlp, device: CPU + model.vision_model.transformer.layers.11.mlp.fc1, device: CPU + model.vision_model.transformer.layers.11.mlp.fc2, device: CPU + model.vision_model.transformer.layers.11.mlp.act, device: CPU + model.vision_model.transformer.layers.11.layer_norm1, device: CPU + model.vision_model.transformer.layers.11.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.12, device: CPU + Module: model.vision_model.transformer.layers.12.self_attn, device: CPU + model.vision_model.transformer.layers.12.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.12.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.12.mlp, device: CPU + model.vision_model.transformer.layers.12.mlp.fc1, device: CPU + model.vision_model.transformer.layers.12.mlp.fc2, device: CPU + model.vision_model.transformer.layers.12.mlp.act, device: CPU + model.vision_model.transformer.layers.12.layer_norm1, device: CPU + model.vision_model.transformer.layers.12.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.13, device: CPU + Module: model.vision_model.transformer.layers.13.self_attn, device: CPU + model.vision_model.transformer.layers.13.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.13.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.13.mlp, device: CPU + model.vision_model.transformer.layers.13.mlp.fc1, device: CPU + model.vision_model.transformer.layers.13.mlp.fc2, device: CPU + model.vision_model.transformer.layers.13.mlp.act, device: CPU + model.vision_model.transformer.layers.13.layer_norm1, device: CPU + model.vision_model.transformer.layers.13.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.14, device: CPU + 
Module: model.vision_model.transformer.layers.14.self_attn, device: CPU + model.vision_model.transformer.layers.14.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.14.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.14.mlp, device: CPU + model.vision_model.transformer.layers.14.mlp.fc1, device: CPU + model.vision_model.transformer.layers.14.mlp.fc2, device: CPU + model.vision_model.transformer.layers.14.mlp.act, device: CPU + model.vision_model.transformer.layers.14.layer_norm1, device: CPU + model.vision_model.transformer.layers.14.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.15, device: CPU + Module: model.vision_model.transformer.layers.15.self_attn, device: CPU + model.vision_model.transformer.layers.15.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.15.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.15.mlp, device: CPU + model.vision_model.transformer.layers.15.mlp.fc1, device: CPU + model.vision_model.transformer.layers.15.mlp.fc2, device: CPU + model.vision_model.transformer.layers.15.mlp.act, device: CPU + model.vision_model.transformer.layers.15.layer_norm1, device: CPU + model.vision_model.transformer.layers.15.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.16, device: CPU + Module: model.vision_model.transformer.layers.16.self_attn, device: CPU + model.vision_model.transformer.layers.16.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.16.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.16.mlp, device: CPU + model.vision_model.transformer.layers.16.mlp.fc1, device: CPU + model.vision_model.transformer.layers.16.mlp.fc2, device: CPU + model.vision_model.transformer.layers.16.mlp.act, device: CPU + model.vision_model.transformer.layers.16.layer_norm1, device: CPU + model.vision_model.transformer.layers.16.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.17, device: CPU + Module: model.vision_model.transformer.layers.17.self_attn, device: CPU + model.vision_model.transformer.layers.17.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.17.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.17.mlp, device: CPU + model.vision_model.transformer.layers.17.mlp.fc1, device: CPU + model.vision_model.transformer.layers.17.mlp.fc2, device: CPU + model.vision_model.transformer.layers.17.mlp.act, device: CPU + model.vision_model.transformer.layers.17.layer_norm1, device: CPU + model.vision_model.transformer.layers.17.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.18, device: CPU + Module: model.vision_model.transformer.layers.18.self_attn, device: CPU + model.vision_model.transformer.layers.18.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.18.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.18.mlp, device: CPU + model.vision_model.transformer.layers.18.mlp.fc1, device: CPU + model.vision_model.transformer.layers.18.mlp.fc2, device: CPU + model.vision_model.transformer.layers.18.mlp.act, device: CPU + model.vision_model.transformer.layers.18.layer_norm1, device: CPU + model.vision_model.transformer.layers.18.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.19, device: CPU + Module: model.vision_model.transformer.layers.19.self_attn, device: CPU + model.vision_model.transformer.layers.19.self_attn.qkv_proj, 
device: CPU + model.vision_model.transformer.layers.19.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.19.mlp, device: CPU + model.vision_model.transformer.layers.19.mlp.fc1, device: CPU + model.vision_model.transformer.layers.19.mlp.fc2, device: CPU + model.vision_model.transformer.layers.19.mlp.act, device: CPU + model.vision_model.transformer.layers.19.layer_norm1, device: CPU + model.vision_model.transformer.layers.19.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.20, device: CPU + Module: model.vision_model.transformer.layers.20.self_attn, device: CPU + model.vision_model.transformer.layers.20.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.20.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.20.mlp, device: CPU + model.vision_model.transformer.layers.20.mlp.fc1, device: CPU + model.vision_model.transformer.layers.20.mlp.fc2, device: CPU + model.vision_model.transformer.layers.20.mlp.act, device: CPU + model.vision_model.transformer.layers.20.layer_norm1, device: CPU + model.vision_model.transformer.layers.20.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.21, device: CPU + Module: model.vision_model.transformer.layers.21.self_attn, device: CPU + model.vision_model.transformer.layers.21.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.21.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.21.mlp, device: CPU + model.vision_model.transformer.layers.21.mlp.fc1, device: CPU + model.vision_model.transformer.layers.21.mlp.fc2, device: CPU + model.vision_model.transformer.layers.21.mlp.act, device: CPU + model.vision_model.transformer.layers.21.layer_norm1, device: CPU + model.vision_model.transformer.layers.21.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.22, device: CPU + Module: model.vision_model.transformer.layers.22.self_attn, device: CPU + model.vision_model.transformer.layers.22.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.22.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.22.mlp, device: CPU + model.vision_model.transformer.layers.22.mlp.fc1, device: CPU + model.vision_model.transformer.layers.22.mlp.fc2, device: CPU + model.vision_model.transformer.layers.22.mlp.act, device: CPU + model.vision_model.transformer.layers.22.layer_norm1, device: CPU + model.vision_model.transformer.layers.22.layer_norm2, device: CPU + Module: model.vision_model.transformer.layers.23, device: CPU + Module: model.vision_model.transformer.layers.23.self_attn, device: CPU + model.vision_model.transformer.layers.23.self_attn.qkv_proj, device: CPU + model.vision_model.transformer.layers.23.self_attn.out_proj, device: CPU + Module: model.vision_model.transformer.layers.23.mlp, device: CPU + model.vision_model.transformer.layers.23.mlp.fc1, device: CPU + model.vision_model.transformer.layers.23.mlp.fc2, device: CPU + model.vision_model.transformer.layers.23.mlp.act, device: CPU + model.vision_model.transformer.layers.23.layer_norm1, device: CPU + model.vision_model.transformer.layers.23.layer_norm2, device: CPU + model.vision_model.pre_layrnorm, device: CPU + Module: model.projector, device: CPU + model.projector.layers, device: CPU + model.image_newline, device: CPU + model.view_seperator, device: CPU + lm_head, device: CPU + diff --git a/examples/deepseek_ocr/quant_cfg_w4a8_kai_i8mm.json 
b/examples/deepseek_ocr/quant_cfg_w4a8_kai_i8mm.json new file mode 100644 index 00000000..a35346d4 --- /dev/null +++ b/examples/deepseek_ocr/quant_cfg_w4a8_kai_i8mm.json @@ -0,0 +1,184 @@ +{ + "lm_head.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 129280, + 1280 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.mlp\\.down_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 1280, + 6848 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.mlp\\.gate_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 6848, + 1280 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.mlp\\.up_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 6848, + 1280 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.self_attn\\.k_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 1280, + 1280 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.self_attn\\.q_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 1280, + 1280 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.self_attn\\.v_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 1280, + 1280 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.self_attn\\.o_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 1280, + 1280 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.mlp\\.experts\\.\\d+\\.down_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 1280, + 896 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.mlp\\.experts\\.\\d+\\.gate_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 896, + 1280 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.mlp\\.experts\\.\\d+\\.up_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 896, + 1280 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.mlp\\.shared_experts\\.down_proj\\.weight": { + "hints": { + 
"quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 1280, + 1792 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.mlp\\.shared_experts\\.gate_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 1792, + 1280 + ], + "replace": true + } + }, + "^model\\.layers\\.\\d+\\.mlp\\.shared_experts\\.up_proj\\.weight": { + "hints": { + "quant_method": "kai", + "kai_matmul_triplet": "f32_qai8dxp_qsi4c32p", + "kai_matmul_layout": "mxk_nxk", + "kai_matmul_tile_cfg": "qai8dxp4x8_qsi4c32p8x8_4x8x32", + "shape": [ + 1792, + 1280 + ], + "replace": true + } + } +} diff --git a/mllm/CMakeLists.txt b/mllm/CMakeLists.txt index f28afdce..d4728bfe 100644 --- a/mllm/CMakeLists.txt +++ b/mllm/CMakeLists.txt @@ -105,3 +105,8 @@ if(MLLM_BUILD_CUDA_BACKEND) MLLM_CUDA_BACKEND ) endif() + +# Extension +if(MLLM_EXT_ENABLE) + add_subdirectory(ext) +endif() diff --git a/mllm/backends/cpu/CPUBackend.cpp b/mllm/backends/cpu/CPUBackend.cpp index c94871b9..508c381e 100644 --- a/mllm/backends/cpu/CPUBackend.cpp +++ b/mllm/backends/cpu/CPUBackend.cpp @@ -5,11 +5,13 @@ #include "mllm/backends/cpu/CPUAllocator.hpp" // Ops +#include "mllm/backends/cpu/ops/ArgsortOp.hpp" #include "mllm/backends/cpu/ops/CastTypeOp.hpp" #include "mllm/backends/cpu/ops/CausalMaskOp.hpp" #include "mllm/backends/cpu/ops/ConcatOp.hpp" #include "mllm/backends/cpu/ops/ContiguousOp.hpp" #include "mllm/backends/cpu/ops/Conv1DOp.hpp" +#include "mllm/backends/cpu/ops/Conv2DOp.hpp" #include "mllm/backends/cpu/ops/Conv3DOp.hpp" #include "mllm/backends/cpu/ops/CopyOp.hpp" #include "mllm/backends/cpu/ops/ElewiseOps.hpp" @@ -17,6 +19,10 @@ #include "mllm/backends/cpu/ops/FillOp.hpp" #include "mllm/backends/cpu/ops/FlashAttention2Op.hpp" #include "mllm/backends/cpu/ops/GELUOp.hpp" +#include "mllm/backends/cpu/ops/InterpolateOp.hpp" +#include "mllm/backends/cpu/ops/LayerNorm2DOp.hpp" +#include "mllm/backends/cpu/ops/MaskedScatterOp.hpp" +#include "mllm/backends/cpu/ops/PadOp.hpp" #include "mllm/backends/cpu/ops/RadixAttnOp.hpp" #include "mllm/backends/cpu/ops/ReLUOp.hpp" #include "mllm/backends/cpu/ops/GraphOps.hpp" @@ -46,6 +52,7 @@ #include "mllm/backends/cpu/ops/ViewOp.hpp" #include "mllm/backends/cpu/ops/VisionRoPEOp.hpp" #include "mllm/backends/cpu/ops/X2XOp.hpp" +#include "mllm/backends/cpu/ops/StackOp.hpp" namespace mllm::cpu { @@ -54,13 +61,15 @@ CPUBackend::CPUBackend() : Backend(kCPU, createCPUAllocator()) { CPUSubOpFactory, CPUMulOpFactory, CPUDivOpFactory, CPUNegOpFactory, CPUAbsOpFactory, CPULogOpFactory, CPUExpOpFactory, CPUSinOpFactory, CPUCosOpFactory, CPUReduceMaxOpFactory, CPUReduceMinOpFactory, CPUReduceSumOpFactory, CPUTransposeOpFactory, CPUPermuteOpFactory, CPUCastTypeOpFactory, CPUConcatOpFactory, - CPUContiguousOpFactory, CPUCopyOpFactory, CPUEmbeddingOpFactory, CPUSplitOpFactory, CPUViewOpFactory, - CPULayerNormOpFactory, CPURepeatOpFactory, CPUX2XOpFactory, CPUSoftmaxOpFactory, CPUSiLUOpFactory, - CPURMSNormOpFactory, CPUGELUOpFactory, CPUQuickGELUOpFactory, CPUReLUOpFactory, CPUMatMulOpFactory, - CPUFlashAttention2OpFactory, CPUSliceOpFactory, CPUVisionRoPEOpFactory, CPUParamOpFactory, + CPUStackOpFactory, CPUContiguousOpFactory, CPUCopyOpFactory, CPUEmbeddingOpFactory, CPUSplitOpFactory, + CPUViewOpFactory, CPULayerNormOpFactory, CPURepeatOpFactory, 
CPUX2XOpFactory, CPUSoftmaxOpFactory, + CPUSiLUOpFactory, CPURMSNormOpFactory, CPUGELUOpFactory, CPUQuickGELUOpFactory, CPUReLUOpFactory, + CPUMatMulOpFactory, CPUFlashAttention2OpFactory, CPUSliceOpFactory, CPUVisionRoPEOpFactory, CPUParamOpFactory, CPUMultimodalRoPEOpFactory, CPURoPEOpFactory, CPUCausalMaskOpFactory, CPUConv1DOpFactory, CPUConv3DOpFactory, CPUSTFTOpFactory, CPUISTFTOpFactory, CPUIndexOpFactory, CPUTopKOpFactory, CPUClipOpFactory, CPUMeanOpFactory, - CPUKVCacheOpFactory, CPUPagedAttnOpFactory, CPUScatter2ShardsOpFactory, CPURadixAttnOpFactory>(); + CPUKVCacheOpFactory, CPUPagedAttnOpFactory, CPUScatter2ShardsOpFactory, CPURadixAttnOpFactory, + CPUConv2DOpFactory, CPULayerNorm2DOpFactory, CPUInterpolateOpFactory, CPUPadOpFactory, CPUMaskedScatterOpFactory, + CPUArgsortOpFactory>(); } std::shared_ptr createCPUBackend() { return std::make_shared(); } diff --git a/mllm/backends/cpu/kernels/Kernels.hpp b/mllm/backends/cpu/kernels/Kernels.hpp index e68b7097..026cc84a 100644 --- a/mllm/backends/cpu/kernels/Kernels.hpp +++ b/mllm/backends/cpu/kernels/Kernels.hpp @@ -30,6 +30,8 @@ #include "mllm/backends/cpu/kernels/arm/conv3d.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/arm/linear/kai.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/arm/relu.hpp" // IWYU pragma: export +#include "mllm/backends/cpu/kernels/arm/conv2d.hpp" // IWYU pragma: export +#include "mllm/backends/cpu/kernels/arm/layernorm2d.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/arm/mllm_blas/mllm_blas_sgemm.hpp" // IWYU pragma: export #else #include "mllm/backends/cpu/kernels/common/gelu-inl.hpp" // IWYU pragma: export diff --git a/mllm/backends/cpu/kernels/arm/conv2d.cpp b/mllm/backends/cpu/kernels/arm/conv2d.cpp new file mode 100644 index 00000000..27bd3ae4 --- /dev/null +++ b/mllm/backends/cpu/kernels/arm/conv2d.cpp @@ -0,0 +1,99 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
+#include "mllm/backends/cpu/kernels/arm/conv2d.hpp" + +#if defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + +#include <arm_neon.h> + +namespace mllm::cpu::arm { + +void conv2d_fp32_im2col_input(const float* input_data, const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, + const int stride_w, const int dilation_h, const int dilation_w, float* col_data) { + const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + const int channel_size = height * width; + + const float32x4_t vzero = vdupq_n_f32(0.0f); + + for (int channel = 0; channel < channels; ++channel) { + for (int kernel_y = 0; kernel_y < kernel_h; ++kernel_y) { + for (int kernel_x = 0; kernel_x < kernel_w; ++kernel_x) { + const int input_start_y = -pad_h + kernel_y * dilation_h; + const int input_start_x = -pad_w + kernel_x * dilation_w; + + for (int out_y = 0; out_y < output_h; ++out_y) { + const int cur_input_y = input_start_y + out_y * stride_h; + + // The unsigned compare treats negative coordinates as out of range, i.e. zero padding. + if (static_cast<unsigned>(cur_input_y) >= static_cast<unsigned>(height)) { + for (int out_x = 0; out_x < output_w; out_x += 4) { + if (out_x + 3 < output_w) { + vst1q_f32(col_data, vzero); + col_data += 4; + } else { + for (int i = 0; i < output_w - out_x; ++i) { *col_data++ = 0.0f; } + } + } + } else { + int out_x = 0; + for (; out_x + 3 < output_w; out_x += 4) { + const int input_x0 = input_start_x + (out_x + 0) * stride_w; + const int input_x1 = input_start_x + (out_x + 1) * stride_w; + const int input_x2 = input_start_x + (out_x + 2) * stride_w; + const int input_x3 = input_start_x + (out_x + 3) * stride_w; + + const float val0 = (static_cast<unsigned>(input_x0) < static_cast<unsigned>(width)) + ? input_data[cur_input_y * width + input_x0] + : 0.0f; + const float val1 = (static_cast<unsigned>(input_x1) < static_cast<unsigned>(width)) + ? input_data[cur_input_y * width + input_x1] + : 0.0f; + const float val2 = (static_cast<unsigned>(input_x2) < static_cast<unsigned>(width)) + ? input_data[cur_input_y * width + input_x2] + : 0.0f; + const float val3 = (static_cast<unsigned>(input_x3) < static_cast<unsigned>(width)) + ? input_data[cur_input_y * width + input_x3] + : 0.0f; + + float32x4_t v_data = {val0, val1, val2, val3}; + vst1q_f32(col_data, v_data); + col_data += 4; + } + + for (; out_x < output_w; ++out_x) { + const int cur_input_x = input_start_x + out_x * stride_w; + if (static_cast<unsigned>(cur_input_x) < static_cast<unsigned>(width)) { + *col_data++ = input_data[cur_input_y * width + cur_input_x]; + } else { + *col_data++ = 0.0f; + } + } + } + } + } + } + input_data += channel_size; + } +} + +void conv2d_fp32_im2col_weight(const float* src_weight, float* packed_weight, int out_channels, int in_channels, int kernel_h, + int kernel_w) { + // Original Weight: [Out, In, Kh, Kw] + // Packed Weight: [Out, In*Kh*Kw] + // NOTE: both layouts are row-major over the same element order, so this is currently a plain copy. + for (int o = 0; o < out_channels; ++o) { + for (int i = 0; i < in_channels; ++i) { + for (int h = 0; h < kernel_h; ++h) { + for (int w = 0; w < kernel_w; ++w) { + int src_idx = o * (in_channels * kernel_h * kernel_w) + i * (kernel_h * kernel_w) + h * kernel_w + w; + int dst_idx = o * (in_channels * kernel_h * kernel_w) + i * (kernel_h * kernel_w) + h * kernel_w + w; + packed_weight[dst_idx] = src_weight[src_idx]; + } + } + } + } +} + +} // namespace mllm::cpu::arm + +#endif diff --git a/mllm/backends/cpu/kernels/arm/conv2d.hpp b/mllm/backends/cpu/kernels/arm/conv2d.hpp new file mode 100644 index 00000000..129e272f --- /dev/null +++ b/mllm/backends/cpu/kernels/arm/conv2d.hpp @@ -0,0 +1,34 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/utils/CPUArchHelper.hpp" + +#if defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + +#include + +namespace mllm::cpu::arm { + +//===----------------------------------------------------------------------===// +// Im2col. +// +// Reformat the input into im2col's input layout, +// reformat the weight into im2col's weight layout, +// then compute gemm(weight, input). +//===----------------------------------------------------------------------===// +void conv2d_fp32_im2col_input(const float* input_data, const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, + const int stride_w, const int dilation_h, const int dilation_w, float* col_data); + +// The input weight format should be [Out_Channels, In_Channels, Kernel_H, Kernel_W]. +// The output weight format should be [M x K]. +// +// This kernel is not performance-sensitive: the weight only needs to be packed once. +void conv2d_fp32_im2col_weight(const float* src_weight, float* packed_weight, int out_channels, int in_channels, int kernel_h, + int kernel_w); + +} // namespace mllm::cpu::arm +#endif diff --git a/mllm/backends/cpu/kernels/arm/layernorm2d.cpp b/mllm/backends/cpu/kernels/arm/layernorm2d.cpp new file mode 100644 index 00000000..b75257d7 --- /dev/null +++ b/mllm/backends/cpu/kernels/arm/layernorm2d.cpp @@ -0,0 +1,89 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License.
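The header above summarizes the im2col scheme: the weight is packed to [M x K], the input is expanded to [K x N], and the convolution becomes one GEMM. A small standalone sketch of the dimension bookkeeping, reusing the same output-size formula as conv2d_fp32_im2col_input (the example numbers are arbitrary):

#include <cstdio>

// Sketch: dimension bookkeeping for the im2col + GEMM scheme described above.
int main() {
  // Example: 3x3 conv, stride 2, pad 1, dilation 1 on a 64x224x224 input.
  int in_c = 64, h = 224, w = 224, out_c = 128;
  int kh = 3, kw = 3, ph = 1, pw = 1, sh = 2, sw = 2, dh = 1, dw = 1;

  // Same formula used by conv2d_fp32_im2col_input.
  int out_h = (h + 2 * ph - (dh * (kh - 1) + 1)) / sh + 1;  // 112
  int out_w = (w + 2 * pw - (dw * (kw - 1) + 1)) / sw + 1;  // 112

  int M = out_c;           // GEMM rows: output channels
  int K = in_c * kh * kw;  // GEMM depth: one input patch per column
  int N = out_h * out_w;   // GEMM cols: output pixels
  std::printf("out=%dx%d, GEMM M=%d K=%d N=%d\n", out_h, out_w, M, K, N);
}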
+#include "mllm/utils/CPUArchHelper.hpp" + +#if defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + +#include +#include + +namespace mllm::cpu::arm { +void layernorm2d_fp32(const float* x, const float* weight, const float* bias, float* y, int N, int C, int H, int W, float eps) { + const int spatial_dim = H * W; + + for (int n = 0; n < N; ++n) { + for (int i = 0; i < spatial_dim; ++i) { + const float* x_ptr = x + n * C * spatial_dim + i; + float* y_ptr = y + n * C * spatial_dim + i; + + float sum = 0.0f; +#if defined(__ARM_NEON) + float32x4_t sum_vec = vdupq_n_f32(0.0f); + int c = 0; + for (; c <= C - 4; c += 4) { + float32x4_t x_vec = {x_ptr[c * spatial_dim], x_ptr[(c + 1) * spatial_dim], x_ptr[(c + 2) * spatial_dim], + x_ptr[(c + 3) * spatial_dim]}; + sum_vec = vaddq_f32(sum_vec, x_vec); + } + sum = vaddvq_f32(sum_vec); + for (; c < C; ++c) { sum += x_ptr[c * spatial_dim]; } +#else + for (int c = 0; c < C; ++c) { sum += x_ptr[c * spatial_dim]; } +#endif + const float mean = sum / C; + + float sq_sum = 0.0f; +#if defined(__ARM_NEON) + float32x4_t sq_sum_vec = vdupq_n_f32(0.0f); + float32x4_t mean_vec = vdupq_n_f32(mean); + c = 0; + for (; c <= C - 4; c += 4) { + float32x4_t x_vec = {x_ptr[c * spatial_dim], x_ptr[(c + 1) * spatial_dim], x_ptr[(c + 2) * spatial_dim], + x_ptr[(c + 3) * spatial_dim]}; + float32x4_t diff = vsubq_f32(x_vec, mean_vec); + sq_sum_vec = vmlaq_f32(sq_sum_vec, diff, diff); // Fused multiply-accumulate: sq_sum_vec += diff * diff + } + sq_sum = vaddvq_f32(sq_sum_vec); + for (; c < C; ++c) { + float diff = x_ptr[c * spatial_dim] - mean; + sq_sum += diff * diff; + } +#else + for (int c = 0; c < C; ++c) { + float diff = x_ptr[c * spatial_dim] - mean; + sq_sum += diff * diff; + } +#endif + const float variance = sq_sum / C; + const float inv_std = 1.0f / std::sqrt(variance + eps); + +#if defined(__ARM_NEON) + float32x4_t inv_std_vec = vdupq_n_f32(inv_std); + c = 0; + for (; c <= C - 4; c += 4) { + float32x4_t x_vec = {x_ptr[c * spatial_dim], x_ptr[(c + 1) * spatial_dim], x_ptr[(c + 2) * spatial_dim], + x_ptr[(c + 3) * spatial_dim]}; + float32x4_t weight_vec = vld1q_f32(weight + c); + float32x4_t bias_vec = vld1q_f32(bias + c); + + // y = (x - mean) * inv_std + float32x4_t norm_val = vmulq_f32(vsubq_f32(x_vec, mean_vec), inv_std_vec); + // y = y * weight + bias + float32x4_t out_vec = vmlaq_f32(bias_vec, norm_val, weight_vec); + + y_ptr[c * spatial_dim] = vgetq_lane_f32(out_vec, 0); + y_ptr[(c + 1) * spatial_dim] = vgetq_lane_f32(out_vec, 1); + y_ptr[(c + 2) * spatial_dim] = vgetq_lane_f32(out_vec, 2); + y_ptr[(c + 3) * spatial_dim] = vgetq_lane_f32(out_vec, 3); + } + for (; c < C; ++c) { y_ptr[c * spatial_dim] = (x_ptr[c * spatial_dim] - mean) * inv_std * weight[c] + bias[c]; } +#else + for (int c = 0; c < C; ++c) { y_ptr[c * spatial_dim] = (x_ptr[c * spatial_dim] - mean) * inv_std * weight[c] + bias[c]; } +#endif + } + } +} + +} // namespace mllm::cpu::arm + +#endif diff --git a/mllm/backends/cpu/kernels/arm/layernorm2d.hpp b/mllm/backends/cpu/kernels/arm/layernorm2d.hpp new file mode 100644 index 00000000..15782905 --- /dev/null +++ b/mllm/backends/cpu/kernels/arm/layernorm2d.hpp @@ -0,0 +1,18 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
+ +#pragma once + +#include "mllm/utils/CPUArchHelper.hpp" + +#if defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + +#include + +namespace mllm::cpu::arm { + +// For NCHW +void layernorm2d_fp32(const float* x, const float* weight, const float* bias, float* y, int N, int C, int H, int W, float eps); + +} // namespace mllm::cpu::arm +#endif diff --git a/mllm/backends/cpu/ops/ArgsortOp.cpp b/mllm/backends/cpu/ops/ArgsortOp.cpp new file mode 100644 index 00000000..646499c5 --- /dev/null +++ b/mllm/backends/cpu/ops/ArgsortOp.cpp @@ -0,0 +1,100 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include <algorithm> +#include <utility> +#include <vector> + +#include "mllm/backends/cpu/ops/ArgsortOp.hpp" +#include "mllm/core/DataTypes.hpp" +#include "mllm/utils/Common.hpp" +#include "mllm/utils/UnsafeMacros.hpp" + +namespace mllm::cpu { + +CPUArgsortOp::CPUArgsortOp(const aops::ArgsortOpOptions& options) : aops::ArgsortOp(options) {} + +namespace MLLM_ANONYMOUS_NAMESPACE { + +template <typename T> +void argsort_impl(const T* input_data, int32_t* indices_data, int outer_size, int axis_size, int inner_size, int dim, + bool descending) { + for (int out = 0; out < outer_size; ++out) { + for (int in = 0; in < inner_size; ++in) { + // Create pairs of (value, index) for sorting + std::vector<std::pair<T, int32_t>> data_pairs(axis_size); + for (int i = 0; i < axis_size; ++i) { + int index = out * axis_size * inner_size + i * inner_size + in; + data_pairs[i] = {input_data[index], static_cast<int32_t>(i)}; + } + + // Sort based on values + if (descending) { + std::sort(data_pairs.begin(), data_pairs.end(), + [](const std::pair<T, int32_t>& a, const std::pair<T, int32_t>& b) { return a.first > b.first; }); + } else { + std::sort(data_pairs.begin(), data_pairs.end(), + [](const std::pair<T, int32_t>& a, const std::pair<T, int32_t>& b) { return a.first < b.first; }); + } + + // Store sorted indices + for (int i = 0; i < axis_size; ++i) { + int out_index = out * axis_size * inner_size + i * inner_size + in; + indices_data[out_index] = data_pairs[i].second; + } + } + } +} +} // namespace MLLM_ANONYMOUS_NAMESPACE + +__MLLM_UNSAFE_OPT_BEGIN_O3_FAST_MATH +void CPUArgsortOp::forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) { + auto& input = inputs[0]; + auto& indices = outputs[0]; + + auto dtype = input.dtype(); + int dim = options_.dim; + bool descending = options_.descending; + + // Handle negative dimension index + if (dim < 0) { dim += input.shape().size(); } + + // Calculate sizes + int outer_size = 1; + int inner_size = 1; + int axis_size = input.shape()[dim]; + + for (int i = 0; i < dim; ++i) { outer_size *= input.shape()[i]; } + for (int i = dim + 1; i < input.shape().size(); ++i) { inner_size *= input.shape()[i]; } + + switch (dtype) { + case kFloat32: { + argsort_impl(input.ptr<float>(), indices.ptr<int32_t>(), outer_size, axis_size, inner_size, dim, descending); + break; + } + case kFloat16: { + argsort_impl(input.ptr<mllm_fp16_t>(), indices.ptr<int32_t>(), outer_size, axis_size, inner_size, dim, + descending); + break; + } + case kInt32: { + argsort_impl(input.ptr<int32_t>(), indices.ptr<int32_t>(), outer_size, axis_size, inner_size, dim, + descending); + break; + } + case kInt16: { + argsort_impl(input.ptr<int16_t>(), indices.ptr<int32_t>(), outer_size, axis_size, inner_size, dim, + descending); + break; + } + case kInt8: { + argsort_impl(input.ptr<int8_t>(), indices.ptr<int32_t>(), outer_size, axis_size, inner_size, dim, + descending); + break; + } + default: NYI("Unsupported data type for ArgsortOp"); + } +} +__MLLM_UNSAFE_OPT_END + +} // namespace mllm::cpu diff --git a/mllm/backends/cpu/ops/ArgsortOp.hpp b/mllm/backends/cpu/ops/ArgsortOp.hpp new file mode 100644 index 00000000..3649b8bc --- /dev/null +++ b/mllm/backends/cpu/ops/ArgsortOp.hpp @@ -0,0 +1,24 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/aops/ArgsortOp.hpp" + +namespace mllm::cpu { + +class CPUArgsortOp final : public aops::ArgsortOp { + public: + explicit CPUArgsortOp(const aops::ArgsortOpOptions& options); + + void forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) override; +}; + +class CPUArgsortOpFactory final : public TypedOpFactory<aops::ArgsortOpOptions> { + public: + std::shared_ptr<BaseOp> createOpImpl(const aops::ArgsortOpOptions& options) override { + return std::make_shared<CPUArgsortOp>(options); + } +}; + +} // namespace mllm::cpu \ No newline at end of file diff --git a/mllm/backends/cpu/ops/Conv2DOp.cpp b/mllm/backends/cpu/ops/Conv2DOp.cpp new file mode 100644 index 00000000..5770afb3 --- /dev/null +++ b/mllm/backends/cpu/ops/Conv2DOp.cpp @@ -0,0 +1,178 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include +#include "mllm/core/aops/MatMulOp.hpp" +#include "mllm/backends/cpu/ops/Conv2DOp.hpp" +#include "mllm/backends/cpu/kernels/Kernels.hpp" + +namespace mllm::cpu { + +CPUConv2DOp::CPUConv2DOp(const aops::Conv2DOpOptions& options) : aops::Conv2DOp(options) {} + +void CPUConv2DOp::load(const ParameterFile::ptr_t& ploader) { + switch (ploader->version()) { + case ModelFileVersion::kV1: { + weight_ = ploader->pull(getName() + ".weight"); + if (options_.bias) { bias_ = ploader->pull(getName() + ".bias"); } + weight_ = weight_.view({ + options_.out_channels, + options_.in_channels, + options_.kernel_size[0], + options_.kernel_size[1], + }); + if (options_.bias) { bias_ = bias_.view({options_.out_channels}); } + break; + } + case ModelFileVersion::kUserTemporary: + case ModelFileVersion::kV2: { + weight_ = ploader->pull(getName() + ".weight"); + if (options_.bias) { bias_ = ploader->pull(getName() + ".bias"); } + break; + } + default: NYI("Unsupported model file version") + } + + auto& kernel_size = options_.kernel_size; + auto& stride = options_.stride; + auto& padding = options_.padding; + auto& dilation = options_.dilation; + + // Pack data + switch (options_.impl_type) { + case aops::Conv2DOpImplType::kDefault: { + // The default impl uses the im2col algorithm; the weight is packed here, once, at load time. + MLLM_INFO("Packing Conv2D weight to im2col format. kh={}, kw={}, ph={}, pw={}, dh={}, dw={}, sh={}, sw={}", + kernel_size[0], kernel_size[1], padding[0], padding[1], dilation[0], dilation[1], stride[0], stride[1]); + auto packed_weight = Tensor::empty( + { + options_.out_channels, + options_.in_channels * kernel_size[0] * kernel_size[1], + }, + weight_.dtype(), weight_.device()) + .alloc(); +#if defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + arm::conv2d_fp32_im2col_weight(weight_.ptr<float>(), packed_weight.ptr<float>(), options_.out_channels, + options_.in_channels, kernel_size[0], kernel_size[1]); +#else + MLLM_ERROR_EXIT(ExitCode::kCoreError, "Unsupported architecture for packing conv2d weight into im2col format."); +#endif + weight_ = packed_weight; + break; + } + default: { + NYI("Unsupported impl type") + } + } +} + +void CPUConv2DOp::forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) { + auto& input = inputs[0]; + auto& output = outputs[0]; + auto& kernel_size = options_.kernel_size; + auto& stride = options_.stride; + auto& padding = options_.padding; + auto& dilation = options_.dilation; + + MLLM_RT_ASSERT_EQ(input.rank(), 4); + MLLM_RT_ASSERT_EQ(output.rank(), 4); + auto batch_size = input.size(0); + auto _1 = input.size(1); + auto _2 = input.size(2); + auto _3 = input.size(3); + auto _out_1 = output.size(1); + auto _out_2 = output.size(2); + auto _out_3 = output.size(3); + + switch (input.dtype()) { + case kFloat32: { + switch (options_.impl_type) { + case aops::Conv2DOpImplType::kDefault: { + // Weight is M x K (out_channels x (in_channels * kernel_h * kernel_w)) + // Input is K x N ((in_channels * kernel_h * kernel_w) x (out_h * out_w)) + // Output is M x N (out_channels x (out_h * out_w)) + + auto mt = aops::MatMulOpType::kDefault; + if (mt == aops::MatMulOpType::kDefault) { +#if defined(MLLM_USE_BLAS) + mt = aops::MatMulOpType::kBLAS; +#else + mt = aops::MatMulOpType::kMllmBlas; +#endif + } + int MATMUL_M = options_.out_channels; + int MATMUL_K = options_.in_channels * kernel_size[0] * kernel_size[1]; + int MATMUL_N = output.shape()[2] * output.shape()[3]; + + // step 1. im2col inputs to tmp + auto packed_inputs = Tensor::empty({MATMUL_K, MATMUL_N}, input.dtype(), input.device()).alloc(); + + for (int _b_idx = 0; _b_idx < batch_size; ++_b_idx) { +#if defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + + arm::conv2d_fp32_im2col_input(input.ptr<float>() + _b_idx * (_1 * _2 * _3), options_.in_channels, + input.shape()[2], input.shape()[3], kernel_size[0], kernel_size[1], padding[0], + padding[1], stride[0], stride[1], dilation[0], dilation[1], + packed_inputs.ptr<float>()); + // step 2. Do matmul + switch (mt) { // NOLINT + case aops::MatMulOpType::kBLAS: { +#if defined(MLLM_USE_BLAS) + blas::matmul_fp32(weight_.ptr<float>(), packed_inputs.ptr<float>(), + output.ptr<float>() + _b_idx * (_out_1 * _out_2 * _out_3), nullptr, MATMUL_M, MATMUL_N, + MATMUL_K, false, false); + + // Add Bias + if (options_.bias) { + auto out_ptr = output.ptr<float>() + _b_idx * (_out_1 * _out_2 * _out_3); + const auto bias_ptr = bias_.ptr<float>(); + for (int m = 0; m < MATMUL_M; ++m) { + const float b = bias_ptr[m]; + for (int n = 0; n < MATMUL_N; ++n) { out_ptr[m * MATMUL_N + n] += b; } + } + } +#else + NYI("BLAS not supported. Please set MLLM_USE_BLAS=ON in CMake to enable BLAS support."); +#endif + break; + } + case aops::MatMulOpType::kMllmBlas: { + auto thread_count = options_.getThreads(); +#if defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + arm::mllm_blas_matmul_fp32( + MATMUL_M, MATMUL_K, MATMUL_N, output.ptr<float>() + _b_idx * (_out_1 * _out_2 * _out_3), + weight_.ptr<float>(), packed_inputs.ptr<float>(), nullptr, false, false, thread_count); + // Add Bias + if (options_.bias) { + auto out_ptr = output.ptr<float>() + _b_idx * (_out_1 * _out_2 * _out_3); + const auto bias_ptr = bias_.ptr<float>(); + for (int m = 0; m < MATMUL_M; ++m) { + const float b = bias_ptr[m]; + for (int n = 0; n < MATMUL_N; ++n) { out_ptr[m * MATMUL_N + n] += b; } + } + } +#else + NYI("MllmBlas only supports MLLM_HOST_ARCH_ARM64 or MLLM_HOST_ARCH_ARM right now.") +#endif + break; + } + default: { + NYI("Unsupported matmul type"); + } + } +#else + MLLM_ERROR_EXIT(ExitCode::kCoreError, "Unsupported architecture for performing im2col conv2d."); +#endif + } + } + } + break; + } + default: { + NYI("Unsupported data type"); + } + } +} + +} // namespace mllm::cpu diff --git a/mllm/backends/cpu/ops/Conv2DOp.hpp b/mllm/backends/cpu/ops/Conv2DOp.hpp new file mode 100644 index 00000000..b005391c --- /dev/null +++ b/mllm/backends/cpu/ops/Conv2DOp.hpp @@ -0,0 +1,27 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/aops/Conv2DOp.hpp" + +namespace mllm::cpu { + +class CPUConv2DOp final : public aops::Conv2DOp { + public: + explicit CPUConv2DOp(const aops::Conv2DOpOptions& options); + + void load(const ParameterFile::ptr_t& ploader) override; + + void forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) override; +}; + +class CPUConv2DOpFactory : public TypedOpFactory<aops::Conv2DOpOptions> { + public: + std::shared_ptr<BaseOp> createOpImpl(const aops::Conv2DOpOptions& options) override { + return std::make_shared<CPUConv2DOp>(options); + } +}; + +} // namespace mllm::cpu diff --git a/mllm/core/aops/ArgSortOp.cpp b/mllm/backends/cpu/ops/EinsumOp.cpp similarity index 100% rename from mllm/core/aops/ArgSortOp.cpp rename to mllm/backends/cpu/ops/EinsumOp.cpp diff --git a/mllm/core/aops/ArgSortOp.hpp b/mllm/backends/cpu/ops/EinsumOp.hpp similarity index 100% rename from mllm/core/aops/ArgSortOp.hpp rename to mllm/backends/cpu/ops/EinsumOp.hpp diff --git a/mllm/backends/cpu/ops/InterpolateOp.cpp b/mllm/backends/cpu/ops/InterpolateOp.cpp new file mode 100644 index 00000000..66fdc3d7 --- /dev/null +++ b/mllm/backends/cpu/ops/InterpolateOp.cpp @@ -0,0 +1,518 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License.
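The Conv2D forward above lowers each batch element to a single GEMM (packed weight [M x K] times im2col'd input [K x N]) followed by a row-broadcast bias. For cross-checking that lowering, a direct (slow) single-batch NCHW reference convolution (standalone sketch, not part of the diff):

// Naive NCHW conv2d reference; useful for unit-testing the im2col + GEMM path.
void conv2d_ref(const float* x, const float* w, const float* bias, float* y,
                int C, int H, int W, int OC, int kh, int kw,
                int ph, int pw, int sh, int sw, int dh, int dw) {
  int OH = (H + 2 * ph - (dh * (kh - 1) + 1)) / sh + 1;
  int OW = (W + 2 * pw - (dw * (kw - 1) + 1)) / sw + 1;
  for (int oc = 0; oc < OC; ++oc)
    for (int oy = 0; oy < OH; ++oy)
      for (int ox = 0; ox < OW; ++ox) {
        float acc = bias ? bias[oc] : 0.f;
        for (int c = 0; c < C; ++c)
          for (int ky = 0; ky < kh; ++ky)
            for (int kx = 0; kx < kw; ++kx) {
              // Out-of-range taps contribute zero (zero padding).
              int iy = oy * sh - ph + ky * dh, ix = ox * sw - pw + kx * dw;
              if (iy < 0 || iy >= H || ix < 0 || ix >= W) continue;
              acc += x[(c * H + iy) * W + ix] * w[((oc * C + c) * kh + ky) * kw + kx];
            }
        y[(oc * OH + oy) * OW + ox] = acc;
      }
}

The GEMM result is laid out as [out_channels, out_h * out_w], which is exactly the NCHW output of one batch element, so no extra reordering is needed after the matmul.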
+ +#include +#include +#include +#include "mllm/backends/cpu/ops/InterpolateOp.hpp" + +namespace mllm::cpu { + +CPUInterpolateOp::CPUInterpolateOp(const aops::InterpolateOpOptions& options) : aops::InterpolateOp(options) {} + +// Helper function to compute scale factors +static void compute_scale_factors(const std::vector& input_size, const std::vector& output_size, + std::vector& scale_factors, bool align_corners) { + scale_factors.resize(input_size.size()); + for (size_t i = 0; i < input_size.size(); ++i) { + if (output_size[i] == 1) { + scale_factors[i] = 0.0f; + } else if (align_corners) { + scale_factors[i] = static_cast(input_size[i] - 1) / (output_size[i] - 1); + } else { + scale_factors[i] = static_cast(input_size[i]) / output_size[i]; + } + } +} + +// Nearest neighbor interpolation for 2D data (4D tensor with NCHW layout) +template +static void nearest_interpolate_2d(const T* input_data, T* output_data, const std::vector& input_shape, + const std::vector& output_shape, bool align_corners) { + const int batch_size = input_shape[0]; + const int channels = input_shape[1]; + const int input_height = input_shape[2]; + const int input_width = input_shape[3]; + const int output_height = output_shape[2]; + const int output_width = output_shape[3]; + + std::vector scale_factors; + compute_scale_factors({input_height, input_width}, {output_height, output_width}, scale_factors, align_corners); + + const float height_scale = scale_factors[0]; + const float width_scale = scale_factors[1]; + + for (int n = 0; n < batch_size; ++n) { + for (int c = 0; c < channels; ++c) { + for (int oh = 0; oh < output_height; ++oh) { + for (int ow = 0; ow < output_width; ++ow) { + // Compute source indices + float ih_f = align_corners ? oh * height_scale : (oh + 0.5f) * height_scale - 0.5f; + float iw_f = align_corners ? ow * width_scale : (ow + 0.5f) * width_scale - 0.5f; + + // Round to nearest neighbor + int ih = std::min(static_cast(std::round(ih_f)), input_height - 1); + int iw = std::min(static_cast(std::round(iw_f)), input_width - 1); + + // Handle edge cases + ih = std::max(0, ih); + iw = std::max(0, iw); + + // Copy value + const int input_idx = ((n * channels + c) * input_height + ih) * input_width + iw; + const int output_idx = ((n * channels + c) * output_height + oh) * output_width + ow; + output_data[output_idx] = input_data[input_idx]; + } + } + } + } +} + +// Linear interpolation for 1D data (3D tensor with NCL layout) +template +static void linear_interpolate_1d(const T* input_data, T* output_data, const std::vector& input_shape, + const std::vector& output_shape, bool align_corners) { + const int batch_size = input_shape[0]; + const int channels = input_shape[1]; + const int input_length = input_shape[2]; + const int output_length = output_shape[2]; + + std::vector scale_factors; + compute_scale_factors({input_length}, {output_length}, scale_factors, align_corners); + + const float length_scale = scale_factors[0]; + + for (int n = 0; n < batch_size; ++n) { + for (int c = 0; c < channels; ++c) { + for (int ol = 0; ol < output_length; ++ol) { + // Compute source position + float il_f = align_corners ? 
ol * length_scale : (ol + 0.5f) * length_scale - 0.5f; + + // Get the neighboring indices + int il_low = static_cast(std::floor(il_f)); + int il_high = il_low + 1; + + // Compute weights + float w_high = il_f - il_low; + float w_low = 1.0f - w_high; + + // Handle boundary conditions + il_low = std::max(0, std::min(il_low, input_length - 1)); + il_high = std::max(0, std::min(il_high, input_length - 1)); + + // Compute indices + const int input_idx_low = (n * channels + c) * input_length + il_low; + const int input_idx_high = (n * channels + c) * input_length + il_high; + const int output_idx = (n * channels + c) * output_length + ol; + + // Linear interpolation + output_data[output_idx] = static_cast(w_low * input_data[input_idx_low] + w_high * input_data[input_idx_high]); + } + } + } +} + +// Bilinear interpolation for 2D data (4D tensor with NCHW layout) +template +static void bilinear_interpolate_2d(const T* input_data, T* output_data, const std::vector& input_shape, + const std::vector& output_shape, bool align_corners) { + const int batch_size = input_shape[0]; + const int channels = input_shape[1]; + const int input_height = input_shape[2]; + const int input_width = input_shape[3]; + const int output_height = output_shape[2]; + const int output_width = output_shape[3]; + + std::vector scale_factors; + compute_scale_factors({input_height, input_width}, {output_height, output_width}, scale_factors, align_corners); + + const float height_scale = scale_factors[0]; + const float width_scale = scale_factors[1]; + + for (int n = 0; n < batch_size; ++n) { + for (int c = 0; c < channels; ++c) { + for (int oh = 0; oh < output_height; ++oh) { + for (int ow = 0; ow < output_width; ++ow) { + // Compute source position + float ih_f = align_corners ? oh * height_scale : (oh + 0.5f) * height_scale - 0.5f; + float iw_f = align_corners ? 
ow * width_scale : (ow + 0.5f) * width_scale - 0.5f; + + // Get the four neighboring pixels + int ih_low = static_cast(std::floor(ih_f)); + int iw_low = static_cast(std::floor(iw_f)); + int ih_high = ih_low + 1; + int iw_high = iw_low + 1; + + // Compute weights + float h_weight_high = ih_f - ih_low; + float w_weight_high = iw_f - iw_low; + float h_weight_low = 1.0f - h_weight_high; + float w_weight_low = 1.0f - w_weight_high; + + // Handle boundary conditions + ih_low = std::max(0, std::min(ih_low, input_height - 1)); + ih_high = std::max(0, std::min(ih_high, input_height - 1)); + iw_low = std::max(0, std::min(iw_low, input_width - 1)); + iw_high = std::max(0, std::min(iw_high, input_width - 1)); + + // Compute indices for the four corners + const int idx_top_left = ((n * channels + c) * input_height + ih_low) * input_width + iw_low; + const int idx_top_right = ((n * channels + c) * input_height + ih_low) * input_width + iw_high; + const int idx_bottom_left = ((n * channels + c) * input_height + ih_high) * input_width + iw_low; + const int idx_bottom_right = ((n * channels + c) * input_height + ih_high) * input_width + iw_high; + + // Compute output index + const int output_idx = ((n * channels + c) * output_height + oh) * output_width + ow; + + // Bilinear interpolation + output_data[output_idx] = static_cast(h_weight_low * w_weight_low * input_data[idx_top_left] + + h_weight_low * w_weight_high * input_data[idx_top_right] + + h_weight_high * w_weight_low * input_data[idx_bottom_left] + + h_weight_high * w_weight_high * input_data[idx_bottom_right]); + } + } + } + } +} + +// Bicubic interpolation helper function +static float cubic_interp1d(float x0, float x1, float x2, float x3, float t) { + float a = -0.5f * x0 + 1.5f * x1 - 1.5f * x2 + 0.5f * x3; + float b = x0 - 2.5f * x1 + 2.0f * x2 - 0.5f * x3; + float c = -0.5f * x0 + 0.5f * x2; + float d = x1; + + return ((a * t + b) * t + c) * t + d; +} + +// Bicubic interpolation for 2D data (4D tensor with NCHW layout) +template +static void bicubic_interpolate_2d(const T* input_data, T* output_data, const std::vector& input_shape, + const std::vector& output_shape, bool align_corners) { + const int batch_size = input_shape[0]; + const int channels = input_shape[1]; + const int input_height = input_shape[2]; + const int input_width = input_shape[3]; + const int output_height = output_shape[2]; + const int output_width = output_shape[3]; + + std::vector scale_factors; + compute_scale_factors({input_height, input_width}, {output_height, output_width}, scale_factors, align_corners); + + const float height_scale = scale_factors[0]; + const float width_scale = scale_factors[1]; + + auto get_value_bounded = [&](int n, int c, int h, int w) -> T { + h = std::max(0, std::min(h, input_height - 1)); + w = std::max(0, std::min(w, input_width - 1)); + return input_data[((n * channels + c) * input_height + h) * input_width + w]; + }; + + for (int n = 0; n < batch_size; ++n) { + for (int c = 0; c < channels; ++c) { + for (int oh = 0; oh < output_height; ++oh) { + for (int ow = 0; ow < output_width; ++ow) { + // Compute source position + float ih_f = align_corners ? oh * height_scale : (oh + 0.5f) * height_scale - 0.5f; + float iw_f = align_corners ? 
ow * width_scale : (ow + 0.5f) * width_scale - 0.5f; + + // Get the integer part + int ih = static_cast<int>(std::floor(ih_f)); + int iw = static_cast<int>(std::floor(iw_f)); + + // Get fractional part + float h_frac = ih_f - ih; + float w_frac = iw_f - iw; + + // Compute output index + const int output_idx = ((n * channels + c) * output_height + oh) * output_width + ow; + + // Perform bicubic interpolation + float coeffs[4]; + + // Interpolate along each row + for (int i = 0; i < 4; ++i) { + float row_values[4]; + for (int j = 0; j < 4; ++j) { row_values[j] = static_cast<float>(get_value_bounded(n, c, ih + i - 1, iw + j - 1)); } + coeffs[i] = cubic_interp1d(row_values[0], row_values[1], row_values[2], row_values[3], w_frac); + } + + // Interpolate along column + float result = cubic_interp1d(coeffs[0], coeffs[1], coeffs[2], coeffs[3], h_frac); + + // Note: the result is written back unclamped, so bicubic may overshoot/undershoot the input range. + output_data[output_idx] = static_cast<T>(result); + } + } + } + } +} + +// Trilinear interpolation for 3D data (5D tensor with NCDHW layout) +template <typename T> +static void trilinear_interpolate_3d(const T* input_data, T* output_data, const std::vector<int32_t>& input_shape, + const std::vector<int32_t>& output_shape, bool align_corners) { + const int batch_size = input_shape[0]; + const int channels = input_shape[1]; + const int input_depth = input_shape[2]; + const int input_height = input_shape[3]; + const int input_width = input_shape[4]; + const int output_depth = output_shape[2]; + const int output_height = output_shape[3]; + const int output_width = output_shape[4]; + + std::vector<float> scale_factors; + compute_scale_factors({input_depth, input_height, input_width}, {output_depth, output_height, output_width}, scale_factors, + align_corners); + const float depth_scale = scale_factors[0]; + const float height_scale = scale_factors[1]; + const float width_scale = scale_factors[2]; + + for (int n = 0; n < batch_size; ++n) { + for (int c = 0; c < channels; ++c) { + for (int od = 0; od < output_depth; ++od) { + for (int oh = 0; oh < output_height; ++oh) { + for (int ow = 0; ow < output_width; ++ow) { + // Compute source position + float id_f = align_corners ? od * depth_scale : (od + 0.5f) * depth_scale - 0.5f; + float ih_f = align_corners ? oh * height_scale : (oh + 0.5f) * height_scale - 0.5f; + float iw_f = align_corners ?
ow * width_scale : (ow + 0.5f) * width_scale - 0.5f; + + // Get the eight neighboring voxels + int id_low = static_cast(std::floor(id_f)); + int ih_low = static_cast(std::floor(ih_f)); + int iw_low = static_cast(std::floor(iw_f)); + int id_high = id_low + 1; + int ih_high = ih_low + 1; + int iw_high = iw_low + 1; + + // Compute weights + float d_weight_high = id_f - id_low; + float h_weight_high = ih_f - ih_low; + float w_weight_high = iw_f - iw_low; + float d_weight_low = 1.0f - d_weight_high; + float h_weight_low = 1.0f - h_weight_high; + float w_weight_low = 1.0f - w_weight_high; + + // Handle boundary conditions + id_low = std::max(0, std::min(id_low, input_depth - 1)); + id_high = std::max(0, std::min(id_high, input_depth - 1)); + ih_low = std::max(0, std::min(ih_low, input_height - 1)); + ih_high = std::max(0, std::min(ih_high, input_height - 1)); + iw_low = std::max(0, std::min(iw_low, input_width - 1)); + iw_high = std::max(0, std::min(iw_high, input_width - 1)); + + // Compute indices for the eight corners + const int idx_d0_h0_w0 = + (((n * channels + c) * input_depth + id_low) * input_height + ih_low) * input_width + iw_low; + const int idx_d0_h0_w1 = + (((n * channels + c) * input_depth + id_low) * input_height + ih_low) * input_width + iw_high; + const int idx_d0_h1_w0 = + (((n * channels + c) * input_depth + id_low) * input_height + ih_high) * input_width + iw_low; + const int idx_d0_h1_w1 = + (((n * channels + c) * input_depth + id_low) * input_height + ih_high) * input_width + iw_high; + const int idx_d1_h0_w0 = + (((n * channels + c) * input_depth + id_high) * input_height + ih_low) * input_width + iw_low; + const int idx_d1_h0_w1 = + (((n * channels + c) * input_depth + id_high) * input_height + ih_low) * input_width + iw_high; + const int idx_d1_h1_w0 = + (((n * channels + c) * input_depth + id_high) * input_height + ih_high) * input_width + iw_low; + const int idx_d1_h1_w1 = + (((n * channels + c) * input_depth + id_high) * input_height + ih_high) * input_width + iw_high; + + // Compute output index + const int output_idx = (((n * channels + c) * output_depth + od) * output_height + oh) * output_width + ow; + + // Trilinear interpolation + output_data[output_idx] = + static_cast(d_weight_low * h_weight_low * w_weight_low * input_data[idx_d0_h0_w0] + + d_weight_low * h_weight_low * w_weight_high * input_data[idx_d0_h0_w1] + + d_weight_low * h_weight_high * w_weight_low * input_data[idx_d0_h1_w0] + + d_weight_low * h_weight_high * w_weight_high * input_data[idx_d0_h1_w1] + + d_weight_high * h_weight_low * w_weight_low * input_data[idx_d1_h0_w0] + + d_weight_high * h_weight_low * w_weight_high * input_data[idx_d1_h0_w1] + + d_weight_high * h_weight_high * w_weight_low * input_data[idx_d1_h1_w0] + + d_weight_high * h_weight_high * w_weight_high * input_data[idx_d1_h1_w1]); + } + } + } + } + } +} + +void CPUInterpolateOp::forward(const std::vector& inputs, std::vector& outputs) { + const auto& X = inputs[0]; + auto& Y = outputs[0]; + + // Get shapes + const auto& input_shape = X.shape(); + const auto& output_shape = Y.shape(); + const int input_dim = static_cast(input_shape.size()); + + // Get options + const auto& mode = options().mode; + const bool align_corners = options().align_corners; + const bool antialias = options().antialias; + + // Allocate output tensor if not already allocated + if (Y.isNil() || Y.numel() == 0) { Y = Tensor::empty(output_shape, X.dtype(), X.device()).alloc(); } + + switch (X.dtype()) { + case kFloat32: { + const float* input_data = X.ptr(); + 
float* output_data = Y.ptr(); + + // Gaussian blur utility (separable) for NCHW + auto gaussian_blur_nchw = [&](const float* src, int N, int C, int H, int W, float sigma) { + int radius = std::max(1, static_cast(std::ceil(3.f * sigma))); + std::vector kernel(2 * radius + 1); + float sumw = 0.f; + for (int i = -radius; i <= radius; ++i) { + float w = std::exp(-(i * i) / (2.f * sigma * sigma)); + kernel[i + radius] = w; + sumw += w; + } + for (float& w : kernel) { w /= sumw; } + + size_t numel = static_cast(N) * C * H * W; + std::vector tmp(numel, 0.f); + std::vector dst(numel, 0.f); + + // Horizontal pass + for (int n = 0; n < N; ++n) { + for (int c = 0; c < C; ++c) { + for (int h = 0; h < H; ++h) { + for (int w = 0; w < W; ++w) { + float acc = 0.f; + for (int k = -radius; k <= radius; ++k) { + int x = std::min(std::max(w + k, 0), W - 1); + size_t idx = ((static_cast(n) * C + c) * H + h) * W + x; + acc += kernel[k + radius] * src[idx]; + } + size_t oidx = ((static_cast(n) * C + c) * H + h) * W + w; + tmp[oidx] = acc; + } + } + } + } + // Vertical pass + for (int n = 0; n < N; ++n) { + for (int c = 0; c < C; ++c) { + for (int h = 0; h < H; ++h) { + for (int w = 0; w < W; ++w) { + float acc = 0.f; + for (int k = -radius; k <= radius; ++k) { + int y = std::min(std::max(h + k, 0), H - 1); + size_t idx = ((static_cast(n) * C + c) * H + y) * W + w; + acc += kernel[k + radius] * tmp[idx]; + } + size_t oidx = ((static_cast(n) * C + c) * H + h) * W + w; + dst[oidx] = acc; + } + } + } + } + return dst; + }; + + // Choose interpolation method based on mode and input dimensions + if (mode == aops::InterpolateOpMode::kNearest) { + if (input_dim == 3) { // NCL format + nearest_interpolate_2d(input_data, output_data, {input_shape[0], input_shape[1], 1, input_shape[2]}, + {output_shape[0], output_shape[1], 1, output_shape[2]}, align_corners); + } else if (input_dim == 4) { // NCHW format + nearest_interpolate_2d(input_data, output_data, input_shape, output_shape, align_corners); + } else if (input_dim == 5) { // NCDHW format + // For 3D data, we handle each depth slice separately using 2D nearest neighbor + const int batch_size = input_shape[0]; + const int channels = input_shape[1]; + const int input_depth = input_shape[2]; + const int output_depth = output_shape[2]; + + std::vector scale_factors; + compute_scale_factors({input_depth}, {output_depth}, scale_factors, align_corners); + const float depth_scale = scale_factors[0]; + + for (int od = 0; od < output_depth; ++od) { + // Compute source depth index + float id_f = align_corners ? 
od * depth_scale : (od + 0.5f) * depth_scale - 0.5f; + int id = std::min(static_cast<int>(std::round(id_f)), input_depth - 1); + id = std::max(0, id); + + // Process each 2D slice + for (int n = 0; n < batch_size; ++n) { + for (int c = 0; c < channels; ++c) { + const float* input_slice = + input_data + (((n * channels + c) * input_depth + id) * input_shape[3] * input_shape[4]); + float* output_slice = + output_data + (((n * channels + c) * output_depth + od) * output_shape[3] * output_shape[4]); + + nearest_interpolate_2d(input_slice, output_slice, {1, 1, input_shape[3], input_shape[4]}, + {1, 1, output_shape[3], output_shape[4]}, align_corners); + } + } + } + } else { + NYI("CPUInterpolateOp::forward nearest mode does not support input dim {}", input_dim); + } + } else if (mode == aops::InterpolateOpMode::kLinear) { + if (input_dim == 3) { // NCL format + linear_interpolate_1d(input_data, output_data, input_shape, output_shape, align_corners); + } else { + NYI("CPUInterpolateOp::forward linear mode only supports 3D input (NCL format)"); + } + } else if (mode == aops::InterpolateOpMode::kBilinear) { + if (input_dim == 4) { // NCHW format + // Antialias for downsampling + std::vector<float> scale_factors; + compute_scale_factors({input_shape[2], input_shape[3]}, {output_shape[2], output_shape[3]}, scale_factors, + align_corners); + const float h_scale = scale_factors[0]; + const float w_scale = scale_factors[1]; + const float* src_ptr = input_data; + std::vector<float> blurred; + if (antialias && (h_scale > 1.f || w_scale > 1.f)) { + float sigma = 0.5f * std::max(h_scale, w_scale); + blurred = gaussian_blur_nchw(input_data, input_shape[0], input_shape[1], input_shape[2], input_shape[3], sigma); + src_ptr = blurred.data(); + } + bilinear_interpolate_2d(src_ptr, output_data, input_shape, output_shape, align_corners); + } else { + NYI("CPUInterpolateOp::forward bilinear mode only supports 4D input (NCHW format)"); + } + } else if (mode == aops::InterpolateOpMode::kBicubic) { + if (input_dim == 4) { // NCHW format + // Antialias for downsampling + std::vector<float> scale_factors; + compute_scale_factors({input_shape[2], input_shape[3]}, {output_shape[2], output_shape[3]}, scale_factors, + align_corners); + const float h_scale = scale_factors[0]; + const float w_scale = scale_factors[1]; + const float* src_ptr = input_data; + std::vector<float> blurred; + if (antialias && (h_scale > 1.f || w_scale > 1.f)) { + float sigma = 0.5f * std::max(h_scale, w_scale); + blurred = gaussian_blur_nchw(input_data, input_shape[0], input_shape[1], input_shape[2], input_shape[3], sigma); + src_ptr = blurred.data(); + } + bicubic_interpolate_2d(src_ptr, output_data, input_shape, output_shape, align_corners); + } else { + NYI("CPUInterpolateOp::forward bicubic mode only supports 4D input (NCHW format)"); + } + } else if (mode == aops::InterpolateOpMode::kTrilinear) { + if (input_dim == 5) { // NCDHW format + trilinear_interpolate_3d(input_data, output_data, input_shape, output_shape, align_corners); + } else { + NYI("CPUInterpolateOp::forward trilinear mode only supports 5D input (NCDHW format)"); + } + } else { + NYI("CPUInterpolateOp::forward unknown interpolation mode"); + } + break; + } + default: NYI("CPUInterpolateOp::forward does not support dtype {}", nameOfType(X.dtype())); break; + } +} + +} // namespace mllm::cpu diff --git a/mllm/backends/cpu/ops/InterpolateOp.hpp b/mllm/backends/cpu/ops/InterpolateOp.hpp new file mode 100644 index 00000000..24940eb2 --- /dev/null +++ b/mllm/backends/cpu/ops/InterpolateOp.hpp @@ -0,0 +1,25 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/aops/InterpolateOp.hpp" + +namespace mllm::cpu { + +class CPUInterpolateOp final : public aops::InterpolateOp { + public: + explicit CPUInterpolateOp(const aops::InterpolateOpOptions& options); + + void forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) override; +}; + +class CPUInterpolateOpFactory : public TypedOpFactory<aops::InterpolateOpOptions> { + public: + std::shared_ptr<BaseOp> createOpImpl(const aops::InterpolateOpOptions& options) override { + return std::make_shared<CPUInterpolateOp>(options); + } +}; + +} // namespace mllm::cpu diff --git a/mllm/backends/cpu/ops/LayerNorm2DOp.cpp b/mllm/backends/cpu/ops/LayerNorm2DOp.cpp new file mode 100644 index 00000000..6d9e9ebb --- /dev/null +++ b/mllm/backends/cpu/ops/LayerNorm2DOp.cpp @@ -0,0 +1,38 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include +#include "mllm/backends/cpu/ops/LayerNorm2DOp.hpp" +#include "mllm/backends/cpu/kernels/Kernels.hpp" + +namespace mllm::cpu { + +CPULayerNorm2DOp::CPULayerNorm2DOp(const aops::LayerNorm2DOpOptions& options) : aops::LayerNorm2DOp(options) {} + +void CPULayerNorm2DOp::forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) { + auto& i = inputs[0]; + auto& o = outputs[0]; + + auto i_shape = i.shape(); + auto N = i_shape[0]; + auto C = i_shape[1]; + auto H = i_shape[2]; + auto W = i_shape[3]; + + switch (i.dtype()) { + case kFloat32: { +#if defined(MLLM_HOST_ARCH_X86_64) || defined(MLLM_HOST_ARCH_X86) + NYI("Not implemented for x86_64"); +#elif defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + arm::layernorm2d_fp32(i.ptr<float>(), weight_.ptr<float>(), bias_.ptr<float>(), o.ptr<float>(), N, + C, H, W, options_.eps); +#endif + break; + } + default: { + NYI("Unsupported data type"); + } + } +} + +} // namespace mllm::cpu diff --git a/mllm/backends/cpu/ops/LayerNorm2DOp.hpp b/mllm/backends/cpu/ops/LayerNorm2DOp.hpp new file mode 100644 index 00000000..b6d9a917 --- /dev/null +++ b/mllm/backends/cpu/ops/LayerNorm2DOp.hpp @@ -0,0 +1,25 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/aops/LayerNorm2DOp.hpp" + +namespace mllm::cpu { + +class CPULayerNorm2DOp final : public aops::LayerNorm2DOp { + public: + explicit CPULayerNorm2DOp(const aops::LayerNorm2DOpOptions& options); + + void forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) override; +}; + +class CPULayerNorm2DOpFactory : public TypedOpFactory<aops::LayerNorm2DOpOptions> { + public: + std::shared_ptr<BaseOp> createOpImpl(const aops::LayerNorm2DOpOptions& options) override { + return std::make_shared<CPULayerNorm2DOp>(options); + } +}; + +} // namespace mllm::cpu diff --git a/mllm/backends/cpu/ops/MaskedScatterOp.cpp b/mllm/backends/cpu/ops/MaskedScatterOp.cpp new file mode 100644 index 00000000..e41c568e --- /dev/null +++ b/mllm/backends/cpu/ops/MaskedScatterOp.cpp @@ -0,0 +1,219 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License.
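The masked-scatter op that follows first broadcasts dst, mask, and src to a common shape using NumPy-style right-aligned rules (see calculateBroadcastShape below). A compact standalone sketch of that rule:

#include <algorithm>
#include <vector>

// Right-aligned broadcast of two shapes: pad the shorter with leading 1s,
// then each dimension must match or one side must be 1.
std::vector<int> broadcast(const std::vector<int>& a, const std::vector<int>& b) {
  size_t n = std::max(a.size(), b.size());
  std::vector<int> pa(n, 1), pb(n, 1), out(n);
  std::copy(a.begin(), a.end(), pa.begin() + (n - a.size()));
  std::copy(b.begin(), b.end(), pb.begin() + (n - b.size()));
  for (size_t i = 0; i < n; ++i)
    out[i] = (pa[i] == 1) ? pb[i] : pa[i];  // assumes the shapes are compatible
  return out;
}
// e.g. broadcast({4, 1, 3}, {2, 3}) == {4, 2, 3}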
+ +#include "mllm/backends/cpu/ops/MaskedScatterOp.hpp" +#include "mllm/core/Tensor.hpp" +#include "mllm/core/OpTypes.hpp" +#include +#include +#include + +namespace mllm::cpu { + +CPUMaskedScatterOp::CPUMaskedScatterOp(const aops::MaskedScatterOpOptions& options) : aops::MaskedScatterOp(options) {} + +// Helper function to calculate broadcast shape +std::vector calculateBroadcastShape(const std::vector& shape_a, const std::vector& shape_b) { + // Determine the maximum number of dimensions + size_t max_ndim = std::max(shape_a.size(), shape_b.size()); + std::vector broadcast_shape(max_ndim); + + // Pad shapes to the same number of dimensions + std::vector padded_a(max_ndim, 1); + std::vector padded_b(max_ndim, 1); + + // Copy original shapes to the end (right-aligned) + std::copy(shape_a.begin(), shape_a.end(), padded_a.begin() + (max_ndim - shape_a.size())); + std::copy(shape_b.begin(), shape_b.end(), padded_b.begin() + (max_ndim - shape_b.size())); + + // Calculate broadcast shape + for (size_t i = 0; i < max_ndim; ++i) { + if (padded_a[i] == padded_b[i]) { + broadcast_shape[i] = padded_a[i]; + } else if (padded_a[i] == 1) { + broadcast_shape[i] = padded_b[i]; + } else if (padded_b[i] == 1) { + broadcast_shape[i] = padded_a[i]; + } else { + // Cannot broadcast, should not happen in valid cases + broadcast_shape[i] = std::max(padded_a[i], padded_b[i]); + } + } + + return broadcast_shape; +} + +// Helper function to calculate strides for broadcasting +std::vector calculateStrides(const std::vector& shape) { + std::vector strides(shape.size(), 1); + for (int i = shape.size() - 2; i >= 0; --i) { strides[i] = strides[i + 1] * shape[i + 1]; } + return strides; +} + +// Helper function to convert multi-dimensional index to linear index +int32_t getLinearIndex(const std::vector& indices, const std::vector& strides) { + int32_t linear_index = 0; + for (size_t i = 0; i < indices.size(); ++i) { linear_index += indices[i] * strides[i]; } + return linear_index; +} + +// Helper function to convert linear index to multi-dimensional indices +std::vector getMultiDimIndices(int32_t linear_index, const std::vector& shape) { + std::vector indices(shape.size()); + for (int i = shape.size() - 1; i >= 0; --i) { + indices[i] = linear_index % shape[i]; + linear_index /= shape[i]; + } + return indices; +} + +void CPUMaskedScatterOp::forward(const std::vector& inputs, std::vector& outputs) { + // dst, mask, src + auto& dst = inputs[0]; + auto& mask = inputs[1]; + auto& src = inputs[2]; + + MLLM_RT_ASSERT_EQ(mask.dtype(), kInt8); + + // dst and output should be the same tensor (in-place operation) + // But we still need to ensure output has the correct shape + auto& output = outputs[0]; + + // Get shapes + auto dst_shape = dst.shape(); + auto mask_shape = mask.shape(); + auto src_shape = src.shape(); + + // Calculate broadcast shape for all tensors + auto broadcast_shape = calculateBroadcastShape(dst_shape, mask_shape); + broadcast_shape = calculateBroadcastShape(broadcast_shape, src_shape); + + // Calculate strides for broadcasting + auto dst_strides = calculateStrides(dst_shape); + auto mask_strides = calculateStrides(mask_shape); + auto src_strides = calculateStrides(src_shape); + auto broadcast_strides = calculateStrides(broadcast_shape); + + // Calculate total elements + int32_t total_elements = std::accumulate(broadcast_shape.begin(), broadcast_shape.end(), 1, std::multiplies<>()); + + // Check data types + MLLM_RT_ASSERT(dst.dtype() == src.dtype()); + + // Handle different data types + if (dst.dtype() == 
MLLM_TYPE_F32) { + float* dst_ptr = dst.ptr(); + float* src_ptr = src.ptr(); + uint8_t* mask_ptr = mask.ptr(); + + for (int32_t i = 0; i < total_elements; ++i) { + // Convert linear index to multi-dimensional indices + auto indices = getMultiDimIndices(i, broadcast_shape); + + // Calculate index for mask with broadcasting + int32_t mask_linear_index = 0; + if (mask_shape.size() == 1 && mask_shape[0] == 1) { + // Scalar mask + mask_linear_index = 0; + } else { + // Multi-dimensional mask + std::vector mask_indices(mask_shape.size()); + int offset = broadcast_shape.size() - mask_shape.size(); + for (size_t j = 0; j < mask_shape.size(); ++j) { mask_indices[j] = indices[j + offset] % mask_shape[j]; } + mask_linear_index = getLinearIndex(mask_indices, mask_strides); + } + + // If mask is true, copy from src to dst + if (mask_ptr[mask_linear_index] != 0) { + // Calculate index for dst + int32_t dst_linear_index = 0; + if (dst_shape.size() == 1 && dst_shape[0] == 1) { + // Scalar dst + dst_linear_index = 0; + } else { + // Multi-dimensional dst + std::vector dst_indices(dst_shape.size()); + int offset = broadcast_shape.size() - dst_shape.size(); + for (size_t j = 0; j < dst_shape.size(); ++j) { dst_indices[j] = indices[j + offset] % dst_shape[j]; } + dst_linear_index = getLinearIndex(dst_indices, dst_strides); + } + + // Calculate index for src with broadcasting + int32_t src_linear_index = 0; + if (src_shape.size() == 1 && src_shape[0] == 1) { + // Scalar src + src_linear_index = 0; + } else { + // Multi-dimensional src + std::vector src_indices(src_shape.size()); + int offset = broadcast_shape.size() - src_shape.size(); + for (size_t j = 0; j < src_shape.size(); ++j) { src_indices[j] = indices[j + offset] % src_shape[j]; } + src_linear_index = getLinearIndex(src_indices, src_strides); + } + + dst_ptr[dst_linear_index] = src_ptr[src_linear_index]; + } + } + } else if (dst.dtype() == MLLM_TYPE_F16) { + mllm_fp16_t* dst_ptr = dst.ptr(); + mllm_fp16_t* src_ptr = src.ptr(); + uint8_t* mask_ptr = mask.ptr(); + + for (int32_t i = 0; i < total_elements; ++i) { + // Convert linear index to multi-dimensional indices + auto indices = getMultiDimIndices(i, broadcast_shape); + + // Calculate index for mask with broadcasting + int32_t mask_linear_index = 0; + if (mask_shape.size() == 1 && mask_shape[0] == 1) { + // Scalar mask + mask_linear_index = 0; + } else { + // Multi-dimensional mask + std::vector mask_indices(mask_shape.size()); + int offset = broadcast_shape.size() - mask_shape.size(); + for (size_t j = 0; j < mask_shape.size(); ++j) { mask_indices[j] = indices[j + offset] % mask_shape[j]; } + mask_linear_index = getLinearIndex(mask_indices, mask_strides); + } + + // If mask is true, copy from src to dst + if (mask_ptr[mask_linear_index] != 0) { + // Calculate index for dst + int32_t dst_linear_index = 0; + if (dst_shape.size() == 1 && dst_shape[0] == 1) { + // Scalar dst + dst_linear_index = 0; + } else { + // Multi-dimensional dst + std::vector dst_indices(dst_shape.size()); + int offset = broadcast_shape.size() - dst_shape.size(); + for (size_t j = 0; j < dst_shape.size(); ++j) { dst_indices[j] = indices[j + offset] % dst_shape[j]; } + dst_linear_index = getLinearIndex(dst_indices, dst_strides); + } + + // Calculate index for src with broadcasting + int32_t src_linear_index = 0; + if (src_shape.size() == 1 && src_shape[0] == 1) { + // Scalar src + src_linear_index = 0; + } else { + // Multi-dimensional src + std::vector src_indices(src_shape.size()); + int offset = broadcast_shape.size() - 
src_shape.size(); + for (size_t j = 0; j < src_shape.size(); ++j) { src_indices[j] = indices[j + offset] % src_shape[j]; } + src_linear_index = getLinearIndex(src_indices, src_strides); + } + + dst_ptr[dst_linear_index] = src_ptr[src_linear_index]; + } + } + } else { + // For other data types, we could add support similarly + NYI("Unsupported data type for MaskedScatter operation"); + } + + // Copy result to output + if (output.ptr() != dst.ptr()) { std::memcpy(output.ptr(), dst.ptr(), dst.bytes()); } +} + +} // namespace mllm::cpu diff --git a/mllm/backends/cpu/ops/MaskedScatterOp.hpp b/mllm/backends/cpu/ops/MaskedScatterOp.hpp new file mode 100644 index 00000000..d61fa90b --- /dev/null +++ b/mllm/backends/cpu/ops/MaskedScatterOp.hpp @@ -0,0 +1,25 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/aops/MaskedScatterOp.hpp" + +namespace mllm::cpu { + +class CPUMaskedScatterOp final : public aops::MaskedScatterOp { + public: + explicit CPUMaskedScatterOp(const aops::MaskedScatterOpOptions& options); + + void forward(const std::vector& inputs, std::vector& outputs) override; +}; + +class CPUMaskedScatterOpFactory : public TypedOpFactory { + public: + std::shared_ptr createOpImpl(const aops::MaskedScatterOpOptions& options) override { + return std::make_shared(options); + } +}; + +} // namespace mllm::cpu diff --git a/mllm/backends/cpu/ops/PadOp.cpp b/mllm/backends/cpu/ops/PadOp.cpp new file mode 100644 index 00000000..ba363c85 --- /dev/null +++ b/mllm/backends/cpu/ops/PadOp.cpp @@ -0,0 +1,139 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include +#include +#include +#include "mllm/backends/cpu/ops/PadOp.hpp" + +namespace mllm::cpu { + +CPUPadOp::CPUPadOp(const aops::PadOpOptions& options) : aops::PadOp(options) {} + +// Helper: compute output shape from input shape and pad vector (starting from last dimension) +static std::vector compute_padded_shape(const std::vector& in_shape, const std::vector& pad) { + const int D = static_cast(in_shape.size()); + std::vector out_shape = in_shape; + const int pairs = static_cast(pad.size()) / 2; + for (int i = 0; i < D; ++i) { + int base = 2 * (D - 1 - i); + int32_t before = (base < pad.size()) ? pad[base] : 0; + int32_t after = (base + 1 < pad.size()) ? 
pad[base + 1] : 0; + out_shape[i] = in_shape[i] + before + after; + } + return out_shape; +} + +// Helper: reflect index to [0, size-1] without repeating edge (PyTorch-like reflect) +static inline int32_t reflect_index(int32_t x, int32_t size) { + if (size <= 1) return 0; + int32_t m = size - 1; + // Map x to the range [-(size-1), size-1] + int32_t p = std::abs(x); + int32_t period = 2 * m; + int32_t r = p % period; + if (r >= size) { r = period - r; } + return r; +} + +// Helper: replicate index (clamp) +static inline int32_t replicate_index(int32_t x, int32_t size) { + if (size <= 1) return 0; + return std::max(0, std::min(x, size - 1)); +} + +// Helper: circular index (wrap) +static inline int32_t circular_index(int32_t x, int32_t size) { + if (size <= 1) return 0; + int32_t r = x % size; + if (r < 0) r += size; + return r; +} + +void CPUPadOp::forward(const std::vector& inputs, std::vector& outputs) { + const auto& X = inputs[0]; + auto& Y = outputs[0]; + + const auto& in_shape = X.shape(); + const auto& opts = options(); + const auto& pad = opts.pad; // [last_dim_left, last_dim_right, ..., first_dim_left, first_dim_right] + + // Compute output shape and allocate Y if needed + std::vector out_shape = Y.isNil() ? compute_padded_shape(in_shape, pad) : Y.shape(); + if (Y.isNil() || Y.numel() == 0) { Y = Tensor::empty(out_shape, X.dtype(), X.device()).alloc(); } + + // Precompute pad_before/after per dimension in order + const int D = static_cast(in_shape.size()); + std::vector pad_before(D, 0), pad_after(D, 0); + for (int i = 0; i < D; ++i) { + int base = 2 * (D - 1 - i); + if (base < pad.size()) pad_before[i] = pad[base]; + if (base + 1 < pad.size()) pad_after[i] = pad[base + 1]; + } + + // Only implement float32 for now + switch (X.dtype()) { + case kFloat32: { + const float* in = X.ptr(); + float* out = Y.ptr(); + + // Compute input/output strides for index mapping + std::vector in_stride(D, 1), out_stride(D, 1); + for (int i = D - 2; i >= 0; --i) { in_stride[i] = in_stride[i + 1] * in_shape[i + 1]; } + const auto& actual_out_shape = Y.shape(); + for (int i = D - 2; i >= 0; --i) { out_stride[i] = out_stride[i + 1] * actual_out_shape[i + 1]; } + + const int64_t out_numel = Y.numel(); + const auto mode = opts.mode; + const float constant_val = opts.value; + + // Iterate over all output elements + for (int64_t idx = 0; idx < out_numel; ++idx) { + // Decode idx into coordinates + int64_t t = idx; + std::vector oc(D, 0); + for (int i = 0; i < D; ++i) { + int64_t s = out_stride[i]; + oc[i] = (i == D - 1) ? 
static_cast(t) : static_cast(t / s); + if (i != D - 1) t %= s; + } + + // Map to input coordinates + bool oob = false; + std::vector ic(D, 0); + for (int i = 0; i < D; ++i) { + int32_t xi = oc[i] - pad_before[i]; + int32_t s = in_shape[i]; + switch (mode) { + case aops::PadMode::kConstant: + if (xi < 0 || xi >= s) { + oob = true; + ic[i] = 0; + } else { + ic[i] = xi; + } + break; + case aops::PadMode::kReflect: ic[i] = reflect_index(xi, s); break; + case aops::PadMode::kReplicate: ic[i] = replicate_index(xi, s); break; + case aops::PadMode::kCircular: ic[i] = circular_index(xi, s); break; + default: ic[i] = replicate_index(xi, s); break; + } + } + + if (mode == aops::PadMode::kConstant && oob) { + out[idx] = constant_val; + } else { + // Compute input linear index and copy value + int64_t in_idx = 0; + for (int i = 0; i < D; ++i) { in_idx += static_cast(ic[i]) * in_stride[i]; } + out[idx] = in[in_idx]; + } + } + + break; + } + default: NYI("CPUPadOp::forward not support dtype {}", nameOfType(X.dtype())); break; + } +} + +} // namespace mllm::cpu diff --git a/mllm/backends/cpu/ops/PadOp.hpp b/mllm/backends/cpu/ops/PadOp.hpp new file mode 100644 index 00000000..637ad54e --- /dev/null +++ b/mllm/backends/cpu/ops/PadOp.hpp @@ -0,0 +1,25 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/aops/PadOp.hpp" + +namespace mllm::cpu { + +class CPUPadOp final : public aops::PadOp { + public: + explicit CPUPadOp(const aops::PadOpOptions& options); + + void forward(const std::vector& inputs, std::vector& outputs) override; +}; + +class CPUPadOpFactory : public TypedOpFactory { + public: + std::shared_ptr createOpImpl(const aops::PadOpOptions& options) override { + return std::make_shared(options); + } +}; + +} // namespace mllm::cpu diff --git a/mllm/backends/cpu/ops/SoftmaxOp.cpp b/mllm/backends/cpu/ops/SoftmaxOp.cpp index 09bf01aa..2f412be2 100644 --- a/mllm/backends/cpu/ops/SoftmaxOp.cpp +++ b/mllm/backends/cpu/ops/SoftmaxOp.cpp @@ -14,6 +14,49 @@ void CPUSoftmaxOp::forward(const std::vector& inputs, std::vector 1, options_.getThreads(), loop_idx, 0, loop_times, 1, { +#if defined(MLLM_HOST_ARCH_X86_64) || defined(MLLM_HOST_ARCH_X86) + x86::softmax_v1_fp32(X.ptr() + loop_idx * loop_dims, Y.ptr() + +loop_idx * loop_dims, + loop_dims, 1, options_.getThreads()); +#elif defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + arm::softmax_v1_fp32(X.ptr() + loop_idx * loop_dims, Y.ptr() + +loop_idx * loop_dims, + loop_dims, 1, options_.getThreads()); +#endif + }); + break; + } + case kFloat16: { + MLLM_CONDITIONAL_PARALLEL_FOR(options_.getThreads() > 1, options_.getThreads(), loop_idx, 0, loop_times, 1, { +#if defined(MLLM_HOST_ARCH_X86_64) || defined(MLLM_HOST_ARCH_X86) + NYI("CPUSoftmaxOp::forward not support dtype {}", nameOfType(X.dtype())); +#elif defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + arm::softmax_v1_fp16(X.ptr() + loop_idx * loop_dims, Y.ptr() + +loop_idx * loop_dims, + loop_dims, 1, options_.getThreads()); +#endif + }); + break; + } + default: { + NYI("CPUSoftmaxOp::forward not support dtype {}", nameOfType(X.dtype())); + break; + } + } + return; + } + + // CASE 2. 
FOR NORMAL CASE in LLM (BSHD, BHSD, etc) MLLM_RT_ASSERT_EQ(X.shape().size(), 4); MLLM_RT_ASSERT(options_.axis == -1 || options_.axis == 3); diff --git a/mllm/backends/cpu/ops/StackOp.cpp b/mllm/backends/cpu/ops/StackOp.cpp new file mode 100644 index 00000000..54a40671 --- /dev/null +++ b/mllm/backends/cpu/ops/StackOp.cpp @@ -0,0 +1,117 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include + +#include "mllm/backends/cpu/ops/StackOp.hpp" + +namespace mllm::cpu { + +CPUStackOp::CPUStackOp(const aops::StackOpOptions& options) : aops::StackOp(options) {} + +void CPUStackOp::forward(const std::vector& inputs, std::vector& outputs) { + bool is_all_contiguous = true; + for (auto& input : inputs) { is_all_contiguous &= input.isContiguous(); } + + int stack_dim = options_.dim; + const int input_rank = inputs[0].rank(); + if (stack_dim < 0) { stack_dim += (input_rank + 1); } + + const int N = static_cast(inputs.size()); + + if (is_all_contiguous) { + // Elements before stack_dim + int num_slices = 1; + for (int i = 0; i < stack_dim; ++i) { num_slices *= inputs[0].shape()[i]; } + + // Elements after stack_dim in input (inner block size) + int inner_size = 1; + for (int i = stack_dim; i < input_rank; ++i) { inner_size *= inputs[0].shape()[i]; } + + switch (outputs[0].dtype()) { + case kFloat32: { + mllm_fp32_t* out_ptr = outputs[0].ptr(); + for (int k = 0; k < N; ++k) { + const mllm_fp32_t* in_ptr = inputs[k].ptr(); + for (int slice = 0; slice < num_slices; ++slice) { + const mllm_fp32_t* src = in_ptr + slice * inner_size; + mllm_fp32_t* dst = out_ptr + (slice * N + k) * inner_size; + std::memcpy(dst, src, inner_size * sizeof(mllm_fp32_t)); + } + } + break; + } + case kFloat16: { + mllm_fp16_t* out_ptr = outputs[0].ptr(); + for (int k = 0; k < N; ++k) { + const mllm_fp16_t* in_ptr = inputs[k].ptr(); + for (int slice = 0; slice < num_slices; ++slice) { + const mllm_fp16_t* src = in_ptr + slice * inner_size; + mllm_fp16_t* dst = out_ptr + (slice * N + k) * inner_size; + std::memcpy(dst, src, inner_size * sizeof(mllm_fp16_t)); + } + } + break; + } + default: NYI("Type not supported in stack op"); + } + } else { + MLLM_WARN("Stack op has weak performance for non-contiguous inputs."); + + switch (outputs[0].dtype()) { + case kFloat32: { + for (int k = 0; k < N; ++k) { + auto input = inputs[k]; + std::vector input_shape = input.shape(); + + for (int64_t j = 0; j < input.numel(); ++j) { + std::vector input_index(input_shape.size(), 0); + int64_t temp = j; + for (int d = input_shape.size() - 1; d >= 0; --d) { + input_index[d] = temp % input_shape[d]; + temp /= input_shape[d]; + } + + std::vector output_index; + output_index.reserve(input_shape.size() + 1); + for (int d = 0; d < stack_dim; ++d) { output_index.push_back(input_index[d]); } + output_index.push_back(k); + for (int d = stack_dim; d < input_shape.size(); ++d) { output_index.push_back(input_index[d]); } + + mllm_fp32_t value = input.at(input_index); + outputs[0].at(output_index) = value; + } + } + break; + } + case kFloat16: { + for (int k = 0; k < N; ++k) { + auto input = inputs[k]; + std::vector input_shape = input.shape(); + + for (int64_t j = 0; j < input.numel(); ++j) { + std::vector input_index(input_shape.size(), 0); + int64_t temp = j; + for (int d = input_shape.size() - 1; d >= 0; --d) { + input_index[d] = temp % input_shape[d]; + temp /= input_shape[d]; + } + + std::vector output_index; + output_index.reserve(input_shape.size() + 1); + for (int d = 0; d < stack_dim; ++d) { output_index.push_back(input_index[d]); } + 
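+          // The output coordinate is the input coordinate with the stack index k
+          // inserted at position stack_dim: stacking three [2, 4] tensors at dim 1
+          // sends element (i, j) of input k to output element (i, k, j).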
output_index.push_back(k); + for (int d = stack_dim; d < input_shape.size(); ++d) { output_index.push_back(input_index[d]); } + + mllm_fp16_t value = input.at(input_index); + outputs[0].at(output_index) = value; + } + } + break; + } + default: NYI("Type not supported in stack op"); + } + } +} + +} // namespace mllm::cpu \ No newline at end of file diff --git a/mllm/backends/cpu/ops/StackOp.hpp b/mllm/backends/cpu/ops/StackOp.hpp new file mode 100644 index 00000000..cce1f84c --- /dev/null +++ b/mllm/backends/cpu/ops/StackOp.hpp @@ -0,0 +1,25 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/aops/StackOp.hpp" + +namespace mllm::cpu { + +class CPUStackOp final : public aops::StackOp { + public: + explicit CPUStackOp(const aops::StackOpOptions& options); + + void forward(const std::vector& inputs, std::vector& outputs) override; +}; + +class CPUStackOpFactory : public TypedOpFactory { + public: + std::shared_ptr createOpImpl(const aops::StackOpOptions& options) override { + return std::make_shared(options); + } +}; + +} // namespace mllm::cpu \ No newline at end of file diff --git a/mllm/compile/ir/GeneratedRTTIKind.hpp b/mllm/compile/ir/GeneratedRTTIKind.hpp index 0ec14089..f097306f 100644 --- a/mllm/compile/ir/GeneratedRTTIKind.hpp +++ b/mllm/compile/ir/GeneratedRTTIKind.hpp @@ -1,4 +1,4 @@ -// Auto generated: 2025-09-09 15:27:37 +// Auto generated: 2025-10-27 11:05:42 // do not modify this file #pragma once @@ -71,6 +71,15 @@ enum NodeKind : uint32_t { RK_Op_LinalgIROp_SinOp, RK_Op_LinalgIROp_CosOp, RK_Op_LinalgIROp_PagedAttnOp, + RK_Op_LinalgIROp_LayerNorm2DOp, + RK_Op_LinalgIROp_PadOp, + RK_Op_LinalgIROp_InterpolateOp, + RK_Op_LinalgIROp_EinsumOp, + RK_Op_LinalgIROp_StackOp, + RK_Op_LinalgIROp_MaskedScatterOp, + RK_Op_LinalgIROp_ScatterOp, + RK_Op_LinalgIROp_GatherOp, + RK_Op_LinalgIROp_ArgsortOp, RK_Op_LinalgIROp_Last, RK_Op_GraphIROp, RK_Op_GraphIROp_SubGraphOp, diff --git a/mllm/compile/ir/NodeRTTIClassOfImpl.hpp b/mllm/compile/ir/NodeRTTIClassOfImpl.hpp index 2e3a4e85..cd750459 100644 --- a/mllm/compile/ir/NodeRTTIClassOfImpl.hpp +++ b/mllm/compile/ir/NodeRTTIClassOfImpl.hpp @@ -1,4 +1,4 @@ -// Auto generated: 2025-09-09 15:27:37 +// Auto generated: 2025-10-27 11:05:42 // do not modify this file #pragma once namespace mllm::ir { @@ -183,6 +183,33 @@ struct NodeRTTIClassOfImpl { #define RTTI_RK_OP_LINALGIROP_PAGEDATTNOP_IMPL(v) \ return (v)->getKind() >= RK_Op_LinalgIROp_PagedAttnOp && (v)->getKind() <= RK_Op_LinalgIROp_PagedAttnOp +#define RTTI_RK_OP_LINALGIROP_LAYERNORM2DOP_IMPL(v) \ + return (v)->getKind() >= RK_Op_LinalgIROp_LayerNorm2DOp && (v)->getKind() <= RK_Op_LinalgIROp_LayerNorm2DOp + +#define RTTI_RK_OP_LINALGIROP_PADOP_IMPL(v) \ + return (v)->getKind() >= RK_Op_LinalgIROp_PadOp && (v)->getKind() <= RK_Op_LinalgIROp_PadOp + +#define RTTI_RK_OP_LINALGIROP_INTERPOLATEOP_IMPL(v) \ + return (v)->getKind() >= RK_Op_LinalgIROp_InterpolateOp && (v)->getKind() <= RK_Op_LinalgIROp_InterpolateOp + +#define RTTI_RK_OP_LINALGIROP_EINSUMOP_IMPL(v) \ + return (v)->getKind() >= RK_Op_LinalgIROp_EinsumOp && (v)->getKind() <= RK_Op_LinalgIROp_EinsumOp + +#define RTTI_RK_OP_LINALGIROP_STACKOP_IMPL(v) \ + return (v)->getKind() >= RK_Op_LinalgIROp_StackOp && (v)->getKind() <= RK_Op_LinalgIROp_StackOp + +#define RTTI_RK_OP_LINALGIROP_MASKEDSCATTEROP_IMPL(v) \ + return (v)->getKind() >= RK_Op_LinalgIROp_MaskedScatterOp && (v)->getKind() <= RK_Op_LinalgIROp_MaskedScatterOp + +#define 
RTTI_RK_OP_LINALGIROP_SCATTEROP_IMPL(v) \ + return (v)->getKind() >= RK_Op_LinalgIROp_ScatterOp && (v)->getKind() <= RK_Op_LinalgIROp_ScatterOp + +#define RTTI_RK_OP_LINALGIROP_GATHEROP_IMPL(v) \ + return (v)->getKind() >= RK_Op_LinalgIROp_GatherOp && (v)->getKind() <= RK_Op_LinalgIROp_GatherOp + +#define RTTI_RK_OP_LINALGIROP_ARGSORTOP_IMPL(v) \ + return (v)->getKind() >= RK_Op_LinalgIROp_ArgsortOp && (v)->getKind() <= RK_Op_LinalgIROp_ArgsortOp + #define RTTI_RK_OP_GRAPHIROP_IMPL(v) return (v)->getKind() >= RK_Op_GraphIROp && (v)->getKind() <= RK_Op_GraphIROp_Last #define RTTI_RK_OP_GRAPHIROP_SUBGRAPHOP_IMPL(v) \ diff --git a/mllm/compile/ir/linalg/Op.cpp b/mllm/compile/ir/linalg/Op.cpp index 9f550341..68fa7b02 100644 --- a/mllm/compile/ir/linalg/Op.cpp +++ b/mllm/compile/ir/linalg/Op.cpp @@ -101,4 +101,15 @@ LINALG_AOPS_DECL(OpTypes::kMean, MeanOp); LINALG_AOPS_DECL(OpTypes::kClip, ClipOp); LINALG_AOPS_DECL(OpTypes::kPagedAttn, PagedAttnOp); +LINALG_AOPS_DECL(OpTypes::kLayerNorm2D, LayerNorm2DOp); +LINALG_AOPS_DECL(OpTypes::kPad, PadOp); +LINALG_AOPS_DECL(OpTypes::kInterpolate, InterpolateOp); +LINALG_AOPS_DECL(OpTypes::kEinsum, EinsumOp); +LINALG_AOPS_DECL(OpTypes::kStack, StackOp); +LINALG_AOPS_DECL(OpTypes::kMaskedScatter, MaskedScatterOp); + +LINALG_AOPS_DECL(OpTypes::kScatter, ScatterOp); +LINALG_AOPS_DECL(OpTypes::kGather, GatherOp); +LINALG_AOPS_DECL(OpTypes::kArgsort, ArgsortOp); + } // namespace mllm::ir::linalg diff --git a/mllm/compile/ir/linalg/Op.hpp b/mllm/compile/ir/linalg/Op.hpp index e9a33641..6ea04d9a 100644 --- a/mllm/compile/ir/linalg/Op.hpp +++ b/mllm/compile/ir/linalg/Op.hpp @@ -64,6 +64,15 @@ class ExpOp; class SinOp; class CosOp; class PagedAttnOp; +class LayerNorm2DOp; +class PadOp; +class InterpolateOp; +class EinsumOp; +class StackOp; +class MaskedScatterOp; +class ScatterOp; +class GatherOp; +class ArgsortOp; } // namespace mllm #define LINALG_AOPS_DEFINE(class_name, rtti_name) \ @@ -213,4 +222,14 @@ LINALG_AOPS_DEFINE(MeanOp, MEANOP); LINALG_AOPS_DEFINE(ClipOp, CLIPOP); LINALG_AOPS_DEFINE(PagedAttnOp, PAGEDATTNOP); +LINALG_AOPS_DEFINE(LayerNorm2DOp, LAYERNORM2DOP); +LINALG_AOPS_DEFINE(PadOp, PADOP); +LINALG_AOPS_DEFINE(InterpolateOp, INTERPOLATEOP); +LINALG_AOPS_DEFINE(EinsumOp, EINSUMOP); +LINALG_AOPS_DEFINE(StackOp, STACKOP); +LINALG_AOPS_DEFINE(MaskedScatterOp, MASKEDSCATTEROP); +LINALG_AOPS_DEFINE(ScatterOp, SCATTEROP); +LINALG_AOPS_DEFINE(GatherOp, GATHEROP); +LINALG_AOPS_DEFINE(ArgsortOp, ARGSORTOP); + } // namespace mllm::ir::linalg diff --git a/mllm/compile/ir/rtti_kind_gen.py b/mllm/compile/ir/rtti_kind_gen.py index 3eeaf5ab..d81d1e8c 100644 --- a/mllm/compile/ir/rtti_kind_gen.py +++ b/mllm/compile/ir/rtti_kind_gen.py @@ -272,6 +272,15 @@ def define_lianlg_ir(ir: dict): op.derive(Cls("SinOp")) op.derive(Cls("CosOp")) op.derive(Cls("PagedAttnOp")) + op.derive(Cls("LayerNorm2DOp")) + op.derive(Cls("PadOp")) + op.derive(Cls("InterpolateOp")) + op.derive(Cls("EinsumOp")) + op.derive(Cls("StackOp")) + op.derive(Cls("MaskedScatterOp")) + op.derive(Cls("ScatterOp")) + op.derive(Cls("GatherOp")) + op.derive(Cls("ArgsortOp")) # value diff --git a/mllm/core/OpTypes.hpp b/mllm/core/OpTypes.hpp index 4adc215e..d072055b 100644 --- a/mllm/core/OpTypes.hpp +++ b/mllm/core/OpTypes.hpp @@ -75,6 +75,17 @@ enum class OpTypes : int32_t { kPagedAttn = 57, kRadixAttn = 58, kScatter2Shards = 59, + kLayerNorm2D = 60, + + // Padding Op + kPad = 61, + kInterpolate = 62, + kEinsum = 63, + kStack = 64, + kMaskedScatter = 65, + kScatter = 66, + kGather = 67, + kArgsort = 68, // 
Dynamic Op Start for users to register their own ops. kDynamicOp_Start = 4096, @@ -142,7 +153,18 @@ inline std::string optype2Str(OpTypes type) { case OpTypes::kGraphBegin: return "GraphBegin"; case OpTypes::kGraphEnd: return "GraphEnd"; case OpTypes::kPagedAttn: return "PagedAttn"; + case OpTypes::kRadixAttn: return "RadixAttn"; case OpTypes::kScatter2Shards: return "Scatter2Shards"; + case OpTypes::kLayerNorm2D: return "LayerNorm2D"; + case OpTypes::kPad: return "Pad"; + case OpTypes::kInterpolate: return "Interpolate"; + case OpTypes::kStack: return "Stack"; + case OpTypes::kEinsum: return "Einsum"; + case OpTypes::kMaskedScatter: return "MaskedScatter"; + case OpTypes::kScatter: return "Scatter"; + case OpTypes::kGather: return "Gather"; + case OpTypes::kArgsort: return "Argsort"; + case OpTypes::kDynamicOp_Start: return "DynamicOp_Start"; case OpTypes::kOpType_End: return "OpType_End"; default: return "Unknown"; } } diff --git a/mllm/core/Tensor.cpp b/mllm/core/Tensor.cpp index 73f80bf2..a1fc90c7 100644 --- a/mllm/core/Tensor.cpp +++ b/mllm/core/Tensor.cpp @@ -21,6 +21,7 @@ #include "mllm/core/aops/TransposeOp.hpp" #include "mllm/core/aops/ViewOp.hpp" #include "mllm/core/aops/X2XOp.hpp" +#include "mllm/core/aops/ArgsortOp.hpp" #include "mllm/engine/Context.hpp" namespace mllm { @@ -70,6 +71,11 @@ Tensor Tensor::empty(const std::vector& shape, DataTypes dtype, DeviceT return Tensor(impl); } +Tensor Tensor::emptyLike(const Tensor& liked_tensor) { + auto ret = Tensor::empty(liked_tensor.shape(), liked_tensor.dtype(), liked_tensor.device()); + return ret; +} + Tensor& Tensor::allocExtraTensorView(const std::string& extra_tensor_name, const std::vector& shape, DataTypes dtype, DeviceTypes device) { MLLM_RT_ASSERT_EQ(attached_views_.count(extra_tensor_name), 0); @@ -129,6 +135,12 @@ Tensor Tensor::operator/(const Tensor& rhs) { return Context::instance().buildOpAndSubmitTask(OpTypes::kDiv, aops::DivOpOptions{}, {*this, rhs})[0]; } +Tensor Tensor::mul_(const Tensor& rhs) { + auto opts = aops::MulOpOptions{}; + opts.setInplace(true); + return Context::instance().buildOpAndSubmitTask(OpTypes::kMul, opts, {*this, rhs})[0]; +} + Tensor Tensor::operator+(float rhs) { auto rhs_tensor = Tensor::empty({1}, dtype(), device()).alloc(); switch (dtype()) { @@ -243,6 +255,11 @@ Tensor Tensor::operator/(std::complex rhs) { Tensor Tensor::abs() { return Context::instance().buildOpAndSubmitTask(OpTypes::kAbs, aops::AbsOpOptions{}, {*this})[0]; } +Tensor Tensor::argsort(int dim, bool descending) { + return Context::instance().buildOpAndSubmitTask(OpTypes::kArgsort, + aops::ArgsortOpOptions{.dim = dim, .descending = descending}, {*this})[0]; + } + Tensor Tensor::clip(float min_val, float max_val) { return Context::instance().buildOpAndSubmitTask(OpTypes::kClip, aops::ClipOpOptions{.min_val = min_val, .max_val = max_val}, {*this})[0]; @@ -345,6 +362,12 @@ bool Tensor::isContiguous() const { return impl()->isContiguous(); } bool Tensor::isContiguousN(int n) const { return impl()->isContiguousN(n); } +int32_t Tensor::size(int32_t id) const { + auto nid = id; + if (id < 0) { nid = static_cast(rank()) + id; } + return shape()[nid]; +} + Tensor Tensor::contiguous() { return Context::instance().buildOpAndSubmitTask(OpTypes::kContiguous, aops::ContiguousOpOptions{}, {*this})[0]; } @@ -359,11 +382,13 @@ Tensor Tensor::view(const Tensor::shape_t& indicies) { Tensor Tensor::repeat(int32_t multiplier, int32_t dim) { if (multiplier == 1) { return *this; } + if (dim < 0) { dim = static_cast(rank()) + dim; } return
Context::instance().buildOpAndSubmitTask(OpTypes::kRepeat, aops::RepeatOpOptions{.dim = dim, .repeat_times = multiplier}, {*this})[0]; } Tensor Tensor::unsqueeze(int32_t dim) { + if (dim < 0) { dim = static_cast(rank()) + dim + 1; } auto this_shape = shape(); this_shape.insert(this_shape.begin() + dim, 1); return view(this_shape); @@ -404,6 +429,33 @@ Tensor Tensor::squeeze(int32_t dim) { } } +Tensor Tensor::flatten(int32_t dim) { + const auto old_shape = shape(); + const int32_t ndim = static_cast(old_shape.size()); + + if (dim == 0x7fffffff) { + int32_t total = 1; + for (auto s : old_shape) total *= s; + return view({total}); + } + + if (ndim == 0) return view({1}); + + if (dim < 0) dim += ndim; + if (dim < 0 || dim >= ndim) throw std::out_of_range("flatten dim out of range"); + + std::vector new_shape; + new_shape.reserve(dim + 1); + + for (int32_t i = 0; i < dim; ++i) new_shape.push_back(old_shape[i]); + + int32_t flatten_size = 1; + for (int32_t i = dim; i < ndim; ++i) flatten_size *= old_shape[i]; + new_shape.push_back(flatten_size); + + return view(new_shape); +} + Tensor Tensor::clone() { return Context::instance().buildOpAndSubmitTask(OpTypes::kClone, aops::CloneOpOptions{}, {*this})[0]; } void Tensor::copy2(const Tensor& src) { diff --git a/mllm/core/Tensor.hpp b/mllm/core/Tensor.hpp index f042bc55..7f04b211 100644 --- a/mllm/core/Tensor.hpp +++ b/mllm/core/Tensor.hpp @@ -175,6 +175,14 @@ class Tensor { */ static Tensor empty(const std::vector& shape, DataTypes dtype = kFloat32, DeviceTypes device = kCPU); + /** + * @brief Creates an uninitialized tensor with the same shape and attributes as another tensor. + * + * @param liked_tensor + * @return Tensor + */ + static Tensor emptyLike(const Tensor& liked_tensor); + /** * @brief If this tensor is not initialized * @@ -267,6 +275,8 @@ class Tensor { Tensor operator/(const Tensor& rhs); /// @} + Tensor mul_(const Tensor& rhs); + /// @name Scalar Operations /// Element-wise operations with scalar values. /// @{ @@ -296,6 +306,15 @@ class Tensor { */ Tensor abs(); + /** + * @brief Argsort + * + * @param dim + * @param descending + * @return Tensor + */ + Tensor argsort(int dim = -1, bool descending = false); + /** * @brief Clips (limits) the values in a tensor. * @param min_val Minimum value @@ -476,6 +495,14 @@ class Tensor { */ [[nodiscard]] bool isContiguousN(int n) const; + /** + * @brief + * + * @param id + * @return int32_t + */ + [[nodiscard]] int32_t size(int32_t id) const; + /** * @brief Creates contiguous copy if non-contiguous. * @return Contiguous tensor (may be a view or copy). @@ -522,6 +549,14 @@ class Tensor { */ Tensor squeeze(int32_t dim = 0x7fffffff); + /** + * @brief + * + * @param dim + * @return Tensor + */ + Tensor flatten(int32_t dim = 0x7fffffff); + /** * @brief clone a tensor * @@ -618,6 +653,12 @@ class Tensor { return *(offsettedPtr(offsets)); } + template + T item() const { + MLLM_RT_ASSERT_EQ(numel(), 1); + return *(ptr()); + }; + /** * @brief Accesses a tensor element at specified coordinates (const version). * @tparam T Expected data type (must match tensor dtype). diff --git a/mllm/core/aops/ArgsortOp.cpp b/mllm/core/aops/ArgsortOp.cpp new file mode 100644 index 00000000..35e2f4d6 --- /dev/null +++ b/mllm/core/aops/ArgsortOp.cpp @@ -0,0 +1,40 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
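+// ArgsortOp returns, for each slice along `dim`, the indices that would sort that
+// slice (ascending by default; the output is a kInt32 tensor with the input's shape).
+// Worked example: argsort([3.0, 1.0, 2.0]) -> [1, 2, 0]; with descending = true -> [0, 2, 1].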
+ +#include "mllm/core/aops/ArgsortOp.hpp" +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/Tensor.hpp" +#include "mllm/utils/Common.hpp" +#include "mllm/compile/ir/linalg/Op.hpp" + +namespace mllm::aops { + +ArgsortOp::ArgsortOp(const ArgsortOpOptions& options) : BaseOp(OpTypes::kArgsort), options_(options) {} + +void ArgsortOp::load(const ParameterFile::ptr_t& ploader) { MLLM_EMPTY_SCOPE; } + +void ArgsortOp::trace(void* trace_context, const std::vector& inputs, std::vector& outputs) { + auto ir_ctx = (ir::IRContext*)trace_context; + auto i_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, inputs); + auto o_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, outputs); + ir_ctx->create(shared_from_this(), i_irs, o_irs); +} + +void ArgsortOp::forward(const std::vector& inputs, std::vector& outputs) { + NYI("ArgsortOp::forward not implemented in aops base."); +} + +void ArgsortOp::reshape(const std::vector& inputs, std::vector& outputs) { + // Define output tensor shapes based on input shapes + auto& input = inputs[0]; + + if (!input.isNil()) { + auto input_shape = input.shape(); + // Output indices tensor has the same shape as input + outputs.emplace_back(Tensor::empty(input_shape, kInt32, input.device())); + } +} + +void ArgsortOp::setup(const std::vector& inputs, std::vector& outputs) { BaseOp::setup(inputs, outputs); } + +} // namespace mllm::aops diff --git a/mllm/core/aops/ArgsortOp.hpp b/mllm/core/aops/ArgsortOp.hpp new file mode 100644 index 00000000..4db89873 --- /dev/null +++ b/mllm/core/aops/ArgsortOp.hpp @@ -0,0 +1,36 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/ParameterFile.hpp" + +namespace mllm::aops { + +struct ArgsortOpOptions : public BaseOpOptions { + int dim = -1; + bool descending = false; +}; + +class ArgsortOp : public BaseOp { + public: + explicit ArgsortOp(const ArgsortOpOptions& options); + + void load(const ParameterFile::ptr_t& ploader) override; + + void trace(void* trace_context, const std::vector& inputs, std::vector& outputs) override; + + void forward(const std::vector& inputs, std::vector& outputs) override; + + void reshape(const std::vector& inputs, std::vector& outputs) override; + + void setup(const std::vector& inputs, std::vector& outputs) override; + + inline ArgsortOpOptions& options() { return options_; } + + protected: + ArgsortOpOptions options_; +}; + +} // namespace mllm::aops diff --git a/mllm/core/aops/Conv2DOp.cpp b/mllm/core/aops/Conv2DOp.cpp new file mode 100644 index 00000000..4b20fea8 --- /dev/null +++ b/mllm/core/aops/Conv2DOp.cpp @@ -0,0 +1,118 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
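+// Conv2DOp applies a 2D convolution over NCHW input. Each spatial output size
+// follows out = (in + 2 * padding - (dilation * (kernel - 1) + 1)) / stride + 1,
+// e.g. in = 224, kernel = 7, stride = 2, padding = 3, dilation = 1 gives out = 112.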
+ +#include "mllm/core/aops/Conv2DOp.hpp" +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/Tensor.hpp" +#include "mllm/utils/Common.hpp" +#include "mllm/compile/ir/linalg/Op.hpp" +#include "mllm/compile/ir/graph/Op.hpp" +#include "mllm/compile/ir/tensor/Op.hpp" + +namespace mllm::aops { + +Conv2DOp::Conv2DOp(const Conv2DOpOptions& options) : BaseOp(OpTypes::kConv2D), options_(options) {} + +void Conv2DOp::load(const ParameterFile::ptr_t& ploader) { + switch (ploader->version()) { + case ModelFileVersion::kV1: { + weight_ = ploader->pull(getName() + ".weight"); + if (options_.bias) { bias_ = ploader->pull(getName() + ".bias"); } + weight_ = weight_.view({ + options_.out_channels, + options_.in_channels, + options_.kernel_size[0], + options_.kernel_size[1], + }); + if (options_.bias) { bias_ = bias_.view({options_.out_channels}); } + break; + } + case ModelFileVersion::kUserTemporary: + case ModelFileVersion::kV2: { + weight_ = ploader->pull(getName() + ".weight"); + if (options_.bias) { bias_ = ploader->pull(getName() + ".bias"); } + break; + } + default: NYI("Unsupported model file version"); + } +} + +void Conv2DOp::trace(void* trace_context, const std::vector& inputs, std::vector& outputs) { + auto ir_ctx = (ir::IRContext*)trace_context; + + // Register Params + auto init_region = ir_ctx->lookupSymbolTable("init")->cast_()->getTopRegion(); + if (weight_ && !ir_ctx->lookupSymbolTable(getName() + ".weight")) { + ir::IRWriterGuard guard(ir_ctx, init_region); + ir_ctx->create(ir_ctx->create(weight_)); + } + if (options_.bias && bias_ && !ir_ctx->lookupSymbolTable(getName() + ".bias")) { + ir::IRWriterGuard guard(ir_ctx, init_region); + ir_ctx->create(ir_ctx->create(bias_)); + } + + auto i_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, inputs); + auto o_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, outputs); + auto _op = ir_ctx->create(shared_from_this(), i_irs, o_irs); +} + +void Conv2DOp::forward(const std::vector& inputs, std::vector& outputs) { + NYI("Conv2DOp::forward not implemented in aops base."); +} + +void Conv2DOp::reshape(const std::vector& inputs, std::vector& outputs) { + const auto& i = inputs[0]; + const auto& ishape = i.shape(); + + // Input must be 4D: [batch, channels, height, width] + if (ishape.size() != 4) { + MLLM_ERROR_EXIT(ExitCode::kCoreError, "Conv2DOp expects 4D input, got {} D", ishape.size()); + outputs.emplace_back(Tensor::empty(i.shape(), i.dtype(), i.device())); + return; + } + + const int batch = ishape[0]; + const int in_channels = ishape[1]; // channel axis + const int in_height = ishape[2]; // height axis + const int in_width = ishape[3]; // width axis + + MLLM_RT_ASSERT_EQ(in_channels, options_.in_channels); + + // Retrieve convolution parameters from options_ + // For Conv2D, kernel_size should be [kh, kw] + const auto& kernel = options_.kernel_size; + const auto& stride = options_.stride; // [sh, sw] + const auto& padding = options_.padding; // [ph, pw] if available + const auto& dilation = options_.dilation; // [dh, dw] if available + const int out_channels = options_.out_channels; + MLLM_RT_ASSERT_EQ(kernel.size(), 2); + MLLM_RT_ASSERT_EQ(stride.size(), 2); + MLLM_RT_ASSERT_EQ(padding.size(), 2); + MLLM_RT_ASSERT_EQ(dilation.size(), 2); + + // Output shape calculation for Conv2D + auto out_shape = [](int dim_size, int kernel_size, int stride_size, int padding_size, int dilation_size) -> int32_t { + const int dilated_kernel_size = dilation_size * (kernel_size - 1) + 1; + return ((dim_size + 2 * padding_size - dilated_kernel_size) / stride_size) + 
1; + }; + + // Calculate output height and width + auto h_out = out_shape(in_height, kernel[0], stride[0], padding[0], dilation[0]); + auto w_out = out_shape(in_width, kernel[1], stride[1], padding[1], dilation[1]); + + // Output shape: [batch, out_channels, h_out, w_out] + auto new_shape = std::vector{batch, out_channels, h_out, w_out}; + + outputs.emplace_back(Tensor::empty(new_shape, i.dtype(), i.device())); +} + +void Conv2DOp::setup(const std::vector& inputs, std::vector& outputs) { BaseOp::setup(inputs, outputs); } + +ParameterFile::ptr_t Conv2DOp::getParams() { + auto p = ParameterFile::create(); + p->push(getName() + ".weight", weight_); + if (options_.bias) { p->push(getName() + ".bias", bias_); } + return p; +} + +} // namespace mllm::aops diff --git a/mllm/core/aops/Conv2DOp.hpp b/mllm/core/aops/Conv2DOp.hpp new file mode 100644 index 00000000..0904e75b --- /dev/null +++ b/mllm/core/aops/Conv2DOp.hpp @@ -0,0 +1,72 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/ParameterFile.hpp" + +namespace mllm::aops { + +enum class Conv2DOpImplType { + kDefault = 0, +}; + +struct Conv2DOpOptions : public BaseOpOptions { + int32_t in_channels; + int32_t out_channels; + std::vector kernel_size; + std::vector stride; + std::vector padding; + std::vector dilation; + bool bias = true; + Conv2DOpImplType impl_type = Conv2DOpImplType::kDefault; +}; + +inline Conv2DOpImplType str2Conv2DOpImplType(const std::string& str) { + static const std::unordered_map map = {{"Default", Conv2DOpImplType::kDefault}}; + + auto it = map.find(str); + if (it != map.end()) { return it->second; } + + // Return default if not found + return Conv2DOpImplType::kDefault; +} + +inline std::string Conv2DOpImplType2Str(Conv2DOpImplType type) { + static const std::unordered_map map = {{Conv2DOpImplType::kDefault, "Default"}}; + + auto it = map.find(type); + if (it != map.end()) return it->second; + return "Default"; +} + +class Conv2DOp : public BaseOp { + public: + explicit Conv2DOp(const Conv2DOpOptions& options); + + void load(const ParameterFile::ptr_t& ploader) override; + + void trace(void* trace_context, const std::vector& inputs, std::vector& outputs) override; + + void forward(const std::vector& inputs, std::vector& outputs) override; + + void reshape(const std::vector& inputs, std::vector& outputs) override; + + void setup(const std::vector& inputs, std::vector& outputs) override; + + ParameterFile::ptr_t getParams() override; + + inline Tensor& weight() { return weight_; } + + inline Tensor& bias() { return bias_; } + + inline Conv2DOpOptions& options() { return options_; } + + protected: + Tensor weight_; + Tensor bias_; + Conv2DOpOptions options_; +}; + +} // namespace mllm::aops diff --git a/mllm/core/aops/EinsumOp.cpp b/mllm/core/aops/EinsumOp.cpp new file mode 100644 index 00000000..e69de29b diff --git a/mllm/core/aops/EinsumOp.hpp b/mllm/core/aops/EinsumOp.hpp new file mode 100644 index 00000000..e69de29b diff --git a/mllm/core/aops/ElewiseOps.cpp b/mllm/core/aops/ElewiseOps.cpp index 06319f58..55e00189 100644 --- a/mllm/core/aops/ElewiseOps.cpp +++ b/mllm/core/aops/ElewiseOps.cpp @@ -47,6 +47,10 @@ static std::vector broadcastShapes(const std::vector>& sha MLLM_WARN(#name "::forward is not implemented"); \ } \ void name::reshape(const std::vector& inputs, std::vector& outputs) { \ + if (options_.isInplace()) { \ + outputs.emplace_back(inputs[0]); \ + return; \ + } \ std::vector> input_shapes; \ 
input_shapes.reserve(inputs.size()); \ for (const auto& input : inputs) { input_shapes.push_back(input.shape()); } \ @@ -58,7 +62,10 @@ static std::vector broadcastShapes(const std::vector>& sha } \ outputs.emplace_back(output_0); \ } \ - void name::setup(const std::vector& inputs, std::vector& outputs) { BaseOp::setup(inputs, outputs); } + void name::setup(const std::vector& inputs, std::vector& outputs) { \ + if (options_.isInplace()) { return; } \ + BaseOp::setup(inputs, outputs); \ + } // for unary ops, reshape don't consider shape broadcast, and dtype of output needs to be handled for Abs op #define __MLLM_ELEWISE_UNARY_OP_IMPL(types, name) \ diff --git a/mllm/core/aops/InterpolateOp.cpp b/mllm/core/aops/InterpolateOp.cpp new file mode 100644 index 00000000..2c160ade --- /dev/null +++ b/mllm/core/aops/InterpolateOp.cpp @@ -0,0 +1,68 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include "mllm/core/aops/InterpolateOp.hpp" +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/Tensor.hpp" +#include "mllm/utils/Common.hpp" +#include "mllm/compile/ir/linalg/Op.hpp" + +namespace mllm::aops { + +InterpolateOp::InterpolateOp(const InterpolateOpOptions& options) : BaseOp(OpTypes::kInterpolate), options_(options) {} + +void InterpolateOp::load(const ParameterFile::ptr_t& ploader) { MLLM_EMPTY_SCOPE; } + +void InterpolateOp::trace(void* trace_context, const std::vector& inputs, std::vector& outputs) { + auto ir_ctx = (ir::IRContext*)trace_context; + auto i_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, inputs); + auto o_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, outputs); + ir_ctx->create(shared_from_this(), i_irs, o_irs); +} + +void InterpolateOp::forward(const std::vector& inputs, std::vector& outputs) { + NYI("InterpolateOp::forward not implemented in aops base."); +} + +void InterpolateOp::reshape(const std::vector& inputs, std::vector& outputs) { + // Get the input tensor + const auto& input = inputs[0]; + + // Skip if input is nil + if (input.isNil()) { return; } + + // Get input shape + auto input_shape = input.shape(); + const int input_dim = static_cast(input_shape.size()); + + // Calculate output shape based on options + std::vector output_shape = input_shape; + + // If size is specified, use it to determine output dimensions + if (!options_.size.empty()) { + // Ensure size vector has correct dimensions + MLLM_RT_ASSERT(options_.size.size() <= input_dim); + + // Apply size to the last N dimensions where N is the size of options_.size + const int offset = input_dim - static_cast(options_.size.size()); + for (size_t i = 0; i < options_.size.size(); ++i) { output_shape[offset + i] = options_.size[i]; } + } + // If scale_factor is specified, use it to scale dimensions + else if (!options_.scale_factor.empty()) { + // Ensure scale_factor vector has correct dimensions + MLLM_RT_ASSERT(options_.scale_factor.size() <= input_dim); + + // Apply scale factor to the last N dimensions where N is the size of options_.scale_factor + const int offset = input_dim - static_cast(options_.scale_factor.size()); + for (size_t i = 0; i < options_.scale_factor.size(); ++i) { + output_shape[offset + i] = static_cast(input_shape[offset + i] * options_.scale_factor[i]); + } + } + + // Create output tensor with the calculated shape + outputs.emplace_back(Tensor::empty(output_shape, input.dtype(), input.device())); +} + +void InterpolateOp::setup(const std::vector& inputs, std::vector& outputs) { BaseOp::setup(inputs, outputs); } + +} // namespace mllm::aops diff --git 
a/mllm/core/aops/InterpolateOp.hpp b/mllm/core/aops/InterpolateOp.hpp new file mode 100644 index 00000000..1000573f --- /dev/null +++ b/mllm/core/aops/InterpolateOp.hpp @@ -0,0 +1,47 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/ParameterFile.hpp" + +namespace mllm::aops { + +enum class InterpolateOpMode { + kNearest, + kLinear, + kBilinear, + kBicubic, + kTrilinear, +}; + +struct InterpolateOpOptions : public BaseOpOptions { + std::vector size; + std::vector scale_factor; + InterpolateOpMode mode = InterpolateOpMode::kNearest; + bool align_corners = false; + bool antialias = false; +}; + +class InterpolateOp : public BaseOp { + public: + explicit InterpolateOp(const InterpolateOpOptions& options); + + void load(const ParameterFile::ptr_t& ploader) override; + + void trace(void* trace_context, const std::vector& inputs, std::vector& outputs) override; + + void forward(const std::vector& inputs, std::vector& outputs) override; + + void reshape(const std::vector& inputs, std::vector& outputs) override; + + void setup(const std::vector& inputs, std::vector& outputs) override; + + inline InterpolateOpOptions& options() { return options_; } + + protected: + InterpolateOpOptions options_; +}; + +} // namespace mllm::aops diff --git a/mllm/core/aops/LayerNorm2DOp.cpp b/mllm/core/aops/LayerNorm2DOp.cpp new file mode 100644 index 00000000..3ce487b2 --- /dev/null +++ b/mllm/core/aops/LayerNorm2DOp.cpp @@ -0,0 +1,60 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include "mllm/core/aops/LayerNorm2DOp.hpp" +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/Tensor.hpp" +#include "mllm/utils/Common.hpp" +#include "mllm/compile/ir/linalg/Op.hpp" +#include "mllm/compile/ir/graph/Op.hpp" +#include "mllm/compile/ir/tensor/Op.hpp" + +namespace mllm::aops { + +LayerNorm2DOp::LayerNorm2DOp(const LayerNorm2DOpOptions& options) : BaseOp(OpTypes::kLayerNorm2D), options_(options) {} + +void LayerNorm2DOp::load(const ParameterFile::ptr_t& ploader) { + weight_ = ploader->pull(getName() + ".weight"); + weight_ = weight_.view({options_.num_channels}); + bias_ = ploader->pull(getName() + ".bias"); + bias_ = bias_.view({options_.num_channels}); +} + +void LayerNorm2DOp::trace(void* trace_context, const std::vector& inputs, std::vector& outputs) { + auto ir_ctx = (ir::IRContext*)trace_context; + + // Register Params + auto init_region = ir_ctx->lookupSymbolTable("init")->cast_()->getTopRegion(); + if (weight_ && !ir_ctx->lookupSymbolTable(getName() + ".weight")) { + ir::IRWriterGuard guard(ir_ctx, init_region); + ir_ctx->create(ir_ctx->create(weight_)); + } + if (bias_ && !ir_ctx->lookupSymbolTable(getName() + ".bias")) { + ir::IRWriterGuard guard(ir_ctx, init_region); + ir_ctx->create(ir_ctx->create(bias_)); + } + + auto i_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, inputs); + auto o_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, outputs); + ir_ctx->create(shared_from_this(), i_irs, o_irs); +} + +void LayerNorm2DOp::forward(const std::vector& inputs, std::vector& outputs) { + NYI("LayerNorm2DOp::forward not implemented in aops base."); +} + +void LayerNorm2DOp::reshape(const std::vector& inputs, std::vector& outputs) { + const auto& i = inputs[0]; + outputs.emplace_back(Tensor::empty(i.shape(), i.dtype(), i.device())); +} + +void LayerNorm2DOp::setup(const std::vector& inputs, std::vector& outputs) { BaseOp::setup(inputs, outputs); } + +ParameterFile::ptr_t LayerNorm2DOp::getParams() { + auto p = 
ParameterFile::create(); + p->push(getName() + ".weight", weight_); + p->push(getName() + ".bias", bias_); + return p; +} + +} // namespace mllm::aops diff --git a/mllm/core/aops/LayerNorm2DOp.hpp b/mllm/core/aops/LayerNorm2DOp.hpp new file mode 100644 index 00000000..9ff05d13 --- /dev/null +++ b/mllm/core/aops/LayerNorm2DOp.hpp @@ -0,0 +1,44 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/ParameterFile.hpp" + +namespace mllm::aops { + +struct LayerNorm2DOpOptions : public BaseOpOptions { + int32_t num_channels; + float eps = 1e-6; +}; + +class LayerNorm2DOp : public BaseOp { + public: + explicit LayerNorm2DOp(const LayerNorm2DOpOptions& options); + + void load(const ParameterFile::ptr_t& ploader) override; + + void trace(void* trace_context, const std::vector& inputs, std::vector& outputs) override; + + void forward(const std::vector& inputs, std::vector& outputs) override; + + void reshape(const std::vector& inputs, std::vector& outputs) override; + + void setup(const std::vector& inputs, std::vector& outputs) override; + + ParameterFile::ptr_t getParams() override; + + inline Tensor& weight() { return weight_; } + + inline Tensor& bias() { return bias_; } + + inline LayerNorm2DOpOptions& options() { return options_; } + + protected: + Tensor weight_; + Tensor bias_; + LayerNorm2DOpOptions options_; +}; + +} // namespace mllm::aops diff --git a/mllm/core/aops/MaskedScatterOp.cpp b/mllm/core/aops/MaskedScatterOp.cpp new file mode 100644 index 00000000..9502dda0 --- /dev/null +++ b/mllm/core/aops/MaskedScatterOp.cpp @@ -0,0 +1,36 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include "mllm/core/aops/MaskedScatterOp.hpp" +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/Tensor.hpp" +#include "mllm/utils/Common.hpp" +#include "mllm/compile/ir/linalg/Op.hpp" + +namespace mllm::aops { + +MaskedScatterOp::MaskedScatterOp(const MaskedScatterOpOptions& options) : BaseOp(OpTypes::kMaskedScatter), options_(options) {} + +void MaskedScatterOp::load(const ParameterFile::ptr_t& ploader) { MLLM_EMPTY_SCOPE; } + +void MaskedScatterOp::trace(void* trace_context, const std::vector& inputs, std::vector& outputs) { + auto ir_ctx = (ir::IRContext*)trace_context; + auto i_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, inputs); + auto o_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, outputs); + ir_ctx->create(shared_from_this(), i_irs, o_irs); +} + +void MaskedScatterOp::forward(const std::vector& inputs, std::vector& outputs) { + NYI("MaskedScatterOp::forward not implemented in aops base."); +} + +void MaskedScatterOp::reshape(const std::vector& inputs, std::vector& outputs) { + outputs.emplace_back(inputs[0]); +} + +void MaskedScatterOp::setup(const std::vector& inputs, std::vector& outputs) { + // Do nothing. + MLLM_EMPTY_SCOPE; +} + +} // namespace mllm::aops diff --git a/mllm/core/aops/MaskedScatterOp.hpp b/mllm/core/aops/MaskedScatterOp.hpp new file mode 100644 index 00000000..9e452e6a --- /dev/null +++ b/mllm/core/aops/MaskedScatterOp.hpp @@ -0,0 +1,33 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
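+// MaskedScatterOp is in place by construction: reshape() aliases the input tensor as
+// the output and setup() allocates nothing. The semantics are modeled on
+// torch.Tensor.masked_scatter_: copy source elements into the positions where a
+// boolean mask is true, in row-major order.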
+ +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/ParameterFile.hpp" + +namespace mllm::aops { + +struct MaskedScatterOpOptions : public BaseOpOptions {}; + +class MaskedScatterOp : public BaseOp { + public: + explicit MaskedScatterOp(const MaskedScatterOpOptions& options); + + void load(const ParameterFile::ptr_t& ploader) override; + + void trace(void* trace_context, const std::vector& inputs, std::vector& outputs) override; + + void forward(const std::vector& inputs, std::vector& outputs) override; + + void reshape(const std::vector& inputs, std::vector& outputs) override; + + void setup(const std::vector& inputs, std::vector& outputs) override; + + inline MaskedScatterOpOptions& options() { return options_; } + + protected: + MaskedScatterOpOptions options_; +}; + +} // namespace mllm::aops diff --git a/mllm/core/aops/PadOp.cpp b/mllm/core/aops/PadOp.cpp index e69de29b..5421b819 100644 --- a/mllm/core/aops/PadOp.cpp +++ b/mllm/core/aops/PadOp.cpp @@ -0,0 +1,44 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include "mllm/core/aops/PadOp.hpp" +#include "mllm/compile/ir/linalg/Op.hpp" + +namespace mllm::aops { + +PadOp::PadOp(const PadOpOptions& options) : BaseOp(OpTypes::kPad), options_(options) {} + +void PadOp::load(const ParameterFile::ptr_t& ploader) { MLLM_EMPTY_SCOPE; } + +void PadOp::trace(void* trace_context, const std::vector& inputs, std::vector& outputs) { + auto ir_ctx = (ir::IRContext*)trace_context; + auto i_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, inputs); + auto o_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, outputs); + ir_ctx->create(shared_from_this(), i_irs, o_irs); +} + +void PadOp::forward(const std::vector& inputs, std::vector& outputs) { + NYI("PadOp::forward is not implemented in the base class"); +} + +void PadOp::reshape(const std::vector& inputs, std::vector& outputs) { + if (inputs.empty()) { throw std::invalid_argument("PadOp::reshape: inputs empty"); } + + const auto& i = inputs[0]; + const int in_dims = i.shape().size(); + const auto& pad = options_.pad; // [dim_last_low, dim_last_high, ...] + + std::vector out_shape = i.shape(); + for (int d = 0; d < in_dims; ++d) { + int idx = (in_dims - 1 - d) * 2; + int32_t low = (idx < pad.size()) ? pad[idx] : 0; + int32_t high = (idx + 1 < pad.size()) ? pad[idx + 1] : 0; + out_shape[d] += low + high; + } + + outputs.emplace_back(Tensor::empty(out_shape, i.dtype(), i.device())); +} + +void PadOp::setup(const std::vector& inputs, std::vector& outputs) { BaseOp::setup(inputs, outputs); } + +} // namespace mllm::aops diff --git a/mllm/core/aops/PadOp.hpp b/mllm/core/aops/PadOp.hpp index e69de29b..e4c5c555 100644 --- a/mllm/core/aops/PadOp.hpp +++ b/mllm/core/aops/PadOp.hpp @@ -0,0 +1,44 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
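+// PadOp pads the last N dimensions of its input. `pad` holds two entries per padded
+// dimension, ordered from the last dimension outward: {last_before, last_after, ...}.
+// Example: for an [N, C, H, W] input, pad = {1, 1, 2, 2} widens W by 1 on each side
+// and H by 2 on each side, yielding [N, C, H + 4, W + 2]. kReflect mirrors without
+// repeating the edge (index -2 on a size-4 axis reads element 2), kReplicate clamps
+// to the edge, and kCircular wraps around.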
+ +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/ParameterFile.hpp" + +namespace mllm::aops { + +enum class PadMode : uint8_t { + kConstant = 0, + kReflect = 1, + kReplicate = 2, + kCircular = 3, +}; + +struct PadOpOptions : public BaseOpOptions { + std::vector pad; // padding sizes, starting from the last dimension + PadMode mode{PadMode::kConstant}; // padding mode + float value{0.0f}; // padding value for constant mode +}; + +class PadOp : public BaseOp { + public: + explicit PadOp(const PadOpOptions& options); + + void load(const ParameterFile::ptr_t& ploader) override; + + void trace(void* trace_context, const std::vector& inputs, std::vector& outputs) override; + + void forward(const std::vector& inputs, std::vector& outputs) override; + + void reshape(const std::vector& inputs, std::vector& outputs) override; + + void setup(const std::vector& inputs, std::vector& outputs) override; + + inline const PadOpOptions& options() const { return options_; } + + protected: + PadOpOptions options_; +}; + +} // namespace mllm::aops diff --git a/mllm/core/aops/StackOp.cpp b/mllm/core/aops/StackOp.cpp new file mode 100644 index 00000000..858bf273 --- /dev/null +++ b/mllm/core/aops/StackOp.cpp @@ -0,0 +1,63 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include "mllm/core/aops/StackOp.hpp" +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/Tensor.hpp" +#include "mllm/utils/Common.hpp" +#include "mllm/compile/ir/linalg/Op.hpp" + +namespace mllm::aops { + +StackOp::StackOp(const StackOpOptions& options) : BaseOp(OpTypes::kStack), options_(options) {} + +void StackOp::load(const ParameterFile::ptr_t& ploader) { MLLM_EMPTY_SCOPE; } + +void StackOp::trace(void* trace_context, const std::vector& inputs, std::vector& outputs) { + auto ir_ctx = (ir::IRContext*)trace_context; + auto i_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, inputs); + auto o_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, outputs); + ir_ctx->create(shared_from_this(), i_irs, o_irs); +} + +void StackOp::forward(const std::vector& inputs, std::vector& outputs) { + NYI("StackOp::forward not implemented in aops base."); +} + +void StackOp::reshape(const std::vector& inputs, std::vector& outputs) { + if (inputs.empty()) { + MLLM_ERROR_EXIT(ExitCode::kCoreError, "StackOp: no inputs"); + return; + } + + const int n_dims = inputs[0].shape().size(); + int at_dim = options_.dim; + + // Normalize dim into [0, n_dims] + if (at_dim < 0) { at_dim += (n_dims + 1); } + if (at_dim < 0 || at_dim > n_dims) { + MLLM_ERROR_EXIT(ExitCode::kCoreError, "StackOp: dim {} out of range [0, {}]", at_dim, n_dims); + return; + } + + // Check all input shapes equal + for (size_t i = 1; i < inputs.size(); ++i) { + if (inputs[i].shape() != inputs[0].shape()) { + MLLM_ERROR_EXIT(ExitCode::kCoreError, "StackOp: input shape mismatch"); + return; + } + } + + // Build new shape by inserting a new dimension of size inputs.size() + std::vector new_shape; + new_shape.reserve(n_dims + 1); + for (int d = 0; d < at_dim; ++d) { new_shape.push_back(inputs[0].shape()[d]); } + new_shape.push_back(static_cast(inputs.size())); + for (int d = at_dim; d < n_dims; ++d) { new_shape.push_back(inputs[0].shape()[d]); } + + outputs.emplace_back(Tensor::empty(new_shape, inputs[0].dtype(), inputs[0].device())); +} + +void StackOp::setup(const std::vector& inputs, std::vector& outputs) { BaseOp::setup(inputs, outputs); } + +} // namespace mllm::aops \ No newline at end of file diff --git a/mllm/core/aops/StackOp.hpp b/mllm/core/aops/StackOp.hpp 
new file mode 100644 index 00000000..01c0ba4c --- /dev/null +++ b/mllm/core/aops/StackOp.hpp @@ -0,0 +1,35 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/ParameterFile.hpp" + +namespace mllm::aops { + +struct StackOpOptions : public BaseOpOptions { + int32_t dim; +}; + +class StackOp : public BaseOp { + public: + explicit StackOp(const StackOpOptions& options); + + void load(const ParameterFile::ptr_t& ploader) override; + + void trace(void* trace_context, const std::vector& inputs, std::vector& outputs) override; + + void forward(const std::vector& inputs, std::vector& outputs) override; + + void reshape(const std::vector& inputs, std::vector& outputs) override; + + void setup(const std::vector& inputs, std::vector& outputs) override; + + inline StackOpOptions& options() { return options_; } + + protected: + StackOpOptions options_; +}; + +} // namespace mllm::aops diff --git a/mllm/ext/CMakeLists.txt b/mllm/ext/CMakeLists.txt new file mode 100644 index 00000000..de1e27a2 --- /dev/null +++ b/mllm/ext/CMakeLists.txt @@ -0,0 +1,5 @@ +if (MLLM_EXT_ENABLE_META_TORCH_TOKENIZERS) + add_subdirectory(vendors/tokenizers) +endif() + +# LLVM MLIR Stuff. diff --git a/mllm/ext/README.md b/mllm/ext/README.md new file mode 100644 index 00000000..919c7ea9 --- /dev/null +++ b/mllm/ext/README.md @@ -0,0 +1,6 @@ +# MLLM Extension + +The mllm extension contains third-party packages and their wrappers for mllm. These extensions are OPTIONAL for the main mllm library. + +- tokenizers: [tokenizers](https://github.com/meta-pytorch/tokenizers.git) +- mobile opencv diff --git a/mllm/ext/vendors/llvm-project b/mllm/ext/vendors/llvm-project new file mode 160000 index 00000000..6a0f392b --- /dev/null +++ b/mllm/ext/vendors/llvm-project @@ -0,0 +1 @@ +Subproject commit 6a0f392bb50d890f13cb961a911be28f965ed4f2 diff --git a/mllm/ext/vendors/opencv-mobile b/mllm/ext/vendors/opencv-mobile new file mode 100644 index 00000000..4c3f0b43 --- /dev/null +++ b/mllm/ext/vendors/opencv-mobile @@ -0,0 +1 @@ +https://github.com/nihui/opencv-mobile diff --git a/mllm/ext/vendors/tokenizers b/mllm/ext/vendors/tokenizers new file mode 160000 index 00000000..0bcd9f53 --- /dev/null +++ b/mllm/ext/vendors/tokenizers @@ -0,0 +1 @@ +Subproject commit 0bcd9f5325c42d2e05765d584131900dbc8835c8 diff --git a/mllm/mllm.inl b/mllm/mllm.inl index 19410166..5ef1ddac 100644 --- a/mllm/mllm.inl +++ b/mllm/mllm.inl @@ -111,6 +111,55 @@ struct formatter> { } }; + +template<> +struct formatter> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + template + auto format(const std::vector& vec, FormatContext& ctx) const { + auto out = ctx.out(); + *out++ = '['; + for (size_t i = 0; i < vec.size(); ++i) { + if (i > 0) { + *out++ = ','; + *out++ = ' '; + } + out = fmt::format_to(out, "{:?}", vec[i]); + } + *out++ = ']'; + return out; + } +}; + +template<> +struct formatter> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + template + auto format(const std::vector& vec, FormatContext& ctx) const { + auto out = ctx.out(); + *out++ = '['; + for (size_t i = 0; i < vec.size(); ++i) { + if (i > 0) { + *out++ = ','; + *out++ = ' '; + } + out = fmt::format_to(out, "{}", vec[i]); + } + *out++ = ']'; + return out; + } +}; + +template<> +struct formatter> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + template + auto format(const std::tuple& tuple, FormatContext& ctx) const { + auto out = ctx.out(); + out =
fmt::format_to(out, "tuple[{}, {}]", std::get<0>(tuple), std::get<1>(tuple)); + return out; + } +}; + template<> struct formatter { constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } diff --git a/mllm/models/deepseek_ocr/README.md b/mllm/models/deepseek_ocr/README.md new file mode 100644 index 00000000..802ed47c --- /dev/null +++ b/mllm/models/deepseek_ocr/README.md @@ -0,0 +1,55 @@ +--- +pipeline_tag: image-text-to-text +language: +- multilingual +tags: +- deepseek +- vision-language +- ocr +- custom_code +license: mit +--- +
+DeepSeek AI
+
+[badges: Discord, Twitter Follow]
+
+🌟 Github | 📥 Model Download | 📄 Paper Link | 📄 Arxiv Paper Link
+
+# DeepSeek-OCR: Contexts Optical Compression
+
+Explore the boundaries of visual-text compression.
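+
+## Usage sketch
+
+A minimal sketch of how the conversation helpers shipped in `conversation_preprocess.hpp` fit together; the `main` scaffold and the nlohmann/json include path are illustrative assumptions, not part of the upstream model card:
+
+```cpp
+#include <nlohmann/json.hpp>
+#include "mllm/models/deepseek_ocr/conversation_preprocess.hpp"
+
+using namespace mllm::models::deepseek_ocr;
+
+int main() {
+  // Conversation templates must be registered before formatMessages() can look them up.
+  initializeTemplates();
+
+  nlohmann::json conversations = nlohmann::json::parse(R"([
+    {"role": "User",
+     "content": "\nExtract all information from this image and convert them into markdown format.",
+     "images": ["./examples/table_datasets.png"]},
+    {"role": "Assistant", "content": ""}
+  ])");
+
+  auto prompt = formatMessages(conversations, "deepseek");  // render the SFT prompt string
+  auto images = loadImages(conversations);                  // decode the images referenced above
+  return 0;
+}
+```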

diff --git a/mllm/models/deepseek_ocr/configuration_deepseek_ocr.hpp b/mllm/models/deepseek_ocr/configuration_deepseek_ocr.hpp new file mode 100644 index 00000000..ddc6a47c --- /dev/null +++ b/mllm/models/deepseek_ocr/configuration_deepseek_ocr.hpp @@ -0,0 +1,196 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. +#pragma once + +#include "mllm/core/aops/LinearOp.hpp" +#include "mllm/engine/ConfigFile.hpp" +#include + +namespace mllm::models::deepseek_ocr { + +struct DpskOcrConfig : protected ConfigFile { + DpskOcrConfig() = default; + + explicit DpskOcrConfig(const std::string& file_path) : ConfigFile(file_path) { + // Init all + _name_or_path = data()["_name_or_path"]; + + // Parse candidate_resolutions + if (data().contains("candidate_resolutions") && data()["candidate_resolutions"].is_array()) { + for (const auto& res : data()["candidate_resolutions"]) { + if (res.is_array() && res.size() == 2) { candidate_resolutions.push_back({res[0], res[1]}); } + } + } + + global_view_pos = data()["global_view_pos"]; + model_type = data()["model_type"]; + tile_tag = data()["tile_tag"]; + transformers_version = data()["transformers_version"]; + + // Language config + language_config.bos_token_id = data()["language_config"]["bos_token_id"]; + language_config.eos_token_id = data()["language_config"]["eos_token_id"]; + language_config.first_k_dense_replace = data()["language_config"]["first_k_dense_replace"]; + language_config.hidden_size = data()["language_config"]["hidden_size"]; + language_config.intermediate_size = data()["language_config"]["intermediate_size"]; + language_config.kv_lora_rank = data()["language_config"]["kv_lora_rank"].is_null() + ? -1 + : static_cast(data()["language_config"]["kv_lora_rank"]); + language_config.lm_head = data()["language_config"]["lm_head"]; + language_config.max_position_embeddings = data()["language_config"]["max_position_embeddings"]; + language_config.moe_intermediate_size = data()["language_config"]["moe_intermediate_size"]; + language_config.n_group = data()["language_config"]["n_group"]; + language_config.n_routed_experts = data()["language_config"]["n_routed_experts"]; + language_config.n_shared_experts = data()["language_config"]["n_shared_experts"]; + language_config.num_attention_heads = data()["language_config"]["num_attention_heads"]; + language_config.num_experts_per_tok = data()["language_config"]["num_experts_per_tok"]; + language_config.num_hidden_layers = data()["language_config"]["num_hidden_layers"]; + language_config.num_key_value_heads = data()["language_config"]["num_key_value_heads"]; + language_config.q_lora_rank = data()["language_config"]["q_lora_rank"].is_null() + ? 
-1 + : static_cast(data()["language_config"]["q_lora_rank"]); + language_config.qk_nope_head_dim = data()["language_config"]["qk_nope_head_dim"]; + language_config.qk_rope_head_dim = data()["language_config"]["qk_rope_head_dim"]; + language_config.rm_head = data()["language_config"]["rm_head"]; + language_config.topk_group = data()["language_config"]["topk_group"]; + language_config.topk_method = data()["language_config"]["topk_method"]; + language_config.use_mla = data()["language_config"]["use_mla"]; + language_config.v_head_dim = data()["language_config"]["v_head_dim"]; + language_config.vocab_size = data()["language_config"]["vocab_size"]; + + // Projector config + projector_config.input_dim = data()["projector_config"]["input_dim"]; + projector_config.model_type = data()["projector_config"]["model_type"]; + projector_config.n_embed = data()["projector_config"]["n_embed"]; + projector_config.projector_type = data()["projector_config"]["projector_type"]; + + // Vision config + vision_config.image_size = data()["vision_config"]["image_size"]; + vision_config.mlp_ratio = data()["vision_config"]["mlp_ratio"]; + vision_config.model_name = data()["vision_config"]["model_name"]; + vision_config.model_type = data()["vision_config"]["model_type"]; + + // Main config values + bos_token_id = data()["bos_token_id"]; + eos_token_id = data()["eos_token_id"]; + first_k_dense_replace = data()["first_k_dense_replace"]; + hidden_size = data()["hidden_size"]; + intermediate_size = data()["intermediate_size"]; + kv_lora_rank = data()["kv_lora_rank"].is_null() ? -1 : static_cast(data()["kv_lora_rank"]); + lm_head = data()["lm_head"]; + max_position_embeddings = data()["max_position_embeddings"]; + moe_intermediate_size = data()["moe_intermediate_size"]; + n_group = data()["n_group"]; + n_routed_experts = data()["n_routed_experts"]; + n_shared_experts = data()["n_shared_experts"]; + num_attention_heads = data()["num_attention_heads"]; + num_experts_per_tok = data()["num_experts_per_tok"]; + num_hidden_layers = data()["num_hidden_layers"]; + num_key_value_heads = data()["num_key_value_heads"]; + q_lora_rank = data()["q_lora_rank"].is_null() ? 
-1 : static_cast(data()["q_lora_rank"]); + qk_nope_head_dim = data()["qk_nope_head_dim"]; + qk_rope_head_dim = data()["qk_rope_head_dim"]; + rm_head = data()["rm_head"]; + topk_group = data()["topk_group"]; + topk_method = data()["topk_method"]; + use_mla = data()["use_mla"]; + v_head_dim = data()["v_head_dim"]; + vocab_size = data()["vocab_size"]; + clip_linear_impl_type = aops::str2LinearImplTypes(data()["clip_linear_impl_type"]); + llm_mlp_linear_impl_type = aops::str2LinearImplTypes(data()["llm_mlp_linear_impl_type"]); + lm_head_linear_impl_type = aops::str2LinearImplTypes(data()["lm_head_linear_impl_type"]); + mlp_projector_linear_impl_type = aops::str2LinearImplTypes(data()["mlp_projector_linear_impl_type"]); + sam_linear_impl_type = aops::str2LinearImplTypes(data()["sam_linear_impl_type"]); + } + + // Nested structs for complex configuration + struct LanguageConfig { + int64_t bos_token_id = 0; + int64_t eos_token_id = 1; + int32_t first_k_dense_replace = 1; + int32_t hidden_size = 1280; + int32_t intermediate_size = 6848; + int32_t kv_lora_rank = -1; // null in JSON + bool lm_head = true; + int32_t max_position_embeddings = 8192; + int32_t moe_intermediate_size = 896; + int32_t n_group = 1; + int32_t n_routed_experts = 64; + int32_t n_shared_experts = 2; + int32_t num_attention_heads = 10; + int32_t num_experts_per_tok = 6; + int32_t num_hidden_layers = 12; + int32_t num_key_value_heads = 10; + int32_t q_lora_rank = -1; // null in JSON + int32_t qk_nope_head_dim = 0; + int32_t qk_rope_head_dim = 0; + bool rm_head = false; + int32_t topk_group = 1; + std::string topk_method = "greedy"; + bool use_mla = false; + int32_t v_head_dim = 0; + int32_t vocab_size = 129280; + }; + + struct ProjectorConfig { + int32_t input_dim = 2048; + std::string model_type = "mlp_projector"; + int32_t n_embed = 1280; + std::string projector_type = "linear"; + }; + + struct VisionConfig { + int32_t image_size = 1024; + float mlp_ratio = 3.7362; + std::string model_name = "deeplip_b_l"; + std::string model_type = "vision"; + }; + + std::string _name_or_path = "deepseek-ai/DeepSeek-OCR"; + std::vector> candidate_resolutions = {{1024, 1024}}; + std::string global_view_pos = "head"; + std::string model_type = "deepseek_vl_v2"; + std::string tile_tag = "2D"; + std::string transformers_version = "4.46.3"; + + LanguageConfig language_config; + ProjectorConfig projector_config; + VisionConfig vision_config; + + // Main config values + int64_t bos_token_id = 0; + int64_t eos_token_id = 1; + int32_t first_k_dense_replace = 1; + int32_t hidden_size = 1280; + int32_t intermediate_size = 6848; + int32_t kv_lora_rank = -1; // null in JSON + bool lm_head = true; + int32_t max_position_embeddings = 8192; + int32_t moe_intermediate_size = 896; + int32_t n_group = 1; + int32_t n_routed_experts = 64; + int32_t n_shared_experts = 2; + int32_t num_attention_heads = 10; + int32_t num_experts_per_tok = 6; + int32_t num_hidden_layers = 12; + int32_t num_key_value_heads = 10; + int32_t q_lora_rank = -1; // null in JSON + int32_t qk_nope_head_dim = 0; + int32_t qk_rope_head_dim = 0; + bool rm_head = false; + int32_t topk_group = 1; + std::string topk_method = "greedy"; + bool use_mla = false; + int32_t v_head_dim = 0; + int32_t vocab_size = 129280; + + // MLLM Related Stuff + int32_t max_cache_length = 2048; + aops::LinearImplTypes clip_linear_impl_type = aops::LinearImplTypes::kDefault; + aops::LinearImplTypes sam_linear_impl_type = aops::LinearImplTypes::kDefault; + aops::LinearImplTypes mlp_projector_linear_impl_type = 
aops::LinearImplTypes::kDefault; + aops::LinearImplTypes lm_head_linear_impl_type = aops::LinearImplTypes::kDefault; + aops::LinearImplTypes llm_mlp_linear_impl_type = aops::LinearImplTypes::kDefault; +}; + +} // namespace mllm::models::deepseek_ocr diff --git a/mllm/models/deepseek_ocr/conversation_preprocess.hpp b/mllm/models/deepseek_ocr/conversation_preprocess.hpp new file mode 100644 index 00000000..df54e4f7 --- /dev/null +++ b/mllm/models/deepseek_ocr/conversation_preprocess.hpp @@ -0,0 +1,453 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "mllm/preprocessor/visual/Image.hpp" + +namespace mllm::models::deepseek_ocr { + +/** + * Separator styles for different conversation formats + */ +enum class SeparatorStyle { + DeepSeek, + DeepSeekV2, + PLAIN, + ALIGNMENT, +}; + +/** + * A class that manages prompt templates and keeps all conversation history + */ +class Conversation { + public: + // Constructor with required and optional parameters + explicit Conversation(const std::string& name, const std::string& system_template = "{system_message}", + const std::string& system_message = "", const std::vector& roles = {"USER", "ASSISTANT"}, + const std::vector>& messages = {}, int offset = 0, + SeparatorStyle sep_style = SeparatorStyle::DeepSeek, const std::string& sep = "\n", + const std::optional& sep2 = std::nullopt, + const std::optional& stop_str = std::nullopt, + const std::optional>& stop_token_ids = std::nullopt) + : name_(name), + system_template_(system_template), + system_message_(system_message), + roles_(roles), + messages_(messages), + offset_(offset), + sep_style_(sep_style), + sep_(sep), + sep2_(sep2), + stop_str_(stop_str), + stop_token_ids_(stop_token_ids) {} + + /** + * Get the prompt for generation + */ + [[nodiscard]] std::string getPrompt() const { + std::string system_prompt = formatSystemTemplate(); + + if (sep_style_ == SeparatorStyle::DeepSeek) { + std::vector seps = {sep_, sep2_.value_or("")}; + std::string ret; + + if (!system_prompt.empty()) { ret = system_prompt + seps[0]; } + + for (size_t i = 0; i < messages_.size(); ++i) { + const auto& [role, message] = std::make_pair(messages_[i][0], messages_[i][1]); + if (!message.empty()) { + ret += role + ": " + message + seps[i % 2]; // NOLINT + } else { + ret += role + ":"; + } + } + return ret; + } else if (sep_style_ == SeparatorStyle::DeepSeekV2) { + std::vector seps = {sep_, sep2_.value_or("")}; + std::string ret; + + if (!system_prompt.empty()) { ret = system_prompt + seps[0]; } + + for (const auto& i : messages_) { + const auto& [role, message] = std::make_pair(i[0], i[1]); + if (!message.empty()) { + if (role == "User") { + ret += "<|sft▁begin|>\n" + message + sep_; + } else { + ret += message + sep2_.value_or(""); + } + } + } + return ret; + } else if (sep_style_ == SeparatorStyle::PLAIN) { + std::vector seps = {sep_, sep2_.value_or("")}; + std::string ret; + + for (size_t i = 0; i < messages_.size(); ++i) { + const auto& [role, message] = std::make_pair(messages_[i][0], messages_[i][1]); + if (!message.empty()) { ret += message + seps[i % 2]; } + } + return ret; + } else if (sep_style_ == SeparatorStyle::ALIGNMENT) { + std::vector seps = {sep_, sep2_.value_or("")}; + std::string ret; + + for (size_t i = 0; i < messages_.size(); ++i) { + const auto& [role, message] = std::make_pair(messages_[i][0], messages_[i][1]); + if 
(!message.empty()) { + if (i % 2 == 0) { + ret += "\n" + seps[i % 2]; + } else { + ret += message + seps[i % 2]; + } + } + } + return ret; + } else { + throw std::invalid_argument("Invalid separator style"); + } + } + + /** + * Set the system message + */ + void setSystemMessage(const std::string& system_message) { system_message_ = system_message; } + + /** + * Append a new message + */ + void appendMessage(const std::string& role, const std::string& message) { messages_.push_back({role, message}); } + + /** + * Update the last output + * The last message is typically set to be empty when constructing the prompt, + * so we need to update it in-place after getting the response from a model. + */ + void updateLastMessage(const std::string& message) { + if (!messages_.empty()) { messages_.back()[1] = message; } + } + + /** + * Reset messages + */ + void resetMessages() { messages_.clear(); } + + /** + * Convert the conversation to gradio chatbot format + */ + [[nodiscard]] std::vector>> toGradioChatbot() const { + std::vector>> ret; + + for (size_t i = offset_; i < messages_.size(); ++i) { + const auto& [role, msg] = std::make_pair(messages_[i][0], messages_[i][1]); + if (i % 2 == 0) { + ret.push_back({msg, std::nullopt}); + } else if (!ret.empty()) { + ret.back()[1] = msg; + } + } + + return ret; + } + + /** + * Convert the conversation to OpenAI chat completion format + */ + [[nodiscard]] std::vector> toOpenAIApiMessages() const { + std::string system_prompt = formatSystemTemplate(); + std::vector> ret; + + ret.push_back({{"role", "system"}, {"content", system_prompt}}); + + for (size_t i = offset_; i < messages_.size(); ++i) { + const auto& [_, msg] = std::make_pair(messages_[i][0], messages_[i][1]); + if (i % 2 == 0) { + ret.push_back({{"role", "user"}, {"content", msg}}); + } else if (!msg.empty()) { + ret.push_back({{"role", "assistant"}, {"content", msg}}); + } + } + + return ret; + } + + /** + * Create a copy of the conversation + */ + [[nodiscard]] std::shared_ptr copy() const { + return std::make_shared(name_, system_template_, system_message_, roles_, messages_, offset_, sep_style_, + sep_, sep2_, stop_str_, stop_token_ids_); + } + + /** + * Convert the conversation to a dictionary + */ + [[nodiscard]] std::map, std::vector>, int>> + toDict() const { + return {{"template_name", name_}, + {"system_message", system_message_}, + {"roles", roles_}, + {"messages", messages_}, + {"offset", offset_}}; + } + + // Getters + [[nodiscard]] const std::string& getName() const { return name_; } + [[nodiscard]] const std::vector& getRoles() const { return roles_; } + + private: + [[nodiscard]] std::string formatSystemTemplate() const { + std::string result = system_template_; + size_t pos = result.find("{system_message}"); + if (pos != std::string::npos) { result.replace(pos, 16, system_message_); } + return result; + } + + private: + std::string name_; + std::string system_template_; + std::string system_message_; + std::vector roles_; + std::vector> messages_; + int offset_; + SeparatorStyle sep_style_; + std::string sep_; + std::optional sep2_; + std::optional stop_str_; + std::optional> stop_token_ids_; +}; + +// A global registry for all conversation templates +static std::map> conv_templates; + +/** + * Register a new conversation template + */ +void registerConvTemplate(const std::shared_ptr& template_ptr, bool override = false) { + const std::string& name = template_ptr->getName(); + if (!override) { assert(conv_templates.find(name) == conv_templates.end() && (name + " has been 
registered.").c_str()); } + conv_templates[name] = template_ptr; +} + +/** + * Get a conversation template + */ +std::shared_ptr getConvTemplate(const std::string& name) { + auto it = conv_templates.find(name); + if (it == conv_templates.end()) { throw std::runtime_error("Template not found: " + name); } + return it->second->copy(); +} + +// Initialize templates +void initializeTemplates() { + // DeepSeek template + auto deepseek = std::make_shared( + "deepseek", "{system_message}", "", std::vector{"<|User|>", "<|Assistant|>"}, + std::vector>{}, 0, SeparatorStyle::DeepSeek, "\n\n", "<|end▁of▁sentence|>", + std::optional{"<|end▁of▁sentence|>"}, std::optional>{std::vector{100001}}); + registerConvTemplate(deepseek); + + // DeepSeekV2 template + auto deepseekv2 = std::make_shared( + "deepseekv2", "{system_message}", "", std::vector{"<|User|>", "<|Assistant|>"}, + std::vector>{}, 0, SeparatorStyle::DeepSeekV2, "", "<|end▁of▁sentence|>", + std::optional{"<|end▁of▁sentence|>"}, std::optional>{std::vector{100001}}); + registerConvTemplate(deepseekv2); + + // Plain template + auto plain = std::make_shared( + "plain", "", "", std::vector{"", ""}, std::vector>{}, 0, SeparatorStyle::PLAIN, "", + "", std::optional{""}, std::optional>{std::vector{100001}}); + registerConvTemplate(plain); + + // Alignment template + auto alignment = std::make_shared("alignment", "", "", std::vector{"", ""}, + std::vector>{}, 0, SeparatorStyle::ALIGNMENT, "", "", + std::optional{""}, + std::optional>{std::vector{100001}}); + registerConvTemplate(alignment); +} + +inline std::string formatMessages(const nlohmann::json& conversations, const std::string& sft_format = "deepseek", + const std::string& system_prompt = "") { + auto conv = getConvTemplate(sft_format); + + // Helper trim function to mimic Python's .strip() + auto trim = [](const std::string& s) -> std::string { + const char* ws = " \t\n\r\f\v"; + const auto start = s.find_first_not_of(ws); + if (start == std::string::npos) return ""; + const auto end = s.find_last_not_of(ws); + return s.substr(start, end - start + 1); + }; + + conv->setSystemMessage(system_prompt); + for (const auto& message : conversations) { + std::string role; + std::string content; + + if (message.contains("role") && message["role"].is_string()) { + role = message["role"].get(); + } else { + role = ""; + } + + if (message.contains("content") && message["content"].is_string()) { + content = trim(message["content"].get()); + } else { + content = ""; + } + + conv->appendMessage(role, content); + } + + return trim(conv->getPrompt()); +} + +//===----------------------------------------------------------------------===// +// For Image processing +//===----------------------------------------------------------------------===// + +/** + * Loads images from conversation messages + * + * @param conversations JSON array of conversation messages + * An example is: + * [ + * { + * "role": "User", + * "content": "\nExtract all information from this image and convert them into markdown format.", + * "images": ["./examples/table_datasets.png"] + * }, + * {"role": "Assistant", "content": ""} + * ] + * + * @return Vector of Image objects + */ +std::vector loadImages(const nlohmann::json& conversations) { + std::vector ret; + // Iterate through each conversation message + for (const auto& message : conversations) { + // Skip if message doesn't contain "images" field + if (!message.contains("images")) { continue; } + + // Process each image path in the "images" array + for (const auto& image_path : 
message["images"]) { + // Load the image using Image::open + Image img = Image::open(image_path); + // Add to result vector + ret.push_back(img); + } + } + return ret; +} + +std::pair findClosestAspectRatio(double aspect_ratio, const std::vector>& target_ratios, + int width, int height, int image_size) { + double best_ratio_diff = std::numeric_limits::infinity(); + std::pair best_ratio = {1, 1}; + const double area = static_cast(width) * static_cast(height); + + for (const auto& ratio : target_ratios) { + const double target_aspect_ratio = static_cast(ratio.first) / static_cast(ratio.second); + const double ratio_diff = std::abs(aspect_ratio - target_aspect_ratio); + + if (ratio_diff < best_ratio_diff) { + best_ratio_diff = ratio_diff; + best_ratio = ratio; + } else if (ratio_diff == best_ratio_diff) { + if (area > 0.5 * static_cast(image_size) * static_cast(image_size) * static_cast(ratio.first) + * static_cast(ratio.second)) { + best_ratio = ratio; + } + } + } + + return best_ratio; +} + +/** + * Dynamic preprocess that crops and resizes an image into tiles matching + * a target aspect ratio grid, similar to DeepSeek-OCR implementation. + * + * - Selects closest aspect ratio from predefined candidates. + * - Generates non-overlapping tiles along the longer dimension. + * - Pads out-of-bounds areas during crop and resizes tiles to target grid. + * + * @param image Input RGB image + * @param image_size Base size used to construct target grid (e.g., 448) + * @param max_num Max number of tiles to generate (default 6) + * @param use_thumbnail Whether to add a square thumbnail (image_size x image_size) as the first tile + * @return Vector of processed Image tiles + */ +inline std::pair, std::pair> dynamicPreprocess(const Image& image, int min_num = 2, + int max_num = 9, int image_size = 640, + bool use_thumbnail = false) { + Image src = image; + const int w = src.w(); + const int h = src.h(); + if (w <= 0 || h <= 0) { return {{}, {1, 1}}; } + + const double aspect_ratio = static_cast(w) / static_cast(h); + + // Build candidate ratios: all pairs (i, j) with min_num <= i*j <= max_num + std::set> ratio_set; + for (int n = min_num; n <= max_num; ++n) { + for (int i = 1; i <= n; ++i) { + for (int j = 1; j <= n; ++j) { + const int blocks = i * j; + if (blocks >= min_num && blocks <= max_num) { ratio_set.insert({i, j}); } + } + } + } + std::vector> target_ratios(ratio_set.begin(), ratio_set.end()); + std::sort(target_ratios.begin(), target_ratios.end(), + [](const auto& a, const auto& b) { return (a.first * a.second) < (b.first * b.second); }); + + const auto target_aspect_ratio = findClosestAspectRatio(aspect_ratio, target_ratios, w, h, image_size); + + const int target_width = image_size * target_aspect_ratio.first; + const int target_height = image_size * target_aspect_ratio.second; + const int blocks = target_aspect_ratio.first * target_aspect_ratio.second; + + Image resized_img = src.resize(target_width, target_height); + std::vector processed_images; + processed_images.reserve(static_cast(blocks)); + + for (int i = 0; i < blocks; ++i) { + const int cols = target_width / image_size; // equals target_aspect_ratio.first + const int x0 = (i % cols) * image_size; + const int y0 = (i / cols) * image_size; + const int x1 = x0 + image_size; + const int y1 = y0 + image_size; + Image split_img = resized_img.crop(x0, y0, x1, y1); + processed_images.push_back(split_img); + } + + assert(static_cast(processed_images.size()) == blocks); + + if (use_thumbnail && static_cast(processed_images.size()) != 1) { + 
processed_images.push_back(src.resize(image_size, image_size)); + } + + return {processed_images, target_aspect_ratio}; +} + +} // namespace mllm::models::deepseek_ocr diff --git a/mllm/models/deepseek_ocr/deepencoder.hpp b/mllm/models/deepseek_ocr/deepencoder.hpp new file mode 100644 index 00000000..c454c0b1 --- /dev/null +++ b/mllm/models/deepseek_ocr/deepencoder.hpp @@ -0,0 +1,717 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. +#pragma once + +#include +#include +#include + +#include "mllm/mllm.hpp" +#include "mllm/nn/Nn.hpp" +#include "mllm/nn/Module.hpp" +#include "mllm/nn/Functional.hpp" +#include "mllm/nn/layers/Param.hpp" +#include "mllm/utils/Enumerate.hpp" +#include "mllm/models/deepseek_ocr/configuration_deepseek_ocr.hpp" + +namespace mllm::models::deepseek_ocr { + +//===----------------------------------------------------------------------===// +// MLP Projector For Mapping Visual Tokens to Text Token Space +//===----------------------------------------------------------------------===// +class MlpProjector final : public nn::Module { + nn::Linear layers_; + + public: + MlpProjector() = default; + + MlpProjector(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) { + layers_ = reg("layers", 2048, 1280, true, config.mlp_projector_linear_impl_type); + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + return {layers_(inputs[0])}; + } +}; + +//===----------------------------------------------------------------------===// +// CLIP +// +// CLIP params is hard coded. Just like what deepseek official model does. +// +// vit_model_cfg = adict( +// num_layers=24, +// hidden_size=1024, +// num_heads = 16, +// num_attention_heads=16, +// ffn_hidden_size=4096, +// seq_length=256, +// max_position_embeddings=256, +// use_flash_attn=False, +// understand_projector_stride=2, +// hidden_dropout = 0.0, +// attention_dropout = 0.0, +// no_persist_layer_norm = False, +// layernorm_epsilon = 1e-5, +// pre_layernorm_epsilon = 1e-5, +// image_size = 224, +// patch_size = 14, +// recompute_list = [] +// ) + +// def build_clip_l(): +// return VitModel( +// cfg=vit_model_cfg, +// freeze_embed=False, +// freeze_pre_norm=False, +// ) +//===----------------------------------------------------------------------===// +class CLIPVisionEmbeddings final : public nn::Module { + int embed_dim_; + int image_size_; + int patch_size_; + nn::Param class_embedding_; + nn::Conv2D patch_embedding_; + int num_patches_; + int num_positions_; + nn::Embedding position_embedding_; + + public: + CLIPVisionEmbeddings() = default; + + CLIPVisionEmbeddings(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) { + embed_dim_ = 1024; + image_size_ = 224; + patch_size_ = 14; + num_patches_ = (image_size_ / patch_size_) * (image_size_ / patch_size_); + num_positions_ = num_patches_ + 1; + + // [embed_dim], aka [1024] + class_embedding_ = reg("class_embedding", getModuleName() + ".class_embedding"); + patch_embedding_ = reg("patch_embedding", 3, embed_dim_, Tensor::shape_t{14, 14}, Tensor::shape_t{14, 14}, + Tensor::shape_t{0, 0}, Tensor::shape_t{1, 1}, false); + position_embedding_ = reg("position_embedding", num_positions_, embed_dim_); + + // Register a buffer + registerBuffer("position_ids", Tensor::arange(0, num_positions_, 1, kInt64, kCPU).view({1, -1})); + } + + Tensor getAbsPos(Tensor abs_pos, int32_t tgt_size) { + // abs_pos : L, C + // tgt_size : M + // return : M, C + + auto dim = abs_pos.size(-1); + auto abs_pos_new = 
abs_pos.squeeze(0); + auto cls_token = abs_pos_new[{{kAll, 1}, kAll}].contiguous(); + auto old_pos_embed = abs_pos_new[{{1, kAll}, kAll}].contiguous(); + + auto src_size = int(std::sqrt(abs_pos_new.shape()[0] - 1)); + tgt_size = int(std::sqrt(tgt_size)); + auto dtype = abs_pos.dtype(); + + if (src_size != tgt_size) { + old_pos_embed = old_pos_embed.view({1, src_size, src_size, dim}).permute({0, 3, 1, 2}); + old_pos_embed = old_pos_embed.to(kFloat32); + auto new_pos_embed = nn::functional::interpolateBySize(old_pos_embed, {tgt_size, tgt_size}, + aops::InterpolateOpMode::kBicubic, false, true); + new_pos_embed = new_pos_embed.permute({0, 2, 3, 1}); + new_pos_embed = new_pos_embed.view({tgt_size * tgt_size, dim}); + auto vision_pos_embed = nn::functional::concat({cls_token, new_pos_embed}, 0); + vision_pos_embed = vision_pos_embed.view({1, tgt_size * tgt_size + 1, dim}); + return vision_pos_embed; + } else { + return abs_pos; + } + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto pixel_values = Tensor::nil(); + auto patch_embeds = Tensor::nil(); + + if (inputs.size() == 1) { + pixel_values = inputs[0]; + } else if (inputs.size() == 2) { + pixel_values = inputs[0]; + patch_embeds = inputs[1]; + } + + auto batch_size = pixel_values.shape()[0]; + + if (!patch_embeds) { patch_embeds = patch_embedding_(pixel_values); } + + // Flatten and transpose. + // patch_embeds original shape is [batch, out_channel, width, grid, grid] + patch_embeds = patch_embeds.flatten(2).transpose(1, 2); // [batch, width * grid * grid, out_channel] + + // [batch, 1, 1024] + // Same as expand(batch_size, 1, -1) + auto class_embeds = class_embedding_.weight().view({1, 1, -1}).repeat(batch_size, 0); + + auto embeddings = nn::functional::concat({class_embeds, patch_embeds}, 1); + embeddings = embeddings + getAbsPos(position_embedding_(getBuffer("position_ids")), embeddings.size(1)); + + return {embeddings}; + } +}; + +class NoTPFeedForward final : public nn::Module { + nn::Linear fc1_; + nn::Linear fc2_; + nn::QuickGELU act_; + + public: + NoTPFeedForward() = default; + + NoTPFeedForward(const std::string& name, int32_t dim, int32_t hidden_dim, const DpskOcrConfig& config) : nn::Module(name) { + fc1_ = reg("fc1", dim, hidden_dim, true, config.clip_linear_impl_type); + fc2_ = reg("fc2", hidden_dim, dim, true, config.clip_linear_impl_type); + act_ = reg("act"); + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + return {fc2_(act_(fc1_(inputs[0])))}; + } +}; + +class NoTPAttention final : public nn::Module { + int num_heads_; + int n_local_heads_; + int head_dim_; + int max_seq_len_; + nn::Linear qkv_proj_; + nn::Linear out_proj_; + + public: + NoTPAttention() = default; + + NoTPAttention(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) { + num_heads_ = 16; + n_local_heads_ = 16; + head_dim_ = 1024 / 16; + max_seq_len_ = 256; + + qkv_proj_ = reg("qkv_proj", 1024, 1024 * 3, true, config.clip_linear_impl_type); + out_proj_ = reg("out_proj", 1024, 1024, true, config.clip_linear_impl_type); + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto& x = inputs[0]; + auto bsz = x.size(0); + auto seqlen = x.size(1); + + auto xqkv = qkv_proj_(x); + xqkv = xqkv.view({bsz, seqlen, 3, num_heads_, head_dim_}); + auto [xq, xk, xv] = nn::functional::split<3>(xqkv, 2); + + // FIXME: contiguous is not needed, actually. 
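+    // split() hands back strided views into xqkv, and several kernels are noticeably
+    // slower on non-dense layouts, so the contiguous() copies below are a performance
+    // workaround rather than a correctness requirement.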
+ xq = xq.contiguous().squeeze(2); + xk = xk.contiguous().squeeze(2); + xv = xv.contiguous().squeeze(2); + + xq = xq.permute({0, 2, 1, 3}); + xk = xk.permute({0, 2, 1, 3}); + xv = xv.permute({0, 2, 1, 3}); + + auto output = nn::functional::scaledDotProductAttention(xq, xk, xv); + output = output.permute({0, 2, 1, 3}).view({bsz, seqlen, -1}); + output = out_proj_(output); + return {output}; + } +}; + +class NoTPTransformerBlock final : public nn::Module { + int n_heads_; + int dim_; + int head_dim_; + NoTPAttention self_attn_; + NoTPFeedForward mlp_; + nn::LayerNorm layer_norm1_; + nn::LayerNorm layer_norm2_; + + public: + int layer_id_; + + NoTPTransformerBlock() = default; + + NoTPTransformerBlock(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) { + n_heads_ = 16; + dim_ = 1024; + head_dim_ = 1024 / 16; + self_attn_ = reg("self_attn", config); + mlp_ = reg("mlp", 1024, 4096, config); + layer_norm1_ = reg("layer_norm1", Tensor::shape_t{1024}, true, true, 1e-5); + layer_norm2_ = reg("layer_norm2", Tensor::shape_t{1024}, true, true, 1e-5); + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto x = inputs[0]; + auto residual = self_attn_(layer_norm1_(x))[0]; + auto h = x + residual; + auto out = h + mlp_(layer_norm2_(h))[0]; + return {out}; + } +}; + +class NoTPTransformer final : public nn::Module { + int num_layers_; + nn::ModuleList layers_; + + public: + NoTPTransformer() = default; + + NoTPTransformer(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) { + num_layers_ = 24; + layers_ = reg>("layers", num_layers_, config); + for (auto [idx, layer] : enumerate(layers_.list())) { layer.layer_id_ = idx; } + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto hidden_states = inputs[0]; + for (auto [idx, layer] : enumerate(layers_.list())) { hidden_states = layer(hidden_states)[0]; } + + return {hidden_states}; + } +}; + +class VitModel final : public nn::Module { + CLIPVisionEmbeddings embeddings_; + NoTPTransformer transformer_; + nn::LayerNorm pre_layernorm_; ///< input must in fp32 dtype. + + public: + VitModel() = default; + + VitModel(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) { + embeddings_ = reg("embeddings", config); + transformer_ = reg("transformer", config); + + // NOTE: + // Yes!!!, Its pre_layrnorm! Deepseek Typo!. 
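+    // The misspelling is intentional here: reg() keys must match the parameter names
+    // serialized in the official checkpoint, otherwise load() could not resolve this
+    // layer's weight and bias.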
+ pre_layernorm_ = reg("pre_layrnorm", Tensor::shape_t{1024}, true, true, 1e-5); + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto x = inputs[0]; + auto patch_embeds = inputs[1]; + + auto output = embeddings_(x, patch_embeds)[0]; + output = pre_layernorm_(output); + output = transformer_(output)[0]; + return {output}; + } +}; + +//===----------------------------------------------------------------------===// +// SAM +//===----------------------------------------------------------------------===// +class PatchEmbed final : public nn::Module { + nn::Conv2D proj_; + + public: + PatchEmbed() = default; + + PatchEmbed(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) { + proj_ = reg("proj", 3, 768, Tensor::shape_t{16, 16}, Tensor::shape_t{16, 16}, Tensor::shape_t{0, 0}, + Tensor::shape_t{1, 1}, true); + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto x = inputs[0]; + // B C H W -> B H W C + return {proj_(x).permute({0, 2, 3, 1})}; + } +}; + +class MLPBlock final : public nn::Module { + nn::Linear lin1_; + nn::Linear lin2_; + nn::GELU act_; + + public: + MLPBlock() = default; + + MLPBlock(const std::string& name, int embedding_dim, int mlp_dim, const DpskOcrConfig& config) : nn::Module(name) { + lin1_ = reg("lin1", embedding_dim, mlp_dim, true, config.sam_linear_impl_type); + lin2_ = reg("lin2", mlp_dim, embedding_dim, true, config.sam_linear_impl_type); + act_ = reg("act"); + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + return {lin2_(act_(lin1_(inputs[0])))}; + } +}; + +class Attention final : public nn::Module { + int num_heads_; + bool use_rel_pos_; + + nn::Linear qkv_; + nn::Linear proj_; + nn::Param rel_pos_h_; + nn::Param rel_pos_w_; + + public: + int layer_idx_; + + Attention() = default; + + Attention(const std::string& name, int dim, int num_heads, bool qkv_bias, bool use_rel_pos, + std::optional> input_size, const DpskOcrConfig& config) + : nn::Module(name) { + num_heads_ = num_heads; + use_rel_pos_ = use_rel_pos; + + qkv_ = reg("qkv", dim, dim * 3, qkv_bias, config.sam_linear_impl_type); + proj_ = reg("proj", dim, dim, true, config.sam_linear_impl_type); + if (use_rel_pos) { + rel_pos_h_ = reg("rel_pos_h", getModuleName() + ".rel_pos_h"); + rel_pos_w_ = reg("rel_pos_w", getModuleName() + ".rel_pos_w"); + } + } + + // Get relative positional embeddings according to the relative positions of query and key sizes. 
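+  // Worked example: with q_size = k_size = 14, max_rel_dist = 2 * 14 - 1 = 27, so the
+  // relative offsets in [-13, 13] map to table rows [0, 26]. When q_size != k_size, the
+  // coordinates are first rescaled by max(k_size / q_size, 1) and max(q_size / k_size, 1)
+  // so both grids live on a common scale before the offset lookup.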
+ Tensor getRelPos(int q_size, int k_size, const Tensor& rel_pos_) { + auto rel_pos = rel_pos_; + auto max_rel_dist = 2 * std::max(q_size, k_size) - 1; + Tensor rel_pos_resized = Tensor::nil(); + + if (rel_pos.size(0) != max_rel_dist) { + auto dtype = rel_pos.dtype(); + rel_pos = rel_pos.to(kFloat32); + rel_pos_resized = nn::functional::interpolateBySize(rel_pos.view({1, rel_pos.size(0), -1}).permute({0, 2, 1}), + {max_rel_dist}, aops::InterpolateOpMode::kLinear) + .to(dtype); + rel_pos_resized = rel_pos_resized.view({-1, max_rel_dist}).permute({1, 0}); + } else { + rel_pos_resized = rel_pos; + } + + std::vector q_coords(q_size); + std::vector k_coords(k_size); + + float q_scale = std::max((float)k_size / q_size, 1.0f); + float k_scale = std::max((float)q_size / k_size, 1.0f); + + for (int i = 0; i < q_size; ++i) { q_coords[i] = i * q_scale; } + for (int i = 0; i < k_size; ++i) { k_coords[i] = i * k_scale; } + + float offset = (k_size - 1) * k_scale; + int embedding_dim = rel_pos_resized.size(1); + auto out = Tensor::empty({q_size, k_size, embedding_dim}).alloc(); + + for (int i = 0; i < q_size; ++i) { + for (int j = 0; j < k_size; ++j) { + float relative_coord_float = (q_coords[i] - k_coords[j]) + offset; + int64_t relative_coord_long = static_cast(std::round(relative_coord_float)); + + if (relative_coord_long < 0) relative_coord_long = 0; + if (relative_coord_long >= max_rel_dist) relative_coord_long = max_rel_dist - 1; + + for (int d = 0; d < embedding_dim; ++d) { + out.ptr()[i * k_size * embedding_dim + j * embedding_dim + d] = + *rel_pos_resized.offsettedPtr({(int32_t)relative_coord_long, d}); + } + } + } + + return out; + } + + // Calculate decomposed Relative Positional Embeddings from :paper:`mvitv2`. + // https://github.com/facebookresearch/mvit/blob/19786631e330df9f3622e5402b4a419a263a2c80/mvit/models/attention.py + std::tuple addDecomposedRelPos(Tensor q, const Tensor& rel_pos_h, const Tensor& rel_pos_w, + std::tuple q_size, std::tuple k_size) { + auto [q_h, q_w] = q_size; + auto [k_h, k_w] = k_size; + + auto Rh = getRelPos(q_h, k_h, rel_pos_h); + auto Rw = getRelPos(q_w, k_w, rel_pos_w); + + auto B = q.size(0); + auto dim = q.size(2); + + auto r_q = q.view({B, q_h, q_w, dim}); + + // Einsum + // 1. bhwc,hkc->bhwk + auto rel_h = Tensor::empty({B, q_h, q_w, k_h}).alloc(); + { + auto* r_q_ptr = r_q.ptr(); + auto* Rh_ptr = Rh.ptr(); + auto* rel_h_ptr = rel_h.ptr(); + // rel_h[b, h, w, k] = sum over c from 0 to dim-1 ( r_q[b, h, w, c] * Rh[h, k, c] ) + for (int b = 0; b < B; ++b) { + for (int h = 0; h < q_h; ++h) { + for (int w = 0; w < q_w; ++w) { + for (int k = 0; k < k_h; ++k) { + float sum = 0.0f; + const auto* p_r_q = r_q_ptr + (b * q_h * q_w * dim) + (h * q_w * dim) + (w * dim); + const auto* p_Rh = Rh_ptr + (h * k_h * dim) + (k * dim); + for (int c = 0; c < dim; ++c) { sum += p_r_q[c] * p_Rh[c]; } + rel_h_ptr[(b * q_h * q_w * k_h) + (h * q_w * k_h) + (w * k_h) + k] = sum; + } + } + } + } + } + // 2. 
bhwc,wkc->bhwk + auto rel_w = Tensor::empty({B, q_h, q_w, k_w}).alloc(); + { + auto* r_q_ptr = r_q.ptr(); + auto* Rw_ptr = Rw.ptr(); + auto* rel_w_ptr = rel_w.ptr(); + + // rel_w[b, h, w, k] = sum over c from 0 to dim-1 ( r_q[b, h, w, c] * Rw[w, k, c] ) + for (int b = 0; b < B; ++b) { + for (int h = 0; h < q_h; ++h) { + for (int w = 0; w < q_w; ++w) { + for (int k = 0; k < k_w; ++k) { + float sum = 0.0f; + const auto* p_r_q = r_q_ptr + (b * q_h * q_w * dim) + (h * q_w * dim) + (w * dim); + const auto* p_Rw = Rw_ptr + (w * k_w * dim) + (k * dim); + for (int c = 0; c < dim; ++c) { sum += p_r_q[c] * p_Rw[c]; } + rel_w_ptr[(b * q_h * q_w * k_w) + (h * q_w * k_w) + (w * k_w) + k] = sum; + } + } + } + } + } + + rel_h = rel_h.unsqueeze(-1); + rel_w = rel_w.unsqueeze(-2); + rel_h = rel_h.view({B, q_h * q_w, k_h, 1}); + rel_w = rel_w.view({B, q_h * q_w, 1, k_w}); + return {rel_h, rel_w}; + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto x = inputs[0]; + auto B = x.size(0); + auto H = x.size(1); + auto W = x.size(2); + + // qkv with shape (3, B, nHead, H * W, C) + auto qkv = qkv_(x) + .view({ + B, + H * W, + 3, + num_heads_, + -1, + }) + .permute({2, 0, 3, 1, 4}); + qkv = qkv.view({3, B * num_heads_, H * W, -1}); + auto [q, k, v] = nn::functional::split<3>(qkv, 0); + + q = q.view({B * num_heads_, H * W, -1}); + k = k.view({B * num_heads_, H * W, -1}); + v = v.view({B * num_heads_, H * W, -1}); + + auto rel_h = Tensor::nil(); + auto rel_w = Tensor::nil(); + + if (use_rel_pos_) { + std::tie(rel_h, rel_w) = addDecomposedRelPos(q, rel_pos_h_.weight(), rel_pos_w_.weight(), {H, W}, {H, W}); + } + + q = q.view({B, num_heads_, H * W, -1}); + k = k.view({B, num_heads_, H * W, -1}); + v = v.view({B, num_heads_, H * W, -1}); + + if (use_rel_pos_) { + rel_h = rel_h.view({B, num_heads_, rel_h.size(1), rel_h.size(2), rel_h.size(3)}); + rel_w = rel_w.view({B, num_heads_, rel_w.size(1), rel_w.size(2), rel_w.size(3)}); + + // Dual broadcast is not supported in cpu backend. So we need to repeat rel_h and rel_w + // torch.Size([54, 12, 196, 14, 1]) + // torch.Size([54, 12, 196, 1, 14]) + auto _dim_neg_1 = rel_w.size(4); + auto _dim_neg_2 = rel_h.size(3); + rel_h = rel_h.repeat(_dim_neg_1, -1); + rel_w = rel_w.repeat(_dim_neg_2, -2); + MLLM_RT_ASSERT_EQ(rel_h.shape(), rel_w.shape()); + auto attn_bias = (rel_h + rel_w).view({B, num_heads_, rel_h.size(2), rel_h.size(3) * rel_w.size(4)}); + x = nn::functional::scaledDotProductAttention(q, k, v, attn_bias); + } else { + x = nn::functional::scaledDotProductAttention(q, k, v); + } + + x = x.view({B, num_heads_, H, W, -1}).permute({0, 2, 3, 1, 4}).view({B, H, W, -1}); + x = proj_(x); + return {x}; + } +}; + +class Block final : public nn::Module { + nn::LayerNorm norm1_; + nn::LayerNorm norm2_; + MLPBlock mlp_; + int window_size_; + + public: + Attention attn_; + int layer_idx_; + + Block() = default; + + Block(const std::string& name, int dim, int num_heads, float mlp_ratio, bool qkv_bias, bool use_rel_pos, int window_size, + std::optional> input_size, const DpskOcrConfig& config) + : nn::Module(name) { + norm1_ = reg("norm1", Tensor::shape_t{dim}); + attn_ = + reg("attn", dim, num_heads, qkv_bias, use_rel_pos, + window_size == 0 ? 
input_size : std::make_optional(std::make_tuple(window_size, window_size)), config); + norm2_ = reg("norm2", Tensor::shape_t{dim}); + mlp_ = reg("mlp", dim, (int)(dim * mlp_ratio), config); + window_size_ = window_size; + } + + std::tuple> windowPartition(Tensor x, int window_size) { + auto B = x.size(0); + auto H = x.size(1); + auto W = x.size(2); + auto C = x.size(3); + + auto pad_h = (window_size - H % window_size) % window_size; + auto pad_w = (window_size - W % window_size) % window_size; + + if (pad_h > 0 || pad_w > 0) { x = nn::functional::pad(x, {0, 0, 0, pad_w, 0, pad_h}); } + + auto Hp = H + pad_h; + auto Wp = W + pad_w; + + x = x.view({B, Hp / window_size, window_size, Wp / window_size, window_size, C}); + auto window = x.permute({0, 1, 3, 2, 4, 5}).view({-1, window_size, window_size, C}); + return {window, {Hp, Wp}}; + } + + Tensor windowUnpartition(Tensor windows, int window_size, std::tuple pad_wh, std::tuple hw) { + auto [Hp, Wp] = pad_wh; + auto [H, W] = hw; + auto B = windows.size(0) / (Hp * Wp / window_size / window_size); + auto x = windows.view({B, Hp / window_size, Wp / window_size, window_size, window_size, -1}); + x = x.permute({0, 1, 3, 2, 4, 5}).view({B, Hp, Wp, -1}); + + if (Hp > H || Wp > W) { x = x[{kAll, {kAll, H}, {kAll, W}, kAll}].contiguous(); } + + return x; + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto x = inputs[0]; + auto shortcut = x; + x = norm1_(x); + + // Window partition + int H = 0; + int W = 0; + std::tuple pad_hw; + if (window_size_ > 0) { + H = x.size(1); + W = x.size(2); + std::tie(x, pad_hw) = windowPartition(x, window_size_); + } + + x = attn_(x)[0]; + + // Reverse window partition + if (window_size_ > 0) { x = windowUnpartition(x, window_size_, pad_hw, {H, W}); } + + x = shortcut + x; + x = x + mlp_(norm2_(x))[0]; + + return {x}; + } +}; + +class Blocks final : public nn::Module { + std::vector blocks_; + + public: + Blocks() = default; + + Blocks(const std::string& name, int nums, const std::vector& global_attn_indexes, const DpskOcrConfig& config) + : nn::Module(name) { + for (int i = 0; i < nums; ++i) { + bool is_in = std::find(global_attn_indexes.begin(), global_attn_indexes.end(), i) == global_attn_indexes.end(); + auto this_block_window_size = is_in ? 
14 : 0; + blocks_.emplace_back(reg(std::to_string(i), 768, 12, 4.0, true, true, this_block_window_size, + std::make_optional(std::make_tuple(1024 / 16, 1024 / 16)), config)); + blocks_[i].layer_idx_ = i; + blocks_[i].attn_.layer_idx_ = i; + } + }; + + std::vector& list() { return blocks_; } +}; + +class ImageEncoderViT final : public nn::Module { + PatchEmbed patch_embed_; + nn::Param pos_embed_; + Blocks blocks_; + nn::Sequential neck_; + nn::Conv2D net_2_; + nn::Conv2D net_3_; + + public: + ImageEncoderViT() = default; + + ImageEncoderViT(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) { + patch_embed_ = reg("patch_embed", config); + pos_embed_ = reg("pos_embed", getModuleName() + ".pos_embed"); + + // block_nums = 12 + // embed_dim = 768 + // num_heads = 12 + // mlp_ratio = 4.f + // qkv_bias = true + // use_rel_pos = true + // window_size = 14 + blocks_ = reg("blocks", 12, std::vector{2, 5, 8, 11}, config); + + neck_ = reg("neck") + .add(768, 256, Tensor::shape_t{1, 1}, Tensor::shape_t{1, 1}, Tensor::shape_t{0, 0}, + Tensor::shape_t{1, 1}, false) + .add(256) + .add(256, 256, Tensor::shape_t{3, 3}, Tensor::shape_t{1, 1}, Tensor::shape_t{1, 1}, + Tensor::shape_t{1, 1}, false) + .add(256); + + net_2_ = reg("net_2", 256, 512, Tensor::shape_t{3, 3}, Tensor::shape_t{2, 2}, Tensor::shape_t{1, 1}, + Tensor::shape_t{1, 1}, false); + net_3_ = reg("net_3", 512, 1024, Tensor::shape_t{3, 3}, Tensor::shape_t{2, 2}, Tensor::shape_t{1, 1}, + Tensor::shape_t{1, 1}, false); + } + + Tensor getAbsPosSam(Tensor abs_pos, int tgt_size) { + auto dtype = abs_pos.dtype(); + auto src_size = abs_pos.size(1); + + if (src_size != tgt_size) { + auto old_pos_embed = abs_pos.permute({0, 3, 1, 2}); + old_pos_embed = old_pos_embed.to(kFloat32); + // clang-format off + auto new_pos_embed = nn::functional::interpolateBySize(old_pos_embed, {tgt_size, tgt_size}, aops::InterpolateOpMode::kBicubic, false, true).to(dtype); + // clang-format on + new_pos_embed = new_pos_embed.permute({0, 2, 3, 1}); + return new_pos_embed; + } else { + return abs_pos; + } + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto x = inputs[0]; + x = patch_embed_(x)[0]; + x = x + getAbsPosSam(pos_embed_.weight(), x.size(1)); + for (auto& blk : blocks_.list()) { x = blk(x)[0]; } + + x = neck_(x.permute({0, 3, 1, 2}))[0]; + x = net_2_(x); + x = net_3_(x); + return {x}; + } +}; + +} // namespace mllm::models::deepseek_ocr diff --git a/mllm/models/deepseek_ocr/modeling_deepseek_ocr.hpp b/mllm/models/deepseek_ocr/modeling_deepseek_ocr.hpp new file mode 100644 index 00000000..1a1ed955 --- /dev/null +++ b/mllm/models/deepseek_ocr/modeling_deepseek_ocr.hpp @@ -0,0 +1,909 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
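+// Decoder-side modeling for DeepSeek-OCR: RoPE embedding helpers, the DeepseekV2
+// MLP / MoE / attention stack, and the vision-augmented causal LM wrapper.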
+#pragma once + +#include +#include +#include +#include +#include + +#include "mllm/mllm.hpp" +#include "mllm/nn/Nn.hpp" +#include "mllm/utils/StringHelper.hpp" +#include "mllm/models/ARGeneration.hpp" +#include "mllm/nn/lmcache/StaticCache.hpp" +#include "mllm/preprocessor/visual/ImageTransform.hpp" + +#include "mllm/models/deepseek_ocr/deepencoder.hpp" +#include "mllm/models/deepseek_ocr/conversation_preprocess.hpp" +#include "mllm/models/deepseek_ocr/tokenization_deepseek_ocr.hpp" +#include "mllm/models/deepseek_ocr/configuration_deepseek_ocr.hpp" + +namespace mllm::models::deepseek_ocr { + +inline auto makeRoPEInvFreq(int output_dim, float rope_theta) -> Tensor { + auto inv_freq = Tensor::empty({output_dim / 2}, kFloat32, kCPU).alloc(); + auto inv_freq_ptr = inv_freq.ptr(); + for (int i = 0; i < output_dim / 2; i++) { inv_freq_ptr[i] = 1.0 / std::pow(rope_theta, 2.0 * i / output_dim); } + return inv_freq; +} + +inline auto makeRotaryPosEmbedding(Tensor& position_ids, const Tensor& inv_freq, + float attention_scaling = 1.0f) -> std::pair { + auto batch_size = position_ids.shape()[0]; + auto seq_len = position_ids.shape()[1]; + auto inv_freq_len = inv_freq.shape()[0]; + auto dim = inv_freq_len * 2; + + // Create freqs tensor: position_ids @ inv_freq + auto freqs = Tensor::empty({batch_size, seq_len, inv_freq_len}, kFloat32, kCPU).alloc(); + auto freqs_ptr = freqs.ptr(); + auto position_ids_ptr = position_ids.ptr(); + auto inv_freq_ptr = inv_freq.ptr(); + + // Compute freqs = position_ids[:, :, None] @ inv_freq[None, :] + for (int b = 0; b < batch_size; ++b) { + for (int s = 0; s < seq_len; ++s) { + auto pos = position_ids_ptr[b * seq_len + s]; + for (int d = 0; d < inv_freq_len; ++d) { + freqs_ptr[b * seq_len * inv_freq_len + s * inv_freq_len + d] = static_cast(pos) * inv_freq_ptr[d]; + } + } + } + + // Create sin and cos tensors with shape [batch_size, seq_len, dim] + auto sin_emb = Tensor::empty({batch_size, seq_len, dim}, kFloat32, kCPU).alloc(); + auto cos_emb = Tensor::empty({batch_size, seq_len, dim}, kFloat32, kCPU).alloc(); + auto sin_ptr = sin_emb.ptr(); + auto cos_ptr = cos_emb.ptr(); + + // Compute sin and cos embeddings: emb = [freqs, freqs] + for (int b = 0; b < batch_size; ++b) { + for (int s = 0; s < seq_len; ++s) { + for (int d = 0; d < inv_freq_len; ++d) { + auto freq = freqs_ptr[b * seq_len * inv_freq_len + s * inv_freq_len + d]; + auto sin_val = std::sin(freq) * attention_scaling; + auto cos_val = std::cos(freq) * attention_scaling; + + // Store the same values in both halves: [freqs, freqs] + sin_ptr[b * seq_len * dim + s * dim + d] = sin_val; + sin_ptr[b * seq_len * dim + s * dim + d + inv_freq_len] = sin_val; + cos_ptr[b * seq_len * dim + s * dim + d] = cos_val; + cos_ptr[b * seq_len * dim + s * dim + d + inv_freq_len] = cos_val; + } + } + } + + return {sin_emb, cos_emb}; +} + +class DeepseekV2MLP final : public nn::Module { + nn::Linear gate_proj_; + nn::Linear up_proj_; + nn::Linear down_proj_; + nn::SiLU act_; + + int hidden_size_; + int intermediate_size_; + + public: + DeepseekV2MLP() = default; + + explicit DeepseekV2MLP(const std::string& name, const DpskOcrConfig& config, + const std::optional& hidden_size = std::nullopt, + const std::optional& intermediate_size = std::nullopt) + : nn::Module(name) { + hidden_size_ = hidden_size.value_or(config.hidden_size); + intermediate_size_ = intermediate_size.value_or(config.intermediate_size); + + // clang-format off + gate_proj_ = reg("gate_proj", hidden_size_, intermediate_size_, false, 
config.llm_mlp_linear_impl_type); + up_proj_ = reg("up_proj", hidden_size_, intermediate_size_, false, config.llm_mlp_linear_impl_type); + down_proj_ = reg("down_proj", intermediate_size_, hidden_size_, false, config.llm_mlp_linear_impl_type); + act_ = reg("act"); + // clang-format on + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + return {down_proj_(act_(gate_proj_(inputs[0])) * up_proj_(inputs[0]))}; + } +}; + +class MoEGate final : public nn::Module { + // FIXME: We may need to support more types + std::string scoring_func_ = "softmax"; + std::string topk_method_ = "greedy"; + + int top_k_; + int n_routed_experts_; + float routed_scaling_factor_; + int n_group_; + int topk_group_; + bool norm_topk_prob_; + + nn::Param weight_; + + public: + MoEGate() = default; + + MoEGate(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) { + top_k_ = config.num_experts_per_tok; + n_routed_experts_ = config.n_routed_experts; + + // FIXME: Read from config.json instead of hard-coding + routed_scaling_factor_ = 1.f; + norm_topk_prob_ = false; + + n_group_ = config.n_group; + topk_group_ = config.topk_group; + + weight_ = reg("weight", getModuleName() + ".weight"); + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto hidden_states = inputs[0]; + auto bsz = hidden_states.size(0); + auto seq_len = hidden_states.size(1); + auto h = hidden_states.size(2); + + // Compute gating score + hidden_states = hidden_states.view({-1, h}); + // hidden_states and weight must in fp32 to keep precision !!! + auto logits = nn::functional::matmul(hidden_states, weight_.weight(), false, true); + auto scores = nn::functional::softmax(logits, -1); + auto [topk_weight, topk_idx] = nn::functional::topk(scores, top_k_, -1, true, false); + + // FIXME: Someone may need to Norm gate to sum 1. 
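+    // A sketch of what that normalization would look like if norm_topk_prob_ were honored
+    // (assumed tensor API, mirroring the HF DeepseekV2 reference):
+    //   if (norm_topk_prob_) {
+    //     auto denom = topk_weight.sum(-1, /*keep_dim=*/true) + 1e-20;
+    //     topk_weight = topk_weight / denom;
+    //   }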
+ // FIXME: Someone may need rescale topk_weight by routed_scaling_factor_, but here is hard-code to 1.f + + return {topk_idx, topk_weight}; + } +}; + +class DeepseekV2MoE final : public nn::Module { + int num_experts_per_tok_; + + // FIXME: Should not hard-code + int ep_size_ = 1; + int experts_per_rank_; + int n_shared_experts_ = 0; + + nn::ModuleList experts_; + MoEGate gate_; + DeepseekV2MLP shared_experts_; + + public: + DeepseekV2MoE() = default; + + DeepseekV2MoE(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) { + num_experts_per_tok_ = config.num_experts_per_tok; + experts_per_rank_ = config.n_routed_experts; + n_shared_experts_ = config.n_shared_experts; + + // Init experts + experts_ = reg>("experts", config.n_routed_experts, config, std::nullopt, + config.moe_intermediate_size); + gate_ = reg("gate", config); + + if (n_shared_experts_ > 0) { + auto intermediate_size = config.moe_intermediate_size * config.n_shared_experts; + shared_experts_ = reg("shared_experts", config, std::nullopt, intermediate_size); + } + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto hidden_states = inputs[0]; + auto identity = hidden_states; + auto orig_shape = hidden_states.shape(); + auto topk_idx = Tensor::nil(); + auto topk_weight = Tensor::nil(); + auto gated_ret = gate_(hidden_states); + topk_idx = gated_ret[0]; + topk_weight = gated_ret[1]; + hidden_states = hidden_states.view({-1, hidden_states.size(-1)}); + auto flat_topk_idx = topk_idx.view({-1}); + auto y = moeInfer(hidden_states, topk_idx, topk_weight).view(orig_shape); + if (n_shared_experts_ > 0) { y = y + shared_experts_(identity)[0]; } + return {y}; + } + + private: + Tensor moeInfer(const Tensor& x, Tensor& topk_ids, Tensor& topk_weights) { + // x shape is [batch_size * seq, hidden_dim] + + auto cnts = Tensor::zeros({topk_ids.size(0), (int32_t)experts_.list().size()}); + // Do scatter_ operation + { + const int32_t* idx_ptr = topk_ids.ptr(); + float* cnt_ptr = cnts.ptr(); + const int batch = topk_ids.size(0); + const int k = topk_ids.size(1); + const int n_exp = cnts.size(1); + for (int b = 0; b < batch; ++b) { + for (int j = 0; j < k; ++j) { + int32_t e = idx_ptr[b * k + j]; + MLLM_RT_ASSERT(e >= 0 && e < n_exp); + cnt_ptr[b * n_exp + e] += 1.f; // +1 + } + } + } + auto tokens_per_expert = cnts.sum(0); + auto idxs = topk_ids.view({-1}).argsort(); + + // TODO this line maybe error + auto sorted_tokens = x[{idxs / topk_ids.size(1), {kAll}}]; + + std::vector outputs; + int start_idx = 0; + + // tokens_per_expert shape is [num_experts] + // Loop through each expert + for (int i = 0; i < experts_.list().size(); ++i) { + auto num_tokens = tokens_per_expert.ptr()[i]; + auto end_idx = start_idx + (int32_t)num_tokens; + if (num_tokens == 0) { continue; } + auto& expert = experts_.list()[i]; + auto tokens_for_this_expert = sorted_tokens[{{start_idx, end_idx}, kAll}]; + auto expert_out = expert(tokens_for_this_expert)[0]; + outputs.push_back(expert_out); + start_idx = end_idx; + } + + auto outs = nn::functional::concat(outputs, 0); + auto new_x = Tensor::emptyLike(outs).alloc(); + + // indexed_write + // python logic: new_x[idxs] = outs + { + const int32_t* idx_ptr = idxs.ptr(); + float* outs_ptr = outs.ptr(); + float* new_x_ptr = new_x.ptr(); + MLLM_RT_ASSERT_EQ(new_x.rank(), 2); + MLLM_RT_ASSERT_EQ(new_x.size(0), idxs.size(0)); + auto dim = new_x.size(1); + for (int i = 0; i < idxs.size(0); ++i) { + int32_t idx = idx_ptr[i]; + std::memcpy(new_x_ptr + idx * dim, outs_ptr + 
i * dim, dim * sizeof(float));
+      }
+    }
+
+    auto final_out_shape = topk_ids.shape();
+    final_out_shape.emplace_back(-1);
+    auto final_out =
+        new_x.view(final_out_shape).to(topk_weights.dtype()).mul_(topk_weights.unsqueeze(-1)).sum(1).to(new_x.dtype());
+    return final_out;
+  }
+};
+
+// DeepSeek-OCR's attention does not use MLA; it is the same as LlamaFlashAttention2.
+class DeepseekV2Attention final : public nn::Module {
+  nn::Linear q_proj_;
+  nn::Linear k_proj_;
+  nn::Linear v_proj_;
+  nn::RoPE q_rope_;
+  nn::RoPE k_rope_;
+  nn::Linear o_proj_;
+  int hidden_size_;
+  int num_head_;
+  int head_dim_;
+  int num_key_value_heads_;
+
+ public:
+  int layer_idx_;
+
+  DeepseekV2Attention() = default;
+
+  DeepseekV2Attention(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) {
+    hidden_size_ = config.hidden_size;
+    num_head_ = config.num_attention_heads;
+    head_dim_ = config.hidden_size / config.num_attention_heads;
+    num_key_value_heads_ = config.num_key_value_heads;
+
+    // clang-format off
+    q_proj_ = reg("q_proj", hidden_size_, num_head_ * head_dim_, false, config.llm_mlp_linear_impl_type);
+    k_proj_ = reg("k_proj", hidden_size_, num_key_value_heads_ * head_dim_, false, config.llm_mlp_linear_impl_type).redirect();
+    v_proj_ = reg("v_proj", hidden_size_, num_key_value_heads_ * head_dim_, false, config.llm_mlp_linear_impl_type).redirect();
+    o_proj_ = reg("o_proj", num_head_ * head_dim_, hidden_size_, false, config.llm_mlp_linear_impl_type);
+    q_rope_ = reg("q_rope", 10000.0, config.max_position_embeddings, aops::RoPEOpOptionsInputType::kBSHD).inplace();
+    k_rope_ = reg("k_rope", 10000.0, config.max_position_embeddings, aops::RoPEOpOptionsInputType::kBSHD).inplace();
+    // clang-format on
+  }
+
+  std::vector forward(const std::vector& inputs, const std::vector& args) override {
+    auto hidden_states = inputs[0];
+    auto llm_embedding_sin = inputs[1];
+    auto llm_embedding_cos = inputs[2];
+    auto past_kv_cache = args[0].get();
+
+    auto bsz = hidden_states.size(0);
+    auto q_len = hidden_states.size(1);
+
+    // Get the KV-cache write locations for Key and Value first.
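+    // k_proj_ and v_proj_ were registered with .redirect(), so the locations fetched
+    // below let those projections write straight into the cache slots, skipping one
+    // copy of K and V per layer per step.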
+ // [B, S, H * D] + auto [key_states_redirect, value_states_redirect] = past_kv_cache->preGetKVWriteLocation(layer_idx_, q_len); + + auto query_states = q_proj_(hidden_states); + auto key_states = k_proj_(hidden_states, key_states_redirect); + auto value_states = v_proj_(hidden_states, value_states_redirect); + + // [B, S, H, D] + query_states = query_states.view({bsz, q_len, num_head_, head_dim_}); + key_states = key_states.view({bsz, q_len, num_key_value_heads_, head_dim_}); + + // [B, S, H, D] + query_states = q_rope_(query_states, llm_embedding_sin, llm_embedding_cos); + key_states = k_rope_(key_states, llm_embedding_sin, llm_embedding_cos); + + // Get KV + auto [K, V] = past_kv_cache->getKVCache(layer_idx_); + + // [B, S, H, D] FA2 + auto output = o_proj_(nn::functional::flashAttention2(query_states, K, V).view({bsz, q_len, num_head_ * head_dim_})); + + return {output}; + } +}; + +class DeepseekV2DecoderLayer final : public nn::Module { + // Use llama2 attention impl in deepseek-ocr model + DeepseekV2Attention self_attn_; + + // FIXME: Do not use hard-code + int first_k_dense_replace_ = 1; + int moe_layer_freq_ = 1; + + nn::RMSNorm input_layernorm_; + nn::RMSNorm post_attention_layernorm_; + + std::optional mlp_opt0_ = std::nullopt; + std::optional mlp_opt1_ = std::nullopt; + + public: + int layer_idx_; + + DeepseekV2DecoderLayer() = default; + + DeepseekV2DecoderLayer(const std::string& name, const DpskOcrConfig& config, int layer_idx) : nn::Module(name) { + layer_idx_ = layer_idx; + first_k_dense_replace_ = config.first_k_dense_replace; + + self_attn_ = reg("self_attn", config); + self_attn_.layer_idx_ = layer_idx; + + if (config.n_routed_experts > 0 && layer_idx_ >= config.first_k_dense_replace && layer_idx_ % moe_layer_freq_ == 0) { + mlp_opt0_ = reg("mlp", config); + } else { + mlp_opt1_ = reg("mlp", config); + } + + input_layernorm_ = reg("input_layernorm", 1e-6); + post_attention_layernorm_ = reg("post_attention_layernorm", 1e-6); + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto hidden_states = inputs[0]; + auto rope_pos_embed_sin = inputs[1]; + auto rope_pos_embed_cos = inputs[2]; + auto kv_cache = args[0]; + auto residual = hidden_states; + + hidden_states = input_layernorm_(hidden_states); + hidden_states = self_attn_(hidden_states, rope_pos_embed_sin, rope_pos_embed_cos, kv_cache)[0]; + hidden_states = residual + hidden_states; + + residual = hidden_states; + hidden_states = post_attention_layernorm_(hidden_states); + if (mlp_opt0_) { + hidden_states = mlp_opt0_.value()(hidden_states)[0]; + } else { + hidden_states = mlp_opt1_.value()(hidden_states)[0]; + } + hidden_states = residual + hidden_states; + + return {hidden_states}; + } +}; + +class DeepSeekV2Model : public nn::Module { + protected: + nn::Embedding embed_tokens_; + nn::ModuleListWithIdx layers_; + nn::RMSNorm norm_; + + public: + DeepSeekV2Model() = default; + + explicit DeepSeekV2Model(const std::string& name, const DpskOcrConfig& config) : nn::Module(name) { + embed_tokens_ = reg("embed_tokens", config.vocab_size, config.hidden_size); + layers_ = reg>("layers", config.num_hidden_layers, config); + norm_ = reg("norm", 1e-6); + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + auto& input_embeddings = inputs[0]; + auto rope_embedding_sin = inputs[1]; + auto rope_embedding_cos = inputs[2]; + auto kv_cache = args[0]; + + auto hidden_states = input_embeddings; + + for (auto& layer : layers_.list()) { + hidden_states = 
layer(hidden_states, rope_embedding_sin, rope_embedding_cos, kv_cache)[0]; + } + + hidden_states = norm_(hidden_states); + + return {hidden_states}; + } +}; + +class DeepseekOCRModel final : public DeepSeekV2Model { + VitModel vision_model_; + ImageEncoderViT sam_model_; + MlpProjector projector_; + nn::Param image_newline_; + nn::Param view_separator_; + int n_embed = 1280; + + public: + DeepseekOCRModel() = default; + + explicit DeepseekOCRModel(const std::string& name, const DpskOcrConfig& config) : DeepSeekV2Model(name, config) { + sam_model_ = reg("sam_model", config); + vision_model_ = reg("vision_model", config); + projector_ = reg("projector", config); + image_newline_ = reg("image_newline", getModuleName() + ".image_newline"); + view_separator_ = reg("view_seperator", getModuleName() + ".view_seperator"); ///< DeepSeek's typo. + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + // FIXME: Just support one image right now. + // Inputs: should be [input_ids, optional[image_crop], optional[image_ori], optional[images_spatial_crop]] + auto& input_ids = inputs[0]; + auto patches = inputs.size() > 1 ? inputs[1] : Tensor::nil(); + auto image_ori = inputs.size() > 2 ? inputs[2] : Tensor::nil(); + auto images_spatial_crop = inputs.size() > 3 ? inputs[3] : Tensor::nil(); + auto images_seq_mask = inputs.size() > 4 ? inputs[4] : Tensor::nil(); + auto rope_embedding_sin = inputs.size() > 5 ? inputs[5] : Tensor::nil(); + auto rope_embedding_cos = inputs.size() > 6 ? inputs[6] : Tensor::nil(); + + // Embedding + auto inputs_embeds = embed_tokens_(input_ids); + + // We need to process image + auto images_in_this_batch = Tensor::nil(); + if (patches && image_ori && images_spatial_crop && images_seq_mask) { + if (nn::functional::sum(patches).item() != 0) { + // Local features + auto local_features_1 = sam_model_(patches)[0]; + auto local_features_2 = vision_model_(patches, local_features_1)[0]; + auto local_features = nn::functional::concat( + { + // FIXME: contiguous is not needed. We use contiguous because mllm has weak performance in this case. + local_features_2[{kAll, {1, kAll}, kAll}].contiguous(), + local_features_1.flatten(2).permute({0, 2, 1}), + }, + -1); + local_features = projector_(local_features)[0]; + + // Global features + auto global_features_1 = sam_model_(image_ori)[0]; + auto global_features_2 = vision_model_(image_ori, global_features_1)[0]; + auto global_features = nn::functional::concat( + { + // FIXME: contiguous is not needed. We use contiguous because mllm has weak performance in this case. + global_features_2[{kAll, {1, kAll}, kAll}].contiguous(), + global_features_1.flatten(2).permute({0, 2, 1}), + }, + -1); + global_features = projector_(global_features)[0]; + + print("====================="); + print("BASE: ", global_features.shape()); + print("PATCHES: ", local_features.shape()); + print("====================="); + + auto hw = global_features.size(1); + auto n_dim = global_features.size(2); + auto h = (int)std::sqrt(hw); + auto w = h; + + auto hw2 = local_features.size(1); + auto n_dim2 = local_features.size(2); + auto h2 = (int)std::sqrt(hw2); + auto w2 = h2; + + MLLM_RT_ASSERT_EQ(images_spatial_crop.dtype(), kInt64); + int width_crop_num = images_spatial_crop.at({0, 0}); + int height_crop_num = images_spatial_crop.at({0, 1}); + + global_features = global_features.view({h, w, n_dim}); + global_features = nn::functional::concat( + { + global_features, + + // FIXME: This line is in-efficient. 
+ // pytorch logic: self.image_newline[None, None, :].expand(h, 1, n_dim) + // + // Use pytorch like expand instead. Expand will only modified stride, no memory copy involved. + // But many kernels in mllm's arm backend not use stride as loop step, but calculate itself, so we need to + // refact it. + image_newline_.weight().view({1, 1, -1}).repeat(h, 0), + }, + 1); + + global_features = global_features.view({-1, n_dim}); + + local_features = local_features.view({height_crop_num, width_crop_num, h2, w2, n_dim2}) + .permute({0, 2, 1, 3, 4}) + .view({height_crop_num * h2, width_crop_num * w2, n_dim2}); + local_features = nn::functional::concat( + { + local_features, + + // FIXME: This line is in-efficient. + // pytorch logic: self.image_newline[None, None, :].expand(height_crop_num * h2, 1, n_dim2) + // + // Use pytorch like expand instead. Expand will only modified stride, no memory copy involved. + // But many kernels in mllm's arm backend not use stride as loop step, but calculate itself, so we need to + // refact it. + image_newline_.weight().view({1, 1, -1}).repeat(height_crop_num * h2, 0), + }, + 1); + + local_features = local_features.view({-1, n_dim2}); + auto global_local_features = nn::functional::concat( + { + local_features, + global_features, + + // pytorch logic: self.view_seperator[None, :] + view_separator_.weight().view({1, -1}), + }, + 0); + images_in_this_batch = global_local_features; + } else { + auto global_features_1 = sam_model_(image_ori)[0]; + auto global_features_2 = vision_model_(image_ori, global_features_1)[0]; + auto global_features = nn::functional::concat( + { + global_features_2[{kAll, {1, kAll}, kAll}], + global_features_1.flatten(2).permute({0, 2, 1}), + }, + -1); + + global_features = projector_(global_features)[0]; + + print("====================="); + print("BASE: ", global_features.shape()); + print("NO PATCHES"); + print("====================="); + + auto hw = global_features.size(1); + auto n_dim = global_features.size(2); + auto h = (int)std::sqrt(hw); + auto w = h; + + global_features = global_features.view({h, w, n_dim}); + global_features = nn::functional::concat( + { + global_features, + + // FIXME: This line is in-efficient. + // pytorch logic: self.image_newline[None, None, :].expand(h, 1, n_dim) + // + // Use pytorch like expand instead. Expand will only modified stride, no memory copy involved. + // But many kernels in mllm's arm backend not use stride as loop step, but calculate itself, so we need to + // refact it. + image_newline_.weight().view({1, 1, -1}).repeat(h, 0), + }, + 1); + + global_features = global_features.view({-1, n_dim}); + + auto global_local_features = nn::functional::concat( + { + global_features, + view_separator_.weight().view({1, -1}), + }, + 0); + + images_in_this_batch = global_local_features; + } + } + + // Scatter copy. 
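+    // maskedScatter overwrites the token embeddings wherever images_seq_mask is true
+    // with consecutive rows of images_in_this_batch, i.e. the python logic
+    //   inputs_embeds[images_seq_mask] = images_in_this_batch
+    // so the number of true mask entries must match images_in_this_batch.size(0).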
+ if (images_in_this_batch) { + nn::functional::maskedScatter(inputs_embeds, images_seq_mask.unsqueeze(-1), images_in_this_batch); + } + + auto sequence = DeepSeekV2Model::forward({inputs_embeds, rope_embedding_sin, rope_embedding_cos}, args)[0]; + + return {sequence}; + } +}; + +class DeepseekOCRForCausalLM final : public nn::Module, public ARGeneration { + DeepseekOCRModel model_; + nn::Linear lm_head_; + nn::StaticCache kv_cache_; + + public: + DeepseekOCRForCausalLM() = default; + + explicit DeepseekOCRForCausalLM(const DpskOcrConfig& config) { + model_ = reg("model", config); + lm_head_ = reg("lm_head", config.hidden_size, config.vocab_size, false, config.lm_head_linear_impl_type); + + // Init inv freq + auto inv = makeRoPEInvFreq(config.hidden_size / config.num_attention_heads, 10000.0); + registerBuffer("inv_freq", inv); + + // kv_cache_ + kv_cache_ = nn::StaticCache(config.max_cache_length, config.num_hidden_layers, + config.num_attention_heads, // q_heads + config.num_key_value_heads, // kv_heads + config.hidden_size / config.num_attention_heads, // kv_dim + kFloat32, // k_dtype + kFloat32, // v_dtype + kCPU, // device_type + true // use_fa2 + ); + } + + ARGenerationOutputPast forward(const ARGenerationOutputPast& input, const ARGenerationArgs& args) override { + auto patches = input.count("patches") ? input.at("patches") : Tensor::nil(); + auto image_ori = input.count("image_ori") ? input.at("image_ori") : Tensor::nil(); + auto images_spatial_crop = input.count("images_spatial_crop") ? input.at("images_spatial_crop") : Tensor::nil(); + auto images_seq_mask = input.count("images_seq_mask") ? input.at("images_seq_mask") : Tensor::nil(); + + auto sequence = input.at("sequence"); + + // Generate position_ids for the current sequence + auto batch_size = sequence.shape()[0]; + auto seq_len = sequence.shape()[1]; + + auto position_ids = Tensor::nil(); + auto rope_embedding_sin = Tensor::nil(); + auto rope_embedding_cos = Tensor::nil(); + auto kv_cache = args.at("kv_cache"); + + if (input.count("position_ids")) { + // Use existing position_ids for decode phase + position_ids = input.at("position_ids"); + // For decode phase, increment the last position + if (seq_len == 1) { + auto last_pos = *position_ids.offsettedPtr({0, position_ids.shape()[1] - 1}); + position_ids = Tensor::empty({batch_size, 1}, kInt64, kCPU).alloc(); + *position_ids.offsettedPtr({0, 0}) = last_pos + 1; + } + } else { + // Generate position_ids for prefill phase + position_ids = Tensor::empty({batch_size, seq_len}, kInt64, kCPU).alloc(); + auto position_ids_ptr = position_ids.ptr(); + for (int b = 0; b < batch_size; ++b) { + for (int s = 0; s < seq_len; ++s) { position_ids_ptr[b * seq_len + s] = s; } + } + } + + auto [llm_embedding_sin, llm_embedding_cos] = makeRotaryPosEmbedding(position_ids, getBuffer("inv_freq"), 1.0f); + rope_embedding_sin = llm_embedding_sin; + rope_embedding_cos = llm_embedding_cos; + sequence = model_(sequence, patches, image_ori, images_spatial_crop, images_seq_mask, rope_embedding_sin, + rope_embedding_cos, kv_cache)[0]; + // clip x to one seq length + { + auto S = sequence.shape()[1]; + sequence = sequence[{kAll, {S - 1}, kAll}]; + } + sequence = lm_head_(sequence); + + return { + {"sequence", sequence}, + {"position_ids", position_ids}, + }; + } + + void infer(DpskOcrTokenizer& tokenizer, const std::string& prompt, const std::string& image_fp, + const std::string& output_path, int base_size = 1024, int image_size = 640, bool crop_mode = true) { + // Initialize template + 
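// Registers the "deepseek", "deepseekv2", "plain" and "alignment" conversation templates
+    // consumed by formatMessages() below. Note that registerConvTemplate() asserts on
+    // duplicate names (in debug builds), so a second call would trip it unless override is set.
+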
+    // Initialize template
+    initializeTemplates();
+
+    namespace fs = std::filesystem;
+    fs::path out_path(output_path);
+    fs::create_directories(out_path);
+    fs::create_directories(out_path / "images");
+
+    nlohmann::json conversations;
+    if (!prompt.empty() && !image_fp.empty()) {
+      conversations = nlohmann::json::array();
+      conversations.push_back({{"role", "<|User|>"}, {"content", prompt}, {"images", nlohmann::json::array({image_fp})}});
+      conversations.push_back({{"role", "<|Assistant|>"}, {"content", ""}});
+    } else if (!prompt.empty()) {
+      conversations = nlohmann::json::array();
+      conversations.push_back({{"role", "<|User|>"}, {"content", prompt}});
+      conversations.push_back({{"role", "<|Assistant|>"}, {"content", ""}});
+    } else {
+      // The prompt must not be empty
+      MLLM_RT_ASSERT_EQ(prompt.empty(), false);
+    }
+
+    auto processed_prompt = formatMessages(conversations, "plain", "");
+
+    // Global constants
+    const int PATCH_SIZE = 16;
+    const int DOWN_SAMPLE_RATIO = 4;
+    const std::string IMAGE_TOKEN = "<image>";
+    const int64_t IMAGE_TOKEN_ID = 128815;
+
+    // Global states
+    int valid_img_tokens = 0;
+    float ratio = 1.f;
+
+    // Load images
+    auto images = loadImages(conversations);
+
+    auto w = images[0].w();
+    auto h = images[0].h();
+    ratio = 1 - (float)(std::max(w, h) - std::min(w, h)) / (float)std::max(w, h);
+
+    // Image transform infra
+    auto image_transform = BasicImageTransform(std::nullopt, std::nullopt, /*mean=*/std::vector<float>{0.5, 0.5, 0.5},
+                                               /*std=*/std::vector<float>{0.5, 0.5, 0.5});
+
+    // Split the text on IMAGE_TOKEN,
+    // like what python does: text_splits = prompt.split(image_token)
+    auto text_splits = mllm::splitString(processed_prompt, IMAGE_TOKEN);
+
+    // Processed states
+    std::vector<int64_t> tokenized_str;
+    std::vector<int8_t> images_seq_mask;
+    std::vector<Tensor> images_list;
+    std::vector<Tensor> images_crop_list;
+    std::vector<std::pair<int, int>> images_spatial_crop;
+
+    // text_splits must contain exactly one more entry than there are images:
+    // text_splits.size() - images.size() == 1.
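+    // For example, assuming mllm::splitString behaves like Python's str.split:
+    // with IMAGE_TOKEN = "<image>", a processed prompt "<image>\nFree OCR."
+    // splits into {"", "\nFree OCR."} -- one image, two text splits.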
+    for (int idx = 0; idx < std::min(images.size(), text_splits.size()); ++idx) {
+      auto tokenized_sep = tokenizer.encode(text_splits[idx]);
+      tokenized_str.insert(tokenized_str.end(), tokenized_sep.begin(), tokenized_sep.end());
+      for (int _i = 0; _i < tokenized_sep.size(); ++_i) {
+        images_seq_mask.emplace_back(0);  // emplace_back(false)
+      }
+
+      // Get the image for this loop iteration
+      auto image = images[idx];
+      std::tuple<int, int> crop_ratio;
+      std::vector<Image> images_crop_raw;
+
+      // Process the image
+      if (crop_mode) {
+        if (image.h() <= 640 && image.w() <= 640) {
+          crop_ratio = {1, 1};
+        } else {
+          auto p = dynamicPreprocess(image);
+          images_crop_raw = p.first;
+          crop_ratio = p.second;
+        }
+
+        // python: color=tuple(int(x * 255) for x in image_transform.mean)
+        auto global_view = image.pad(base_size, base_size, (int)(255 * 0.5), (int)(255 * 0.5), (int)(255 * 0.5));
+
+        if (base_size == 1024) {
+          valid_img_tokens += (int)(256 * ratio);
+        } else if (base_size == 1280) {
+          valid_img_tokens += (int)(400 * ratio);
+        } else {
+          MLLM_RT_ASSERT(false);
+        }
+
+        images_list.emplace_back(image_transform(global_view));
+
+        auto [width_crop_num, height_crop_num] = crop_ratio;
+        images_spatial_crop.emplace_back(width_crop_num, height_crop_num);
+
+        // Process the crops
+        if (width_crop_num > 1 || height_crop_num > 1) {
+          for (const auto& _i : images_crop_raw) { images_crop_list.emplace_back(image_transform(_i)); }
+        }
+
+        // Each 640x640 crop contributes 100 valid image tokens
+        valid_img_tokens += images_crop_list.size() * 100;
+
+        // Compute the number of queries per side, e.g. ceil((640 / 16) / 4) = 10
+        auto num_queries = (int)std::ceil((image_size / PATCH_SIZE) / (float)DOWN_SAMPLE_RATIO);
+        auto num_queries_base = (int)std::ceil((base_size / PATCH_SIZE) / (float)DOWN_SAMPLE_RATIO);
+
+        // Python logic:
+        // tokenized_image = ([image_token_id] * num_queries_base + [image_token_id]) * num_queries_base
+        // tokenized_image += [image_token_id]
+        std::vector<int64_t> tokenized_image;
+        tokenized_image.reserve((num_queries_base + 1) * num_queries_base + 1);
+        for (int i = 0; i < num_queries_base; ++i) {
+          tokenized_image.insert(tokenized_image.end(), num_queries_base, IMAGE_TOKEN_ID);
+          tokenized_image.push_back(IMAGE_TOKEN_ID);
+        }
+        tokenized_image.push_back(IMAGE_TOKEN_ID);
+
+        if (width_crop_num > 1 || height_crop_num > 1) {
+          for (int h = 0; h < num_queries * height_crop_num; ++h) {
+            tokenized_image.insert(tokenized_image.end(), num_queries * width_crop_num, IMAGE_TOKEN_ID);
+            tokenized_image.push_back(IMAGE_TOKEN_ID);
+          }
+        }
+
+        tokenized_str.insert(tokenized_str.end(), tokenized_image.begin(), tokenized_image.end());
+        for (int _i = 0; _i < tokenized_image.size(); ++_i) { images_seq_mask.emplace_back(true); }
+      } else {
+        NYI("crop_mode = false is not supported yet.");
+      }
+    }
+
+    // Process the last text split
+    auto tokenized_sep = tokenizer.encode(text_splits.back());
+    tokenized_str.insert(tokenized_str.end(), tokenized_sep.begin(), tokenized_sep.end());
+    images_seq_mask.insert(images_seq_mask.end(), tokenized_sep.size(), false);
+
+    // Add the bos token:
+    // bos_id = 0
+    // tokenized_str = [bos_id] + tokenized_str
+    // images_seq_mask = [False] + images_seq_mask
+    tokenized_str.insert(tokenized_str.begin(), 0);
+    images_seq_mask.insert(images_seq_mask.begin(), false);
+
+    // Prepare the input tensors for the DeepSeek-OCR model
+    auto input_ids = Tensor::fromVector(tokenized_str, {1, (int32_t)tokenized_str.size()}, kInt64);
+    auto images_seq_mask_tensor = Tensor::fromVector(images_seq_mask, {1, (int32_t)images_seq_mask.size()}, kInt8);
+    auto images_ori_tensor = Tensor::nil();
+    auto images_spatial_crop_tensor = Tensor::nil();
+    auto images_crop_tensor = Tensor::nil();
+    if (images_list.empty()) {
+      images_ori_tensor = Tensor::zeros({1, 3, image_size, image_size});
+      images_spatial_crop_tensor = Tensor::zeros({1, 2}, kInt64);
+      images_crop_tensor = Tensor::zeros({1, 3, base_size, base_size});
+    } else {
+      images_ori_tensor = nn::functional::stack(images_list, 0);
+      images_spatial_crop_tensor = Tensor::zeros({(int32_t)images_spatial_crop.size(), 2}, kInt64);
+      auto _ptr = images_spatial_crop_tensor.ptr<int64_t>();
+      for (int _i = 0; _i < images_spatial_crop.size(); ++_i) {
+        auto [l, h] = images_spatial_crop[_i];
+        _ptr[2 * _i + 0] = l;
+        _ptr[2 * _i + 1] = h;
+      }
+      if (!images_crop_list.empty()) {
+        images_crop_tensor = nn::functional::stack(images_crop_list, 0);
+      } else {
+        images_crop_tensor = Tensor::zeros({1, 3, base_size, base_size});
+      }
+    }
+
+    std::stringstream result;
+    streamGenerate(
+        {
+            {"patches", images_crop_tensor},
+            {"image_ori", images_ori_tensor},
+            {"images_spatial_crop", images_spatial_crop_tensor},
+            {"images_seq_mask", images_seq_mask_tensor},
+            {"sequence", input_ids},
+        },
+        {
+            {"kv_cache", mllm::AnyValue(&kv_cache_)},
+        },
+        [&](int64_t token_id) {
+          auto decode = tokenizer.decode({token_id});
+          result << decode;
+          std::cout << decode << std::flush;
+        });
+    print("\n"); ///< flush
+
+    // Post-process the data
+    // TODO
+  }
+};
+
+} // namespace mllm::models::deepseek_ocr
diff --git a/mllm/models/deepseek_ocr/tokenization_deepseek_ocr.hpp b/mllm/models/deepseek_ocr/tokenization_deepseek_ocr.hpp
new file mode 100644
index 00000000..5bdfc1dd
--- /dev/null
+++ b/mllm/models/deepseek_ocr/tokenization_deepseek_ocr.hpp
@@ -0,0 +1,300 @@
+// Copyright (c) MLLM Team.
+// Licensed under the MIT License.
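+
+// A minimal usage sketch (illustrative only; the path is a placeholder):
+//
+//   DpskOcrTokenizer tokenizer("/path/to/tokenizer.json");
+//   auto ids = tokenizer.encode("<|User|>Free OCR.<|Assistant|>");
+//   auto text = tokenizer.decode(ids);  // round-trips through byte-level BPE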
+
+// ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/tokenization_llama.py
+// and
+// ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/tokenization_llama_fast.py
+
+// LlamaTokenizerFast
+#pragma once
+
+#include
+#include
+#include
+
+#include "mllm/preprocessor/tokenizers/BPEUTF8.hpp"
+#include "mllm/preprocessor/tokenizers/Unicode.hpp"
+#include "mllm/preprocessor/tokenizers/AutoTokenizer.hpp"
+#include "mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode.h"
+
+namespace mllm::models::deepseek_ocr {
+
+namespace details {
+
+// The standard GPT-2 regex
+// https://github.com/openai/gpt-2/blob/master/src/encoder.py#L53
+constexpr char GPT2_EXPR[] = R"('s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+)";
+
+} // namespace details
+
+// Effectively a LlamaTokenizer
+class DpskOcrTokenizer final : public mllm::preprocessor::AutoTokenizerUTF8 {
+ public:
+  explicit DpskOcrTokenizer(const std::string& file_path) {
+    // Init
+    preprocessor::initLocal();
+
+    // Load the BPE files
+    bpe_.initFromSentencePieceJson(file_path);
+
+    // Add special tokens to the trie
+    special_tokens_trie_.add("<|User|>");
+    special_tokens_trie_.add("<|Assistant|>");
+    special_tokens_trie_.add("<|begin▁of▁sentence|>");
+    special_tokens_trie_.add("<|end▁of▁sentence|>");
+    special_tokens_trie_.add("<|▁pad▁|>");
+    special_tokens_trie_.add("<image>");
+    special_tokens_trie_.add("<|grounding|>");
+    special_tokens_trie_.add("");
+    special_tokens_trie_.add("");
+  }
+
+  std::vector<int64_t> encode(const std::string& str) override {
+    auto sub_tokens = tokenize(str);
+    auto ret = std::vector<int64_t>{};
+    for (auto& token : sub_tokens) { ret.emplace_back(bpe_._lookup_vocab(token)); }
+    return ret;
+  }
+
+  std::string decode(const std::vector<int64_t>& ids) override {
+    std::vector<std::string> after_bpe_check;
+    for (auto& each_id : ids) {
+      auto each_str = bpe_._lookup_inverse_vocab(each_id);
+      after_bpe_check.emplace_back(each_str);
+    }
+    return detokenize(after_bpe_check);
+  }
+
+  std::vector<std::string> tokenize(const std::string& str) override {
+    // Replace all blank tokens with the underscore
+
+    std::vector<std::string> ret;
+    for (auto& each_str : special_tokens_trie_.split(str)) {
+      if (special_tokens_trie_.isSpecialToken(each_str)) {
+        ret.emplace_back(each_str);
+        continue;
+      }
+
+      // FIXME: the regex pre-tokenizer should run here; for now the text is passed through unsplit:
+      auto after_regex_process = {each_str};
+
+      for (auto& ss : after_regex_process) {
+        auto after_bytes_process = byteLevelPreTokenizer(ss);
+
+        // Perform the BPE algorithm on each sub-token
+        for (auto& bbpe_str : after_bytes_process) {
+          auto bbpe_str_sub_tokens = bpe_._bpe(bbpe_str);
+          ret.insert(ret.end(), bbpe_str_sub_tokens.begin(), bbpe_str_sub_tokens.end());
+        }
+      }
+    }
+    return ret;
+  }
+
+  std::string detokenize(const std::vector<std::string>& tokenized_str) override {
+    std::string ret;
+    for (auto& each_str : tokenized_str) {
+      if (special_tokens_trie_.isSpecialToken(each_str)) {
+        ret += each_str;
+        continue;
+      }
+      // Loop over the utf8 string
+      utf8::iterator<std::string::const_iterator> it(each_str.begin(), each_str.begin(), each_str.end());
+      utf8::iterator<std::string::const_iterator> end_it(each_str.end(), each_str.begin(), each_str.end());
+      for (; it != end_it; ++it) {
+        char32_t cp = *it;
+        auto b = unicode_utf8_to_byte(unicode_cpt_to_utf8(cp));
+        ret.push_back(b);
+      }
+    }
+    return ret;
+  }
+
+ private:
+  std::vector<std::string> byteLevelPreTokenizer(const std::string& str) {
+    return unicode_regex_split(str, {std::string{details::GPT2_EXPR}});
+  }
+
+  // "pre_tokenizer": {
+  //   "type": "Sequence",
+  //   "pretokenizers": [
+  //     {
+  //       "type": "Split",
+  //       "pattern": {
// "Regex": "\\p{N}{1,3}" + // }, + // "behavior": "Isolated", + // "invert": false + // }, + // { + // "type": "Split", + // "pattern": { + // "Regex": "[一-龥぀-ゟ゠-ヿ]+" + // }, + // "behavior": "Isolated", + // "invert": false + // }, + // { + // "type": "Split", + // "pattern": { + // "Regex": "[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| + // ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+" + // }, + // "behavior": "Isolated", + // "invert": false + // }, + // { + // "type": "ByteLevel", + // "add_prefix_space": false, + // "trim_offsets": true, + // "use_regex": false + // } + // ] + // } + std::vector regexPreTokenizer(const std::string& token) { + std::vector out; + auto it = token.begin(); + auto end = token.end(); + + while (it != end) { + auto seg_start = it; + int digit_cnt = 0; + auto tmp = it; + while (digit_cnt < 3) { + uint32_t cp = 0; + auto next = tmp; + utf8::next(next, end); + if (next == tmp) break; + cp = utf8::peek_next(tmp, end); + if (!is_digit(cp)) break; + tmp = next; + ++digit_cnt; + } + if (digit_cnt > 0) { + out.emplace_back(seg_start, tmp); + it = tmp; + continue; + } + + uint32_t cp = utf8::peek_next(it, end); + if (is_cjk(cp)) { + auto tmp2 = it; + while (tmp2 != end) { + uint32_t nxt = utf8::peek_next(tmp2, end); + if (!is_cjk(nxt)) break; + utf8::next(tmp2, end); + } + out.emplace_back(seg_start, tmp2); + it = tmp2; + continue; + } + + if (is_punct_symbol(cp)) { + auto tmp3 = it; + utf8::next(tmp3, end); + if (tmp3 != end && is_letter(utf8::peek_next(tmp3, end))) { + utf8::next(tmp3, end); + out.emplace_back(seg_start, tmp3); + it = tmp3; + continue; + } + } + + if (!is_letter(cp) && !is_space(cp) && !is_punct_symbol(cp)) { + auto tmp3 = it; + utf8::next(tmp3, end); + if (tmp3 != end && is_letter(utf8::peek_next(tmp3, end))) { + while (tmp3 != end) { + uint32_t nxt = utf8::peek_next(tmp3, end); + if (!is_letter(nxt)) break; + utf8::next(tmp3, end); + } + out.emplace_back(seg_start, tmp3); + it = tmp3; + continue; + } + } + + if (is_punct_symbol(cp)) { + auto tmp3 = it; + while (tmp3 != end) { + uint32_t nxt = utf8::peek_next(tmp3, end); + if (!is_punct_symbol(nxt)) break; + utf8::next(tmp3, end); + } + + while (tmp3 != end) { + uint32_t nxt = utf8::peek_next(tmp3, end); + if (nxt != 0x0A && nxt != 0x0D) break; + utf8::next(tmp3, end); + } + out.emplace_back(seg_start, tmp3); + it = tmp3; + continue; + } + + if (is_space(cp)) { + auto tmp3 = it; + bool has_nl = false; + while (tmp3 != end) { + uint32_t nxt = utf8::peek_next(tmp3, end); + if (nxt == 0x0A || nxt == 0x0D) { + has_nl = true; + utf8::next(tmp3, end); + } else if (is_space(nxt)) { + utf8::next(tmp3, end); + } else { + break; + } + } + if (has_nl) { + out.emplace_back(seg_start, tmp3); + it = tmp3; + continue; + } + auto tmp4 = tmp3; + while (tmp4 != end && is_space(utf8::peek_next(tmp4, end))) utf8::next(tmp4, end); + if (tmp4 == end) { + out.emplace_back(seg_start, tmp4); + it = tmp4; + continue; + } + while (tmp3 != end) { + uint32_t nxt = utf8::peek_next(tmp3, end); + if (!is_space(nxt)) break; + utf8::next(tmp3, end); + } + out.emplace_back(seg_start, tmp3); + it = tmp3; + continue; + } + utf8::next(it, end); + out.emplace_back(seg_start, it); + } + + return out; + } + + static inline bool is_digit(uint32_t cp) { return cp >= 0x30 && cp <= 0x39; } + + static inline bool is_cjk(uint32_t cp) { + return (cp >= 0x4E00 && cp <= 0x9FFF) || // CJK Unified Ideographs + (cp >= 0x3400 && cp <= 0x4DBF) || // CJK Extension A + (cp >= 0xF900 && cp <= 
0xFAFF) || // CJK Compatibility + (cp >= 0x3040 && cp <= 0x309F) || // Hiragana + (cp >= 0x30A0 && cp <= 0x30FF); // Katakana + } + + static inline bool is_letter(uint32_t cp) { return (cp >= 0x41 && cp <= 0x5A) || (cp >= 0x61 && cp <= 0x7A); } + + static inline bool is_punct_symbol(uint32_t cp) { + return (cp >= 0x21 && cp <= 0x2F) || (cp >= 0x3A && cp <= 0x40) || (cp >= 0x5B && cp <= 0x60) || (cp >= 0x7B && cp <= 0x7E); + } + + static inline bool is_space(uint32_t cp) { return cp == 0x20 || cp == 0x09 || cp == 0x0A || cp == 0x0D; } + + // For text + preprocessor::BPEUTF8 bpe_; + std::string SPIECE_UNDERLINE = "▁"; +}; +} // namespace mllm::models::deepseek_ocr diff --git a/mllm/nn/Functional.cpp b/mllm/nn/Functional.cpp index c6803af9..3931f5d6 100644 --- a/mllm/nn/Functional.cpp +++ b/mllm/nn/Functional.cpp @@ -14,6 +14,10 @@ #include "mllm/core/aops/ViewOp.hpp" #include "mllm/core/aops/TopKOp.hpp" #include "mllm/core/aops/SiLUOp.hpp" +#include "mllm/core/aops/PadOp.hpp" +#include "mllm/core/aops/MaskedScatterOp.hpp" +#include "mllm/core/aops/InterpolateOp.hpp" +#include "mllm/core/aops/StackOp.hpp" #include "mllm/engine/Context.hpp" namespace mllm::nn::functional { @@ -42,6 +46,10 @@ Tensor concat(const std::vector& ins, int32_t dim) { return Context::instance().buildOpAndSubmitTask(OpTypes::kConcat, aops::ConcatOpOptions{.dim = dim}, ins)[0]; } +Tensor stack(const std::vector& ins, int32_t dim) { + return Context::instance().buildOpAndSubmitTask(OpTypes::kStack, aops::StackOpOptions{.dim = dim}, ins)[0]; +} + Tensor flashAttention2(const Tensor& Q, const Tensor& K, const Tensor& V) { // Inputs is all BSHD format. @@ -118,4 +126,42 @@ void scatter2Shards(const Tensor& src, const Tensor& shards_pointer, int32_t dim {src, shards_pointer}); } +Tensor scaledDotProductAttention(const Tensor& Q, const Tensor& K, const Tensor& V, const Tensor& mask) { + float scale = Q.size(-1); + scale = (1.f / std::sqrtf((float)scale)); + auto attn_weight = matmul(Q, K, false, true) * scale; + if (mask) { attn_weight = attn_weight + mask; } + attn_weight = softmax(attn_weight, -1); + return matmul(attn_weight, V); +} + +Tensor pad(const Tensor& x, const std::vector& pad, aops::PadMode mode, float value) { + return Context::instance().buildOpAndSubmitTask(OpTypes::kPad, aops::PadOpOptions{.pad = pad, .mode = mode, .value = value}, + {x})[0]; +} + +Tensor interpolateBySize(const Tensor& x, const std::vector& size, aops::InterpolateOpMode mode, bool align_corners, + bool antialias) { + aops::InterpolateOpOptions opts{}; + opts.size.assign(size.begin(), size.end()); + opts.mode = mode; + opts.align_corners = align_corners; + opts.antialias = antialias; + return Context::instance().buildOpAndSubmitTask(OpTypes::kInterpolate, opts, {x})[0]; +} + +Tensor interpolateByScale(const Tensor& x, const std::vector& scale_factor, aops::InterpolateOpMode mode, + bool align_corners, bool antialias) { + aops::InterpolateOpOptions opts{}; + opts.scale_factor = scale_factor; + opts.mode = mode; + opts.align_corners = align_corners; + opts.antialias = antialias; + return Context::instance().buildOpAndSubmitTask(OpTypes::kInterpolate, opts, {x})[0]; +} + +void maskedScatter(const Tensor& dst, const Tensor& mask, const Tensor& src) { + Context::instance().buildOpAndSubmitTask(OpTypes::kMaskedScatter, aops::MaskedScatterOpOptions{}, {dst, mask, src}); +} + } // namespace mllm::nn::functional diff --git a/mllm/nn/Functional.hpp b/mllm/nn/Functional.hpp index 9efad0ab..08f5fbf8 100644 --- a/mllm/nn/Functional.hpp +++ 
b/mllm/nn/Functional.hpp
@@ -9,6 +9,8 @@
 #include "mllm/core/Tensor.hpp"
 #include "mllm/core/aops/MatMulOp.hpp"
 #include "mllm/core/aops/SplitOp.hpp"
+#include "mllm/core/aops/PadOp.hpp"
+#include "mllm/core/aops/InterpolateOp.hpp"
 #include "mllm/engine/Context.hpp"
 
 namespace mllm::nn::functional {
@@ -102,6 +104,8 @@ inline std::vector<Tensor> chunk(int32_t num, const Tensor& x, int32_t dim) {
 
 Tensor concat(const std::vector<Tensor>& ins, int32_t dim);
 
+Tensor stack(const std::vector<Tensor>& ins, int32_t dim);
+
 Tensor flashAttention2(const Tensor& Q, const Tensor& K, const Tensor& V);
 
 Tensor softmax(const Tensor& x, int32_t dim);
@@ -129,4 +133,22 @@ Tensor silu_(const Tensor& x);
 
 void scatter2Shards(const Tensor& src, const Tensor& shards_pointer, int32_t dim);
 
+// Plain SDPA without a causal mask; if you need causal-mask attention, use flash attention instead.
+Tensor scaledDotProductAttention(const Tensor& Q, const Tensor& K, const Tensor& V, const Tensor& mask = Tensor());
+
+// Pad: apply N-D padding. `pad` is ordered from the last dimension to the first.
+Tensor pad(const Tensor& x, const std::vector<int32_t>& pad, aops::PadMode mode = aops::PadMode::kConstant, float value = 0.0f);
+
+// Interpolate to a target size
+Tensor interpolateBySize(const Tensor& x, const std::vector<int32_t>& size,
+                         aops::InterpolateOpMode mode = aops::InterpolateOpMode::kNearest, bool align_corners = false,
+                         bool antialias = false);
+
+// Interpolate by scale factors
+Tensor interpolateByScale(const Tensor& x, const std::vector<float>& scale_factor,
+                          aops::InterpolateOpMode mode = aops::InterpolateOpMode::kNearest, bool align_corners = false,
+                          bool antialias = false);
+
+void maskedScatter(const Tensor& dst, const Tensor& mask, const Tensor& src);
+
 } // namespace mllm::nn::functional
diff --git a/mllm/nn/Module.hpp b/mllm/nn/Module.hpp
index 8adadae5..cedefd79 100644
--- a/mllm/nn/Module.hpp
+++ b/mllm/nn/Module.hpp
@@ -42,10 +42,13 @@ class ModuleImpl : public AbstractNnNode {
 };
 
 template <typename T>
-class ModuleLists;
+class ModuleList;
 
 template <typename T>
-class ModuleListsSuffix;
+class ModuleListWithIdx;
+
+template <typename T>
+class ModuleListSuffixed;
 
 class Module {
  public:
@@ -198,6 +201,29 @@ class ModuleList final : public Module {
   std::vector<T>& list() { return layers_; }
 };
 
+template <typename T>
+class ModuleListWithIdx final : public Module {
+  std::vector<T> layers_;
+
+ public:
+  ModuleListWithIdx() = default;
+
+  template <typename... Args>
+  ModuleListWithIdx(const std::string& name, int nums, Args&&...
args) : Module(name) { + for (int i = 0; i < nums; ++i) { + layers_.emplace_back(reg(/*name*/ std::to_string(i), /*args*/ std::forward(args)..., /*index*/ i)); + } + }; + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + std::vector o = inputs; + for (auto& layer : layers_) { o = layer.forward(o, args); } + return o; + } + + std::vector& list() { return layers_; } +}; + template class ModuleListSuffixed final : public Module { std::vector layers_; diff --git a/mllm/nn/Nn.hpp b/mllm/nn/Nn.hpp index 7bab5eba..bb4fa54d 100644 --- a/mllm/nn/Nn.hpp +++ b/mllm/nn/Nn.hpp @@ -17,6 +17,7 @@ #include "mllm/nn/layers/LayerNorm.hpp" // IWYU pragma: export #include "mllm/nn/layers/Softmax.hpp" // IWYU pragma: export #include "mllm/nn/layers/VisionRoPE.hpp" // IWYU pragma: export +#include "mllm/nn/layers/Conv2D.hpp" // IWYU pragma: export #include "mllm/nn/layers/Conv3D.hpp" // IWYU pragma: export #include "mllm/nn/layers/CausalMask.hpp" // IWYU pragma: export #include "mllm/nn/layers/RoPE.hpp" // IWYU pragma: export @@ -27,3 +28,4 @@ #include "mllm/nn/layers/STFT.hpp" // IWYU pragma: export #include "mllm/nn/layers/PagedAttn.hpp" // IWYU pragma: export #include "mllm/nn/layers/RadixAttn.hpp" // IWYU pragma: export +#include "mllm/nn/layers/LayerNorm2D.hpp" // IWYU pragma: export diff --git a/mllm/nn/layers/Conv2D.cpp b/mllm/nn/layers/Conv2D.cpp new file mode 100644 index 00000000..6be07208 --- /dev/null +++ b/mllm/nn/layers/Conv2D.cpp @@ -0,0 +1,29 @@ +#include "mllm/nn/layers/Conv2D.hpp" +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/aops/Conv2DOp.hpp" + +namespace mllm::nn { + +Conv2D::Conv2D() : Layer(OpTypes::kConv2D, aops::Conv2DOpOptions{}) {} + +Conv2D::Conv2D(int32_t in_channels, int32_t out_channels, const std::vector& kernel_size, + const std::vector& stride_size, const std::vector& padding_size, + const std::vector& dilation_size, bool bias, aops::Conv2DOpImplType impl_type) + : Layer(OpTypes::kConv2D, aops::Conv2DOpOptions{ + .in_channels = in_channels, + .out_channels = out_channels, + .kernel_size = kernel_size, + .stride = stride_size, + .padding = padding_size, + .dilation = dilation_size, + .bias = bias, + .impl_type = impl_type, + }) {} + +Conv2D::Conv2D(const aops::Conv2DOpOptions& options) : Layer(OpTypes::kConv2D, options) {} + +Tensor Conv2D::weight() const { return std::static_pointer_cast(impl()->getInstancedOp())->weight(); } + +Tensor Conv2D::bias() const { return std::static_pointer_cast(impl()->getInstancedOp())->bias(); } + +} // namespace mllm::nn diff --git a/mllm/nn/layers/Conv2D.hpp b/mllm/nn/layers/Conv2D.hpp new file mode 100644 index 00000000..ff67918b --- /dev/null +++ b/mllm/nn/layers/Conv2D.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include "mllm/nn/Layer.hpp" +#include "mllm/core/aops/Conv2DOp.hpp" + +namespace mllm::nn { + +class Conv2D : public Layer { + public: + Conv2D(); + + Conv2D(int32_t in_channels, int32_t out_channels, const std::vector& kernel_size, + const std::vector& stride_size, const std::vector& padding_size, + const std::vector& dilation_size, bool bias = true, + aops::Conv2DOpImplType impl_type = aops::Conv2DOpImplType::kDefault); + + explicit Conv2D(const aops::Conv2DOpOptions& options); + + [[nodiscard]] Tensor weight() const; + + [[nodiscard]] Tensor bias() const; + + MLLM_LAYER_ANY_INPUTS_1_OUTPUTS_FORWARD + MLLM_LAYER_ENABLE_REDIRECT_ATTRIBUTE(Conv2D) +}; + +} // namespace mllm::nn diff --git a/mllm/nn/layers/LayerNorm2D.cpp b/mllm/nn/layers/LayerNorm2D.cpp new file mode 100644 index 
00000000..628d6d64 --- /dev/null +++ b/mllm/nn/layers/LayerNorm2D.cpp @@ -0,0 +1,16 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include "mllm/core/aops/LayerNorm2DOp.hpp" +#include "mllm/nn/layers/LayerNorm2D.hpp" + +namespace mllm::nn { + +LayerNorm2D::LayerNorm2D() : Layer(OpTypes::kLayerNorm2D, aops::LayerNorm2DOpOptions{}) {} + +LayerNorm2D::LayerNorm2D(const aops::LayerNorm2DOpOptions& options) : Layer(OpTypes::kLayerNorm2D, options) {} + +LayerNorm2D::LayerNorm2D(const int32_t num_channels, float eps) + : Layer(OpTypes::kLayerNorm2D, aops::LayerNorm2DOpOptions{.num_channels = num_channels, .eps = eps}) {} + +} // namespace mllm::nn diff --git a/mllm/nn/layers/LayerNorm2D.hpp b/mllm/nn/layers/LayerNorm2D.hpp new file mode 100644 index 00000000..ebe29d0c --- /dev/null +++ b/mllm/nn/layers/LayerNorm2D.hpp @@ -0,0 +1,23 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/nn/Layer.hpp" +#include "mllm/core/aops/LayerNorm2DOp.hpp" + +namespace mllm::nn { + +class LayerNorm2D : public Layer { + public: + LayerNorm2D(); + + explicit LayerNorm2D(const aops::LayerNorm2DOpOptions& options); + + explicit LayerNorm2D(const int32_t num_channels, float eps = 1e-6); + + MLLM_LAYER_ANY_INPUTS_1_OUTPUTS_FORWARD + MLLM_LAYER_ENABLE_INPLACE_ATTRIBUTE(LayerNorm2D) +}; + +} // namespace mllm::nn diff --git a/mllm/preprocessor/tokenizers/AutoTokenizer.cpp b/mllm/preprocessor/tokenizers/AutoTokenizer.cpp index 1d958f3f..ae9f4f9e 100644 --- a/mllm/preprocessor/tokenizers/AutoTokenizer.cpp +++ b/mllm/preprocessor/tokenizers/AutoTokenizer.cpp @@ -110,6 +110,118 @@ std::vector Trie::split(const std::wstring& text) { bool Trie::isSpecialToken(const std::wstring& token) { return special_tokens_.count(token); } +void TrieUTF8::add(const std::string& word_utf8) { + auto word = utf8String2Cpts(word_utf8); + if (word.empty()) return; + special_tokens_.insert(word); + + TrieNode* current = root_.get(); + + for (const auto& c : word) { + if (!current->children.count(c)) { current->children[c] = std::make_unique(); } + current = current->children[c].get(); + } + + current->is_end = true; +} + +void TrieUTF8::update(const std::vector& words) { + for (const auto& word : words) { add(word); } +} + +// I use FSA to implement the split function. +std::vector TrieUTF8::split(const std::string& text_utf8) { + auto text = utf8String2Cpts(text_utf8); + + std::map states; + std::vector offsets = {0}; + size_t skip = 0; + + for (size_t current = 0; current < text.size(); ++current) { + if (skip > current) continue; + + std::unordered_set to_remove; + bool reset = false; + + wchar_t current_char = text[current]; + + for (auto& [_start, node] : states) { + auto start = _start; + if (node->is_end) { + // trying to find the longest match + size_t max_end = current; + + for (auto& [look_start, look_node] : states) { + if (look_start > start) break; + + size_t lookahead = (look_start < start) ? 
current + 1 : current; + size_t end = lookahead; + TrieNode* ptr = look_node; + + while (lookahead < text.size()) { + wchar_t ch = text[lookahead]; + + if (!ptr->children.count(ch)) break; + + ptr = ptr->children[ch].get(); + lookahead++; + + if (ptr->is_end) { + start = look_start; + end = lookahead; + skip = lookahead; + } + } + + if (ptr->is_end && end > max_end) { max_end = end; } + } + offsets.push_back(start); + offsets.push_back(max_end); + reset = true; + break; + } + if (node->children.count(current_char)) { + states[start] = node->children[current_char].get(); + } else { + to_remove.insert(start); + } + } + if (reset) { + states.clear(); + } else { + for (auto start : to_remove) { states.erase(start); } + } + if (current >= skip && root_->children.count(current_char)) { states[current] = root_->children[current_char].get(); } + } + for (auto& [start, node] : states) { + if (node->is_end) { + offsets.push_back(start); + offsets.push_back(text.size()); + break; + } + } + + sort(offsets.begin(), offsets.end()); + std::vector result; + + for (size_t i = 1; i < offsets.size(); ++i) { + if (offsets[i - 1] != offsets[i]) { + auto cpts_str = cpts_string_t{}; + for (int __idx = offsets[i - 1]; __idx < offsets[i]; __idx++) { cpts_str.push_back(text[__idx]); } + result.push_back(cpts2Utf8String(cpts_str)); + } + } + if (offsets[offsets.size() - 1] != text.size()) { + auto cpts_str = cpts_string_t{}; + for (int __idx = offsets[offsets.size() - 1]; __idx < text.size(); __idx++) { cpts_str.push_back(text[__idx]); } + result.push_back(cpts2Utf8String(cpts_str)); + } + + return result; +} + +bool TrieUTF8::isSpecialToken(const std::string& token) { return special_tokens_.count(utf8String2Cpts(token)); } + void AutoTokenizer::addSpecialToken(const std::wstring& special_token) { special_tokens_trie_.add(special_token); } -} // namespace mllm::preprocessor \ No newline at end of file +} // namespace mllm::preprocessor diff --git a/mllm/preprocessor/tokenizers/AutoTokenizer.hpp b/mllm/preprocessor/tokenizers/AutoTokenizer.hpp index e3f76b88..10721e23 100644 --- a/mllm/preprocessor/tokenizers/AutoTokenizer.hpp +++ b/mllm/preprocessor/tokenizers/AutoTokenizer.hpp @@ -16,6 +16,10 @@ #include using json = nlohmann::json; +#include + +#include + #include "mllm/core/Tensor.hpp" #include @@ -52,6 +56,48 @@ class Trie { std::unordered_set special_tokens_; }; +class TrieUTF8 { + using cpts_string_t = std::vector; + + struct TrieNode { + std::unordered_map> children; + bool is_end = false; + }; + + struct VectorUint32Hash { + std::size_t operator()(const std::vector& v) const noexcept { + if (v.empty()) return 0; + return static_cast(XXH64(v.data(), v.size() * sizeof(uint32_t), /*seed=*/0)); + } + }; + + public: + void add(const std::string& word); + + void update(const std::vector& words); + + // I use FSA to implement the split function. 
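+  // For example (illustrative): after add("<|User|>"), split("<|User|>hi")
+  // returns {"<|User|>", "hi"}; special tokens come back as standalone
+  // segments and the surrounding text is preserved verbatim.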
+ std::vector split(const std::string& text); + + bool isSpecialToken(const std::string& token); + + inline std::vector utf8String2Cpts(const std::string& str) { + std::vector word32; + utf8::utf8to32(str.begin(), str.end(), std::back_inserter(word32)); + return word32; + } + + inline std::string cpts2Utf8String(const std::vector& cpts) { + std::string str; + utf8::utf32to8(cpts.begin(), cpts.end(), std::back_inserter(str)); + return str; + } + + private: + std::unique_ptr root_ = std::make_unique(); + std::unordered_set special_tokens_; +}; + class AutoTokenizer { public: void addSpecialToken(const std::wstring& special_token); @@ -70,4 +116,20 @@ class AutoTokenizer { Trie special_tokens_trie_; }; -} // namespace mllm::preprocessor \ No newline at end of file +class AutoTokenizerUTF8 { + public: + void addSpecialToken(const std::string& special_token); + + virtual std::vector encode(const std::string& str) = 0; + + virtual std::string decode(const std::vector& ids) = 0; + + virtual std::vector tokenize(const std::string& str) = 0; + + virtual std::string detokenize(const std::vector& tokenized_str) = 0; + + protected: + TrieUTF8 special_tokens_trie_; +}; + +} // namespace mllm::preprocessor diff --git a/mllm/preprocessor/tokenizers/BPEUTF8.cpp b/mllm/preprocessor/tokenizers/BPEUTF8.cpp new file mode 100644 index 00000000..bd6be2a8 --- /dev/null +++ b/mllm/preprocessor/tokenizers/BPEUTF8.cpp @@ -0,0 +1,173 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include +#include +#include +#include +#include + +#include +#include + +#include "mllm/utils/Common.hpp" +#include "mllm/preprocessor/tokenizers/BPEUTF8.hpp" + +namespace mllm::preprocessor { +bool BPEUTF8::initFromSentencePieceJson(const std::string& file_path) { + std::ifstream f(file_path); + if (!f.is_open()) { + MLLM_ERROR("BPEUTF8 Cannot open file {}", file_path); + return false; + } + auto json_data = nlohmann::json::parse(f); + + if (!json_data.contains("model") || !json_data["model"].contains("vocab") || !json_data["model"].contains("merges")) { + MLLM_ERROR("BPEUTF8 initFromSentencePieceJson need sentence piece json, but get {}", file_path); + return false; + } + + for (const auto& [key, value] : json_data["model"]["vocab"].items()) { + vocab_.insert({ + utf8String2Cpts(key), + value, + }); + vocab_inverse_.insert({ + value, + utf8String2Cpts(key), + }); + } + + for (const auto& add_token : json_data["added_tokens"].items()) { + int64_t id = add_token.value()["id"]; + std::string content = add_token.value()["content"]; + vocab_.insert({ + utf8String2Cpts(content), + id, + }); + vocab_inverse_.insert({ + id, + utf8String2Cpts(content), + }); + } + + int64_t cnt = 0; + for (auto& merge_item : json_data["model"]["merges"]) { + if (merge_item.is_string()) { + std::string wide_merge_item = merge_item; + + // 0x20 will only represent space in utf8. we can use this unsafe method to speed up. 
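+      // For example (illustrative, not from a real vocab): a string-form
+      // merges entry "▁t h" splits at the first space into first = "▁t" and
+      // second = "h"; its position in the merges list becomes its rank, and
+      // lower ranks are merged earlier.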
+      auto blank_pos = wide_merge_item.find(' ');
+      auto first = wide_merge_item.substr(0, blank_pos);
+      auto second = wide_merge_item.substr(blank_pos + 1);
+      bpe_ranks_.insert({{utf8String2Cpts(first), utf8String2Cpts(second)}, cnt++});
+    } else if (merge_item.is_array()) {
+      bpe_ranks_.insert({{utf8String2Cpts(merge_item[0]), utf8String2Cpts(merge_item[1])}, cnt++});
+    }
+  }
+
+  return true;
+}
+
+// ByteLevel BPE
+std::vector<std::string> BPEUTF8::_bpe(const std::string& token) {
+  // Slice the token into single-codepoint symbols
+  std::vector<cpt_string_t> word;
+  {
+    auto cpts = utf8String2Cpts(token);
+    for (auto cpt : cpts) { word.push_back(cpt_string_t{cpt}); }
+  }
+
+  auto pairs = _get_pairs(word);
+  if (pairs.empty()) return {token};
+
+  while (true) {
+    bool has_bigram = false;
+    int64_t rank_bigram = std::numeric_limits<int64_t>::max();
+    std::pair<cpt_string_t, cpt_string_t> bigram;
+
+    for (const auto& p : pairs) {
+      if (bpe_ranks_.count(p)) {
+        auto rank = bpe_ranks_.at(p);
+        if (rank < rank_bigram) {
+          rank_bigram = rank;
+          bigram = p;
+          has_bigram = true;
+        }
+      }
+    }
+
+    if (!has_bigram) { break; }
+
+    auto [first, second] = bigram;
+    std::vector<cpt_string_t> new_word;
+    int i = 0;
+
+    while (i < word.size()) {
+      // Find the next occurrence of 'first' starting at i
+      int j = i;
+      while (j < word.size() && word[j] != first) { j++; }
+
+      // Add elements from i to j-1 (if any)
+      if (j > i) { new_word.insert(new_word.end(), word.begin() + i, word.begin() + j); }
+
+      // Check whether we can merge at position j
+      if (j < word.size() - 1 && word[j] == first && word[j + 1] == second) {
+        auto __merged = first;
+        __merged.insert(std::end(__merged), std::begin(second), std::end(second));
+        new_word.push_back(__merged);
+        i = j + 2;  // Skip both merged elements
+      } else if (j < word.size()) {
+        new_word.push_back(word[j]);
+        i = j + 1;
+      } else {
+        i = j;  // j == word.size()
+      }
+    }
+
+    word = std::move(new_word);
+    if (word.size() == 1) {
+      break;
+    } else {
+      pairs = _get_pairs(word);
+    }
+  }
+
+  std::vector<std::string> ret;
+  ret.reserve(word.size());
+  for (auto& cpt : word) { ret.push_back(cpts2Utf8String(cpt)); }
+
+  return ret;
+}
+
+int64_t BPEUTF8::_lookup_vocab(const std::string& token) {
+  auto cpts = utf8String2Cpts(token);
+  if (vocab_.find(cpts) != vocab_.end()) {
+    return vocab_[cpts];
+  } else {
+    MLLM_WARN("Cannot find token: {} in BPEUTF8 vocab", token);
+    return 0;
+  }
+}
+
+std::string BPEUTF8::_lookup_inverse_vocab(int64_t idx) {
+  if (vocab_inverse_.find(idx) != vocab_inverse_.end()) {
+    return cpts2Utf8String(vocab_inverse_[idx]);
+  } else {
+    MLLM_WARN("Cannot find the id in BPEUTF8 vocab when doing _lookup_inverse_vocab");
+    return {};
+  }
+}
+
+std::unordered_set<std::pair<BPEUTF8::cpt_string_t, BPEUTF8::cpt_string_t>, BPEUTF8PairHash> BPEUTF8::_get_pairs(
+    const std::vector<cpt_string_t>& word) {
+  std::unordered_set<std::pair<cpt_string_t, cpt_string_t>, BPEUTF8PairHash> pairs;
+  if (word.size() < 2) return pairs;
+  auto prev_char = word[0];
+  for (size_t i = 1; i < word.size(); ++i) {
+    pairs.insert({prev_char, word[i]});
+    prev_char = word[i];
+  }
+  return pairs;
+}
+} // namespace mllm::preprocessor
diff --git a/mllm/preprocessor/tokenizers/BPEUTF8.hpp b/mllm/preprocessor/tokenizers/BPEUTF8.hpp
new file mode 100644
index 00000000..fc772a8c
--- /dev/null
+++ b/mllm/preprocessor/tokenizers/BPEUTF8.hpp
@@ -0,0 +1,94 @@
+// Copyright (c) MLLM Team.
+// Licensed under the MIT License.
+#pragma once
+
+// TODO
+// Documents
+// This byte-level BPE (BBPE) works as a fully correct byte-level BPE tokenizer.
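+//
+// A worked sketch of the merge loop in _bpe() (the ranks are illustrative):
+//   token "low", ranks {("l", "o"): 0, ("lo", "w"): 1}
+//   ["l", "o", "w"] -> ["lo", "w"] -> ["low"]
+// The lowest-ranked adjacent pair is merged first, repeating until no ranked
+// pair remains.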
+
+#include
+#include
+#include
+
+// C++'s standard-library support for UTF-8 is weak. We use the utfcpp library to handle UTF-8 strings.
+#include
+#include
+
+// Use XXHash
+#include
+
+// Remember:
+// utfcpp uses
+//   std::string to represent UTF-8 strings,
+//   std::u16string to represent UTF-16 strings,
+//   std::u32string to represent UTF-32 strings.
+
+namespace mllm::preprocessor {
+
+namespace details {
+
+struct VectorUint32Hash {
+  std::size_t operator()(const std::vector<uint32_t>& v) const noexcept {
+    if (v.empty()) return 0;
+    return static_cast<std::size_t>(XXH64(v.data(), v.size() * sizeof(uint32_t), /*seed=*/0));
+  }
+};
+
+} // namespace details
+
+struct BPEUTF8PairHash {
+  std::size_t operator()(const std::pair<std::vector<uint32_t>, std::vector<uint32_t>>& key) const noexcept {
+    const auto& a = key.first;
+    const auto& b = key.second;
+
+    const std::size_t bytes_a = a.size() * sizeof(uint32_t);
+    const std::size_t bytes_b = b.size() * sizeof(uint32_t);
+
+    if (bytes_a == 0 && bytes_b == 0) return 0;
+
+    XXH64_state_t* state = XXH64_createState();
+    if (!state) return 0;
+    XXH64_reset(state, /*seed=*/0);
+
+    if (!a.empty()) XXH64_update(state, a.data(), bytes_a);
+    if (!b.empty()) XXH64_update(state, b.data(), bytes_b);
+
+    std::size_t h = static_cast<std::size_t>(XXH64_digest(state));
+    XXH64_freeState(state);
+    return h;
+  }
+};
+
+class BPEUTF8 {
+ public:
+  using cpt_string_t = std::vector<uint32_t>;
+
+  // BPE can accept SentencePiece's JSON format.
+  bool initFromSentencePieceJson(const std::string& file_path);
+
+  std::vector<std::string> _bpe(const std::string& token);
+
+  int64_t _lookup_vocab(const std::string& token);
+
+  std::string _lookup_inverse_vocab(int64_t idx);
+
+ private:
+  inline std::vector<uint32_t> utf8String2Cpts(const std::string& str) {
+    std::vector<uint32_t> word32;
+    utf8::utf8to32(str.begin(), str.end(), std::back_inserter(word32));
+    return word32;
+  }
+
+  inline std::string cpts2Utf8String(const std::vector<uint32_t>& cpts) {
+    std::string str;
+    utf8::utf32to8(cpts.begin(), cpts.end(), std::back_inserter(str));
+    return str;
+  }
+
+  std::unordered_set<std::pair<cpt_string_t, cpt_string_t>, BPEUTF8PairHash> _get_pairs(const std::vector<cpt_string_t>& word);
+  std::unordered_map<cpt_string_t, int64_t, details::VectorUint32Hash> vocab_;
+  std::unordered_map<int64_t, cpt_string_t> vocab_inverse_;
+  std::unordered_map<std::pair<cpt_string_t, cpt_string_t>, int64_t, BPEUTF8PairHash> bpe_ranks_;
+};
+
+} // namespace mllm::preprocessor
diff --git a/mllm/preprocessor/tokenizers/llama_cpp_unicode/README.md b/mllm/preprocessor/tokenizers/llama_cpp_unicode/README.md
new file mode 100644
index 00000000..b7f40592
--- /dev/null
+++ b/mllm/preprocessor/tokenizers/llama_cpp_unicode/README.md
@@ -0,0 +1,8 @@
+# llama.cpp Unicode
+
+This is a vendored copy of the `unicode.h` and `unicode-data.h` modules from [llama.cpp](https://github.com/ggerganov/llama.cpp), along with their corresponding source files. The modules are held as vendored source rather than submodules since they are a small subset of the overall `llama.cpp` project.
+ +## Latest Update + +llama.cpp - commit 54ef9cfc +https://github.com/ggerganov/llama.cpp \ No newline at end of file diff --git a/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode-data.cpp b/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode-data.cpp new file mode 100644 index 00000000..32b39afd --- /dev/null +++ b/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode-data.cpp @@ -0,0 +1,1617 @@ +/* +llama.cpp - commit 54ef9cfc +https://github.com/ggerganov/llama.cpp + +MIT License + +Copyright (c) 2023-2024 The ggml authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +// generated with scripts/gen-unicode-data.py + +#include "unicode-data.h" + +#include +#include +#include +#include + +const std::initializer_list> unicode_ranges_flags = { + // start, flags // last=next_start-1 + {0x000000, 0x0080}, {0x000020, 0x0008}, {0x000021, 0x0020}, {0x000024, 0x0040}, {0x000025, 0x0020}, {0x00002B, 0x0040}, + {0x00002C, 0x0020}, {0x000030, 0x0002}, {0x00003A, 0x0020}, {0x00003C, 0x0040}, {0x00003F, 0x0020}, {0x000041, 0x0004}, + {0x00005B, 0x0020}, {0x00005E, 0x0040}, {0x00005F, 0x0020}, {0x000060, 0x0040}, {0x000061, 0x0004}, {0x00007B, 0x0020}, + {0x00007C, 0x0040}, {0x00007D, 0x0020}, {0x00007E, 0x0040}, {0x00007F, 0x0080}, {0x0000A0, 0x0008}, {0x0000A1, 0x0020}, + {0x0000A2, 0x0040}, {0x0000A7, 0x0020}, {0x0000A8, 0x0040}, {0x0000AA, 0x0004}, {0x0000AB, 0x0020}, {0x0000AC, 0x0040}, + {0x0000AD, 0x0080}, {0x0000AE, 0x0040}, {0x0000B2, 0x0002}, {0x0000B4, 0x0040}, {0x0000B5, 0x0004}, {0x0000B6, 0x0020}, + {0x0000B8, 0x0040}, {0x0000B9, 0x0002}, {0x0000BA, 0x0004}, {0x0000BB, 0x0020}, {0x0000BC, 0x0002}, {0x0000BF, 0x0020}, + {0x0000C0, 0x0004}, {0x0000D7, 0x0040}, {0x0000D8, 0x0004}, {0x0000F7, 0x0040}, {0x0000F8, 0x0004}, {0x0002C2, 0x0040}, + {0x0002C6, 0x0004}, {0x0002D2, 0x0040}, {0x0002E0, 0x0004}, {0x0002E5, 0x0040}, {0x0002EC, 0x0004}, {0x0002ED, 0x0040}, + {0x0002EE, 0x0004}, {0x0002EF, 0x0040}, {0x000300, 0x0010}, {0x000370, 0x0004}, {0x000375, 0x0040}, {0x000376, 0x0004}, + {0x000378, 0x0001}, {0x00037A, 0x0004}, {0x00037E, 0x0020}, {0x00037F, 0x0004}, {0x000380, 0x0001}, {0x000384, 0x0040}, + {0x000386, 0x0004}, {0x000387, 0x0020}, {0x000388, 0x0004}, {0x00038B, 0x0001}, {0x00038C, 0x0004}, {0x00038D, 0x0001}, + {0x00038E, 0x0004}, {0x0003A2, 0x0001}, {0x0003A3, 0x0004}, {0x0003F6, 0x0040}, {0x0003F7, 0x0004}, {0x000482, 0x0040}, + {0x000483, 0x0010}, {0x00048A, 0x0004}, {0x000530, 0x0001}, {0x000531, 0x0004}, {0x000557, 0x0001}, {0x000559, 0x0004}, + {0x00055A, 0x0020}, {0x000560, 0x0004}, 
{0x000589, 0x0020}, {0x00058B, 0x0001}, {0x00058D, 0x0040}, {0x000590, 0x0001}, + {0x000591, 0x0010}, {0x0005BE, 0x0020}, {0x0005BF, 0x0010}, {0x0005C0, 0x0020}, {0x0005C1, 0x0010}, {0x0005C3, 0x0020}, + {0x0005C4, 0x0010}, {0x0005C6, 0x0020}, {0x0005C7, 0x0010}, {0x0005C8, 0x0001}, {0x0005D0, 0x0004}, {0x0005EB, 0x0001}, + {0x0005EF, 0x0004}, {0x0005F3, 0x0020}, {0x0005F5, 0x0001}, {0x000600, 0x0080}, {0x000606, 0x0040}, {0x000609, 0x0020}, + {0x00060B, 0x0040}, {0x00060C, 0x0020}, {0x00060E, 0x0040}, {0x000610, 0x0010}, {0x00061B, 0x0020}, {0x00061C, 0x0080}, + {0x00061D, 0x0020}, {0x000620, 0x0004}, {0x00064B, 0x0010}, {0x000660, 0x0002}, {0x00066A, 0x0020}, {0x00066E, 0x0004}, + {0x000670, 0x0010}, {0x000671, 0x0004}, {0x0006D4, 0x0020}, {0x0006D5, 0x0004}, {0x0006D6, 0x0010}, {0x0006DD, 0x0080}, + {0x0006DE, 0x0040}, {0x0006DF, 0x0010}, {0x0006E5, 0x0004}, {0x0006E7, 0x0010}, {0x0006E9, 0x0040}, {0x0006EA, 0x0010}, + {0x0006EE, 0x0004}, {0x0006F0, 0x0002}, {0x0006FA, 0x0004}, {0x0006FD, 0x0040}, {0x0006FF, 0x0004}, {0x000700, 0x0020}, + {0x00070E, 0x0001}, {0x00070F, 0x0080}, {0x000710, 0x0004}, {0x000711, 0x0010}, {0x000712, 0x0004}, {0x000730, 0x0010}, + {0x00074B, 0x0001}, {0x00074D, 0x0004}, {0x0007A6, 0x0010}, {0x0007B1, 0x0004}, {0x0007B2, 0x0001}, {0x0007C0, 0x0002}, + {0x0007CA, 0x0004}, {0x0007EB, 0x0010}, {0x0007F4, 0x0004}, {0x0007F6, 0x0040}, {0x0007F7, 0x0020}, {0x0007FA, 0x0004}, + {0x0007FB, 0x0001}, {0x0007FD, 0x0010}, {0x0007FE, 0x0040}, {0x000800, 0x0004}, {0x000816, 0x0010}, {0x00081A, 0x0004}, + {0x00081B, 0x0010}, {0x000824, 0x0004}, {0x000825, 0x0010}, {0x000828, 0x0004}, {0x000829, 0x0010}, {0x00082E, 0x0001}, + {0x000830, 0x0020}, {0x00083F, 0x0001}, {0x000840, 0x0004}, {0x000859, 0x0010}, {0x00085C, 0x0001}, {0x00085E, 0x0020}, + {0x00085F, 0x0001}, {0x000860, 0x0004}, {0x00086B, 0x0001}, {0x000870, 0x0004}, {0x000888, 0x0040}, {0x000889, 0x0004}, + {0x00088F, 0x0001}, {0x000890, 0x0080}, {0x000892, 0x0001}, {0x000898, 0x0010}, {0x0008A0, 0x0004}, {0x0008CA, 0x0010}, + {0x0008E2, 0x0080}, {0x0008E3, 0x0010}, {0x000904, 0x0004}, {0x00093A, 0x0010}, {0x00093D, 0x0004}, {0x00093E, 0x0010}, + {0x000950, 0x0004}, {0x000951, 0x0010}, {0x000958, 0x0004}, {0x000962, 0x0010}, {0x000964, 0x0020}, {0x000966, 0x0002}, + {0x000970, 0x0020}, {0x000971, 0x0004}, {0x000981, 0x0010}, {0x000984, 0x0001}, {0x000985, 0x0004}, {0x00098D, 0x0001}, + {0x00098F, 0x0004}, {0x000991, 0x0001}, {0x000993, 0x0004}, {0x0009A9, 0x0001}, {0x0009AA, 0x0004}, {0x0009B1, 0x0001}, + {0x0009B2, 0x0004}, {0x0009B3, 0x0001}, {0x0009B6, 0x0004}, {0x0009BA, 0x0001}, {0x0009BC, 0x0010}, {0x0009BD, 0x0004}, + {0x0009BE, 0x0010}, {0x0009C5, 0x0001}, {0x0009C7, 0x0010}, {0x0009C9, 0x0001}, {0x0009CB, 0x0010}, {0x0009CE, 0x0004}, + {0x0009CF, 0x0001}, {0x0009D7, 0x0010}, {0x0009D8, 0x0001}, {0x0009DC, 0x0004}, {0x0009DE, 0x0001}, {0x0009DF, 0x0004}, + {0x0009E2, 0x0010}, {0x0009E4, 0x0001}, {0x0009E6, 0x0002}, {0x0009F0, 0x0004}, {0x0009F2, 0x0040}, {0x0009F4, 0x0002}, + {0x0009FA, 0x0040}, {0x0009FC, 0x0004}, {0x0009FD, 0x0020}, {0x0009FE, 0x0010}, {0x0009FF, 0x0001}, {0x000A01, 0x0010}, + {0x000A04, 0x0001}, {0x000A05, 0x0004}, {0x000A0B, 0x0001}, {0x000A0F, 0x0004}, {0x000A11, 0x0001}, {0x000A13, 0x0004}, + {0x000A29, 0x0001}, {0x000A2A, 0x0004}, {0x000A31, 0x0001}, {0x000A32, 0x0004}, {0x000A34, 0x0001}, {0x000A35, 0x0004}, + {0x000A37, 0x0001}, {0x000A38, 0x0004}, {0x000A3A, 0x0001}, {0x000A3C, 0x0010}, {0x000A3D, 0x0001}, {0x000A3E, 0x0010}, + {0x000A43, 0x0001}, {0x000A47, 0x0010}, {0x000A49, 
0x0001}, {0x000A4B, 0x0010}, {0x000A4E, 0x0001}, {0x000A51, 0x0010}, + {0x000A52, 0x0001}, {0x000A59, 0x0004}, {0x000A5D, 0x0001}, {0x000A5E, 0x0004}, {0x000A5F, 0x0001}, {0x000A66, 0x0002}, + {0x000A70, 0x0010}, {0x000A72, 0x0004}, {0x000A75, 0x0010}, {0x000A76, 0x0020}, {0x000A77, 0x0001}, {0x000A81, 0x0010}, + {0x000A84, 0x0001}, {0x000A85, 0x0004}, {0x000A8E, 0x0001}, {0x000A8F, 0x0004}, {0x000A92, 0x0001}, {0x000A93, 0x0004}, + {0x000AA9, 0x0001}, {0x000AAA, 0x0004}, {0x000AB1, 0x0001}, {0x000AB2, 0x0004}, {0x000AB4, 0x0001}, {0x000AB5, 0x0004}, + {0x000ABA, 0x0001}, {0x000ABC, 0x0010}, {0x000ABD, 0x0004}, {0x000ABE, 0x0010}, {0x000AC6, 0x0001}, {0x000AC7, 0x0010}, + {0x000ACA, 0x0001}, {0x000ACB, 0x0010}, {0x000ACE, 0x0001}, {0x000AD0, 0x0004}, {0x000AD1, 0x0001}, {0x000AE0, 0x0004}, + {0x000AE2, 0x0010}, {0x000AE4, 0x0001}, {0x000AE6, 0x0002}, {0x000AF0, 0x0020}, {0x000AF1, 0x0040}, {0x000AF2, 0x0001}, + {0x000AF9, 0x0004}, {0x000AFA, 0x0010}, {0x000B00, 0x0001}, {0x000B01, 0x0010}, {0x000B04, 0x0001}, {0x000B05, 0x0004}, + {0x000B0D, 0x0001}, {0x000B0F, 0x0004}, {0x000B11, 0x0001}, {0x000B13, 0x0004}, {0x000B29, 0x0001}, {0x000B2A, 0x0004}, + {0x000B31, 0x0001}, {0x000B32, 0x0004}, {0x000B34, 0x0001}, {0x000B35, 0x0004}, {0x000B3A, 0x0001}, {0x000B3C, 0x0010}, + {0x000B3D, 0x0004}, {0x000B3E, 0x0010}, {0x000B45, 0x0001}, {0x000B47, 0x0010}, {0x000B49, 0x0001}, {0x000B4B, 0x0010}, + {0x000B4E, 0x0001}, {0x000B55, 0x0010}, {0x000B58, 0x0001}, {0x000B5C, 0x0004}, {0x000B5E, 0x0001}, {0x000B5F, 0x0004}, + {0x000B62, 0x0010}, {0x000B64, 0x0001}, {0x000B66, 0x0002}, {0x000B70, 0x0040}, {0x000B71, 0x0004}, {0x000B72, 0x0002}, + {0x000B78, 0x0001}, {0x000B82, 0x0010}, {0x000B83, 0x0004}, {0x000B84, 0x0001}, {0x000B85, 0x0004}, {0x000B8B, 0x0001}, + {0x000B8E, 0x0004}, {0x000B91, 0x0001}, {0x000B92, 0x0004}, {0x000B96, 0x0001}, {0x000B99, 0x0004}, {0x000B9B, 0x0001}, + {0x000B9C, 0x0004}, {0x000B9D, 0x0001}, {0x000B9E, 0x0004}, {0x000BA0, 0x0001}, {0x000BA3, 0x0004}, {0x000BA5, 0x0001}, + {0x000BA8, 0x0004}, {0x000BAB, 0x0001}, {0x000BAE, 0x0004}, {0x000BBA, 0x0001}, {0x000BBE, 0x0010}, {0x000BC3, 0x0001}, + {0x000BC6, 0x0010}, {0x000BC9, 0x0001}, {0x000BCA, 0x0010}, {0x000BCE, 0x0001}, {0x000BD0, 0x0004}, {0x000BD1, 0x0001}, + {0x000BD7, 0x0010}, {0x000BD8, 0x0001}, {0x000BE6, 0x0002}, {0x000BF3, 0x0040}, {0x000BFB, 0x0001}, {0x000C00, 0x0010}, + {0x000C05, 0x0004}, {0x000C0D, 0x0001}, {0x000C0E, 0x0004}, {0x000C11, 0x0001}, {0x000C12, 0x0004}, {0x000C29, 0x0001}, + {0x000C2A, 0x0004}, {0x000C3A, 0x0001}, {0x000C3C, 0x0010}, {0x000C3D, 0x0004}, {0x000C3E, 0x0010}, {0x000C45, 0x0001}, + {0x000C46, 0x0010}, {0x000C49, 0x0001}, {0x000C4A, 0x0010}, {0x000C4E, 0x0001}, {0x000C55, 0x0010}, {0x000C57, 0x0001}, + {0x000C58, 0x0004}, {0x000C5B, 0x0001}, {0x000C5D, 0x0004}, {0x000C5E, 0x0001}, {0x000C60, 0x0004}, {0x000C62, 0x0010}, + {0x000C64, 0x0001}, {0x000C66, 0x0002}, {0x000C70, 0x0001}, {0x000C77, 0x0020}, {0x000C78, 0x0002}, {0x000C7F, 0x0040}, + {0x000C80, 0x0004}, {0x000C81, 0x0010}, {0x000C84, 0x0020}, {0x000C85, 0x0004}, {0x000C8D, 0x0001}, {0x000C8E, 0x0004}, + {0x000C91, 0x0001}, {0x000C92, 0x0004}, {0x000CA9, 0x0001}, {0x000CAA, 0x0004}, {0x000CB4, 0x0001}, {0x000CB5, 0x0004}, + {0x000CBA, 0x0001}, {0x000CBC, 0x0010}, {0x000CBD, 0x0004}, {0x000CBE, 0x0010}, {0x000CC5, 0x0001}, {0x000CC6, 0x0010}, + {0x000CC9, 0x0001}, {0x000CCA, 0x0010}, {0x000CCE, 0x0001}, {0x000CD5, 0x0010}, {0x000CD7, 0x0001}, {0x000CDD, 0x0004}, + {0x000CDF, 0x0001}, {0x000CE0, 0x0004}, {0x000CE2, 0x0010}, 
{0x000CE4, 0x0001}, {0x000CE6, 0x0002}, {0x000CF0, 0x0001}, + {0x000CF1, 0x0004}, {0x000CF3, 0x0010}, {0x000CF4, 0x0001}, {0x000D00, 0x0010}, {0x000D04, 0x0004}, {0x000D0D, 0x0001}, + {0x000D0E, 0x0004}, {0x000D11, 0x0001}, {0x000D12, 0x0004}, {0x000D3B, 0x0010}, {0x000D3D, 0x0004}, {0x000D3E, 0x0010}, + {0x000D45, 0x0001}, {0x000D46, 0x0010}, {0x000D49, 0x0001}, {0x000D4A, 0x0010}, {0x000D4E, 0x0004}, {0x000D4F, 0x0040}, + {0x000D50, 0x0001}, {0x000D54, 0x0004}, {0x000D57, 0x0010}, {0x000D58, 0x0002}, {0x000D5F, 0x0004}, {0x000D62, 0x0010}, + {0x000D64, 0x0001}, {0x000D66, 0x0002}, {0x000D79, 0x0040}, {0x000D7A, 0x0004}, {0x000D80, 0x0001}, {0x000D81, 0x0010}, + {0x000D84, 0x0001}, {0x000D85, 0x0004}, {0x000D97, 0x0001}, {0x000D9A, 0x0004}, {0x000DB2, 0x0001}, {0x000DB3, 0x0004}, + {0x000DBC, 0x0001}, {0x000DBD, 0x0004}, {0x000DBE, 0x0001}, {0x000DC0, 0x0004}, {0x000DC7, 0x0001}, {0x000DCA, 0x0010}, + {0x000DCB, 0x0001}, {0x000DCF, 0x0010}, {0x000DD5, 0x0001}, {0x000DD6, 0x0010}, {0x000DD7, 0x0001}, {0x000DD8, 0x0010}, + {0x000DE0, 0x0001}, {0x000DE6, 0x0002}, {0x000DF0, 0x0001}, {0x000DF2, 0x0010}, {0x000DF4, 0x0020}, {0x000DF5, 0x0001}, + {0x000E01, 0x0004}, {0x000E31, 0x0010}, {0x000E32, 0x0004}, {0x000E34, 0x0010}, {0x000E3B, 0x0001}, {0x000E3F, 0x0040}, + {0x000E40, 0x0004}, {0x000E47, 0x0010}, {0x000E4F, 0x0020}, {0x000E50, 0x0002}, {0x000E5A, 0x0020}, {0x000E5C, 0x0001}, + {0x000E81, 0x0004}, {0x000E83, 0x0001}, {0x000E84, 0x0004}, {0x000E85, 0x0001}, {0x000E86, 0x0004}, {0x000E8B, 0x0001}, + {0x000E8C, 0x0004}, {0x000EA4, 0x0001}, {0x000EA5, 0x0004}, {0x000EA6, 0x0001}, {0x000EA7, 0x0004}, {0x000EB1, 0x0010}, + {0x000EB2, 0x0004}, {0x000EB4, 0x0010}, {0x000EBD, 0x0004}, {0x000EBE, 0x0001}, {0x000EC0, 0x0004}, {0x000EC5, 0x0001}, + {0x000EC6, 0x0004}, {0x000EC7, 0x0001}, {0x000EC8, 0x0010}, {0x000ECF, 0x0001}, {0x000ED0, 0x0002}, {0x000EDA, 0x0001}, + {0x000EDC, 0x0004}, {0x000EE0, 0x0001}, {0x000F00, 0x0004}, {0x000F01, 0x0040}, {0x000F04, 0x0020}, {0x000F13, 0x0040}, + {0x000F14, 0x0020}, {0x000F15, 0x0040}, {0x000F18, 0x0010}, {0x000F1A, 0x0040}, {0x000F20, 0x0002}, {0x000F34, 0x0040}, + {0x000F35, 0x0010}, {0x000F36, 0x0040}, {0x000F37, 0x0010}, {0x000F38, 0x0040}, {0x000F39, 0x0010}, {0x000F3A, 0x0020}, + {0x000F3E, 0x0010}, {0x000F40, 0x0004}, {0x000F48, 0x0001}, {0x000F49, 0x0004}, {0x000F6D, 0x0001}, {0x000F71, 0x0010}, + {0x000F85, 0x0020}, {0x000F86, 0x0010}, {0x000F88, 0x0004}, {0x000F8D, 0x0010}, {0x000F98, 0x0001}, {0x000F99, 0x0010}, + {0x000FBD, 0x0001}, {0x000FBE, 0x0040}, {0x000FC6, 0x0010}, {0x000FC7, 0x0040}, {0x000FCD, 0x0001}, {0x000FCE, 0x0040}, + {0x000FD0, 0x0020}, {0x000FD5, 0x0040}, {0x000FD9, 0x0020}, {0x000FDB, 0x0001}, {0x001000, 0x0004}, {0x00102B, 0x0010}, + {0x00103F, 0x0004}, {0x001040, 0x0002}, {0x00104A, 0x0020}, {0x001050, 0x0004}, {0x001056, 0x0010}, {0x00105A, 0x0004}, + {0x00105E, 0x0010}, {0x001061, 0x0004}, {0x001062, 0x0010}, {0x001065, 0x0004}, {0x001067, 0x0010}, {0x00106E, 0x0004}, + {0x001071, 0x0010}, {0x001075, 0x0004}, {0x001082, 0x0010}, {0x00108E, 0x0004}, {0x00108F, 0x0010}, {0x001090, 0x0002}, + {0x00109A, 0x0010}, {0x00109E, 0x0040}, {0x0010A0, 0x0004}, {0x0010C6, 0x0001}, {0x0010C7, 0x0004}, {0x0010C8, 0x0001}, + {0x0010CD, 0x0004}, {0x0010CE, 0x0001}, {0x0010D0, 0x0004}, {0x0010FB, 0x0020}, {0x0010FC, 0x0004}, {0x001249, 0x0001}, + {0x00124A, 0x0004}, {0x00124E, 0x0001}, {0x001250, 0x0004}, {0x001257, 0x0001}, {0x001258, 0x0004}, {0x001259, 0x0001}, + {0x00125A, 0x0004}, {0x00125E, 0x0001}, {0x001260, 0x0004}, {0x001289, 
0x0001}, {0x00128A, 0x0004}, {0x00128E, 0x0001}, + {0x001290, 0x0004}, {0x0012B1, 0x0001}, {0x0012B2, 0x0004}, {0x0012B6, 0x0001}, {0x0012B8, 0x0004}, {0x0012BF, 0x0001}, + {0x0012C0, 0x0004}, {0x0012C1, 0x0001}, {0x0012C2, 0x0004}, {0x0012C6, 0x0001}, {0x0012C8, 0x0004}, {0x0012D7, 0x0001}, + {0x0012D8, 0x0004}, {0x001311, 0x0001}, {0x001312, 0x0004}, {0x001316, 0x0001}, {0x001318, 0x0004}, {0x00135B, 0x0001}, + {0x00135D, 0x0010}, {0x001360, 0x0020}, {0x001369, 0x0002}, {0x00137D, 0x0001}, {0x001380, 0x0004}, {0x001390, 0x0040}, + {0x00139A, 0x0001}, {0x0013A0, 0x0004}, {0x0013F6, 0x0001}, {0x0013F8, 0x0004}, {0x0013FE, 0x0001}, {0x001400, 0x0020}, + {0x001401, 0x0004}, {0x00166D, 0x0040}, {0x00166E, 0x0020}, {0x00166F, 0x0004}, {0x001680, 0x0008}, {0x001681, 0x0004}, + {0x00169B, 0x0020}, {0x00169D, 0x0001}, {0x0016A0, 0x0004}, {0x0016EB, 0x0020}, {0x0016EE, 0x0002}, {0x0016F1, 0x0004}, + {0x0016F9, 0x0001}, {0x001700, 0x0004}, {0x001712, 0x0010}, {0x001716, 0x0001}, {0x00171F, 0x0004}, {0x001732, 0x0010}, + {0x001735, 0x0020}, {0x001737, 0x0001}, {0x001740, 0x0004}, {0x001752, 0x0010}, {0x001754, 0x0001}, {0x001760, 0x0004}, + {0x00176D, 0x0001}, {0x00176E, 0x0004}, {0x001771, 0x0001}, {0x001772, 0x0010}, {0x001774, 0x0001}, {0x001780, 0x0004}, + {0x0017B4, 0x0010}, {0x0017D4, 0x0020}, {0x0017D7, 0x0004}, {0x0017D8, 0x0020}, {0x0017DB, 0x0040}, {0x0017DC, 0x0004}, + {0x0017DD, 0x0010}, {0x0017DE, 0x0001}, {0x0017E0, 0x0002}, {0x0017EA, 0x0001}, {0x0017F0, 0x0002}, {0x0017FA, 0x0001}, + {0x001800, 0x0020}, {0x00180B, 0x0010}, {0x00180E, 0x0080}, {0x00180F, 0x0010}, {0x001810, 0x0002}, {0x00181A, 0x0001}, + {0x001820, 0x0004}, {0x001879, 0x0001}, {0x001880, 0x0004}, {0x001885, 0x0010}, {0x001887, 0x0004}, {0x0018A9, 0x0010}, + {0x0018AA, 0x0004}, {0x0018AB, 0x0001}, {0x0018B0, 0x0004}, {0x0018F6, 0x0001}, {0x001900, 0x0004}, {0x00191F, 0x0001}, + {0x001920, 0x0010}, {0x00192C, 0x0001}, {0x001930, 0x0010}, {0x00193C, 0x0001}, {0x001940, 0x0040}, {0x001941, 0x0001}, + {0x001944, 0x0020}, {0x001946, 0x0002}, {0x001950, 0x0004}, {0x00196E, 0x0001}, {0x001970, 0x0004}, {0x001975, 0x0001}, + {0x001980, 0x0004}, {0x0019AC, 0x0001}, {0x0019B0, 0x0004}, {0x0019CA, 0x0001}, {0x0019D0, 0x0002}, {0x0019DB, 0x0001}, + {0x0019DE, 0x0040}, {0x001A00, 0x0004}, {0x001A17, 0x0010}, {0x001A1C, 0x0001}, {0x001A1E, 0x0020}, {0x001A20, 0x0004}, + {0x001A55, 0x0010}, {0x001A5F, 0x0001}, {0x001A60, 0x0010}, {0x001A7D, 0x0001}, {0x001A7F, 0x0010}, {0x001A80, 0x0002}, + {0x001A8A, 0x0001}, {0x001A90, 0x0002}, {0x001A9A, 0x0001}, {0x001AA0, 0x0020}, {0x001AA7, 0x0004}, {0x001AA8, 0x0020}, + {0x001AAE, 0x0001}, {0x001AB0, 0x0010}, {0x001ACF, 0x0001}, {0x001B00, 0x0010}, {0x001B05, 0x0004}, {0x001B34, 0x0010}, + {0x001B45, 0x0004}, {0x001B4D, 0x0001}, {0x001B50, 0x0002}, {0x001B5A, 0x0020}, {0x001B61, 0x0040}, {0x001B6B, 0x0010}, + {0x001B74, 0x0040}, {0x001B7D, 0x0020}, {0x001B7F, 0x0001}, {0x001B80, 0x0010}, {0x001B83, 0x0004}, {0x001BA1, 0x0010}, + {0x001BAE, 0x0004}, {0x001BB0, 0x0002}, {0x001BBA, 0x0004}, {0x001BE6, 0x0010}, {0x001BF4, 0x0001}, {0x001BFC, 0x0020}, + {0x001C00, 0x0004}, {0x001C24, 0x0010}, {0x001C38, 0x0001}, {0x001C3B, 0x0020}, {0x001C40, 0x0002}, {0x001C4A, 0x0001}, + {0x001C4D, 0x0004}, {0x001C50, 0x0002}, {0x001C5A, 0x0004}, {0x001C7E, 0x0020}, {0x001C80, 0x0004}, {0x001C89, 0x0001}, + {0x001C90, 0x0004}, {0x001CBB, 0x0001}, {0x001CBD, 0x0004}, {0x001CC0, 0x0020}, {0x001CC8, 0x0001}, {0x001CD0, 0x0010}, + {0x001CD3, 0x0020}, {0x001CD4, 0x0010}, {0x001CE9, 0x0004}, {0x001CED, 0x0010}, 
{0x001CEE, 0x0004}, {0x001CF4, 0x0010}, + {0x001CF5, 0x0004}, {0x001CF7, 0x0010}, {0x001CFA, 0x0004}, {0x001CFB, 0x0001}, {0x001D00, 0x0004}, {0x001DC0, 0x0010}, + {0x001E00, 0x0004}, {0x001F16, 0x0001}, {0x001F18, 0x0004}, {0x001F1E, 0x0001}, {0x001F20, 0x0004}, {0x001F46, 0x0001}, + {0x001F48, 0x0004}, {0x001F4E, 0x0001}, {0x001F50, 0x0004}, {0x001F58, 0x0001}, {0x001F59, 0x0004}, {0x001F5A, 0x0001}, + {0x001F5B, 0x0004}, {0x001F5C, 0x0001}, {0x001F5D, 0x0004}, {0x001F5E, 0x0001}, {0x001F5F, 0x0004}, {0x001F7E, 0x0001}, + {0x001F80, 0x0004}, {0x001FB5, 0x0001}, {0x001FB6, 0x0004}, {0x001FBD, 0x0040}, {0x001FBE, 0x0004}, {0x001FBF, 0x0040}, + {0x001FC2, 0x0004}, {0x001FC5, 0x0001}, {0x001FC6, 0x0004}, {0x001FCD, 0x0040}, {0x001FD0, 0x0004}, {0x001FD4, 0x0001}, + {0x001FD6, 0x0004}, {0x001FDC, 0x0001}, {0x001FDD, 0x0040}, {0x001FE0, 0x0004}, {0x001FED, 0x0040}, {0x001FF0, 0x0001}, + {0x001FF2, 0x0004}, {0x001FF5, 0x0001}, {0x001FF6, 0x0004}, {0x001FFD, 0x0040}, {0x001FFF, 0x0001}, {0x002000, 0x0008}, + {0x00200B, 0x0080}, {0x002010, 0x0020}, {0x002028, 0x0008}, {0x00202A, 0x0080}, {0x00202F, 0x0008}, {0x002030, 0x0020}, + {0x002044, 0x0040}, {0x002045, 0x0020}, {0x002052, 0x0040}, {0x002053, 0x0020}, {0x00205F, 0x0008}, {0x002060, 0x0080}, + {0x002065, 0x0001}, {0x002066, 0x0080}, {0x002070, 0x0002}, {0x002071, 0x0004}, {0x002072, 0x0001}, {0x002074, 0x0002}, + {0x00207A, 0x0040}, {0x00207D, 0x0020}, {0x00207F, 0x0004}, {0x002080, 0x0002}, {0x00208A, 0x0040}, {0x00208D, 0x0020}, + {0x00208F, 0x0001}, {0x002090, 0x0004}, {0x00209D, 0x0001}, {0x0020A0, 0x0040}, {0x0020C1, 0x0001}, {0x0020D0, 0x0010}, + {0x0020F1, 0x0001}, {0x002100, 0x0040}, {0x002102, 0x0004}, {0x002103, 0x0040}, {0x002107, 0x0004}, {0x002108, 0x0040}, + {0x00210A, 0x0004}, {0x002114, 0x0040}, {0x002115, 0x0004}, {0x002116, 0x0040}, {0x002119, 0x0004}, {0x00211E, 0x0040}, + {0x002124, 0x0004}, {0x002125, 0x0040}, {0x002126, 0x0004}, {0x002127, 0x0040}, {0x002128, 0x0004}, {0x002129, 0x0040}, + {0x00212A, 0x0004}, {0x00212E, 0x0040}, {0x00212F, 0x0004}, {0x00213A, 0x0040}, {0x00213C, 0x0004}, {0x002140, 0x0040}, + {0x002145, 0x0004}, {0x00214A, 0x0040}, {0x00214E, 0x0004}, {0x00214F, 0x0040}, {0x002150, 0x0002}, {0x002183, 0x0004}, + {0x002185, 0x0002}, {0x00218A, 0x0040}, {0x00218C, 0x0001}, {0x002190, 0x0040}, {0x002308, 0x0020}, {0x00230C, 0x0040}, + {0x002329, 0x0020}, {0x00232B, 0x0040}, {0x002427, 0x0001}, {0x002440, 0x0040}, {0x00244B, 0x0001}, {0x002460, 0x0002}, + {0x00249C, 0x0040}, {0x0024EA, 0x0002}, {0x002500, 0x0040}, {0x002768, 0x0020}, {0x002776, 0x0002}, {0x002794, 0x0040}, + {0x0027C5, 0x0020}, {0x0027C7, 0x0040}, {0x0027E6, 0x0020}, {0x0027F0, 0x0040}, {0x002983, 0x0020}, {0x002999, 0x0040}, + {0x0029D8, 0x0020}, {0x0029DC, 0x0040}, {0x0029FC, 0x0020}, {0x0029FE, 0x0040}, {0x002B74, 0x0001}, {0x002B76, 0x0040}, + {0x002B96, 0x0001}, {0x002B97, 0x0040}, {0x002C00, 0x0004}, {0x002CE5, 0x0040}, {0x002CEB, 0x0004}, {0x002CEF, 0x0010}, + {0x002CF2, 0x0004}, {0x002CF4, 0x0001}, {0x002CF9, 0x0020}, {0x002CFD, 0x0002}, {0x002CFE, 0x0020}, {0x002D00, 0x0004}, + {0x002D26, 0x0001}, {0x002D27, 0x0004}, {0x002D28, 0x0001}, {0x002D2D, 0x0004}, {0x002D2E, 0x0001}, {0x002D30, 0x0004}, + {0x002D68, 0x0001}, {0x002D6F, 0x0004}, {0x002D70, 0x0020}, {0x002D71, 0x0001}, {0x002D7F, 0x0010}, {0x002D80, 0x0004}, + {0x002D97, 0x0001}, {0x002DA0, 0x0004}, {0x002DA7, 0x0001}, {0x002DA8, 0x0004}, {0x002DAF, 0x0001}, {0x002DB0, 0x0004}, + {0x002DB7, 0x0001}, {0x002DB8, 0x0004}, {0x002DBF, 0x0001}, {0x002DC0, 0x0004}, {0x002DC7, 
0x0001}, {0x002DC8, 0x0004}, + {0x002DCF, 0x0001}, {0x002DD0, 0x0004}, {0x002DD7, 0x0001}, {0x002DD8, 0x0004}, {0x002DDF, 0x0001}, {0x002DE0, 0x0010}, + {0x002E00, 0x0020}, {0x002E2F, 0x0004}, {0x002E30, 0x0020}, {0x002E50, 0x0040}, {0x002E52, 0x0020}, {0x002E5E, 0x0001}, + {0x002E80, 0x0040}, {0x002E9A, 0x0001}, {0x002E9B, 0x0040}, {0x002EF4, 0x0001}, {0x002F00, 0x0040}, {0x002FD6, 0x0001}, + {0x002FF0, 0x0040}, {0x003000, 0x0008}, {0x003001, 0x0020}, {0x003004, 0x0040}, {0x003005, 0x0004}, {0x003007, 0x0002}, + {0x003008, 0x0020}, {0x003012, 0x0040}, {0x003014, 0x0020}, {0x003020, 0x0040}, {0x003021, 0x0002}, {0x00302A, 0x0010}, + {0x003030, 0x0020}, {0x003031, 0x0004}, {0x003036, 0x0040}, {0x003038, 0x0002}, {0x00303B, 0x0004}, {0x00303D, 0x0020}, + {0x00303E, 0x0040}, {0x003040, 0x0001}, {0x003041, 0x0004}, {0x003097, 0x0001}, {0x003099, 0x0010}, {0x00309B, 0x0040}, + {0x00309D, 0x0004}, {0x0030A0, 0x0020}, {0x0030A1, 0x0004}, {0x0030FB, 0x0020}, {0x0030FC, 0x0004}, {0x003100, 0x0001}, + {0x003105, 0x0004}, {0x003130, 0x0001}, {0x003131, 0x0004}, {0x00318F, 0x0001}, {0x003190, 0x0040}, {0x003192, 0x0002}, + {0x003196, 0x0040}, {0x0031A0, 0x0004}, {0x0031C0, 0x0040}, {0x0031E4, 0x0001}, {0x0031EF, 0x0040}, {0x0031F0, 0x0004}, + {0x003200, 0x0040}, {0x00321F, 0x0001}, {0x003220, 0x0002}, {0x00322A, 0x0040}, {0x003248, 0x0002}, {0x003250, 0x0040}, + {0x003251, 0x0002}, {0x003260, 0x0040}, {0x003280, 0x0002}, {0x00328A, 0x0040}, {0x0032B1, 0x0002}, {0x0032C0, 0x0040}, + {0x003400, 0x0004}, {0x004DC0, 0x0040}, {0x004E00, 0x0004}, {0x00A48D, 0x0001}, {0x00A490, 0x0040}, {0x00A4C7, 0x0001}, + {0x00A4D0, 0x0004}, {0x00A4FE, 0x0020}, {0x00A500, 0x0004}, {0x00A60D, 0x0020}, {0x00A610, 0x0004}, {0x00A620, 0x0002}, + {0x00A62A, 0x0004}, {0x00A62C, 0x0001}, {0x00A640, 0x0004}, {0x00A66F, 0x0010}, {0x00A673, 0x0020}, {0x00A674, 0x0010}, + {0x00A67E, 0x0020}, {0x00A67F, 0x0004}, {0x00A69E, 0x0010}, {0x00A6A0, 0x0004}, {0x00A6E6, 0x0002}, {0x00A6F0, 0x0010}, + {0x00A6F2, 0x0020}, {0x00A6F8, 0x0001}, {0x00A700, 0x0040}, {0x00A717, 0x0004}, {0x00A720, 0x0040}, {0x00A722, 0x0004}, + {0x00A789, 0x0040}, {0x00A78B, 0x0004}, {0x00A7CB, 0x0001}, {0x00A7D0, 0x0004}, {0x00A7D2, 0x0001}, {0x00A7D3, 0x0004}, + {0x00A7D4, 0x0001}, {0x00A7D5, 0x0004}, {0x00A7DA, 0x0001}, {0x00A7F2, 0x0004}, {0x00A802, 0x0010}, {0x00A803, 0x0004}, + {0x00A806, 0x0010}, {0x00A807, 0x0004}, {0x00A80B, 0x0010}, {0x00A80C, 0x0004}, {0x00A823, 0x0010}, {0x00A828, 0x0040}, + {0x00A82C, 0x0010}, {0x00A82D, 0x0001}, {0x00A830, 0x0002}, {0x00A836, 0x0040}, {0x00A83A, 0x0001}, {0x00A840, 0x0004}, + {0x00A874, 0x0020}, {0x00A878, 0x0001}, {0x00A880, 0x0010}, {0x00A882, 0x0004}, {0x00A8B4, 0x0010}, {0x00A8C6, 0x0001}, + {0x00A8CE, 0x0020}, {0x00A8D0, 0x0002}, {0x00A8DA, 0x0001}, {0x00A8E0, 0x0010}, {0x00A8F2, 0x0004}, {0x00A8F8, 0x0020}, + {0x00A8FB, 0x0004}, {0x00A8FC, 0x0020}, {0x00A8FD, 0x0004}, {0x00A8FF, 0x0010}, {0x00A900, 0x0002}, {0x00A90A, 0x0004}, + {0x00A926, 0x0010}, {0x00A92E, 0x0020}, {0x00A930, 0x0004}, {0x00A947, 0x0010}, {0x00A954, 0x0001}, {0x00A95F, 0x0020}, + {0x00A960, 0x0004}, {0x00A97D, 0x0001}, {0x00A980, 0x0010}, {0x00A984, 0x0004}, {0x00A9B3, 0x0010}, {0x00A9C1, 0x0020}, + {0x00A9CE, 0x0001}, {0x00A9CF, 0x0004}, {0x00A9D0, 0x0002}, {0x00A9DA, 0x0001}, {0x00A9DE, 0x0020}, {0x00A9E0, 0x0004}, + {0x00A9E5, 0x0010}, {0x00A9E6, 0x0004}, {0x00A9F0, 0x0002}, {0x00A9FA, 0x0004}, {0x00A9FF, 0x0001}, {0x00AA00, 0x0004}, + {0x00AA29, 0x0010}, {0x00AA37, 0x0001}, {0x00AA40, 0x0004}, {0x00AA43, 0x0010}, {0x00AA44, 0x0004}, 
{0x00AA4C, 0x0010}, + {0x00AA4E, 0x0001}, {0x00AA50, 0x0002}, {0x00AA5A, 0x0001}, {0x00AA5C, 0x0020}, {0x00AA60, 0x0004}, {0x00AA77, 0x0040}, + {0x00AA7A, 0x0004}, {0x00AA7B, 0x0010}, {0x00AA7E, 0x0004}, {0x00AAB0, 0x0010}, {0x00AAB1, 0x0004}, {0x00AAB2, 0x0010}, + {0x00AAB5, 0x0004}, {0x00AAB7, 0x0010}, {0x00AAB9, 0x0004}, {0x00AABE, 0x0010}, {0x00AAC0, 0x0004}, {0x00AAC1, 0x0010}, + {0x00AAC2, 0x0004}, {0x00AAC3, 0x0001}, {0x00AADB, 0x0004}, {0x00AADE, 0x0020}, {0x00AAE0, 0x0004}, {0x00AAEB, 0x0010}, + {0x00AAF0, 0x0020}, {0x00AAF2, 0x0004}, {0x00AAF5, 0x0010}, {0x00AAF7, 0x0001}, {0x00AB01, 0x0004}, {0x00AB07, 0x0001}, + {0x00AB09, 0x0004}, {0x00AB0F, 0x0001}, {0x00AB11, 0x0004}, {0x00AB17, 0x0001}, {0x00AB20, 0x0004}, {0x00AB27, 0x0001}, + {0x00AB28, 0x0004}, {0x00AB2F, 0x0001}, {0x00AB30, 0x0004}, {0x00AB5B, 0x0040}, {0x00AB5C, 0x0004}, {0x00AB6A, 0x0040}, + {0x00AB6C, 0x0001}, {0x00AB70, 0x0004}, {0x00ABE3, 0x0010}, {0x00ABEB, 0x0020}, {0x00ABEC, 0x0010}, {0x00ABEE, 0x0001}, + {0x00ABF0, 0x0002}, {0x00ABFA, 0x0001}, {0x00AC00, 0x0004}, {0x00D7A4, 0x0001}, {0x00D7B0, 0x0004}, {0x00D7C7, 0x0001}, + {0x00D7CB, 0x0004}, {0x00D7FC, 0x0001}, {0x00D800, 0x0080}, {0x00F900, 0x0004}, {0x00FA6E, 0x0001}, {0x00FA70, 0x0004}, + {0x00FADA, 0x0001}, {0x00FB00, 0x0004}, {0x00FB07, 0x0001}, {0x00FB13, 0x0004}, {0x00FB18, 0x0001}, {0x00FB1D, 0x0004}, + {0x00FB1E, 0x0010}, {0x00FB1F, 0x0004}, {0x00FB29, 0x0040}, {0x00FB2A, 0x0004}, {0x00FB37, 0x0001}, {0x00FB38, 0x0004}, + {0x00FB3D, 0x0001}, {0x00FB3E, 0x0004}, {0x00FB3F, 0x0001}, {0x00FB40, 0x0004}, {0x00FB42, 0x0001}, {0x00FB43, 0x0004}, + {0x00FB45, 0x0001}, {0x00FB46, 0x0004}, {0x00FBB2, 0x0040}, {0x00FBC3, 0x0001}, {0x00FBD3, 0x0004}, {0x00FD3E, 0x0020}, + {0x00FD40, 0x0040}, {0x00FD50, 0x0004}, {0x00FD90, 0x0001}, {0x00FD92, 0x0004}, {0x00FDC8, 0x0001}, {0x00FDCF, 0x0040}, + {0x00FDD0, 0x0001}, {0x00FDF0, 0x0004}, {0x00FDFC, 0x0040}, {0x00FE00, 0x0010}, {0x00FE10, 0x0020}, {0x00FE1A, 0x0001}, + {0x00FE20, 0x0010}, {0x00FE30, 0x0020}, {0x00FE53, 0x0001}, {0x00FE54, 0x0020}, {0x00FE62, 0x0040}, {0x00FE63, 0x0020}, + {0x00FE64, 0x0040}, {0x00FE67, 0x0001}, {0x00FE68, 0x0020}, {0x00FE69, 0x0040}, {0x00FE6A, 0x0020}, {0x00FE6C, 0x0001}, + {0x00FE70, 0x0004}, {0x00FE75, 0x0001}, {0x00FE76, 0x0004}, {0x00FEFD, 0x0001}, {0x00FEFF, 0x0080}, {0x00FF00, 0x0001}, + {0x00FF01, 0x0020}, {0x00FF04, 0x0040}, {0x00FF05, 0x0020}, {0x00FF0B, 0x0040}, {0x00FF0C, 0x0020}, {0x00FF10, 0x0002}, + {0x00FF1A, 0x0020}, {0x00FF1C, 0x0040}, {0x00FF1F, 0x0020}, {0x00FF21, 0x0004}, {0x00FF3B, 0x0020}, {0x00FF3E, 0x0040}, + {0x00FF3F, 0x0020}, {0x00FF40, 0x0040}, {0x00FF41, 0x0004}, {0x00FF5B, 0x0020}, {0x00FF5C, 0x0040}, {0x00FF5D, 0x0020}, + {0x00FF5E, 0x0040}, {0x00FF5F, 0x0020}, {0x00FF66, 0x0004}, {0x00FFBF, 0x0001}, {0x00FFC2, 0x0004}, {0x00FFC8, 0x0001}, + {0x00FFCA, 0x0004}, {0x00FFD0, 0x0001}, {0x00FFD2, 0x0004}, {0x00FFD8, 0x0001}, {0x00FFDA, 0x0004}, {0x00FFDD, 0x0001}, + {0x00FFE0, 0x0040}, {0x00FFE7, 0x0001}, {0x00FFE8, 0x0040}, {0x00FFEF, 0x0001}, {0x00FFF9, 0x0080}, {0x00FFFC, 0x0040}, + {0x00FFFE, 0x0001}, {0x010000, 0x0004}, {0x01000C, 0x0001}, {0x01000D, 0x0004}, {0x010027, 0x0001}, {0x010028, 0x0004}, + {0x01003B, 0x0001}, {0x01003C, 0x0004}, {0x01003E, 0x0001}, {0x01003F, 0x0004}, {0x01004E, 0x0001}, {0x010050, 0x0004}, + {0x01005E, 0x0001}, {0x010080, 0x0004}, {0x0100FB, 0x0001}, {0x010100, 0x0020}, {0x010103, 0x0001}, {0x010107, 0x0002}, + {0x010134, 0x0001}, {0x010137, 0x0040}, {0x010140, 0x0002}, {0x010179, 0x0040}, {0x01018A, 0x0002}, {0x01018C, 
0x0040}, + {0x01018F, 0x0001}, {0x010190, 0x0040}, {0x01019D, 0x0001}, {0x0101A0, 0x0040}, {0x0101A1, 0x0001}, {0x0101D0, 0x0040}, + {0x0101FD, 0x0010}, {0x0101FE, 0x0001}, {0x010280, 0x0004}, {0x01029D, 0x0001}, {0x0102A0, 0x0004}, {0x0102D1, 0x0001}, + {0x0102E0, 0x0010}, {0x0102E1, 0x0002}, {0x0102FC, 0x0001}, {0x010300, 0x0004}, {0x010320, 0x0002}, {0x010324, 0x0001}, + {0x01032D, 0x0004}, {0x010341, 0x0002}, {0x010342, 0x0004}, {0x01034A, 0x0002}, {0x01034B, 0x0001}, {0x010350, 0x0004}, + {0x010376, 0x0010}, {0x01037B, 0x0001}, {0x010380, 0x0004}, {0x01039E, 0x0001}, {0x01039F, 0x0020}, {0x0103A0, 0x0004}, + {0x0103C4, 0x0001}, {0x0103C8, 0x0004}, {0x0103D0, 0x0020}, {0x0103D1, 0x0002}, {0x0103D6, 0x0001}, {0x010400, 0x0004}, + {0x01049E, 0x0001}, {0x0104A0, 0x0002}, {0x0104AA, 0x0001}, {0x0104B0, 0x0004}, {0x0104D4, 0x0001}, {0x0104D8, 0x0004}, + {0x0104FC, 0x0001}, {0x010500, 0x0004}, {0x010528, 0x0001}, {0x010530, 0x0004}, {0x010564, 0x0001}, {0x01056F, 0x0020}, + {0x010570, 0x0004}, {0x01057B, 0x0001}, {0x01057C, 0x0004}, {0x01058B, 0x0001}, {0x01058C, 0x0004}, {0x010593, 0x0001}, + {0x010594, 0x0004}, {0x010596, 0x0001}, {0x010597, 0x0004}, {0x0105A2, 0x0001}, {0x0105A3, 0x0004}, {0x0105B2, 0x0001}, + {0x0105B3, 0x0004}, {0x0105BA, 0x0001}, {0x0105BB, 0x0004}, {0x0105BD, 0x0001}, {0x010600, 0x0004}, {0x010737, 0x0001}, + {0x010740, 0x0004}, {0x010756, 0x0001}, {0x010760, 0x0004}, {0x010768, 0x0001}, {0x010780, 0x0004}, {0x010786, 0x0001}, + {0x010787, 0x0004}, {0x0107B1, 0x0001}, {0x0107B2, 0x0004}, {0x0107BB, 0x0001}, {0x010800, 0x0004}, {0x010806, 0x0001}, + {0x010808, 0x0004}, {0x010809, 0x0001}, {0x01080A, 0x0004}, {0x010836, 0x0001}, {0x010837, 0x0004}, {0x010839, 0x0001}, + {0x01083C, 0x0004}, {0x01083D, 0x0001}, {0x01083F, 0x0004}, {0x010856, 0x0001}, {0x010857, 0x0020}, {0x010858, 0x0002}, + {0x010860, 0x0004}, {0x010877, 0x0040}, {0x010879, 0x0002}, {0x010880, 0x0004}, {0x01089F, 0x0001}, {0x0108A7, 0x0002}, + {0x0108B0, 0x0001}, {0x0108E0, 0x0004}, {0x0108F3, 0x0001}, {0x0108F4, 0x0004}, {0x0108F6, 0x0001}, {0x0108FB, 0x0002}, + {0x010900, 0x0004}, {0x010916, 0x0002}, {0x01091C, 0x0001}, {0x01091F, 0x0020}, {0x010920, 0x0004}, {0x01093A, 0x0001}, + {0x01093F, 0x0020}, {0x010940, 0x0001}, {0x010980, 0x0004}, {0x0109B8, 0x0001}, {0x0109BC, 0x0002}, {0x0109BE, 0x0004}, + {0x0109C0, 0x0002}, {0x0109D0, 0x0001}, {0x0109D2, 0x0002}, {0x010A00, 0x0004}, {0x010A01, 0x0010}, {0x010A04, 0x0001}, + {0x010A05, 0x0010}, {0x010A07, 0x0001}, {0x010A0C, 0x0010}, {0x010A10, 0x0004}, {0x010A14, 0x0001}, {0x010A15, 0x0004}, + {0x010A18, 0x0001}, {0x010A19, 0x0004}, {0x010A36, 0x0001}, {0x010A38, 0x0010}, {0x010A3B, 0x0001}, {0x010A3F, 0x0010}, + {0x010A40, 0x0002}, {0x010A49, 0x0001}, {0x010A50, 0x0020}, {0x010A59, 0x0001}, {0x010A60, 0x0004}, {0x010A7D, 0x0002}, + {0x010A7F, 0x0020}, {0x010A80, 0x0004}, {0x010A9D, 0x0002}, {0x010AA0, 0x0001}, {0x010AC0, 0x0004}, {0x010AC8, 0x0040}, + {0x010AC9, 0x0004}, {0x010AE5, 0x0010}, {0x010AE7, 0x0001}, {0x010AEB, 0x0002}, {0x010AF0, 0x0020}, {0x010AF7, 0x0001}, + {0x010B00, 0x0004}, {0x010B36, 0x0001}, {0x010B39, 0x0020}, {0x010B40, 0x0004}, {0x010B56, 0x0001}, {0x010B58, 0x0002}, + {0x010B60, 0x0004}, {0x010B73, 0x0001}, {0x010B78, 0x0002}, {0x010B80, 0x0004}, {0x010B92, 0x0001}, {0x010B99, 0x0020}, + {0x010B9D, 0x0001}, {0x010BA9, 0x0002}, {0x010BB0, 0x0001}, {0x010C00, 0x0004}, {0x010C49, 0x0001}, {0x010C80, 0x0004}, + {0x010CB3, 0x0001}, {0x010CC0, 0x0004}, {0x010CF3, 0x0001}, {0x010CFA, 0x0002}, {0x010D00, 0x0004}, {0x010D24, 0x0010}, + 
{0x010D28, 0x0001}, {0x010D30, 0x0002}, {0x010D3A, 0x0001}, {0x010E60, 0x0002}, {0x010E7F, 0x0001}, {0x010E80, 0x0004}, + {0x010EAA, 0x0001}, {0x010EAB, 0x0010}, {0x010EAD, 0x0020}, {0x010EAE, 0x0001}, {0x010EB0, 0x0004}, {0x010EB2, 0x0001}, + {0x010EFD, 0x0010}, {0x010F00, 0x0004}, {0x010F1D, 0x0002}, {0x010F27, 0x0004}, {0x010F28, 0x0001}, {0x010F30, 0x0004}, + {0x010F46, 0x0010}, {0x010F51, 0x0002}, {0x010F55, 0x0020}, {0x010F5A, 0x0001}, {0x010F70, 0x0004}, {0x010F82, 0x0010}, + {0x010F86, 0x0020}, {0x010F8A, 0x0001}, {0x010FB0, 0x0004}, {0x010FC5, 0x0002}, {0x010FCC, 0x0001}, {0x010FE0, 0x0004}, + {0x010FF7, 0x0001}, {0x011000, 0x0010}, {0x011003, 0x0004}, {0x011038, 0x0010}, {0x011047, 0x0020}, {0x01104E, 0x0001}, + {0x011052, 0x0002}, {0x011070, 0x0010}, {0x011071, 0x0004}, {0x011073, 0x0010}, {0x011075, 0x0004}, {0x011076, 0x0001}, + {0x01107F, 0x0010}, {0x011083, 0x0004}, {0x0110B0, 0x0010}, {0x0110BB, 0x0020}, {0x0110BD, 0x0080}, {0x0110BE, 0x0020}, + {0x0110C2, 0x0010}, {0x0110C3, 0x0001}, {0x0110CD, 0x0080}, {0x0110CE, 0x0001}, {0x0110D0, 0x0004}, {0x0110E9, 0x0001}, + {0x0110F0, 0x0002}, {0x0110FA, 0x0001}, {0x011100, 0x0010}, {0x011103, 0x0004}, {0x011127, 0x0010}, {0x011135, 0x0001}, + {0x011136, 0x0002}, {0x011140, 0x0020}, {0x011144, 0x0004}, {0x011145, 0x0010}, {0x011147, 0x0004}, {0x011148, 0x0001}, + {0x011150, 0x0004}, {0x011173, 0x0010}, {0x011174, 0x0020}, {0x011176, 0x0004}, {0x011177, 0x0001}, {0x011180, 0x0010}, + {0x011183, 0x0004}, {0x0111B3, 0x0010}, {0x0111C1, 0x0004}, {0x0111C5, 0x0020}, {0x0111C9, 0x0010}, {0x0111CD, 0x0020}, + {0x0111CE, 0x0010}, {0x0111D0, 0x0002}, {0x0111DA, 0x0004}, {0x0111DB, 0x0020}, {0x0111DC, 0x0004}, {0x0111DD, 0x0020}, + {0x0111E0, 0x0001}, {0x0111E1, 0x0002}, {0x0111F5, 0x0001}, {0x011200, 0x0004}, {0x011212, 0x0001}, {0x011213, 0x0004}, + {0x01122C, 0x0010}, {0x011238, 0x0020}, {0x01123E, 0x0010}, {0x01123F, 0x0004}, {0x011241, 0x0010}, {0x011242, 0x0001}, + {0x011280, 0x0004}, {0x011287, 0x0001}, {0x011288, 0x0004}, {0x011289, 0x0001}, {0x01128A, 0x0004}, {0x01128E, 0x0001}, + {0x01128F, 0x0004}, {0x01129E, 0x0001}, {0x01129F, 0x0004}, {0x0112A9, 0x0020}, {0x0112AA, 0x0001}, {0x0112B0, 0x0004}, + {0x0112DF, 0x0010}, {0x0112EB, 0x0001}, {0x0112F0, 0x0002}, {0x0112FA, 0x0001}, {0x011300, 0x0010}, {0x011304, 0x0001}, + {0x011305, 0x0004}, {0x01130D, 0x0001}, {0x01130F, 0x0004}, {0x011311, 0x0001}, {0x011313, 0x0004}, {0x011329, 0x0001}, + {0x01132A, 0x0004}, {0x011331, 0x0001}, {0x011332, 0x0004}, {0x011334, 0x0001}, {0x011335, 0x0004}, {0x01133A, 0x0001}, + {0x01133B, 0x0010}, {0x01133D, 0x0004}, {0x01133E, 0x0010}, {0x011345, 0x0001}, {0x011347, 0x0010}, {0x011349, 0x0001}, + {0x01134B, 0x0010}, {0x01134E, 0x0001}, {0x011350, 0x0004}, {0x011351, 0x0001}, {0x011357, 0x0010}, {0x011358, 0x0001}, + {0x01135D, 0x0004}, {0x011362, 0x0010}, {0x011364, 0x0001}, {0x011366, 0x0010}, {0x01136D, 0x0001}, {0x011370, 0x0010}, + {0x011375, 0x0001}, {0x011400, 0x0004}, {0x011435, 0x0010}, {0x011447, 0x0004}, {0x01144B, 0x0020}, {0x011450, 0x0002}, + {0x01145A, 0x0020}, {0x01145C, 0x0001}, {0x01145D, 0x0020}, {0x01145E, 0x0010}, {0x01145F, 0x0004}, {0x011462, 0x0001}, + {0x011480, 0x0004}, {0x0114B0, 0x0010}, {0x0114C4, 0x0004}, {0x0114C6, 0x0020}, {0x0114C7, 0x0004}, {0x0114C8, 0x0001}, + {0x0114D0, 0x0002}, {0x0114DA, 0x0001}, {0x011580, 0x0004}, {0x0115AF, 0x0010}, {0x0115B6, 0x0001}, {0x0115B8, 0x0010}, + {0x0115C1, 0x0020}, {0x0115D8, 0x0004}, {0x0115DC, 0x0010}, {0x0115DE, 0x0001}, {0x011600, 0x0004}, {0x011630, 0x0010}, + {0x011641, 
0x0020}, {0x011644, 0x0004}, {0x011645, 0x0001}, {0x011650, 0x0002}, {0x01165A, 0x0001}, {0x011660, 0x0020}, + {0x01166D, 0x0001}, {0x011680, 0x0004}, {0x0116AB, 0x0010}, {0x0116B8, 0x0004}, {0x0116B9, 0x0020}, {0x0116BA, 0x0001}, + {0x0116C0, 0x0002}, {0x0116CA, 0x0001}, {0x011700, 0x0004}, {0x01171B, 0x0001}, {0x01171D, 0x0010}, {0x01172C, 0x0001}, + {0x011730, 0x0002}, {0x01173C, 0x0020}, {0x01173F, 0x0040}, {0x011740, 0x0004}, {0x011747, 0x0001}, {0x011800, 0x0004}, + {0x01182C, 0x0010}, {0x01183B, 0x0020}, {0x01183C, 0x0001}, {0x0118A0, 0x0004}, {0x0118E0, 0x0002}, {0x0118F3, 0x0001}, + {0x0118FF, 0x0004}, {0x011907, 0x0001}, {0x011909, 0x0004}, {0x01190A, 0x0001}, {0x01190C, 0x0004}, {0x011914, 0x0001}, + {0x011915, 0x0004}, {0x011917, 0x0001}, {0x011918, 0x0004}, {0x011930, 0x0010}, {0x011936, 0x0001}, {0x011937, 0x0010}, + {0x011939, 0x0001}, {0x01193B, 0x0010}, {0x01193F, 0x0004}, {0x011940, 0x0010}, {0x011941, 0x0004}, {0x011942, 0x0010}, + {0x011944, 0x0020}, {0x011947, 0x0001}, {0x011950, 0x0002}, {0x01195A, 0x0001}, {0x0119A0, 0x0004}, {0x0119A8, 0x0001}, + {0x0119AA, 0x0004}, {0x0119D1, 0x0010}, {0x0119D8, 0x0001}, {0x0119DA, 0x0010}, {0x0119E1, 0x0004}, {0x0119E2, 0x0020}, + {0x0119E3, 0x0004}, {0x0119E4, 0x0010}, {0x0119E5, 0x0001}, {0x011A00, 0x0004}, {0x011A01, 0x0010}, {0x011A0B, 0x0004}, + {0x011A33, 0x0010}, {0x011A3A, 0x0004}, {0x011A3B, 0x0010}, {0x011A3F, 0x0020}, {0x011A47, 0x0010}, {0x011A48, 0x0001}, + {0x011A50, 0x0004}, {0x011A51, 0x0010}, {0x011A5C, 0x0004}, {0x011A8A, 0x0010}, {0x011A9A, 0x0020}, {0x011A9D, 0x0004}, + {0x011A9E, 0x0020}, {0x011AA3, 0x0001}, {0x011AB0, 0x0004}, {0x011AF9, 0x0001}, {0x011B00, 0x0020}, {0x011B0A, 0x0001}, + {0x011C00, 0x0004}, {0x011C09, 0x0001}, {0x011C0A, 0x0004}, {0x011C2F, 0x0010}, {0x011C37, 0x0001}, {0x011C38, 0x0010}, + {0x011C40, 0x0004}, {0x011C41, 0x0020}, {0x011C46, 0x0001}, {0x011C50, 0x0002}, {0x011C6D, 0x0001}, {0x011C70, 0x0020}, + {0x011C72, 0x0004}, {0x011C90, 0x0001}, {0x011C92, 0x0010}, {0x011CA8, 0x0001}, {0x011CA9, 0x0010}, {0x011CB7, 0x0001}, + {0x011D00, 0x0004}, {0x011D07, 0x0001}, {0x011D08, 0x0004}, {0x011D0A, 0x0001}, {0x011D0B, 0x0004}, {0x011D31, 0x0010}, + {0x011D37, 0x0001}, {0x011D3A, 0x0010}, {0x011D3B, 0x0001}, {0x011D3C, 0x0010}, {0x011D3E, 0x0001}, {0x011D3F, 0x0010}, + {0x011D46, 0x0004}, {0x011D47, 0x0010}, {0x011D48, 0x0001}, {0x011D50, 0x0002}, {0x011D5A, 0x0001}, {0x011D60, 0x0004}, + {0x011D66, 0x0001}, {0x011D67, 0x0004}, {0x011D69, 0x0001}, {0x011D6A, 0x0004}, {0x011D8A, 0x0010}, {0x011D8F, 0x0001}, + {0x011D90, 0x0010}, {0x011D92, 0x0001}, {0x011D93, 0x0010}, {0x011D98, 0x0004}, {0x011D99, 0x0001}, {0x011DA0, 0x0002}, + {0x011DAA, 0x0001}, {0x011EE0, 0x0004}, {0x011EF3, 0x0010}, {0x011EF7, 0x0020}, {0x011EF9, 0x0001}, {0x011F00, 0x0010}, + {0x011F02, 0x0004}, {0x011F03, 0x0010}, {0x011F04, 0x0004}, {0x011F11, 0x0001}, {0x011F12, 0x0004}, {0x011F34, 0x0010}, + {0x011F3B, 0x0001}, {0x011F3E, 0x0010}, {0x011F43, 0x0020}, {0x011F50, 0x0002}, {0x011F5A, 0x0001}, {0x011FB0, 0x0004}, + {0x011FB1, 0x0001}, {0x011FC0, 0x0002}, {0x011FD5, 0x0040}, {0x011FF2, 0x0001}, {0x011FFF, 0x0020}, {0x012000, 0x0004}, + {0x01239A, 0x0001}, {0x012400, 0x0002}, {0x01246F, 0x0001}, {0x012470, 0x0020}, {0x012475, 0x0001}, {0x012480, 0x0004}, + {0x012544, 0x0001}, {0x012F90, 0x0004}, {0x012FF1, 0x0020}, {0x012FF3, 0x0001}, {0x013000, 0x0004}, {0x013430, 0x0080}, + {0x013440, 0x0010}, {0x013441, 0x0004}, {0x013447, 0x0010}, {0x013456, 0x0001}, {0x014400, 0x0004}, {0x014647, 0x0001}, + {0x016800, 0x0004}, 
{0x016A39, 0x0001}, {0x016A40, 0x0004}, {0x016A5F, 0x0001}, {0x016A60, 0x0002}, {0x016A6A, 0x0001}, + {0x016A6E, 0x0020}, {0x016A70, 0x0004}, {0x016ABF, 0x0001}, {0x016AC0, 0x0002}, {0x016ACA, 0x0001}, {0x016AD0, 0x0004}, + {0x016AEE, 0x0001}, {0x016AF0, 0x0010}, {0x016AF5, 0x0020}, {0x016AF6, 0x0001}, {0x016B00, 0x0004}, {0x016B30, 0x0010}, + {0x016B37, 0x0020}, {0x016B3C, 0x0040}, {0x016B40, 0x0004}, {0x016B44, 0x0020}, {0x016B45, 0x0040}, {0x016B46, 0x0001}, + {0x016B50, 0x0002}, {0x016B5A, 0x0001}, {0x016B5B, 0x0002}, {0x016B62, 0x0001}, {0x016B63, 0x0004}, {0x016B78, 0x0001}, + {0x016B7D, 0x0004}, {0x016B90, 0x0001}, {0x016E40, 0x0004}, {0x016E80, 0x0002}, {0x016E97, 0x0020}, {0x016E9B, 0x0001}, + {0x016F00, 0x0004}, {0x016F4B, 0x0001}, {0x016F4F, 0x0010}, {0x016F50, 0x0004}, {0x016F51, 0x0010}, {0x016F88, 0x0001}, + {0x016F8F, 0x0010}, {0x016F93, 0x0004}, {0x016FA0, 0x0001}, {0x016FE0, 0x0004}, {0x016FE2, 0x0020}, {0x016FE3, 0x0004}, + {0x016FE4, 0x0010}, {0x016FE5, 0x0001}, {0x016FF0, 0x0010}, {0x016FF2, 0x0001}, {0x017000, 0x0004}, {0x0187F8, 0x0001}, + {0x018800, 0x0004}, {0x018CD6, 0x0001}, {0x018D00, 0x0004}, {0x018D09, 0x0001}, {0x01AFF0, 0x0004}, {0x01AFF4, 0x0001}, + {0x01AFF5, 0x0004}, {0x01AFFC, 0x0001}, {0x01AFFD, 0x0004}, {0x01AFFF, 0x0001}, {0x01B000, 0x0004}, {0x01B123, 0x0001}, + {0x01B132, 0x0004}, {0x01B133, 0x0001}, {0x01B150, 0x0004}, {0x01B153, 0x0001}, {0x01B155, 0x0004}, {0x01B156, 0x0001}, + {0x01B164, 0x0004}, {0x01B168, 0x0001}, {0x01B170, 0x0004}, {0x01B2FC, 0x0001}, {0x01BC00, 0x0004}, {0x01BC6B, 0x0001}, + {0x01BC70, 0x0004}, {0x01BC7D, 0x0001}, {0x01BC80, 0x0004}, {0x01BC89, 0x0001}, {0x01BC90, 0x0004}, {0x01BC9A, 0x0001}, + {0x01BC9C, 0x0040}, {0x01BC9D, 0x0010}, {0x01BC9F, 0x0020}, {0x01BCA0, 0x0080}, {0x01BCA4, 0x0001}, {0x01CF00, 0x0010}, + {0x01CF2E, 0x0001}, {0x01CF30, 0x0010}, {0x01CF47, 0x0001}, {0x01CF50, 0x0040}, {0x01CFC4, 0x0001}, {0x01D000, 0x0040}, + {0x01D0F6, 0x0001}, {0x01D100, 0x0040}, {0x01D127, 0x0001}, {0x01D129, 0x0040}, {0x01D165, 0x0010}, {0x01D16A, 0x0040}, + {0x01D16D, 0x0010}, {0x01D173, 0x0080}, {0x01D17B, 0x0010}, {0x01D183, 0x0040}, {0x01D185, 0x0010}, {0x01D18C, 0x0040}, + {0x01D1AA, 0x0010}, {0x01D1AE, 0x0040}, {0x01D1EB, 0x0001}, {0x01D200, 0x0040}, {0x01D242, 0x0010}, {0x01D245, 0x0040}, + {0x01D246, 0x0001}, {0x01D2C0, 0x0002}, {0x01D2D4, 0x0001}, {0x01D2E0, 0x0002}, {0x01D2F4, 0x0001}, {0x01D300, 0x0040}, + {0x01D357, 0x0001}, {0x01D360, 0x0002}, {0x01D379, 0x0001}, {0x01D400, 0x0004}, {0x01D455, 0x0001}, {0x01D456, 0x0004}, + {0x01D49D, 0x0001}, {0x01D49E, 0x0004}, {0x01D4A0, 0x0001}, {0x01D4A2, 0x0004}, {0x01D4A3, 0x0001}, {0x01D4A5, 0x0004}, + {0x01D4A7, 0x0001}, {0x01D4A9, 0x0004}, {0x01D4AD, 0x0001}, {0x01D4AE, 0x0004}, {0x01D4BA, 0x0001}, {0x01D4BB, 0x0004}, + {0x01D4BC, 0x0001}, {0x01D4BD, 0x0004}, {0x01D4C4, 0x0001}, {0x01D4C5, 0x0004}, {0x01D506, 0x0001}, {0x01D507, 0x0004}, + {0x01D50B, 0x0001}, {0x01D50D, 0x0004}, {0x01D515, 0x0001}, {0x01D516, 0x0004}, {0x01D51D, 0x0001}, {0x01D51E, 0x0004}, + {0x01D53A, 0x0001}, {0x01D53B, 0x0004}, {0x01D53F, 0x0001}, {0x01D540, 0x0004}, {0x01D545, 0x0001}, {0x01D546, 0x0004}, + {0x01D547, 0x0001}, {0x01D54A, 0x0004}, {0x01D551, 0x0001}, {0x01D552, 0x0004}, {0x01D6A6, 0x0001}, {0x01D6A8, 0x0004}, + {0x01D6C1, 0x0040}, {0x01D6C2, 0x0004}, {0x01D6DB, 0x0040}, {0x01D6DC, 0x0004}, {0x01D6FB, 0x0040}, {0x01D6FC, 0x0004}, + {0x01D715, 0x0040}, {0x01D716, 0x0004}, {0x01D735, 0x0040}, {0x01D736, 0x0004}, {0x01D74F, 0x0040}, {0x01D750, 0x0004}, + {0x01D76F, 0x0040}, {0x01D770, 
0x0004}, {0x01D789, 0x0040}, {0x01D78A, 0x0004}, {0x01D7A9, 0x0040}, {0x01D7AA, 0x0004}, + {0x01D7C3, 0x0040}, {0x01D7C4, 0x0004}, {0x01D7CC, 0x0001}, {0x01D7CE, 0x0002}, {0x01D800, 0x0040}, {0x01DA00, 0x0010}, + {0x01DA37, 0x0040}, {0x01DA3B, 0x0010}, {0x01DA6D, 0x0040}, {0x01DA75, 0x0010}, {0x01DA76, 0x0040}, {0x01DA84, 0x0010}, + {0x01DA85, 0x0040}, {0x01DA87, 0x0020}, {0x01DA8C, 0x0001}, {0x01DA9B, 0x0010}, {0x01DAA0, 0x0001}, {0x01DAA1, 0x0010}, + {0x01DAB0, 0x0001}, {0x01DF00, 0x0004}, {0x01DF1F, 0x0001}, {0x01DF25, 0x0004}, {0x01DF2B, 0x0001}, {0x01E000, 0x0010}, + {0x01E007, 0x0001}, {0x01E008, 0x0010}, {0x01E019, 0x0001}, {0x01E01B, 0x0010}, {0x01E022, 0x0001}, {0x01E023, 0x0010}, + {0x01E025, 0x0001}, {0x01E026, 0x0010}, {0x01E02B, 0x0001}, {0x01E030, 0x0004}, {0x01E06E, 0x0001}, {0x01E08F, 0x0010}, + {0x01E090, 0x0001}, {0x01E100, 0x0004}, {0x01E12D, 0x0001}, {0x01E130, 0x0010}, {0x01E137, 0x0004}, {0x01E13E, 0x0001}, + {0x01E140, 0x0002}, {0x01E14A, 0x0001}, {0x01E14E, 0x0004}, {0x01E14F, 0x0040}, {0x01E150, 0x0001}, {0x01E290, 0x0004}, + {0x01E2AE, 0x0010}, {0x01E2AF, 0x0001}, {0x01E2C0, 0x0004}, {0x01E2EC, 0x0010}, {0x01E2F0, 0x0002}, {0x01E2FA, 0x0001}, + {0x01E2FF, 0x0040}, {0x01E300, 0x0001}, {0x01E4D0, 0x0004}, {0x01E4EC, 0x0010}, {0x01E4F0, 0x0002}, {0x01E4FA, 0x0001}, + {0x01E7E0, 0x0004}, {0x01E7E7, 0x0001}, {0x01E7E8, 0x0004}, {0x01E7EC, 0x0001}, {0x01E7ED, 0x0004}, {0x01E7EF, 0x0001}, + {0x01E7F0, 0x0004}, {0x01E7FF, 0x0001}, {0x01E800, 0x0004}, {0x01E8C5, 0x0001}, {0x01E8C7, 0x0002}, {0x01E8D0, 0x0010}, + {0x01E8D7, 0x0001}, {0x01E900, 0x0004}, {0x01E944, 0x0010}, {0x01E94B, 0x0004}, {0x01E94C, 0x0001}, {0x01E950, 0x0002}, + {0x01E95A, 0x0001}, {0x01E95E, 0x0020}, {0x01E960, 0x0001}, {0x01EC71, 0x0002}, {0x01ECAC, 0x0040}, {0x01ECAD, 0x0002}, + {0x01ECB0, 0x0040}, {0x01ECB1, 0x0002}, {0x01ECB5, 0x0001}, {0x01ED01, 0x0002}, {0x01ED2E, 0x0040}, {0x01ED2F, 0x0002}, + {0x01ED3E, 0x0001}, {0x01EE00, 0x0004}, {0x01EE04, 0x0001}, {0x01EE05, 0x0004}, {0x01EE20, 0x0001}, {0x01EE21, 0x0004}, + {0x01EE23, 0x0001}, {0x01EE24, 0x0004}, {0x01EE25, 0x0001}, {0x01EE27, 0x0004}, {0x01EE28, 0x0001}, {0x01EE29, 0x0004}, + {0x01EE33, 0x0001}, {0x01EE34, 0x0004}, {0x01EE38, 0x0001}, {0x01EE39, 0x0004}, {0x01EE3A, 0x0001}, {0x01EE3B, 0x0004}, + {0x01EE3C, 0x0001}, {0x01EE42, 0x0004}, {0x01EE43, 0x0001}, {0x01EE47, 0x0004}, {0x01EE48, 0x0001}, {0x01EE49, 0x0004}, + {0x01EE4A, 0x0001}, {0x01EE4B, 0x0004}, {0x01EE4C, 0x0001}, {0x01EE4D, 0x0004}, {0x01EE50, 0x0001}, {0x01EE51, 0x0004}, + {0x01EE53, 0x0001}, {0x01EE54, 0x0004}, {0x01EE55, 0x0001}, {0x01EE57, 0x0004}, {0x01EE58, 0x0001}, {0x01EE59, 0x0004}, + {0x01EE5A, 0x0001}, {0x01EE5B, 0x0004}, {0x01EE5C, 0x0001}, {0x01EE5D, 0x0004}, {0x01EE5E, 0x0001}, {0x01EE5F, 0x0004}, + {0x01EE60, 0x0001}, {0x01EE61, 0x0004}, {0x01EE63, 0x0001}, {0x01EE64, 0x0004}, {0x01EE65, 0x0001}, {0x01EE67, 0x0004}, + {0x01EE6B, 0x0001}, {0x01EE6C, 0x0004}, {0x01EE73, 0x0001}, {0x01EE74, 0x0004}, {0x01EE78, 0x0001}, {0x01EE79, 0x0004}, + {0x01EE7D, 0x0001}, {0x01EE7E, 0x0004}, {0x01EE7F, 0x0001}, {0x01EE80, 0x0004}, {0x01EE8A, 0x0001}, {0x01EE8B, 0x0004}, + {0x01EE9C, 0x0001}, {0x01EEA1, 0x0004}, {0x01EEA4, 0x0001}, {0x01EEA5, 0x0004}, {0x01EEAA, 0x0001}, {0x01EEAB, 0x0004}, + {0x01EEBC, 0x0001}, {0x01EEF0, 0x0040}, {0x01EEF2, 0x0001}, {0x01F000, 0x0040}, {0x01F02C, 0x0001}, {0x01F030, 0x0040}, + {0x01F094, 0x0001}, {0x01F0A0, 0x0040}, {0x01F0AF, 0x0001}, {0x01F0B1, 0x0040}, {0x01F0C0, 0x0001}, {0x01F0C1, 0x0040}, + {0x01F0D0, 0x0001}, {0x01F0D1, 0x0040}, 
{0x01F0F6, 0x0001}, {0x01F100, 0x0002}, {0x01F10D, 0x0040}, {0x01F1AE, 0x0001}, + {0x01F1E6, 0x0040}, {0x01F203, 0x0001}, {0x01F210, 0x0040}, {0x01F23C, 0x0001}, {0x01F240, 0x0040}, {0x01F249, 0x0001}, + {0x01F250, 0x0040}, {0x01F252, 0x0001}, {0x01F260, 0x0040}, {0x01F266, 0x0001}, {0x01F300, 0x0040}, {0x01F6D8, 0x0001}, + {0x01F6DC, 0x0040}, {0x01F6ED, 0x0001}, {0x01F6F0, 0x0040}, {0x01F6FD, 0x0001}, {0x01F700, 0x0040}, {0x01F777, 0x0001}, + {0x01F77B, 0x0040}, {0x01F7DA, 0x0001}, {0x01F7E0, 0x0040}, {0x01F7EC, 0x0001}, {0x01F7F0, 0x0040}, {0x01F7F1, 0x0001}, + {0x01F800, 0x0040}, {0x01F80C, 0x0001}, {0x01F810, 0x0040}, {0x01F848, 0x0001}, {0x01F850, 0x0040}, {0x01F85A, 0x0001}, + {0x01F860, 0x0040}, {0x01F888, 0x0001}, {0x01F890, 0x0040}, {0x01F8AE, 0x0001}, {0x01F8B0, 0x0040}, {0x01F8B2, 0x0001}, + {0x01F900, 0x0040}, {0x01FA54, 0x0001}, {0x01FA60, 0x0040}, {0x01FA6E, 0x0001}, {0x01FA70, 0x0040}, {0x01FA7D, 0x0001}, + {0x01FA80, 0x0040}, {0x01FA89, 0x0001}, {0x01FA90, 0x0040}, {0x01FABE, 0x0001}, {0x01FABF, 0x0040}, {0x01FAC6, 0x0001}, + {0x01FACE, 0x0040}, {0x01FADC, 0x0001}, {0x01FAE0, 0x0040}, {0x01FAE9, 0x0001}, {0x01FAF0, 0x0040}, {0x01FAF9, 0x0001}, + {0x01FB00, 0x0040}, {0x01FB93, 0x0001}, {0x01FB94, 0x0040}, {0x01FBCB, 0x0001}, {0x01FBF0, 0x0002}, {0x01FBFA, 0x0001}, + {0x020000, 0x0004}, {0x02A6E0, 0x0001}, {0x02A700, 0x0004}, {0x02B73A, 0x0001}, {0x02B740, 0x0004}, {0x02B81E, 0x0001}, + {0x02B820, 0x0004}, {0x02CEA2, 0x0001}, {0x02CEB0, 0x0004}, {0x02EBE1, 0x0001}, {0x02EBF0, 0x0004}, {0x02EE5E, 0x0001}, + {0x02F800, 0x0004}, {0x02FA1E, 0x0001}, {0x030000, 0x0004}, {0x03134B, 0x0001}, {0x031350, 0x0004}, {0x0323B0, 0x0001}, + {0x0E0001, 0x0080}, {0x0E0002, 0x0001}, {0x0E0020, 0x0080}, {0x0E0080, 0x0001}, {0x0E0100, 0x0010}, {0x0E01F0, 0x0001}, + {0x0F0000, 0x0080}, {0x0FFFFE, 0x0001}, {0x100000, 0x0080}, {0x10FFFE, 0x0001}, {0x110000, 0x0000}, +}; + +// list is always in ascending order, to enable binary search +const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase = { + {0x000041, 0x000061}, {0x000042, 0x000062}, {0x000043, 0x000063}, {0x000044, 0x000064}, {0x000045, 0x000065}, + {0x000046, 0x000066}, {0x000047, 0x000067}, {0x000048, 0x000068}, {0x000049, 0x000069}, {0x00004A, 0x00006A}, + {0x00004B, 0x00006B}, {0x00004C, 0x00006C}, {0x00004D, 0x00006D}, {0x00004E, 0x00006E}, {0x00004F, 0x00006F}, + {0x000050, 0x000070}, {0x000051, 0x000071}, {0x000052, 0x000072}, {0x000053, 0x000073}, {0x000054, 0x000074}, + {0x000055, 0x000075}, {0x000056, 0x000076}, {0x000057, 0x000077}, {0x000058, 0x000078}, {0x000059, 0x000079}, + {0x00005A, 0x00007A}, {0x0000C0, 0x0000E0}, {0x0000C1, 0x0000E1}, {0x0000C2, 0x0000E2}, {0x0000C3, 0x0000E3}, + {0x0000C4, 0x0000E4}, {0x0000C5, 0x0000E5}, {0x0000C6, 0x0000E6}, {0x0000C7, 0x0000E7}, {0x0000C8, 0x0000E8}, + {0x0000C9, 0x0000E9}, {0x0000CA, 0x0000EA}, {0x0000CB, 0x0000EB}, {0x0000CC, 0x0000EC}, {0x0000CD, 0x0000ED}, + {0x0000CE, 0x0000EE}, {0x0000CF, 0x0000EF}, {0x0000D0, 0x0000F0}, {0x0000D1, 0x0000F1}, {0x0000D2, 0x0000F2}, + {0x0000D3, 0x0000F3}, {0x0000D4, 0x0000F4}, {0x0000D5, 0x0000F5}, {0x0000D6, 0x0000F6}, {0x0000D8, 0x0000F8}, + {0x0000D9, 0x0000F9}, {0x0000DA, 0x0000FA}, {0x0000DB, 0x0000FB}, {0x0000DC, 0x0000FC}, {0x0000DD, 0x0000FD}, + {0x0000DE, 0x0000FE}, {0x000100, 0x000101}, {0x000102, 0x000103}, {0x000104, 0x000105}, {0x000106, 0x000107}, + {0x000108, 0x000109}, {0x00010A, 0x00010B}, {0x00010C, 0x00010D}, {0x00010E, 0x00010F}, {0x000110, 0x000111}, + {0x000112, 0x000113}, {0x000114, 0x000115}, {0x000116, 0x000117}, {0x000118, 
0x000119}, {0x00011A, 0x00011B}, + {0x00011C, 0x00011D}, {0x00011E, 0x00011F}, {0x000120, 0x000121}, {0x000122, 0x000123}, {0x000124, 0x000125}, + {0x000126, 0x000127}, {0x000128, 0x000129}, {0x00012A, 0x00012B}, {0x00012C, 0x00012D}, {0x00012E, 0x00012F}, + {0x000130, 0x000069}, {0x000132, 0x000133}, {0x000134, 0x000135}, {0x000136, 0x000137}, {0x000139, 0x00013A}, + {0x00013B, 0x00013C}, {0x00013D, 0x00013E}, {0x00013F, 0x000140}, {0x000141, 0x000142}, {0x000143, 0x000144}, + {0x000145, 0x000146}, {0x000147, 0x000148}, {0x00014A, 0x00014B}, {0x00014C, 0x00014D}, {0x00014E, 0x00014F}, + {0x000150, 0x000151}, {0x000152, 0x000153}, {0x000154, 0x000155}, {0x000156, 0x000157}, {0x000158, 0x000159}, + {0x00015A, 0x00015B}, {0x00015C, 0x00015D}, {0x00015E, 0x00015F}, {0x000160, 0x000161}, {0x000162, 0x000163}, + {0x000164, 0x000165}, {0x000166, 0x000167}, {0x000168, 0x000169}, {0x00016A, 0x00016B}, {0x00016C, 0x00016D}, + {0x00016E, 0x00016F}, {0x000170, 0x000171}, {0x000172, 0x000173}, {0x000174, 0x000175}, {0x000176, 0x000177}, + {0x000178, 0x0000FF}, {0x000179, 0x00017A}, {0x00017B, 0x00017C}, {0x00017D, 0x00017E}, {0x000181, 0x000253}, + {0x000182, 0x000183}, {0x000184, 0x000185}, {0x000186, 0x000254}, {0x000187, 0x000188}, {0x000189, 0x000256}, + {0x00018A, 0x000257}, {0x00018B, 0x00018C}, {0x00018E, 0x0001DD}, {0x00018F, 0x000259}, {0x000190, 0x00025B}, + {0x000191, 0x000192}, {0x000193, 0x000260}, {0x000194, 0x000263}, {0x000196, 0x000269}, {0x000197, 0x000268}, + {0x000198, 0x000199}, {0x00019C, 0x00026F}, {0x00019D, 0x000272}, {0x00019F, 0x000275}, {0x0001A0, 0x0001A1}, + {0x0001A2, 0x0001A3}, {0x0001A4, 0x0001A5}, {0x0001A6, 0x000280}, {0x0001A7, 0x0001A8}, {0x0001A9, 0x000283}, + {0x0001AC, 0x0001AD}, {0x0001AE, 0x000288}, {0x0001AF, 0x0001B0}, {0x0001B1, 0x00028A}, {0x0001B2, 0x00028B}, + {0x0001B3, 0x0001B4}, {0x0001B5, 0x0001B6}, {0x0001B7, 0x000292}, {0x0001B8, 0x0001B9}, {0x0001BC, 0x0001BD}, + {0x0001C4, 0x0001C6}, {0x0001C5, 0x0001C6}, {0x0001C7, 0x0001C9}, {0x0001C8, 0x0001C9}, {0x0001CA, 0x0001CC}, + {0x0001CB, 0x0001CC}, {0x0001CD, 0x0001CE}, {0x0001CF, 0x0001D0}, {0x0001D1, 0x0001D2}, {0x0001D3, 0x0001D4}, + {0x0001D5, 0x0001D6}, {0x0001D7, 0x0001D8}, {0x0001D9, 0x0001DA}, {0x0001DB, 0x0001DC}, {0x0001DE, 0x0001DF}, + {0x0001E0, 0x0001E1}, {0x0001E2, 0x0001E3}, {0x0001E4, 0x0001E5}, {0x0001E6, 0x0001E7}, {0x0001E8, 0x0001E9}, + {0x0001EA, 0x0001EB}, {0x0001EC, 0x0001ED}, {0x0001EE, 0x0001EF}, {0x0001F1, 0x0001F3}, {0x0001F2, 0x0001F3}, + {0x0001F4, 0x0001F5}, {0x0001F6, 0x000195}, {0x0001F7, 0x0001BF}, {0x0001F8, 0x0001F9}, {0x0001FA, 0x0001FB}, + {0x0001FC, 0x0001FD}, {0x0001FE, 0x0001FF}, {0x000200, 0x000201}, {0x000202, 0x000203}, {0x000204, 0x000205}, + {0x000206, 0x000207}, {0x000208, 0x000209}, {0x00020A, 0x00020B}, {0x00020C, 0x00020D}, {0x00020E, 0x00020F}, + {0x000210, 0x000211}, {0x000212, 0x000213}, {0x000214, 0x000215}, {0x000216, 0x000217}, {0x000218, 0x000219}, + {0x00021A, 0x00021B}, {0x00021C, 0x00021D}, {0x00021E, 0x00021F}, {0x000220, 0x00019E}, {0x000222, 0x000223}, + {0x000224, 0x000225}, {0x000226, 0x000227}, {0x000228, 0x000229}, {0x00022A, 0x00022B}, {0x00022C, 0x00022D}, + {0x00022E, 0x00022F}, {0x000230, 0x000231}, {0x000232, 0x000233}, {0x00023A, 0x002C65}, {0x00023B, 0x00023C}, + {0x00023D, 0x00019A}, {0x00023E, 0x002C66}, {0x000241, 0x000242}, {0x000243, 0x000180}, {0x000244, 0x000289}, + {0x000245, 0x00028C}, {0x000246, 0x000247}, {0x000248, 0x000249}, {0x00024A, 0x00024B}, {0x00024C, 0x00024D}, + {0x00024E, 0x00024F}, {0x000370, 0x000371}, 
{0x000372, 0x000373}, {0x000376, 0x000377}, {0x00037F, 0x0003F3}, + {0x000386, 0x0003AC}, {0x000388, 0x0003AD}, {0x000389, 0x0003AE}, {0x00038A, 0x0003AF}, {0x00038C, 0x0003CC}, + {0x00038E, 0x0003CD}, {0x00038F, 0x0003CE}, {0x000391, 0x0003B1}, {0x000392, 0x0003B2}, {0x000393, 0x0003B3}, + {0x000394, 0x0003B4}, {0x000395, 0x0003B5}, {0x000396, 0x0003B6}, {0x000397, 0x0003B7}, {0x000398, 0x0003B8}, + {0x000399, 0x0003B9}, {0x00039A, 0x0003BA}, {0x00039B, 0x0003BB}, {0x00039C, 0x0003BC}, {0x00039D, 0x0003BD}, + {0x00039E, 0x0003BE}, {0x00039F, 0x0003BF}, {0x0003A0, 0x0003C0}, {0x0003A1, 0x0003C1}, {0x0003A3, 0x0003C3}, + {0x0003A4, 0x0003C4}, {0x0003A5, 0x0003C5}, {0x0003A6, 0x0003C6}, {0x0003A7, 0x0003C7}, {0x0003A8, 0x0003C8}, + {0x0003A9, 0x0003C9}, {0x0003AA, 0x0003CA}, {0x0003AB, 0x0003CB}, {0x0003CF, 0x0003D7}, {0x0003D8, 0x0003D9}, + {0x0003DA, 0x0003DB}, {0x0003DC, 0x0003DD}, {0x0003DE, 0x0003DF}, {0x0003E0, 0x0003E1}, {0x0003E2, 0x0003E3}, + {0x0003E4, 0x0003E5}, {0x0003E6, 0x0003E7}, {0x0003E8, 0x0003E9}, {0x0003EA, 0x0003EB}, {0x0003EC, 0x0003ED}, + {0x0003EE, 0x0003EF}, {0x0003F4, 0x0003B8}, {0x0003F7, 0x0003F8}, {0x0003F9, 0x0003F2}, {0x0003FA, 0x0003FB}, + {0x0003FD, 0x00037B}, {0x0003FE, 0x00037C}, {0x0003FF, 0x00037D}, {0x000400, 0x000450}, {0x000401, 0x000451}, + {0x000402, 0x000452}, {0x000403, 0x000453}, {0x000404, 0x000454}, {0x000405, 0x000455}, {0x000406, 0x000456}, + {0x000407, 0x000457}, {0x000408, 0x000458}, {0x000409, 0x000459}, {0x00040A, 0x00045A}, {0x00040B, 0x00045B}, + {0x00040C, 0x00045C}, {0x00040D, 0x00045D}, {0x00040E, 0x00045E}, {0x00040F, 0x00045F}, {0x000410, 0x000430}, + {0x000411, 0x000431}, {0x000412, 0x000432}, {0x000413, 0x000433}, {0x000414, 0x000434}, {0x000415, 0x000435}, + {0x000416, 0x000436}, {0x000417, 0x000437}, {0x000418, 0x000438}, {0x000419, 0x000439}, {0x00041A, 0x00043A}, + {0x00041B, 0x00043B}, {0x00041C, 0x00043C}, {0x00041D, 0x00043D}, {0x00041E, 0x00043E}, {0x00041F, 0x00043F}, + {0x000420, 0x000440}, {0x000421, 0x000441}, {0x000422, 0x000442}, {0x000423, 0x000443}, {0x000424, 0x000444}, + {0x000425, 0x000445}, {0x000426, 0x000446}, {0x000427, 0x000447}, {0x000428, 0x000448}, {0x000429, 0x000449}, + {0x00042A, 0x00044A}, {0x00042B, 0x00044B}, {0x00042C, 0x00044C}, {0x00042D, 0x00044D}, {0x00042E, 0x00044E}, + {0x00042F, 0x00044F}, {0x000460, 0x000461}, {0x000462, 0x000463}, {0x000464, 0x000465}, {0x000466, 0x000467}, + {0x000468, 0x000469}, {0x00046A, 0x00046B}, {0x00046C, 0x00046D}, {0x00046E, 0x00046F}, {0x000470, 0x000471}, + {0x000472, 0x000473}, {0x000474, 0x000475}, {0x000476, 0x000477}, {0x000478, 0x000479}, {0x00047A, 0x00047B}, + {0x00047C, 0x00047D}, {0x00047E, 0x00047F}, {0x000480, 0x000481}, {0x00048A, 0x00048B}, {0x00048C, 0x00048D}, + {0x00048E, 0x00048F}, {0x000490, 0x000491}, {0x000492, 0x000493}, {0x000494, 0x000495}, {0x000496, 0x000497}, + {0x000498, 0x000499}, {0x00049A, 0x00049B}, {0x00049C, 0x00049D}, {0x00049E, 0x00049F}, {0x0004A0, 0x0004A1}, + {0x0004A2, 0x0004A3}, {0x0004A4, 0x0004A5}, {0x0004A6, 0x0004A7}, {0x0004A8, 0x0004A9}, {0x0004AA, 0x0004AB}, + {0x0004AC, 0x0004AD}, {0x0004AE, 0x0004AF}, {0x0004B0, 0x0004B1}, {0x0004B2, 0x0004B3}, {0x0004B4, 0x0004B5}, + {0x0004B6, 0x0004B7}, {0x0004B8, 0x0004B9}, {0x0004BA, 0x0004BB}, {0x0004BC, 0x0004BD}, {0x0004BE, 0x0004BF}, + {0x0004C0, 0x0004CF}, {0x0004C1, 0x0004C2}, {0x0004C3, 0x0004C4}, {0x0004C5, 0x0004C6}, {0x0004C7, 0x0004C8}, + {0x0004C9, 0x0004CA}, {0x0004CB, 0x0004CC}, {0x0004CD, 0x0004CE}, {0x0004D0, 0x0004D1}, {0x0004D2, 0x0004D3}, + {0x0004D4, 
0x0004D5}, {0x0004D6, 0x0004D7}, {0x0004D8, 0x0004D9}, {0x0004DA, 0x0004DB}, {0x0004DC, 0x0004DD}, + {0x0004DE, 0x0004DF}, {0x0004E0, 0x0004E1}, {0x0004E2, 0x0004E3}, {0x0004E4, 0x0004E5}, {0x0004E6, 0x0004E7}, + {0x0004E8, 0x0004E9}, {0x0004EA, 0x0004EB}, {0x0004EC, 0x0004ED}, {0x0004EE, 0x0004EF}, {0x0004F0, 0x0004F1}, + {0x0004F2, 0x0004F3}, {0x0004F4, 0x0004F5}, {0x0004F6, 0x0004F7}, {0x0004F8, 0x0004F9}, {0x0004FA, 0x0004FB}, + {0x0004FC, 0x0004FD}, {0x0004FE, 0x0004FF}, {0x000500, 0x000501}, {0x000502, 0x000503}, {0x000504, 0x000505}, + {0x000506, 0x000507}, {0x000508, 0x000509}, {0x00050A, 0x00050B}, {0x00050C, 0x00050D}, {0x00050E, 0x00050F}, + {0x000510, 0x000511}, {0x000512, 0x000513}, {0x000514, 0x000515}, {0x000516, 0x000517}, {0x000518, 0x000519}, + {0x00051A, 0x00051B}, {0x00051C, 0x00051D}, {0x00051E, 0x00051F}, {0x000520, 0x000521}, {0x000522, 0x000523}, + {0x000524, 0x000525}, {0x000526, 0x000527}, {0x000528, 0x000529}, {0x00052A, 0x00052B}, {0x00052C, 0x00052D}, + {0x00052E, 0x00052F}, {0x000531, 0x000561}, {0x000532, 0x000562}, {0x000533, 0x000563}, {0x000534, 0x000564}, + {0x000535, 0x000565}, {0x000536, 0x000566}, {0x000537, 0x000567}, {0x000538, 0x000568}, {0x000539, 0x000569}, + {0x00053A, 0x00056A}, {0x00053B, 0x00056B}, {0x00053C, 0x00056C}, {0x00053D, 0x00056D}, {0x00053E, 0x00056E}, + {0x00053F, 0x00056F}, {0x000540, 0x000570}, {0x000541, 0x000571}, {0x000542, 0x000572}, {0x000543, 0x000573}, + {0x000544, 0x000574}, {0x000545, 0x000575}, {0x000546, 0x000576}, {0x000547, 0x000577}, {0x000548, 0x000578}, + {0x000549, 0x000579}, {0x00054A, 0x00057A}, {0x00054B, 0x00057B}, {0x00054C, 0x00057C}, {0x00054D, 0x00057D}, + {0x00054E, 0x00057E}, {0x00054F, 0x00057F}, {0x000550, 0x000580}, {0x000551, 0x000581}, {0x000552, 0x000582}, + {0x000553, 0x000583}, {0x000554, 0x000584}, {0x000555, 0x000585}, {0x000556, 0x000586}, {0x0010A0, 0x002D00}, + {0x0010A1, 0x002D01}, {0x0010A2, 0x002D02}, {0x0010A3, 0x002D03}, {0x0010A4, 0x002D04}, {0x0010A5, 0x002D05}, + {0x0010A6, 0x002D06}, {0x0010A7, 0x002D07}, {0x0010A8, 0x002D08}, {0x0010A9, 0x002D09}, {0x0010AA, 0x002D0A}, + {0x0010AB, 0x002D0B}, {0x0010AC, 0x002D0C}, {0x0010AD, 0x002D0D}, {0x0010AE, 0x002D0E}, {0x0010AF, 0x002D0F}, + {0x0010B0, 0x002D10}, {0x0010B1, 0x002D11}, {0x0010B2, 0x002D12}, {0x0010B3, 0x002D13}, {0x0010B4, 0x002D14}, + {0x0010B5, 0x002D15}, {0x0010B6, 0x002D16}, {0x0010B7, 0x002D17}, {0x0010B8, 0x002D18}, {0x0010B9, 0x002D19}, + {0x0010BA, 0x002D1A}, {0x0010BB, 0x002D1B}, {0x0010BC, 0x002D1C}, {0x0010BD, 0x002D1D}, {0x0010BE, 0x002D1E}, + {0x0010BF, 0x002D1F}, {0x0010C0, 0x002D20}, {0x0010C1, 0x002D21}, {0x0010C2, 0x002D22}, {0x0010C3, 0x002D23}, + {0x0010C4, 0x002D24}, {0x0010C5, 0x002D25}, {0x0010C7, 0x002D27}, {0x0010CD, 0x002D2D}, {0x0013A0, 0x00AB70}, + {0x0013A1, 0x00AB71}, {0x0013A2, 0x00AB72}, {0x0013A3, 0x00AB73}, {0x0013A4, 0x00AB74}, {0x0013A5, 0x00AB75}, + {0x0013A6, 0x00AB76}, {0x0013A7, 0x00AB77}, {0x0013A8, 0x00AB78}, {0x0013A9, 0x00AB79}, {0x0013AA, 0x00AB7A}, + {0x0013AB, 0x00AB7B}, {0x0013AC, 0x00AB7C}, {0x0013AD, 0x00AB7D}, {0x0013AE, 0x00AB7E}, {0x0013AF, 0x00AB7F}, + {0x0013B0, 0x00AB80}, {0x0013B1, 0x00AB81}, {0x0013B2, 0x00AB82}, {0x0013B3, 0x00AB83}, {0x0013B4, 0x00AB84}, + {0x0013B5, 0x00AB85}, {0x0013B6, 0x00AB86}, {0x0013B7, 0x00AB87}, {0x0013B8, 0x00AB88}, {0x0013B9, 0x00AB89}, + {0x0013BA, 0x00AB8A}, {0x0013BB, 0x00AB8B}, {0x0013BC, 0x00AB8C}, {0x0013BD, 0x00AB8D}, {0x0013BE, 0x00AB8E}, + {0x0013BF, 0x00AB8F}, {0x0013C0, 0x00AB90}, {0x0013C1, 0x00AB91}, {0x0013C2, 0x00AB92}, 
{0x0013C3, 0x00AB93}, + {0x0013C4, 0x00AB94}, {0x0013C5, 0x00AB95}, {0x0013C6, 0x00AB96}, {0x0013C7, 0x00AB97}, {0x0013C8, 0x00AB98}, + {0x0013C9, 0x00AB99}, {0x0013CA, 0x00AB9A}, {0x0013CB, 0x00AB9B}, {0x0013CC, 0x00AB9C}, {0x0013CD, 0x00AB9D}, + {0x0013CE, 0x00AB9E}, {0x0013CF, 0x00AB9F}, {0x0013D0, 0x00ABA0}, {0x0013D1, 0x00ABA1}, {0x0013D2, 0x00ABA2}, + {0x0013D3, 0x00ABA3}, {0x0013D4, 0x00ABA4}, {0x0013D5, 0x00ABA5}, {0x0013D6, 0x00ABA6}, {0x0013D7, 0x00ABA7}, + {0x0013D8, 0x00ABA8}, {0x0013D9, 0x00ABA9}, {0x0013DA, 0x00ABAA}, {0x0013DB, 0x00ABAB}, {0x0013DC, 0x00ABAC}, + {0x0013DD, 0x00ABAD}, {0x0013DE, 0x00ABAE}, {0x0013DF, 0x00ABAF}, {0x0013E0, 0x00ABB0}, {0x0013E1, 0x00ABB1}, + {0x0013E2, 0x00ABB2}, {0x0013E3, 0x00ABB3}, {0x0013E4, 0x00ABB4}, {0x0013E5, 0x00ABB5}, {0x0013E6, 0x00ABB6}, + {0x0013E7, 0x00ABB7}, {0x0013E8, 0x00ABB8}, {0x0013E9, 0x00ABB9}, {0x0013EA, 0x00ABBA}, {0x0013EB, 0x00ABBB}, + {0x0013EC, 0x00ABBC}, {0x0013ED, 0x00ABBD}, {0x0013EE, 0x00ABBE}, {0x0013EF, 0x00ABBF}, {0x0013F0, 0x0013F8}, + {0x0013F1, 0x0013F9}, {0x0013F2, 0x0013FA}, {0x0013F3, 0x0013FB}, {0x0013F4, 0x0013FC}, {0x0013F5, 0x0013FD}, + {0x001C90, 0x0010D0}, {0x001C91, 0x0010D1}, {0x001C92, 0x0010D2}, {0x001C93, 0x0010D3}, {0x001C94, 0x0010D4}, + {0x001C95, 0x0010D5}, {0x001C96, 0x0010D6}, {0x001C97, 0x0010D7}, {0x001C98, 0x0010D8}, {0x001C99, 0x0010D9}, + {0x001C9A, 0x0010DA}, {0x001C9B, 0x0010DB}, {0x001C9C, 0x0010DC}, {0x001C9D, 0x0010DD}, {0x001C9E, 0x0010DE}, + {0x001C9F, 0x0010DF}, {0x001CA0, 0x0010E0}, {0x001CA1, 0x0010E1}, {0x001CA2, 0x0010E2}, {0x001CA3, 0x0010E3}, + {0x001CA4, 0x0010E4}, {0x001CA5, 0x0010E5}, {0x001CA6, 0x0010E6}, {0x001CA7, 0x0010E7}, {0x001CA8, 0x0010E8}, + {0x001CA9, 0x0010E9}, {0x001CAA, 0x0010EA}, {0x001CAB, 0x0010EB}, {0x001CAC, 0x0010EC}, {0x001CAD, 0x0010ED}, + {0x001CAE, 0x0010EE}, {0x001CAF, 0x0010EF}, {0x001CB0, 0x0010F0}, {0x001CB1, 0x0010F1}, {0x001CB2, 0x0010F2}, + {0x001CB3, 0x0010F3}, {0x001CB4, 0x0010F4}, {0x001CB5, 0x0010F5}, {0x001CB6, 0x0010F6}, {0x001CB7, 0x0010F7}, + {0x001CB8, 0x0010F8}, {0x001CB9, 0x0010F9}, {0x001CBA, 0x0010FA}, {0x001CBD, 0x0010FD}, {0x001CBE, 0x0010FE}, + {0x001CBF, 0x0010FF}, {0x001E00, 0x001E01}, {0x001E02, 0x001E03}, {0x001E04, 0x001E05}, {0x001E06, 0x001E07}, + {0x001E08, 0x001E09}, {0x001E0A, 0x001E0B}, {0x001E0C, 0x001E0D}, {0x001E0E, 0x001E0F}, {0x001E10, 0x001E11}, + {0x001E12, 0x001E13}, {0x001E14, 0x001E15}, {0x001E16, 0x001E17}, {0x001E18, 0x001E19}, {0x001E1A, 0x001E1B}, + {0x001E1C, 0x001E1D}, {0x001E1E, 0x001E1F}, {0x001E20, 0x001E21}, {0x001E22, 0x001E23}, {0x001E24, 0x001E25}, + {0x001E26, 0x001E27}, {0x001E28, 0x001E29}, {0x001E2A, 0x001E2B}, {0x001E2C, 0x001E2D}, {0x001E2E, 0x001E2F}, + {0x001E30, 0x001E31}, {0x001E32, 0x001E33}, {0x001E34, 0x001E35}, {0x001E36, 0x001E37}, {0x001E38, 0x001E39}, + {0x001E3A, 0x001E3B}, {0x001E3C, 0x001E3D}, {0x001E3E, 0x001E3F}, {0x001E40, 0x001E41}, {0x001E42, 0x001E43}, + {0x001E44, 0x001E45}, {0x001E46, 0x001E47}, {0x001E48, 0x001E49}, {0x001E4A, 0x001E4B}, {0x001E4C, 0x001E4D}, + {0x001E4E, 0x001E4F}, {0x001E50, 0x001E51}, {0x001E52, 0x001E53}, {0x001E54, 0x001E55}, {0x001E56, 0x001E57}, + {0x001E58, 0x001E59}, {0x001E5A, 0x001E5B}, {0x001E5C, 0x001E5D}, {0x001E5E, 0x001E5F}, {0x001E60, 0x001E61}, + {0x001E62, 0x001E63}, {0x001E64, 0x001E65}, {0x001E66, 0x001E67}, {0x001E68, 0x001E69}, {0x001E6A, 0x001E6B}, + {0x001E6C, 0x001E6D}, {0x001E6E, 0x001E6F}, {0x001E70, 0x001E71}, {0x001E72, 0x001E73}, {0x001E74, 0x001E75}, + {0x001E76, 0x001E77}, {0x001E78, 0x001E79}, {0x001E7A, 
0x001E7B}, {0x001E7C, 0x001E7D}, {0x001E7E, 0x001E7F}, + {0x001E80, 0x001E81}, {0x001E82, 0x001E83}, {0x001E84, 0x001E85}, {0x001E86, 0x001E87}, {0x001E88, 0x001E89}, + {0x001E8A, 0x001E8B}, {0x001E8C, 0x001E8D}, {0x001E8E, 0x001E8F}, {0x001E90, 0x001E91}, {0x001E92, 0x001E93}, + {0x001E94, 0x001E95}, {0x001E9E, 0x0000DF}, {0x001EA0, 0x001EA1}, {0x001EA2, 0x001EA3}, {0x001EA4, 0x001EA5}, + {0x001EA6, 0x001EA7}, {0x001EA8, 0x001EA9}, {0x001EAA, 0x001EAB}, {0x001EAC, 0x001EAD}, {0x001EAE, 0x001EAF}, + {0x001EB0, 0x001EB1}, {0x001EB2, 0x001EB3}, {0x001EB4, 0x001EB5}, {0x001EB6, 0x001EB7}, {0x001EB8, 0x001EB9}, + {0x001EBA, 0x001EBB}, {0x001EBC, 0x001EBD}, {0x001EBE, 0x001EBF}, {0x001EC0, 0x001EC1}, {0x001EC2, 0x001EC3}, + {0x001EC4, 0x001EC5}, {0x001EC6, 0x001EC7}, {0x001EC8, 0x001EC9}, {0x001ECA, 0x001ECB}, {0x001ECC, 0x001ECD}, + {0x001ECE, 0x001ECF}, {0x001ED0, 0x001ED1}, {0x001ED2, 0x001ED3}, {0x001ED4, 0x001ED5}, {0x001ED6, 0x001ED7}, + {0x001ED8, 0x001ED9}, {0x001EDA, 0x001EDB}, {0x001EDC, 0x001EDD}, {0x001EDE, 0x001EDF}, {0x001EE0, 0x001EE1}, + {0x001EE2, 0x001EE3}, {0x001EE4, 0x001EE5}, {0x001EE6, 0x001EE7}, {0x001EE8, 0x001EE9}, {0x001EEA, 0x001EEB}, + {0x001EEC, 0x001EED}, {0x001EEE, 0x001EEF}, {0x001EF0, 0x001EF1}, {0x001EF2, 0x001EF3}, {0x001EF4, 0x001EF5}, + {0x001EF6, 0x001EF7}, {0x001EF8, 0x001EF9}, {0x001EFA, 0x001EFB}, {0x001EFC, 0x001EFD}, {0x001EFE, 0x001EFF}, + {0x001F08, 0x001F00}, {0x001F09, 0x001F01}, {0x001F0A, 0x001F02}, {0x001F0B, 0x001F03}, {0x001F0C, 0x001F04}, + {0x001F0D, 0x001F05}, {0x001F0E, 0x001F06}, {0x001F0F, 0x001F07}, {0x001F18, 0x001F10}, {0x001F19, 0x001F11}, + {0x001F1A, 0x001F12}, {0x001F1B, 0x001F13}, {0x001F1C, 0x001F14}, {0x001F1D, 0x001F15}, {0x001F28, 0x001F20}, + {0x001F29, 0x001F21}, {0x001F2A, 0x001F22}, {0x001F2B, 0x001F23}, {0x001F2C, 0x001F24}, {0x001F2D, 0x001F25}, + {0x001F2E, 0x001F26}, {0x001F2F, 0x001F27}, {0x001F38, 0x001F30}, {0x001F39, 0x001F31}, {0x001F3A, 0x001F32}, + {0x001F3B, 0x001F33}, {0x001F3C, 0x001F34}, {0x001F3D, 0x001F35}, {0x001F3E, 0x001F36}, {0x001F3F, 0x001F37}, + {0x001F48, 0x001F40}, {0x001F49, 0x001F41}, {0x001F4A, 0x001F42}, {0x001F4B, 0x001F43}, {0x001F4C, 0x001F44}, + {0x001F4D, 0x001F45}, {0x001F59, 0x001F51}, {0x001F5B, 0x001F53}, {0x001F5D, 0x001F55}, {0x001F5F, 0x001F57}, + {0x001F68, 0x001F60}, {0x001F69, 0x001F61}, {0x001F6A, 0x001F62}, {0x001F6B, 0x001F63}, {0x001F6C, 0x001F64}, + {0x001F6D, 0x001F65}, {0x001F6E, 0x001F66}, {0x001F6F, 0x001F67}, {0x001F88, 0x001F80}, {0x001F89, 0x001F81}, + {0x001F8A, 0x001F82}, {0x001F8B, 0x001F83}, {0x001F8C, 0x001F84}, {0x001F8D, 0x001F85}, {0x001F8E, 0x001F86}, + {0x001F8F, 0x001F87}, {0x001F98, 0x001F90}, {0x001F99, 0x001F91}, {0x001F9A, 0x001F92}, {0x001F9B, 0x001F93}, + {0x001F9C, 0x001F94}, {0x001F9D, 0x001F95}, {0x001F9E, 0x001F96}, {0x001F9F, 0x001F97}, {0x001FA8, 0x001FA0}, + {0x001FA9, 0x001FA1}, {0x001FAA, 0x001FA2}, {0x001FAB, 0x001FA3}, {0x001FAC, 0x001FA4}, {0x001FAD, 0x001FA5}, + {0x001FAE, 0x001FA6}, {0x001FAF, 0x001FA7}, {0x001FB8, 0x001FB0}, {0x001FB9, 0x001FB1}, {0x001FBA, 0x001F70}, + {0x001FBB, 0x001F71}, {0x001FBC, 0x001FB3}, {0x001FC8, 0x001F72}, {0x001FC9, 0x001F73}, {0x001FCA, 0x001F74}, + {0x001FCB, 0x001F75}, {0x001FCC, 0x001FC3}, {0x001FD8, 0x001FD0}, {0x001FD9, 0x001FD1}, {0x001FDA, 0x001F76}, + {0x001FDB, 0x001F77}, {0x001FE8, 0x001FE0}, {0x001FE9, 0x001FE1}, {0x001FEA, 0x001F7A}, {0x001FEB, 0x001F7B}, + {0x001FEC, 0x001FE5}, {0x001FF8, 0x001F78}, {0x001FF9, 0x001F79}, {0x001FFA, 0x001F7C}, {0x001FFB, 0x001F7D}, + {0x001FFC, 0x001FF3}, 
{0x002126, 0x0003C9}, {0x00212A, 0x00006B}, {0x00212B, 0x0000E5}, {0x002132, 0x00214E}, + {0x002160, 0x002170}, {0x002161, 0x002171}, {0x002162, 0x002172}, {0x002163, 0x002173}, {0x002164, 0x002174}, + {0x002165, 0x002175}, {0x002166, 0x002176}, {0x002167, 0x002177}, {0x002168, 0x002178}, {0x002169, 0x002179}, + {0x00216A, 0x00217A}, {0x00216B, 0x00217B}, {0x00216C, 0x00217C}, {0x00216D, 0x00217D}, {0x00216E, 0x00217E}, + {0x00216F, 0x00217F}, {0x002183, 0x002184}, {0x0024B6, 0x0024D0}, {0x0024B7, 0x0024D1}, {0x0024B8, 0x0024D2}, + {0x0024B9, 0x0024D3}, {0x0024BA, 0x0024D4}, {0x0024BB, 0x0024D5}, {0x0024BC, 0x0024D6}, {0x0024BD, 0x0024D7}, + {0x0024BE, 0x0024D8}, {0x0024BF, 0x0024D9}, {0x0024C0, 0x0024DA}, {0x0024C1, 0x0024DB}, {0x0024C2, 0x0024DC}, + {0x0024C3, 0x0024DD}, {0x0024C4, 0x0024DE}, {0x0024C5, 0x0024DF}, {0x0024C6, 0x0024E0}, {0x0024C7, 0x0024E1}, + {0x0024C8, 0x0024E2}, {0x0024C9, 0x0024E3}, {0x0024CA, 0x0024E4}, {0x0024CB, 0x0024E5}, {0x0024CC, 0x0024E6}, + {0x0024CD, 0x0024E7}, {0x0024CE, 0x0024E8}, {0x0024CF, 0x0024E9}, {0x002C00, 0x002C30}, {0x002C01, 0x002C31}, + {0x002C02, 0x002C32}, {0x002C03, 0x002C33}, {0x002C04, 0x002C34}, {0x002C05, 0x002C35}, {0x002C06, 0x002C36}, + {0x002C07, 0x002C37}, {0x002C08, 0x002C38}, {0x002C09, 0x002C39}, {0x002C0A, 0x002C3A}, {0x002C0B, 0x002C3B}, + {0x002C0C, 0x002C3C}, {0x002C0D, 0x002C3D}, {0x002C0E, 0x002C3E}, {0x002C0F, 0x002C3F}, {0x002C10, 0x002C40}, + {0x002C11, 0x002C41}, {0x002C12, 0x002C42}, {0x002C13, 0x002C43}, {0x002C14, 0x002C44}, {0x002C15, 0x002C45}, + {0x002C16, 0x002C46}, {0x002C17, 0x002C47}, {0x002C18, 0x002C48}, {0x002C19, 0x002C49}, {0x002C1A, 0x002C4A}, + {0x002C1B, 0x002C4B}, {0x002C1C, 0x002C4C}, {0x002C1D, 0x002C4D}, {0x002C1E, 0x002C4E}, {0x002C1F, 0x002C4F}, + {0x002C20, 0x002C50}, {0x002C21, 0x002C51}, {0x002C22, 0x002C52}, {0x002C23, 0x002C53}, {0x002C24, 0x002C54}, + {0x002C25, 0x002C55}, {0x002C26, 0x002C56}, {0x002C27, 0x002C57}, {0x002C28, 0x002C58}, {0x002C29, 0x002C59}, + {0x002C2A, 0x002C5A}, {0x002C2B, 0x002C5B}, {0x002C2C, 0x002C5C}, {0x002C2D, 0x002C5D}, {0x002C2E, 0x002C5E}, + {0x002C2F, 0x002C5F}, {0x002C60, 0x002C61}, {0x002C62, 0x00026B}, {0x002C63, 0x001D7D}, {0x002C64, 0x00027D}, + {0x002C67, 0x002C68}, {0x002C69, 0x002C6A}, {0x002C6B, 0x002C6C}, {0x002C6D, 0x000251}, {0x002C6E, 0x000271}, + {0x002C6F, 0x000250}, {0x002C70, 0x000252}, {0x002C72, 0x002C73}, {0x002C75, 0x002C76}, {0x002C7E, 0x00023F}, + {0x002C7F, 0x000240}, {0x002C80, 0x002C81}, {0x002C82, 0x002C83}, {0x002C84, 0x002C85}, {0x002C86, 0x002C87}, + {0x002C88, 0x002C89}, {0x002C8A, 0x002C8B}, {0x002C8C, 0x002C8D}, {0x002C8E, 0x002C8F}, {0x002C90, 0x002C91}, + {0x002C92, 0x002C93}, {0x002C94, 0x002C95}, {0x002C96, 0x002C97}, {0x002C98, 0x002C99}, {0x002C9A, 0x002C9B}, + {0x002C9C, 0x002C9D}, {0x002C9E, 0x002C9F}, {0x002CA0, 0x002CA1}, {0x002CA2, 0x002CA3}, {0x002CA4, 0x002CA5}, + {0x002CA6, 0x002CA7}, {0x002CA8, 0x002CA9}, {0x002CAA, 0x002CAB}, {0x002CAC, 0x002CAD}, {0x002CAE, 0x002CAF}, + {0x002CB0, 0x002CB1}, {0x002CB2, 0x002CB3}, {0x002CB4, 0x002CB5}, {0x002CB6, 0x002CB7}, {0x002CB8, 0x002CB9}, + {0x002CBA, 0x002CBB}, {0x002CBC, 0x002CBD}, {0x002CBE, 0x002CBF}, {0x002CC0, 0x002CC1}, {0x002CC2, 0x002CC3}, + {0x002CC4, 0x002CC5}, {0x002CC6, 0x002CC7}, {0x002CC8, 0x002CC9}, {0x002CCA, 0x002CCB}, {0x002CCC, 0x002CCD}, + {0x002CCE, 0x002CCF}, {0x002CD0, 0x002CD1}, {0x002CD2, 0x002CD3}, {0x002CD4, 0x002CD5}, {0x002CD6, 0x002CD7}, + {0x002CD8, 0x002CD9}, {0x002CDA, 0x002CDB}, {0x002CDC, 0x002CDD}, {0x002CDE, 0x002CDF}, {0x002CE0, 
0x002CE1}, + {0x002CE2, 0x002CE3}, {0x002CEB, 0x002CEC}, {0x002CED, 0x002CEE}, {0x002CF2, 0x002CF3}, {0x00A640, 0x00A641}, + {0x00A642, 0x00A643}, {0x00A644, 0x00A645}, {0x00A646, 0x00A647}, {0x00A648, 0x00A649}, {0x00A64A, 0x00A64B}, + {0x00A64C, 0x00A64D}, {0x00A64E, 0x00A64F}, {0x00A650, 0x00A651}, {0x00A652, 0x00A653}, {0x00A654, 0x00A655}, + {0x00A656, 0x00A657}, {0x00A658, 0x00A659}, {0x00A65A, 0x00A65B}, {0x00A65C, 0x00A65D}, {0x00A65E, 0x00A65F}, + {0x00A660, 0x00A661}, {0x00A662, 0x00A663}, {0x00A664, 0x00A665}, {0x00A666, 0x00A667}, {0x00A668, 0x00A669}, + {0x00A66A, 0x00A66B}, {0x00A66C, 0x00A66D}, {0x00A680, 0x00A681}, {0x00A682, 0x00A683}, {0x00A684, 0x00A685}, + {0x00A686, 0x00A687}, {0x00A688, 0x00A689}, {0x00A68A, 0x00A68B}, {0x00A68C, 0x00A68D}, {0x00A68E, 0x00A68F}, + {0x00A690, 0x00A691}, {0x00A692, 0x00A693}, {0x00A694, 0x00A695}, {0x00A696, 0x00A697}, {0x00A698, 0x00A699}, + {0x00A69A, 0x00A69B}, {0x00A722, 0x00A723}, {0x00A724, 0x00A725}, {0x00A726, 0x00A727}, {0x00A728, 0x00A729}, + {0x00A72A, 0x00A72B}, {0x00A72C, 0x00A72D}, {0x00A72E, 0x00A72F}, {0x00A732, 0x00A733}, {0x00A734, 0x00A735}, + {0x00A736, 0x00A737}, {0x00A738, 0x00A739}, {0x00A73A, 0x00A73B}, {0x00A73C, 0x00A73D}, {0x00A73E, 0x00A73F}, + {0x00A740, 0x00A741}, {0x00A742, 0x00A743}, {0x00A744, 0x00A745}, {0x00A746, 0x00A747}, {0x00A748, 0x00A749}, + {0x00A74A, 0x00A74B}, {0x00A74C, 0x00A74D}, {0x00A74E, 0x00A74F}, {0x00A750, 0x00A751}, {0x00A752, 0x00A753}, + {0x00A754, 0x00A755}, {0x00A756, 0x00A757}, {0x00A758, 0x00A759}, {0x00A75A, 0x00A75B}, {0x00A75C, 0x00A75D}, + {0x00A75E, 0x00A75F}, {0x00A760, 0x00A761}, {0x00A762, 0x00A763}, {0x00A764, 0x00A765}, {0x00A766, 0x00A767}, + {0x00A768, 0x00A769}, {0x00A76A, 0x00A76B}, {0x00A76C, 0x00A76D}, {0x00A76E, 0x00A76F}, {0x00A779, 0x00A77A}, + {0x00A77B, 0x00A77C}, {0x00A77D, 0x001D79}, {0x00A77E, 0x00A77F}, {0x00A780, 0x00A781}, {0x00A782, 0x00A783}, + {0x00A784, 0x00A785}, {0x00A786, 0x00A787}, {0x00A78B, 0x00A78C}, {0x00A78D, 0x000265}, {0x00A790, 0x00A791}, + {0x00A792, 0x00A793}, {0x00A796, 0x00A797}, {0x00A798, 0x00A799}, {0x00A79A, 0x00A79B}, {0x00A79C, 0x00A79D}, + {0x00A79E, 0x00A79F}, {0x00A7A0, 0x00A7A1}, {0x00A7A2, 0x00A7A3}, {0x00A7A4, 0x00A7A5}, {0x00A7A6, 0x00A7A7}, + {0x00A7A8, 0x00A7A9}, {0x00A7AA, 0x000266}, {0x00A7AB, 0x00025C}, {0x00A7AC, 0x000261}, {0x00A7AD, 0x00026C}, + {0x00A7AE, 0x00026A}, {0x00A7B0, 0x00029E}, {0x00A7B1, 0x000287}, {0x00A7B2, 0x00029D}, {0x00A7B3, 0x00AB53}, + {0x00A7B4, 0x00A7B5}, {0x00A7B6, 0x00A7B7}, {0x00A7B8, 0x00A7B9}, {0x00A7BA, 0x00A7BB}, {0x00A7BC, 0x00A7BD}, + {0x00A7BE, 0x00A7BF}, {0x00A7C0, 0x00A7C1}, {0x00A7C2, 0x00A7C3}, {0x00A7C4, 0x00A794}, {0x00A7C5, 0x000282}, + {0x00A7C6, 0x001D8E}, {0x00A7C7, 0x00A7C8}, {0x00A7C9, 0x00A7CA}, {0x00A7D0, 0x00A7D1}, {0x00A7D6, 0x00A7D7}, + {0x00A7D8, 0x00A7D9}, {0x00A7F5, 0x00A7F6}, {0x00FF21, 0x00FF41}, {0x00FF22, 0x00FF42}, {0x00FF23, 0x00FF43}, + {0x00FF24, 0x00FF44}, {0x00FF25, 0x00FF45}, {0x00FF26, 0x00FF46}, {0x00FF27, 0x00FF47}, {0x00FF28, 0x00FF48}, + {0x00FF29, 0x00FF49}, {0x00FF2A, 0x00FF4A}, {0x00FF2B, 0x00FF4B}, {0x00FF2C, 0x00FF4C}, {0x00FF2D, 0x00FF4D}, + {0x00FF2E, 0x00FF4E}, {0x00FF2F, 0x00FF4F}, {0x00FF30, 0x00FF50}, {0x00FF31, 0x00FF51}, {0x00FF32, 0x00FF52}, + {0x00FF33, 0x00FF53}, {0x00FF34, 0x00FF54}, {0x00FF35, 0x00FF55}, {0x00FF36, 0x00FF56}, {0x00FF37, 0x00FF57}, + {0x00FF38, 0x00FF58}, {0x00FF39, 0x00FF59}, {0x00FF3A, 0x00FF5A}, {0x010400, 0x010428}, {0x010401, 0x010429}, + {0x010402, 0x01042A}, {0x010403, 0x01042B}, {0x010404, 0x01042C}, 
{0x010405, 0x01042D}, {0x010406, 0x01042E}, + {0x010407, 0x01042F}, {0x010408, 0x010430}, {0x010409, 0x010431}, {0x01040A, 0x010432}, {0x01040B, 0x010433}, + {0x01040C, 0x010434}, {0x01040D, 0x010435}, {0x01040E, 0x010436}, {0x01040F, 0x010437}, {0x010410, 0x010438}, + {0x010411, 0x010439}, {0x010412, 0x01043A}, {0x010413, 0x01043B}, {0x010414, 0x01043C}, {0x010415, 0x01043D}, + {0x010416, 0x01043E}, {0x010417, 0x01043F}, {0x010418, 0x010440}, {0x010419, 0x010441}, {0x01041A, 0x010442}, + {0x01041B, 0x010443}, {0x01041C, 0x010444}, {0x01041D, 0x010445}, {0x01041E, 0x010446}, {0x01041F, 0x010447}, + {0x010420, 0x010448}, {0x010421, 0x010449}, {0x010422, 0x01044A}, {0x010423, 0x01044B}, {0x010424, 0x01044C}, + {0x010425, 0x01044D}, {0x010426, 0x01044E}, {0x010427, 0x01044F}, {0x0104B0, 0x0104D8}, {0x0104B1, 0x0104D9}, + {0x0104B2, 0x0104DA}, {0x0104B3, 0x0104DB}, {0x0104B4, 0x0104DC}, {0x0104B5, 0x0104DD}, {0x0104B6, 0x0104DE}, + {0x0104B7, 0x0104DF}, {0x0104B8, 0x0104E0}, {0x0104B9, 0x0104E1}, {0x0104BA, 0x0104E2}, {0x0104BB, 0x0104E3}, + {0x0104BC, 0x0104E4}, {0x0104BD, 0x0104E5}, {0x0104BE, 0x0104E6}, {0x0104BF, 0x0104E7}, {0x0104C0, 0x0104E8}, + {0x0104C1, 0x0104E9}, {0x0104C2, 0x0104EA}, {0x0104C3, 0x0104EB}, {0x0104C4, 0x0104EC}, {0x0104C5, 0x0104ED}, + {0x0104C6, 0x0104EE}, {0x0104C7, 0x0104EF}, {0x0104C8, 0x0104F0}, {0x0104C9, 0x0104F1}, {0x0104CA, 0x0104F2}, + {0x0104CB, 0x0104F3}, {0x0104CC, 0x0104F4}, {0x0104CD, 0x0104F5}, {0x0104CE, 0x0104F6}, {0x0104CF, 0x0104F7}, + {0x0104D0, 0x0104F8}, {0x0104D1, 0x0104F9}, {0x0104D2, 0x0104FA}, {0x0104D3, 0x0104FB}, {0x010570, 0x010597}, + {0x010571, 0x010598}, {0x010572, 0x010599}, {0x010573, 0x01059A}, {0x010574, 0x01059B}, {0x010575, 0x01059C}, + {0x010576, 0x01059D}, {0x010577, 0x01059E}, {0x010578, 0x01059F}, {0x010579, 0x0105A0}, {0x01057A, 0x0105A1}, + {0x01057C, 0x0105A3}, {0x01057D, 0x0105A4}, {0x01057E, 0x0105A5}, {0x01057F, 0x0105A6}, {0x010580, 0x0105A7}, + {0x010581, 0x0105A8}, {0x010582, 0x0105A9}, {0x010583, 0x0105AA}, {0x010584, 0x0105AB}, {0x010585, 0x0105AC}, + {0x010586, 0x0105AD}, {0x010587, 0x0105AE}, {0x010588, 0x0105AF}, {0x010589, 0x0105B0}, {0x01058A, 0x0105B1}, + {0x01058C, 0x0105B3}, {0x01058D, 0x0105B4}, {0x01058E, 0x0105B5}, {0x01058F, 0x0105B6}, {0x010590, 0x0105B7}, + {0x010591, 0x0105B8}, {0x010592, 0x0105B9}, {0x010594, 0x0105BB}, {0x010595, 0x0105BC}, {0x010C80, 0x010CC0}, + {0x010C81, 0x010CC1}, {0x010C82, 0x010CC2}, {0x010C83, 0x010CC3}, {0x010C84, 0x010CC4}, {0x010C85, 0x010CC5}, + {0x010C86, 0x010CC6}, {0x010C87, 0x010CC7}, {0x010C88, 0x010CC8}, {0x010C89, 0x010CC9}, {0x010C8A, 0x010CCA}, + {0x010C8B, 0x010CCB}, {0x010C8C, 0x010CCC}, {0x010C8D, 0x010CCD}, {0x010C8E, 0x010CCE}, {0x010C8F, 0x010CCF}, + {0x010C90, 0x010CD0}, {0x010C91, 0x010CD1}, {0x010C92, 0x010CD2}, {0x010C93, 0x010CD3}, {0x010C94, 0x010CD4}, + {0x010C95, 0x010CD5}, {0x010C96, 0x010CD6}, {0x010C97, 0x010CD7}, {0x010C98, 0x010CD8}, {0x010C99, 0x010CD9}, + {0x010C9A, 0x010CDA}, {0x010C9B, 0x010CDB}, {0x010C9C, 0x010CDC}, {0x010C9D, 0x010CDD}, {0x010C9E, 0x010CDE}, + {0x010C9F, 0x010CDF}, {0x010CA0, 0x010CE0}, {0x010CA1, 0x010CE1}, {0x010CA2, 0x010CE2}, {0x010CA3, 0x010CE3}, + {0x010CA4, 0x010CE4}, {0x010CA5, 0x010CE5}, {0x010CA6, 0x010CE6}, {0x010CA7, 0x010CE7}, {0x010CA8, 0x010CE8}, + {0x010CA9, 0x010CE9}, {0x010CAA, 0x010CEA}, {0x010CAB, 0x010CEB}, {0x010CAC, 0x010CEC}, {0x010CAD, 0x010CED}, + {0x010CAE, 0x010CEE}, {0x010CAF, 0x010CEF}, {0x010CB0, 0x010CF0}, {0x010CB1, 0x010CF1}, {0x010CB2, 0x010CF2}, + {0x0118A0, 0x0118C0}, {0x0118A1, 
0x0118C1}, {0x0118A2, 0x0118C2}, {0x0118A3, 0x0118C3}, {0x0118A4, 0x0118C4}, + {0x0118A5, 0x0118C5}, {0x0118A6, 0x0118C6}, {0x0118A7, 0x0118C7}, {0x0118A8, 0x0118C8}, {0x0118A9, 0x0118C9}, + {0x0118AA, 0x0118CA}, {0x0118AB, 0x0118CB}, {0x0118AC, 0x0118CC}, {0x0118AD, 0x0118CD}, {0x0118AE, 0x0118CE}, + {0x0118AF, 0x0118CF}, {0x0118B0, 0x0118D0}, {0x0118B1, 0x0118D1}, {0x0118B2, 0x0118D2}, {0x0118B3, 0x0118D3}, + {0x0118B4, 0x0118D4}, {0x0118B5, 0x0118D5}, {0x0118B6, 0x0118D6}, {0x0118B7, 0x0118D7}, {0x0118B8, 0x0118D8}, + {0x0118B9, 0x0118D9}, {0x0118BA, 0x0118DA}, {0x0118BB, 0x0118DB}, {0x0118BC, 0x0118DC}, {0x0118BD, 0x0118DD}, + {0x0118BE, 0x0118DE}, {0x0118BF, 0x0118DF}, {0x016E40, 0x016E60}, {0x016E41, 0x016E61}, {0x016E42, 0x016E62}, + {0x016E43, 0x016E63}, {0x016E44, 0x016E64}, {0x016E45, 0x016E65}, {0x016E46, 0x016E66}, {0x016E47, 0x016E67}, + {0x016E48, 0x016E68}, {0x016E49, 0x016E69}, {0x016E4A, 0x016E6A}, {0x016E4B, 0x016E6B}, {0x016E4C, 0x016E6C}, + {0x016E4D, 0x016E6D}, {0x016E4E, 0x016E6E}, {0x016E4F, 0x016E6F}, {0x016E50, 0x016E70}, {0x016E51, 0x016E71}, + {0x016E52, 0x016E72}, {0x016E53, 0x016E73}, {0x016E54, 0x016E74}, {0x016E55, 0x016E75}, {0x016E56, 0x016E76}, + {0x016E57, 0x016E77}, {0x016E58, 0x016E78}, {0x016E59, 0x016E79}, {0x016E5A, 0x016E7A}, {0x016E5B, 0x016E7B}, + {0x016E5C, 0x016E7C}, {0x016E5D, 0x016E7D}, {0x016E5E, 0x016E7E}, {0x016E5F, 0x016E7F}, {0x01E900, 0x01E922}, + {0x01E901, 0x01E923}, {0x01E902, 0x01E924}, {0x01E903, 0x01E925}, {0x01E904, 0x01E926}, {0x01E905, 0x01E927}, + {0x01E906, 0x01E928}, {0x01E907, 0x01E929}, {0x01E908, 0x01E92A}, {0x01E909, 0x01E92B}, {0x01E90A, 0x01E92C}, + {0x01E90B, 0x01E92D}, {0x01E90C, 0x01E92E}, {0x01E90D, 0x01E92F}, {0x01E90E, 0x01E930}, {0x01E90F, 0x01E931}, + {0x01E910, 0x01E932}, {0x01E911, 0x01E933}, {0x01E912, 0x01E934}, {0x01E913, 0x01E935}, {0x01E914, 0x01E936}, + {0x01E915, 0x01E937}, {0x01E916, 0x01E938}, {0x01E917, 0x01E939}, {0x01E918, 0x01E93A}, {0x01E919, 0x01E93B}, + {0x01E91A, 0x01E93C}, {0x01E91B, 0x01E93D}, {0x01E91C, 0x01E93E}, {0x01E91D, 0x01E93F}, {0x01E91E, 0x01E940}, + {0x01E91F, 0x01E941}, {0x01E920, 0x01E942}, {0x01E921, 0x01E943}, +}; + +// list is always in ascending order, to enable binary search +const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase = { + {0x000061, 0x000041}, {0x000062, 0x000042}, {0x000063, 0x000043}, {0x000064, 0x000044}, {0x000065, 0x000045}, + {0x000066, 0x000046}, {0x000067, 0x000047}, {0x000068, 0x000048}, {0x000069, 0x000049}, {0x00006A, 0x00004A}, + {0x00006B, 0x00004B}, {0x00006C, 0x00004C}, {0x00006D, 0x00004D}, {0x00006E, 0x00004E}, {0x00006F, 0x00004F}, + {0x000070, 0x000050}, {0x000071, 0x000051}, {0x000072, 0x000052}, {0x000073, 0x000053}, {0x000074, 0x000054}, + {0x000075, 0x000055}, {0x000076, 0x000056}, {0x000077, 0x000057}, {0x000078, 0x000058}, {0x000079, 0x000059}, + {0x00007A, 0x00005A}, {0x0000B5, 0x00039C}, {0x0000E0, 0x0000C0}, {0x0000E1, 0x0000C1}, {0x0000E2, 0x0000C2}, + {0x0000E3, 0x0000C3}, {0x0000E4, 0x0000C4}, {0x0000E5, 0x0000C5}, {0x0000E6, 0x0000C6}, {0x0000E7, 0x0000C7}, + {0x0000E8, 0x0000C8}, {0x0000E9, 0x0000C9}, {0x0000EA, 0x0000CA}, {0x0000EB, 0x0000CB}, {0x0000EC, 0x0000CC}, + {0x0000ED, 0x0000CD}, {0x0000EE, 0x0000CE}, {0x0000EF, 0x0000CF}, {0x0000F0, 0x0000D0}, {0x0000F1, 0x0000D1}, + {0x0000F2, 0x0000D2}, {0x0000F3, 0x0000D3}, {0x0000F4, 0x0000D4}, {0x0000F5, 0x0000D5}, {0x0000F6, 0x0000D6}, + {0x0000F8, 0x0000D8}, {0x0000F9, 0x0000D9}, {0x0000FA, 0x0000DA}, {0x0000FB, 0x0000DB}, {0x0000FC, 0x0000DC}, + {0x0000FD, 0x0000DD}, {0x0000FE, 
0x0000DE}, {0x0000FF, 0x000178}, {0x000101, 0x000100}, {0x000103, 0x000102}, + {0x000105, 0x000104}, {0x000107, 0x000106}, {0x000109, 0x000108}, {0x00010B, 0x00010A}, {0x00010D, 0x00010C}, + {0x00010F, 0x00010E}, {0x000111, 0x000110}, {0x000113, 0x000112}, {0x000115, 0x000114}, {0x000117, 0x000116}, + {0x000119, 0x000118}, {0x00011B, 0x00011A}, {0x00011D, 0x00011C}, {0x00011F, 0x00011E}, {0x000121, 0x000120}, + {0x000123, 0x000122}, {0x000125, 0x000124}, {0x000127, 0x000126}, {0x000129, 0x000128}, {0x00012B, 0x00012A}, + {0x00012D, 0x00012C}, {0x00012F, 0x00012E}, {0x000131, 0x000049}, {0x000133, 0x000132}, {0x000135, 0x000134}, + {0x000137, 0x000136}, {0x00013A, 0x000139}, {0x00013C, 0x00013B}, {0x00013E, 0x00013D}, {0x000140, 0x00013F}, + {0x000142, 0x000141}, {0x000144, 0x000143}, {0x000146, 0x000145}, {0x000148, 0x000147}, {0x00014B, 0x00014A}, + {0x00014D, 0x00014C}, {0x00014F, 0x00014E}, {0x000151, 0x000150}, {0x000153, 0x000152}, {0x000155, 0x000154}, + {0x000157, 0x000156}, {0x000159, 0x000158}, {0x00015B, 0x00015A}, {0x00015D, 0x00015C}, {0x00015F, 0x00015E}, + {0x000161, 0x000160}, {0x000163, 0x000162}, {0x000165, 0x000164}, {0x000167, 0x000166}, {0x000169, 0x000168}, + {0x00016B, 0x00016A}, {0x00016D, 0x00016C}, {0x00016F, 0x00016E}, {0x000171, 0x000170}, {0x000173, 0x000172}, + {0x000175, 0x000174}, {0x000177, 0x000176}, {0x00017A, 0x000179}, {0x00017C, 0x00017B}, {0x00017E, 0x00017D}, + {0x00017F, 0x000053}, {0x000180, 0x000243}, {0x000183, 0x000182}, {0x000185, 0x000184}, {0x000188, 0x000187}, + {0x00018C, 0x00018B}, {0x000192, 0x000191}, {0x000195, 0x0001F6}, {0x000199, 0x000198}, {0x00019A, 0x00023D}, + {0x00019E, 0x000220}, {0x0001A1, 0x0001A0}, {0x0001A3, 0x0001A2}, {0x0001A5, 0x0001A4}, {0x0001A8, 0x0001A7}, + {0x0001AD, 0x0001AC}, {0x0001B0, 0x0001AF}, {0x0001B4, 0x0001B3}, {0x0001B6, 0x0001B5}, {0x0001B9, 0x0001B8}, + {0x0001BD, 0x0001BC}, {0x0001BF, 0x0001F7}, {0x0001C5, 0x0001C4}, {0x0001C6, 0x0001C4}, {0x0001C8, 0x0001C7}, + {0x0001C9, 0x0001C7}, {0x0001CB, 0x0001CA}, {0x0001CC, 0x0001CA}, {0x0001CE, 0x0001CD}, {0x0001D0, 0x0001CF}, + {0x0001D2, 0x0001D1}, {0x0001D4, 0x0001D3}, {0x0001D6, 0x0001D5}, {0x0001D8, 0x0001D7}, {0x0001DA, 0x0001D9}, + {0x0001DC, 0x0001DB}, {0x0001DD, 0x00018E}, {0x0001DF, 0x0001DE}, {0x0001E1, 0x0001E0}, {0x0001E3, 0x0001E2}, + {0x0001E5, 0x0001E4}, {0x0001E7, 0x0001E6}, {0x0001E9, 0x0001E8}, {0x0001EB, 0x0001EA}, {0x0001ED, 0x0001EC}, + {0x0001EF, 0x0001EE}, {0x0001F2, 0x0001F1}, {0x0001F3, 0x0001F1}, {0x0001F5, 0x0001F4}, {0x0001F9, 0x0001F8}, + {0x0001FB, 0x0001FA}, {0x0001FD, 0x0001FC}, {0x0001FF, 0x0001FE}, {0x000201, 0x000200}, {0x000203, 0x000202}, + {0x000205, 0x000204}, {0x000207, 0x000206}, {0x000209, 0x000208}, {0x00020B, 0x00020A}, {0x00020D, 0x00020C}, + {0x00020F, 0x00020E}, {0x000211, 0x000210}, {0x000213, 0x000212}, {0x000215, 0x000214}, {0x000217, 0x000216}, + {0x000219, 0x000218}, {0x00021B, 0x00021A}, {0x00021D, 0x00021C}, {0x00021F, 0x00021E}, {0x000223, 0x000222}, + {0x000225, 0x000224}, {0x000227, 0x000226}, {0x000229, 0x000228}, {0x00022B, 0x00022A}, {0x00022D, 0x00022C}, + {0x00022F, 0x00022E}, {0x000231, 0x000230}, {0x000233, 0x000232}, {0x00023C, 0x00023B}, {0x00023F, 0x002C7E}, + {0x000240, 0x002C7F}, {0x000242, 0x000241}, {0x000247, 0x000246}, {0x000249, 0x000248}, {0x00024B, 0x00024A}, + {0x00024D, 0x00024C}, {0x00024F, 0x00024E}, {0x000250, 0x002C6F}, {0x000251, 0x002C6D}, {0x000252, 0x002C70}, + {0x000253, 0x000181}, {0x000254, 0x000186}, {0x000256, 0x000189}, {0x000257, 0x00018A}, {0x000259, 0x00018F}, + 
{0x00025B, 0x000190}, {0x00025C, 0x00A7AB}, {0x000260, 0x000193}, {0x000261, 0x00A7AC}, {0x000263, 0x000194}, + {0x000265, 0x00A78D}, {0x000266, 0x00A7AA}, {0x000268, 0x000197}, {0x000269, 0x000196}, {0x00026A, 0x00A7AE}, + {0x00026B, 0x002C62}, {0x00026C, 0x00A7AD}, {0x00026F, 0x00019C}, {0x000271, 0x002C6E}, {0x000272, 0x00019D}, + {0x000275, 0x00019F}, {0x00027D, 0x002C64}, {0x000280, 0x0001A6}, {0x000282, 0x00A7C5}, {0x000283, 0x0001A9}, + {0x000287, 0x00A7B1}, {0x000288, 0x0001AE}, {0x000289, 0x000244}, {0x00028A, 0x0001B1}, {0x00028B, 0x0001B2}, + {0x00028C, 0x000245}, {0x000292, 0x0001B7}, {0x00029D, 0x00A7B2}, {0x00029E, 0x00A7B0}, {0x000345, 0x000399}, + {0x000371, 0x000370}, {0x000373, 0x000372}, {0x000377, 0x000376}, {0x00037B, 0x0003FD}, {0x00037C, 0x0003FE}, + {0x00037D, 0x0003FF}, {0x0003AC, 0x000386}, {0x0003AD, 0x000388}, {0x0003AE, 0x000389}, {0x0003AF, 0x00038A}, + {0x0003B1, 0x000391}, {0x0003B2, 0x000392}, {0x0003B3, 0x000393}, {0x0003B4, 0x000394}, {0x0003B5, 0x000395}, + {0x0003B6, 0x000396}, {0x0003B7, 0x000397}, {0x0003B8, 0x000398}, {0x0003B9, 0x000399}, {0x0003BA, 0x00039A}, + {0x0003BB, 0x00039B}, {0x0003BC, 0x00039C}, {0x0003BD, 0x00039D}, {0x0003BE, 0x00039E}, {0x0003BF, 0x00039F}, + {0x0003C0, 0x0003A0}, {0x0003C1, 0x0003A1}, {0x0003C2, 0x0003A3}, {0x0003C3, 0x0003A3}, {0x0003C4, 0x0003A4}, + {0x0003C5, 0x0003A5}, {0x0003C6, 0x0003A6}, {0x0003C7, 0x0003A7}, {0x0003C8, 0x0003A8}, {0x0003C9, 0x0003A9}, + {0x0003CA, 0x0003AA}, {0x0003CB, 0x0003AB}, {0x0003CC, 0x00038C}, {0x0003CD, 0x00038E}, {0x0003CE, 0x00038F}, + {0x0003D0, 0x000392}, {0x0003D1, 0x000398}, {0x0003D5, 0x0003A6}, {0x0003D6, 0x0003A0}, {0x0003D7, 0x0003CF}, + {0x0003D9, 0x0003D8}, {0x0003DB, 0x0003DA}, {0x0003DD, 0x0003DC}, {0x0003DF, 0x0003DE}, {0x0003E1, 0x0003E0}, + {0x0003E3, 0x0003E2}, {0x0003E5, 0x0003E4}, {0x0003E7, 0x0003E6}, {0x0003E9, 0x0003E8}, {0x0003EB, 0x0003EA}, + {0x0003ED, 0x0003EC}, {0x0003EF, 0x0003EE}, {0x0003F0, 0x00039A}, {0x0003F1, 0x0003A1}, {0x0003F2, 0x0003F9}, + {0x0003F3, 0x00037F}, {0x0003F5, 0x000395}, {0x0003F8, 0x0003F7}, {0x0003FB, 0x0003FA}, {0x000430, 0x000410}, + {0x000431, 0x000411}, {0x000432, 0x000412}, {0x000433, 0x000413}, {0x000434, 0x000414}, {0x000435, 0x000415}, + {0x000436, 0x000416}, {0x000437, 0x000417}, {0x000438, 0x000418}, {0x000439, 0x000419}, {0x00043A, 0x00041A}, + {0x00043B, 0x00041B}, {0x00043C, 0x00041C}, {0x00043D, 0x00041D}, {0x00043E, 0x00041E}, {0x00043F, 0x00041F}, + {0x000440, 0x000420}, {0x000441, 0x000421}, {0x000442, 0x000422}, {0x000443, 0x000423}, {0x000444, 0x000424}, + {0x000445, 0x000425}, {0x000446, 0x000426}, {0x000447, 0x000427}, {0x000448, 0x000428}, {0x000449, 0x000429}, + {0x00044A, 0x00042A}, {0x00044B, 0x00042B}, {0x00044C, 0x00042C}, {0x00044D, 0x00042D}, {0x00044E, 0x00042E}, + {0x00044F, 0x00042F}, {0x000450, 0x000400}, {0x000451, 0x000401}, {0x000452, 0x000402}, {0x000453, 0x000403}, + {0x000454, 0x000404}, {0x000455, 0x000405}, {0x000456, 0x000406}, {0x000457, 0x000407}, {0x000458, 0x000408}, + {0x000459, 0x000409}, {0x00045A, 0x00040A}, {0x00045B, 0x00040B}, {0x00045C, 0x00040C}, {0x00045D, 0x00040D}, + {0x00045E, 0x00040E}, {0x00045F, 0x00040F}, {0x000461, 0x000460}, {0x000463, 0x000462}, {0x000465, 0x000464}, + {0x000467, 0x000466}, {0x000469, 0x000468}, {0x00046B, 0x00046A}, {0x00046D, 0x00046C}, {0x00046F, 0x00046E}, + {0x000471, 0x000470}, {0x000473, 0x000472}, {0x000475, 0x000474}, {0x000477, 0x000476}, {0x000479, 0x000478}, + {0x00047B, 0x00047A}, {0x00047D, 0x00047C}, {0x00047F, 0x00047E}, {0x000481, 
0x000480}, {0x00048B, 0x00048A}, + {0x00048D, 0x00048C}, {0x00048F, 0x00048E}, {0x000491, 0x000490}, {0x000493, 0x000492}, {0x000495, 0x000494}, + {0x000497, 0x000496}, {0x000499, 0x000498}, {0x00049B, 0x00049A}, {0x00049D, 0x00049C}, {0x00049F, 0x00049E}, + {0x0004A1, 0x0004A0}, {0x0004A3, 0x0004A2}, {0x0004A5, 0x0004A4}, {0x0004A7, 0x0004A6}, {0x0004A9, 0x0004A8}, + {0x0004AB, 0x0004AA}, {0x0004AD, 0x0004AC}, {0x0004AF, 0x0004AE}, {0x0004B1, 0x0004B0}, {0x0004B3, 0x0004B2}, + {0x0004B5, 0x0004B4}, {0x0004B7, 0x0004B6}, {0x0004B9, 0x0004B8}, {0x0004BB, 0x0004BA}, {0x0004BD, 0x0004BC}, + {0x0004BF, 0x0004BE}, {0x0004C2, 0x0004C1}, {0x0004C4, 0x0004C3}, {0x0004C6, 0x0004C5}, {0x0004C8, 0x0004C7}, + {0x0004CA, 0x0004C9}, {0x0004CC, 0x0004CB}, {0x0004CE, 0x0004CD}, {0x0004CF, 0x0004C0}, {0x0004D1, 0x0004D0}, + {0x0004D3, 0x0004D2}, {0x0004D5, 0x0004D4}, {0x0004D7, 0x0004D6}, {0x0004D9, 0x0004D8}, {0x0004DB, 0x0004DA}, + {0x0004DD, 0x0004DC}, {0x0004DF, 0x0004DE}, {0x0004E1, 0x0004E0}, {0x0004E3, 0x0004E2}, {0x0004E5, 0x0004E4}, + {0x0004E7, 0x0004E6}, {0x0004E9, 0x0004E8}, {0x0004EB, 0x0004EA}, {0x0004ED, 0x0004EC}, {0x0004EF, 0x0004EE}, + {0x0004F1, 0x0004F0}, {0x0004F3, 0x0004F2}, {0x0004F5, 0x0004F4}, {0x0004F7, 0x0004F6}, {0x0004F9, 0x0004F8}, + {0x0004FB, 0x0004FA}, {0x0004FD, 0x0004FC}, {0x0004FF, 0x0004FE}, {0x000501, 0x000500}, {0x000503, 0x000502}, + {0x000505, 0x000504}, {0x000507, 0x000506}, {0x000509, 0x000508}, {0x00050B, 0x00050A}, {0x00050D, 0x00050C}, + {0x00050F, 0x00050E}, {0x000511, 0x000510}, {0x000513, 0x000512}, {0x000515, 0x000514}, {0x000517, 0x000516}, + {0x000519, 0x000518}, {0x00051B, 0x00051A}, {0x00051D, 0x00051C}, {0x00051F, 0x00051E}, {0x000521, 0x000520}, + {0x000523, 0x000522}, {0x000525, 0x000524}, {0x000527, 0x000526}, {0x000529, 0x000528}, {0x00052B, 0x00052A}, + {0x00052D, 0x00052C}, {0x00052F, 0x00052E}, {0x000561, 0x000531}, {0x000562, 0x000532}, {0x000563, 0x000533}, + {0x000564, 0x000534}, {0x000565, 0x000535}, {0x000566, 0x000536}, {0x000567, 0x000537}, {0x000568, 0x000538}, + {0x000569, 0x000539}, {0x00056A, 0x00053A}, {0x00056B, 0x00053B}, {0x00056C, 0x00053C}, {0x00056D, 0x00053D}, + {0x00056E, 0x00053E}, {0x00056F, 0x00053F}, {0x000570, 0x000540}, {0x000571, 0x000541}, {0x000572, 0x000542}, + {0x000573, 0x000543}, {0x000574, 0x000544}, {0x000575, 0x000545}, {0x000576, 0x000546}, {0x000577, 0x000547}, + {0x000578, 0x000548}, {0x000579, 0x000549}, {0x00057A, 0x00054A}, {0x00057B, 0x00054B}, {0x00057C, 0x00054C}, + {0x00057D, 0x00054D}, {0x00057E, 0x00054E}, {0x00057F, 0x00054F}, {0x000580, 0x000550}, {0x000581, 0x000551}, + {0x000582, 0x000552}, {0x000583, 0x000553}, {0x000584, 0x000554}, {0x000585, 0x000555}, {0x000586, 0x000556}, + {0x0010D0, 0x001C90}, {0x0010D1, 0x001C91}, {0x0010D2, 0x001C92}, {0x0010D3, 0x001C93}, {0x0010D4, 0x001C94}, + {0x0010D5, 0x001C95}, {0x0010D6, 0x001C96}, {0x0010D7, 0x001C97}, {0x0010D8, 0x001C98}, {0x0010D9, 0x001C99}, + {0x0010DA, 0x001C9A}, {0x0010DB, 0x001C9B}, {0x0010DC, 0x001C9C}, {0x0010DD, 0x001C9D}, {0x0010DE, 0x001C9E}, + {0x0010DF, 0x001C9F}, {0x0010E0, 0x001CA0}, {0x0010E1, 0x001CA1}, {0x0010E2, 0x001CA2}, {0x0010E3, 0x001CA3}, + {0x0010E4, 0x001CA4}, {0x0010E5, 0x001CA5}, {0x0010E6, 0x001CA6}, {0x0010E7, 0x001CA7}, {0x0010E8, 0x001CA8}, + {0x0010E9, 0x001CA9}, {0x0010EA, 0x001CAA}, {0x0010EB, 0x001CAB}, {0x0010EC, 0x001CAC}, {0x0010ED, 0x001CAD}, + {0x0010EE, 0x001CAE}, {0x0010EF, 0x001CAF}, {0x0010F0, 0x001CB0}, {0x0010F1, 0x001CB1}, {0x0010F2, 0x001CB2}, + {0x0010F3, 0x001CB3}, {0x0010F4, 0x001CB4}, 
{0x0010F5, 0x001CB5}, {0x0010F6, 0x001CB6}, {0x0010F7, 0x001CB7}, + {0x0010F8, 0x001CB8}, {0x0010F9, 0x001CB9}, {0x0010FA, 0x001CBA}, {0x0010FD, 0x001CBD}, {0x0010FE, 0x001CBE}, + {0x0010FF, 0x001CBF}, {0x0013F8, 0x0013F0}, {0x0013F9, 0x0013F1}, {0x0013FA, 0x0013F2}, {0x0013FB, 0x0013F3}, + {0x0013FC, 0x0013F4}, {0x0013FD, 0x0013F5}, {0x001C80, 0x000412}, {0x001C81, 0x000414}, {0x001C82, 0x00041E}, + {0x001C83, 0x000421}, {0x001C84, 0x000422}, {0x001C85, 0x000422}, {0x001C86, 0x00042A}, {0x001C87, 0x000462}, + {0x001C88, 0x00A64A}, {0x001D79, 0x00A77D}, {0x001D7D, 0x002C63}, {0x001D8E, 0x00A7C6}, {0x001E01, 0x001E00}, + {0x001E03, 0x001E02}, {0x001E05, 0x001E04}, {0x001E07, 0x001E06}, {0x001E09, 0x001E08}, {0x001E0B, 0x001E0A}, + {0x001E0D, 0x001E0C}, {0x001E0F, 0x001E0E}, {0x001E11, 0x001E10}, {0x001E13, 0x001E12}, {0x001E15, 0x001E14}, + {0x001E17, 0x001E16}, {0x001E19, 0x001E18}, {0x001E1B, 0x001E1A}, {0x001E1D, 0x001E1C}, {0x001E1F, 0x001E1E}, + {0x001E21, 0x001E20}, {0x001E23, 0x001E22}, {0x001E25, 0x001E24}, {0x001E27, 0x001E26}, {0x001E29, 0x001E28}, + {0x001E2B, 0x001E2A}, {0x001E2D, 0x001E2C}, {0x001E2F, 0x001E2E}, {0x001E31, 0x001E30}, {0x001E33, 0x001E32}, + {0x001E35, 0x001E34}, {0x001E37, 0x001E36}, {0x001E39, 0x001E38}, {0x001E3B, 0x001E3A}, {0x001E3D, 0x001E3C}, + {0x001E3F, 0x001E3E}, {0x001E41, 0x001E40}, {0x001E43, 0x001E42}, {0x001E45, 0x001E44}, {0x001E47, 0x001E46}, + {0x001E49, 0x001E48}, {0x001E4B, 0x001E4A}, {0x001E4D, 0x001E4C}, {0x001E4F, 0x001E4E}, {0x001E51, 0x001E50}, + {0x001E53, 0x001E52}, {0x001E55, 0x001E54}, {0x001E57, 0x001E56}, {0x001E59, 0x001E58}, {0x001E5B, 0x001E5A}, + {0x001E5D, 0x001E5C}, {0x001E5F, 0x001E5E}, {0x001E61, 0x001E60}, {0x001E63, 0x001E62}, {0x001E65, 0x001E64}, + {0x001E67, 0x001E66}, {0x001E69, 0x001E68}, {0x001E6B, 0x001E6A}, {0x001E6D, 0x001E6C}, {0x001E6F, 0x001E6E}, + {0x001E71, 0x001E70}, {0x001E73, 0x001E72}, {0x001E75, 0x001E74}, {0x001E77, 0x001E76}, {0x001E79, 0x001E78}, + {0x001E7B, 0x001E7A}, {0x001E7D, 0x001E7C}, {0x001E7F, 0x001E7E}, {0x001E81, 0x001E80}, {0x001E83, 0x001E82}, + {0x001E85, 0x001E84}, {0x001E87, 0x001E86}, {0x001E89, 0x001E88}, {0x001E8B, 0x001E8A}, {0x001E8D, 0x001E8C}, + {0x001E8F, 0x001E8E}, {0x001E91, 0x001E90}, {0x001E93, 0x001E92}, {0x001E95, 0x001E94}, {0x001E9B, 0x001E60}, + {0x001EA1, 0x001EA0}, {0x001EA3, 0x001EA2}, {0x001EA5, 0x001EA4}, {0x001EA7, 0x001EA6}, {0x001EA9, 0x001EA8}, + {0x001EAB, 0x001EAA}, {0x001EAD, 0x001EAC}, {0x001EAF, 0x001EAE}, {0x001EB1, 0x001EB0}, {0x001EB3, 0x001EB2}, + {0x001EB5, 0x001EB4}, {0x001EB7, 0x001EB6}, {0x001EB9, 0x001EB8}, {0x001EBB, 0x001EBA}, {0x001EBD, 0x001EBC}, + {0x001EBF, 0x001EBE}, {0x001EC1, 0x001EC0}, {0x001EC3, 0x001EC2}, {0x001EC5, 0x001EC4}, {0x001EC7, 0x001EC6}, + {0x001EC9, 0x001EC8}, {0x001ECB, 0x001ECA}, {0x001ECD, 0x001ECC}, {0x001ECF, 0x001ECE}, {0x001ED1, 0x001ED0}, + {0x001ED3, 0x001ED2}, {0x001ED5, 0x001ED4}, {0x001ED7, 0x001ED6}, {0x001ED9, 0x001ED8}, {0x001EDB, 0x001EDA}, + {0x001EDD, 0x001EDC}, {0x001EDF, 0x001EDE}, {0x001EE1, 0x001EE0}, {0x001EE3, 0x001EE2}, {0x001EE5, 0x001EE4}, + {0x001EE7, 0x001EE6}, {0x001EE9, 0x001EE8}, {0x001EEB, 0x001EEA}, {0x001EED, 0x001EEC}, {0x001EEF, 0x001EEE}, + {0x001EF1, 0x001EF0}, {0x001EF3, 0x001EF2}, {0x001EF5, 0x001EF4}, {0x001EF7, 0x001EF6}, {0x001EF9, 0x001EF8}, + {0x001EFB, 0x001EFA}, {0x001EFD, 0x001EFC}, {0x001EFF, 0x001EFE}, {0x001F00, 0x001F08}, {0x001F01, 0x001F09}, + {0x001F02, 0x001F0A}, {0x001F03, 0x001F0B}, {0x001F04, 0x001F0C}, {0x001F05, 0x001F0D}, {0x001F06, 0x001F0E}, + {0x001F07, 
0x001F0F}, {0x001F10, 0x001F18}, {0x001F11, 0x001F19}, {0x001F12, 0x001F1A}, {0x001F13, 0x001F1B}, + {0x001F14, 0x001F1C}, {0x001F15, 0x001F1D}, {0x001F20, 0x001F28}, {0x001F21, 0x001F29}, {0x001F22, 0x001F2A}, + {0x001F23, 0x001F2B}, {0x001F24, 0x001F2C}, {0x001F25, 0x001F2D}, {0x001F26, 0x001F2E}, {0x001F27, 0x001F2F}, + {0x001F30, 0x001F38}, {0x001F31, 0x001F39}, {0x001F32, 0x001F3A}, {0x001F33, 0x001F3B}, {0x001F34, 0x001F3C}, + {0x001F35, 0x001F3D}, {0x001F36, 0x001F3E}, {0x001F37, 0x001F3F}, {0x001F40, 0x001F48}, {0x001F41, 0x001F49}, + {0x001F42, 0x001F4A}, {0x001F43, 0x001F4B}, {0x001F44, 0x001F4C}, {0x001F45, 0x001F4D}, {0x001F51, 0x001F59}, + {0x001F53, 0x001F5B}, {0x001F55, 0x001F5D}, {0x001F57, 0x001F5F}, {0x001F60, 0x001F68}, {0x001F61, 0x001F69}, + {0x001F62, 0x001F6A}, {0x001F63, 0x001F6B}, {0x001F64, 0x001F6C}, {0x001F65, 0x001F6D}, {0x001F66, 0x001F6E}, + {0x001F67, 0x001F6F}, {0x001F70, 0x001FBA}, {0x001F71, 0x001FBB}, {0x001F72, 0x001FC8}, {0x001F73, 0x001FC9}, + {0x001F74, 0x001FCA}, {0x001F75, 0x001FCB}, {0x001F76, 0x001FDA}, {0x001F77, 0x001FDB}, {0x001F78, 0x001FF8}, + {0x001F79, 0x001FF9}, {0x001F7A, 0x001FEA}, {0x001F7B, 0x001FEB}, {0x001F7C, 0x001FFA}, {0x001F7D, 0x001FFB}, + {0x001F80, 0x001F88}, {0x001F81, 0x001F89}, {0x001F82, 0x001F8A}, {0x001F83, 0x001F8B}, {0x001F84, 0x001F8C}, + {0x001F85, 0x001F8D}, {0x001F86, 0x001F8E}, {0x001F87, 0x001F8F}, {0x001F90, 0x001F98}, {0x001F91, 0x001F99}, + {0x001F92, 0x001F9A}, {0x001F93, 0x001F9B}, {0x001F94, 0x001F9C}, {0x001F95, 0x001F9D}, {0x001F96, 0x001F9E}, + {0x001F97, 0x001F9F}, {0x001FA0, 0x001FA8}, {0x001FA1, 0x001FA9}, {0x001FA2, 0x001FAA}, {0x001FA3, 0x001FAB}, + {0x001FA4, 0x001FAC}, {0x001FA5, 0x001FAD}, {0x001FA6, 0x001FAE}, {0x001FA7, 0x001FAF}, {0x001FB0, 0x001FB8}, + {0x001FB1, 0x001FB9}, {0x001FB3, 0x001FBC}, {0x001FBE, 0x000399}, {0x001FC3, 0x001FCC}, {0x001FD0, 0x001FD8}, + {0x001FD1, 0x001FD9}, {0x001FE0, 0x001FE8}, {0x001FE1, 0x001FE9}, {0x001FE5, 0x001FEC}, {0x001FF3, 0x001FFC}, + {0x00214E, 0x002132}, {0x002170, 0x002160}, {0x002171, 0x002161}, {0x002172, 0x002162}, {0x002173, 0x002163}, + {0x002174, 0x002164}, {0x002175, 0x002165}, {0x002176, 0x002166}, {0x002177, 0x002167}, {0x002178, 0x002168}, + {0x002179, 0x002169}, {0x00217A, 0x00216A}, {0x00217B, 0x00216B}, {0x00217C, 0x00216C}, {0x00217D, 0x00216D}, + {0x00217E, 0x00216E}, {0x00217F, 0x00216F}, {0x002184, 0x002183}, {0x0024D0, 0x0024B6}, {0x0024D1, 0x0024B7}, + {0x0024D2, 0x0024B8}, {0x0024D3, 0x0024B9}, {0x0024D4, 0x0024BA}, {0x0024D5, 0x0024BB}, {0x0024D6, 0x0024BC}, + {0x0024D7, 0x0024BD}, {0x0024D8, 0x0024BE}, {0x0024D9, 0x0024BF}, {0x0024DA, 0x0024C0}, {0x0024DB, 0x0024C1}, + {0x0024DC, 0x0024C2}, {0x0024DD, 0x0024C3}, {0x0024DE, 0x0024C4}, {0x0024DF, 0x0024C5}, {0x0024E0, 0x0024C6}, + {0x0024E1, 0x0024C7}, {0x0024E2, 0x0024C8}, {0x0024E3, 0x0024C9}, {0x0024E4, 0x0024CA}, {0x0024E5, 0x0024CB}, + {0x0024E6, 0x0024CC}, {0x0024E7, 0x0024CD}, {0x0024E8, 0x0024CE}, {0x0024E9, 0x0024CF}, {0x002C30, 0x002C00}, + {0x002C31, 0x002C01}, {0x002C32, 0x002C02}, {0x002C33, 0x002C03}, {0x002C34, 0x002C04}, {0x002C35, 0x002C05}, + {0x002C36, 0x002C06}, {0x002C37, 0x002C07}, {0x002C38, 0x002C08}, {0x002C39, 0x002C09}, {0x002C3A, 0x002C0A}, + {0x002C3B, 0x002C0B}, {0x002C3C, 0x002C0C}, {0x002C3D, 0x002C0D}, {0x002C3E, 0x002C0E}, {0x002C3F, 0x002C0F}, + {0x002C40, 0x002C10}, {0x002C41, 0x002C11}, {0x002C42, 0x002C12}, {0x002C43, 0x002C13}, {0x002C44, 0x002C14}, + {0x002C45, 0x002C15}, {0x002C46, 0x002C16}, {0x002C47, 0x002C17}, {0x002C48, 0x002C18}, 
{0x002C49, 0x002C19}, + {0x002C4A, 0x002C1A}, {0x002C4B, 0x002C1B}, {0x002C4C, 0x002C1C}, {0x002C4D, 0x002C1D}, {0x002C4E, 0x002C1E}, + {0x002C4F, 0x002C1F}, {0x002C50, 0x002C20}, {0x002C51, 0x002C21}, {0x002C52, 0x002C22}, {0x002C53, 0x002C23}, + {0x002C54, 0x002C24}, {0x002C55, 0x002C25}, {0x002C56, 0x002C26}, {0x002C57, 0x002C27}, {0x002C58, 0x002C28}, + {0x002C59, 0x002C29}, {0x002C5A, 0x002C2A}, {0x002C5B, 0x002C2B}, {0x002C5C, 0x002C2C}, {0x002C5D, 0x002C2D}, + {0x002C5E, 0x002C2E}, {0x002C5F, 0x002C2F}, {0x002C61, 0x002C60}, {0x002C65, 0x00023A}, {0x002C66, 0x00023E}, + {0x002C68, 0x002C67}, {0x002C6A, 0x002C69}, {0x002C6C, 0x002C6B}, {0x002C73, 0x002C72}, {0x002C76, 0x002C75}, + {0x002C81, 0x002C80}, {0x002C83, 0x002C82}, {0x002C85, 0x002C84}, {0x002C87, 0x002C86}, {0x002C89, 0x002C88}, + {0x002C8B, 0x002C8A}, {0x002C8D, 0x002C8C}, {0x002C8F, 0x002C8E}, {0x002C91, 0x002C90}, {0x002C93, 0x002C92}, + {0x002C95, 0x002C94}, {0x002C97, 0x002C96}, {0x002C99, 0x002C98}, {0x002C9B, 0x002C9A}, {0x002C9D, 0x002C9C}, + {0x002C9F, 0x002C9E}, {0x002CA1, 0x002CA0}, {0x002CA3, 0x002CA2}, {0x002CA5, 0x002CA4}, {0x002CA7, 0x002CA6}, + {0x002CA9, 0x002CA8}, {0x002CAB, 0x002CAA}, {0x002CAD, 0x002CAC}, {0x002CAF, 0x002CAE}, {0x002CB1, 0x002CB0}, + {0x002CB3, 0x002CB2}, {0x002CB5, 0x002CB4}, {0x002CB7, 0x002CB6}, {0x002CB9, 0x002CB8}, {0x002CBB, 0x002CBA}, + {0x002CBD, 0x002CBC}, {0x002CBF, 0x002CBE}, {0x002CC1, 0x002CC0}, {0x002CC3, 0x002CC2}, {0x002CC5, 0x002CC4}, + {0x002CC7, 0x002CC6}, {0x002CC9, 0x002CC8}, {0x002CCB, 0x002CCA}, {0x002CCD, 0x002CCC}, {0x002CCF, 0x002CCE}, + {0x002CD1, 0x002CD0}, {0x002CD3, 0x002CD2}, {0x002CD5, 0x002CD4}, {0x002CD7, 0x002CD6}, {0x002CD9, 0x002CD8}, + {0x002CDB, 0x002CDA}, {0x002CDD, 0x002CDC}, {0x002CDF, 0x002CDE}, {0x002CE1, 0x002CE0}, {0x002CE3, 0x002CE2}, + {0x002CEC, 0x002CEB}, {0x002CEE, 0x002CED}, {0x002CF3, 0x002CF2}, {0x002D00, 0x0010A0}, {0x002D01, 0x0010A1}, + {0x002D02, 0x0010A2}, {0x002D03, 0x0010A3}, {0x002D04, 0x0010A4}, {0x002D05, 0x0010A5}, {0x002D06, 0x0010A6}, + {0x002D07, 0x0010A7}, {0x002D08, 0x0010A8}, {0x002D09, 0x0010A9}, {0x002D0A, 0x0010AA}, {0x002D0B, 0x0010AB}, + {0x002D0C, 0x0010AC}, {0x002D0D, 0x0010AD}, {0x002D0E, 0x0010AE}, {0x002D0F, 0x0010AF}, {0x002D10, 0x0010B0}, + {0x002D11, 0x0010B1}, {0x002D12, 0x0010B2}, {0x002D13, 0x0010B3}, {0x002D14, 0x0010B4}, {0x002D15, 0x0010B5}, + {0x002D16, 0x0010B6}, {0x002D17, 0x0010B7}, {0x002D18, 0x0010B8}, {0x002D19, 0x0010B9}, {0x002D1A, 0x0010BA}, + {0x002D1B, 0x0010BB}, {0x002D1C, 0x0010BC}, {0x002D1D, 0x0010BD}, {0x002D1E, 0x0010BE}, {0x002D1F, 0x0010BF}, + {0x002D20, 0x0010C0}, {0x002D21, 0x0010C1}, {0x002D22, 0x0010C2}, {0x002D23, 0x0010C3}, {0x002D24, 0x0010C4}, + {0x002D25, 0x0010C5}, {0x002D27, 0x0010C7}, {0x002D2D, 0x0010CD}, {0x00A641, 0x00A640}, {0x00A643, 0x00A642}, + {0x00A645, 0x00A644}, {0x00A647, 0x00A646}, {0x00A649, 0x00A648}, {0x00A64B, 0x00A64A}, {0x00A64D, 0x00A64C}, + {0x00A64F, 0x00A64E}, {0x00A651, 0x00A650}, {0x00A653, 0x00A652}, {0x00A655, 0x00A654}, {0x00A657, 0x00A656}, + {0x00A659, 0x00A658}, {0x00A65B, 0x00A65A}, {0x00A65D, 0x00A65C}, {0x00A65F, 0x00A65E}, {0x00A661, 0x00A660}, + {0x00A663, 0x00A662}, {0x00A665, 0x00A664}, {0x00A667, 0x00A666}, {0x00A669, 0x00A668}, {0x00A66B, 0x00A66A}, + {0x00A66D, 0x00A66C}, {0x00A681, 0x00A680}, {0x00A683, 0x00A682}, {0x00A685, 0x00A684}, {0x00A687, 0x00A686}, + {0x00A689, 0x00A688}, {0x00A68B, 0x00A68A}, {0x00A68D, 0x00A68C}, {0x00A68F, 0x00A68E}, {0x00A691, 0x00A690}, + {0x00A693, 0x00A692}, {0x00A695, 0x00A694}, {0x00A697, 
0x00A696}, {0x00A699, 0x00A698}, {0x00A69B, 0x00A69A}, + {0x00A723, 0x00A722}, {0x00A725, 0x00A724}, {0x00A727, 0x00A726}, {0x00A729, 0x00A728}, {0x00A72B, 0x00A72A}, + {0x00A72D, 0x00A72C}, {0x00A72F, 0x00A72E}, {0x00A733, 0x00A732}, {0x00A735, 0x00A734}, {0x00A737, 0x00A736}, + {0x00A739, 0x00A738}, {0x00A73B, 0x00A73A}, {0x00A73D, 0x00A73C}, {0x00A73F, 0x00A73E}, {0x00A741, 0x00A740}, + {0x00A743, 0x00A742}, {0x00A745, 0x00A744}, {0x00A747, 0x00A746}, {0x00A749, 0x00A748}, {0x00A74B, 0x00A74A}, + {0x00A74D, 0x00A74C}, {0x00A74F, 0x00A74E}, {0x00A751, 0x00A750}, {0x00A753, 0x00A752}, {0x00A755, 0x00A754}, + {0x00A757, 0x00A756}, {0x00A759, 0x00A758}, {0x00A75B, 0x00A75A}, {0x00A75D, 0x00A75C}, {0x00A75F, 0x00A75E}, + {0x00A761, 0x00A760}, {0x00A763, 0x00A762}, {0x00A765, 0x00A764}, {0x00A767, 0x00A766}, {0x00A769, 0x00A768}, + {0x00A76B, 0x00A76A}, {0x00A76D, 0x00A76C}, {0x00A76F, 0x00A76E}, {0x00A77A, 0x00A779}, {0x00A77C, 0x00A77B}, + {0x00A77F, 0x00A77E}, {0x00A781, 0x00A780}, {0x00A783, 0x00A782}, {0x00A785, 0x00A784}, {0x00A787, 0x00A786}, + {0x00A78C, 0x00A78B}, {0x00A791, 0x00A790}, {0x00A793, 0x00A792}, {0x00A794, 0x00A7C4}, {0x00A797, 0x00A796}, + {0x00A799, 0x00A798}, {0x00A79B, 0x00A79A}, {0x00A79D, 0x00A79C}, {0x00A79F, 0x00A79E}, {0x00A7A1, 0x00A7A0}, + {0x00A7A3, 0x00A7A2}, {0x00A7A5, 0x00A7A4}, {0x00A7A7, 0x00A7A6}, {0x00A7A9, 0x00A7A8}, {0x00A7B5, 0x00A7B4}, + {0x00A7B7, 0x00A7B6}, {0x00A7B9, 0x00A7B8}, {0x00A7BB, 0x00A7BA}, {0x00A7BD, 0x00A7BC}, {0x00A7BF, 0x00A7BE}, + {0x00A7C1, 0x00A7C0}, {0x00A7C3, 0x00A7C2}, {0x00A7C8, 0x00A7C7}, {0x00A7CA, 0x00A7C9}, {0x00A7D1, 0x00A7D0}, + {0x00A7D7, 0x00A7D6}, {0x00A7D9, 0x00A7D8}, {0x00A7F6, 0x00A7F5}, {0x00AB53, 0x00A7B3}, {0x00AB70, 0x0013A0}, + {0x00AB71, 0x0013A1}, {0x00AB72, 0x0013A2}, {0x00AB73, 0x0013A3}, {0x00AB74, 0x0013A4}, {0x00AB75, 0x0013A5}, + {0x00AB76, 0x0013A6}, {0x00AB77, 0x0013A7}, {0x00AB78, 0x0013A8}, {0x00AB79, 0x0013A9}, {0x00AB7A, 0x0013AA}, + {0x00AB7B, 0x0013AB}, {0x00AB7C, 0x0013AC}, {0x00AB7D, 0x0013AD}, {0x00AB7E, 0x0013AE}, {0x00AB7F, 0x0013AF}, + {0x00AB80, 0x0013B0}, {0x00AB81, 0x0013B1}, {0x00AB82, 0x0013B2}, {0x00AB83, 0x0013B3}, {0x00AB84, 0x0013B4}, + {0x00AB85, 0x0013B5}, {0x00AB86, 0x0013B6}, {0x00AB87, 0x0013B7}, {0x00AB88, 0x0013B8}, {0x00AB89, 0x0013B9}, + {0x00AB8A, 0x0013BA}, {0x00AB8B, 0x0013BB}, {0x00AB8C, 0x0013BC}, {0x00AB8D, 0x0013BD}, {0x00AB8E, 0x0013BE}, + {0x00AB8F, 0x0013BF}, {0x00AB90, 0x0013C0}, {0x00AB91, 0x0013C1}, {0x00AB92, 0x0013C2}, {0x00AB93, 0x0013C3}, + {0x00AB94, 0x0013C4}, {0x00AB95, 0x0013C5}, {0x00AB96, 0x0013C6}, {0x00AB97, 0x0013C7}, {0x00AB98, 0x0013C8}, + {0x00AB99, 0x0013C9}, {0x00AB9A, 0x0013CA}, {0x00AB9B, 0x0013CB}, {0x00AB9C, 0x0013CC}, {0x00AB9D, 0x0013CD}, + {0x00AB9E, 0x0013CE}, {0x00AB9F, 0x0013CF}, {0x00ABA0, 0x0013D0}, {0x00ABA1, 0x0013D1}, {0x00ABA2, 0x0013D2}, + {0x00ABA3, 0x0013D3}, {0x00ABA4, 0x0013D4}, {0x00ABA5, 0x0013D5}, {0x00ABA6, 0x0013D6}, {0x00ABA7, 0x0013D7}, + {0x00ABA8, 0x0013D8}, {0x00ABA9, 0x0013D9}, {0x00ABAA, 0x0013DA}, {0x00ABAB, 0x0013DB}, {0x00ABAC, 0x0013DC}, + {0x00ABAD, 0x0013DD}, {0x00ABAE, 0x0013DE}, {0x00ABAF, 0x0013DF}, {0x00ABB0, 0x0013E0}, {0x00ABB1, 0x0013E1}, + {0x00ABB2, 0x0013E2}, {0x00ABB3, 0x0013E3}, {0x00ABB4, 0x0013E4}, {0x00ABB5, 0x0013E5}, {0x00ABB6, 0x0013E6}, + {0x00ABB7, 0x0013E7}, {0x00ABB8, 0x0013E8}, {0x00ABB9, 0x0013E9}, {0x00ABBA, 0x0013EA}, {0x00ABBB, 0x0013EB}, + {0x00ABBC, 0x0013EC}, {0x00ABBD, 0x0013ED}, {0x00ABBE, 0x0013EE}, {0x00ABBF, 0x0013EF}, {0x00FF41, 0x00FF21}, + {0x00FF42, 0x00FF22}, 
{0x00FF43, 0x00FF23}, {0x00FF44, 0x00FF24}, {0x00FF45, 0x00FF25}, {0x00FF46, 0x00FF26}, + {0x00FF47, 0x00FF27}, {0x00FF48, 0x00FF28}, {0x00FF49, 0x00FF29}, {0x00FF4A, 0x00FF2A}, {0x00FF4B, 0x00FF2B}, + {0x00FF4C, 0x00FF2C}, {0x00FF4D, 0x00FF2D}, {0x00FF4E, 0x00FF2E}, {0x00FF4F, 0x00FF2F}, {0x00FF50, 0x00FF30}, + {0x00FF51, 0x00FF31}, {0x00FF52, 0x00FF32}, {0x00FF53, 0x00FF33}, {0x00FF54, 0x00FF34}, {0x00FF55, 0x00FF35}, + {0x00FF56, 0x00FF36}, {0x00FF57, 0x00FF37}, {0x00FF58, 0x00FF38}, {0x00FF59, 0x00FF39}, {0x00FF5A, 0x00FF3A}, + {0x010428, 0x010400}, {0x010429, 0x010401}, {0x01042A, 0x010402}, {0x01042B, 0x010403}, {0x01042C, 0x010404}, + {0x01042D, 0x010405}, {0x01042E, 0x010406}, {0x01042F, 0x010407}, {0x010430, 0x010408}, {0x010431, 0x010409}, + {0x010432, 0x01040A}, {0x010433, 0x01040B}, {0x010434, 0x01040C}, {0x010435, 0x01040D}, {0x010436, 0x01040E}, + {0x010437, 0x01040F}, {0x010438, 0x010410}, {0x010439, 0x010411}, {0x01043A, 0x010412}, {0x01043B, 0x010413}, + {0x01043C, 0x010414}, {0x01043D, 0x010415}, {0x01043E, 0x010416}, {0x01043F, 0x010417}, {0x010440, 0x010418}, + {0x010441, 0x010419}, {0x010442, 0x01041A}, {0x010443, 0x01041B}, {0x010444, 0x01041C}, {0x010445, 0x01041D}, + {0x010446, 0x01041E}, {0x010447, 0x01041F}, {0x010448, 0x010420}, {0x010449, 0x010421}, {0x01044A, 0x010422}, + {0x01044B, 0x010423}, {0x01044C, 0x010424}, {0x01044D, 0x010425}, {0x01044E, 0x010426}, {0x01044F, 0x010427}, + {0x0104D8, 0x0104B0}, {0x0104D9, 0x0104B1}, {0x0104DA, 0x0104B2}, {0x0104DB, 0x0104B3}, {0x0104DC, 0x0104B4}, + {0x0104DD, 0x0104B5}, {0x0104DE, 0x0104B6}, {0x0104DF, 0x0104B7}, {0x0104E0, 0x0104B8}, {0x0104E1, 0x0104B9}, + {0x0104E2, 0x0104BA}, {0x0104E3, 0x0104BB}, {0x0104E4, 0x0104BC}, {0x0104E5, 0x0104BD}, {0x0104E6, 0x0104BE}, + {0x0104E7, 0x0104BF}, {0x0104E8, 0x0104C0}, {0x0104E9, 0x0104C1}, {0x0104EA, 0x0104C2}, {0x0104EB, 0x0104C3}, + {0x0104EC, 0x0104C4}, {0x0104ED, 0x0104C5}, {0x0104EE, 0x0104C6}, {0x0104EF, 0x0104C7}, {0x0104F0, 0x0104C8}, + {0x0104F1, 0x0104C9}, {0x0104F2, 0x0104CA}, {0x0104F3, 0x0104CB}, {0x0104F4, 0x0104CC}, {0x0104F5, 0x0104CD}, + {0x0104F6, 0x0104CE}, {0x0104F7, 0x0104CF}, {0x0104F8, 0x0104D0}, {0x0104F9, 0x0104D1}, {0x0104FA, 0x0104D2}, + {0x0104FB, 0x0104D3}, {0x010597, 0x010570}, {0x010598, 0x010571}, {0x010599, 0x010572}, {0x01059A, 0x010573}, + {0x01059B, 0x010574}, {0x01059C, 0x010575}, {0x01059D, 0x010576}, {0x01059E, 0x010577}, {0x01059F, 0x010578}, + {0x0105A0, 0x010579}, {0x0105A1, 0x01057A}, {0x0105A3, 0x01057C}, {0x0105A4, 0x01057D}, {0x0105A5, 0x01057E}, + {0x0105A6, 0x01057F}, {0x0105A7, 0x010580}, {0x0105A8, 0x010581}, {0x0105A9, 0x010582}, {0x0105AA, 0x010583}, + {0x0105AB, 0x010584}, {0x0105AC, 0x010585}, {0x0105AD, 0x010586}, {0x0105AE, 0x010587}, {0x0105AF, 0x010588}, + {0x0105B0, 0x010589}, {0x0105B1, 0x01058A}, {0x0105B3, 0x01058C}, {0x0105B4, 0x01058D}, {0x0105B5, 0x01058E}, + {0x0105B6, 0x01058F}, {0x0105B7, 0x010590}, {0x0105B8, 0x010591}, {0x0105B9, 0x010592}, {0x0105BB, 0x010594}, + {0x0105BC, 0x010595}, {0x010CC0, 0x010C80}, {0x010CC1, 0x010C81}, {0x010CC2, 0x010C82}, {0x010CC3, 0x010C83}, + {0x010CC4, 0x010C84}, {0x010CC5, 0x010C85}, {0x010CC6, 0x010C86}, {0x010CC7, 0x010C87}, {0x010CC8, 0x010C88}, + {0x010CC9, 0x010C89}, {0x010CCA, 0x010C8A}, {0x010CCB, 0x010C8B}, {0x010CCC, 0x010C8C}, {0x010CCD, 0x010C8D}, + {0x010CCE, 0x010C8E}, {0x010CCF, 0x010C8F}, {0x010CD0, 0x010C90}, {0x010CD1, 0x010C91}, {0x010CD2, 0x010C92}, + {0x010CD3, 0x010C93}, {0x010CD4, 0x010C94}, {0x010CD5, 0x010C95}, {0x010CD6, 0x010C96}, {0x010CD7, 
0x010C97}, + {0x010CD8, 0x010C98}, {0x010CD9, 0x010C99}, {0x010CDA, 0x010C9A}, {0x010CDB, 0x010C9B}, {0x010CDC, 0x010C9C}, + {0x010CDD, 0x010C9D}, {0x010CDE, 0x010C9E}, {0x010CDF, 0x010C9F}, {0x010CE0, 0x010CA0}, {0x010CE1, 0x010CA1}, + {0x010CE2, 0x010CA2}, {0x010CE3, 0x010CA3}, {0x010CE4, 0x010CA4}, {0x010CE5, 0x010CA5}, {0x010CE6, 0x010CA6}, + {0x010CE7, 0x010CA7}, {0x010CE8, 0x010CA8}, {0x010CE9, 0x010CA9}, {0x010CEA, 0x010CAA}, {0x010CEB, 0x010CAB}, + {0x010CEC, 0x010CAC}, {0x010CED, 0x010CAD}, {0x010CEE, 0x010CAE}, {0x010CEF, 0x010CAF}, {0x010CF0, 0x010CB0}, + {0x010CF1, 0x010CB1}, {0x010CF2, 0x010CB2}, {0x0118C0, 0x0118A0}, {0x0118C1, 0x0118A1}, {0x0118C2, 0x0118A2}, + {0x0118C3, 0x0118A3}, {0x0118C4, 0x0118A4}, {0x0118C5, 0x0118A5}, {0x0118C6, 0x0118A6}, {0x0118C7, 0x0118A7}, + {0x0118C8, 0x0118A8}, {0x0118C9, 0x0118A9}, {0x0118CA, 0x0118AA}, {0x0118CB, 0x0118AB}, {0x0118CC, 0x0118AC}, + {0x0118CD, 0x0118AD}, {0x0118CE, 0x0118AE}, {0x0118CF, 0x0118AF}, {0x0118D0, 0x0118B0}, {0x0118D1, 0x0118B1}, + {0x0118D2, 0x0118B2}, {0x0118D3, 0x0118B3}, {0x0118D4, 0x0118B4}, {0x0118D5, 0x0118B5}, {0x0118D6, 0x0118B6}, + {0x0118D7, 0x0118B7}, {0x0118D8, 0x0118B8}, {0x0118D9, 0x0118B9}, {0x0118DA, 0x0118BA}, {0x0118DB, 0x0118BB}, + {0x0118DC, 0x0118BC}, {0x0118DD, 0x0118BD}, {0x0118DE, 0x0118BE}, {0x0118DF, 0x0118BF}, {0x016E60, 0x016E40}, + {0x016E61, 0x016E41}, {0x016E62, 0x016E42}, {0x016E63, 0x016E43}, {0x016E64, 0x016E44}, {0x016E65, 0x016E45}, + {0x016E66, 0x016E46}, {0x016E67, 0x016E47}, {0x016E68, 0x016E48}, {0x016E69, 0x016E49}, {0x016E6A, 0x016E4A}, + {0x016E6B, 0x016E4B}, {0x016E6C, 0x016E4C}, {0x016E6D, 0x016E4D}, {0x016E6E, 0x016E4E}, {0x016E6F, 0x016E4F}, + {0x016E70, 0x016E50}, {0x016E71, 0x016E51}, {0x016E72, 0x016E52}, {0x016E73, 0x016E53}, {0x016E74, 0x016E54}, + {0x016E75, 0x016E55}, {0x016E76, 0x016E56}, {0x016E77, 0x016E57}, {0x016E78, 0x016E58}, {0x016E79, 0x016E59}, + {0x016E7A, 0x016E5A}, {0x016E7B, 0x016E5B}, {0x016E7C, 0x016E5C}, {0x016E7D, 0x016E5D}, {0x016E7E, 0x016E5E}, + {0x016E7F, 0x016E5F}, {0x01E922, 0x01E900}, {0x01E923, 0x01E901}, {0x01E924, 0x01E902}, {0x01E925, 0x01E903}, + {0x01E926, 0x01E904}, {0x01E927, 0x01E905}, {0x01E928, 0x01E906}, {0x01E929, 0x01E907}, {0x01E92A, 0x01E908}, + {0x01E92B, 0x01E909}, {0x01E92C, 0x01E90A}, {0x01E92D, 0x01E90B}, {0x01E92E, 0x01E90C}, {0x01E92F, 0x01E90D}, + {0x01E930, 0x01E90E}, {0x01E931, 0x01E90F}, {0x01E932, 0x01E910}, {0x01E933, 0x01E911}, {0x01E934, 0x01E912}, + {0x01E935, 0x01E913}, {0x01E936, 0x01E914}, {0x01E937, 0x01E915}, {0x01E938, 0x01E916}, {0x01E939, 0x01E917}, + {0x01E93A, 0x01E918}, {0x01E93B, 0x01E919}, {0x01E93C, 0x01E91A}, {0x01E93D, 0x01E91B}, {0x01E93E, 0x01E91C}, + {0x01E93F, 0x01E91D}, {0x01E940, 0x01E91E}, {0x01E941, 0x01E91F}, {0x01E942, 0x01E920}, {0x01E943, 0x01E921}, +}; + +const std::initializer_list<range_nfd> unicode_ranges_nfd = { + // start, last, nfd + {0x000000, 0x000000, 0x000000}, {0x0000C0, 0x0000C5, 0x000041}, {0x0000C7, 0x0000C7, 0x000043}, + {0x0000C8, 0x0000CB, 0x000045}, {0x0000CC, 0x0000CF, 0x000049}, {0x0000D1, 0x0000D1, 0x00004E}, + {0x0000D2, 0x0000D6, 0x00004F}, {0x0000D9, 0x0000DC, 0x000055}, {0x0000DD, 0x0000DD, 0x000059}, + {0x0000E0, 0x0000E5, 0x000061}, {0x0000E7, 0x0000E7, 0x000063}, {0x0000E8, 0x0000EB, 0x000065}, + {0x0000EC, 0x0000EF, 0x000069}, {0x0000F1, 0x0000F1, 0x00006E}, {0x0000F2, 0x0000F6, 0x00006F}, + {0x0000F9, 0x0000FC, 0x000075}, {0x0000FD, 0x0000FD, 0x000079}, {0x0000FF, 0x0000FF, 0x000079}, + {0x000100, 0x000100, 0x000041}, {0x000101, 0x000101, 0x000061}, 
{0x000102, 0x000102, 0x000041}, + {0x000103, 0x000103, 0x000061}, {0x000104, 0x000104, 0x000041}, {0x000105, 0x000105, 0x000061}, + {0x000106, 0x000106, 0x000043}, {0x000107, 0x000107, 0x000063}, {0x000108, 0x000108, 0x000043}, + {0x000109, 0x000109, 0x000063}, {0x00010A, 0x00010A, 0x000043}, {0x00010B, 0x00010B, 0x000063}, + {0x00010C, 0x00010C, 0x000043}, {0x00010D, 0x00010D, 0x000063}, {0x00010E, 0x00010E, 0x000044}, + {0x00010F, 0x00010F, 0x000064}, {0x000112, 0x000112, 0x000045}, {0x000113, 0x000113, 0x000065}, + {0x000114, 0x000114, 0x000045}, {0x000115, 0x000115, 0x000065}, {0x000116, 0x000116, 0x000045}, + {0x000117, 0x000117, 0x000065}, {0x000118, 0x000118, 0x000045}, {0x000119, 0x000119, 0x000065}, + {0x00011A, 0x00011A, 0x000045}, {0x00011B, 0x00011B, 0x000065}, {0x00011C, 0x00011C, 0x000047}, + {0x00011D, 0x00011D, 0x000067}, {0x00011E, 0x00011E, 0x000047}, {0x00011F, 0x00011F, 0x000067}, + {0x000120, 0x000120, 0x000047}, {0x000121, 0x000121, 0x000067}, {0x000122, 0x000122, 0x000047}, + {0x000123, 0x000123, 0x000067}, {0x000124, 0x000124, 0x000048}, {0x000125, 0x000125, 0x000068}, + {0x000128, 0x000128, 0x000049}, {0x000129, 0x000129, 0x000069}, {0x00012A, 0x00012A, 0x000049}, + {0x00012B, 0x00012B, 0x000069}, {0x00012C, 0x00012C, 0x000049}, {0x00012D, 0x00012D, 0x000069}, + {0x00012E, 0x00012E, 0x000049}, {0x00012F, 0x00012F, 0x000069}, {0x000130, 0x000130, 0x000049}, + {0x000134, 0x000134, 0x00004A}, {0x000135, 0x000135, 0x00006A}, {0x000136, 0x000136, 0x00004B}, + {0x000137, 0x000137, 0x00006B}, {0x000139, 0x000139, 0x00004C}, {0x00013A, 0x00013A, 0x00006C}, + {0x00013B, 0x00013B, 0x00004C}, {0x00013C, 0x00013C, 0x00006C}, {0x00013D, 0x00013D, 0x00004C}, + {0x00013E, 0x00013E, 0x00006C}, {0x000143, 0x000143, 0x00004E}, {0x000144, 0x000144, 0x00006E}, + {0x000145, 0x000145, 0x00004E}, {0x000146, 0x000146, 0x00006E}, {0x000147, 0x000147, 0x00004E}, + {0x000148, 0x000148, 0x00006E}, {0x00014C, 0x00014C, 0x00004F}, {0x00014D, 0x00014D, 0x00006F}, + {0x00014E, 0x00014E, 0x00004F}, {0x00014F, 0x00014F, 0x00006F}, {0x000150, 0x000150, 0x00004F}, + {0x000151, 0x000151, 0x00006F}, {0x000154, 0x000154, 0x000052}, {0x000155, 0x000155, 0x000072}, + {0x000156, 0x000156, 0x000052}, {0x000157, 0x000157, 0x000072}, {0x000158, 0x000158, 0x000052}, + {0x000159, 0x000159, 0x000072}, {0x00015A, 0x00015A, 0x000053}, {0x00015B, 0x00015B, 0x000073}, + {0x00015C, 0x00015C, 0x000053}, {0x00015D, 0x00015D, 0x000073}, {0x00015E, 0x00015E, 0x000053}, + {0x00015F, 0x00015F, 0x000073}, {0x000160, 0x000160, 0x000053}, {0x000161, 0x000161, 0x000073}, + {0x000162, 0x000162, 0x000054}, {0x000163, 0x000163, 0x000074}, {0x000164, 0x000164, 0x000054}, + {0x000165, 0x000165, 0x000074}, {0x000168, 0x000168, 0x000055}, {0x000169, 0x000169, 0x000075}, + {0x00016A, 0x00016A, 0x000055}, {0x00016B, 0x00016B, 0x000075}, {0x00016C, 0x00016C, 0x000055}, + {0x00016D, 0x00016D, 0x000075}, {0x00016E, 0x00016E, 0x000055}, {0x00016F, 0x00016F, 0x000075}, + {0x000170, 0x000170, 0x000055}, {0x000171, 0x000171, 0x000075}, {0x000172, 0x000172, 0x000055}, + {0x000173, 0x000173, 0x000075}, {0x000174, 0x000174, 0x000057}, {0x000175, 0x000175, 0x000077}, + {0x000176, 0x000176, 0x000059}, {0x000177, 0x000177, 0x000079}, {0x000178, 0x000178, 0x000059}, + {0x000179, 0x000179, 0x00005A}, {0x00017A, 0x00017A, 0x00007A}, {0x00017B, 0x00017B, 0x00005A}, + {0x00017C, 0x00017C, 0x00007A}, {0x00017D, 0x00017D, 0x00005A}, {0x00017E, 0x00017E, 0x00007A}, + {0x0001A0, 0x0001A0, 0x00004F}, {0x0001A1, 0x0001A1, 0x00006F}, {0x0001AF, 0x0001AF, 
0x000055}, + {0x0001B0, 0x0001B0, 0x000075}, {0x0001CD, 0x0001CD, 0x000041}, {0x0001CE, 0x0001CE, 0x000061}, + {0x0001CF, 0x0001CF, 0x000049}, {0x0001D0, 0x0001D0, 0x000069}, {0x0001D1, 0x0001D1, 0x00004F}, + {0x0001D2, 0x0001D2, 0x00006F}, {0x0001D3, 0x0001D3, 0x000055}, {0x0001D4, 0x0001D4, 0x000075}, + {0x0001D5, 0x0001D5, 0x000055}, {0x0001D6, 0x0001D6, 0x000075}, {0x0001D7, 0x0001D7, 0x000055}, + {0x0001D8, 0x0001D8, 0x000075}, {0x0001D9, 0x0001D9, 0x000055}, {0x0001DA, 0x0001DA, 0x000075}, + {0x0001DB, 0x0001DB, 0x000055}, {0x0001DC, 0x0001DC, 0x000075}, {0x0001DE, 0x0001DE, 0x000041}, + {0x0001DF, 0x0001DF, 0x000061}, {0x0001E0, 0x0001E0, 0x000041}, {0x0001E1, 0x0001E1, 0x000061}, + {0x0001E2, 0x0001E2, 0x0000C6}, {0x0001E3, 0x0001E3, 0x0000E6}, {0x0001E6, 0x0001E6, 0x000047}, + {0x0001E7, 0x0001E7, 0x000067}, {0x0001E8, 0x0001E8, 0x00004B}, {0x0001E9, 0x0001E9, 0x00006B}, + {0x0001EA, 0x0001EA, 0x00004F}, {0x0001EB, 0x0001EB, 0x00006F}, {0x0001EC, 0x0001EC, 0x00004F}, + {0x0001ED, 0x0001ED, 0x00006F}, {0x0001EE, 0x0001EE, 0x0001B7}, {0x0001EF, 0x0001EF, 0x000292}, + {0x0001F0, 0x0001F0, 0x00006A}, {0x0001F4, 0x0001F4, 0x000047}, {0x0001F5, 0x0001F5, 0x000067}, + {0x0001F8, 0x0001F8, 0x00004E}, {0x0001F9, 0x0001F9, 0x00006E}, {0x0001FA, 0x0001FA, 0x000041}, + {0x0001FB, 0x0001FB, 0x000061}, {0x0001FC, 0x0001FC, 0x0000C6}, {0x0001FD, 0x0001FD, 0x0000E6}, + {0x0001FE, 0x0001FE, 0x0000D8}, {0x0001FF, 0x0001FF, 0x0000F8}, {0x000200, 0x000200, 0x000041}, + {0x000201, 0x000201, 0x000061}, {0x000202, 0x000202, 0x000041}, {0x000203, 0x000203, 0x000061}, + {0x000204, 0x000204, 0x000045}, {0x000205, 0x000205, 0x000065}, {0x000206, 0x000206, 0x000045}, + {0x000207, 0x000207, 0x000065}, {0x000208, 0x000208, 0x000049}, {0x000209, 0x000209, 0x000069}, + {0x00020A, 0x00020A, 0x000049}, {0x00020B, 0x00020B, 0x000069}, {0x00020C, 0x00020C, 0x00004F}, + {0x00020D, 0x00020D, 0x00006F}, {0x00020E, 0x00020E, 0x00004F}, {0x00020F, 0x00020F, 0x00006F}, + {0x000210, 0x000210, 0x000052}, {0x000211, 0x000211, 0x000072}, {0x000212, 0x000212, 0x000052}, + {0x000213, 0x000213, 0x000072}, {0x000214, 0x000214, 0x000055}, {0x000215, 0x000215, 0x000075}, + {0x000216, 0x000216, 0x000055}, {0x000217, 0x000217, 0x000075}, {0x000218, 0x000218, 0x000053}, + {0x000219, 0x000219, 0x000073}, {0x00021A, 0x00021A, 0x000054}, {0x00021B, 0x00021B, 0x000074}, + {0x00021E, 0x00021E, 0x000048}, {0x00021F, 0x00021F, 0x000068}, {0x000226, 0x000226, 0x000041}, + {0x000227, 0x000227, 0x000061}, {0x000228, 0x000228, 0x000045}, {0x000229, 0x000229, 0x000065}, + {0x00022A, 0x00022A, 0x00004F}, {0x00022B, 0x00022B, 0x00006F}, {0x00022C, 0x00022C, 0x00004F}, + {0x00022D, 0x00022D, 0x00006F}, {0x00022E, 0x00022E, 0x00004F}, {0x00022F, 0x00022F, 0x00006F}, + {0x000230, 0x000230, 0x00004F}, {0x000231, 0x000231, 0x00006F}, {0x000232, 0x000232, 0x000059}, + {0x000233, 0x000233, 0x000079}, {0x000340, 0x000340, 0x000300}, {0x000341, 0x000341, 0x000301}, + {0x000343, 0x000343, 0x000313}, {0x000344, 0x000344, 0x000308}, {0x000374, 0x000374, 0x0002B9}, + {0x00037E, 0x00037E, 0x00003B}, {0x000385, 0x000385, 0x0000A8}, {0x000386, 0x000386, 0x000391}, + {0x000387, 0x000387, 0x0000B7}, {0x000388, 0x000388, 0x000395}, {0x000389, 0x000389, 0x000397}, + {0x00038A, 0x00038A, 0x000399}, {0x00038C, 0x00038C, 0x00039F}, {0x00038E, 0x00038E, 0x0003A5}, + {0x00038F, 0x00038F, 0x0003A9}, {0x000390, 0x000390, 0x0003B9}, {0x0003AA, 0x0003AA, 0x000399}, + {0x0003AB, 0x0003AB, 0x0003A5}, {0x0003AC, 0x0003AC, 0x0003B1}, {0x0003AD, 0x0003AD, 0x0003B5}, + {0x0003AE, 
0x0003AE, 0x0003B7}, {0x0003AF, 0x0003AF, 0x0003B9}, {0x0003B0, 0x0003B0, 0x0003C5}, + {0x0003CA, 0x0003CA, 0x0003B9}, {0x0003CB, 0x0003CB, 0x0003C5}, {0x0003CC, 0x0003CC, 0x0003BF}, + {0x0003CD, 0x0003CD, 0x0003C5}, {0x0003CE, 0x0003CE, 0x0003C9}, {0x0003D3, 0x0003D4, 0x0003D2}, + {0x000400, 0x000401, 0x000415}, {0x000403, 0x000403, 0x000413}, {0x000407, 0x000407, 0x000406}, + {0x00040C, 0x00040C, 0x00041A}, {0x00040D, 0x00040D, 0x000418}, {0x00040E, 0x00040E, 0x000423}, + {0x000419, 0x000419, 0x000418}, {0x000439, 0x000439, 0x000438}, {0x000450, 0x000451, 0x000435}, + {0x000453, 0x000453, 0x000433}, {0x000457, 0x000457, 0x000456}, {0x00045C, 0x00045C, 0x00043A}, + {0x00045D, 0x00045D, 0x000438}, {0x00045E, 0x00045E, 0x000443}, {0x000476, 0x000476, 0x000474}, + {0x000477, 0x000477, 0x000475}, {0x0004C1, 0x0004C1, 0x000416}, {0x0004C2, 0x0004C2, 0x000436}, + {0x0004D0, 0x0004D0, 0x000410}, {0x0004D1, 0x0004D1, 0x000430}, {0x0004D2, 0x0004D2, 0x000410}, + {0x0004D3, 0x0004D3, 0x000430}, {0x0004D6, 0x0004D6, 0x000415}, {0x0004D7, 0x0004D7, 0x000435}, + {0x0004DA, 0x0004DA, 0x0004D8}, {0x0004DB, 0x0004DB, 0x0004D9}, {0x0004DC, 0x0004DC, 0x000416}, + {0x0004DD, 0x0004DD, 0x000436}, {0x0004DE, 0x0004DE, 0x000417}, {0x0004DF, 0x0004DF, 0x000437}, + {0x0004E2, 0x0004E2, 0x000418}, {0x0004E3, 0x0004E3, 0x000438}, {0x0004E4, 0x0004E4, 0x000418}, + {0x0004E5, 0x0004E5, 0x000438}, {0x0004E6, 0x0004E6, 0x00041E}, {0x0004E7, 0x0004E7, 0x00043E}, + {0x0004EA, 0x0004EA, 0x0004E8}, {0x0004EB, 0x0004EB, 0x0004E9}, {0x0004EC, 0x0004EC, 0x00042D}, + {0x0004ED, 0x0004ED, 0x00044D}, {0x0004EE, 0x0004EE, 0x000423}, {0x0004EF, 0x0004EF, 0x000443}, + {0x0004F0, 0x0004F0, 0x000423}, {0x0004F1, 0x0004F1, 0x000443}, {0x0004F2, 0x0004F2, 0x000423}, + {0x0004F3, 0x0004F3, 0x000443}, {0x0004F4, 0x0004F4, 0x000427}, {0x0004F5, 0x0004F5, 0x000447}, + {0x0004F8, 0x0004F8, 0x00042B}, {0x0004F9, 0x0004F9, 0x00044B}, {0x000622, 0x000623, 0x000627}, + {0x000624, 0x000624, 0x000648}, {0x000625, 0x000625, 0x000627}, {0x000626, 0x000626, 0x00064A}, + {0x0006C0, 0x0006C0, 0x0006D5}, {0x0006C2, 0x0006C2, 0x0006C1}, {0x0006D3, 0x0006D3, 0x0006D2}, + {0x000929, 0x000929, 0x000928}, {0x000931, 0x000931, 0x000930}, {0x000934, 0x000934, 0x000933}, + {0x000958, 0x000958, 0x000915}, {0x000959, 0x000959, 0x000916}, {0x00095A, 0x00095A, 0x000917}, + {0x00095B, 0x00095B, 0x00091C}, {0x00095C, 0x00095C, 0x000921}, {0x00095D, 0x00095D, 0x000922}, + {0x00095E, 0x00095E, 0x00092B}, {0x00095F, 0x00095F, 0x00092F}, {0x0009CB, 0x0009CC, 0x0009C7}, + {0x0009DC, 0x0009DC, 0x0009A1}, {0x0009DD, 0x0009DD, 0x0009A2}, {0x0009DF, 0x0009DF, 0x0009AF}, + {0x000A33, 0x000A33, 0x000A32}, {0x000A36, 0x000A36, 0x000A38}, {0x000A59, 0x000A59, 0x000A16}, + {0x000A5A, 0x000A5A, 0x000A17}, {0x000A5B, 0x000A5B, 0x000A1C}, {0x000A5E, 0x000A5E, 0x000A2B}, + {0x000B48, 0x000B48, 0x000B47}, {0x000B4B, 0x000B4C, 0x000B47}, {0x000B5C, 0x000B5C, 0x000B21}, + {0x000B5D, 0x000B5D, 0x000B22}, {0x000B94, 0x000B94, 0x000B92}, {0x000BCA, 0x000BCA, 0x000BC6}, + {0x000BCB, 0x000BCB, 0x000BC7}, {0x000BCC, 0x000BCC, 0x000BC6}, {0x000C48, 0x000C48, 0x000C46}, + {0x000CC0, 0x000CC0, 0x000CBF}, {0x000CC7, 0x000CC8, 0x000CC6}, {0x000CCA, 0x000CCB, 0x000CC6}, + {0x000D4A, 0x000D4A, 0x000D46}, {0x000D4B, 0x000D4B, 0x000D47}, {0x000D4C, 0x000D4C, 0x000D46}, + {0x000DDA, 0x000DDA, 0x000DD9}, {0x000DDC, 0x000DDE, 0x000DD9}, {0x000F43, 0x000F43, 0x000F42}, + {0x000F4D, 0x000F4D, 0x000F4C}, {0x000F52, 0x000F52, 0x000F51}, {0x000F57, 0x000F57, 0x000F56}, + {0x000F5C, 0x000F5C, 0x000F5B}, 
{0x000F69, 0x000F69, 0x000F40}, {0x000F73, 0x000F73, 0x000F71}, + {0x000F75, 0x000F75, 0x000F71}, {0x000F76, 0x000F76, 0x000FB2}, {0x000F78, 0x000F78, 0x000FB3}, + {0x000F81, 0x000F81, 0x000F71}, {0x000F93, 0x000F93, 0x000F92}, {0x000F9D, 0x000F9D, 0x000F9C}, + {0x000FA2, 0x000FA2, 0x000FA1}, {0x000FA7, 0x000FA7, 0x000FA6}, {0x000FAC, 0x000FAC, 0x000FAB}, + {0x000FB9, 0x000FB9, 0x000F90}, {0x001026, 0x001026, 0x001025}, {0x001B06, 0x001B06, 0x001B05}, + {0x001B08, 0x001B08, 0x001B07}, {0x001B0A, 0x001B0A, 0x001B09}, {0x001B0C, 0x001B0C, 0x001B0B}, + {0x001B0E, 0x001B0E, 0x001B0D}, {0x001B12, 0x001B12, 0x001B11}, {0x001B3B, 0x001B3B, 0x001B3A}, + {0x001B3D, 0x001B3D, 0x001B3C}, {0x001B40, 0x001B40, 0x001B3E}, {0x001B41, 0x001B41, 0x001B3F}, + {0x001B43, 0x001B43, 0x001B42}, {0x001E00, 0x001E00, 0x000041}, {0x001E01, 0x001E01, 0x000061}, + {0x001E02, 0x001E02, 0x000042}, {0x001E03, 0x001E03, 0x000062}, {0x001E04, 0x001E04, 0x000042}, + {0x001E05, 0x001E05, 0x000062}, {0x001E06, 0x001E06, 0x000042}, {0x001E07, 0x001E07, 0x000062}, + {0x001E08, 0x001E08, 0x000043}, {0x001E09, 0x001E09, 0x000063}, {0x001E0A, 0x001E0A, 0x000044}, + {0x001E0B, 0x001E0B, 0x000064}, {0x001E0C, 0x001E0C, 0x000044}, {0x001E0D, 0x001E0D, 0x000064}, + {0x001E0E, 0x001E0E, 0x000044}, {0x001E0F, 0x001E0F, 0x000064}, {0x001E10, 0x001E10, 0x000044}, + {0x001E11, 0x001E11, 0x000064}, {0x001E12, 0x001E12, 0x000044}, {0x001E13, 0x001E13, 0x000064}, + {0x001E14, 0x001E14, 0x000045}, {0x001E15, 0x001E15, 0x000065}, {0x001E16, 0x001E16, 0x000045}, + {0x001E17, 0x001E17, 0x000065}, {0x001E18, 0x001E18, 0x000045}, {0x001E19, 0x001E19, 0x000065}, + {0x001E1A, 0x001E1A, 0x000045}, {0x001E1B, 0x001E1B, 0x000065}, {0x001E1C, 0x001E1C, 0x000045}, + {0x001E1D, 0x001E1D, 0x000065}, {0x001E1E, 0x001E1E, 0x000046}, {0x001E1F, 0x001E1F, 0x000066}, + {0x001E20, 0x001E20, 0x000047}, {0x001E21, 0x001E21, 0x000067}, {0x001E22, 0x001E22, 0x000048}, + {0x001E23, 0x001E23, 0x000068}, {0x001E24, 0x001E24, 0x000048}, {0x001E25, 0x001E25, 0x000068}, + {0x001E26, 0x001E26, 0x000048}, {0x001E27, 0x001E27, 0x000068}, {0x001E28, 0x001E28, 0x000048}, + {0x001E29, 0x001E29, 0x000068}, {0x001E2A, 0x001E2A, 0x000048}, {0x001E2B, 0x001E2B, 0x000068}, + {0x001E2C, 0x001E2C, 0x000049}, {0x001E2D, 0x001E2D, 0x000069}, {0x001E2E, 0x001E2E, 0x000049}, + {0x001E2F, 0x001E2F, 0x000069}, {0x001E30, 0x001E30, 0x00004B}, {0x001E31, 0x001E31, 0x00006B}, + {0x001E32, 0x001E32, 0x00004B}, {0x001E33, 0x001E33, 0x00006B}, {0x001E34, 0x001E34, 0x00004B}, + {0x001E35, 0x001E35, 0x00006B}, {0x001E36, 0x001E36, 0x00004C}, {0x001E37, 0x001E37, 0x00006C}, + {0x001E38, 0x001E38, 0x00004C}, {0x001E39, 0x001E39, 0x00006C}, {0x001E3A, 0x001E3A, 0x00004C}, + {0x001E3B, 0x001E3B, 0x00006C}, {0x001E3C, 0x001E3C, 0x00004C}, {0x001E3D, 0x001E3D, 0x00006C}, + {0x001E3E, 0x001E3E, 0x00004D}, {0x001E3F, 0x001E3F, 0x00006D}, {0x001E40, 0x001E40, 0x00004D}, + {0x001E41, 0x001E41, 0x00006D}, {0x001E42, 0x001E42, 0x00004D}, {0x001E43, 0x001E43, 0x00006D}, + {0x001E44, 0x001E44, 0x00004E}, {0x001E45, 0x001E45, 0x00006E}, {0x001E46, 0x001E46, 0x00004E}, + {0x001E47, 0x001E47, 0x00006E}, {0x001E48, 0x001E48, 0x00004E}, {0x001E49, 0x001E49, 0x00006E}, + {0x001E4A, 0x001E4A, 0x00004E}, {0x001E4B, 0x001E4B, 0x00006E}, {0x001E4C, 0x001E4C, 0x00004F}, + {0x001E4D, 0x001E4D, 0x00006F}, {0x001E4E, 0x001E4E, 0x00004F}, {0x001E4F, 0x001E4F, 0x00006F}, + {0x001E50, 0x001E50, 0x00004F}, {0x001E51, 0x001E51, 0x00006F}, {0x001E52, 0x001E52, 0x00004F}, + {0x001E53, 0x001E53, 0x00006F}, {0x001E54, 0x001E54, 
0x000050}, {0x001E55, 0x001E55, 0x000070}, + {0x001E56, 0x001E56, 0x000050}, {0x001E57, 0x001E57, 0x000070}, {0x001E58, 0x001E58, 0x000052}, + {0x001E59, 0x001E59, 0x000072}, {0x001E5A, 0x001E5A, 0x000052}, {0x001E5B, 0x001E5B, 0x000072}, + {0x001E5C, 0x001E5C, 0x000052}, {0x001E5D, 0x001E5D, 0x000072}, {0x001E5E, 0x001E5E, 0x000052}, + {0x001E5F, 0x001E5F, 0x000072}, {0x001E60, 0x001E60, 0x000053}, {0x001E61, 0x001E61, 0x000073}, + {0x001E62, 0x001E62, 0x000053}, {0x001E63, 0x001E63, 0x000073}, {0x001E64, 0x001E64, 0x000053}, + {0x001E65, 0x001E65, 0x000073}, {0x001E66, 0x001E66, 0x000053}, {0x001E67, 0x001E67, 0x000073}, + {0x001E68, 0x001E68, 0x000053}, {0x001E69, 0x001E69, 0x000073}, {0x001E6A, 0x001E6A, 0x000054}, + {0x001E6B, 0x001E6B, 0x000074}, {0x001E6C, 0x001E6C, 0x000054}, {0x001E6D, 0x001E6D, 0x000074}, + {0x001E6E, 0x001E6E, 0x000054}, {0x001E6F, 0x001E6F, 0x000074}, {0x001E70, 0x001E70, 0x000054}, + {0x001E71, 0x001E71, 0x000074}, {0x001E72, 0x001E72, 0x000055}, {0x001E73, 0x001E73, 0x000075}, + {0x001E74, 0x001E74, 0x000055}, {0x001E75, 0x001E75, 0x000075}, {0x001E76, 0x001E76, 0x000055}, + {0x001E77, 0x001E77, 0x000075}, {0x001E78, 0x001E78, 0x000055}, {0x001E79, 0x001E79, 0x000075}, + {0x001E7A, 0x001E7A, 0x000055}, {0x001E7B, 0x001E7B, 0x000075}, {0x001E7C, 0x001E7C, 0x000056}, + {0x001E7D, 0x001E7D, 0x000076}, {0x001E7E, 0x001E7E, 0x000056}, {0x001E7F, 0x001E7F, 0x000076}, + {0x001E80, 0x001E80, 0x000057}, {0x001E81, 0x001E81, 0x000077}, {0x001E82, 0x001E82, 0x000057}, + {0x001E83, 0x001E83, 0x000077}, {0x001E84, 0x001E84, 0x000057}, {0x001E85, 0x001E85, 0x000077}, + {0x001E86, 0x001E86, 0x000057}, {0x001E87, 0x001E87, 0x000077}, {0x001E88, 0x001E88, 0x000057}, + {0x001E89, 0x001E89, 0x000077}, {0x001E8A, 0x001E8A, 0x000058}, {0x001E8B, 0x001E8B, 0x000078}, + {0x001E8C, 0x001E8C, 0x000058}, {0x001E8D, 0x001E8D, 0x000078}, {0x001E8E, 0x001E8E, 0x000059}, + {0x001E8F, 0x001E8F, 0x000079}, {0x001E90, 0x001E90, 0x00005A}, {0x001E91, 0x001E91, 0x00007A}, + {0x001E92, 0x001E92, 0x00005A}, {0x001E93, 0x001E93, 0x00007A}, {0x001E94, 0x001E94, 0x00005A}, + {0x001E95, 0x001E95, 0x00007A}, {0x001E96, 0x001E96, 0x000068}, {0x001E97, 0x001E97, 0x000074}, + {0x001E98, 0x001E98, 0x000077}, {0x001E99, 0x001E99, 0x000079}, {0x001E9B, 0x001E9B, 0x00017F}, + {0x001EA0, 0x001EA0, 0x000041}, {0x001EA1, 0x001EA1, 0x000061}, {0x001EA2, 0x001EA2, 0x000041}, + {0x001EA3, 0x001EA3, 0x000061}, {0x001EA4, 0x001EA4, 0x000041}, {0x001EA5, 0x001EA5, 0x000061}, + {0x001EA6, 0x001EA6, 0x000041}, {0x001EA7, 0x001EA7, 0x000061}, {0x001EA8, 0x001EA8, 0x000041}, + {0x001EA9, 0x001EA9, 0x000061}, {0x001EAA, 0x001EAA, 0x000041}, {0x001EAB, 0x001EAB, 0x000061}, + {0x001EAC, 0x001EAC, 0x000041}, {0x001EAD, 0x001EAD, 0x000061}, {0x001EAE, 0x001EAE, 0x000041}, + {0x001EAF, 0x001EAF, 0x000061}, {0x001EB0, 0x001EB0, 0x000041}, {0x001EB1, 0x001EB1, 0x000061}, + {0x001EB2, 0x001EB2, 0x000041}, {0x001EB3, 0x001EB3, 0x000061}, {0x001EB4, 0x001EB4, 0x000041}, + {0x001EB5, 0x001EB5, 0x000061}, {0x001EB6, 0x001EB6, 0x000041}, {0x001EB7, 0x001EB7, 0x000061}, + {0x001EB8, 0x001EB8, 0x000045}, {0x001EB9, 0x001EB9, 0x000065}, {0x001EBA, 0x001EBA, 0x000045}, + {0x001EBB, 0x001EBB, 0x000065}, {0x001EBC, 0x001EBC, 0x000045}, {0x001EBD, 0x001EBD, 0x000065}, + {0x001EBE, 0x001EBE, 0x000045}, {0x001EBF, 0x001EBF, 0x000065}, {0x001EC0, 0x001EC0, 0x000045}, + {0x001EC1, 0x001EC1, 0x000065}, {0x001EC2, 0x001EC2, 0x000045}, {0x001EC3, 0x001EC3, 0x000065}, + {0x001EC4, 0x001EC4, 0x000045}, {0x001EC5, 0x001EC5, 0x000065}, {0x001EC6, 
0x001EC6, 0x000045}, + {0x001EC7, 0x001EC7, 0x000065}, {0x001EC8, 0x001EC8, 0x000049}, {0x001EC9, 0x001EC9, 0x000069}, + {0x001ECA, 0x001ECA, 0x000049}, {0x001ECB, 0x001ECB, 0x000069}, {0x001ECC, 0x001ECC, 0x00004F}, + {0x001ECD, 0x001ECD, 0x00006F}, {0x001ECE, 0x001ECE, 0x00004F}, {0x001ECF, 0x001ECF, 0x00006F}, + {0x001ED0, 0x001ED0, 0x00004F}, {0x001ED1, 0x001ED1, 0x00006F}, {0x001ED2, 0x001ED2, 0x00004F}, + {0x001ED3, 0x001ED3, 0x00006F}, {0x001ED4, 0x001ED4, 0x00004F}, {0x001ED5, 0x001ED5, 0x00006F}, + {0x001ED6, 0x001ED6, 0x00004F}, {0x001ED7, 0x001ED7, 0x00006F}, {0x001ED8, 0x001ED8, 0x00004F}, + {0x001ED9, 0x001ED9, 0x00006F}, {0x001EDA, 0x001EDA, 0x00004F}, {0x001EDB, 0x001EDB, 0x00006F}, + {0x001EDC, 0x001EDC, 0x00004F}, {0x001EDD, 0x001EDD, 0x00006F}, {0x001EDE, 0x001EDE, 0x00004F}, + {0x001EDF, 0x001EDF, 0x00006F}, {0x001EE0, 0x001EE0, 0x00004F}, {0x001EE1, 0x001EE1, 0x00006F}, + {0x001EE2, 0x001EE2, 0x00004F}, {0x001EE3, 0x001EE3, 0x00006F}, {0x001EE4, 0x001EE4, 0x000055}, + {0x001EE5, 0x001EE5, 0x000075}, {0x001EE6, 0x001EE6, 0x000055}, {0x001EE7, 0x001EE7, 0x000075}, + {0x001EE8, 0x001EE8, 0x000055}, {0x001EE9, 0x001EE9, 0x000075}, {0x001EEA, 0x001EEA, 0x000055}, + {0x001EEB, 0x001EEB, 0x000075}, {0x001EEC, 0x001EEC, 0x000055}, {0x001EED, 0x001EED, 0x000075}, + {0x001EEE, 0x001EEE, 0x000055}, {0x001EEF, 0x001EEF, 0x000075}, {0x001EF0, 0x001EF0, 0x000055}, + {0x001EF1, 0x001EF1, 0x000075}, {0x001EF2, 0x001EF2, 0x000059}, {0x001EF3, 0x001EF3, 0x000079}, + {0x001EF4, 0x001EF4, 0x000059}, {0x001EF5, 0x001EF5, 0x000079}, {0x001EF6, 0x001EF6, 0x000059}, + {0x001EF7, 0x001EF7, 0x000079}, {0x001EF8, 0x001EF8, 0x000059}, {0x001EF9, 0x001EF9, 0x000079}, + {0x001F00, 0x001F07, 0x0003B1}, {0x001F08, 0x001F0F, 0x000391}, {0x001F10, 0x001F15, 0x0003B5}, + {0x001F18, 0x001F1D, 0x000395}, {0x001F20, 0x001F27, 0x0003B7}, {0x001F28, 0x001F2F, 0x000397}, + {0x001F30, 0x001F37, 0x0003B9}, {0x001F38, 0x001F3F, 0x000399}, {0x001F40, 0x001F45, 0x0003BF}, + {0x001F48, 0x001F4D, 0x00039F}, {0x001F50, 0x001F57, 0x0003C5}, {0x001F59, 0x001F59, 0x0003A5}, + {0x001F5B, 0x001F5B, 0x0003A5}, {0x001F5D, 0x001F5D, 0x0003A5}, {0x001F5F, 0x001F5F, 0x0003A5}, + {0x001F60, 0x001F67, 0x0003C9}, {0x001F68, 0x001F6F, 0x0003A9}, {0x001F70, 0x001F71, 0x0003B1}, + {0x001F72, 0x001F73, 0x0003B5}, {0x001F74, 0x001F75, 0x0003B7}, {0x001F76, 0x001F77, 0x0003B9}, + {0x001F78, 0x001F79, 0x0003BF}, {0x001F7A, 0x001F7B, 0x0003C5}, {0x001F7C, 0x001F7D, 0x0003C9}, + {0x001F80, 0x001F87, 0x0003B1}, {0x001F88, 0x001F8F, 0x000391}, {0x001F90, 0x001F97, 0x0003B7}, + {0x001F98, 0x001F9F, 0x000397}, {0x001FA0, 0x001FA7, 0x0003C9}, {0x001FA8, 0x001FAF, 0x0003A9}, + {0x001FB0, 0x001FB4, 0x0003B1}, {0x001FB6, 0x001FB7, 0x0003B1}, {0x001FB8, 0x001FBC, 0x000391}, + {0x001FBE, 0x001FBE, 0x0003B9}, {0x001FC1, 0x001FC1, 0x0000A8}, {0x001FC2, 0x001FC4, 0x0003B7}, + {0x001FC6, 0x001FC7, 0x0003B7}, {0x001FC8, 0x001FC9, 0x000395}, {0x001FCA, 0x001FCC, 0x000397}, + {0x001FCD, 0x001FCF, 0x001FBF}, {0x001FD0, 0x001FD3, 0x0003B9}, {0x001FD6, 0x001FD7, 0x0003B9}, + {0x001FD8, 0x001FDB, 0x000399}, {0x001FDD, 0x001FDF, 0x001FFE}, {0x001FE0, 0x001FE3, 0x0003C5}, + {0x001FE4, 0x001FE5, 0x0003C1}, {0x001FE6, 0x001FE7, 0x0003C5}, {0x001FE8, 0x001FEB, 0x0003A5}, + {0x001FEC, 0x001FEC, 0x0003A1}, {0x001FED, 0x001FEE, 0x0000A8}, {0x001FEF, 0x001FEF, 0x000060}, + {0x001FF2, 0x001FF4, 0x0003C9}, {0x001FF6, 0x001FF7, 0x0003C9}, {0x001FF8, 0x001FF9, 0x00039F}, + {0x001FFA, 0x001FFC, 0x0003A9}, {0x001FFD, 0x001FFD, 0x0000B4}, {0x002000, 0x002000, 0x002002}, + 
{0x002001, 0x002001, 0x002003}, {0x002126, 0x002126, 0x0003A9}, {0x00212A, 0x00212A, 0x00004B}, + {0x00212B, 0x00212B, 0x000041}, {0x00219A, 0x00219A, 0x002190}, {0x00219B, 0x00219B, 0x002192}, + {0x0021AE, 0x0021AE, 0x002194}, {0x0021CD, 0x0021CD, 0x0021D0}, {0x0021CE, 0x0021CE, 0x0021D4}, + {0x0021CF, 0x0021CF, 0x0021D2}, {0x002204, 0x002204, 0x002203}, {0x002209, 0x002209, 0x002208}, + {0x00220C, 0x00220C, 0x00220B}, {0x002224, 0x002224, 0x002223}, {0x002226, 0x002226, 0x002225}, + {0x002241, 0x002241, 0x00223C}, {0x002244, 0x002244, 0x002243}, {0x002247, 0x002247, 0x002245}, + {0x002249, 0x002249, 0x002248}, {0x002260, 0x002260, 0x00003D}, {0x002262, 0x002262, 0x002261}, + {0x00226D, 0x00226D, 0x00224D}, {0x00226E, 0x00226E, 0x00003C}, {0x00226F, 0x00226F, 0x00003E}, + {0x002270, 0x002270, 0x002264}, {0x002271, 0x002271, 0x002265}, {0x002274, 0x002274, 0x002272}, + {0x002275, 0x002275, 0x002273}, {0x002278, 0x002278, 0x002276}, {0x002279, 0x002279, 0x002277}, + {0x002280, 0x002280, 0x00227A}, {0x002281, 0x002281, 0x00227B}, {0x002284, 0x002284, 0x002282}, + {0x002285, 0x002285, 0x002283}, {0x002288, 0x002288, 0x002286}, {0x002289, 0x002289, 0x002287}, + {0x0022AC, 0x0022AC, 0x0022A2}, {0x0022AD, 0x0022AD, 0x0022A8}, {0x0022AE, 0x0022AE, 0x0022A9}, + {0x0022AF, 0x0022AF, 0x0022AB}, {0x0022E0, 0x0022E0, 0x00227C}, {0x0022E1, 0x0022E1, 0x00227D}, + {0x0022E2, 0x0022E2, 0x002291}, {0x0022E3, 0x0022E3, 0x002292}, {0x0022EA, 0x0022EA, 0x0022B2}, + {0x0022EB, 0x0022EB, 0x0022B3}, {0x0022EC, 0x0022EC, 0x0022B4}, {0x0022ED, 0x0022ED, 0x0022B5}, + {0x002329, 0x002329, 0x003008}, {0x00232A, 0x00232A, 0x003009}, {0x002ADC, 0x002ADC, 0x002ADD}, + {0x00304C, 0x00304C, 0x00304B}, {0x00304E, 0x00304E, 0x00304D}, {0x003050, 0x003050, 0x00304F}, + {0x003052, 0x003052, 0x003051}, {0x003054, 0x003054, 0x003053}, {0x003056, 0x003056, 0x003055}, + {0x003058, 0x003058, 0x003057}, {0x00305A, 0x00305A, 0x003059}, {0x00305C, 0x00305C, 0x00305B}, + {0x00305E, 0x00305E, 0x00305D}, {0x003060, 0x003060, 0x00305F}, {0x003062, 0x003062, 0x003061}, + {0x003065, 0x003065, 0x003064}, {0x003067, 0x003067, 0x003066}, {0x003069, 0x003069, 0x003068}, + {0x003070, 0x003071, 0x00306F}, {0x003073, 0x003074, 0x003072}, {0x003076, 0x003077, 0x003075}, + {0x003079, 0x00307A, 0x003078}, {0x00307C, 0x00307D, 0x00307B}, {0x003094, 0x003094, 0x003046}, + {0x00309E, 0x00309E, 0x00309D}, {0x0030AC, 0x0030AC, 0x0030AB}, {0x0030AE, 0x0030AE, 0x0030AD}, + {0x0030B0, 0x0030B0, 0x0030AF}, {0x0030B2, 0x0030B2, 0x0030B1}, {0x0030B4, 0x0030B4, 0x0030B3}, + {0x0030B6, 0x0030B6, 0x0030B5}, {0x0030B8, 0x0030B8, 0x0030B7}, {0x0030BA, 0x0030BA, 0x0030B9}, + {0x0030BC, 0x0030BC, 0x0030BB}, {0x0030BE, 0x0030BE, 0x0030BD}, {0x0030C0, 0x0030C0, 0x0030BF}, + {0x0030C2, 0x0030C2, 0x0030C1}, {0x0030C5, 0x0030C5, 0x0030C4}, {0x0030C7, 0x0030C7, 0x0030C6}, + {0x0030C9, 0x0030C9, 0x0030C8}, {0x0030D0, 0x0030D1, 0x0030CF}, {0x0030D3, 0x0030D4, 0x0030D2}, + {0x0030D6, 0x0030D7, 0x0030D5}, {0x0030D9, 0x0030DA, 0x0030D8}, {0x0030DC, 0x0030DD, 0x0030DB}, + {0x0030F4, 0x0030F4, 0x0030A6}, {0x0030F7, 0x0030F7, 0x0030EF}, {0x0030F8, 0x0030F8, 0x0030F0}, + {0x0030F9, 0x0030F9, 0x0030F1}, {0x0030FA, 0x0030FA, 0x0030F2}, {0x0030FE, 0x0030FE, 0x0030FD}, + {0x00AC00, 0x00AE4B, 0x001100}, {0x00AE4C, 0x00B097, 0x001101}, {0x00B098, 0x00B2E3, 0x001102}, + {0x00B2E4, 0x00B52F, 0x001103}, {0x00B530, 0x00B77B, 0x001104}, {0x00B77C, 0x00B9C7, 0x001105}, + {0x00B9C8, 0x00BC13, 0x001106}, {0x00BC14, 0x00BE5F, 0x001107}, {0x00BE60, 0x00C0AB, 0x001108}, + {0x00C0AC, 0x00C2F7, 
0x001109}, {0x00C2F8, 0x00C543, 0x00110A}, {0x00C544, 0x00C78F, 0x00110B}, + {0x00C790, 0x00C9DB, 0x00110C}, {0x00C9DC, 0x00CC27, 0x00110D}, {0x00CC28, 0x00CE73, 0x00110E}, + {0x00CE74, 0x00D0BF, 0x00110F}, {0x00D0C0, 0x00D30B, 0x001110}, {0x00D30C, 0x00D557, 0x001111}, + {0x00D558, 0x00D7A3, 0x001112}, {0x00F900, 0x00F900, 0x008C48}, {0x00F901, 0x00F901, 0x0066F4}, + {0x00F902, 0x00F902, 0x008ECA}, {0x00F903, 0x00F903, 0x008CC8}, {0x00F904, 0x00F904, 0x006ED1}, + {0x00F905, 0x00F905, 0x004E32}, {0x00F906, 0x00F906, 0x0053E5}, {0x00F907, 0x00F908, 0x009F9C}, + {0x00F909, 0x00F909, 0x005951}, {0x00F90A, 0x00F90A, 0x0091D1}, {0x00F90B, 0x00F90B, 0x005587}, + {0x00F90C, 0x00F90C, 0x005948}, {0x00F90D, 0x00F90D, 0x0061F6}, {0x00F90E, 0x00F90E, 0x007669}, + {0x00F90F, 0x00F90F, 0x007F85}, {0x00F910, 0x00F910, 0x00863F}, {0x00F911, 0x00F911, 0x0087BA}, + {0x00F912, 0x00F912, 0x0088F8}, {0x00F913, 0x00F913, 0x00908F}, {0x00F914, 0x00F914, 0x006A02}, + {0x00F915, 0x00F915, 0x006D1B}, {0x00F916, 0x00F916, 0x0070D9}, {0x00F917, 0x00F917, 0x0073DE}, + {0x00F918, 0x00F918, 0x00843D}, {0x00F919, 0x00F919, 0x00916A}, {0x00F91A, 0x00F91A, 0x0099F1}, + {0x00F91B, 0x00F91B, 0x004E82}, {0x00F91C, 0x00F91C, 0x005375}, {0x00F91D, 0x00F91D, 0x006B04}, + {0x00F91E, 0x00F91E, 0x00721B}, {0x00F91F, 0x00F91F, 0x00862D}, {0x00F920, 0x00F920, 0x009E1E}, + {0x00F921, 0x00F921, 0x005D50}, {0x00F922, 0x00F922, 0x006FEB}, {0x00F923, 0x00F923, 0x0085CD}, + {0x00F924, 0x00F924, 0x008964}, {0x00F925, 0x00F925, 0x0062C9}, {0x00F926, 0x00F926, 0x0081D8}, + {0x00F927, 0x00F927, 0x00881F}, {0x00F928, 0x00F928, 0x005ECA}, {0x00F929, 0x00F929, 0x006717}, + {0x00F92A, 0x00F92A, 0x006D6A}, {0x00F92B, 0x00F92B, 0x0072FC}, {0x00F92C, 0x00F92C, 0x0090CE}, + {0x00F92D, 0x00F92D, 0x004F86}, {0x00F92E, 0x00F92E, 0x0051B7}, {0x00F92F, 0x00F92F, 0x0052DE}, + {0x00F930, 0x00F930, 0x0064C4}, {0x00F931, 0x00F931, 0x006AD3}, {0x00F932, 0x00F932, 0x007210}, + {0x00F933, 0x00F933, 0x0076E7}, {0x00F934, 0x00F934, 0x008001}, {0x00F935, 0x00F935, 0x008606}, + {0x00F936, 0x00F936, 0x00865C}, {0x00F937, 0x00F937, 0x008DEF}, {0x00F938, 0x00F938, 0x009732}, + {0x00F939, 0x00F939, 0x009B6F}, {0x00F93A, 0x00F93A, 0x009DFA}, {0x00F93B, 0x00F93B, 0x00788C}, + {0x00F93C, 0x00F93C, 0x00797F}, {0x00F93D, 0x00F93D, 0x007DA0}, {0x00F93E, 0x00F93E, 0x0083C9}, + {0x00F93F, 0x00F93F, 0x009304}, {0x00F940, 0x00F940, 0x009E7F}, {0x00F941, 0x00F941, 0x008AD6}, + {0x00F942, 0x00F942, 0x0058DF}, {0x00F943, 0x00F943, 0x005F04}, {0x00F944, 0x00F944, 0x007C60}, + {0x00F945, 0x00F945, 0x00807E}, {0x00F946, 0x00F946, 0x007262}, {0x00F947, 0x00F947, 0x0078CA}, + {0x00F948, 0x00F948, 0x008CC2}, {0x00F949, 0x00F949, 0x0096F7}, {0x00F94A, 0x00F94A, 0x0058D8}, + {0x00F94B, 0x00F94B, 0x005C62}, {0x00F94C, 0x00F94C, 0x006A13}, {0x00F94D, 0x00F94D, 0x006DDA}, + {0x00F94E, 0x00F94E, 0x006F0F}, {0x00F94F, 0x00F94F, 0x007D2F}, {0x00F950, 0x00F950, 0x007E37}, + {0x00F951, 0x00F951, 0x00964B}, {0x00F952, 0x00F952, 0x0052D2}, {0x00F953, 0x00F953, 0x00808B}, + {0x00F954, 0x00F954, 0x0051DC}, {0x00F955, 0x00F955, 0x0051CC}, {0x00F956, 0x00F956, 0x007A1C}, + {0x00F957, 0x00F957, 0x007DBE}, {0x00F958, 0x00F958, 0x0083F1}, {0x00F959, 0x00F959, 0x009675}, + {0x00F95A, 0x00F95A, 0x008B80}, {0x00F95B, 0x00F95B, 0x0062CF}, {0x00F95C, 0x00F95C, 0x006A02}, + {0x00F95D, 0x00F95D, 0x008AFE}, {0x00F95E, 0x00F95E, 0x004E39}, {0x00F95F, 0x00F95F, 0x005BE7}, + {0x00F960, 0x00F960, 0x006012}, {0x00F961, 0x00F961, 0x007387}, {0x00F962, 0x00F962, 0x007570}, + {0x00F963, 0x00F963, 0x005317}, {0x00F964, 
0x00F964, 0x0078FB}, {0x00F965, 0x00F965, 0x004FBF}, + {0x00F966, 0x00F966, 0x005FA9}, {0x00F967, 0x00F967, 0x004E0D}, {0x00F968, 0x00F968, 0x006CCC}, + {0x00F969, 0x00F969, 0x006578}, {0x00F96A, 0x00F96A, 0x007D22}, {0x00F96B, 0x00F96B, 0x0053C3}, + {0x00F96C, 0x00F96C, 0x00585E}, {0x00F96D, 0x00F96D, 0x007701}, {0x00F96E, 0x00F96E, 0x008449}, + {0x00F96F, 0x00F96F, 0x008AAA}, {0x00F970, 0x00F970, 0x006BBA}, {0x00F971, 0x00F971, 0x008FB0}, + {0x00F972, 0x00F972, 0x006C88}, {0x00F973, 0x00F973, 0x0062FE}, {0x00F974, 0x00F974, 0x0082E5}, + {0x00F975, 0x00F975, 0x0063A0}, {0x00F976, 0x00F976, 0x007565}, {0x00F977, 0x00F977, 0x004EAE}, + {0x00F978, 0x00F978, 0x005169}, {0x00F979, 0x00F979, 0x0051C9}, {0x00F97A, 0x00F97A, 0x006881}, + {0x00F97B, 0x00F97B, 0x007CE7}, {0x00F97C, 0x00F97C, 0x00826F}, {0x00F97D, 0x00F97D, 0x008AD2}, + {0x00F97E, 0x00F97E, 0x0091CF}, {0x00F97F, 0x00F97F, 0x0052F5}, {0x00F980, 0x00F980, 0x005442}, + {0x00F981, 0x00F981, 0x005973}, {0x00F982, 0x00F982, 0x005EEC}, {0x00F983, 0x00F983, 0x0065C5}, + {0x00F984, 0x00F984, 0x006FFE}, {0x00F985, 0x00F985, 0x00792A}, {0x00F986, 0x00F986, 0x0095AD}, + {0x00F987, 0x00F987, 0x009A6A}, {0x00F988, 0x00F988, 0x009E97}, {0x00F989, 0x00F989, 0x009ECE}, + {0x00F98A, 0x00F98A, 0x00529B}, {0x00F98B, 0x00F98B, 0x0066C6}, {0x00F98C, 0x00F98C, 0x006B77}, + {0x00F98D, 0x00F98D, 0x008F62}, {0x00F98E, 0x00F98E, 0x005E74}, {0x00F98F, 0x00F98F, 0x006190}, + {0x00F990, 0x00F990, 0x006200}, {0x00F991, 0x00F991, 0x00649A}, {0x00F992, 0x00F992, 0x006F23}, + {0x00F993, 0x00F993, 0x007149}, {0x00F994, 0x00F994, 0x007489}, {0x00F995, 0x00F995, 0x0079CA}, + {0x00F996, 0x00F996, 0x007DF4}, {0x00F997, 0x00F997, 0x00806F}, {0x00F998, 0x00F998, 0x008F26}, + {0x00F999, 0x00F999, 0x0084EE}, {0x00F99A, 0x00F99A, 0x009023}, {0x00F99B, 0x00F99B, 0x00934A}, + {0x00F99C, 0x00F99C, 0x005217}, {0x00F99D, 0x00F99D, 0x0052A3}, {0x00F99E, 0x00F99E, 0x0054BD}, + {0x00F99F, 0x00F99F, 0x0070C8}, {0x00F9A0, 0x00F9A0, 0x0088C2}, {0x00F9A1, 0x00F9A1, 0x008AAA}, + {0x00F9A2, 0x00F9A2, 0x005EC9}, {0x00F9A3, 0x00F9A3, 0x005FF5}, {0x00F9A4, 0x00F9A4, 0x00637B}, + {0x00F9A5, 0x00F9A5, 0x006BAE}, {0x00F9A6, 0x00F9A6, 0x007C3E}, {0x00F9A7, 0x00F9A7, 0x007375}, + {0x00F9A8, 0x00F9A8, 0x004EE4}, {0x00F9A9, 0x00F9A9, 0x0056F9}, {0x00F9AA, 0x00F9AA, 0x005BE7}, + {0x00F9AB, 0x00F9AB, 0x005DBA}, {0x00F9AC, 0x00F9AC, 0x00601C}, {0x00F9AD, 0x00F9AD, 0x0073B2}, + {0x00F9AE, 0x00F9AE, 0x007469}, {0x00F9AF, 0x00F9AF, 0x007F9A}, {0x00F9B0, 0x00F9B0, 0x008046}, + {0x00F9B1, 0x00F9B1, 0x009234}, {0x00F9B2, 0x00F9B2, 0x0096F6}, {0x00F9B3, 0x00F9B3, 0x009748}, + {0x00F9B4, 0x00F9B4, 0x009818}, {0x00F9B5, 0x00F9B5, 0x004F8B}, {0x00F9B6, 0x00F9B6, 0x0079AE}, + {0x00F9B7, 0x00F9B7, 0x0091B4}, {0x00F9B8, 0x00F9B8, 0x0096B8}, {0x00F9B9, 0x00F9B9, 0x0060E1}, + {0x00F9BA, 0x00F9BA, 0x004E86}, {0x00F9BB, 0x00F9BB, 0x0050DA}, {0x00F9BC, 0x00F9BC, 0x005BEE}, + {0x00F9BD, 0x00F9BD, 0x005C3F}, {0x00F9BE, 0x00F9BE, 0x006599}, {0x00F9BF, 0x00F9BF, 0x006A02}, + {0x00F9C0, 0x00F9C0, 0x0071CE}, {0x00F9C1, 0x00F9C1, 0x007642}, {0x00F9C2, 0x00F9C2, 0x0084FC}, + {0x00F9C3, 0x00F9C3, 0x00907C}, {0x00F9C4, 0x00F9C4, 0x009F8D}, {0x00F9C5, 0x00F9C5, 0x006688}, + {0x00F9C6, 0x00F9C6, 0x00962E}, {0x00F9C7, 0x00F9C7, 0x005289}, {0x00F9C8, 0x00F9C8, 0x00677B}, + {0x00F9C9, 0x00F9C9, 0x0067F3}, {0x00F9CA, 0x00F9CA, 0x006D41}, {0x00F9CB, 0x00F9CB, 0x006E9C}, + {0x00F9CC, 0x00F9CC, 0x007409}, {0x00F9CD, 0x00F9CD, 0x007559}, {0x00F9CE, 0x00F9CE, 0x00786B}, + {0x00F9CF, 0x00F9CF, 0x007D10}, {0x00F9D0, 0x00F9D0, 0x00985E}, 
{0x00F9D1, 0x00F9D1, 0x00516D}, + {0x00F9D2, 0x00F9D2, 0x00622E}, {0x00F9D3, 0x00F9D3, 0x009678}, {0x00F9D4, 0x00F9D4, 0x00502B}, + {0x00F9D5, 0x00F9D5, 0x005D19}, {0x00F9D6, 0x00F9D6, 0x006DEA}, {0x00F9D7, 0x00F9D7, 0x008F2A}, + {0x00F9D8, 0x00F9D8, 0x005F8B}, {0x00F9D9, 0x00F9D9, 0x006144}, {0x00F9DA, 0x00F9DA, 0x006817}, + {0x00F9DB, 0x00F9DB, 0x007387}, {0x00F9DC, 0x00F9DC, 0x009686}, {0x00F9DD, 0x00F9DD, 0x005229}, + {0x00F9DE, 0x00F9DE, 0x00540F}, {0x00F9DF, 0x00F9DF, 0x005C65}, {0x00F9E0, 0x00F9E0, 0x006613}, + {0x00F9E1, 0x00F9E1, 0x00674E}, {0x00F9E2, 0x00F9E2, 0x0068A8}, {0x00F9E3, 0x00F9E3, 0x006CE5}, + {0x00F9E4, 0x00F9E4, 0x007406}, {0x00F9E5, 0x00F9E5, 0x0075E2}, {0x00F9E6, 0x00F9E6, 0x007F79}, + {0x00F9E7, 0x00F9E7, 0x0088CF}, {0x00F9E8, 0x00F9E8, 0x0088E1}, {0x00F9E9, 0x00F9E9, 0x0091CC}, + {0x00F9EA, 0x00F9EA, 0x0096E2}, {0x00F9EB, 0x00F9EB, 0x00533F}, {0x00F9EC, 0x00F9EC, 0x006EBA}, + {0x00F9ED, 0x00F9ED, 0x00541D}, {0x00F9EE, 0x00F9EE, 0x0071D0}, {0x00F9EF, 0x00F9EF, 0x007498}, + {0x00F9F0, 0x00F9F0, 0x0085FA}, {0x00F9F1, 0x00F9F1, 0x0096A3}, {0x00F9F2, 0x00F9F2, 0x009C57}, + {0x00F9F3, 0x00F9F3, 0x009E9F}, {0x00F9F4, 0x00F9F4, 0x006797}, {0x00F9F5, 0x00F9F5, 0x006DCB}, + {0x00F9F6, 0x00F9F6, 0x0081E8}, {0x00F9F7, 0x00F9F7, 0x007ACB}, {0x00F9F8, 0x00F9F8, 0x007B20}, + {0x00F9F9, 0x00F9F9, 0x007C92}, {0x00F9FA, 0x00F9FA, 0x0072C0}, {0x00F9FB, 0x00F9FB, 0x007099}, + {0x00F9FC, 0x00F9FC, 0x008B58}, {0x00F9FD, 0x00F9FD, 0x004EC0}, {0x00F9FE, 0x00F9FE, 0x008336}, + {0x00F9FF, 0x00F9FF, 0x00523A}, {0x00FA00, 0x00FA00, 0x005207}, {0x00FA01, 0x00FA01, 0x005EA6}, + {0x00FA02, 0x00FA02, 0x0062D3}, {0x00FA03, 0x00FA03, 0x007CD6}, {0x00FA04, 0x00FA04, 0x005B85}, + {0x00FA05, 0x00FA05, 0x006D1E}, {0x00FA06, 0x00FA06, 0x0066B4}, {0x00FA07, 0x00FA07, 0x008F3B}, + {0x00FA08, 0x00FA08, 0x00884C}, {0x00FA09, 0x00FA09, 0x00964D}, {0x00FA0A, 0x00FA0A, 0x00898B}, + {0x00FA0B, 0x00FA0B, 0x005ED3}, {0x00FA0C, 0x00FA0C, 0x005140}, {0x00FA0D, 0x00FA0D, 0x0055C0}, + {0x00FA10, 0x00FA10, 0x00585A}, {0x00FA12, 0x00FA12, 0x006674}, {0x00FA15, 0x00FA15, 0x0051DE}, + {0x00FA16, 0x00FA16, 0x00732A}, {0x00FA17, 0x00FA17, 0x0076CA}, {0x00FA18, 0x00FA18, 0x00793C}, + {0x00FA19, 0x00FA19, 0x00795E}, {0x00FA1A, 0x00FA1A, 0x007965}, {0x00FA1B, 0x00FA1B, 0x00798F}, + {0x00FA1C, 0x00FA1C, 0x009756}, {0x00FA1D, 0x00FA1D, 0x007CBE}, {0x00FA1E, 0x00FA1E, 0x007FBD}, + {0x00FA20, 0x00FA20, 0x008612}, {0x00FA22, 0x00FA22, 0x008AF8}, {0x00FA25, 0x00FA25, 0x009038}, + {0x00FA26, 0x00FA26, 0x0090FD}, {0x00FA2A, 0x00FA2A, 0x0098EF}, {0x00FA2B, 0x00FA2B, 0x0098FC}, + {0x00FA2C, 0x00FA2C, 0x009928}, {0x00FA2D, 0x00FA2D, 0x009DB4}, {0x00FA2E, 0x00FA2E, 0x0090DE}, + {0x00FA2F, 0x00FA2F, 0x0096B7}, {0x00FA30, 0x00FA30, 0x004FAE}, {0x00FA31, 0x00FA31, 0x0050E7}, + {0x00FA32, 0x00FA32, 0x00514D}, {0x00FA33, 0x00FA33, 0x0052C9}, {0x00FA34, 0x00FA34, 0x0052E4}, + {0x00FA35, 0x00FA35, 0x005351}, {0x00FA36, 0x00FA36, 0x00559D}, {0x00FA37, 0x00FA37, 0x005606}, + {0x00FA38, 0x00FA38, 0x005668}, {0x00FA39, 0x00FA39, 0x005840}, {0x00FA3A, 0x00FA3A, 0x0058A8}, + {0x00FA3B, 0x00FA3B, 0x005C64}, {0x00FA3C, 0x00FA3C, 0x005C6E}, {0x00FA3D, 0x00FA3D, 0x006094}, + {0x00FA3E, 0x00FA3E, 0x006168}, {0x00FA3F, 0x00FA3F, 0x00618E}, {0x00FA40, 0x00FA40, 0x0061F2}, + {0x00FA41, 0x00FA41, 0x00654F}, {0x00FA42, 0x00FA42, 0x0065E2}, {0x00FA43, 0x00FA43, 0x006691}, + {0x00FA44, 0x00FA44, 0x006885}, {0x00FA45, 0x00FA45, 0x006D77}, {0x00FA46, 0x00FA46, 0x006E1A}, + {0x00FA47, 0x00FA47, 0x006F22}, {0x00FA48, 0x00FA48, 0x00716E}, {0x00FA49, 0x00FA49, 
0x00722B}, + {0x00FA4A, 0x00FA4A, 0x007422}, {0x00FA4B, 0x00FA4B, 0x007891}, {0x00FA4C, 0x00FA4C, 0x00793E}, + {0x00FA4D, 0x00FA4D, 0x007949}, {0x00FA4E, 0x00FA4E, 0x007948}, {0x00FA4F, 0x00FA4F, 0x007950}, + {0x00FA50, 0x00FA50, 0x007956}, {0x00FA51, 0x00FA51, 0x00795D}, {0x00FA52, 0x00FA52, 0x00798D}, + {0x00FA53, 0x00FA53, 0x00798E}, {0x00FA54, 0x00FA54, 0x007A40}, {0x00FA55, 0x00FA55, 0x007A81}, + {0x00FA56, 0x00FA56, 0x007BC0}, {0x00FA57, 0x00FA57, 0x007DF4}, {0x00FA58, 0x00FA58, 0x007E09}, + {0x00FA59, 0x00FA59, 0x007E41}, {0x00FA5A, 0x00FA5A, 0x007F72}, {0x00FA5B, 0x00FA5B, 0x008005}, + {0x00FA5C, 0x00FA5C, 0x0081ED}, {0x00FA5D, 0x00FA5E, 0x008279}, {0x00FA5F, 0x00FA5F, 0x008457}, + {0x00FA60, 0x00FA60, 0x008910}, {0x00FA61, 0x00FA61, 0x008996}, {0x00FA62, 0x00FA62, 0x008B01}, + {0x00FA63, 0x00FA63, 0x008B39}, {0x00FA64, 0x00FA64, 0x008CD3}, {0x00FA65, 0x00FA65, 0x008D08}, + {0x00FA66, 0x00FA66, 0x008FB6}, {0x00FA67, 0x00FA67, 0x009038}, {0x00FA68, 0x00FA68, 0x0096E3}, + {0x00FA69, 0x00FA69, 0x0097FF}, {0x00FA6A, 0x00FA6A, 0x00983B}, {0x00FA6B, 0x00FA6B, 0x006075}, + {0x00FA6C, 0x00FA6C, 0x0242EE}, {0x00FA6D, 0x00FA6D, 0x008218}, {0x00FA70, 0x00FA70, 0x004E26}, + {0x00FA71, 0x00FA71, 0x0051B5}, {0x00FA72, 0x00FA72, 0x005168}, {0x00FA73, 0x00FA73, 0x004F80}, + {0x00FA74, 0x00FA74, 0x005145}, {0x00FA75, 0x00FA75, 0x005180}, {0x00FA76, 0x00FA76, 0x0052C7}, + {0x00FA77, 0x00FA77, 0x0052FA}, {0x00FA78, 0x00FA78, 0x00559D}, {0x00FA79, 0x00FA79, 0x005555}, + {0x00FA7A, 0x00FA7A, 0x005599}, {0x00FA7B, 0x00FA7B, 0x0055E2}, {0x00FA7C, 0x00FA7C, 0x00585A}, + {0x00FA7D, 0x00FA7D, 0x0058B3}, {0x00FA7E, 0x00FA7E, 0x005944}, {0x00FA7F, 0x00FA7F, 0x005954}, + {0x00FA80, 0x00FA80, 0x005A62}, {0x00FA81, 0x00FA81, 0x005B28}, {0x00FA82, 0x00FA82, 0x005ED2}, + {0x00FA83, 0x00FA83, 0x005ED9}, {0x00FA84, 0x00FA84, 0x005F69}, {0x00FA85, 0x00FA85, 0x005FAD}, + {0x00FA86, 0x00FA86, 0x0060D8}, {0x00FA87, 0x00FA87, 0x00614E}, {0x00FA88, 0x00FA88, 0x006108}, + {0x00FA89, 0x00FA89, 0x00618E}, {0x00FA8A, 0x00FA8A, 0x006160}, {0x00FA8B, 0x00FA8B, 0x0061F2}, + {0x00FA8C, 0x00FA8C, 0x006234}, {0x00FA8D, 0x00FA8D, 0x0063C4}, {0x00FA8E, 0x00FA8E, 0x00641C}, + {0x00FA8F, 0x00FA8F, 0x006452}, {0x00FA90, 0x00FA90, 0x006556}, {0x00FA91, 0x00FA91, 0x006674}, + {0x00FA92, 0x00FA92, 0x006717}, {0x00FA93, 0x00FA93, 0x00671B}, {0x00FA94, 0x00FA94, 0x006756}, + {0x00FA95, 0x00FA95, 0x006B79}, {0x00FA96, 0x00FA96, 0x006BBA}, {0x00FA97, 0x00FA97, 0x006D41}, + {0x00FA98, 0x00FA98, 0x006EDB}, {0x00FA99, 0x00FA99, 0x006ECB}, {0x00FA9A, 0x00FA9A, 0x006F22}, + {0x00FA9B, 0x00FA9B, 0x00701E}, {0x00FA9C, 0x00FA9C, 0x00716E}, {0x00FA9D, 0x00FA9D, 0x0077A7}, + {0x00FA9E, 0x00FA9E, 0x007235}, {0x00FA9F, 0x00FA9F, 0x0072AF}, {0x00FAA0, 0x00FAA0, 0x00732A}, + {0x00FAA1, 0x00FAA1, 0x007471}, {0x00FAA2, 0x00FAA2, 0x007506}, {0x00FAA3, 0x00FAA3, 0x00753B}, + {0x00FAA4, 0x00FAA4, 0x00761D}, {0x00FAA5, 0x00FAA5, 0x00761F}, {0x00FAA6, 0x00FAA6, 0x0076CA}, + {0x00FAA7, 0x00FAA7, 0x0076DB}, {0x00FAA8, 0x00FAA8, 0x0076F4}, {0x00FAA9, 0x00FAA9, 0x00774A}, + {0x00FAAA, 0x00FAAA, 0x007740}, {0x00FAAB, 0x00FAAB, 0x0078CC}, {0x00FAAC, 0x00FAAC, 0x007AB1}, + {0x00FAAD, 0x00FAAD, 0x007BC0}, {0x00FAAE, 0x00FAAE, 0x007C7B}, {0x00FAAF, 0x00FAAF, 0x007D5B}, + {0x00FAB0, 0x00FAB0, 0x007DF4}, {0x00FAB1, 0x00FAB1, 0x007F3E}, {0x00FAB2, 0x00FAB2, 0x008005}, + {0x00FAB3, 0x00FAB3, 0x008352}, {0x00FAB4, 0x00FAB4, 0x0083EF}, {0x00FAB5, 0x00FAB5, 0x008779}, + {0x00FAB6, 0x00FAB6, 0x008941}, {0x00FAB7, 0x00FAB7, 0x008986}, {0x00FAB8, 0x00FAB8, 0x008996}, + {0x00FAB9, 
0x00FAB9, 0x008ABF}, {0x00FABA, 0x00FABA, 0x008AF8}, {0x00FABB, 0x00FABB, 0x008ACB}, + {0x00FABC, 0x00FABC, 0x008B01}, {0x00FABD, 0x00FABD, 0x008AFE}, {0x00FABE, 0x00FABE, 0x008AED}, + {0x00FABF, 0x00FABF, 0x008B39}, {0x00FAC0, 0x00FAC0, 0x008B8A}, {0x00FAC1, 0x00FAC1, 0x008D08}, + {0x00FAC2, 0x00FAC2, 0x008F38}, {0x00FAC3, 0x00FAC3, 0x009072}, {0x00FAC4, 0x00FAC4, 0x009199}, + {0x00FAC5, 0x00FAC5, 0x009276}, {0x00FAC6, 0x00FAC6, 0x00967C}, {0x00FAC7, 0x00FAC7, 0x0096E3}, + {0x00FAC8, 0x00FAC8, 0x009756}, {0x00FAC9, 0x00FAC9, 0x0097DB}, {0x00FACA, 0x00FACA, 0x0097FF}, + {0x00FACB, 0x00FACB, 0x00980B}, {0x00FACC, 0x00FACC, 0x00983B}, {0x00FACD, 0x00FACD, 0x009B12}, + {0x00FACE, 0x00FACE, 0x009F9C}, {0x00FACF, 0x00FACF, 0x02284A}, {0x00FAD0, 0x00FAD0, 0x022844}, + {0x00FAD1, 0x00FAD1, 0x0233D5}, {0x00FAD2, 0x00FAD2, 0x003B9D}, {0x00FAD3, 0x00FAD3, 0x004018}, + {0x00FAD4, 0x00FAD4, 0x004039}, {0x00FAD5, 0x00FAD5, 0x025249}, {0x00FAD6, 0x00FAD6, 0x025CD0}, + {0x00FAD7, 0x00FAD7, 0x027ED3}, {0x00FAD8, 0x00FAD8, 0x009F43}, {0x00FAD9, 0x00FAD9, 0x009F8E}, + {0x00FB1D, 0x00FB1D, 0x0005D9}, {0x00FB1F, 0x00FB1F, 0x0005F2}, {0x00FB2A, 0x00FB2D, 0x0005E9}, + {0x00FB2E, 0x00FB30, 0x0005D0}, {0x00FB31, 0x00FB31, 0x0005D1}, {0x00FB32, 0x00FB32, 0x0005D2}, + {0x00FB33, 0x00FB33, 0x0005D3}, {0x00FB34, 0x00FB34, 0x0005D4}, {0x00FB35, 0x00FB35, 0x0005D5}, + {0x00FB36, 0x00FB36, 0x0005D6}, {0x00FB38, 0x00FB38, 0x0005D8}, {0x00FB39, 0x00FB39, 0x0005D9}, + {0x00FB3A, 0x00FB3A, 0x0005DA}, {0x00FB3B, 0x00FB3B, 0x0005DB}, {0x00FB3C, 0x00FB3C, 0x0005DC}, + {0x00FB3E, 0x00FB3E, 0x0005DE}, {0x00FB40, 0x00FB40, 0x0005E0}, {0x00FB41, 0x00FB41, 0x0005E1}, + {0x00FB43, 0x00FB43, 0x0005E3}, {0x00FB44, 0x00FB44, 0x0005E4}, {0x00FB46, 0x00FB46, 0x0005E6}, + {0x00FB47, 0x00FB47, 0x0005E7}, {0x00FB48, 0x00FB48, 0x0005E8}, {0x00FB49, 0x00FB49, 0x0005E9}, + {0x00FB4A, 0x00FB4A, 0x0005EA}, {0x00FB4B, 0x00FB4B, 0x0005D5}, {0x00FB4C, 0x00FB4C, 0x0005D1}, + {0x00FB4D, 0x00FB4D, 0x0005DB}, {0x00FB4E, 0x00FB4E, 0x0005E4}, {0x01109A, 0x01109A, 0x011099}, + {0x01109C, 0x01109C, 0x01109B}, {0x0110AB, 0x0110AB, 0x0110A5}, {0x01112E, 0x01112E, 0x011131}, + {0x01112F, 0x01112F, 0x011132}, {0x01134B, 0x01134C, 0x011347}, {0x0114BB, 0x0114BC, 0x0114B9}, + {0x0114BE, 0x0114BE, 0x0114B9}, {0x0115BA, 0x0115BA, 0x0115B8}, {0x0115BB, 0x0115BB, 0x0115B9}, + {0x011938, 0x011938, 0x011935}, {0x01D15E, 0x01D15E, 0x01D157}, {0x01D15F, 0x01D164, 0x01D158}, + {0x01D1BB, 0x01D1BB, 0x01D1B9}, {0x01D1BC, 0x01D1BC, 0x01D1BA}, {0x01D1BD, 0x01D1BD, 0x01D1B9}, + {0x01D1BE, 0x01D1BE, 0x01D1BA}, {0x01D1BF, 0x01D1BF, 0x01D1B9}, {0x01D1C0, 0x01D1C0, 0x01D1BA}, + {0x02F800, 0x02F800, 0x004E3D}, {0x02F801, 0x02F801, 0x004E38}, {0x02F802, 0x02F802, 0x004E41}, + {0x02F803, 0x02F803, 0x020122}, {0x02F804, 0x02F804, 0x004F60}, {0x02F805, 0x02F805, 0x004FAE}, + {0x02F806, 0x02F806, 0x004FBB}, {0x02F807, 0x02F807, 0x005002}, {0x02F808, 0x02F808, 0x00507A}, + {0x02F809, 0x02F809, 0x005099}, {0x02F80A, 0x02F80A, 0x0050E7}, {0x02F80B, 0x02F80B, 0x0050CF}, + {0x02F80C, 0x02F80C, 0x00349E}, {0x02F80D, 0x02F80D, 0x02063A}, {0x02F80E, 0x02F80E, 0x00514D}, + {0x02F80F, 0x02F80F, 0x005154}, {0x02F810, 0x02F810, 0x005164}, {0x02F811, 0x02F811, 0x005177}, + {0x02F812, 0x02F812, 0x02051C}, {0x02F813, 0x02F813, 0x0034B9}, {0x02F814, 0x02F814, 0x005167}, + {0x02F815, 0x02F815, 0x00518D}, {0x02F816, 0x02F816, 0x02054B}, {0x02F817, 0x02F817, 0x005197}, + {0x02F818, 0x02F818, 0x0051A4}, {0x02F819, 0x02F819, 0x004ECC}, {0x02F81A, 0x02F81A, 0x0051AC}, + {0x02F81B, 0x02F81B, 0x0051B5}, 
{0x02F81C, 0x02F81C, 0x0291DF}, {0x02F81D, 0x02F81D, 0x0051F5}, + {0x02F81E, 0x02F81E, 0x005203}, {0x02F81F, 0x02F81F, 0x0034DF}, {0x02F820, 0x02F820, 0x00523B}, + {0x02F821, 0x02F821, 0x005246}, {0x02F822, 0x02F822, 0x005272}, {0x02F823, 0x02F823, 0x005277}, + {0x02F824, 0x02F824, 0x003515}, {0x02F825, 0x02F825, 0x0052C7}, {0x02F826, 0x02F826, 0x0052C9}, + {0x02F827, 0x02F827, 0x0052E4}, {0x02F828, 0x02F828, 0x0052FA}, {0x02F829, 0x02F829, 0x005305}, + {0x02F82A, 0x02F82A, 0x005306}, {0x02F82B, 0x02F82B, 0x005317}, {0x02F82C, 0x02F82C, 0x005349}, + {0x02F82D, 0x02F82D, 0x005351}, {0x02F82E, 0x02F82E, 0x00535A}, {0x02F82F, 0x02F82F, 0x005373}, + {0x02F830, 0x02F830, 0x00537D}, {0x02F831, 0x02F833, 0x00537F}, {0x02F834, 0x02F834, 0x020A2C}, + {0x02F835, 0x02F835, 0x007070}, {0x02F836, 0x02F836, 0x0053CA}, {0x02F837, 0x02F837, 0x0053DF}, + {0x02F838, 0x02F838, 0x020B63}, {0x02F839, 0x02F839, 0x0053EB}, {0x02F83A, 0x02F83A, 0x0053F1}, + {0x02F83B, 0x02F83B, 0x005406}, {0x02F83C, 0x02F83C, 0x00549E}, {0x02F83D, 0x02F83D, 0x005438}, + {0x02F83E, 0x02F83E, 0x005448}, {0x02F83F, 0x02F83F, 0x005468}, {0x02F840, 0x02F840, 0x0054A2}, + {0x02F841, 0x02F841, 0x0054F6}, {0x02F842, 0x02F842, 0x005510}, {0x02F843, 0x02F843, 0x005553}, + {0x02F844, 0x02F844, 0x005563}, {0x02F845, 0x02F846, 0x005584}, {0x02F847, 0x02F847, 0x005599}, + {0x02F848, 0x02F848, 0x0055AB}, {0x02F849, 0x02F849, 0x0055B3}, {0x02F84A, 0x02F84A, 0x0055C2}, + {0x02F84B, 0x02F84B, 0x005716}, {0x02F84C, 0x02F84C, 0x005606}, {0x02F84D, 0x02F84D, 0x005717}, + {0x02F84E, 0x02F84E, 0x005651}, {0x02F84F, 0x02F84F, 0x005674}, {0x02F850, 0x02F850, 0x005207}, + {0x02F851, 0x02F851, 0x0058EE}, {0x02F852, 0x02F852, 0x0057CE}, {0x02F853, 0x02F853, 0x0057F4}, + {0x02F854, 0x02F854, 0x00580D}, {0x02F855, 0x02F855, 0x00578B}, {0x02F856, 0x02F856, 0x005832}, + {0x02F857, 0x02F857, 0x005831}, {0x02F858, 0x02F858, 0x0058AC}, {0x02F859, 0x02F859, 0x0214E4}, + {0x02F85A, 0x02F85A, 0x0058F2}, {0x02F85B, 0x02F85B, 0x0058F7}, {0x02F85C, 0x02F85C, 0x005906}, + {0x02F85D, 0x02F85D, 0x00591A}, {0x02F85E, 0x02F85E, 0x005922}, {0x02F85F, 0x02F85F, 0x005962}, + {0x02F860, 0x02F860, 0x0216A8}, {0x02F861, 0x02F861, 0x0216EA}, {0x02F862, 0x02F862, 0x0059EC}, + {0x02F863, 0x02F863, 0x005A1B}, {0x02F864, 0x02F864, 0x005A27}, {0x02F865, 0x02F865, 0x0059D8}, + {0x02F866, 0x02F866, 0x005A66}, {0x02F867, 0x02F867, 0x0036EE}, {0x02F868, 0x02F868, 0x0036FC}, + {0x02F869, 0x02F869, 0x005B08}, {0x02F86A, 0x02F86B, 0x005B3E}, {0x02F86C, 0x02F86C, 0x0219C8}, + {0x02F86D, 0x02F86D, 0x005BC3}, {0x02F86E, 0x02F86E, 0x005BD8}, {0x02F86F, 0x02F86F, 0x005BE7}, + {0x02F870, 0x02F870, 0x005BF3}, {0x02F871, 0x02F871, 0x021B18}, {0x02F872, 0x02F872, 0x005BFF}, + {0x02F873, 0x02F873, 0x005C06}, {0x02F874, 0x02F874, 0x005F53}, {0x02F875, 0x02F875, 0x005C22}, + {0x02F876, 0x02F876, 0x003781}, {0x02F877, 0x02F877, 0x005C60}, {0x02F878, 0x02F878, 0x005C6E}, + {0x02F879, 0x02F879, 0x005CC0}, {0x02F87A, 0x02F87A, 0x005C8D}, {0x02F87B, 0x02F87B, 0x021DE4}, + {0x02F87C, 0x02F87C, 0x005D43}, {0x02F87D, 0x02F87D, 0x021DE6}, {0x02F87E, 0x02F87E, 0x005D6E}, + {0x02F87F, 0x02F87F, 0x005D6B}, {0x02F880, 0x02F880, 0x005D7C}, {0x02F881, 0x02F881, 0x005DE1}, + {0x02F882, 0x02F882, 0x005DE2}, {0x02F883, 0x02F883, 0x00382F}, {0x02F884, 0x02F884, 0x005DFD}, + {0x02F885, 0x02F885, 0x005E28}, {0x02F886, 0x02F886, 0x005E3D}, {0x02F887, 0x02F887, 0x005E69}, + {0x02F888, 0x02F888, 0x003862}, {0x02F889, 0x02F889, 0x022183}, {0x02F88A, 0x02F88A, 0x00387C}, + {0x02F88B, 0x02F88B, 0x005EB0}, {0x02F88C, 0x02F88C, 
0x005EB3}, {0x02F88D, 0x02F88D, 0x005EB6}, + {0x02F88E, 0x02F88E, 0x005ECA}, {0x02F88F, 0x02F88F, 0x02A392}, {0x02F890, 0x02F890, 0x005EFE}, + {0x02F891, 0x02F892, 0x022331}, {0x02F893, 0x02F893, 0x008201}, {0x02F894, 0x02F895, 0x005F22}, + {0x02F896, 0x02F896, 0x0038C7}, {0x02F897, 0x02F897, 0x0232B8}, {0x02F898, 0x02F898, 0x0261DA}, + {0x02F899, 0x02F899, 0x005F62}, {0x02F89A, 0x02F89A, 0x005F6B}, {0x02F89B, 0x02F89B, 0x0038E3}, + {0x02F89C, 0x02F89C, 0x005F9A}, {0x02F89D, 0x02F89D, 0x005FCD}, {0x02F89E, 0x02F89E, 0x005FD7}, + {0x02F89F, 0x02F89F, 0x005FF9}, {0x02F8A0, 0x02F8A0, 0x006081}, {0x02F8A1, 0x02F8A1, 0x00393A}, + {0x02F8A2, 0x02F8A2, 0x00391C}, {0x02F8A3, 0x02F8A3, 0x006094}, {0x02F8A4, 0x02F8A4, 0x0226D4}, + {0x02F8A5, 0x02F8A5, 0x0060C7}, {0x02F8A6, 0x02F8A6, 0x006148}, {0x02F8A7, 0x02F8A7, 0x00614C}, + {0x02F8A8, 0x02F8A8, 0x00614E}, {0x02F8A9, 0x02F8A9, 0x00614C}, {0x02F8AA, 0x02F8AA, 0x00617A}, + {0x02F8AB, 0x02F8AB, 0x00618E}, {0x02F8AC, 0x02F8AC, 0x0061B2}, {0x02F8AD, 0x02F8AD, 0x0061A4}, + {0x02F8AE, 0x02F8AE, 0x0061AF}, {0x02F8AF, 0x02F8AF, 0x0061DE}, {0x02F8B0, 0x02F8B0, 0x0061F2}, + {0x02F8B1, 0x02F8B1, 0x0061F6}, {0x02F8B2, 0x02F8B2, 0x006210}, {0x02F8B3, 0x02F8B3, 0x00621B}, + {0x02F8B4, 0x02F8B4, 0x00625D}, {0x02F8B5, 0x02F8B5, 0x0062B1}, {0x02F8B6, 0x02F8B6, 0x0062D4}, + {0x02F8B7, 0x02F8B7, 0x006350}, {0x02F8B8, 0x02F8B8, 0x022B0C}, {0x02F8B9, 0x02F8B9, 0x00633D}, + {0x02F8BA, 0x02F8BA, 0x0062FC}, {0x02F8BB, 0x02F8BB, 0x006368}, {0x02F8BC, 0x02F8BC, 0x006383}, + {0x02F8BD, 0x02F8BD, 0x0063E4}, {0x02F8BE, 0x02F8BE, 0x022BF1}, {0x02F8BF, 0x02F8BF, 0x006422}, + {0x02F8C0, 0x02F8C0, 0x0063C5}, {0x02F8C1, 0x02F8C1, 0x0063A9}, {0x02F8C2, 0x02F8C2, 0x003A2E}, + {0x02F8C3, 0x02F8C3, 0x006469}, {0x02F8C4, 0x02F8C4, 0x00647E}, {0x02F8C5, 0x02F8C5, 0x00649D}, + {0x02F8C6, 0x02F8C6, 0x006477}, {0x02F8C7, 0x02F8C7, 0x003A6C}, {0x02F8C8, 0x02F8C8, 0x00654F}, + {0x02F8C9, 0x02F8C9, 0x00656C}, {0x02F8CA, 0x02F8CA, 0x02300A}, {0x02F8CB, 0x02F8CB, 0x0065E3}, + {0x02F8CC, 0x02F8CC, 0x0066F8}, {0x02F8CD, 0x02F8CD, 0x006649}, {0x02F8CE, 0x02F8CE, 0x003B19}, + {0x02F8CF, 0x02F8CF, 0x006691}, {0x02F8D0, 0x02F8D0, 0x003B08}, {0x02F8D1, 0x02F8D1, 0x003AE4}, + {0x02F8D2, 0x02F8D2, 0x005192}, {0x02F8D3, 0x02F8D3, 0x005195}, {0x02F8D4, 0x02F8D4, 0x006700}, + {0x02F8D5, 0x02F8D5, 0x00669C}, {0x02F8D6, 0x02F8D6, 0x0080AD}, {0x02F8D7, 0x02F8D7, 0x0043D9}, + {0x02F8D8, 0x02F8D8, 0x006717}, {0x02F8D9, 0x02F8D9, 0x00671B}, {0x02F8DA, 0x02F8DA, 0x006721}, + {0x02F8DB, 0x02F8DB, 0x00675E}, {0x02F8DC, 0x02F8DC, 0x006753}, {0x02F8DD, 0x02F8DD, 0x0233C3}, + {0x02F8DE, 0x02F8DE, 0x003B49}, {0x02F8DF, 0x02F8DF, 0x0067FA}, {0x02F8E0, 0x02F8E0, 0x006785}, + {0x02F8E1, 0x02F8E1, 0x006852}, {0x02F8E2, 0x02F8E2, 0x006885}, {0x02F8E3, 0x02F8E3, 0x02346D}, + {0x02F8E4, 0x02F8E4, 0x00688E}, {0x02F8E5, 0x02F8E5, 0x00681F}, {0x02F8E6, 0x02F8E6, 0x006914}, + {0x02F8E7, 0x02F8E7, 0x003B9D}, {0x02F8E8, 0x02F8E8, 0x006942}, {0x02F8E9, 0x02F8E9, 0x0069A3}, + {0x02F8EA, 0x02F8EA, 0x0069EA}, {0x02F8EB, 0x02F8EB, 0x006AA8}, {0x02F8EC, 0x02F8EC, 0x0236A3}, + {0x02F8ED, 0x02F8ED, 0x006ADB}, {0x02F8EE, 0x02F8EE, 0x003C18}, {0x02F8EF, 0x02F8EF, 0x006B21}, + {0x02F8F0, 0x02F8F0, 0x0238A7}, {0x02F8F1, 0x02F8F1, 0x006B54}, {0x02F8F2, 0x02F8F2, 0x003C4E}, + {0x02F8F3, 0x02F8F3, 0x006B72}, {0x02F8F4, 0x02F8F4, 0x006B9F}, {0x02F8F5, 0x02F8F5, 0x006BBA}, + {0x02F8F6, 0x02F8F6, 0x006BBB}, {0x02F8F7, 0x02F8F7, 0x023A8D}, {0x02F8F8, 0x02F8F8, 0x021D0B}, + {0x02F8F9, 0x02F8F9, 0x023AFA}, {0x02F8FA, 0x02F8FA, 0x006C4E}, {0x02F8FB, 
0x02F8FB, 0x023CBC}, + {0x02F8FC, 0x02F8FC, 0x006CBF}, {0x02F8FD, 0x02F8FD, 0x006CCD}, {0x02F8FE, 0x02F8FE, 0x006C67}, + {0x02F8FF, 0x02F8FF, 0x006D16}, {0x02F900, 0x02F900, 0x006D3E}, {0x02F901, 0x02F901, 0x006D77}, + {0x02F902, 0x02F902, 0x006D41}, {0x02F903, 0x02F903, 0x006D69}, {0x02F904, 0x02F904, 0x006D78}, + {0x02F905, 0x02F905, 0x006D85}, {0x02F906, 0x02F906, 0x023D1E}, {0x02F907, 0x02F907, 0x006D34}, + {0x02F908, 0x02F908, 0x006E2F}, {0x02F909, 0x02F909, 0x006E6E}, {0x02F90A, 0x02F90A, 0x003D33}, + {0x02F90B, 0x02F90B, 0x006ECB}, {0x02F90C, 0x02F90C, 0x006EC7}, {0x02F90D, 0x02F90D, 0x023ED1}, + {0x02F90E, 0x02F90E, 0x006DF9}, {0x02F90F, 0x02F90F, 0x006F6E}, {0x02F910, 0x02F910, 0x023F5E}, + {0x02F911, 0x02F911, 0x023F8E}, {0x02F912, 0x02F912, 0x006FC6}, {0x02F913, 0x02F913, 0x007039}, + {0x02F914, 0x02F914, 0x00701E}, {0x02F915, 0x02F915, 0x00701B}, {0x02F916, 0x02F916, 0x003D96}, + {0x02F917, 0x02F917, 0x00704A}, {0x02F918, 0x02F918, 0x00707D}, {0x02F919, 0x02F919, 0x007077}, + {0x02F91A, 0x02F91A, 0x0070AD}, {0x02F91B, 0x02F91B, 0x020525}, {0x02F91C, 0x02F91C, 0x007145}, + {0x02F91D, 0x02F91D, 0x024263}, {0x02F91E, 0x02F91E, 0x00719C}, {0x02F91F, 0x02F91F, 0x0243AB}, + {0x02F920, 0x02F920, 0x007228}, {0x02F921, 0x02F921, 0x007235}, {0x02F922, 0x02F922, 0x007250}, + {0x02F923, 0x02F923, 0x024608}, {0x02F924, 0x02F924, 0x007280}, {0x02F925, 0x02F925, 0x007295}, + {0x02F926, 0x02F926, 0x024735}, {0x02F927, 0x02F927, 0x024814}, {0x02F928, 0x02F928, 0x00737A}, + {0x02F929, 0x02F929, 0x00738B}, {0x02F92A, 0x02F92A, 0x003EAC}, {0x02F92B, 0x02F92B, 0x0073A5}, + {0x02F92C, 0x02F92D, 0x003EB8}, {0x02F92E, 0x02F92E, 0x007447}, {0x02F92F, 0x02F92F, 0x00745C}, + {0x02F930, 0x02F930, 0x007471}, {0x02F931, 0x02F931, 0x007485}, {0x02F932, 0x02F932, 0x0074CA}, + {0x02F933, 0x02F933, 0x003F1B}, {0x02F934, 0x02F934, 0x007524}, {0x02F935, 0x02F935, 0x024C36}, + {0x02F936, 0x02F936, 0x00753E}, {0x02F937, 0x02F937, 0x024C92}, {0x02F938, 0x02F938, 0x007570}, + {0x02F939, 0x02F939, 0x02219F}, {0x02F93A, 0x02F93A, 0x007610}, {0x02F93B, 0x02F93B, 0x024FA1}, + {0x02F93C, 0x02F93C, 0x024FB8}, {0x02F93D, 0x02F93D, 0x025044}, {0x02F93E, 0x02F93E, 0x003FFC}, + {0x02F93F, 0x02F93F, 0x004008}, {0x02F940, 0x02F940, 0x0076F4}, {0x02F941, 0x02F941, 0x0250F3}, + {0x02F942, 0x02F942, 0x0250F2}, {0x02F943, 0x02F943, 0x025119}, {0x02F944, 0x02F944, 0x025133}, + {0x02F945, 0x02F945, 0x00771E}, {0x02F946, 0x02F947, 0x00771F}, {0x02F948, 0x02F948, 0x00774A}, + {0x02F949, 0x02F949, 0x004039}, {0x02F94A, 0x02F94A, 0x00778B}, {0x02F94B, 0x02F94B, 0x004046}, + {0x02F94C, 0x02F94C, 0x004096}, {0x02F94D, 0x02F94D, 0x02541D}, {0x02F94E, 0x02F94E, 0x00784E}, + {0x02F94F, 0x02F94F, 0x00788C}, {0x02F950, 0x02F950, 0x0078CC}, {0x02F951, 0x02F951, 0x0040E3}, + {0x02F952, 0x02F952, 0x025626}, {0x02F953, 0x02F953, 0x007956}, {0x02F954, 0x02F954, 0x02569A}, + {0x02F955, 0x02F955, 0x0256C5}, {0x02F956, 0x02F956, 0x00798F}, {0x02F957, 0x02F957, 0x0079EB}, + {0x02F958, 0x02F958, 0x00412F}, {0x02F959, 0x02F959, 0x007A40}, {0x02F95A, 0x02F95A, 0x007A4A}, + {0x02F95B, 0x02F95B, 0x007A4F}, {0x02F95C, 0x02F95C, 0x02597C}, {0x02F95D, 0x02F95E, 0x025AA7}, + {0x02F95F, 0x02F95F, 0x007AEE}, {0x02F960, 0x02F960, 0x004202}, {0x02F961, 0x02F961, 0x025BAB}, + {0x02F962, 0x02F962, 0x007BC6}, {0x02F963, 0x02F963, 0x007BC9}, {0x02F964, 0x02F964, 0x004227}, + {0x02F965, 0x02F965, 0x025C80}, {0x02F966, 0x02F966, 0x007CD2}, {0x02F967, 0x02F967, 0x0042A0}, + {0x02F968, 0x02F968, 0x007CE8}, {0x02F969, 0x02F969, 0x007CE3}, {0x02F96A, 0x02F96A, 0x007D00}, + 
{0x02F96B, 0x02F96B, 0x025F86}, {0x02F96C, 0x02F96C, 0x007D63}, {0x02F96D, 0x02F96D, 0x004301}, + {0x02F96E, 0x02F96E, 0x007DC7}, {0x02F96F, 0x02F96F, 0x007E02}, {0x02F970, 0x02F970, 0x007E45}, + {0x02F971, 0x02F971, 0x004334}, {0x02F972, 0x02F972, 0x026228}, {0x02F973, 0x02F973, 0x026247}, + {0x02F974, 0x02F974, 0x004359}, {0x02F975, 0x02F975, 0x0262D9}, {0x02F976, 0x02F976, 0x007F7A}, + {0x02F977, 0x02F977, 0x02633E}, {0x02F978, 0x02F978, 0x007F95}, {0x02F979, 0x02F979, 0x007FFA}, + {0x02F97A, 0x02F97A, 0x008005}, {0x02F97B, 0x02F97B, 0x0264DA}, {0x02F97C, 0x02F97C, 0x026523}, + {0x02F97D, 0x02F97D, 0x008060}, {0x02F97E, 0x02F97E, 0x0265A8}, {0x02F97F, 0x02F97F, 0x008070}, + {0x02F980, 0x02F980, 0x02335F}, {0x02F981, 0x02F981, 0x0043D5}, {0x02F982, 0x02F982, 0x0080B2}, + {0x02F983, 0x02F983, 0x008103}, {0x02F984, 0x02F984, 0x00440B}, {0x02F985, 0x02F985, 0x00813E}, + {0x02F986, 0x02F986, 0x005AB5}, {0x02F987, 0x02F987, 0x0267A7}, {0x02F988, 0x02F988, 0x0267B5}, + {0x02F989, 0x02F989, 0x023393}, {0x02F98A, 0x02F98A, 0x02339C}, {0x02F98B, 0x02F98B, 0x008201}, + {0x02F98C, 0x02F98C, 0x008204}, {0x02F98D, 0x02F98D, 0x008F9E}, {0x02F98E, 0x02F98E, 0x00446B}, + {0x02F98F, 0x02F98F, 0x008291}, {0x02F990, 0x02F990, 0x00828B}, {0x02F991, 0x02F991, 0x00829D}, + {0x02F992, 0x02F992, 0x0052B3}, {0x02F993, 0x02F993, 0x0082B1}, {0x02F994, 0x02F994, 0x0082B3}, + {0x02F995, 0x02F995, 0x0082BD}, {0x02F996, 0x02F996, 0x0082E6}, {0x02F997, 0x02F997, 0x026B3C}, + {0x02F998, 0x02F998, 0x0082E5}, {0x02F999, 0x02F999, 0x00831D}, {0x02F99A, 0x02F99A, 0x008363}, + {0x02F99B, 0x02F99B, 0x0083AD}, {0x02F99C, 0x02F99C, 0x008323}, {0x02F99D, 0x02F99D, 0x0083BD}, + {0x02F99E, 0x02F99E, 0x0083E7}, {0x02F99F, 0x02F99F, 0x008457}, {0x02F9A0, 0x02F9A0, 0x008353}, + {0x02F9A1, 0x02F9A1, 0x0083CA}, {0x02F9A2, 0x02F9A2, 0x0083CC}, {0x02F9A3, 0x02F9A3, 0x0083DC}, + {0x02F9A4, 0x02F9A4, 0x026C36}, {0x02F9A5, 0x02F9A5, 0x026D6B}, {0x02F9A6, 0x02F9A6, 0x026CD5}, + {0x02F9A7, 0x02F9A7, 0x00452B}, {0x02F9A8, 0x02F9A8, 0x0084F1}, {0x02F9A9, 0x02F9A9, 0x0084F3}, + {0x02F9AA, 0x02F9AA, 0x008516}, {0x02F9AB, 0x02F9AB, 0x0273CA}, {0x02F9AC, 0x02F9AC, 0x008564}, + {0x02F9AD, 0x02F9AD, 0x026F2C}, {0x02F9AE, 0x02F9AE, 0x00455D}, {0x02F9AF, 0x02F9AF, 0x004561}, + {0x02F9B0, 0x02F9B0, 0x026FB1}, {0x02F9B1, 0x02F9B1, 0x0270D2}, {0x02F9B2, 0x02F9B2, 0x00456B}, + {0x02F9B3, 0x02F9B3, 0x008650}, {0x02F9B4, 0x02F9B4, 0x00865C}, {0x02F9B5, 0x02F9B5, 0x008667}, + {0x02F9B6, 0x02F9B6, 0x008669}, {0x02F9B7, 0x02F9B7, 0x0086A9}, {0x02F9B8, 0x02F9B8, 0x008688}, + {0x02F9B9, 0x02F9B9, 0x00870E}, {0x02F9BA, 0x02F9BA, 0x0086E2}, {0x02F9BB, 0x02F9BB, 0x008779}, + {0x02F9BC, 0x02F9BC, 0x008728}, {0x02F9BD, 0x02F9BD, 0x00876B}, {0x02F9BE, 0x02F9BE, 0x008786}, + {0x02F9BF, 0x02F9BF, 0x0045D7}, {0x02F9C0, 0x02F9C0, 0x0087E1}, {0x02F9C1, 0x02F9C1, 0x008801}, + {0x02F9C2, 0x02F9C2, 0x0045F9}, {0x02F9C3, 0x02F9C3, 0x008860}, {0x02F9C4, 0x02F9C4, 0x008863}, + {0x02F9C5, 0x02F9C5, 0x027667}, {0x02F9C6, 0x02F9C6, 0x0088D7}, {0x02F9C7, 0x02F9C7, 0x0088DE}, + {0x02F9C8, 0x02F9C8, 0x004635}, {0x02F9C9, 0x02F9C9, 0x0088FA}, {0x02F9CA, 0x02F9CA, 0x0034BB}, + {0x02F9CB, 0x02F9CB, 0x0278AE}, {0x02F9CC, 0x02F9CC, 0x027966}, {0x02F9CD, 0x02F9CD, 0x0046BE}, + {0x02F9CE, 0x02F9CE, 0x0046C7}, {0x02F9CF, 0x02F9CF, 0x008AA0}, {0x02F9D0, 0x02F9D0, 0x008AED}, + {0x02F9D1, 0x02F9D1, 0x008B8A}, {0x02F9D2, 0x02F9D2, 0x008C55}, {0x02F9D3, 0x02F9D3, 0x027CA8}, + {0x02F9D4, 0x02F9D4, 0x008CAB}, {0x02F9D5, 0x02F9D5, 0x008CC1}, {0x02F9D6, 0x02F9D6, 0x008D1B}, + {0x02F9D7, 0x02F9D7, 
0x008D77}, {0x02F9D8, 0x02F9D8, 0x027F2F}, {0x02F9D9, 0x02F9D9, 0x020804}, + {0x02F9DA, 0x02F9DA, 0x008DCB}, {0x02F9DB, 0x02F9DB, 0x008DBC}, {0x02F9DC, 0x02F9DC, 0x008DF0}, + {0x02F9DD, 0x02F9DD, 0x0208DE}, {0x02F9DE, 0x02F9DE, 0x008ED4}, {0x02F9DF, 0x02F9DF, 0x008F38}, + {0x02F9E0, 0x02F9E0, 0x0285D2}, {0x02F9E1, 0x02F9E1, 0x0285ED}, {0x02F9E2, 0x02F9E2, 0x009094}, + {0x02F9E3, 0x02F9E3, 0x0090F1}, {0x02F9E4, 0x02F9E4, 0x009111}, {0x02F9E5, 0x02F9E5, 0x02872E}, + {0x02F9E6, 0x02F9E6, 0x00911B}, {0x02F9E7, 0x02F9E7, 0x009238}, {0x02F9E8, 0x02F9E8, 0x0092D7}, + {0x02F9E9, 0x02F9E9, 0x0092D8}, {0x02F9EA, 0x02F9EA, 0x00927C}, {0x02F9EB, 0x02F9EB, 0x0093F9}, + {0x02F9EC, 0x02F9EC, 0x009415}, {0x02F9ED, 0x02F9ED, 0x028BFA}, {0x02F9EE, 0x02F9EE, 0x00958B}, + {0x02F9EF, 0x02F9EF, 0x004995}, {0x02F9F0, 0x02F9F0, 0x0095B7}, {0x02F9F1, 0x02F9F1, 0x028D77}, + {0x02F9F2, 0x02F9F2, 0x0049E6}, {0x02F9F3, 0x02F9F3, 0x0096C3}, {0x02F9F4, 0x02F9F4, 0x005DB2}, + {0x02F9F5, 0x02F9F5, 0x009723}, {0x02F9F6, 0x02F9F6, 0x029145}, {0x02F9F7, 0x02F9F7, 0x02921A}, + {0x02F9F8, 0x02F9F8, 0x004A6E}, {0x02F9F9, 0x02F9F9, 0x004A76}, {0x02F9FA, 0x02F9FA, 0x0097E0}, + {0x02F9FB, 0x02F9FB, 0x02940A}, {0x02F9FC, 0x02F9FC, 0x004AB2}, {0x02F9FD, 0x02F9FD, 0x029496}, + {0x02F9FE, 0x02F9FF, 0x00980B}, {0x02FA00, 0x02FA00, 0x009829}, {0x02FA01, 0x02FA01, 0x0295B6}, + {0x02FA02, 0x02FA02, 0x0098E2}, {0x02FA03, 0x02FA03, 0x004B33}, {0x02FA04, 0x02FA04, 0x009929}, + {0x02FA05, 0x02FA05, 0x0099A7}, {0x02FA06, 0x02FA06, 0x0099C2}, {0x02FA07, 0x02FA07, 0x0099FE}, + {0x02FA08, 0x02FA08, 0x004BCE}, {0x02FA09, 0x02FA09, 0x029B30}, {0x02FA0A, 0x02FA0A, 0x009B12}, + {0x02FA0B, 0x02FA0B, 0x009C40}, {0x02FA0C, 0x02FA0C, 0x009CFD}, {0x02FA0D, 0x02FA0D, 0x004CCE}, + {0x02FA0E, 0x02FA0E, 0x004CED}, {0x02FA0F, 0x02FA0F, 0x009D67}, {0x02FA10, 0x02FA10, 0x02A0CE}, + {0x02FA11, 0x02FA11, 0x004CF8}, {0x02FA12, 0x02FA12, 0x02A105}, {0x02FA13, 0x02FA13, 0x02A20E}, + {0x02FA14, 0x02FA14, 0x02A291}, {0x02FA15, 0x02FA15, 0x009EBB}, {0x02FA16, 0x02FA16, 0x004D56}, + {0x02FA17, 0x02FA17, 0x009EF9}, {0x02FA18, 0x02FA18, 0x009EFE}, {0x02FA19, 0x02FA19, 0x009F05}, + {0x02FA1A, 0x02FA1A, 0x009F0F}, {0x02FA1B, 0x02FA1B, 0x009F16}, {0x02FA1C, 0x02FA1C, 0x009F3B}, + {0x02FA1D, 0x02FA1D, 0x02A600}, +}; diff --git a/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode-data.h b/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode-data.h new file mode 100644 index 00000000..90191ced --- /dev/null +++ b/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode-data.h @@ -0,0 +1,54 @@ +/* +llama.cpp - commit 54ef9cfc +https://github.com/ggerganov/llama.cpp + +MIT License + +Copyright (c) 2023-2024 The ggml authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#pragma once
+
+#include <array>
+#include <cstdint>
+#include <initializer_list>
+#include <utility>
+#include <vector>
+
+struct range_nfd {
+  uint32_t first;
+  uint32_t last;
+  uint32_t nfd;
+};
+
+static const uint32_t MAX_CODEPOINTS = 0x110000;
+
+extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
+
+constexpr std::array<uint32_t, 25> unicode_set_whitespace = {
+    0x000009, 0x00000A, 0x00000B, 0x00000C, 0x00000D, 0x000020, 0x000085, 0x0000A0, 0x001680,
+    0x002000, 0x002001, 0x002002, 0x002003, 0x002004, 0x002005, 0x002006, 0x002007, 0x002008,
+    0x002009, 0x00200A, 0x002028, 0x002029, 0x00202F, 0x00205F, 0x003000,
+};
+
+extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase;
+extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase;
+extern const std::initializer_list<range_nfd> unicode_ranges_nfd;
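Aside on how these tables are consumed: each {first, last, nfd} row of unicode_ranges_nfd records that every codepoint in [first, last] canonically decomposes to the single base codepoint nfd. The sketch below is a minimal illustration of that access pattern, not part of the patch; it assumes the declarations above are in scope and mirrors the binary search that unicode_cpts_normalize_nfd() performs in unicode.cpp further down.

#include <algorithm>
#include <cstdint>

// Find the NFD base of one codepoint by binary-searching the range-packed
// table. Rows are sorted by `first`; the table opens with a sentinel row
// starting at 0 (an assumption here, mirrored from how unicode.cpp itself
// steps back one entry from upper_bound).
static uint32_t nfd_base(uint32_t cpt) {
  auto comp = [](uint32_t c, const range_nfd& range) { return c < range.first; };
  auto it = std::upper_bound(unicode_ranges_nfd.begin(), unicode_ranges_nfd.end(), cpt, comp) - 1;
  // Use the row's mapping only if it actually covers cpt.
  return (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt;
}
// e.g. the Hangul row {0x00AC00, 0x00AE4B, 0x001100} above sends every
// syllable in that range to the leading consonant U+1100.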
diff --git a/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode.cpp b/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode.cpp
new file mode 100644
index 00000000..da8a79e5
--- /dev/null
+++ b/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode.cpp
@@ -0,0 +1,1068 @@
+/*
+llama.cpp - commit 54ef9cfc
+https://github.com/ggerganov/llama.cpp
+
+MIT License
+
+Copyright (c) 2023-2024 The ggml authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#if defined(_MSC_VER)
+#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
+#endif
+
+#include "unicode.h"
+#include "unicode-data.h"
+
+#include <algorithm>
+#include <cassert>
+#include <codecvt>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <locale>
+#include <map>
+#include <regex>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+// Hash function for std::pair used in composition table
+namespace std {
+template<>
+struct hash<std::pair<uint32_t, uint32_t>> {
+  std::size_t operator()(const std::pair<uint32_t, uint32_t>& p) const {
+    return std::hash<uint64_t>{}(((uint64_t)p.first << 32) | p.second);
+  }
+};
+}  // namespace std
+
+size_t unicode_len_utf8(char src) {
+  const size_t lookup[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4};
+  uint8_t highbits = static_cast<uint8_t>(src) >> 4;
+  return lookup[highbits];
+}
+
+// Unused function
+// static std::string unicode_cpts_to_utf8(const std::vector<uint32_t>& cps) {
+//   std::string result;
+//   for (size_t i = 0; i < cps.size(); ++i) {
+//     result.append(unicode_cpt_to_utf8(cps[i]));
+//   }
+//   return result;
+// }
+
+uint32_t unicode_cpt_from_utf8(const std::string& utf8, size_t& offset) {
+  assert(offset < utf8.size());
+  if (!(utf8[offset + 0] & 0x80)) {
+    auto result = utf8[offset + 0];
+    offset += 1;
+    return result;
+  }
+  if (!(utf8[offset + 0] & 0x40)) { throw std::invalid_argument("invalid character"); }
+  if (!(utf8[offset + 0] & 0x20)) {
+    if (offset + 1 >= utf8.size() || !((utf8[offset + 1] & 0xc0) == 0x80)) { throw std::invalid_argument("invalid character"); }
+    auto result = ((utf8[offset + 0] & 0x1f) << 6) | (utf8[offset + 1] & 0x3f);
+    offset += 2;
+    return result;
+  }
+  if (!(utf8[offset + 0] & 0x10)) {
+    if (offset + 2 >= utf8.size() || !((utf8[offset + 1] & 0xc0) == 0x80) || !((utf8[offset + 2] & 0xc0) == 0x80)) {
+      throw std::invalid_argument("invalid character");
+    }
+    auto result = ((utf8[offset + 0] & 0x0f) << 12) | ((utf8[offset + 1] & 0x3f) << 6) | (utf8[offset + 2] & 0x3f);
+    offset += 3;
+    return result;
+  }
+  if (!(utf8[offset + 0] & 0x08)) {
+    if (offset + 3 >= utf8.size() || !((utf8[offset + 1] & 0xc0) == 0x80) || !((utf8[offset + 2] & 0xc0) == 0x80)
+        || !((utf8[offset + 3] & 0xc0) == 0x80)) {
+      throw std::invalid_argument("invalid character");
+    }
+    auto result = ((utf8[offset + 0] & 0x07) << 18) | ((utf8[offset + 1] & 0x3f) << 12) | ((utf8[offset + 2] & 0x3f) << 6)
+                  | (utf8[offset + 3] & 0x3f);
+    offset += 4;
+    return result;
+  }
+  throw std::invalid_argument("failed to convert utf8 to codepoint");
+}
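For a concrete feel of the decoder contract: unicode_cpt_from_utf8() returns one codepoint and advances offset past however many bytes it consumed, throwing std::invalid_argument on malformed input. A small usage sketch, illustrative only; it assumes this translation unit is linked in and that unicode.h exposes the same declarations as upstream llama.cpp.

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>
#include "unicode.h"

int main() {
  std::string s = "h\xC3\xA9!";  // "hé!": 'é' is the two-byte sequence 0xC3 0xA9
  size_t offset = 0;
  std::vector<uint32_t> cpts;
  while (offset < s.size()) {
    cpts.push_back(unicode_cpt_from_utf8(s, offset));  // advances offset by 1..4
  }
  for (uint32_t c : cpts) { std::printf("U+%04X ", (unsigned)c); }  // U+0068 U+00E9 U+0021
  return 0;
}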
+
+// static std::vector<uint16_t> unicode_cpt_to_utf16(uint32_t cp) {
+//   std::vector<uint16_t> result;
+//   if (/* 0x0000 <= cp && */ cp <= 0xffff) {
+//     result.emplace_back(cp);
+//     return result;
+//   }
+//   if (0x10000 <= cp && cp <= 0x10ffff) {
+//     result.emplace_back(0xd800 | ((cp - 0x10000) >> 10));
+//     result.emplace_back(0xdc00 | ((cp - 0x10000) & 0x03ff));
+//     return result;
+//   }
+//   throw std::invalid_argument("failed to convert codepoint to utf16");
+// }
+
+// static std::vector<uint16_t> unicode_cpts_to_utf16(const std::vector<uint32_t>& cps) {
+//   std::vector<uint16_t> result;
+//   for (size_t i = 0; i < cps.size(); ++i) {
+//     auto temp = unicode_cpt_to_utf16(cps[i]);
+//     result.insert(result.end(), temp.begin(), temp.end());
+//   }
+//   return result;
+// }
+
+// static uint32_t unicode_cpt_from_utf16(const std::vector<uint16_t>& utf16, size_t& offset) {
+//   assert(offset < utf16.size());
+//   if (((utf16[0] >> 10) << 10) != 0xd800) {
+//     auto result = utf16[offset + 0];
+//     offset += 1;
+//     return result;
+//   }
+//
+//   if (offset + 1 >= utf16.size() || !((utf16[1] & 0xdc00) == 0xdc00)) {
+//     throw std::invalid_argument("invalid character");
+//   }
+//
+//   auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff));
+//   offset += 2;
+//   return result;
+// }
+
+// static std::vector<uint32_t> unicode_cpts_from_utf16(const std::vector<uint16_t>& utf16) {
+//   std::vector<uint32_t> result;
+//   size_t offset = 0;
+//   while (offset < utf16.size()) {
+//     result.push_back(unicode_cpt_from_utf16(utf16, offset));
+//   }
+//   return result;
+// }
+
+static std::vector<codepoint_flags> unicode_cpt_flags_array() {
+  std::vector<codepoint_flags> cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED);
+
+  assert(unicode_ranges_flags.begin()[0].first == 0);
+  assert(unicode_ranges_flags.begin()[unicode_ranges_flags.size() - 1].first == MAX_CODEPOINTS);
+  for (size_t i = 1; i < unicode_ranges_flags.size(); ++i) {
+    const auto range_ini = unicode_ranges_flags.begin()[i - 1];  // codepoint_ini, flags
+    const auto range_end = unicode_ranges_flags.begin()[i];      // codepoint_end, flags
+    for (uint32_t cpt = range_ini.first; cpt < range_end.first; ++cpt) { cpt_flags[cpt] = range_ini.second; }
+  }
+
+  for (auto cpt : unicode_set_whitespace) { cpt_flags[cpt].is_whitespace = true; }
+
+  for (auto p : unicode_map_lowercase) { cpt_flags[p.second].is_lowercase = true; }
+
+  for (auto p : unicode_map_uppercase) { cpt_flags[p.second].is_uppercase = true; }
+
+  for (auto& range : unicode_ranges_nfd) {  // start, last, nfd
+    cpt_flags[range.nfd].is_nfd = true;
+  }
+
+  return cpt_flags;
+}
+
+static std::unordered_map<uint8_t, std::string> unicode_byte_to_utf8_map() {
+  std::unordered_map<uint8_t, std::string> map;
+  for (int ch = 0x21; ch <= 0x7E; ++ch) {  // u'!' to u'~'
+    assert(0 <= ch && ch < 256);
+    map[ch] = unicode_cpt_to_utf8(ch);
+  }
+  for (int ch = 0xA1; ch <= 0xAC; ++ch) {  // u'¡' to u'¬'
+    assert(0 <= ch && ch < 256);
+    map[ch] = unicode_cpt_to_utf8(ch);
+  }
+  for (int ch = 0xAE; ch <= 0xFF; ++ch) {  // u'®' to u'ÿ'
+    assert(0 <= ch && ch < 256);
+    map[ch] = unicode_cpt_to_utf8(ch);
+  }
+  auto n = 0;
+  for (int ch = 0; ch < 256; ++ch) {
+    if (map.find(ch) == map.end()) {
+      map[ch] = unicode_cpt_to_utf8(256 + n);
+      ++n;
+    }
+  }
+  return map;
+}
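The map just built is the GPT-2 byte-to-unicode trick: the 188 printable bytes map to themselves, and each of the 68 remaining bytes is assigned codepoint 256 + n in order of appearance, so any byte string round-trips through visible characters. A quick illustration (not part of the patch; it uses the public wrappers unicode_byte_to_utf8()/unicode_utf8_to_byte() defined later in this file):

// Why GPT-2 style vocabularies show a leading space as "Ġ": byte 0x20 is
// excluded from the printable set and is the 33rd excluded byte, so it is
// remapped to 256 + 32 = 0x120, i.e. U+0120 ("Ġ").
uint8_t space = 0x20;
std::string visible = unicode_byte_to_utf8(space);  // UTF-8 for U+0120 ("Ġ")
uint8_t raw = unicode_utf8_to_byte(visible);        // 0x20 again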
+
+static std::unordered_map<std::string, uint8_t> unicode_utf8_to_byte_map() {
+  std::unordered_map<std::string, uint8_t> map;
+  for (int ch = 0x21; ch <= 0x7E; ++ch) {  // u'!' to u'~'
+    assert(0 <= ch && ch < 256);
+    map[unicode_cpt_to_utf8(ch)] = ch;
+  }
+  for (int ch = 0xA1; ch <= 0xAC; ++ch) {  // u'¡' to u'¬'
+    assert(0 <= ch && ch < 256);
+    map[unicode_cpt_to_utf8(ch)] = ch;
+  }
+  for (int ch = 0xAE; ch <= 0xFF; ++ch) {  // u'®' to u'ÿ'
+    assert(0 <= ch && ch < 256);
+    map[unicode_cpt_to_utf8(ch)] = ch;
+  }
+  auto n = 0;
+  for (int ch = 0; ch < 256; ++ch) {
+    if (map.find(unicode_cpt_to_utf8(ch)) == map.end()) {
+      map[unicode_cpt_to_utf8(256 + n)] = ch;
+      ++n;
+    }
+  }
+  return map;
+}
+
+static inline std::wstring unicode_wstring_from_utf8(const std::string& s) {
+  std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
+  return conv.from_bytes(s);
+}
+
+static std::vector<std::string> unicode_byte_encoding_process(const std::vector<std::string>& bpe_words) {
+  std::vector<std::string> bpe_encoded_words;
+  for (const auto& word : bpe_words) {
+    std::string text_utf;
+    auto utf_word = unicode_cpts_from_utf8(word);
+    for (size_t i = 0; i < utf_word.size(); ++i) { text_utf += unicode_cpt_to_utf8(utf_word[i]); }
+
+    std::string encoded_token;
+    for (char& c : text_utf) { encoded_token += unicode_byte_to_utf8(c); }
+    bpe_encoded_words.emplace_back(encoded_token);
+  }
+  return bpe_encoded_words;
+}
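The two functions that follow are hand-written fast paths for the pretokenizer regexes used by GPT-2 and LLaMA-3 style models; unicode_regex_split(), the public entry point near the end of this file, dispatches to them on an exact pattern match and otherwise falls back to std::regex/std::wregex. A usage sketch (illustrative; the listed output is a hand trace of the GPT-2 rules plus the byte encoding, not output quoted from the patch):

// Split with the exact GPT-2 pattern that the fast path recognizes.
std::vector<std::string> words = unicode_regex_split(
    "I've 42 apples!",
    {"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)"});
// Expected (byte-encoded, space becomes "Ġ"): "I", "'ve", "Ġ42", "Ġapples", "!"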
+
+// GPT2 system regex:  's|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+
+static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string& text, const std::vector<size_t>& offsets) {
+  std::vector<size_t> bpe_offsets;      // store the offset of each word
+  bpe_offsets.reserve(offsets.size());  // Reserve memory for the approximate size
+
+  const auto cpts = unicode_cpts_from_utf8(text);
+
+  size_t start = 0;
+  for (auto offset : offsets) {
+    const size_t offset_ini = start;
+    const size_t offset_end = start + offset;
+    assert(offset_end <= cpts.size());
+    start = offset_end;
+
+    static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
+    auto _get_cpt = [&](const size_t pos) -> uint32_t {
+      return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
+    };
+
+    auto _get_flags = [&](const size_t pos) -> codepoint_flags {
+      return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : codepoint_flags{};
+    };
+
+    size_t _prev_end = offset_ini;
+    auto _add_token = [&](const size_t end) -> size_t {
+      assert(_prev_end <= end && end <= offset_end);
+      size_t len = end - _prev_end;
+      if (len > 0) { bpe_offsets.push_back(len); }
+      _prev_end = end;
+      // if (len > 0) {
+      //   std::string s = "";
+      //   for(size_t p = end-len; p < end; p++)
+      //     s += unicode_cpt_to_utf8(cpts[p]);
+      //   printf(">>> '%s'\n", s.c_str());
+      // }
+      return len;
+    };
+
+    for (size_t pos = offset_ini; pos < offset_end; /*pos++*/) {
+      const uint32_t cpt = _get_cpt(pos);
+      const auto flags = _get_flags(pos);
+
+      // regex: 's|'t|'re|'ve|'m|'ll|'d
+      if (cpt == '\'' && pos + 1 < offset_end) {
+        uint32_t cpt_next = _get_cpt(pos + 1);
+        if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
+          pos += _add_token(pos + 2);
+          continue;
+        }
+        if (pos + 2 < offset_end) {
+          uint32_t cpt_next_next = _get_cpt(pos + 2);
+          if ((cpt_next == 'r' && cpt_next_next == 'e') || (cpt_next == 'v' && cpt_next_next == 'e')
+              || (cpt_next == 'l' && cpt_next_next == 'l')) {
+            pos += _add_token(pos + 3);
+            continue;
+          }
+        }
+      }
+
+      auto flags2 = (cpt == ' ' ? _get_flags(pos + 1) : flags);
+      // regex: <space>?\p{L}+
+      if (flags2.is_letter) {
+        pos += (cpt == ' ');
+        while (flags2.is_letter) { flags2 = _get_flags(++pos); }
+        _add_token(pos);
+        continue;
+      }
+      // regex: <space>?\p{N}+
+      if (flags2.is_number) {
+        pos += (cpt == ' ');
+        while (flags2.is_number) { flags2 = _get_flags(++pos); }
+        _add_token(pos);
+        continue;
+      }
+      // regex: <space>?[^\s\p{L}\p{N}]+
+      if (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) {
+        pos += (cpt == ' ');
+        while (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) {
+          flags2 = _get_flags(++pos);
+        }
+        _add_token(pos);
+        continue;
+      }
+
+      size_t num_whitespaces = 0;
+      while (_get_flags(pos + num_whitespaces).is_whitespace) { num_whitespaces++; }
+
+      // regex: \s+(?!\S)
+      if (num_whitespaces > 1 && _get_cpt(pos + num_whitespaces) != OUT_OF_RANGE) {
+        pos += num_whitespaces - 1;
+        _add_token(pos);
+        continue;
+      }
+
+      // regex: \s+
+      if (num_whitespaces > 0) {
+        pos += num_whitespaces;
+        _add_token(pos);
+        continue;
+      }
+
+      // no matches
+      _add_token(++pos);
+    }
+  }
+
+  return bpe_offsets;
+}
+
+// LLAMA3 system regex:
+// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"
+static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string& text, const std::vector<size_t>& offsets) {
+  std::vector<size_t> bpe_offsets;      // store the offset of each word
+  bpe_offsets.reserve(offsets.size());  // Reserve memory for the approximate size
+
+  const auto cpts = unicode_cpts_from_utf8(text);
+
+  size_t start = 0;
+  for (auto offset : offsets) {
+    const size_t offset_ini = start;
+    const size_t offset_end = start + offset;
+    assert(offset_end <= cpts.size());
+    start = offset_end;
+
+    static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
+    auto _get_cpt = [&](const size_t pos) -> uint32_t {
+      return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
+    };
+
+    auto _get_flags = [&](const size_t pos) -> codepoint_flags {
+      return (offset_ini <= pos && pos < offset_end) ?
unicode_cpt_flags(cpts[pos]) : codepoint_flags{}; + }; + + size_t _prev_end = offset_ini; + auto _add_token = [&](const size_t end) -> size_t { + assert(_prev_end <= end && end <= offset_end); + size_t len = end - _prev_end; + if (len > 0) { bpe_offsets.push_back(len); } + _prev_end = end; + // if (len > 0) { + // std::string s = ""; + // for(size_t p = end-len; p < end; p++) + // s += unicode_cpt_to_utf8(cpts[p]); + // printf(">>> '%s'\n", s.c_str()); + // } + return len; + }; + + for (size_t pos = offset_ini; pos < offset_end; /*pos++*/) { + const uint32_t cpt = _get_cpt(pos); + const auto flags = _get_flags(pos); + + // regex: (?i:'s|'t|'re|'ve|'m|'ll|'d) // case insensitive + if (cpt == '\'' && pos + 1 < offset_end) { + uint32_t cpt_next = unicode_tolower(_get_cpt(pos + 1)); + if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') { + pos += _add_token(pos + 2); + continue; + } + if (pos + 2 < offset_end) { + uint32_t cpt_next_next = unicode_tolower(_get_cpt(pos + 2)); + if ((cpt_next == 'r' && cpt_next_next == 'e') || (cpt_next == 'v' && cpt_next_next == 'e') + || (cpt_next == 'l' && cpt_next_next == 'l')) { + pos += _add_token(pos + 3); + continue; + } + } + } + + // regex: [^\r\n\p{L}\p{N}]?\p{L}+ + if (!(cpt == '\r' || cpt == '\n' || flags.is_number)) { + if (flags.is_letter || _get_flags(pos + 1).is_letter) { // one or more letters + pos++; + while (_get_flags(pos).is_letter) { pos++; } + _add_token(pos); + continue; + } + } + + // regex: \p{N}{1,3} + if (flags.is_number) { + size_t ini = pos; + while (_get_flags(pos).is_number) { + if (++pos - ini >= 3) { + _add_token(pos); + ini = pos; + } + } + _add_token(pos); + continue; + } + + // regex: ?[^\s\p{L}\p{N}]+[\r\n]* + auto flags2 = (cpt == ' ' ? _get_flags(pos + 1) : flags); + if (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags.as_uint()) { + pos += (cpt == ' '); + while (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) { + flags2 = _get_flags(++pos); + } + uint32_t cpt2 = _get_cpt(pos); + while (cpt2 == '\r' || cpt2 == '\n') { cpt2 = _get_cpt(++pos); } + _add_token(pos); + continue; + } + + size_t num_whitespaces = 0; + size_t last_end_r_or_n = 0; + while (_get_flags(pos + num_whitespaces).is_whitespace) { + uint32_t cpt2 = _get_cpt(pos + num_whitespaces); + if (cpt2 == '\r' || cpt2 == '\n') { last_end_r_or_n = pos + num_whitespaces + 1; } + num_whitespaces++; + } + + // regex: \s*[\r\n]+ + if (last_end_r_or_n > 0) { + pos = last_end_r_or_n; + _add_token(pos); + continue; + } + + // regex: \s+(?!\S) + if (num_whitespaces > 1 && _get_cpt(pos + num_whitespaces) != OUT_OF_RANGE) { + pos += num_whitespaces - 1; + _add_token(pos); + continue; + } + + // regex: \s+ + if (num_whitespaces > 0) { + pos += num_whitespaces; + _add_token(pos); + continue; + } + + // no matches + _add_token(++pos); + } + } + + return bpe_offsets; +} + +// use std::wregex to split the text +static std::vector unicode_regex_split_stl(const std::wstring& wtext, const std::wstring& regex_expr, + const std::vector& offsets) { + std::wregex expr(regex_expr); + std::vector bpe_offsets; // store the offset of each word + bpe_offsets.reserve(offsets.size()); // Reserve memory for the approximate size + size_t start = 0; + for (auto offset : offsets) { + std::wcregex_iterator it(wtext.data() + start, wtext.data() + start + offset, expr); + std::wcregex_iterator end; + + int64_t start_idx = 0; + while (it != end) { + std::wcmatch match = *it; + if (match.position() > start_idx) { 
bpe_offsets.emplace_back(match.position() - start_idx); } + bpe_offsets.emplace_back(match.length()); + start_idx = match.position() + match.length(); + ++it; + } + + if (start_idx < (int64_t)offset) { bpe_offsets.emplace_back(offset - start_idx); } + start += offset; + } + + return bpe_offsets; +} + +// use std::regex to split the text +static std::vector unicode_regex_split_stl(const std::string& text, const std::string& regex_expr, + const std::vector& offsets) { + std::regex expr(regex_expr); + std::vector bpe_offsets; // store the offset of each word + bpe_offsets.reserve(offsets.size()); // Reserve memory for the approximate size + size_t start = 0; + for (auto offset : offsets) { + std::cregex_iterator it(text.data() + start, text.data() + start + offset, expr); + std::cregex_iterator end; + + int64_t start_idx = 0; + while (it != end) { + std::cmatch match = *it; + if (match.position() > start_idx) { bpe_offsets.emplace_back(match.position() - start_idx); } + bpe_offsets.emplace_back(match.length()); + start_idx = match.position() + match.length(); + ++it; + } + + if (start_idx < (int64_t)offset) { bpe_offsets.emplace_back(offset - start_idx); } + start += offset; + } + + return bpe_offsets; +} + +static std::vector unicode_regex_split_custom(const std::string& text, const std::string& regex_expr, + const std::vector& offsets) { + std::vector bpe_offsets; + + if (regex_expr + == "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| " + "?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)") { + bpe_offsets = unicode_regex_split_custom_gpt2(text, offsets); + } else if (regex_expr + == "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1," + "3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + || regex_expr + == "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^" + "\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| " + "?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+") { + bpe_offsets = unicode_regex_split_custom_llama3(text, offsets); + } + + return bpe_offsets; +} + +// +// interface +// + +std::string unicode_cpt_to_utf8(uint32_t cp) { + std::string result; + + if (/* 0x00 <= cp && */ cp <= 0x7f) { + result.push_back(cp); + return result; + } + if (0x80 <= cp && cp <= 0x7ff) { + result.push_back(0xc0 | ((cp >> 6) & 0x1f)); + result.push_back(0x80 | (cp & 0x3f)); + return result; + } + if (0x800 <= cp && cp <= 0xffff) { + result.push_back(0xe0 | ((cp >> 12) & 0x0f)); + result.push_back(0x80 | ((cp >> 6) & 0x3f)); + result.push_back(0x80 | (cp & 0x3f)); + return result; + } + if (0x10000 <= cp && cp <= 0x10ffff) { + result.push_back(0xf0 | ((cp >> 18) & 0x07)); + result.push_back(0x80 | ((cp >> 12) & 0x3f)); + result.push_back(0x80 | ((cp >> 6) & 0x3f)); + result.push_back(0x80 | (cp & 0x3f)); + return result; + } + + throw std::invalid_argument("invalid codepoint"); +} + +std::vector unicode_cpts_normalize_nfd(const std::vector& cpts) { + auto comp = [](const uint32_t cpt, const range_nfd& range) { return cpt < range.first; }; + std::vector result(cpts.size()); + for (size_t i = 0; i < cpts.size(); ++i) { + const uint32_t cpt = cpts[i]; + auto it = std::upper_bound(unicode_ranges_nfd.begin(), unicode_ranges_nfd.end(), cpt, comp) - 1; + result[i] = (it->first <= cpt && cpt <= it->last) ? 
it->nfd : cpt; + } + return result; +} + +std::vector unicode_cpts_from_utf8(const std::string& utf8) { + std::vector result; + result.reserve(utf8.size()); + size_t offset = 0; + while (offset < utf8.size()) { result.push_back(unicode_cpt_from_utf8(utf8, offset)); } + return result; +} + +codepoint_flags unicode_cpt_flags(const uint32_t cp) { + static const codepoint_flags undef(codepoint_flags::UNDEFINED); + static const auto cpt_flags = unicode_cpt_flags_array(); + return cp < cpt_flags.size() ? cpt_flags[cp] : undef; +} + +codepoint_flags unicode_cpt_flags(const std::string& utf8) { + static const codepoint_flags undef(codepoint_flags::UNDEFINED); + if (utf8.empty()) { + return undef; // undefined + } + size_t offset = 0; + return unicode_cpt_flags(unicode_cpt_from_utf8(utf8, offset)); +} + +std::string unicode_byte_to_utf8(uint8_t byte) { + static std::unordered_map map = unicode_byte_to_utf8_map(); + return map.at(byte); +} + +uint8_t unicode_utf8_to_byte(const std::string& utf8) { + static std::unordered_map map = unicode_utf8_to_byte_map(); + return map.at(utf8); +} + +uint32_t unicode_tolower(uint32_t cp) { + // binary search + auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cp, + [](const std::pair& pair, uint32_t value) { return pair.first < value; }); + if (it != unicode_map_lowercase.end() && it->first == cp) { return it->second; } + return cp; // Return the original code point if no lowercase mapping is found +} + +std::vector unicode_regex_split(const std::string& text, const std::vector& regex_exprs) { + // unicode categories + static const std::map k_ucat_enum = { + {"\\p{N}", codepoint_flags::NUMBER}, + {"\\p{L}", codepoint_flags::LETTER}, + {"\\p{P}", codepoint_flags::PUNCTUATION}, + }; + + static const std::map k_ucat_cpt = { + {codepoint_flags::NUMBER, 0xD1}, + {codepoint_flags::LETTER, 0xD2}, + {codepoint_flags::PUNCTUATION, 0xD3}, + }; + + static const std::map k_ucat_map = { + {codepoint_flags::NUMBER, "\x30-\x39"}, // 0-9 + {codepoint_flags::LETTER, "\x41-\x5A\x61-\x7A"}, // A-Za-z + {codepoint_flags::PUNCTUATION, "\x21-\x23\x25-\x2A\x2C-\x2F\x3A-\x3B\x3F-\x40\\\x5B-" + "\\\x5D\x5F\\\x7B\\\x7D"}, // !-#%-*,-/:-;?-@\[-\]_\{\} + }; + + // compute collapsed codepoints only if needed by at least one regex + bool need_collapse = false; + for (auto& regex_expr : regex_exprs) { + // search for unicode categories + for (const auto& ucat : k_ucat_enum) { + if (std::string::npos != regex_expr.find(ucat.first)) { + need_collapse = true; + break; + } + } + } + + const auto cpts = unicode_cpts_from_utf8(text); + + // generate a "collapsed" representation of the text, where all codepoints are + // replaced by a single byte ref: + // https://github.com/ggerganov/llama.cpp/pull/6920#issuecomment-2081479935 + std::string text_collapsed; + if (need_collapse) { + // collapse all unicode categories + text_collapsed.resize(cpts.size()); + + for (size_t i = 0; i < cpts.size(); ++i) { + // keep single-byte codepoints as is + if (cpts[i] < 128) { + text_collapsed[i] = cpts[i]; + continue; + } + + const auto flags = unicode_cpt_flags(cpts[i]); + + if (flags.is_whitespace) { + // NOTE: C++ std::regex \s does not mach 0x85, Rust and Python regex + // does. 
text_collapsed[i] = (char) 0x85; // as whitespace + // fallback + text_collapsed[i] = (char)0x0B; // as whitespace fallback + } else if (k_ucat_cpt.find(flags.category_flag()) != k_ucat_cpt.end()) { + text_collapsed[i] = k_ucat_cpt.at(flags.category_flag()); + } else { + text_collapsed[i] = (char)0xD0; // fallback + } + } + } + + std::vector bpe_offsets = {cpts.size()}; + + for (auto& regex_expr : regex_exprs) { + // first, see if we have an efficient custom regex implementation + auto tmp = unicode_regex_split_custom(text, regex_expr, bpe_offsets); + + if (!tmp.empty()) { + bpe_offsets = std::move(tmp); + continue; + } + + // fallback to general-purpose std::regex / std::wregex + try { + // if a unicode category is used in the regex, we use the collapsed text + // and replace the unicode category with the corresponding collapsed + // representation + bool use_collapsed = false; + for (auto& ucat : k_ucat_enum) { + if (std::string::npos != regex_expr.find(ucat.first)) { + use_collapsed = true; + break; + } + } + + if (use_collapsed) { + // sanity-check that the original regex does not contain any non-ASCII + // characters + const auto cpts_regex = unicode_cpts_from_utf8(regex_expr); + for (size_t i = 0; i < cpts_regex.size(); ++i) { + if (cpts_regex[i] >= 128) { + throw std::runtime_error("Regex includes both unicode categories and non-ASCII " + "characters - not supported"); + } + } + + // generate a collapsed representation of the regex + std::string regex_expr_collapsed; + + // track if we are inside [], because nested [] are not allowed + bool inside = false; + for (size_t i = 0; i < regex_expr.size(); ++i) { + if (regex_expr[i] == '[' && (i == 0 || regex_expr[i - 1] != '\\')) { + regex_expr_collapsed += '['; + inside = true; + continue; + } + + if (inside && regex_expr[i] == ']' && regex_expr[i - 1] != '\\') { + regex_expr_collapsed += ']'; + inside = false; + continue; + } + + if (regex_expr[i + 0] == '\\' && i + 4 < regex_expr.size() && regex_expr[i + 1] == 'p' && regex_expr[i + 2] == '{' + && regex_expr[i + 4] == '}') { + const std::string pat = regex_expr.substr(i, 5); + if (k_ucat_enum.find(pat) != k_ucat_enum.end()) { + if (!inside) { regex_expr_collapsed += '['; } + regex_expr_collapsed += k_ucat_cpt.at(k_ucat_enum.at(pat)); + regex_expr_collapsed += k_ucat_map.at(k_ucat_enum.at(pat)); + if (!inside) { regex_expr_collapsed += ']'; } + i += 4; + continue; + } + } + + regex_expr_collapsed += regex_expr[i]; + } + + // printf("text_collapsed: %s\n", text_collapsed.c_str()); + // printf("regex_expr_collapsed: %s\n", regex_expr_collapsed.c_str()); + bpe_offsets = unicode_regex_split_stl(text_collapsed, regex_expr_collapsed, bpe_offsets); + } else { + // no unicode category used, we can use std::wregex directly + const std::wstring wregex_expr = unicode_wstring_from_utf8(regex_expr); + + // std::wregex \s does not mach non-ASCII whitespaces, using 0x0B as + // fallback + std::wstring wtext(cpts.begin(), cpts.end()); + for (size_t i = 0; i < wtext.size(); ++i) { + if (wtext[i] > 0x7F && unicode_cpt_flags(wtext[i]).is_whitespace) { wtext[i] = 0x0B; } + } + + // printf("text: %s\n", text.c_str()); + // printf("regex_expr: %s\n", regex_expr.c_str()); + bpe_offsets = unicode_regex_split_stl(wtext, wregex_expr, bpe_offsets); + } + } catch (std::regex_error& e) { + fprintf(stderr, "Failed to process regex: '%s'\n", regex_expr.c_str()); + fprintf(stderr, "Regex error: %s\n", e.what()); + throw std::runtime_error("Failed to process regex"); + } + } + + std::vector bpe_words; + 
bpe_words.reserve(bpe_offsets.size()); // reserve memory for the approximate size + + size_t start = 0; + for (size_t& offset : bpe_offsets) { + bpe_words.emplace_back(); + for (size_t i = start; i < start + offset; ++i) { bpe_words.back() += unicode_cpt_to_utf8(cpts[i]); } + start += offset; + } + + return unicode_byte_encoding_process(bpe_words); +} + +// Get canonical combining class for a codepoint using existing flags data +static uint8_t get_combining_class(uint32_t cpt) { + codepoint_flags flags = unicode_cpt_flags(cpt); + + // Use the existing flag system to determine combining class + if (flags.is_accent_mark) { + // Most combining marks have class 230, but some have different classes + // This is a simplified mapping based on common Unicode patterns + if (cpt >= 0x0591 && cpt <= 0x05BD) return 220; // Hebrew accents + if (cpt >= 0x05BF && cpt <= 0x05C7) return 230; // Hebrew points + if (cpt >= 0x0610 && cpt <= 0x061A) return 230; // Arabic marks + if (cpt >= 0x064B && cpt <= 0x065F) return 30; // Arabic vowels + if (cpt >= 0x0670 && cpt <= 0x0670) return 35; // Arabic superscript alef + if (cpt >= 0x06D6 && cpt <= 0x06E4) return 230; // Arabic small high marks + if (cpt >= 0x06E7 && cpt <= 0x06E8) return 230; // Arabic small high marks + if (cpt >= 0x06EA && cpt <= 0x06ED) return 220; // Arabic small low marks + + // Default combining class for most combining marks + return 230; + } + + return 0; // Non-combining character (starter) +} + +// Apply canonical ordering using bubble sort (simple but correct) +static void canonical_order(std::vector& cpts) { + for (size_t i = 1; i < cpts.size(); ++i) { + for (size_t j = i; j > 0; --j) { + uint8_t cc1 = get_combining_class(cpts[j - 1]); + uint8_t cc2 = get_combining_class(cpts[j]); + + // Only reorder if both have non-zero combining class and are out of order + if (cc1 > cc2 && cc2 != 0) { + std::swap(cpts[j - 1], cpts[j]); + } else { + break; + } + } + } +} + +// Build composition table by reverse-engineering the NFD data +static std::unordered_map, uint32_t> build_composition_table() { + std::unordered_map, uint32_t> composition_map; + + // Iterate through all NFD mappings to build reverse composition table + for (const auto& range : unicode_ranges_nfd) { + for (uint32_t cpt = range.first; cpt <= range.last; ++cpt) { + uint32_t base = range.nfd; + + // For NFC, we need to figure out what combining character was removed + // This is a simplified approach that works for the most common cases + + // Common diacritic mappings based on the composed character + uint32_t combining = 0; + + // Determine combining character based on the composed character + // This is derived from common Unicode patterns + switch (cpt) { + // Grave accent (0x0300) + case 0x00C0: + case 0x00E0: // À à + case 0x00C8: + case 0x00E8: // È è + case 0x00CC: + case 0x00EC: // Ì ì + case 0x00D2: + case 0x00F2: // Ò ò + case 0x00D9: + case 0x00F9: // Ù ù + case 0x01CD: + case 0x01CE: // Ǎ ǎ + case 0x01CF: + case 0x01D0: // Ǐ ǐ + case 0x01D1: + case 0x01D2: // Ǒ ǒ + case 0x01D3: + case 0x01D4: // Ǔ ǔ + combining = 0x0300; + break; + + // Acute accent (0x0301) + case 0x00C1: + case 0x00E1: // Á á + case 0x00C9: + case 0x00E9: // É é + case 0x00CD: + case 0x00ED: // Í í + case 0x00D3: + case 0x00F3: // Ó ó + case 0x00DA: + case 0x00FA: // Ú ú + case 0x00DD: + case 0x00FD: // Ý ý + combining = 0x0301; + break; + + // Circumflex (0x0302) + case 0x00C2: + case 0x00E2: // Â â + case 0x00CA: + case 0x00EA: // Ê ê + case 0x00CE: + case 0x00EE: // Î î + case 0x00D4: + case 
0x00F4: // Ô ô + case 0x00DB: + case 0x00FB: // Û û + combining = 0x0302; + break; + + // Tilde (0x0303) + case 0x00C3: + case 0x00E3: // Ã ã + case 0x00D1: + case 0x00F1: // Ñ ñ + case 0x00D5: + case 0x00F5: // Õ õ + combining = 0x0303; + break; + + // Diaeresis (0x0308) + case 0x00C4: + case 0x00E4: // Ä ä + case 0x00CB: + case 0x00EB: // Ë ë + case 0x00CF: + case 0x00EF: // Ï ï + case 0x00D6: + case 0x00F6: // Ö ö + case 0x00DC: + case 0x00FC: // Ü ü + case 0x00FF: // ÿ + combining = 0x0308; + break; + + // Ring above (0x030A) + case 0x00C5: + case 0x00E5: // Å å + combining = 0x030A; + break; + + // Cedilla (0x0327) + case 0x00C7: + case 0x00E7: // Ç ç + combining = 0x0327; + break; + + default: + // For other characters, try to infer from Unicode blocks + if (cpt >= 0x0100 && cpt <= 0x017F) { + // Extended Latin A - try common patterns + if ((cpt & 1) == 0) { // Even codepoints (uppercase) + if (cpt >= 0x0100 && cpt <= 0x0105) + combining = 0x0304; // macron + else if (cpt >= 0x0102 && cpt <= 0x0107) + combining = 0x0306; // breve + else if (cpt >= 0x0104 && cpt <= 0x0119) + combining = 0x0328; // ogonek + else if (cpt >= 0x0106 && cpt <= 0x010D) + combining = 0x0301; // acute + else if (cpt >= 0x0108 && cpt <= 0x010F) + combining = 0x0302; // circumflex + else if (cpt >= 0x010A && cpt <= 0x0111) + combining = 0x0307; // dot above + else if (cpt >= 0x010C && cpt <= 0x0165) + combining = 0x030C; // caron + } + } + break; + } + + // Only add to composition table if we identified a combining character + if (combining != 0) { composition_map[{base, combining}] = cpt; } + } + } + + return composition_map; +} + +// Get the composition table (built once, cached) +static const std::unordered_map, uint32_t>& get_composition_table() { + static const auto composition_table = build_composition_table(); + return composition_table; +} + +std::vector unicode_cpts_normalize_nfc(const std::vector& cpts) { + // Step 1: Apply NFD (canonical decomposition) using existing implementation + std::vector nfd_result = unicode_cpts_normalize_nfd(cpts); + + // Step 2: Apply canonical ordering + canonical_order(nfd_result); + + // Step 3: Apply canonical composition + const auto& composition_table = get_composition_table(); + std::vector result; + result.reserve(nfd_result.size()); + + size_t i = 0; + while (i < nfd_result.size()) { + uint32_t starter = nfd_result[i]; + result.push_back(starter); + + // Only try to compose if this is a starter (combining class 0) + if (get_combining_class(starter) == 0) { + size_t last_starter_pos = result.size() - 1; + + // Look for composable combining marks after this starter + size_t j = i + 1; + while (j < nfd_result.size()) { + uint32_t combining = nfd_result[j]; + uint8_t cc = get_combining_class(combining); + + // If we hit another starter, stop + if (cc == 0) break; + + // Try to compose with the last starter + auto key = std::make_pair(result[last_starter_pos], combining); + auto it = composition_table.find(key); + + if (it != composition_table.end()) { + // Compose: replace starter with composed character + result[last_starter_pos] = it->second; + // Skip this combining character + ++j; + continue; + } + + // No composition possible, add the combining character + result.push_back(combining); + ++j; + } + i = j; + } else { + ++i; + } + } + + return result; +} diff --git a/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode.h b/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode.h new file mode 100644 index 00000000..da4e2ea9 --- /dev/null +++ 
b/mllm/preprocessor/tokenizers/llama_cpp_unicode/unicode.h @@ -0,0 +1,90 @@ +/* +llama.cpp - commit 54ef9cfc +https://github.com/ggerganov/llama.cpp + +MIT License + +Copyright (c) 2023-2024 The ggml authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#pragma once + +#include +#include +#include + +// TODO: prefix all symbols with "llama_" + +struct codepoint_flags { + enum { + UNDEFINED = 0x0001, + NUMBER = 0x0002, // regex: \p{N} + LETTER = 0x0004, // regex: \p{L} + SEPARATOR = 0x0008, // regex: \p{Z} + ACCENT_MARK = 0x0010, // regex: \p{M} + PUNCTUATION = 0x0020, // regex: \p{P} + SYMBOL = 0x0040, // regex: \p{S} + CONTROL = 0x0080, // regex: \p{C} + MASK_CATEGORIES = 0x00FF, + }; + + // codepoint type + uint16_t is_undefined : 1; + uint16_t is_number : 1; // regex: \p{N} + uint16_t is_letter : 1; // regex: \p{L} + uint16_t is_separator : 1; // regex: \p{Z} + uint16_t is_accent_mark : 1; // regex: \p{M} + uint16_t is_punctuation : 1; // regex: \p{P} + uint16_t is_symbol : 1; // regex: \p{S} + uint16_t is_control : 1; // regex: \p{C} + // helper flags + uint16_t is_whitespace : 1; // regex: \s + uint16_t is_lowercase : 1; + uint16_t is_uppercase : 1; + uint16_t is_nfd : 1; + + // decode from uint16 + inline codepoint_flags(const uint16_t flags = 0) { *reinterpret_cast(this) = flags; } + + inline uint16_t as_uint() const { return *reinterpret_cast(this); } + + inline uint16_t category_flag() const { return this->as_uint() & MASK_CATEGORIES; } +}; + +size_t unicode_len_utf8(char src); + +std::string unicode_cpt_to_utf8(uint32_t cp); +uint32_t unicode_cpt_from_utf8(const std::string& utf8, size_t& offset); +std::vector unicode_cpts_from_utf8(const std::string& utf8); + +std::vector unicode_cpts_normalize_nfd(const std::vector& cpts); + +std::vector unicode_cpts_normalize_nfc(const std::vector& cpts); + +codepoint_flags unicode_cpt_flags(const uint32_t cp); +codepoint_flags unicode_cpt_flags(const std::string& utf8); + +std::string unicode_byte_to_utf8(uint8_t byte); +uint8_t unicode_utf8_to_byte(const std::string& utf8); + +uint32_t unicode_tolower(uint32_t cp); + +std::vector unicode_regex_split(const std::string& text, const std::vector& regex_exprs); diff --git a/mllm/preprocessor/visual/Image.cpp b/mllm/preprocessor/visual/Image.cpp index 22429b4c..a6733480 100644 --- a/mllm/preprocessor/visual/Image.cpp +++ b/mllm/preprocessor/visual/Image.cpp @@ -39,7 +39,7 @@ Image Image::open(const std::string& fp) { return ret_image; } -Image Image::resize(int new_w, int new_h) { +Image Image::resize(int new_w, 
int new_h, const std::string& method) { Image new_img; new_img.w_ = new_w; new_img.h_ = new_h; @@ -47,9 +47,14 @@ Image Image::resize(int new_w, int new_h) { unsigned char* output_data = nullptr; - // stb will alloc memory for us - output_data = stbir_resize_uint8_linear(static_cast(image_ptr_->ptr_), w_, h_, 0, output_data, new_w, new_h, - 0, STBIR_RGB); + if (method == "bilinear") { + // stb will alloc memory for us + output_data = stbir_resize_uint8_linear(static_cast(image_ptr_->ptr_), w_, h_, 0, output_data, new_w, new_h, + 0, STBIR_RGB); + } else if (method == "bicubic") { + output_data = (unsigned char*)stbir_resize(static_cast(image_ptr_->ptr_), w_, h_, 0, (void*)output_data, new_w, + new_h, 0, STBIR_RGB, STBIR_TYPE_UINT8, STBIR_EDGE_CLAMP, STBIR_FILTER_MITCHELL); + } else { + // Guard against silently returning an image with a null buffer for unknown methods + MLLM_RT_ASSERT(false && "Image::resize: unsupported method, expected bilinear or bicubic"); + } new_img.image_ptr_ = std::make_shared<_ImagePtr>(); new_img.image_ptr_->ptr_ = output_data; @@ -101,4 +106,112 @@ int Image::h() { return h_; } int Image::c() { return c_; } -} // namespace mllm \ No newline at end of file +Image Image::crop(int left, int upper, int right, int lower) { + // Validate input dimensions and ensure source image is loaded + MLLM_RT_ASSERT(image_ptr_ != nullptr); + MLLM_RT_ASSERT(right > left && lower > upper); + + const int crop_w = right - left; + const int crop_h = lower - upper; + + Image new_img; + new_img.w_ = crop_w; + new_img.h_ = crop_h; + new_img.c_ = 3; // Force RGB, consistent with Image::open + + // Allocate output buffer; stbi_image_free uses free, so malloc is compatible + unsigned char* output = static_cast(malloc(static_cast(crop_w) * crop_h * new_img.c_)); + MLLM_RT_ASSERT(output != nullptr); + + const unsigned char* src = static_cast(image_ptr_->ptr_); + + // PIL-style crop: pad out-of-bounds with zeros + for (int y = 0; y < crop_h; ++y) { + const int sy = upper + y; + for (int x = 0; x < crop_w; ++x) { + const int sx = left + x; + unsigned char* dst_px = output + (static_cast(y) * crop_w + x) * new_img.c_; + if (sx >= 0 && sx < w_ && sy >= 0 && sy < h_) { + const unsigned char* src_px = src + (static_cast(sy) * w_ + sx) * c_; + dst_px[0] = src_px[0]; + dst_px[1] = src_px[1]; + dst_px[2] = src_px[2]; + } else { + dst_px[0] = 0; + dst_px[1] = 0; + dst_px[2] = 0; + } + } + } + + new_img.image_ptr_ = std::make_shared<_ImagePtr>(); + new_img.image_ptr_->ptr_ = output; + + return new_img; +} + +// Pad the image to target size with given RGB color. +// Semantics mirror PIL ImageOps.pad: resize to fit within target (keeping aspect ratio) +// then center the resized image on a canvas of target size filled with color.
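+// For example, padding a 640x480 source to 512x512 uses scale = min(512/640, 512/480) = 0.8, +// resizes to 512x384, and centers the result with 64-pixel bands of the fill color above and below.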
+Image Image::pad(int target_w, int target_h, unsigned char r, unsigned char g, unsigned char b) { + MLLM_RT_ASSERT(image_ptr_ != nullptr); + MLLM_RT_ASSERT_EQ(c_, 3); + MLLM_RT_ASSERT(target_w > 0 && target_h > 0); + + // Compute scale to fit within target while preserving aspect ratio + const double scale_w = static_cast(target_w) / static_cast(w_); + const double scale_h = static_cast(target_h) / static_cast(h_); + const double scale = std::min(scale_w, scale_h); + + int new_w = static_cast(std::round(static_cast(w_) * scale)); + int new_h = static_cast(std::round(static_cast(h_) * scale)); + new_w = std::max(1, new_w); + new_h = std::max(1, new_h); + + // Resize current image to the computed size + Image resized = this->resize(new_w, new_h, "bicubic"); + + // Prepare output canvas filled with color + Image out; + out.w_ = target_w; + out.h_ = target_h; + out.c_ = 3; + unsigned char* canvas = static_cast(malloc(static_cast(target_w) * target_h * out.c_)); + MLLM_RT_ASSERT(canvas != nullptr); + + for (int y = 0; y < target_h; ++y) { + for (int x = 0; x < target_w; ++x) { + unsigned char* dst_px = canvas + (static_cast(y) * target_w + x) * out.c_; + dst_px[0] = r; + dst_px[1] = g; + dst_px[2] = b; + } + } + + // Compute offsets to center the resized image + const int offset_x = (target_w - new_w) / 2; + const int offset_y = (target_h - new_h) / 2; + + const unsigned char* src = static_cast(resized.image_ptr_->ptr_); + + // Blit resized image onto the canvas + for (int y = 0; y < new_h; ++y) { + const int dy = offset_y + y; + if (dy < 0 || dy >= target_h) continue; + for (int x = 0; x < new_w; ++x) { + const int dx = offset_x + x; + if (dx < 0 || dx >= target_w) continue; + unsigned char* dst_px = canvas + (static_cast(dy) * target_w + dx) * 3; + const unsigned char* src_px = src + (static_cast(y) * new_w + x) * 3; + dst_px[0] = src_px[0]; + dst_px[1] = src_px[1]; + dst_px[2] = src_px[2]; + } + } + + out.image_ptr_ = std::make_shared<_ImagePtr>(); + out.image_ptr_->ptr_ = canvas; + return out; +} + +} // namespace mllm diff --git a/mllm/preprocessor/visual/Image.hpp b/mllm/preprocessor/visual/Image.hpp index c47546a6..2f3de4e0 100644 --- a/mllm/preprocessor/visual/Image.hpp +++ b/mllm/preprocessor/visual/Image.hpp @@ -28,7 +28,15 @@ class Image { public: static Image open(const std::string& fp); - Image resize(int new_w, int new_h); + Image resize(int new_w, int new_h, const std::string& method = "bilinear"); + + // Crop the image with PIL-style box (left, upper, right, lower). + // Out-of-bounds areas are padded with zeros. Returns a new Image. + Image crop(int left, int upper, int right, int lower); + + // Pad the image to target size (target_w, target_h) with RGB color. + // Mirrors PIL ImageOps.pad: scale to fit, then center-pad with color. + Image pad(int target_w, int target_h, unsigned char r, unsigned char g, unsigned char b); void save(const std::string& fp); @@ -49,4 +57,4 @@ class Image { std::shared_ptr<_ImagePtr> image_ptr_ = nullptr; }; -} // namespace mllm \ No newline at end of file +} // namespace mllm diff --git a/mllm/preprocessor/visual/ImageTransform.cpp b/mllm/preprocessor/visual/ImageTransform.cpp new file mode 100644 index 00000000..d71a0072 --- /dev/null +++ b/mllm/preprocessor/visual/ImageTransform.cpp @@ -0,0 +1,154 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
+ +#include +#include + +#include "mllm/preprocessor/visual/ImageTransform.hpp" +#include "mllm/utils/Common.hpp" + +namespace mllm { + +// ========================= ComposeImageTransforms ========================= +Image ComposeImageTransforms::operator()(const Image& img) const { + Image current = img; // make a mutable copy for non-const member calls + for (const auto& t : transforms_) { current = t->apply(current); } + return current; +} + +// ========================= ComposeTensorTransforms ========================= +Tensor ComposeTensorTransforms::operator()(const Tensor& t) const { + Tensor current = t; // make a mutable copy for non-const member calls + for (const auto& tr : transforms_) { current = tr->apply(current); } + return current; +} + +// ========================= Resize ========================= +Resize::Resize(int size_shorter) : size_shorter_(size_shorter) {} +Resize::Resize(int target_h, int target_w) : size_hw_(std::make_pair(target_h, target_w)) {} + +Image Resize::apply(const Image& input) const { + Image src = input; // mutable copy + if (size_hw_.has_value()) { + // Explicit resize to (H, W) + const int new_h = size_hw_->first; + const int new_w = size_hw_->second; + return src.resize(new_w, new_h); // Image::resize expects (w, h) + } + + // Shorter-side resize with aspect ratio preserved + MLLM_RT_ASSERT(size_shorter_.has_value()); + const int target_shorter = *size_shorter_; + const int h = src.h(); + const int w = src.w(); + + const int shorter = std::min(h, w); + const double scale = static_cast(target_shorter) / static_cast(shorter); + const int new_h = static_cast(std::round(h * scale)); + const int new_w = static_cast(std::round(w * scale)); + return src.resize(new_w, new_h); +} + +// ========================= CenterCrop ========================= +CenterCrop::CenterCrop(int crop_size) : crop_h_(crop_size), crop_w_(crop_size) {} +CenterCrop::CenterCrop(int crop_h, int crop_w) : crop_h_(crop_h), crop_w_(crop_w) {} + +Image CenterCrop::apply(const Image& input) const { + Image src = input; // mutable copy + const int h = src.h(); + const int w = src.w(); + + // Compute centered crop box, PIL-style + const int left = (w - crop_w_) / 2; + const int upper = (h - crop_h_) / 2; + const int right = left + crop_w_; + const int lower = upper + crop_h_; + + return src.crop(left, upper, right, lower); +} + +// ========================= ToTensor ========================= +Tensor ToTensor::apply(const Image& input) const { + Image src = input; // mutable copy + // Image::tensor returns HWC float32 tensor with values in [0, 255] + Tensor t = src.tensor(); + + // Reorder to CHW to match torchvision semantics + t = t.permute({2, 0, 1}); + + // Scale to [0, 1] + t = t / 255.0f; + return t; +} + +// ========================= Normalize ========================= +Normalize::Normalize(const std::vector& mean, const std::vector& std) : mean_(mean), std_(std) { + MLLM_RT_ASSERT_EQ(mean_.size(), std_.size()); +} + +Tensor Normalize::apply(const Tensor& input) const { + const Tensor& src = input; + // Expect src in CHW layout + MLLM_RT_ASSERT(src.rank() == 3); + const int c = src.size(0); + const int h = src.size(1); + const int w = src.size(2); + MLLM_RT_ASSERT_EQ(static_cast(mean_.size()), c); + MLLM_RT_ASSERT_EQ(static_cast(std_.size()), c); + + // Assumes a contiguous CHW tensor; values are normalized in place on the input buffer + float* ptr = input.ptr(); + const size_t plane = static_cast(h) * static_cast(w); + + for (int ch = 0; ch < c; ++ch) { + const float m = mean_[ch]; + const float s = std_[ch]; + 
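// Normalize this channel's plane in place: x = (x - m) / s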
+ float* base = ptr + static_cast(ch) * plane; + for (size_t i = 0; i < plane; ++i) { base[i] = (base[i] - m) / s; } + } + + return input; +} + +// ========================= BasicImageTransform ========================= +BasicImageTransform::BasicImageTransform(std::optional resize_shorter, std::optional> resize_hw, + std::optional> center_crop, + std::optional> norm_mean, + std::optional> norm_std) { + // Build image pipeline + if (resize_shorter.has_value()) { + image_pipeline_.add(std::make_shared(*resize_shorter)); + } else if (resize_hw.has_value()) { + image_pipeline_.add(std::make_shared(resize_hw->first, resize_hw->second)); + } + + if (center_crop.has_value()) { image_pipeline_.add(std::make_shared(center_crop->first, center_crop->second)); } + + // Build tensor pipeline (Normalize optional) + if (norm_mean.has_value() && norm_std.has_value()) { + tensor_pipeline_.add(std::make_shared(*norm_mean, *norm_std)); + } +} + +BasicImageTransform::BasicImageTransform(std::optional resize_shorter, std::optional center_crop_square, + std::optional> norm_mean, + std::optional> norm_std) { + if (resize_shorter.has_value()) { image_pipeline_.add(std::make_shared(*resize_shorter)); } + if (center_crop_square.has_value()) { image_pipeline_.add(std::make_shared(*center_crop_square)); } + if (norm_mean.has_value() && norm_std.has_value()) { + tensor_pipeline_.add(std::make_shared(*norm_mean, *norm_std)); + } +} + +Tensor BasicImageTransform::operator()(const Image& img) const { + // 1) Run image-level transforms + Image processed = image_pipeline_(img); + // 2) Convert to tensor (CHW, [0,1]) + Tensor t = to_tensor_.apply(processed); + // 3) Run tensor-level transforms + t = tensor_pipeline_(t); + return t; +} + +} // namespace mllm diff --git a/mllm/preprocessor/visual/ImageTransform.hpp b/mllm/preprocessor/visual/ImageTransform.hpp new file mode 100644 index 00000000..de8979ca --- /dev/null +++ b/mllm/preprocessor/visual/ImageTransform.hpp @@ -0,0 +1,192 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +/** + * ImageTransform.hpp + * + * This header provides a small, extensible transform system tailored for mllm::Image, + * modeled after the design philosophy of torchvision.transforms. It enables users to + * compose image processing operations and common post-processing steps used in CV/ML pipelines. + * + * Key Concepts (parallels to torchvision): + * - Transform Operators: Small classes that implement a single, well-defined transformation. + * For example, Resize, CenterCrop operate on Image; Normalize operates on Tensor. + * - Composition: Compose objects allow chaining multiple transforms in order. + * - Type Flow: Similar to torchvision, some image transforms keep data as image-like objects, + * then a "ToTensor" step converts the image to a tensor. Later steps like "Normalize" operate on Tensor. + * + * Design Notes: + * - Image-level transforms implement IImageTransform and return a new Image. + * - Tensor-level transforms implement ITensorTransform and return a new Tensor. + * - ToTensor is a bridging transform converting Image(H,W,C, uint8-like) to Tensor(C,H,W, float32) + * with values scaled to [0, 1], matching torchvision.transforms.ToTensor semantics. + * - Normalize implements channel-wise normalization: (x - mean) / std for each channel, + * where x is a float32 Tensor in CHW layout. + * - BasicImageTransform is a convenience pipeline assembling common steps: resize -> optional crop -> to_tensor -> optional + * normalize. 
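+ * - Transforms are held by shared_ptr and applied strictly in insertion order; a composed + * pipeline is cheap to copy and can be reused across many images.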
+ * + * Example (usage, mirrors torchvision style): + * // Build a preprocessing pipeline + * // mllm::BasicImageTransform tf( + * // std::optional(512), // resize shorter side to 512 + * // std::optional(448), // center-crop square 448x448 + * // std::vector{0.485f, 0.456f, 0.406f}, // mean + * // std::vector{0.229f, 0.224f, 0.225f} // std + * // ); + * + * // Apply to an image + * // mllm::Image img = mllm::Image::open("/path/to/img.jpg"); + * // mllm::Tensor input = tf(img); // CHW, float32, normalized + */ + +#pragma once + +#include +#include +#include +#include + +#include "mllm/preprocessor/visual/Image.hpp" +#include "mllm/core/Tensor.hpp" + +namespace mllm { + +// Interface for transforms that take an Image and return an Image. +class IImageTransform { + public: + virtual ~IImageTransform() = default; + [[nodiscard]] virtual Image apply(const Image& input) const = 0; +}; + +// Interface for transforms that take a Tensor and return a Tensor. +class ITensorTransform { + public: + virtual ~ITensorTransform() = default; + [[nodiscard]] virtual Tensor apply(const Tensor& input) const = 0; +}; + +// Compose multiple image transforms (executed in order). Returns the final Image. +class ComposeImageTransforms { + public: + ComposeImageTransforms() = default; + + explicit ComposeImageTransforms(const std::vector>& transforms) : transforms_(transforms) {} + + ComposeImageTransforms& add(const std::shared_ptr& t) { + transforms_.push_back(t); + return *this; + } + + [[nodiscard]] Image operator()(const Image& img) const; + + private: + std::vector> transforms_; +}; + +// Compose multiple tensor transforms (executed in order). Returns the final Tensor. +class ComposeTensorTransforms { + public: + ComposeTensorTransforms() = default; + + explicit ComposeTensorTransforms(const std::vector>& transforms) + : transforms_(transforms) {} + + ComposeTensorTransforms& add(const std::shared_ptr& t) { + transforms_.push_back(t); + return *this; + } + + [[nodiscard]] Tensor operator()(const Tensor& t) const; + + private: + std::vector> transforms_; +}; + +// Resize transform. +// TorchVision semantics: +// - If constructed with a single integer `size`, resize so that the shorter side == size, +// preserving aspect ratio. The longer side is scaled accordingly. +// - If constructed with (height, width), resize to exactly that spatial size. +class Resize : public IImageTransform { + public: + // Preserve aspect ratio: shorter side == size. + explicit Resize(int size_shorter); + // Explicit target (height, width). + Resize(int target_h, int target_w); + + [[nodiscard]] Image apply(const Image& input) const override; + + private: + std::optional size_shorter_; + std::optional> size_hw_; +}; + +// CenterCrop transform. +// TorchVision semantics: +// - Crop a Region of size (crop_h, crop_w) at the image center. +// - If the crop extends beyond boundaries, out-of-bounds is zero-padded (PIL-style); our Image::crop already supports this. +class CenterCrop : public IImageTransform { + public: + explicit CenterCrop(int crop_size); + CenterCrop(int crop_h, int crop_w); + + [[nodiscard]] Image apply(const Image& input) const override; + + private: + int crop_h_; + int crop_w_; +}; + +// ToTensor bridging transform: Image -> Tensor. +// TorchVision semantics: +// - Convert PIL-like image to a float32 tensor with shape (C, H, W). +// - Scale values from [0, 255] to [0, 1]. +class ToTensor { + public: + [[nodiscard]] Tensor apply(const Image& input) const; +}; + +// Normalize transform: Tensor -> Tensor. 
+// TorchVision semantics: +// - Input tensor expected to be float32 in CHW layout. +// - For each channel c: out[c, :, :] = (in[c, :, :] - mean[c]) / std[c]. +class Normalize : public ITensorTransform { + public: + Normalize(const std::vector& mean, const std::vector& std); + + [[nodiscard]] Tensor apply(const Tensor& input) const override; + + private: + std::vector mean_; + std::vector std_; +}; + +// Convenience pipeline resembling common torchvision usage: +// transforms = Compose([Resize, (optional) CenterCrop, ToTensor, (optional) Normalize]) +// - Users provide parameters commonly used in OCR/vision preprocessing. +// - Returns a Tensor ready for model input. +class BasicImageTransform { + public: + // Build a pipeline: + // - If `resize_shorter` is set, resize by shorter side; otherwise if `resize_hw` is set, resize to (h, w). + // - If `center_crop` is set, apply center crop. + // - Always apply ToTensor. + // - If `norm_mean` and `norm_std` are provided, apply Normalize. + BasicImageTransform(std::optional resize_shorter, std::optional> resize_hw, + std::optional> center_crop, std::optional> norm_mean, + std::optional> norm_std); + + // Convenience ctor: shorter-side resize, optional square crop, optional normalize. + BasicImageTransform(std::optional resize_shorter, std::optional center_crop_square, + std::optional> norm_mean, std::optional> norm_std); + + // Apply the pipeline to an input image and return the final tensor. + [[nodiscard]] Tensor operator()(const Image& img) const; + + private: + ComposeImageTransforms image_pipeline_; + ToTensor to_tensor_; + ComposeTensorTransforms tensor_pipeline_; +}; + +} // namespace mllm diff --git a/mllm/utils/StringHelper.hpp b/mllm/utils/StringHelper.hpp new file mode 100644 index 00000000..c30185bd --- /dev/null +++ b/mllm/utils/StringHelper.hpp @@ -0,0 +1,60 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
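+// splitString loosely mirrors Python's str.split: an empty sep (the default) splits on runs of +// whitespace and discards leading/trailing whitespace; otherwise every occurrence of sep is a +// split point, and maxsplit < 0 means no limit. Illustrative calls: +// splitString("a b c") -> {"a", "b", "c"}; splitString("a,b,c", ",", 1) -> {"a", "b,c"}.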
+#pragma once + +#include +#include +#include + +namespace mllm { +inline std::vector splitString(const std::string& s, const std::string& sep = "", int maxsplit = -1) { + std::vector out; + if (maxsplit == 0) { + out.push_back(s); + return out; + } + + const char* p = s.data(); + const char* end = p + s.size(); + + if (sep.empty()) { + auto skip_space = [&]() { + while (p != end && std::isspace(static_cast(*p))) ++p; + }; + skip_space(); + while (p != end) { + const char* start = p; + while (p != end && !std::isspace(static_cast(*p))) ++p; + out.emplace_back(start, p); + if (maxsplit >= 0 && --maxsplit == 0) { + out.emplace_back(p, end); + return out; + } + skip_space(); + } + return out; + } + + if (sep.size() == 1) { + const unsigned char needle = static_cast(sep[0]); + while (maxsplit != 0) { + const char* pos = reinterpret_cast(std::memchr(p, needle, end - p)); + if (!pos) break; + out.emplace_back(p, pos); + p = pos + 1; + if (maxsplit > 0) --maxsplit; + } + } else { + const auto n = sep.size(); + while (maxsplit != 0) { + const char* pos = std::search(p, end, sep.begin(), sep.end()); + if (pos == end) break; + out.emplace_back(p, pos); + p = pos + n; + if (maxsplit > 0) --maxsplit; + } + } + out.emplace_back(p, end); + return out; +} +} // namespace mllm diff --git a/pymllm/quantize/pipeline.py b/pymllm/quantize/pipeline.py index 3443491a..71da013c 100644 --- a/pymllm/quantize/pipeline.py +++ b/pymllm/quantize/pipeline.py @@ -15,7 +15,15 @@ def build_w4a32_kai_pipeline() -> QuantizeSolver: return ret -BUILTIN_QUANTIZE_PIPELINE: Dict = {"w4a32_kai_pipeline": build_w4a32_kai_pipeline} +def build_cast2fp32_pipeline() -> QuantizeSolver: + ret = QuantizeSolver() + return ret + + +BUILTIN_QUANTIZE_PIPELINE: Dict = { + "w4a32_kai_pipeline": build_w4a32_kai_pipeline, + "cast2fp32_pipeline": build_cast2fp32_pipeline, +} BUILTIN_QUANTIZE_PASS: Dict = { "w4a32_kai": W4A32KAIQuantizePass, "cast2fp32": Cast2Fp32QuantizePass, diff --git a/pymllm/quantize/solver.py b/pymllm/quantize/solver.py index dd5fec39..c9669e48 100644 --- a/pymllm/quantize/solver.py +++ b/pymllm/quantize/solver.py @@ -25,6 +25,9 @@ def _stream_quantize_write_v2(self, tensor_dict: Dict, writer: ModelFileV2) -> b def stream_quantize_params_size( self, quant_cfg, tensor_dict: Dict, **kwargs ) -> int: + if quant_cfg is None: + quant_cfg = {} + param_groups: Dict[str, List[Any, Dict]] = {} for k, v in quant_cfg.items(): sub_group: Dict[str, QuantizePlanPayload] = {} @@ -79,6 +82,9 @@ def stream_quantize( "stream_quantize only support type: ModelFileV2 currently." 
) + if quant_cfg is None: + quant_cfg = {} + # Planning param_groups: Dict[str, List[Any, Dict]] = {} for k, v in quant_cfg.items(): diff --git a/pymllm/utils/mllm_convertor.py b/pymllm/utils/mllm_convertor.py index 8aa9b4af..7b1aabfb 100644 --- a/pymllm/utils/mllm_convertor.py +++ b/pymllm/utils/mllm_convertor.py @@ -48,9 +48,24 @@ def main(): params = convertor.load_model(args.input_path) # Build pipeline - if args.cfg_path is None: - # TODO just convert to mllm file - pass + if args.cfg_path is None and args.pipeline is not None and args.format == "v2": + cfg = None + pipeline: QuantizeSolver = BUILTIN_QUANTIZE_PIPELINE[args.pipeline]() + old_param_size = len(params) + new_param_size = pipeline.stream_quantize_params_size(cfg, params) + print(f"Params Num: Before: {old_param_size}, After: {new_param_size}") + pipeline.stream_quantize( + cfg, + params, + writer=ModelFileV2( + args.output_path, + args.model_name, + "Streaming", + max_params_descriptor_buffer_num=new_param_size, + ), + cast_left_2_fp32=True, + verbose=args.verbose, + ) elif ( args.cfg_path is not None and args.pipeline is not None and args.format == "v2" ): @@ -76,3 +91,5 @@ def main(): cast_left_2_fp32=True, verbose=args.verbose, ) + else: + print("No pipeline specified") diff --git a/tests/cpu/Conv2DKernelTest.hpp b/tests/cpu/Conv2DKernelTest.hpp new file mode 100644 index 00000000..f52ca045 --- /dev/null +++ b/tests/cpu/Conv2DKernelTest.hpp @@ -0,0 +1,133 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. +#include + +#include "mllm/mllm.hpp" +#include "mllm/nn/Nn.hpp" +#include "mllm/core/ParameterFile.hpp" + +#include "KernelTestHelper.hpp" + +using namespace mllm; // NOLINT + +void naive_conv2d(const float* input_data, const float* weight_data, const float* bias_data, float* output_data, + int in_channels, int in_h, int in_w, int out_channels, int kernel_h, int kernel_w, int pad_h, int pad_w, + int stride_h, int stride_w, int dilation_h, int dilation_w) { + const int out_h = (in_h + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; + const int out_w = (in_w + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; + for (int oc = 0; oc < out_channels; ++oc) { + for (int oy = 0; oy < out_h; ++oy) { + for (int ox = 0; ox < out_w; ++ox) { + float accumulated_value = 0.0f; + for (int ic = 0; ic < in_channels; ++ic) { + for (int ky = 0; ky < kernel_h; ++ky) { + for (int kx = 0; kx < kernel_w; ++kx) { + const int iy = oy * stride_h + ky * dilation_h - pad_h; + const int ix = ox * stride_w + kx * dilation_w - pad_w; + + if (iy >= 0 && iy < in_h && ix >= 0 && ix < in_w) { + int input_idx = ic * (in_h * in_w) + iy * in_w + ix; + int weight_idx = oc * (in_channels * kernel_h * kernel_w) + ic * (kernel_h * kernel_w) + ky * kernel_w + kx; + + accumulated_value += input_data[input_idx] * weight_data[weight_idx]; + } + } + } + } + + if (bias_data != nullptr) { accumulated_value += bias_data[oc]; } + + int output_idx = oc * (out_h * out_w) + oy * out_w + ox; + output_data[output_idx] = accumulated_value; + } + } + } +} + +class Conv2DModule : public nn::Module { + nn::Conv2D conv2d_; + + public: + Conv2DModule() = default; + + Conv2DModule(int in_channel, int out_channel, int K_H, int K_W, int S_H, int S_W, int P_H, int P_W, bool bias) + : nn::Module() { + conv2d_ = reg("emb", in_channel, out_channel, std::vector{K_H, K_W}, std::vector{S_H, S_W}, + std::vector{P_H, P_W}, std::vector{1, 1}, bias); + } + + std::vector forward(const std::vector& inputs, const std::vector& args) override { + // 
inputs[0] is the input image tensor in NCHW layout + return {conv2d_(inputs[0])}; + } +}; + +class Conv2DKernelTest : public KernelTest { + public: + Conv2DKernelTest() = default; + ~Conv2DKernelTest() override = default; + + bool testConv2DOnce(const std::unordered_map& cfg) { + auto in_channel = cfg.at("in_channel"); + auto out_channel = cfg.at("out_channel"); + auto I_H = cfg.at("I_H"); + auto I_W = cfg.at("I_W"); + auto K_H = cfg.at("K_H"); + auto K_W = cfg.at("K_W"); + auto S_H = cfg.at("S_H"); + auto S_W = cfg.at("S_W"); + auto P_H = cfg.at("P_H"); + auto P_W = cfg.at("P_W"); + auto bias = cfg.at("bias"); + + auto module = Conv2DModule(in_channel, out_channel, K_H, K_W, S_H, S_W, P_H, P_W, bias); + + // Make fake data + auto weight_param = Tensor::random({out_channel, in_channel, K_H, K_W}, -1, 1, kFloat32, kCPU); + auto bias_param = Tensor::random({out_channel}, -1, 1, kFloat32, kCPU); + weight_param.setName("emb.weight"); + bias_param.setName("emb.bias"); + auto param = ParameterFile::create(); + param->push("emb.weight", weight_param); + param->push("emb.bias", bias_param); + module.load(param); + + auto input = Tensor::random({1, in_channel, I_H, I_W}, -1, 1, kFloat32, kCPU); + auto predict = module(input)[0]; + + auto GT = Tensor::empty(predict.shape(), kFloat32, kCPU).alloc(); + + // Naive impl to check correctness. + naive_conv2d(input.ptr(), weight_param.ptr(), bias ? bias_param.ptr() : nullptr, GT.ptr(), + in_channel, I_H, I_W, out_channel, K_H, K_W, P_H, P_W, S_H, S_W, 1, 1); + + auto result = test::allClose(GT, predict, 1e-2f, 1e-2f); + if (!result) { + print(result); + return false; + } + + return true; + } + + bool testConv2D(const std::vector>& cfgs) { + for (auto& cfg : cfgs) { + if (!testConv2DOnce(cfg)) { + auto in_channel = cfg.at("in_channel"); + auto out_channel = cfg.at("out_channel"); + auto I_H = cfg.at("I_H"); + auto I_W = cfg.at("I_W"); + auto K_H = cfg.at("K_H"); + auto K_W = cfg.at("K_W"); + auto S_H = cfg.at("S_H"); + auto S_W = cfg.at("S_W"); + auto P_H = cfg.at("P_H"); + auto P_W = cfg.at("P_W"); + auto bias = cfg.at("bias"); + print(in_channel, out_channel, I_H, I_W, K_H, K_W, S_H, S_W, P_H, P_W, bias); + return false; + } + } + return true; + } +}; diff --git a/tests/cpu/FlashAttentionKernelTest.hpp b/tests/cpu/FlashAttentionKernelTest.hpp index 571cf41f..27a5e0d1 100644 --- a/tests/cpu/FlashAttentionKernelTest.hpp +++ b/tests/cpu/FlashAttentionKernelTest.hpp @@ -7,7 +7,6 @@ #include "mllm/mllm.hpp" #include "mllm/nn/Nn.hpp" #include "mllm/nn/Functional.hpp" -#include "mllm/nn/lmcache/PrefixCache.hpp" #include "KernelTestHelper.hpp" diff --git a/tests/cpu/KernelTest.cpp b/tests/cpu/KernelTest.cpp index a0fe4d7b..fb595c79 100644 --- a/tests/cpu/KernelTest.cpp +++ b/tests/cpu/KernelTest.cpp @@ -838,6 +838,128 @@ TEST_F(FlashAttn2KernelTest, fwd_bshd) { } #endif +//===----------------------------------------------------------------------===// +// Conv2D Test +// +// auto in_channel = cfg.at("in_channel"); +// auto out_channel = cfg.at("out_channel"); +// auto I_H = cfg.at("I_H"); +// auto I_W = cfg.at("I_W"); +// auto K_H = cfg.at("K_H"); +// auto K_W = cfg.at("K_W"); +// auto S_H = cfg.at("S_H"); +// auto S_W = cfg.at("S_W"); +// auto P_H = cfg.at("P_H"); +// auto P_W = cfg.at("P_W"); +// auto bias = cfg.at("bias"); +// +// In deepseek-ocr we have +// CASE 1: +// in_channel = 3 +// out_channel = 1024 +// I_H = 224 +// I_W = 224 +// K_H = 14 +// K_W = 14 +// S_H = 14 +// S_W = 14 +// P_H = 0 +// P_W = 0 +// bias = false +// CASE 2: +// 
+//===----------------------------------------------------------------------===// +#include "Conv2DKernelTest.hpp" +TEST_F(Conv2DKernelTest, im2col) { + EXPECT_EQ(testConv2D({ + // CLIP patch embedding + { + {"in_channel", 3}, + {"out_channel", 1024}, + {"I_H", 224}, + {"I_W", 224}, + {"K_H", 14}, + {"K_W", 14}, + {"S_H", 14}, + {"S_W", 14}, + {"P_H", 0}, + {"P_W", 0}, + {"bias", 0}, + }, + // SAM PatchEmbed.proj + { + {"in_channel", 3}, + {"out_channel", 768}, + {"I_H", 1024}, + {"I_W", 1024}, + {"K_H", 16}, + {"K_W", 16}, + {"S_H", 16}, + {"S_W", 16}, + {"P_H", 0}, + {"P_W", 0}, + {"bias", 1}, + }, + // neck: Conv2D(768 -> 12, 1x1, stride=1, pad=0, bias=false) + { + {"in_channel", 768}, + {"out_channel", 12}, + {"I_H", 64}, + {"I_W", 64}, + {"K_H", 1}, + {"K_W", 1}, + {"S_H", 1}, + {"S_W", 1}, + {"P_H", 0}, + {"P_W", 0}, + {"bias", 0}, + }, + // neck: Conv2D(256 -> 256, 3x3, stride=1, pad=1, bias=false) + { + {"in_channel", 256}, + {"out_channel", 256}, + {"I_H", 64}, + {"I_W", 64}, + {"K_H", 3}, + {"K_W", 3}, + {"S_H", 1}, + {"S_W", 1}, + {"P_H", 1}, + {"P_W", 1}, + {"bias", 0}, + }, + // net_2_: Conv2D(256 -> 512, 3x3, stride=2, pad=1, bias=false) + { + {"in_channel", 256}, + {"out_channel", 512}, + {"I_H", 64}, + {"I_W", 64}, + {"K_H", 3}, + {"K_W", 3}, + {"S_H", 2}, + {"S_W", 2}, + {"P_H", 1}, + {"P_W", 1}, + {"bias", 0}, + }, + // net_3_: Conv2D(512 -> 1024, 3x3, stride=2, pad=1, bias=false) + { + {"in_channel", 512}, + {"out_channel", 1024}, + {"I_H", 32}, + {"I_W", 32}, + {"K_H", 3}, + {"K_W", 3}, + {"S_H", 2}, + {"S_W", 2}, + {"P_H", 1}, + {"P_W", 1}, + {"bias", 0}, + }, + }), + true); +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); mllm::initializeContext(); diff --git a/third_party/utfcpp/include/utfcpp/utf8.h b/third_party/utfcpp/include/utfcpp/utf8.h new file mode 100644 index 00000000..b5135309 --- /dev/null +++ b/third_party/utfcpp/include/utfcpp/utf8.h @@ -0,0 +1,46 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+*/ + + +#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +/* +To control the C++ language version used by the library, you can define UTF_CPP_CPLUSPLUS macro +and set it to one of the values used by the __cplusplus predefined macro. + +For instance, + #define UTF_CPP_CPLUSPLUS 199711L +will cause the UTF-8 CPP library to use only types and language features available in the C++ 98 standard. +Some library features will be disabled. + +If you leave UTF_CPP_CPLUSPLUS undefined, it will be internally assigned to __cplusplus. +*/ + +#include "utf8/checked.h" +#include "utf8/unchecked.h" + +#endif // header guard diff --git a/third_party/utfcpp/include/utfcpp/utf8/checked.h b/third_party/utfcpp/include/utfcpp/utf8/checked.h new file mode 100644 index 00000000..96ceb4d5 --- /dev/null +++ b/third_party/utfcpp/include/utfcpp/utf8/checked.h @@ -0,0 +1,359 @@ +// Copyright 2006-2016 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "core.h" +#include + +namespace utf8 +{ + // Base for the exceptions that may be thrown from the library + class exception : public ::std::exception { + }; + + // Exceptions that may be thrown from the library functions. 
+ class invalid_code_point : public exception { + utfchar32_t cp; + public: + invalid_code_point(utfchar32_t codepoint) : cp(codepoint) {} + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid code point"; } + utfchar32_t code_point() const {return cp;} + }; + + class invalid_utf8 : public exception { + utfchar8_t u8; + public: + invalid_utf8 (utfchar8_t u) : u8(u) {} + invalid_utf8 (char c) : u8(static_cast(c)) {} + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-8"; } + utfchar8_t utf8_octet() const {return u8;} + }; + + class invalid_utf16 : public exception { + utfchar16_t u16; + public: + invalid_utf16 (utfchar16_t u) : u16(u) {} + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-16"; } + utfchar16_t utf16_word() const {return u16;} + }; + + class not_enough_room : public exception { + public: + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Not enough space"; } + }; + + /// The library API - functions intended to be called by the users + + template + octet_iterator append(utfchar32_t cp, octet_iterator result) + { + if (!utf8::internal::is_code_point_valid(cp)) + throw invalid_code_point(cp); + + return internal::append(cp, result); + } + + inline void append(utfchar32_t cp, std::string& s) + { + append(cp, std::back_inserter(s)); + } + + template + word_iterator append16(utfchar32_t cp, word_iterator result) + { + if (!utf8::internal::is_code_point_valid(cp)) + throw invalid_code_point(cp); + + return internal::append16(cp, result); + } + + template + output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement) + { + while (start != end) { + octet_iterator sequence_start = start; + internal::utf_error err_code = utf8::internal::validate_next(start, end); + switch (err_code) { + case internal::UTF8_OK : + for (octet_iterator it = sequence_start; it != start; ++it) + *out++ = *it; + break; + case internal::NOT_ENOUGH_ROOM: + out = utf8::append (replacement, out); + start = end; + break; + case internal::INVALID_LEAD: + out = utf8::append (replacement, out); + ++start; + break; + case internal::INCOMPLETE_SEQUENCE: + case internal::OVERLONG_SEQUENCE: + case internal::INVALID_CODE_POINT: + out = utf8::append (replacement, out); + ++start; + // just one replacement mark for the sequence + while (start != end && utf8::internal::is_trail(*start)) + ++start; + break; + } + } + return out; + } + + template + inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) + { + static const utfchar32_t replacement_marker = static_cast(utf8::internal::mask16(0xfffd)); + return utf8::replace_invalid(start, end, out, replacement_marker); + } + + inline std::string replace_invalid(const std::string& s, utfchar32_t replacement) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); + return result; + } + + inline std::string replace_invalid(const std::string& s) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + template + utfchar32_t next(octet_iterator& it, octet_iterator end) + { + utfchar32_t cp = 0; + internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); + switch (err_code) { + case internal::UTF8_OK : + break; + case internal::NOT_ENOUGH_ROOM : + throw not_enough_room(); + case internal::INVALID_LEAD : + case 
internal::INCOMPLETE_SEQUENCE : + case internal::OVERLONG_SEQUENCE : + throw invalid_utf8(static_cast(*it)); + case internal::INVALID_CODE_POINT : + throw invalid_code_point(cp); + } + return cp; + } + + template + utfchar32_t next16(word_iterator& it, word_iterator end) + { + utfchar32_t cp = 0; + internal::utf_error err_code = utf8::internal::validate_next16(it, end, cp); + if (err_code == internal::NOT_ENOUGH_ROOM) + throw not_enough_room(); + return cp; + } + + template + utfchar32_t peek_next(octet_iterator it, octet_iterator end) + { + return utf8::next(it, end); + } + + template + utfchar32_t prior(octet_iterator& it, octet_iterator start) + { + // can't do much if it == start + if (it == start) + throw not_enough_room(); + + octet_iterator end = it; + // Go back until we hit either a lead octet or start + while (utf8::internal::is_trail(*(--it))) + if (it == start) + throw invalid_utf8(*it); // error - no lead byte in the sequence + return utf8::peek_next(it, end); + } + + template + void advance (octet_iterator& it, distance_type n, octet_iterator end) + { + const distance_type zero(0); + if (n < zero) { + // backward + for (distance_type i = n; i < zero; ++i) + utf8::prior(it, end); + } else { + // forward + for (distance_type i = zero; i < n; ++i) + utf8::next(it, end); + } + } + + template + typename std::iterator_traits::difference_type + distance (octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + utf8::next(first, last); + return dist; + } + + template + octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) + { + while (start != end) { + utfchar32_t cp = static_cast(utf8::internal::mask16(*start++)); + // Take care of surrogate pairs first + if (utf8::internal::is_lead_surrogate(cp)) { + if (start != end) { + const utfchar32_t trail_surrogate = static_cast(utf8::internal::mask16(*start++)); + if (utf8::internal::is_trail_surrogate(trail_surrogate)) + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + else + throw invalid_utf16(static_cast(trail_surrogate)); + } + else + throw invalid_utf16(static_cast(cp)); + + } + // Lone trail surrogate + else if (utf8::internal::is_trail_surrogate(cp)) + throw invalid_utf16(static_cast(cp)); + + result = utf8::append(cp, result); + } + return result; + } + + template + u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) + { + while (start < end) { + const utfchar32_t cp = utf8::next(start, end); + if (cp > 0xffff) { //make a surrogate pair + *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); + *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); + } + else + *result++ = static_cast(cp); + } + return result; + } + + template + octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) + { + while (start != end) + result = utf8::append(*(start++), result); + + return result; + } + + template + u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) + { + while (start < end) + (*result++) = utf8::next(start, end); + + return result; + } + + // The iterator class + template + class iterator { + octet_iterator it; + octet_iterator range_start; + octet_iterator range_end; + public: + typedef utfchar32_t value_type; + typedef utfchar32_t* pointer; + typedef utfchar32_t& reference; + typedef std::ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag 
iterator_category; + iterator () {} + explicit iterator (const octet_iterator& octet_it, + const octet_iterator& rangestart, + const octet_iterator& rangeend) : + it(octet_it), range_start(rangestart), range_end(rangeend) + { + if (it < range_start || it > range_end) + throw std::out_of_range("Invalid utf-8 iterator position"); + } + // the default "big three" are OK + octet_iterator base () const { return it; } + utfchar32_t operator * () const + { + octet_iterator temp = it; + return utf8::next(temp, range_end); + } + bool operator == (const iterator& rhs) const + { + if (range_start != rhs.range_start || range_end != rhs.range_end) + throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); + return (it == rhs.it); + } + bool operator != (const iterator& rhs) const + { + return !(operator == (rhs)); + } + iterator& operator ++ () + { + utf8::next(it, range_end); + return *this; + } + iterator operator ++ (int) + { + iterator temp = *this; + utf8::next(it, range_end); + return temp; + } + iterator& operator -- () + { + utf8::prior(it, range_start); + return *this; + } + iterator operator -- (int) + { + iterator temp = *this; + utf8::prior(it, range_start); + return temp; + } + }; // class iterator + +} // namespace utf8 + +#if UTF_CPP_CPLUSPLUS >= 202002L // C++ 20 or later +#include "cpp20.h" +#elif UTF_CPP_CPLUSPLUS >= 201703L // C++ 17 or later +#include "cpp17.h" +#elif UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later +#include "cpp11.h" +#endif // C++ 11 or later + +#endif //header guard + diff --git a/third_party/utfcpp/include/utfcpp/utf8/core.h b/third_party/utfcpp/include/utfcpp/utf8/core.h new file mode 100644 index 00000000..064f838f --- /dev/null +++ b/third_party/utfcpp/include/utfcpp/utf8/core.h @@ -0,0 +1,502 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include +#include +#include + +// Determine the C++ standard version. +// If the user defines UTF_CPP_CPLUSPLUS, use that. 
+// Otherwise, trust the unreliable predefined macro __cplusplus + +#if !defined UTF_CPP_CPLUSPLUS + #define UTF_CPP_CPLUSPLUS __cplusplus +#endif + +#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later + #define UTF_CPP_OVERRIDE override + #define UTF_CPP_NOEXCEPT noexcept + #define UTF_CPP_STATIC_ASSERT(condition) static_assert(condition, "UTFCPP static assert"); +#else // C++ 98/03 + #define UTF_CPP_OVERRIDE + #define UTF_CPP_NOEXCEPT throw() + // Simulate static_assert: + template struct StaticAssert {static void utf8_static_assert() {char static_assert_impl[Condition ? 1 : 0]; } }; + template <> struct StaticAssert {static void utf8_static_assert() {}}; + #define UTF_CPP_STATIC_ASSERT(condition) StaticAssert::utf8_static_assert(); +#endif // C++ 11 or later + + +namespace utf8 +{ +// The typedefs for 8-bit, 16-bit and 32-bit code units +#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later + #if UTF_CPP_CPLUSPLUS >= 202002L // C++ 20 or later + typedef char8_t utfchar8_t; + #else // C++ 11/14/17 + typedef unsigned char utfchar8_t; + #endif + typedef char16_t utfchar16_t; + typedef char32_t utfchar32_t; +#else // C++ 98/03 + typedef unsigned char utfchar8_t; + typedef unsigned short utfchar16_t; + typedef unsigned int utfchar32_t; +#endif // C++ 11 or later + +// Helper code - not intended to be directly called by the library users. May be changed at any time +namespace internal +{ + // Unicode constants + // Leading (high) surrogates: 0xd800 - 0xdbff + // Trailing (low) surrogates: 0xdc00 - 0xdfff + const utfchar16_t LEAD_SURROGATE_MIN = 0xd800u; + const utfchar16_t LEAD_SURROGATE_MAX = 0xdbffu; + const utfchar16_t TRAIL_SURROGATE_MIN = 0xdc00u; + const utfchar16_t TRAIL_SURROGATE_MAX = 0xdfffu; + const utfchar16_t LEAD_OFFSET = 0xd7c0u; // LEAD_SURROGATE_MIN - (0x10000 >> 10) + const utfchar32_t SURROGATE_OFFSET = 0xfca02400u; // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN + + // Maximum valid value for a Unicode code point + const utfchar32_t CODE_POINT_MAX = 0x0010ffffu; + + template + inline utfchar8_t mask8(octet_type oc) + { + return static_cast(0xff & oc); + } + + template + inline utfchar16_t mask16(u16_type oc) + { + return static_cast(0xffff & oc); + } + + template + inline bool is_trail(octet_type oc) + { + return ((utf8::internal::mask8(oc) >> 6) == 0x2); + } + + inline bool is_lead_surrogate(utfchar32_t cp) + { + return (cp >= static_cast(LEAD_SURROGATE_MIN) && cp <= static_cast(LEAD_SURROGATE_MAX)); + } + + inline bool is_trail_surrogate(utfchar32_t cp) + { + return (cp >= static_cast(TRAIL_SURROGATE_MIN) && cp <= static_cast(TRAIL_SURROGATE_MAX)); + } + + inline bool is_surrogate(utfchar32_t cp) + { + return (cp >= static_cast(LEAD_SURROGATE_MIN) && cp <= static_cast(TRAIL_SURROGATE_MAX)); + } + + inline bool is_code_point_valid(utfchar32_t cp) + { + return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp)); + } + + inline bool is_in_bmp(utfchar32_t cp) + { + return cp < utfchar32_t(0x10000); + } + + template + int sequence_length(octet_iterator lead_it) + { + const utfchar8_t lead = utf8::internal::mask8(*lead_it); + if (lead < 0x80) + return 1; + else if ((lead >> 5) == 0x6) + return 2; + else if ((lead >> 4) == 0xe) + return 3; + else if ((lead >> 3) == 0x1e) + return 4; + else + return 0; + } + + inline bool is_overlong_sequence(utfchar32_t cp, int length) + { + if (cp < 0x80) { + if (length != 1) + return true; + } + else if (cp < 0x800) { + if (length != 2) + return true; + } + else if (cp < 0x10000) { + if (length != 3) + return true; + } + 
+        return false;
+    }
+
+    enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
+
+    /// Helper for get_sequence_x
+    template <typename octet_iterator>
+    utf_error increase_safely(octet_iterator& it, const octet_iterator end)
+    {
+        if (++it == end)
+            return NOT_ENOUGH_ROOM;
+
+        if (!utf8::internal::is_trail(*it))
+            return INCOMPLETE_SEQUENCE;
+
+        return UTF8_OK;
+    }
+
+    #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
+
+    /// get_sequence_x functions decode utf-8 sequences of the length x
+    template <typename octet_iterator>
+    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, utfchar32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        code_point = static_cast<utfchar32_t>(utf8::internal::mask8(*it));
+
+        return UTF8_OK;
+    }
+
+    template <typename octet_iterator>
+    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, utfchar32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        code_point = static_cast<utfchar32_t>(utf8::internal::mask8(*it));
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
+
+        return UTF8_OK;
+    }
+
+    template <typename octet_iterator>
+    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, utfchar32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        code_point = static_cast<utfchar32_t>(utf8::internal::mask8(*it));
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
+
+        return UTF8_OK;
+    }
+
+    template <typename octet_iterator>
+    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, utfchar32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        code_point = static_cast<utfchar32_t>(utf8::internal::mask8(*it));
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        code_point = static_cast<utfchar32_t>(code_point + ((utf8::internal::mask8(*it) << 6) & 0xfff));
+
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        code_point = static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
+
+        return UTF8_OK;
+    }
+
+    #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
+
+    template <typename octet_iterator>
+    utf_error validate_next(octet_iterator& it, octet_iterator end, utfchar32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        // Save the original value of it so we can go back in case of failure
+        // Of course, it does not make much sense with i.e. stream iterators
+        octet_iterator original_it = it;
+
+        utfchar32_t cp = 0;
+        // Determine the sequence length based on the lead octet
+        const int length = utf8::internal::sequence_length(it);
+
+        // Get trail octets and calculate the code point
+        utf_error err = UTF8_OK;
+        switch (length) {
+            case 0:
+                return INVALID_LEAD;
+            case 1:
+                err = utf8::internal::get_sequence_1(it, end, cp);
+                break;
+            case 2:
+                err = utf8::internal::get_sequence_2(it, end, cp);
+                break;
+            case 3:
+                err = utf8::internal::get_sequence_3(it, end, cp);
+                break;
+            case 4:
+                err = utf8::internal::get_sequence_4(it, end, cp);
+                break;
+        }
+
+        if (err == UTF8_OK) {
+            // Decoding succeeded. Now, security checks...
+            if (utf8::internal::is_code_point_valid(cp)) {
+                if (!utf8::internal::is_overlong_sequence(cp, length)){
+                    // Passed! Return here.
+                    code_point = cp;
+                    ++it;
+                    return UTF8_OK;
+                }
+                else
+                    err = OVERLONG_SEQUENCE;
+            }
+            else
+                err = INVALID_CODE_POINT;
+        }
+
+        // Failure branch - restore the original value of the iterator
+        it = original_it;
+        return err;
+    }
+
+    template <typename octet_iterator>
+    inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
+        utfchar32_t ignored;
+        return utf8::internal::validate_next(it, end, ignored);
+    }
+
+    template <typename word_iterator>
+    utf_error validate_next16(word_iterator& it, word_iterator end, utfchar32_t& code_point)
+    {
+        // Make sure the iterator dereferences a large enough type
+        typedef typename std::iterator_traits<word_iterator>::value_type word_type;
+        UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
+        // Check the edge case:
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+        // Save the original value of it so we can go back in case of failure
+        // Of course, it does not make much sense with i.e. stream iterators
+        word_iterator original_it = it;
+
+        utf_error err = UTF8_OK;
+
+        const utfchar16_t first_word = *it++;
+        if (!is_surrogate(first_word)) {
+            code_point = first_word;
+            return UTF8_OK;
+        }
+        else {
+            if (it == end)
+                err = NOT_ENOUGH_ROOM;
+            else if (is_lead_surrogate(first_word)) {
+                const utfchar16_t second_word = *it++;
+                if (is_trail_surrogate(static_cast<utfchar32_t>(second_word))) {
+                    code_point = static_cast<utfchar32_t>(first_word << 10) + static_cast<utfchar32_t>(second_word) + SURROGATE_OFFSET;
+                    return UTF8_OK;
+                } else
+                    err = INCOMPLETE_SEQUENCE;
+
+            } else {
+                err = INVALID_LEAD;
+            }
+        }
+        // error branch
+        it = original_it;
+        return err;
+    }
+
+    // Internal implementation of both checked and unchecked append() function
+    // This function will be invoked by the overloads below, as they will know
+    // the octet_type.
+    template <typename octet_iterator, typename octet_type>
+    octet_iterator append(utfchar32_t cp, octet_iterator result) {
+        if (cp < 0x80)                       // one octet
+            *(result++) = static_cast<octet_type>(cp);
+        else if (cp < 0x800) {               // two octets
+            *(result++) = static_cast<octet_type>((cp >> 6)          | 0xc0);
+            *(result++) = static_cast<octet_type>((cp & 0x3f)        | 0x80);
+        }
+        else if (cp < 0x10000) {             // three octets
+            *(result++) = static_cast<octet_type>((cp >> 12)         | 0xe0);
+            *(result++) = static_cast<octet_type>(((cp >> 6) & 0x3f) | 0x80);
+            *(result++) = static_cast<octet_type>((cp & 0x3f)        | 0x80);
+        }
+        else {                               // four octets
+            *(result++) = static_cast<octet_type>((cp >> 18)          | 0xf0);
+            *(result++) = static_cast<octet_type>(((cp >> 12) & 0x3f) | 0x80);
+            *(result++) = static_cast<octet_type>(((cp >> 6) & 0x3f)  | 0x80);
+            *(result++) = static_cast<octet_type>((cp & 0x3f)         | 0x80);
+        }
+        return result;
+    }
+
+    // One of the following overloads will be invoked from the API calls
+
+    // A simple (but dangerous) case: the caller appends byte(s) to a char array
+    inline char* append(utfchar32_t cp, char* result) {
+        return append<char*, char>(cp, result);
+    }
+
+    // Hopefully, most common case: the caller uses back_inserter
+    // i.e. append(cp, std::back_inserter(str));
+    template <typename container_type>
+    std::back_insert_iterator<container_type> append
+            (utfchar32_t cp, std::back_insert_iterator<container_type> result) {
+        return append<std::back_insert_iterator<container_type>,
+            typename container_type::value_type>(cp, result);
+    }
+
+    // The caller uses some other kind of output operator - not covered above
+    // Note that in this case we are not able to determine octet_type
+    // so we assume it's utfchar8_t; that can cause a conversion warning if we are wrong.
+    template <typename octet_iterator>
+    octet_iterator append(utfchar32_t cp, octet_iterator result) {
+        return append<octet_iterator, utfchar8_t>(cp, result);
+    }
+
+    // Internal implementation of both checked and unchecked append16() function
+    // This function will be invoked by the overloads below, as they will know
+    // the word_type.
+    template <typename word_iterator, typename word_type>
+    word_iterator append16(utfchar32_t cp, word_iterator result) {
+        UTF_CPP_STATIC_ASSERT(sizeof(word_type) >= sizeof(utfchar16_t));
+        if (is_in_bmp(cp))
+            *(result++) = static_cast<word_type>(cp);
+        else {
+            // Code points from the supplementary planes are encoded via surrogate pairs
+            *(result++) = static_cast<word_type>(LEAD_OFFSET + (cp >> 10));
+            *(result++) = static_cast<word_type>(TRAIL_SURROGATE_MIN + (cp & 0x3FF));
+        }
+        return result;
+    }
+
+    // Hopefully, most common case: the caller uses back_inserter
+    // i.e. append16(cp, std::back_inserter(str));
+    template <typename container_type>
+    std::back_insert_iterator<container_type> append16
+            (utfchar32_t cp, std::back_insert_iterator<container_type> result) {
+        return append16<std::back_insert_iterator<container_type>,
+            typename container_type::value_type>(cp, result);
+    }
+
+    // The caller uses some other kind of output operator - not covered above
+    // Note that in this case we are not able to determine word_type
+    // so we assume it's utfchar16_t; that can cause a conversion warning if we are wrong.
+    template <typename word_iterator>
+    word_iterator append16(utfchar32_t cp, word_iterator result) {
+        return append16<word_iterator, utfchar16_t>(cp, result);
+    }
+
+} // namespace internal
+
+    /// The library API - functions intended to be called by the users
+
+    // Byte order mark
+    const utfchar8_t bom[] = {0xef, 0xbb, 0xbf};
+
+    template <typename octet_iterator>
+    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
+    {
+        octet_iterator result = start;
+        while (result != end) {
+            utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
+            if (err_code != internal::UTF8_OK)
+                return result;
+        }
+        return result;
+    }
+
+    inline const char* find_invalid(const char* str)
+    {
+        const char* end = str + std::strlen(str);
+        return find_invalid(str, end);
+    }
+
+    inline std::size_t find_invalid(const std::string& s)
+    {
+        std::string::const_iterator invalid = find_invalid(s.begin(), s.end());
+        return (invalid == s.end()) ? std::string::npos : static_cast<std::size_t>(invalid - s.begin());
+    }
+
+    template <typename octet_iterator>
+    inline bool is_valid(octet_iterator start, octet_iterator end)
+    {
+        return (utf8::find_invalid(start, end) == end);
+    }
+
+    inline bool is_valid(const char* str)
+    {
+        return (*(utf8::find_invalid(str)) == '\0');
+    }
+
+    inline bool is_valid(const std::string& s)
+    {
+        return is_valid(s.begin(), s.end());
+    }
+
+    template <typename octet_iterator>
+    inline bool starts_with_bom(octet_iterator it, octet_iterator end)
+    {
+        return (
+            ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
+            ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
+            ((it != end) && (utf8::internal::mask8(*it))   == bom[2])
+        );
+    }
+
+    inline bool starts_with_bom(const std::string& s)
+    {
+        return starts_with_bom(s.begin(), s.end());
+    }
+} // namespace utf8
+
+#endif // header guard
+
diff --git a/third_party/utfcpp/include/utfcpp/utf8/cpp11.h b/third_party/utfcpp/include/utfcpp/utf8/cpp11.h
new file mode 100644
index 00000000..691633c8
--- /dev/null
+++ b/third_party/utfcpp/include/utfcpp/utf8/cpp11.h
@@ -0,0 +1,70 @@
+// Copyright 2018 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
+#define UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
+
+#include "checked.h"
+
+namespace utf8
+{
+    inline void append16(utfchar32_t cp, std::u16string& s)
+    {
+        append16(cp, std::back_inserter(s));
+    }
+
+    inline std::string utf16to8(const std::u16string& s)
+    {
+        std::string result;
+        utf16to8(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::u16string utf8to16(const std::string& s)
+    {
+        std::u16string result;
+        utf8to16(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::string utf32to8(const std::u32string& s)
+    {
+        std::string result;
+        utf32to8(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::u32string utf8to32(const std::string& s)
+    {
+        std::u32string result;
+        utf8to32(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+} // namespace utf8
+
+#endif // header guard
+
diff --git a/third_party/utfcpp/include/utfcpp/utf8/cpp17.h b/third_party/utfcpp/include/utfcpp/utf8/cpp17.h
new file mode 100644
index 00000000..07587300
--- /dev/null
+++ b/third_party/utfcpp/include/utfcpp/utf8/cpp17.h
@@ -0,0 +1,96 @@
+// Copyright 2018 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9
+#define UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9
+
+#include "cpp11.h"
+
+namespace utf8
+{
+    inline std::string utf16to8(std::u16string_view s)
+    {
+        std::string result;
+        utf16to8(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::u16string utf8to16(std::string_view s)
+    {
+        std::u16string result;
+        utf8to16(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::string utf32to8(std::u32string_view s)
+    {
+        std::string result;
+        utf32to8(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::u32string utf8to32(std::string_view s)
+    {
+        std::u32string result;
+        utf8to32(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::size_t find_invalid(std::string_view s)
+    {
+        std::string_view::const_iterator invalid = find_invalid(s.begin(), s.end());
+        return (invalid == s.end()) ? std::string_view::npos : static_cast<std::size_t>(invalid - s.begin());
+    }
+
+    inline bool is_valid(std::string_view s)
+    {
+        return is_valid(s.begin(), s.end());
+    }
+
+    inline std::string replace_invalid(std::string_view s, char32_t replacement)
+    {
+        std::string result;
+        replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
+        return result;
+    }
+
+    inline std::string replace_invalid(std::string_view s)
+    {
+        std::string result;
+        replace_invalid(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline bool starts_with_bom(std::string_view s)
+    {
+        return starts_with_bom(s.begin(), s.end());
+    }
+
+} // namespace utf8
+
+#endif // header guard
+
diff --git a/third_party/utfcpp/include/utfcpp/utf8/cpp20.h b/third_party/utfcpp/include/utfcpp/utf8/cpp20.h
new file mode 100644
index 00000000..07b61d0f
--- /dev/null
+++ b/third_party/utfcpp/include/utfcpp/utf8/cpp20.h
@@ -0,0 +1,124 @@
+// Copyright 2022 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_207e906c01_03a3_4daf_b420_ea7ea952b3c9
+#define UTF8_FOR_CPP_207e906c01_03a3_4daf_b420_ea7ea952b3c9
+
+#include "cpp17.h"
+
+namespace utf8
+{
+    inline std::u8string utf16tou8(const std::u16string& s)
+    {
+        std::u8string result;
+        utf16to8(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::u8string utf16tou8(std::u16string_view s)
+    {
+        std::u8string result;
+        utf16to8(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::u16string utf8to16(const std::u8string& s)
+    {
+        std::u16string result;
+        utf8to16(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::u16string utf8to16(const std::u8string_view& s)
+    {
+        std::u16string result;
+        utf8to16(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::u8string utf32tou8(const std::u32string& s)
+    {
+        std::u8string result;
+        utf32to8(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::u8string utf32tou8(const std::u32string_view& s)
+    {
+        std::u8string result;
+        utf32to8(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::u32string utf8to32(const std::u8string& s)
+    {
+        std::u32string result;
+        utf8to32(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::u32string utf8to32(const std::u8string_view& s)
+    {
+        std::u32string result;
+        utf8to32(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline std::size_t find_invalid(const std::u8string& s)
+    {
+        std::u8string::const_iterator invalid = find_invalid(s.begin(), s.end());
+        return (invalid == s.end()) ? std::string_view::npos : static_cast<std::size_t>(invalid - s.begin());
+    }
+
+    inline bool is_valid(const std::u8string& s)
+    {
+        return is_valid(s.begin(), s.end());
+    }
+
+    inline std::u8string replace_invalid(const std::u8string& s, char32_t replacement)
+    {
+        std::u8string result;
+        replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
+        return result;
+    }
+
+    inline std::u8string replace_invalid(const std::u8string& s)
+    {
+        std::u8string result;
+        replace_invalid(s.begin(), s.end(), std::back_inserter(result));
+        return result;
+    }
+
+    inline bool starts_with_bom(const std::u8string& s)
+    {
+        return starts_with_bom(s.begin(), s.end());
+    }
+
+} // namespace utf8
+
+#endif // header guard
+
diff --git a/third_party/utfcpp/include/utfcpp/utf8/unchecked.h b/third_party/utfcpp/include/utfcpp/utf8/unchecked.h
new file mode 100644
index 00000000..173d0302
--- /dev/null
+++ b/third_party/utfcpp/include/utfcpp/utf8/unchecked.h
@@ -0,0 +1,286 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+
+namespace utf8
+{
+    namespace unchecked
+    {
+        template <typename octet_iterator>
+        octet_iterator append(utfchar32_t cp, octet_iterator result)
+        {
+            return internal::append(cp, result);
+        }
+
+        template <typename word_iterator>
+        word_iterator append16(utfchar32_t cp, word_iterator result)
+        {
+            return internal::append16(cp, result);
+        }
+
+        template <typename octet_iterator, typename output_iterator>
+        output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement)
+        {
+            while (start != end) {
+                octet_iterator sequence_start = start;
+                internal::utf_error err_code = utf8::internal::validate_next(start, end);
+                switch (err_code) {
+                    case internal::UTF8_OK :
+                        for (octet_iterator it = sequence_start; it != start; ++it)
+                            *out++ = *it;
+                        break;
+                    case internal::NOT_ENOUGH_ROOM:
+                        out = utf8::unchecked::append(replacement, out);
+                        start = end;
+                        break;
+                    case internal::INVALID_LEAD:
+                        out = utf8::unchecked::append(replacement, out);
+                        ++start;
+                        break;
+                    case internal::INCOMPLETE_SEQUENCE:
+                    case internal::OVERLONG_SEQUENCE:
+                    case internal::INVALID_CODE_POINT:
+                        out = utf8::unchecked::append(replacement, out);
+                        ++start;
+                        // just one replacement mark for the sequence
+                        while (start != end && utf8::internal::is_trail(*start))
+                            ++start;
+                        break;
+                }
+            }
+            return out;
+        }
+
+        template <typename octet_iterator, typename output_iterator>
+        inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
+        {
+            static const utfchar32_t replacement_marker = static_cast<utfchar32_t>(utf8::internal::mask16(0xfffd));
+            return utf8::unchecked::replace_invalid(start, end, out, replacement_marker);
+        }
+
+        inline std::string replace_invalid(const std::string& s, utfchar32_t replacement)
+        {
+            std::string result;
+            replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
+            return result;
+        }
+
+        inline std::string replace_invalid(const std::string& s)
+        {
+            std::string result;
+            replace_invalid(s.begin(), s.end(), std::back_inserter(result));
+            return result;
+        }
+
+        template <typename octet_iterator>
+        utfchar32_t next(octet_iterator& it)
+        {
+            utfchar32_t cp = utf8::internal::mask8(*it);
+            switch (utf8::internal::sequence_length(it)) {
+                case 1:
+                    break;
+                case 2:
+                    ++it;
+                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
+                    break;
+                case 3:
+                    ++it;
+                    cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
+                    ++it;
+                    cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
+                    break;
+                case 4:
+                    ++it;
+                    cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
+                    ++it;
+                    cp = static_cast<utfchar32_t>(cp + ((utf8::internal::mask8(*it) << 6) & 0xfff));
+                    ++it;
+                    cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
+                    break;
+            }
+            ++it;
+            return cp;
+        }
+
+        template <typename octet_iterator>
+        utfchar32_t peek_next(octet_iterator it)
+        {
+            return utf8::unchecked::next(it);
+        }
+
+        template <typename word_iterator>
+        utfchar32_t next16(word_iterator& it)
+        {
+            utfchar32_t cp = utf8::internal::mask16(*it++);
+            if (utf8::internal::is_lead_surrogate(cp))
+                return (cp << 10) + *it++ + utf8::internal::SURROGATE_OFFSET;
+            return cp;
+        }
+
+        template <typename octet_iterator>
+        utfchar32_t prior(octet_iterator& it)
+        {
+            while (utf8::internal::is_trail(*(--it))) ;
+            octet_iterator temp = it;
+            return utf8::unchecked::next(temp);
+        }
+
+        template <typename octet_iterator, typename distance_type>
+        void advance(octet_iterator& it, distance_type n)
+        {
+            const distance_type zero(0);
+            if (n < zero) {
+                // backward
+                for (distance_type i = n; i < zero; ++i)
+                    utf8::unchecked::prior(it);
+            } else {
+                // forward
+                for (distance_type i = zero; i < n; ++i)
+                    utf8::unchecked::next(it);
+            }
+        }
+
+        template <typename octet_iterator>
+        typename std::iterator_traits<octet_iterator>::difference_type
+        distance(octet_iterator first, octet_iterator last)
+        {
+            typename std::iterator_traits<octet_iterator>::difference_type dist;
+            for (dist = 0; first < last; ++dist)
+                utf8::unchecked::next(first);
+            return dist;
+        }
+
+        template <typename u16bit_iterator, typename octet_iterator>
+        octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+        {
+            while (start != end) {
+                utfchar32_t cp = utf8::internal::mask16(*start++);
+                // Take care of surrogate pairs first
+                if (utf8::internal::is_lead_surrogate(cp)) {
+                    if (start == end)
+                        return result;
+                    utfchar32_t trail_surrogate = utf8::internal::mask16(*start++);
+                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+                }
+                result = utf8::unchecked::append(cp, result);
+            }
+            return result;
+        }
+
+        template <typename u16bit_iterator, typename octet_iterator>
+        u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result)
+        {
+            while (start < end) {
+                utfchar32_t cp = utf8::unchecked::next(start);
+                if (cp > 0xffff) { //make a surrogate pair
+                    *result++ = static_cast<utfchar16_t>((cp >> 10)   + internal::LEAD_OFFSET);
+                    *result++ = static_cast<utfchar16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+                }
+                else
+                    *result++ = static_cast<utfchar16_t>(cp);
+            }
+            return result;
+        }
+
+        template <typename octet_iterator, typename u32bit_iterator>
+        octet_iterator utf32to8(u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+        {
+            while (start != end)
+                result = utf8::unchecked::append(*(start++), result);
+
+            return result;
+        }
+
+        template <typename octet_iterator, typename u32bit_iterator>
+        u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result)
+        {
+            while (start < end)
+                (*result++) = utf8::unchecked::next(start);
+
+            return result;
+        }
+
+        // The iterator class
+        template <typename octet_iterator>
+        class iterator {
+            octet_iterator it;
+        public:
+            typedef utfchar32_t value_type;
+            typedef utfchar32_t* pointer;
+            typedef utfchar32_t& reference;
+            typedef std::ptrdiff_t difference_type;
+            typedef std::bidirectional_iterator_tag iterator_category;
+            iterator () {}
+            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
+            // the default "big three" are OK
+            octet_iterator base () const { return it; }
+            utfchar32_t operator * () const
+            {
+                octet_iterator temp = it;
+                return utf8::unchecked::next(temp);
+            }
+            bool operator == (const iterator& rhs) const
+            {
+                return (it == rhs.it);
+            }
+            bool operator != (const iterator& rhs) const
+            {
+                return !(operator == (rhs));
+            }
+            iterator& operator ++ ()
+            {
+                ::std::advance(it, utf8::internal::sequence_length(it));
+                return *this;
+            }
+            iterator operator ++ (int)
+            {
+                iterator temp = *this;
+                ::std::advance(it, utf8::internal::sequence_length(it));
+                return temp;
+            }
+            iterator& operator -- ()
+            {
+                utf8::unchecked::prior(it);
+                return *this;
+            }
+            iterator operator -- (int)
+            {
+                iterator temp = *this;
+                utf8::unchecked::prior(it);
+                return temp;
+            }
+        }; // class iterator
+
+    } // namespace utf8::unchecked
+} // namespace utf8
+
+#endif // header guard
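
For quick verification of the vendored headers, here is a minimal usage sketch (not part of the patch). It exercises only APIs shown in the diff above: find_invalid, replace_invalid, and the UTF-8/UTF-32 round-trip from cpp11.h/cpp17.h. The include path <utfcpp/utf8/cpp17.h> is an assumption based on the install rule that copies third_party/utfcpp/include/utfcpp/ to include/utfcpp/; adjust it to the actual build layout.

// sketch.cpp - assumes the utfcpp include root is on the include path
#include <cstdio>
#include <string>
#include <string_view>
#include <utfcpp/utf8/cpp17.h>   // pulls in cpp11.h -> checked.h -> core.h

int main() {
    // "héllo" followed by a lone 0xC3 lead byte with no trail byte
    // (an INCOMPLETE_SEQUENCE in the validate_next() terminology above).
    std::string bytes = "h\xc3\xa9llo\xc3";

    // find_invalid() returns the byte offset of the first bad sequence,
    // or npos if the whole range is valid UTF-8.
    std::size_t bad = utf8::find_invalid(std::string_view(bytes));
    if (bad != std::string_view::npos)
        std::printf("first invalid byte at offset %zu\n", bad);

    // replace_invalid() substitutes U+FFFD for each malformed sequence,
    // one replacement mark per sequence.
    std::string clean = utf8::replace_invalid(std::string_view(bytes));

    // Round-trip through UTF-32 and back; both directions are built on
    // the back_inserter overloads of internal::append().
    std::u32string cps  = utf8::utf8to32(clean);
    std::string    back = utf8::utf32to8(cps);

    std::printf("code points: %zu, valid: %d\n",
                cps.size(), int(utf8::is_valid(back)));
    return 0;
}

A note on the surrogate constants used throughout: LEAD_OFFSET = 0xd800 - (0x10000 >> 10) = 0xd7c0, so for a supplementary code point cp, LEAD_OFFSET + (cp >> 10) lands in the 0xd800-0xdbff lead range; SURROGATE_OFFSET is the (wrapped, unsigned) value 0x10000 - (0xd800 << 10) - 0xdc00, which lets next16() recover cp as (lead << 10) + trail + SURROGATE_OFFSET without separately subtracting the two surrogate bases.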