Changes from all commits (23 commits)
74a989b
feat(deepseek-ocr): deepseek-ocr support
chenghuaWang Oct 22, 2025
8eebf54
feat(deepseek_ocr): implement conversation management and preprocessi…
chenghuaWang Oct 23, 2025
60f6f92
feat(cpu): add interpolate and pad operations with full interpolation…
chenghuaWang Oct 23, 2025
bf82e6b
feat(image): add dynamic image preprocessing and cropping support
chenghuaWang Oct 23, 2025
6770f87
feat(interpolate): add antialias support and remove keep_aspect_ratio
chenghuaWang Oct 23, 2025
1515cdb
feat(deepseek_ocr): add mlp projector linear impl type configuration
chenghuaWang Oct 23, 2025
e858d25
feat(deepseek_ocr): add message formatting and model inference support
chenghuaWang Oct 23, 2025
4ca7a07
feat(ext): add tokenizers-cpp and opencv-mobile as optional extensions
chenghuaWang Oct 23, 2025
0eabe59
feat(ocr): add llvm-project submodule and update deepseek ocr model
chenghuaWang Oct 24, 2025
78a17fb
feat(cpu): add StackOp implementation and integrate into DeepSeek-OCR…
chenghuaWang Oct 24, 2025
593258e
feat(tokenizer): implement UTF-8 support for DeepSeek OCR tokenizer
chenghuaWang Oct 24, 2025
f76bce9
refactor(ext): replace tokenizers-cpp with tokenizers submodule
chenghuaWang Oct 24, 2025
73b74f2
feat(tokenizer): implement deepseek-ocr tokenizer with BPE and UTF-8 …
chenghuaWang Oct 25, 2025
a556ad2
feat(deepseek_ocr): improve tokenizer and add support for new special…
chenghuaWang Oct 25, 2025
7b2f501
feat(deepseek_ocr): refactor model loading and initialization with co…
chenghuaWang Oct 26, 2025
6732314
feat(cpu): implement MaskedScatterOp for CPU backend
chenghuaWang Oct 26, 2025
e849dbc
feat(deepseek_ocr): add DeepseekV2MLP, MoEGate, and DeepseekV2MoE mod…
chenghuaWang Oct 26, 2025
a80a973
fix(Tensor): cast rank to int32_t for negative index handling
chenghuaWang Oct 26, 2025
7e39401
docs(contribute): rename guidelines.md to guidelines.rst
chenghuaWang Oct 27, 2025
9b843b6
feat(tensor): support negative dim in repeat operation
chenghuaWang Oct 27, 2025
324cd50
feat(cpu): implement optimized softmax for last dimension cases
chenghuaWang Oct 27, 2025
00d787a
feat(cpu): add Tracy profiler option and update quantization config
chenghuaWang Oct 28, 2025
e861deb
feat(deepseek_ocr): update model paths and quantization config
chenghuaWang Oct 28, 2025
8 changes: 8 additions & 0 deletions .gitmodules
@@ -21,3 +21,11 @@
[submodule "mllm/ffi/vendors/tvm-ffi"]
path = mllm/ffi/vendors/tvm-ffi
url = https://github.com/apache/tvm-ffi
[submodule "mllm/ext/vendors/llvm-project"]
path = mllm/ext/vendors/llvm-project
url = https://github.com/llvm/llvm-project
update = none
[submodule "mllm/ext/vendors/tokenizers"]
path = mllm/ext/vendors/tokenizers
url = https://github.com/meta-pytorch/tokenizers.git
update = none
14 changes: 14 additions & 0 deletions CMakeLists.txt
@@ -22,6 +22,11 @@ option(MLLM_BUILD_QNN_BACKEND "Enable MLLM QNN backend" OFF)
option(MLLM_BUILD_SDK_C_BINDING "Enable MLLM C SDK binding" OFF)
option(MLLM_BUILD_EXPERIMENTS "Enable MLLM experiments" OFF)

# Extension Enable
option(MLLM_EXT_ENABLE "Enable MLLM extensions" OFF)
option(MLLM_EXT_ENABLE_LLVM_PROJECT "Enable the llvm-project extension" OFF)
option(MLLM_EXT_ENABLE_META_TORCH_TOKENIZERS "Enable the meta-pytorch tokenizers extension" OFF)

# CPU Backend: BLAS
option(MLLM_USE_BLAS "Enable BLAS" OFF)
option(MLLM_BLAS_VENDOR_ACCELERATE "Enable Accelerate BLAS on OSX" OFF)
@@ -35,6 +40,7 @@ option(MLLM_KERNEL_THREADS_VENDOR_APPLE_GCD "Enable Apple GCD Threads" OFF)

# Performance components
option(MLLM_PERFETTO_ENABLE "Enable perfetto" OFF)
option(MLLM_TRACY_ENABLE "Enable Tracy, a more advanced profiler" OFF)

message(STATUS "CXX Compiler=${CMAKE_CXX_COMPILER_ID}")
message(STATUS "CXX Compiler version=${CMAKE_CXX_COMPILER_VERSION}")
@@ -206,6 +212,7 @@ set(MLLM_INCLUDE_DIR
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third_party/half/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third_party/dlpack/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third_party/xxHash/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third_party/utfcpp/include>
$<INSTALL_INTERFACE:include/mllm>
$<INSTALL_INTERFACE:include/third_party/>)
set(MLLM_JSON_INCLUDE_DIR
@@ -314,6 +321,13 @@ install(
PATTERN "*.h"
PATTERN "*.hpp")

install(
DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/third_party/utfcpp/include/utfcpp/
DESTINATION include/utfcpp/
FILES_MATCHING
PATTERN "*.h"
PATTERN "*.hpp")

install(
DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/third_party/xxHash/include/xxHash/
DESTINATION include/xxHash/
30 changes: 30 additions & 0 deletions docs/api/functional.rst
@@ -78,6 +78,36 @@ Shape Operations
:param dim: Dimension along which to concatenate
:return: Concatenated tensor

.. cpp:function:: Tensor mllm::nn::functional::pad(const Tensor& x, const std::vector<int32_t>& pad, aops::PadMode mode = aops::PadMode::kConstant, float value = 0.0f)

Pad a tensor along the last N dimensions as specified.

:param x: Input tensor
:param pad: Padding sizes ordered from the last dimension to the first, e.g. [last_left, last_right, ..., first_left, first_right]
:param mode: Padding mode (kConstant, kReflect, kReplicate, kCircular). Default: kConstant
:param value: Constant value used when mode is kConstant. Default: 0.0
:return: Padded tensor

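A minimal usage sketch, assuming ``aops`` and ``nn::functional`` are reachable through the ``mllm/mllm.hpp`` umbrella header (as in examples/deepseek_ocr/main.cpp) and that an input tensor ``x`` already exists; the helper name is illustrative:

.. code-block:: cpp

   #include <mllm/mllm.hpp>

   // Zero-pad the last two dimensions of `x` by one element on each side,
   // e.g. an [N, C, H, W] tensor becomes [N, C, H + 2, W + 2].
   mllm::Tensor pad_hw(const mllm::Tensor& x) {
     // Ordering follows the `pad` parameter above: {W_left, W_right, H_top, H_bottom}.
     return mllm::nn::functional::pad(x, {1, 1, 1, 1},
                                      mllm::aops::PadMode::kConstant, 0.0f);
   }
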
.. cpp:function:: Tensor mllm::nn::functional::interpolate(const Tensor& x, const std::vector<int32_t>& size, aops::InterpolateOpMode mode = aops::InterpolateOpMode::kNearest, bool align_corners = false, bool antialias = false)

Resize a tensor to the target spatial size.

:param x: Input tensor (supports 1D/2D/3D spatial resizing depending on mode)
:param size: Target spatial size (e.g., [H_out, W_out] for 2D)
:param mode: Interpolation mode (kNearest, kLinear, kBilinear, kBicubic, kTrilinear). Default: kNearest
:param align_corners: Align corners for linear/bilinear/trilinear interpolation. Default: false
:param antialias: Whether to apply anti-aliasing when resizing. Default: false
:return: Resized tensor

.. cpp:function:: Tensor mllm::nn::functional::interpolate(const Tensor& x, const std::vector<float>& scale_factor, aops::InterpolateOpMode mode = aops::InterpolateOpMode::kNearest, bool align_corners = false)

Resize a tensor by scale factors per spatial dimension.

:param x: Input tensor (supports 1D/2D/3D spatial resizing depending on mode)
:param scale_factor: Scale factors per spatial dimension (e.g., [sh, sw] for 2D)
:param mode: Interpolation mode (kNearest, kLinear, kBilinear, kBicubic, kTrilinear). Default: kNearest
:param align_corners: Align corners for linear/bilinear/trilinear interpolation. Default: false
:return: Resized tensor

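A minimal sketch exercising both overloads, under the same assumptions as the ``pad`` example above (existing tensor ``x``, ``mllm/mllm.hpp`` umbrella header); the bilinear mode and helper name are illustrative:

.. code-block:: cpp

   #include <vector>
   #include <mllm/mllm.hpp>

   // Resize `x` to a fixed 224x224 spatial size with anti-aliased bilinear
   // interpolation, then halve it again via the scale-factor overload.
   mllm::Tensor resize_example(const mllm::Tensor& x) {
     auto fixed = mllm::nn::functional::interpolate(
         x, {224, 224}, mllm::aops::InterpolateOpMode::kBilinear,
         /*align_corners=*/false, /*antialias=*/true);
     return mllm::nn::functional::interpolate(
         fixed, std::vector<float>{0.5f, 0.5f},
         mllm::aops::InterpolateOpMode::kBilinear, /*align_corners=*/false);
   }
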
Attention Operations
--------------------

docs/contribute/{guidelines.md → guidelines.rst}
File renamed without changes.
1 change: 1 addition & 0 deletions docs/contribute/index.rst
@@ -6,3 +6,4 @@ Contribute

roadmap
guidelines
model_supports
2 changes: 2 additions & 0 deletions docs/contribute/model_supports.rst
@@ -0,0 +1,2 @@
Model Supports
=================
2 changes: 2 additions & 0 deletions docs/cpu_backend/fa2_radix_paged.rst
@@ -0,0 +1,2 @@
FA2, Radix, Paged
====================
1 change: 1 addition & 0 deletions docs/cpu_backend/index.rst
@@ -5,5 +5,6 @@ CPU Backend
:maxdepth: 2

threads
fa2_radix_paged
arm/index
x86/index
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
@@ -6,3 +6,4 @@ add_subdirectory(llama)
add_subdirectory(minicpm_o)
add_subdirectory(qwen3)
add_subdirectory(qwen3_service)
add_subdirectory(deepseek_ocr)
3 changes: 3 additions & 0 deletions examples/deepseek_ocr/CMakeLists.txt
@@ -0,0 +1,3 @@
add_executable(mllm-deepseek-ocr-runner main.cpp)
target_link_libraries(mllm-deepseek-ocr-runner PRIVATE MllmRT MllmCPUBackend)
target_include_directories(mllm-deepseek-ocr-runner PRIVATE ${MLLM_INCLUDE_DIR})
21 changes: 21 additions & 0 deletions examples/deepseek_ocr/main.cpp
@@ -0,0 +1,21 @@
#include <mllm/mllm.hpp>
#include "mllm/models/deepseek_ocr/modeling_deepseek_ocr.hpp"
#include "mllm/models/deepseek_ocr/tokenization_deepseek_ocr.hpp"

using mllm::Argparse;

MLLM_MAIN({
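// Minimal DeepSeek-OCR runner: load a quantized checkpoint and convert one document image to markdown.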
// auto config = mllm::models::deepseek_ocr::DpskOcrConfig("/Volumes/D/mllm-models/DeepSeek-OCR-w32a32/config.json");
// auto model = mllm::models::deepseek_ocr::DeepseekOCRForCausalLM(config);
// auto tokenizer = mllm::models::deepseek_ocr::DpskOcrTokenizer("/Volumes/D/mllm-models/DeepSeek-OCR-w32a32/tokenizer.json");
// model.load(mllm::load("/Volumes/D/mllm-models/DeepSeek-OCR-w32a32/model.mllm", mllm::ModelFileVersion::kV2));
mllm::setLogLevel(mllm::LogLevel::kError);
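// Config, model, tokenizer, and weights for the w4a8 (i8mm) quantized checkpoint.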
auto config = mllm::models::deepseek_ocr::DpskOcrConfig("/Volumes/D/mllm-models/DeepSeek-OCR-w4a8-i8mm-kai/config.json");
auto model = mllm::models::deepseek_ocr::DeepseekOCRForCausalLM(config);
auto tokenizer =
mllm::models::deepseek_ocr::DpskOcrTokenizer("/Volumes/D/mllm-models/DeepSeek-OCR-w4a8-i8mm-kai/tokenizer.json");
model.load(mllm::load("/Volumes/D/mllm-models/DeepSeek-OCR-w4a8-i8mm-kai/model.mllm", mllm::ModelFileVersion::kV2));

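// Run inference on the image; the final path argument is presumably an output location for the OCR results.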
model.infer(tokenizer, "<image>\n<|grounding|>Convert the document to markdown. ", "/Volumes/D/mllm/.tmp/dpsk-ocr-pr.png",
"/Volumes/D/mllm/.tmp/dpsk-ocr");
});