Changes from all commits (23 commits)
74a989b
feat(deepseek-ocr): deepseek-ocr support
chenghuaWang Oct 22, 2025
8eebf54
feat(deepseek_ocr): implement conversation management and preprocessi…
chenghuaWang Oct 23, 2025
60f6f92
feat(cpu): add interpolate and pad operations with full interpolation…
chenghuaWang Oct 23, 2025
bf82e6b
feat(image): add dynamic image preprocessing and cropping support
chenghuaWang Oct 23, 2025
6770f87
feat(interpolate): add antialias support and remove keep_aspect_ratio
chenghuaWang Oct 23, 2025
1515cdb
feat(deepseek_ocr): add mlp projector linear impl type configuration
chenghuaWang Oct 23, 2025
e858d25
feat(deepseek_ocr): add message formatting and model inference support
chenghuaWang Oct 23, 2025
4ca7a07
feat(ext): add tokenizers-cpp and opencv-mobile as optional extensions
chenghuaWang Oct 23, 2025
0eabe59
feat(ocr): add llvm-project submodule and update deepseek ocr model
chenghuaWang Oct 24, 2025
78a17fb
feat(cpu): add StackOp implementation and integrate into DeepSeek-OCR…
chenghuaWang Oct 24, 2025
593258e
feat(tokenizer): implement UTF-8 support for DeepSeek OCR tokenizer
chenghuaWang Oct 24, 2025
f76bce9
refactor(ext): replace tokenizers-cpp with tokenizers submodule
chenghuaWang Oct 24, 2025
73b74f2
feat(tokenizer): implement deepseek-ocr tokenizer with BPE and UTF-8 …
chenghuaWang Oct 25, 2025
a556ad2
feat(deepseek_ocr): improve tokenizer and add support for new special…
chenghuaWang Oct 25, 2025
7b2f501
feat(deepseek_ocr): refactor model loading and initialization with co…
chenghuaWang Oct 26, 2025
6732314
feat(cpu): implement MaskedScatterOp for CPU backend
chenghuaWang Oct 26, 2025
e849dbc
feat(deepseek_ocr): add DeepseekV2MLP, MoEGate, and DeepseekV2MoE mod…
chenghuaWang Oct 26, 2025
a80a973
fix(Tensor): cast rank to int32_t for negative index handling
chenghuaWang Oct 26, 2025
7e39401
docs(contribute): rename guidelines.md to guidelines.rst
chenghuaWang Oct 27, 2025
9b843b6
feat(tensor): support negative dim in repeat operation
chenghuaWang Oct 27, 2025
324cd50
feat(cpu): implement optimized softmax for last dimension cases
chenghuaWang Oct 27, 2025
00d787a
feat(cpu): add Tracy profiler option and update quantization config
chenghuaWang Oct 28, 2025
e861deb
feat(deepseek_ocr): update model paths and quantization config
chenghuaWang Oct 28, 2025
8 changes: 8 additions & 0 deletions .gitmodules
@@ -21,3 +21,11 @@
[submodule "mllm/ffi/vendors/tvm-ffi"]
path = mllm/ffi/vendors/tvm-ffi
url = https://github.com/apache/tvm-ffi
[submodule "mllm/ext/vendors/llvm-project"]
path = mllm/ext/vendors/llvm-project
url = https://github.com/llvm/llvm-project
update = none
[submodule "mllm/ext/vendors/tokenizers"]
path = mllm/ext/vendors/tokenizers
url = https://github.com/meta-pytorch/tokenizers.git
update = none
14 changes: 14 additions & 0 deletions CMakeLists.txt
@@ -22,6 +22,11 @@ option(MLLM_BUILD_QNN_BACKEND "Enable MLLM QNN backend" OFF)
option(MLLM_BUILD_SDK_C_BINDING "Enable MLLM C SDK binding" OFF)
option(MLLM_BUILD_EXPERIMENTS "Enable MLLM experiments" OFF)

# Extension Enable
option(MLLM_EXT_ENABLE "Enable MLLM extensions" OFF)
option(MLLM_EXT_ENABLE_LLVM_PROJECT "Enable the llvm-project extension" OFF)
option(MLLM_EXT_ENABLE_META_TORCH_TOKENIZERS "Enable the meta-pytorch tokenizers extension" OFF)

# CPU Backend: BLAS
option(MLLM_USE_BLAS "Enable BLAS" OFF)
option(MLLM_BLAS_VENDOR_ACCELERATE "Enable Accelerate BLAS on OSX" OFF)
@@ -35,6 +40,7 @@ option(MLLM_KERNEL_THREADS_VENDOR_APPLE_GCD "Enable Apple GCD Threads" OFF)

# Performance components
option(MLLM_PERFETTO_ENABLE "Enable perfetto" OFF)
option(MLLM_TRACY_ENABLE "Enable Tracy, a more advanced profiler" OFF)

message(STATUS "CXX Compiler=${CMAKE_CXX_COMPILER_ID}")
message(STATUS "CXX Compiler version=${CMAKE_CXX_COMPILER_VERSION}")
@@ -206,6 +212,7 @@ set(MLLM_INCLUDE_DIR
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third_party/half/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third_party/dlpack/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third_party/xxHash/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third_party/utfcpp/include>
$<INSTALL_INTERFACE:include/mllm>
$<INSTALL_INTERFACE:include/third_party/>)
set(MLLM_JSON_INCLUDE_DIR
@@ -314,6 +321,13 @@ install(
PATTERN "*.h"
PATTERN "*.hpp")

install(
DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/third_party/utfcpp/include/utfcpp/
DESTINATION include/utfcpp/
FILES_MATCHING
PATTERN "*.h"
PATTERN "*.hpp")

install(
DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/third_party/xxHash/include/xxHash/
DESTINATION include/xxHash/
30 changes: 30 additions & 0 deletions docs/api/functional.rst
@@ -78,6 +78,36 @@ Shape Operations
:param dim: Dimension along which to concatenate
:return: Concatenated tensor

.. cpp:function:: Tensor mllm::nn::functional::pad(const Tensor& x, const std::vector<int32_t>& pad, aops::PadMode mode = aops::PadMode::kConstant, float value = 0.0f)

Pad a tensor along the last N dimensions as specified.

:param x: Input tensor
:param pad: Padding sizes ordered from the last dimension to the first, e.g. [last_left, last_right, ..., first_left, first_right]
:param mode: Padding mode (kConstant, kReflect, kReplicate, kCircular). Default: kConstant
:param value: Constant value used when mode is kConstant. Default: 0.0
:return: Padded tensor

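A minimal usage sketch, assuming ``aops`` and ``nn::functional`` are reachable through the ``mllm/mllm.hpp`` umbrella header (as in examples/deepseek_ocr/main.cpp) and that an input tensor ``x`` already exists; the helper name is illustrative:

.. code-block:: cpp

   #include <mllm/mllm.hpp>

   // Zero-pad the last two dimensions of `x` by one element on each side,
   // e.g. an [N, C, H, W] tensor becomes [N, C, H + 2, W + 2].
   mllm::Tensor pad_hw(const mllm::Tensor& x) {
     // Ordering follows the `pad` parameter above: {W_left, W_right, H_top, H_bottom}.
     return mllm::nn::functional::pad(x, {1, 1, 1, 1},
                                      mllm::aops::PadMode::kConstant, 0.0f);
   }
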
.. cpp:function:: Tensor mllm::nn::functional::interpolate(const Tensor& x, const std::vector<int32_t>& size, aops::InterpolateOpMode mode = aops::InterpolateOpMode::kNearest, bool align_corners = false, bool antialias = false)

Resize a tensor to the target spatial size.

:param x: Input tensor (supports 1D/2D/3D spatial resizing depending on mode)
:param size: Target spatial size (e.g., [H_out, W_out] for 2D)
:param mode: Interpolation mode (kNearest, kLinear, kBilinear, kBicubic, kTrilinear). Default: kNearest
:param align_corners: Align corners for linear/bilinear/trilinear interpolation. Default: false
:param antialias: Whether to apply anti-aliasing when resizing. Default: false
:return: Resized tensor

.. cpp:function:: Tensor mllm::nn::functional::interpolate(const Tensor& x, const std::vector<float>& scale_factor, aops::InterpolateOpMode mode = aops::InterpolateOpMode::kNearest, bool align_corners = false)

Resize a tensor by scale factors per spatial dimension.

:param x: Input tensor (supports 1D/2D/3D spatial resizing depending on mode)
:param scale_factor: Scale factors per spatial dimension (e.g., [sh, sw] for 2D)
:param mode: Interpolation mode (kNearest, kLinear, kBilinear, kBicubic, kTrilinear). Default: kNearest
:param align_corners: Align corners for linear/bilinear/trilinear interpolation. Default: false
:return: Resized tensor

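A minimal sketch exercising both overloads, under the same assumptions as the ``pad`` example above (existing tensor ``x``, ``mllm/mllm.hpp`` umbrella header); the bilinear mode and helper name are illustrative:

.. code-block:: cpp

   #include <vector>
   #include <mllm/mllm.hpp>

   // Resize `x` to a fixed 224x224 spatial size with anti-aliased bilinear
   // interpolation, then halve it again via the scale-factor overload.
   mllm::Tensor resize_example(const mllm::Tensor& x) {
     auto fixed = mllm::nn::functional::interpolate(
         x, {224, 224}, mllm::aops::InterpolateOpMode::kBilinear,
         /*align_corners=*/false, /*antialias=*/true);
     return mllm::nn::functional::interpolate(
         fixed, std::vector<float>{0.5f, 0.5f},
         mllm::aops::InterpolateOpMode::kBilinear, /*align_corners=*/false);
   }
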
Attention Operations
--------------------

docs/contribute/{guidelines.md → guidelines.rst}
File renamed without changes.
1 change: 1 addition & 0 deletions docs/contribute/index.rst
@@ -6,3 +6,4 @@ Contribute

roadmap
guidelines
model_supports
2 changes: 2 additions & 0 deletions docs/contribute/model_supports.rst
@@ -0,0 +1,2 @@
Model Supports
=================
2 changes: 2 additions & 0 deletions docs/cpu_backend/fa2_radix_paged.rst
@@ -0,0 +1,2 @@
FA2, Radix, Paged
====================
1 change: 1 addition & 0 deletions docs/cpu_backend/index.rst
@@ -5,5 +5,6 @@ CPU Backend
:maxdepth: 2

threads
fa2_radix_paged
arm/index
x86/index
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
@@ -6,3 +6,4 @@ add_subdirectory(llama)
add_subdirectory(minicpm_o)
add_subdirectory(qwen3)
add_subdirectory(qwen3_service)
add_subdirectory(deepseek_ocr)
3 changes: 3 additions & 0 deletions examples/deepseek_ocr/CMakeLists.txt
@@ -0,0 +1,3 @@
add_executable(mllm-deepseek-ocr-runner main.cpp)
target_link_libraries(mllm-deepseek-ocr-runner PRIVATE MllmRT MllmCPUBackend)
target_include_directories(mllm-deepseek-ocr-runner PRIVATE ${MLLM_INCLUDE_DIR})
21 changes: 21 additions & 0 deletions examples/deepseek_ocr/main.cpp
@@ -0,0 +1,21 @@
#include <mllm/mllm.hpp>
#include "mllm/models/deepseek_ocr/modeling_deepseek_ocr.hpp"
#include "mllm/models/deepseek_ocr/tokenization_deepseek_ocr.hpp"

using mllm::Argparse;

MLLM_MAIN({
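// Minimal DeepSeek-OCR runner: load a quantized checkpoint and convert one document image to markdown.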
// auto config = mllm::models::deepseek_ocr::DpskOcrConfig("/Volumes/D/mllm-models/DeepSeek-OCR-w32a32/config.json");
// auto model = mllm::models::deepseek_ocr::DeepseekOCRForCausalLM(config);
// auto tokenizer = mllm::models::deepseek_ocr::DpskOcrTokenizer("/Volumes/D/mllm-models/DeepSeek-OCR-w32a32/tokenizer.json");
// model.load(mllm::load("/Volumes/D/mllm-models/DeepSeek-OCR-w32a32/model.mllm", mllm::ModelFileVersion::kV2));
mllm::setLogLevel(mllm::LogLevel::kError);
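// Config, model, tokenizer, and weights for the w4a8 (i8mm) quantized checkpoint.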
auto config = mllm::models::deepseek_ocr::DpskOcrConfig("/Volumes/D/mllm-models/DeepSeek-OCR-w4a8-i8mm-kai/config.json");
auto model = mllm::models::deepseek_ocr::DeepseekOCRForCausalLM(config);
auto tokenizer =
mllm::models::deepseek_ocr::DpskOcrTokenizer("/Volumes/D/mllm-models/DeepSeek-OCR-w4a8-i8mm-kai/tokenizer.json");
model.load(mllm::load("/Volumes/D/mllm-models/DeepSeek-OCR-w4a8-i8mm-kai/model.mllm", mllm::ModelFileVersion::kV2));

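// Run inference on the image; the final path argument is presumably an output location for the OCR results.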
model.infer(tokenizer, "<image>\n<|grounding|>Convert the document to markdown. ", "/Volumes/D/mllm/.tmp/dpsk-ocr-pr.png",
"/Volumes/D/mllm/.tmp/dpsk-ocr");
});