Skip to content

com.microsoft:MatMulBnb4(-1) is not a registered function/op #45

@creeponsky

Description

@creeponsky

I can't run HuggingFaceTB/SmolVLM-256M-Instruct or llava-hf/llava-onevision-qwen2-0.5b-ov-hf.
Both provide official ONNX models, but loading them fails with an error saying that IR version 10 is not supported.
So I used the following script to downgrade the IR version to 9:

import os
from pathlib import Path

import onnx

# Default path configuration.
# These are the default arguments of process_directory() and the paths that
# main() reports and checks.
DEFAULT_INPUT_DIR = "assets/models/llava"
DEFAULT_OUTPUT_DIR = "assets/models/llava-v9"


def downgrade_onnx_model(input_path: str, output_path: str = None):
    """
    Save a copy of an ONNX model with its IR version lowered to 9.

    Only the model's ``ir_version`` field is modified before saving. If the
    loaded model already reports IR version 9 or lower, nothing is written.
    The saved file is then run through the ONNX checker; if the check fails,
    the failure is printed and the saved file is left on disk.

    Args:
        input_path: Path of the model file to load.
        output_path: Path to write the downgraded model to. When omitted, the
            model is written next to the input file with ``_v9`` appended to
            the file stem.

    Returns:
        None. Progress and results are reported with ``print``.
    """
    print(f"正在加载模型: {input_path}")
    model = onnx.load(input_path)

    # Report the IR version the file currently declares.
    current_ir_version = model.ir_version
    print(f"当前 IR 版本: {current_ir_version}")

    if current_ir_version <= 9:
        print("模型已经是 IR version 9 或更低版本,无需降级")
        return

    # Overwrite the declared IR version.
    model.ir_version = 9
    print(f"已将 IR 版本降级到: {model.ir_version}")

    # Without an explicit destination, derive "<stem>_v9<suffix>" beside the
    # input. A separate local is used so the ``input_path`` parameter keeps
    # its declared ``str`` type.
    if output_path is None:
        source = Path(input_path)
        output_path = str(source.parent / f"{source.stem}_v9{source.suffix}")

    # Make sure the destination directory exists before writing.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    print(f"正在保存降级后的模型到: {output_path}")
    onnx.save(model, output_path)

    # Validate the file that was just written. The checker is given the path
    # so it reads the file itself instead of a second in-memory copy being
    # loaded here first.
    try:
        onnx.checker.check_model(output_path)
        print("模型验证通过!")
    except Exception as e:
        print(f"模型验证失败: {e}")
        return

    print(
        f"模型降级完成!新模型大小: {os.path.getsize(output_path) / (1024 * 1024):.2f} MB"
    )


def process_directory(
    input_dir: str = DEFAULT_INPUT_DIR, output_dir: str = DEFAULT_OUTPUT_DIR
):
    """
    Run the IR-version downgrade on each ``.onnx`` file in a directory.

    Only files matching ``*.onnx`` directly inside ``input_dir`` are
    considered. Each one is passed to ``downgrade_onnx_model`` with a
    destination of the same file name inside ``output_dir``.

    Args:
        input_dir: Directory to look for ``.onnx`` files in.
        output_dir: Directory the results are written to; created if missing.
    """
    # Create the destination directory up front.
    os.makedirs(output_dir, exist_ok=True)

    source_dir = Path(input_dir)
    for model_file in source_dir.glob("*.onnx"):
        file_name = model_file.name
        # The output keeps the original file name.
        destination = os.path.join(output_dir, file_name)
        print(f"\n处理文件: {file_name}")
        downgrade_onnx_model(str(model_file), destination)


def main():
    """
    Downgrade the models in the default input directory.

    Prints the default input and output directories, then either processes
    the input directory or, if it does not exist, prints an error and does
    nothing else.
    """
    print(f"输入目录: {DEFAULT_INPUT_DIR}")
    print(f"输出目录: {DEFAULT_OUTPUT_DIR}")

    if os.path.exists(DEFAULT_INPUT_DIR):
        # Input directory is present: process it with the default paths.
        process_directory()

        print("\n所有模型处理完成!")
        print(f"降级后的模型已保存到: {DEFAULT_OUTPUT_DIR}")
    else:
        print(f"错误:输入目录 {DEFAULT_INPUT_DIR} 不存在!")


# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

Then I load the downgraded models with the following Dart code,
but it fails while loading:

      OrtEnv.instance.init();
      print('ONNX Runtime environment initialized');
      final model_path = 'assets/models/llava-v9';
      final visionModelBytes = await rootBundle.load('$model_path/vision_encoder_bnb4.onnx');
      final embedModelBytes = await rootBundle.load('$model_path/embed_tokens_bnb4.onnx');
      print('Embed model loaded, size: ${embedModelBytes.lengthInBytes} bytes');
      final decoderModelBytes = await rootBundle.load('$model_path/decoder_model_merged_bnb4.onnx');
      final sessionOptions = OrtSessionOptions();
      

      try {
        _visionSession = OrtSession.fromBuffer(
          visionModelBytes.buffer.asUint8List(),
          sessionOptions,
        ); // this line causes the error: com.microsoft:MatMulBnb4(-1) is not a registered function/op

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions