-
Notifications
You must be signed in to change notification settings - Fork 68
Open
Description
can't run HuggingFaceTB/SmolVLM-256M-Instruct or llava-hf/llava-onevision-qwen2-0.5b-ov-hf
They only support the official ONNX models, and loading them fails with an error saying that IR version 10 is not supported.
So I use the code to downgrade the IR Version to 9
import os
from pathlib import Path
import onnx
# 默认路径配置
DEFAULT_INPUT_DIR = "assets/models/llava"
DEFAULT_OUTPUT_DIR = "assets/models/llava-v9"
def downgrade_onnx_model(input_path: str, output_path: str = None):
    """
    Downgrade an ONNX model's IR version to 9 and save the result.

    Args:
        input_path: path of the model to load.
        output_path: path to write the downgraded model to. If None, the
            model is saved next to the input with a ``_v9`` suffix added
            to the file stem.
    """
    print(f"正在加载模型: {input_path}")
    model = onnx.load(input_path)

    # Report the IR version the model currently declares.
    current_ir_version = model.ir_version
    print(f"当前 IR 版本: {current_ir_version}")
    if current_ir_version <= 9:
        print("模型已经是 IR version 9 或更低版本,无需降级")
        # Bug fix: previously this returned without writing anything, so a
        # batch run (process_directory) silently omitted already-compatible
        # models from the output directory. Copy the model through so the
        # output directory is always complete.
        if output_path is not None:
            onnx.save(model, output_path)
        return

    # NOTE(review): only the ir_version field is rewritten here; operator
    # opset versions are left untouched, so a runtime may still reject the
    # model if an opset/op itself is unsupported.
    model.ir_version = 9
    print(f"已将 IR 版本降级到: {model.ir_version}")

    # Derive a default output path ("<stem>_v9<suffix>") when none is given.
    if output_path is None:
        input_path = Path(input_path)
        output_path = str(
            input_path.parent / f"{input_path.stem}_v9{input_path.suffix}"
        )

    print(f"正在保存降级后的模型到: {output_path}")
    onnx.save(model, output_path)

    # Reload and validate the saved model; bail out (after reporting) if the
    # checker rejects it so the success message below is not misleading.
    try:
        onnx.checker.check_model(onnx.load(output_path))
        print("模型验证通过!")
    except Exception as e:
        print(f"模型验证失败: {e}")
        return

    print(
        f"模型降级完成!新模型大小: {os.path.getsize(output_path) / (1024 * 1024):.2f} MB"
    )
def process_directory(
    input_dir: str = DEFAULT_INPUT_DIR, output_dir: str = DEFAULT_OUTPUT_DIR
):
    """
    Downgrade every ``*.onnx`` model found in a directory.

    Args:
        input_dir: directory scanned (non-recursively) for .onnx files.
        output_dir: directory the downgraded models are written to; each
            output file keeps its original name.
    """
    # Create the destination directory up front so saves cannot fail on it.
    os.makedirs(output_dir, exist_ok=True)

    # Hand each model file to the single-file downgrader.
    for model_file in Path(input_dir).glob("*.onnx"):
        destination = os.path.join(output_dir, model_file.name)
        print(f"\n处理文件: {model_file.name}")
        downgrade_onnx_model(str(model_file), destination)
def main():
    """Entry point: batch-downgrade all models under the default directories."""
    print(f"输入目录: {DEFAULT_INPUT_DIR}")
    print(f"输出目录: {DEFAULT_OUTPUT_DIR}")

    # Guard clause: nothing to do if the source directory is missing.
    if not os.path.exists(DEFAULT_INPUT_DIR):
        print(f"错误:输入目录 {DEFAULT_INPUT_DIR} 不存在!")
        return

    process_directory()

    print("\n所有模型处理完成!")
    print(f"降级后的模型已保存到: {DEFAULT_OUTPUT_DIR}")
if __name__ == "__main__":
    main()

and then I used the following Dart code to load the downgraded models,
But failed when loading
OrtEnv.instance.init();
print('ONNX Runtime environment initialized');
final model_path = 'assets/models/llava-v9';
final visionModelBytes = await rootBundle.load('$model_path/vision_encoder_bnb4.onnx');
final embedModelBytes = await rootBundle.load('$model_path/embed_tokens_bnb4.onnx');
print('Embed model loaded, size: ${embedModelBytes.lengthInBytes} bytes');
final decoderModelBytes = await rootBundle.load('$model_path/decoder_model_merged_bnb4.onnx');
final sessionOptions = OrtSessionOptions();
try {
_visionSession = OrtSession.fromBuffer(
visionModelBytes.buffer.asUint8List(),
sessionOptions,
  ); // this line causes the error: com.microsoft:MatMulBnb4(-1) is not a registered function/op

Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels