From d536384c9fa31b0863f06ef053c7085d69464fef Mon Sep 17 00:00:00 2001
From: David Irvine <david.irvine@maidsafe.net>
Date: Sun, 15 Mar 2026 17:51:40 +0000
Subject: [PATCH] Fix Qwen35 VLM crash on text-only inference (SmallVector out
 of range)

Qwen35Language.LanguageModel.callAsFunction assumes that `inputs` is
always 2D [batch, seq], but text-only callers such as
WiredMemoryUtils.tune and TokenIterator can pass 1D [seq] token arrays.
getRopeIndex() and the subsequent dim(1) calls then crash with
"SmallVector out of range" because they access a dimension that does
not exist.

Add an ndim check at the top of callAsFunction to expand 1D inputs
to 2D before any dimension-dependent logic runs.

Fixes #148
---
 Libraries/MLXVLM/Models/Qwen35.swift | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Libraries/MLXVLM/Models/Qwen35.swift b/Libraries/MLXVLM/Models/Qwen35.swift
index 8a3c3984..d0909da4 100644
--- a/Libraries/MLXVLM/Models/Qwen35.swift
+++ b/Libraries/MLXVLM/Models/Qwen35.swift
@@ -921,6 +921,10 @@ enum Qwen35Language {
             imageGridTHW: [THW]? = nil,
             videoGridTHW: [THW]? = nil
         ) -> LMOutput {
+            // Ensure inputs is 2D [batch, seq]. Text-only callers (e.g.
+            // WiredMemoryUtils, TokenIterator) may pass 1D token arrays.
+            let inputs = inputs.ndim == 1 ? inputs.expandedDimensions(axis: 0) : inputs
+
             if pixelValues != nil {
                 precomputedPositionIds = nil
                 ropeDeltas = nil