From a26494318b3b76fa54c98769f13f57a56dd8da84 Mon Sep 17 00:00:00 2001 From: Anthony Drendel Date: Wed, 11 Mar 2026 12:19:12 +0100 Subject: [PATCH 1/6] Add Nemotron tool integration test --- .../ToolCallIntegrationTests.swift | 158 +++++++++++++++--- 1 file changed, 136 insertions(+), 22 deletions(-) diff --git a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift index 3f40d988..576d0489 100644 --- a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift +++ b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift @@ -23,12 +23,14 @@ public class ToolCallIntegrationTests: XCTestCase { static let lfm2ModelId = "mlx-community/LFM2-2.6B-Exp-4bit" static let glm4ModelId = "mlx-community/GLM-4-9B-0414-4bit" static let mistral3ModelId = "mlx-community/Ministral-3-3B-Instruct-2512-4bit" + static let nemotronModelId = "mlx-community/NVIDIA-Nemotron-3-Nano-30B-A3B-4bit" // MARK: - Shared State nonisolated(unsafe) static var lfm2Container: ModelContainer? nonisolated(unsafe) static var glm4Container: ModelContainer? nonisolated(unsafe) static var mistral3Container: ModelContainer? + nonisolated(unsafe) static var nemotronContainer: ModelContainer? // MARK: - Tool Schema @@ -65,42 +67,31 @@ public class ToolCallIntegrationTests: XCTestCase { let lfm2Expectation = XCTestExpectation(description: "Load LFM2") let glm4Expectation = XCTestExpectation(description: "Load GLM4") let mistral3Expectation = XCTestExpectation(description: "Load Mistral3") + let nemotronExpectation = XCTestExpectation(description: "Load Nemotron") Task { - do { - lfm2Container = try await LLMModelFactory.shared.loadContainer( - configuration: .init(id: lfm2ModelId) - ) - } catch { - print("Failed to load LFM2: \(error)") - } + lfm2Container = await loadModelContainer(modelId: lfm2ModelId) lfm2Expectation.fulfill() } Task { - do { - glm4Container = try await LLMModelFactory.shared.loadContainer( - configuration: .init(id: glm4ModelId) - ) - } catch { - print("Failed to load GLM4: \(error)") - } + glm4Container = await loadModelContainer(modelId: glm4ModelId) glm4Expectation.fulfill() } Task { - do { - mistral3Container = try await VLMModelFactory.shared.loadContainer( - configuration: .init(id: mistral3ModelId) - ) - } catch { - print("Failed to load Mistral3: \(error)") - } + mistral3Container = await loadModelContainer(modelId: mistral3ModelId) mistral3Expectation.fulfill() } + Task { + nemotronContainer = await loadModelContainer(modelId: nemotronModelId) + nemotronExpectation.fulfill() + } + _ = XCTWaiter.wait( - for: [lfm2Expectation, glm4Expectation, mistral3Expectation], timeout: 600) + for: [lfm2Expectation, glm4Expectation, mistral3Expectation, nemotronExpectation], + timeout: 600) } // MARK: - LFM2 Tests @@ -325,8 +316,131 @@ public class ToolCallIntegrationTests: XCTestCase { } } + // MARK: - Nemotron Tests + + func testNemotronToolCallFormatAutoDetection() async throws { + guard let container = Self.nemotronContainer else { + throw XCTSkip("Nemotron model not available") + } + + let config = await container.configuration + XCTAssertEqual( + config.toolCallFormat, .xmlFunction, + "Nemotron model should auto-detect .xmlFunction tool call format" + ) + } + + func testNemotronEndToEndToolCallGeneration() async throws { + guard let container = Self.nemotronContainer else { + throw XCTSkip("Nemotron model not available") + } + + let input = UserInput( + chat: [ + .system( + "You are a helpful assistant with access to tools. When asked about weather, use the get_weather function." + ), + .user("What's the weather in Tokyo?"), + ], + tools: Self.weatherToolSchema, + additionalContext: ["enable_thinking": false] + ) + + let (result, toolCalls) = try await generateWithTools( + container: container, + input: input, + maxTokens: 150 + ) + + print("Nemotron Output: \(result)") + print("Nemotron Tool Calls: \(toolCalls)") + + if !toolCalls.isEmpty { + let toolCall = toolCalls.first! + XCTAssertEqual(toolCall.function.name, "get_weather") + if let location = toolCall.function.arguments["location"]?.asString { + XCTAssertTrue( + location.lowercased().contains("tokyo"), + "Expected location to contain 'Tokyo', got: \(location)" + ) + } + } + } + + func testNemotronMultipleToolCallGeneration() async throws { + guard let container = Self.nemotronContainer else { + throw XCTSkip("Nemotron model not available") + } + + let multiToolSchema: [[String: any Sendable]] = + Self.weatherToolSchema + [ + [ + "type": "function", + "function": [ + "name": "get_time", + "description": "Get the current time in a given timezone", + "parameters": [ + "type": "object", + "properties": [ + "timezone": [ + "type": "string", + "description": + "The timezone, e.g. America/New_York, Asia/Tokyo", + ] as [String: any Sendable] + ] as [String: any Sendable], + "required": ["timezone"], + ] as [String: any Sendable], + ] as [String: any Sendable], + ] + ] + + let input = UserInput( + chat: [ + .system( + "You are a helpful assistant with access to tools. Always use the available tools to answer questions. Call multiple tools in parallel when needed." + ), + .user( + "What's the weather in Tokyo and what time is it there?" + ), + ], + tools: multiToolSchema + ) + + let (result, toolCalls) = try await generateWithTools( + container: container, + input: input, + maxTokens: 600 + ) + + print("Nemotron Output: \(result)") + print("Nemotron Calls: \(toolCalls)") + + let validNames: Set = ["get_weather", "get_time"] + for toolCall in toolCalls { + XCTAssertTrue( + validNames.contains(toolCall.function.name), + "Unexpected tool call: \(toolCall.function.name)" + ) + } + + if toolCalls.count > 1 { + print("Successfully parsed \(toolCalls.count) tool calls from Nemotron") + } + } + // MARK: - Helper Methods + private static func loadModelContainer(modelId: String) async -> ModelContainer? { + do { + return try await LLMModelFactory.shared.loadContainer( + configuration: .init(id: modelId) + ) + } catch { + print("Failed to load model \(modelId): \(error)") + return nil + } + } + /// Generate text and collect any tool calls private func generateWithTools( container: ModelContainer, From 96cde538cb8829354dab86474ce5743813c50369 Mon Sep 17 00:00:00 2001 From: Anthony Drendel Date: Wed, 11 Mar 2026 12:35:24 +0100 Subject: [PATCH 2/6] Use IntegrationTestModels inside of ToolCallIntegrationTests --- .../IntegrationTestModels.swift | 68 +++++++++ .../ToolCallIntegrationTests.swift | 131 ++++++------------ 2 files changed, 110 insertions(+), 89 deletions(-) diff --git a/Tests/MLXLMIntegrationTests/IntegrationTestModels.swift b/Tests/MLXLMIntegrationTests/IntegrationTestModels.swift index fbd84d16..d91e3117 100644 --- a/Tests/MLXLMIntegrationTests/IntegrationTestModels.swift +++ b/Tests/MLXLMIntegrationTests/IntegrationTestModels.swift @@ -8,14 +8,26 @@ import MLXVLM enum IntegrationTestModelIDs { static let llmModelId = "mlx-community/Qwen3-4B-Instruct-2507-4bit" static let vlmModelId = "mlx-community/Qwen3-VL-4B-Instruct-4bit" + + static let lfm2ModelId = "mlx-community/LFM2-2.6B-Exp-4bit" + static let glm4ModelId = "mlx-community/GLM-4-9B-0414-4bit" + static let mistral3ModelId = "mlx-community/Ministral-3-3B-Instruct-2512-4bit" + static let nemotronModelId = "mlx-community/NVIDIA-Nemotron-3-Nano-30B-A3B-4bit" } actor IntegrationTestModels { static let shared = IntegrationTestModels() + private init() {} + private var llmTask: Task? private var vlmTask: Task? + private var lfm2Task: Task? + private var glm4Task: Task? + private var mistral3Task: Task? + private var nemotronTask: Task? + func llmContainer() async throws -> ModelContainer { if let task = llmTask { return try await task.value @@ -43,4 +55,60 @@ actor IntegrationTestModels { vlmTask = task return try await task.value } + + func lfm2Container() async throws -> ModelContainer { + if let task = lfm2Task { + return try await task.value + } + + let task = Task { + try await LLMModelFactory.shared.loadContainer( + configuration: .init(id: IntegrationTestModelIDs.lfm2ModelId) + ) + } + lfm2Task = task + return try await task.value + } + + func glm4Container() async throws -> ModelContainer { + if let task = glm4Task { + return try await task.value + } + + let task = Task { + try await LLMModelFactory.shared.loadContainer( + configuration: .init(id: IntegrationTestModelIDs.glm4ModelId) + ) + } + glm4Task = task + return try await task.value + } + + func mistral3Container() async throws -> ModelContainer { + if let task = mistral3Task { + return try await task.value + } + + let task = Task { + try await LLMModelFactory.shared.loadContainer( + configuration: .init(id: IntegrationTestModelIDs.mistral3ModelId) + ) + } + mistral3Task = task + return try await task.value + } + + func nemotronContainer() async throws -> ModelContainer { + if let task = nemotronTask { + return try await task.value + } + + let task = Task { + try await LLMModelFactory.shared.loadContainer( + configuration: .init(id: IntegrationTestModelIDs.nemotronModelId) + ) + } + nemotronTask = task + return try await task.value + } } diff --git a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift index 576d0489..81f4223c 100644 --- a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift +++ b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift @@ -17,21 +17,6 @@ import XCTest /// - LFM2: https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/tool_parsers/default.py /// - GLM4: https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/tool_parsers/glm47.py public class ToolCallIntegrationTests: XCTestCase { - - // MARK: - Model IDs - - static let lfm2ModelId = "mlx-community/LFM2-2.6B-Exp-4bit" - static let glm4ModelId = "mlx-community/GLM-4-9B-0414-4bit" - static let mistral3ModelId = "mlx-community/Ministral-3-3B-Instruct-2512-4bit" - static let nemotronModelId = "mlx-community/NVIDIA-Nemotron-3-Nano-30B-A3B-4bit" - - // MARK: - Shared State - - nonisolated(unsafe) static var lfm2Container: ModelContainer? - nonisolated(unsafe) static var glm4Container: ModelContainer? - nonisolated(unsafe) static var mistral3Container: ModelContainer? - nonisolated(unsafe) static var nemotronContainer: ModelContainer? - // MARK: - Tool Schema static let weatherToolSchema: [[String: any Sendable]] = [ @@ -59,49 +44,52 @@ public class ToolCallIntegrationTests: XCTestCase { ] ] - // MARK: - Setup + // MARK: - Model Loading - override public class func setUp() { - super.setUp() - - let lfm2Expectation = XCTestExpectation(description: "Load LFM2") - let glm4Expectation = XCTestExpectation(description: "Load GLM4") - let mistral3Expectation = XCTestExpectation(description: "Load Mistral3") - let nemotronExpectation = XCTestExpectation(description: "Load Nemotron") - - Task { - lfm2Container = await loadModelContainer(modelId: lfm2ModelId) - lfm2Expectation.fulfill() + private var lfm2Container: ModelContainer { + get async throws { + do { + return try await IntegrationTestModels.shared.lfm2Container() + } catch { + throw XCTSkip("LFM2 model not available: \(error)") + } } + } - Task { - glm4Container = await loadModelContainer(modelId: glm4ModelId) - glm4Expectation.fulfill() + private var glm4Container: ModelContainer { + get async throws { + do { + return try await IntegrationTestModels.shared.glm4Container() + } catch { + throw XCTSkip("GLM4 model not available: \(error)") + } } + } - Task { - mistral3Container = await loadModelContainer(modelId: mistral3ModelId) - mistral3Expectation.fulfill() + private var mistral3Container: ModelContainer { + get async throws { + do { + return try await IntegrationTestModels.shared.mistral3Container() + } catch { + throw XCTSkip("Mistral3 model not available: \(error)") + } } + } - Task { - nemotronContainer = await loadModelContainer(modelId: nemotronModelId) - nemotronExpectation.fulfill() + private var nemotronContainer: ModelContainer { + get async throws { + do { + return try await IntegrationTestModels.shared.nemotronContainer() + } catch { + throw XCTSkip("Nemotron model not available: \(error)") + } } - - _ = XCTWaiter.wait( - for: [lfm2Expectation, glm4Expectation, mistral3Expectation, nemotronExpectation], - timeout: 600) } // MARK: - LFM2 Tests func testLFM2ToolCallFormatAutoDetection() async throws { - guard let container = Self.lfm2Container else { - throw XCTSkip("LFM2 model not available") - } - - let config = await container.configuration + let config = try await lfm2Container.configuration XCTAssertEqual( config.toolCallFormat, .lfm2, "LFM2 model should auto-detect .lfm2 tool call format" @@ -109,9 +97,7 @@ public class ToolCallIntegrationTests: XCTestCase { } func testLFM2EndToEndToolCallGeneration() async throws { - guard let container = Self.lfm2Container else { - throw XCTSkip("LFM2 model not available") - } + let container = try await lfm2Container // Create input with tool schema let input = UserInput( @@ -151,11 +137,7 @@ public class ToolCallIntegrationTests: XCTestCase { // MARK: - GLM4 Tests func testGLM4ToolCallFormatAutoDetection() async throws { - guard let container = Self.glm4Container else { - throw XCTSkip("GLM4 model not available") - } - - let config = await container.configuration + let config = try await glm4Container.configuration XCTAssertEqual( config.toolCallFormat, .glm4, "GLM4 model should auto-detect .glm4 tool call format" @@ -163,9 +145,7 @@ public class ToolCallIntegrationTests: XCTestCase { } func testGLM4EndToEndToolCallGeneration() async throws { - guard let container = Self.glm4Container else { - throw XCTSkip("GLM4 model not available") - } + let container = try await glm4Container // Create input with tool schema let input = UserInput( @@ -205,11 +185,7 @@ public class ToolCallIntegrationTests: XCTestCase { // MARK: - Mistral3 Tests func testMistral3ToolCallFormatAutoDetection() async throws { - guard let container = Self.mistral3Container else { - throw XCTSkip("Mistral3 model not available") - } - - let config = await container.configuration + let config = try await mistral3Container.configuration XCTAssertEqual( config.toolCallFormat, .mistral, "Mistral3 model should auto-detect .mistral tool call format" @@ -217,9 +193,7 @@ public class ToolCallIntegrationTests: XCTestCase { } func testMistral3EndToEndToolCallGeneration() async throws { - guard let container = Self.mistral3Container else { - throw XCTSkip("Mistral3 model not available") - } + let container = try await mistral3Container let input = UserInput( chat: [ @@ -254,9 +228,7 @@ public class ToolCallIntegrationTests: XCTestCase { } func testMistral3MultipleToolCallGeneration() async throws { - guard let container = Self.mistral3Container else { - throw XCTSkip("Mistral3 model not available") - } + let container = try await mistral3Container let multiToolSchema: [[String: any Sendable]] = Self.weatherToolSchema + [ @@ -319,11 +291,7 @@ public class ToolCallIntegrationTests: XCTestCase { // MARK: - Nemotron Tests func testNemotronToolCallFormatAutoDetection() async throws { - guard let container = Self.nemotronContainer else { - throw XCTSkip("Nemotron model not available") - } - - let config = await container.configuration + let config = try await nemotronContainer.configuration XCTAssertEqual( config.toolCallFormat, .xmlFunction, "Nemotron model should auto-detect .xmlFunction tool call format" @@ -331,9 +299,7 @@ public class ToolCallIntegrationTests: XCTestCase { } func testNemotronEndToEndToolCallGeneration() async throws { - guard let container = Self.nemotronContainer else { - throw XCTSkip("Nemotron model not available") - } + let container = try await nemotronContainer let input = UserInput( chat: [ @@ -368,9 +334,7 @@ public class ToolCallIntegrationTests: XCTestCase { } func testNemotronMultipleToolCallGeneration() async throws { - guard let container = Self.nemotronContainer else { - throw XCTSkip("Nemotron model not available") - } + let container = try await nemotronContainer let multiToolSchema: [[String: any Sendable]] = Self.weatherToolSchema + [ @@ -430,17 +394,6 @@ public class ToolCallIntegrationTests: XCTestCase { // MARK: - Helper Methods - private static func loadModelContainer(modelId: String) async -> ModelContainer? { - do { - return try await LLMModelFactory.shared.loadContainer( - configuration: .init(id: modelId) - ) - } catch { - print("Failed to load model \(modelId): \(error)") - return nil - } - } - /// Generate text and collect any tool calls private func generateWithTools( container: ModelContainer, From d6bd700c0f169a8461b248a09dcb74ceff15bf0a Mon Sep 17 00:00:00 2001 From: Anthony Drendel Date: Wed, 11 Mar 2026 12:40:15 +0100 Subject: [PATCH 3/6] Skip Nemotron tests in ToolCallIntegrationTests by default --- Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift index 81f4223c..cc6b8ccd 100644 --- a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift +++ b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift @@ -17,6 +17,7 @@ import XCTest /// - LFM2: https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/tool_parsers/default.py /// - GLM4: https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/tool_parsers/glm47.py public class ToolCallIntegrationTests: XCTestCase { + // MARK: - Tool Schema static let weatherToolSchema: [[String: any Sendable]] = [ @@ -78,6 +79,8 @@ public class ToolCallIntegrationTests: XCTestCase { private var nemotronContainer: ModelContainer { get async throws { + try XCTSkipIf(true, "Nemotron model is opt-in only because of its size") + do { return try await IntegrationTestModels.shared.nemotronContainer() } catch { From 487cec62f5f7455e40c939905a224724d111888c Mon Sep 17 00:00:00 2001 From: Anthony Drendel Date: Wed, 11 Mar 2026 13:11:24 +0100 Subject: [PATCH 4/6] Add Qwen3.5 tool call integration tests --- .../IntegrationTestModels.swift | 16 +++ .../ToolCallIntegrationTests.swift | 114 ++++++++++++++++++ 2 files changed, 130 insertions(+) diff --git a/Tests/MLXLMIntegrationTests/IntegrationTestModels.swift b/Tests/MLXLMIntegrationTests/IntegrationTestModels.swift index d91e3117..7aa32e79 100644 --- a/Tests/MLXLMIntegrationTests/IntegrationTestModels.swift +++ b/Tests/MLXLMIntegrationTests/IntegrationTestModels.swift @@ -13,6 +13,7 @@ enum IntegrationTestModelIDs { static let glm4ModelId = "mlx-community/GLM-4-9B-0414-4bit" static let mistral3ModelId = "mlx-community/Ministral-3-3B-Instruct-2512-4bit" static let nemotronModelId = "mlx-community/NVIDIA-Nemotron-3-Nano-30B-A3B-4bit" + static let qwen35ModelId = "mlx-community/Qwen3.5-2B-4bit" } actor IntegrationTestModels { @@ -27,6 +28,7 @@ actor IntegrationTestModels { private var glm4Task: Task? private var mistral3Task: Task? private var nemotronTask: Task? + private var qwen35Task: Task? func llmContainer() async throws -> ModelContainer { if let task = llmTask { @@ -111,4 +113,18 @@ actor IntegrationTestModels { nemotronTask = task return try await task.value } + + func qwen35Container() async throws -> ModelContainer { + if let task = qwen35Task { + return try await task.value + } + + let task = Task { + try await LLMModelFactory.shared.loadContainer( + configuration: .init(id: IntegrationTestModelIDs.qwen35ModelId) + ) + } + qwen35Task = task + return try await task.value + } } diff --git a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift index cc6b8ccd..b0d47bec 100644 --- a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift +++ b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift @@ -89,6 +89,16 @@ public class ToolCallIntegrationTests: XCTestCase { } } + private var qwen35Container: ModelContainer { + get async throws { + do { + return try await IntegrationTestModels.shared.qwen35Container() + } catch { + throw XCTSkip("Qwen3.5 model not available: \(error)") + } + } + } + // MARK: - LFM2 Tests func testLFM2ToolCallFormatAutoDetection() async throws { @@ -395,6 +405,110 @@ public class ToolCallIntegrationTests: XCTestCase { } } + // MARK: - Qwen3.5 Tests + + func testQwen35ToolCallFormatAutoDetection() async throws { + let config = try await qwen35Container.configuration + XCTAssertEqual( + config.toolCallFormat, .xmlFunction, + "Qwen3.5 model should auto-detect .xmlFunction tool call format" + ) + } + + func testQwen35EndToEndToolCallGeneration() async throws { + let container = try await qwen35Container + + let input = UserInput( + chat: [ + .system( + "You are a helpful assistant with access to tools. When asked about weather, use the get_weather function." + ), + .user("What's the weather in Tokyo?"), + ], + tools: Self.weatherToolSchema + ) + + let (result, toolCalls) = try await generateWithTools( + container: container, + input: input, + maxTokens: 150 + ) + + print("Qwen3.5 Output: \(result)") + print("Qwen3.5 Tool Calls: \(toolCalls)") + + if !toolCalls.isEmpty { + let toolCall = toolCalls.first! + XCTAssertEqual(toolCall.function.name, "get_weather") + if let location = toolCall.function.arguments["location"]?.asString { + XCTAssertTrue( + location.lowercased().contains("tokyo"), + "Expected location to contain 'Tokyo', got: \(location)" + ) + } + } + } + + func testQwen35MultipleToolCallGeneration() async throws { + let container = try await qwen35Container + + let multiToolSchema: [[String: any Sendable]] = + Self.weatherToolSchema + [ + [ + "type": "function", + "function": [ + "name": "get_time", + "description": "Get the current time in a given timezone", + "parameters": [ + "type": "object", + "properties": [ + "timezone": [ + "type": "string", + "description": + "The timezone, e.g. America/New_York, Asia/Tokyo", + ] as [String: any Sendable] + ] as [String: any Sendable], + "required": ["timezone"], + ] as [String: any Sendable], + ] as [String: any Sendable], + ] + ] + + let input = UserInput( + chat: [ + .system( + "You are a helpful assistant with access to tools. Always use the available tools to answer questions. Call multiple tools in parallel when needed." + ), + .user( + "What's the weather in Tokyo and what time is it there?" + ), + ], + tools: multiToolSchema, + additionalContext: ["enable_thinking": true] + ) + + let (result, toolCalls) = try await generateWithTools( + container: container, + input: input, + maxTokens: 300 + ) + + print("Qwen3.5 Output: \(result)") + print("Qwen3.5 Calls: \(toolCalls)") + + let validNames: Set = ["get_weather", "get_time"] + for toolCall in toolCalls { + XCTAssertTrue( + validNames.contains(toolCall.function.name), + "Unexpected tool call: \(toolCall.function.name)" + ) + } + + if toolCalls.count > 1 { + print("Successfully parsed \(toolCalls.count) tool calls from Qwen3.5") + } + } + // MARK: - Helper Methods /// Generate text and collect any tool calls From 877cf575c3aaed448068201b5bda9dd02951a2fe Mon Sep 17 00:00:00 2001 From: Anthony Drendel Date: Wed, 11 Mar 2026 13:34:28 +0100 Subject: [PATCH 5/6] Disable Nemotron thinking because it uses way too many tokens to think --- Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift index b0d47bec..4c143f57 100644 --- a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift +++ b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift @@ -380,7 +380,8 @@ public class ToolCallIntegrationTests: XCTestCase { "What's the weather in Tokyo and what time is it there?" ), ], - tools: multiToolSchema + tools: multiToolSchema, + additionalContext: ["enable_thinking": false] ) let (result, toolCalls) = try await generateWithTools( From a0bdee5e9ec52d8160ea4c0ee4e08cd7790e3caf Mon Sep 17 00:00:00 2001 From: Anthony Drendel Date: Mon, 23 Mar 2026 00:00:07 +0100 Subject: [PATCH 6/6] Remove XCTSkipIf() --- Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift | 2 -- 1 file changed, 2 deletions(-) diff --git a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift index 4c143f57..4d04b6f8 100644 --- a/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift +++ b/Tests/MLXLMIntegrationTests/ToolCallIntegrationTests.swift @@ -79,8 +79,6 @@ public class ToolCallIntegrationTests: XCTestCase { private var nemotronContainer: ModelContainer { get async throws { - try XCTSkipIf(true, "Nemotron model is opt-in only because of its size") - do { return try await IntegrationTestModels.shared.nemotronContainer() } catch {