Skip to content

Commit f171982

Browse files
authored
Add custom JSON mode generation option for Gemini language model (#80)
* Add custom JSON mode generation option for Gemini language model * Incorporate feedback from review
1 parent 3565c1e commit f171982

File tree

2 files changed

+94
-4
lines changed

2 files changed

+94
-4
lines changed

Sources/AnyLanguageModel/Models/GeminiLanguageModel.swift

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,17 +88,47 @@ public struct GeminiLanguageModel: LanguageModel {
8888
/// (URL context), code execution, and location services (Google Maps).
8989
public var serverTools: [ServerTool]?
9090

91+
/// Configures JSON mode for structured output.
92+
///
93+
/// Use this type to enable JSON mode,
94+
/// which constrains the model to output valid JSON.
95+
/// Optionally provide a schema for typed JSON output.
96+
public enum JSONMode: Sendable, Hashable, ExpressibleByBooleanLiteral {
    /// Plain text output; JSON mode is turned off.
    case disabled

    /// JSON output with no schema applied.
    case enabled

    /// JSON output constrained by the associated schema.
    case schema(JSONSchema)

    /// Creates a mode from a Boolean literal:
    /// `true` maps to `.enabled` and `false` maps to `.disabled`.
    public init(booleanLiteral value: Bool) {
        if value {
            self = .enabled
        } else {
            self = .disabled
        }
    }
}
110+
111+
/// The JSON mode configuration for structured output.
112+
///
113+
/// When set to `.enabled`, the model will output valid JSON.
114+
/// When set to `.schema(_:)`, the model will output JSON
115+
/// conforming to the provided schema.
116+
public var jsonMode: JSONMode?
117+
91118
/// Creates custom generation options for Gemini models.
92119
///
93120
/// - Parameters:
94121
/// - thinking: The thinking mode configuration. When `nil`, uses the model's default.
95122
/// - serverTools: Server-side tools to enable. When `nil`, uses the model's default.
123+
/// - jsonMode: The JSON mode configuration. When `nil`, no JSON mode settings are sent with the request.
96124
public init(
97125
thinking: Thinking? = nil,
98-
serverTools: [ServerTool]? = nil
126+
serverTools: [ServerTool]? = nil,
127+
jsonMode: JSONMode? = nil
99128
) {
100129
self.thinking = thinking
101130
self.serverTools = serverTools
131+
self.jsonMode = jsonMode
102132
}
103133
}
104134

@@ -240,6 +270,7 @@ public struct GeminiLanguageModel: LanguageModel {
240270
let customOptions = options[custom: GeminiLanguageModel.self]
241271
let effectiveThinking = customOptions?.thinking ?? _thinking
242272
let effectiveServerTools = customOptions?.serverTools ?? _serverTools
273+
let effectiveJsonMode = customOptions?.jsonMode
243274

244275
let url =
245276
baseURL
@@ -262,7 +293,8 @@ public struct GeminiLanguageModel: LanguageModel {
262293
contents: contents,
263294
tools: geminiTools,
264295
options: options,
265-
thinking: effectiveThinking
296+
thinking: effectiveThinking,
297+
jsonMode: effectiveJsonMode
266298
)
267299

268300
let body = try JSONEncoder().encode(params)
@@ -350,6 +382,7 @@ public struct GeminiLanguageModel: LanguageModel {
350382
let customOptions = options[custom: GeminiLanguageModel.self]
351383
let effectiveThinking = customOptions?.thinking ?? _thinking
352384
let effectiveServerTools = customOptions?.serverTools ?? _serverTools
385+
let effectiveJsonMode = customOptions?.jsonMode
353386

354387
let userSegments = extractPromptSegments(from: session, fallbackText: prompt.description)
355388
let contents = [
@@ -375,7 +408,8 @@ public struct GeminiLanguageModel: LanguageModel {
375408
contents: contents,
376409
tools: geminiTools,
377410
options: options,
378-
thinking: effectiveThinking
411+
thinking: effectiveThinking,
412+
jsonMode: effectiveJsonMode
379413
)
380414

381415
let body = try JSONEncoder().encode(params)
@@ -460,7 +494,8 @@ private func createGenerateContentParams(
460494
contents: [GeminiContent],
461495
tools: [GeminiTool]?,
462496
options: GenerationOptions,
463-
thinking: GeminiLanguageModel.CustomGenerationOptions.Thinking
497+
thinking: GeminiLanguageModel.CustomGenerationOptions.Thinking,
498+
jsonMode: GeminiLanguageModel.CustomGenerationOptions.JSONMode?
464499
) throws -> [String: JSONValue] {
465500
var params: [String: JSONValue] = [
466501
"contents": try JSONValue(contents)
@@ -500,6 +535,18 @@ private func createGenerateContentParams(
500535
}
501536
generationConfig["thinkingConfig"] = .object(thinkingConfig)
502537

538+
if let jsonMode {
539+
switch jsonMode {
540+
case .disabled:
541+
break
542+
case .enabled:
543+
generationConfig["responseMimeType"] = .string("application/json")
544+
case .schema(let schema):
545+
generationConfig["responseMimeType"] = .string("application/json")
546+
generationConfig["responseSchema"] = try JSONValue(schema)
547+
}
548+
}
549+
503550
if !generationConfig.isEmpty {
504551
params["generationConfig"] = .object(generationConfig)
505552
}

Tests/AnyLanguageModelTests/GeminiLanguageModelTests.swift

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import Foundation
2+
import JSONSchema
23
import Testing
34

45
@testable import AnyLanguageModel
@@ -144,4 +145,46 @@ struct GeminiLanguageModelTests {
144145
let response = try await session.respond(to: "")
145146
#expect(!response.content.isEmpty)
146147
}
148+
149+
/// Verifies that enabling JSON mode through a Boolean literal produces a
/// response that decodes as JSON and carries the requested `greeting` value.
@Test func jsonModeEnabled() async throws {
    // Shape of the object the prompt asks the model to return.
    struct Greeting: Decodable {
        let greeting: String
    }

    let session = LanguageModelSession(model: model)

    var options = GenerationOptions()
    options[custom: GeminiLanguageModel.self] = .init(
        thinking: .disabled,
        jsonMode: true
    )

    let response = try await session.respond(
        to: "Return a JSON object with a 'greeting' key and value 'hello'",
        options: options
    )

    // Decode instead of substring-matching so that output which merely mentions
    // the words, but is not valid JSON, fails the test with a decoding error.
    // NOTE(review): assumes `response.content` is a `String` — confirm.
    let decoded = try JSONDecoder().decode(Greeting.self, from: Data(response.content.utf8))
    #expect(decoded.greeting == "hello")
}
165+
166+
/// Verifies that JSON mode with a schema produces a response that decodes
/// into the schema's shape (`name` string, `age` integer) with the requested values.
@Test func jsonModeWithSchema() async throws {
    // Swift mirror of the schema below; decoding fails if a required field
    // is missing or has the wrong JSON type.
    struct Person: Decodable {
        let name: String
        let age: Int
    }

    let session = LanguageModelSession(model: model)

    let schema = JSONSchema.object(
        properties: [
            "name": .string(),
            "age": .integer(),
        ],
        required: ["name", "age"]
    )

    var options = GenerationOptions()
    options[custom: GeminiLanguageModel.self] = .init(
        thinking: .disabled,
        jsonMode: .schema(schema)
    )

    let response = try await session.respond(
        to: "Generate a person with name 'Alice' and age 30",
        options: options
    )

    // Check typed fields rather than substrings: "30" or "Alice" appearing
    // somewhere in free text would not prove the schema was honored.
    // NOTE(review): assumes `response.content` is a `String` — confirm.
    let person = try JSONDecoder().decode(Person.self, from: Data(response.content.utf8))
    #expect(person.name == "Alice")
    #expect(person.age == 30)
}
147190
}

0 commit comments

Comments
 (0)