Skip to content

Commit 131d0e9

Browse files
committed
Dump description as extra output for ollama.
1 parent d9454fd commit 131d0e9

File tree

6 files changed

+177
-96
lines changed

6 files changed

+177
-96
lines changed

src/base/json_schema.rs

Lines changed: 150 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,12 @@
1-
use super::schema;
1+
use crate::utils::immutable::RefList;
2+
3+
use super::{schema, spec::FieldName};
4+
use anyhow::Result;
5+
use indexmap::IndexMap;
26
use schemars::schema::{
3-
ArrayValidation, InstanceType, Metadata, ObjectValidation, Schema, SchemaObject, SingleOrVec,
7+
ArrayValidation, InstanceType, ObjectValidation, Schema, SchemaObject, SingleOrVec,
48
};
9+
use std::fmt::Write;
510

611
pub struct ToJsonSchemaOptions {
712
/// If true, mark all fields as required.
@@ -11,18 +16,45 @@ pub struct ToJsonSchemaOptions {
1116

1217
/// If true, the JSON schema supports the `format` keyword.
1318
pub supports_format: bool,
19+
20+
/// If true, extract descriptions to a separate extra instruction.
21+
pub extract_descriptions: bool,
1422
}
1523

16-
pub struct JsonSchemaBuilder {
24+
struct JsonSchemaBuilder {
1725
options: ToJsonSchemaOptions,
26+
extra_instructions_per_field: IndexMap<String, String>,
1827
}
1928

2029
impl JsonSchemaBuilder {
21-
pub fn new(options: ToJsonSchemaOptions) -> Self {
22-
Self { options }
30+
fn new(options: ToJsonSchemaOptions) -> Self {
31+
Self {
32+
options,
33+
extra_instructions_per_field: IndexMap::new(),
34+
}
35+
}
36+
37+
fn set_description(
38+
&mut self,
39+
schema: &mut SchemaObject,
40+
description: impl ToString,
41+
field_path: RefList<'_, &'_ FieldName>,
42+
) {
43+
if self.options.extract_descriptions {
44+
let mut fields: Vec<_> = field_path.iter().map(|f| f.as_str()).collect();
45+
fields.reverse();
46+
self.extra_instructions_per_field
47+
.insert(fields.join("."), description.to_string());
48+
} else {
49+
schema.metadata.get_or_insert_default().description = Some(description.to_string());
50+
}
2351
}
2452

25-
fn for_basic_value_type(&mut self, basic_type: &schema::BasicValueType) -> SchemaObject {
53+
fn for_basic_value_type(
54+
&mut self,
55+
basic_type: &schema::BasicValueType,
56+
field_path: RefList<'_, &'_ FieldName>,
57+
) -> SchemaObject {
2658
let mut schema = SchemaObject::default();
2759
match basic_type {
2860
schema::BasicValueType::Str => {
@@ -56,48 +88,66 @@ impl JsonSchemaBuilder {
5688
max_items: Some(2),
5789
..Default::default()
5890
}));
59-
schema.metadata.get_or_insert_default().description =
60-
Some("A range, start pos (inclusive), end pos (exclusive).".to_string());
91+
self.set_description(
92+
&mut schema,
93+
"A range represented by a list of two positions, start pos (inclusive), end pos (exclusive).",
94+
field_path,
95+
);
6196
}
6297
schema::BasicValueType::Uuid => {
6398
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
6499
if self.options.supports_format {
65100
schema.format = Some("uuid".to_string());
66101
}
67-
schema.metadata.get_or_insert_default().description =
68-
Some("A UUID, e.g. 123e4567-e89b-12d3-a456-426614174000".to_string());
102+
self.set_description(
103+
&mut schema,
104+
"A UUID, e.g. 123e4567-e89b-12d3-a456-426614174000",
105+
field_path,
106+
);
69107
}
70108
schema::BasicValueType::Date => {
71109
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
72110
if self.options.supports_format {
73111
schema.format = Some("date".to_string());
74112
}
75-
schema.metadata.get_or_insert_default().title =
76-
Some("A date in YYYY-MM-DD format, e.g. 2025-03-27".to_string());
113+
self.set_description(
114+
&mut schema,
115+
"A date in YYYY-MM-DD format, e.g. 2025-03-27",
116+
field_path,
117+
);
77118
}
78119
schema::BasicValueType::Time => {
79120
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
80121
if self.options.supports_format {
81122
schema.format = Some("time".to_string());
82123
}
83-
schema.metadata.get_or_insert_default().description =
84-
Some("A time in HH:MM:SS format, e.g. 13:32:12".to_string());
124+
self.set_description(
125+
&mut schema,
126+
"A time in HH:MM:SS format, e.g. 13:32:12",
127+
field_path,
128+
);
85129
}
86130
schema::BasicValueType::LocalDateTime => {
87131
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
88132
if self.options.supports_format {
89133
schema.format = Some("date-time".to_string());
90134
}
91-
schema.metadata.get_or_insert_default().description =
92-
Some("Date time without timezone offset in YYYY-MM-DDTHH:MM:SS format, e.g. 2025-03-27T13:32:12".to_string());
135+
self.set_description(
136+
&mut schema,
137+
"Date time without timezone offset in YYYY-MM-DDTHH:MM:SS format, e.g. 2025-03-27T13:32:12",
138+
field_path,
139+
);
93140
}
94141
schema::BasicValueType::OffsetDateTime => {
95142
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
96143
if self.options.supports_format {
97144
schema.format = Some("date-time".to_string());
98145
}
99-
schema.metadata.get_or_insert_default().description =
100-
Some("Date time with timezone offset in RFC3339, e.g. 2025-03-27T13:32:12Z, 2025-03-27T07:32:12.313-06:00".to_string());
146+
self.set_description(
147+
&mut schema,
148+
"Date time with timezone offset in RFC3339, e.g. 2025-03-27T13:32:12Z, 2025-03-27T07:32:12.313-06:00",
149+
field_path,
150+
);
101151
}
102152
schema::BasicValueType::Json => {
103153
// Can be any value. No type constraint.
@@ -106,7 +156,8 @@ impl JsonSchemaBuilder {
106156
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::Array)));
107157
schema.array = Some(Box::new(ArrayValidation {
108158
items: Some(SingleOrVec::Single(Box::new(
109-
self.for_basic_value_type(&s.element_type).into(),
159+
self.for_basic_value_type(&s.element_type, field_path)
160+
.into(),
110161
))),
111162
min_items: s.dimension.and_then(|d| u32::try_from(d).ok()),
112163
max_items: s.dimension.and_then(|d| u32::try_from(d).ok()),
@@ -117,54 +168,61 @@ impl JsonSchemaBuilder {
117168
schema
118169
}
119170

120-
fn for_struct_schema(&mut self, struct_schema: &schema::StructSchema) -> SchemaObject {
121-
SchemaObject {
122-
metadata: Some(Box::new(Metadata {
123-
description: struct_schema.description.as_ref().map(|s| s.to_string()),
124-
..Default::default()
125-
})),
126-
instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::Object))),
127-
object: Some(Box::new(ObjectValidation {
128-
properties: struct_schema
129-
.fields
130-
.iter()
131-
.map(|f| {
132-
let mut schema = self.for_enriched_value_type(&f.value_type);
133-
if self.options.fields_always_required && f.value_type.nullable {
134-
if let Some(instance_type) = &mut schema.instance_type {
135-
let mut types = match instance_type {
136-
SingleOrVec::Single(t) => vec![**t],
137-
SingleOrVec::Vec(t) => std::mem::take(t),
138-
};
139-
types.push(InstanceType::Null);
140-
*instance_type = SingleOrVec::Vec(types);
141-
}
171+
fn for_struct_schema(
172+
&mut self,
173+
struct_schema: &schema::StructSchema,
174+
field_path: RefList<'_, &'_ FieldName>,
175+
) -> SchemaObject {
176+
let mut schema = SchemaObject::default();
177+
if let Some(description) = &struct_schema.description {
178+
self.set_description(&mut schema, description, field_path);
179+
}
180+
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::Object)));
181+
schema.object = Some(Box::new(ObjectValidation {
182+
properties: struct_schema
183+
.fields
184+
.iter()
185+
.map(|f| {
186+
let mut schema =
187+
self.for_enriched_value_type(&f.value_type, field_path.prepend(&f.name));
188+
if self.options.fields_always_required && f.value_type.nullable {
189+
if let Some(instance_type) = &mut schema.instance_type {
190+
let mut types = match instance_type {
191+
SingleOrVec::Single(t) => vec![**t],
192+
SingleOrVec::Vec(t) => std::mem::take(t),
193+
};
194+
types.push(InstanceType::Null);
195+
*instance_type = SingleOrVec::Vec(types);
142196
}
143-
(f.name.to_string(), schema.into())
144-
})
145-
.collect(),
146-
required: struct_schema
147-
.fields
148-
.iter()
149-
.filter(|&f| (self.options.fields_always_required || !f.value_type.nullable))
150-
.map(|f| f.name.to_string())
151-
.collect(),
152-
additional_properties: Some(Schema::Bool(false).into()),
153-
..Default::default()
154-
})),
197+
}
198+
(f.name.to_string(), schema.into())
199+
})
200+
.collect(),
201+
required: struct_schema
202+
.fields
203+
.iter()
204+
.filter(|&f| (self.options.fields_always_required || !f.value_type.nullable))
205+
.map(|f| f.name.to_string())
206+
.collect(),
207+
additional_properties: Some(Schema::Bool(false).into()),
155208
..Default::default()
156-
}
209+
}));
210+
schema
157211
}
158212

159-
fn for_value_type(&mut self, value_type: &schema::ValueType) -> SchemaObject {
213+
fn for_value_type(
214+
&mut self,
215+
value_type: &schema::ValueType,
216+
field_path: RefList<'_, &'_ FieldName>,
217+
) -> SchemaObject {
160218
match value_type {
161-
schema::ValueType::Basic(b) => self.for_basic_value_type(b),
162-
schema::ValueType::Struct(s) => self.for_struct_schema(s),
219+
schema::ValueType::Basic(b) => self.for_basic_value_type(b, field_path),
220+
schema::ValueType::Struct(s) => self.for_struct_schema(s, field_path),
163221
schema::ValueType::Collection(c) => SchemaObject {
164222
instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::Array))),
165223
array: Some(Box::new(ArrayValidation {
166224
items: Some(SingleOrVec::Single(Box::new(
167-
self.for_struct_schema(&c.row).into(),
225+
self.for_struct_schema(&c.row, field_path).into(),
168226
))),
169227
..Default::default()
170228
})),
@@ -173,10 +231,42 @@ impl JsonSchemaBuilder {
173231
}
174232
}
175233

176-
pub fn for_enriched_value_type(
234+
fn for_enriched_value_type(
177235
&mut self,
178236
enriched_value_type: &schema::EnrichedValueType,
237+
field_path: RefList<'_, &'_ FieldName>,
179238
) -> SchemaObject {
180-
self.for_value_type(&enriched_value_type.typ)
239+
self.for_value_type(&enriched_value_type.typ, field_path)
181240
}
241+
242+
fn build_extra_instructions(&self) -> Result<Option<String>> {
243+
if self.extra_instructions_per_field.is_empty() {
244+
return Ok(None);
245+
}
246+
247+
let mut instructions = String::new();
248+
write!(&mut instructions, "Instructions for specific fields:\n\n")?;
249+
for (field_path, instruction) in self.extra_instructions_per_field.iter() {
250+
write!(
251+
&mut instructions,
252+
"- {}: {}\n\n",
253+
if field_path.is_empty() {
254+
"(root object)"
255+
} else {
256+
field_path.as_str()
257+
},
258+
instruction
259+
)?;
260+
}
261+
Ok(Some(instructions))
262+
}
263+
}
264+
265+
pub fn build_json_schema(
266+
value_type: &schema::EnrichedValueType,
267+
options: ToJsonSchemaOptions,
268+
) -> Result<(SchemaObject, Option<String>)> {
269+
let mut builder = JsonSchemaBuilder::new(options);
270+
let schema = builder.for_enriched_value_type(value_type, RefList::Nil);
271+
Ok((schema, builder.build_extra_instructions()?))
182272
}

src/base/value.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
use crate::{api_bail, api_error};
22

33
use super::schema::*;
4-
use anyhow::{Context, Result};
4+
use anyhow::Result;
55
use base64::prelude::*;
66
use chrono::Offset;
77
use log::warn;
@@ -10,7 +10,7 @@ use serde::{
1010
ser::{SerializeMap, SerializeSeq, SerializeTuple},
1111
Deserialize, Serialize,
1212
};
13-
use std::{collections::BTreeMap, ops::Deref, str::FromStr, sync::Arc};
13+
use std::{collections::BTreeMap, ops::Deref, sync::Arc};
1414

1515
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
1616
pub struct RangeValue {

src/llm/mod.rs

Lines changed: 1 addition & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -47,24 +47,7 @@ pub trait LlmGenerationClient: Send + Sync {
4747
request: LlmGenerateRequest<'req>,
4848
) -> Result<LlmGenerateResponse>;
4949

50-
/// If true, the LLM only accepts a JSON schema with all fields required.
51-
/// This is a limitation of LLM models such as OpenAI.
52-
/// Otherwise, the LLM will accept a JSON schema with optional fields.
53-
fn json_schema_fields_always_required(&self) -> bool {
54-
false
55-
}
56-
57-
/// If true, the LLM supports the `format` keyword in the JSON schema.
58-
fn json_schema_supports_format(&self) -> bool {
59-
true
60-
}
61-
62-
fn to_json_schema_options(&self) -> ToJsonSchemaOptions {
63-
ToJsonSchemaOptions {
64-
fields_always_required: self.json_schema_fields_always_required(),
65-
supports_format: self.json_schema_supports_format(),
66-
}
67-
}
50+
fn json_schema_options(&self) -> ToJsonSchemaOptions;
6851
}
6952

7053
mod ollama;

src/llm/ollama.rs

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,4 +75,12 @@ impl LlmGenerationClient for Client {
7575
text: json.response,
7676
})
7777
}
78+
79+
fn json_schema_options(&self) -> super::ToJsonSchemaOptions {
80+
super::ToJsonSchemaOptions {
81+
fields_always_required: false,
82+
supports_format: true,
83+
extract_descriptions: true,
84+
}
85+
}
7886
}

src/llm/openai.rs

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -98,11 +98,11 @@ impl LlmGenerationClient for Client {
9898
Ok(super::LlmGenerateResponse { text })
9999
}
100100

101-
fn json_schema_fields_always_required(&self) -> bool {
102-
true
103-
}
104-
105-
fn json_schema_supports_format(&self) -> bool {
106-
false
101+
fn json_schema_options(&self) -> super::ToJsonSchemaOptions {
102+
super::ToJsonSchemaOptions {
103+
fields_always_required: true,
104+
supports_format: false,
105+
extract_descriptions: false,
106+
}
107107
}
108108
}

0 commit comments

Comments
 (0)