Skip to content

Commit d9454fd

Browse files
committed
Refactor JsonSchemaBuilder to a struct with states.
1 parent 15967c4 commit d9454fd

File tree

2 files changed

+47
-45
lines changed

2 files changed

+47
-45
lines changed

src/base/json_schema.rs

Lines changed: 39 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,18 @@ pub struct ToJsonSchemaOptions {
1313
pub supports_format: bool,
1414
}
1515

16-
pub trait ToJsonSchema {
17-
fn to_json_schema(&self, options: &ToJsonSchemaOptions) -> SchemaObject;
16+
pub struct JsonSchemaBuilder {
17+
options: ToJsonSchemaOptions,
1818
}
1919

20-
impl ToJsonSchema for schema::BasicValueType {
21-
fn to_json_schema(&self, options: &ToJsonSchemaOptions) -> SchemaObject {
20+
impl JsonSchemaBuilder {
21+
pub fn new(options: ToJsonSchemaOptions) -> Self {
22+
Self { options }
23+
}
24+
25+
fn for_basic_value_type(&mut self, basic_type: &schema::BasicValueType) -> SchemaObject {
2226
let mut schema = SchemaObject::default();
23-
match self {
27+
match basic_type {
2428
schema::BasicValueType::Str => {
2529
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
2630
}
@@ -57,42 +61,39 @@ impl ToJsonSchema for schema::BasicValueType {
5761
}
5862
schema::BasicValueType::Uuid => {
5963
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
60-
if options.supports_format {
64+
if self.options.supports_format {
6165
schema.format = Some("uuid".to_string());
62-
} else {
63-
schema.metadata.get_or_insert_default().description =
64-
Some("A UUID, e.g. 123e4567-e89b-12d3-a456-426614174000".to_string());
6566
}
67+
schema.metadata.get_or_insert_default().description =
68+
Some("A UUID, e.g. 123e4567-e89b-12d3-a456-426614174000".to_string());
6669
}
6770
schema::BasicValueType::Date => {
6871
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
69-
if options.supports_format {
72+
if self.options.supports_format {
7073
schema.format = Some("date".to_string());
71-
} else {
72-
schema.metadata.get_or_insert_default().description =
73-
Some("A date, e.g. 2025-03-27".to_string());
7474
}
75+
schema.metadata.get_or_insert_default().description =
76+
Some("A date in YYYY-MM-DD format, e.g. 2025-03-27".to_string());
7577
}
7678
schema::BasicValueType::Time => {
7779
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
78-
if options.supports_format {
80+
if self.options.supports_format {
7981
schema.format = Some("time".to_string());
80-
} else {
81-
schema.metadata.get_or_insert_default().description =
82-
Some("A time, e.g. 13:32:12".to_string());
8382
}
83+
schema.metadata.get_or_insert_default().description =
84+
Some("A time in HH:MM:SS format, e.g. 13:32:12".to_string());
8485
}
8586
schema::BasicValueType::LocalDateTime => {
8687
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
87-
if options.supports_format {
88+
if self.options.supports_format {
8889
schema.format = Some("date-time".to_string());
8990
}
9091
schema.metadata.get_or_insert_default().description =
91-
Some("Date time without timezone offset, e.g. 2025-03-27T13:32:12".to_string());
92+
Some("Date time without timezone offset in YYYY-MM-DDTHH:MM:SS format, e.g. 2025-03-27T13:32:12".to_string());
9293
}
9394
schema::BasicValueType::OffsetDateTime => {
9495
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
95-
if options.supports_format {
96+
if self.options.supports_format {
9697
schema.format = Some("date-time".to_string());
9798
}
9899
schema.metadata.get_or_insert_default().description =
@@ -105,7 +106,7 @@ impl ToJsonSchema for schema::BasicValueType {
105106
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::Array)));
106107
schema.array = Some(Box::new(ArrayValidation {
107108
items: Some(SingleOrVec::Single(Box::new(
108-
s.element_type.to_json_schema(options).into(),
109+
self.for_basic_value_type(&s.element_type).into(),
109110
))),
110111
min_items: s.dimension.and_then(|d| u32::try_from(d).ok()),
111112
max_items: s.dimension.and_then(|d| u32::try_from(d).ok()),
@@ -115,23 +116,21 @@ impl ToJsonSchema for schema::BasicValueType {
115116
}
116117
schema
117118
}
118-
}
119119

120-
impl ToJsonSchema for schema::StructSchema {
121-
fn to_json_schema(&self, options: &ToJsonSchemaOptions) -> SchemaObject {
120+
fn for_struct_schema(&mut self, struct_schema: &schema::StructSchema) -> SchemaObject {
122121
SchemaObject {
123122
metadata: Some(Box::new(Metadata {
124-
description: self.description.as_ref().map(|s| s.to_string()),
123+
description: struct_schema.description.as_ref().map(|s| s.to_string()),
125124
..Default::default()
126125
})),
127126
instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::Object))),
128127
object: Some(Box::new(ObjectValidation {
129-
properties: self
128+
properties: struct_schema
130129
.fields
131130
.iter()
132131
.map(|f| {
133-
let mut schema = f.value_type.to_json_schema(options);
134-
if options.fields_always_required && f.value_type.nullable {
132+
let mut schema = self.for_enriched_value_type(&f.value_type);
133+
if self.options.fields_always_required && f.value_type.nullable {
135134
if let Some(instance_type) = &mut schema.instance_type {
136135
let mut types = match instance_type {
137136
SingleOrVec::Single(t) => vec![**t],
@@ -144,10 +143,10 @@ impl ToJsonSchema for schema::StructSchema {
144143
(f.name.to_string(), schema.into())
145144
})
146145
.collect(),
147-
required: self
146+
required: struct_schema
148147
.fields
149148
.iter()
150-
.filter(|&f| (options.fields_always_required || !f.value_type.nullable))
149+
.filter(|&f| (self.options.fields_always_required || !f.value_type.nullable))
151150
.map(|f| f.name.to_string())
152151
.collect(),
153152
additional_properties: Some(Schema::Bool(false).into()),
@@ -156,29 +155,28 @@ impl ToJsonSchema for schema::StructSchema {
156155
..Default::default()
157156
}
158157
}
159-
}
160158

161-
impl ToJsonSchema for schema::ValueType {
162-
fn to_json_schema(&self, options: &ToJsonSchemaOptions) -> SchemaObject {
163-
match self {
164-
schema::ValueType::Basic(b) => b.to_json_schema(options),
165-
schema::ValueType::Struct(s) => s.to_json_schema(options),
159+
fn for_value_type(&mut self, value_type: &schema::ValueType) -> SchemaObject {
160+
match value_type {
161+
schema::ValueType::Basic(b) => self.for_basic_value_type(b),
162+
schema::ValueType::Struct(s) => self.for_struct_schema(s),
166163
schema::ValueType::Collection(c) => SchemaObject {
167164
instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::Array))),
168165
array: Some(Box::new(ArrayValidation {
169166
items: Some(SingleOrVec::Single(Box::new(
170-
c.row.to_json_schema(options).into(),
167+
self.for_struct_schema(&c.row).into(),
171168
))),
172169
..Default::default()
173170
})),
174171
..Default::default()
175172
},
176173
}
177174
}
178-
}
179175

180-
impl ToJsonSchema for schema::EnrichedValueType {
181-
fn to_json_schema(&self, options: &ToJsonSchemaOptions) -> SchemaObject {
182-
self.typ.to_json_schema(options)
176+
pub fn for_enriched_value_type(
177+
&mut self,
178+
enriched_value_type: &schema::EnrichedValueType,
179+
) -> SchemaObject {
180+
self.for_value_type(&enriched_value_type.typ)
183181
}
184182
}

src/ops/functions/extract_by_llm.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
use std::borrow::Cow;
22
use std::sync::Arc;
33

4+
use log::debug;
45
use schemars::schema::SchemaObject;
56
use serde::Serialize;
67

7-
use crate::base::json_schema::ToJsonSchema;
8+
use crate::base::json_schema::JsonSchemaBuilder;
89
use crate::llm::{
910
new_llm_generation_client, LlmGenerateRequest, LlmGenerationClient, LlmSpec, OutputFormat,
1011
};
@@ -48,9 +49,8 @@ Output only the JSON without any additional messages or explanations."
4849
impl Executor {
4950
async fn new(spec: Spec, args: Args) -> Result<Self> {
5051
let client = new_llm_generation_client(spec.llm_spec).await?;
51-
let output_json_schema = spec
52-
.output_type
53-
.to_json_schema(&client.to_json_schema_options());
52+
let mut json_schema_builder = JsonSchemaBuilder::new(client.to_json_schema_options());
53+
let output_json_schema = json_schema_builder.for_enriched_value_type(&spec.output_type);
5454
Ok(Self {
5555
args,
5656
client,
@@ -83,6 +83,10 @@ impl SimpleFunctionExecutor for Executor {
8383
};
8484
let res = self.client.generate(req).await?;
8585
let json_value: serde_json::Value = serde_json::from_str(res.text.as_str())?;
86+
debug!(
87+
"json_value:\n{}",
88+
serde_json::to_string_pretty(&json_value)?
89+
);
8690
let value = Value::from_json(json_value, &self.output_type.typ)?;
8791
Ok(value)
8892
}

0 commit comments

Comments
 (0)