Skip to content

Commit 565d2e6

Browse files
committed
fix: address PR review feedback for schema compression
- Replace all 'any' types with proper TypeScript interfaces (JsonSchema, JsonSchemaProperty) - Add XML escaping to prevent XSS vulnerabilities in property names and enum values - Improve token estimation with more sophisticated algorithm - Add constants for magic numbers (ENUM_DISPLAY_THRESHOLD, TOKENS_PER_CHARACTER) - Add comprehensive JSDoc documentation for all functions - Handle edge cases: null/undefined schemas, malformed schemas, nested structures - Fix reduction calculation to prevent negative values - Expand test coverage for error scenarios and edge cases - Add support for additional string formats (uuid, ipv4, ipv6, etc.) - Show nested object structure for simple objects (≤2 properties)
1 parent fec389d commit 565d2e6

File tree

2 files changed

+431
-29
lines changed

2 files changed

+431
-29
lines changed

src/core/prompts/utils/__tests__/schemaCompressor.spec.ts

Lines changed: 292 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ describe("schemaCompressor", () => {
77
describe("jsonSchemaToXml", () => {
88
it("should handle empty schema", () => {
99
expect(jsonSchemaToXml(null)).toBe("<schema></schema>")
10+
expect(jsonSchemaToXml(undefined)).toBe("<schema></schema>")
1011
expect(jsonSchemaToXml({})).toBe("<schema></schema>")
1112
expect(jsonSchemaToXml({ properties: {} })).toBe("<schema></schema>")
1213
})
@@ -74,6 +75,36 @@ describe("schemaCompressor", () => {
7475
expect(jsonSchemaToXml(schema)).toBe("<schema>tech_stack*:array[string], numbers?:array[number]</schema>")
7576
})
7677

78+
it("should handle array schema at root level", () => {
79+
const schema = {
80+
type: "array",
81+
items: {
82+
type: "string",
83+
},
84+
}
85+
86+
expect(jsonSchemaToXml(schema)).toBe("<schema>array[string]</schema>")
87+
})
88+
89+
it("should handle nested array types", () => {
90+
const schema = {
91+
type: "object",
92+
properties: {
93+
matrix: {
94+
type: "array",
95+
items: {
96+
type: "array",
97+
items: {
98+
type: "number",
99+
},
100+
},
101+
},
102+
},
103+
}
104+
105+
expect(jsonSchemaToXml(schema)).toBe("<schema>matrix?:array[array[number]]</schema>")
106+
})
107+
77108
it("should handle enum types", () => {
78109
const schema = {
79110
type: "object",
@@ -93,6 +124,38 @@ describe("schemaCompressor", () => {
93124
expect(jsonSchemaToXml(schema)).toBe("<schema>format*:enum(json|xml|csv), size?:enum</schema>")
94125
})
95126

127+
it("should escape XML special characters in enum values", () => {
128+
const schema = {
129+
type: "object",
130+
properties: {
131+
operator: {
132+
type: "string",
133+
enum: ["<", ">", "&", '"', "'"],
134+
},
135+
},
136+
}
137+
138+
expect(jsonSchemaToXml(schema)).toBe("<schema>operator?:enum</schema>")
139+
})
140+
141+
it("should escape XML special characters in property names", () => {
142+
const schema = {
143+
type: "object",
144+
properties: {
145+
"<script>alert('xss')</script>": {
146+
type: "string",
147+
},
148+
"user&admin": {
149+
type: "boolean",
150+
},
151+
},
152+
}
153+
154+
expect(jsonSchemaToXml(schema)).toBe(
155+
"<schema>&lt;script&gt;alert(&apos;xss&apos;)&lt;/script&gt;?:string, user&amp;admin?:boolean</schema>",
156+
)
157+
})
158+
96159
it("should handle object types", () => {
97160
const schema = {
98161
type: "object",
@@ -108,7 +171,28 @@ describe("schemaCompressor", () => {
108171
required: ["requirements"],
109172
}
110173

111-
expect(jsonSchemaToXml(schema)).toBe("<schema>requirements*:object</schema>")
174+
expect(jsonSchemaToXml(schema)).toBe(
175+
"<schema>requirements*:object{description:string,scale:string}</schema>",
176+
)
177+
})
178+
179+
it("should handle complex nested object types", () => {
180+
const schema = {
181+
type: "object",
182+
properties: {
183+
config: {
184+
type: "object",
185+
properties: {
186+
nested1: { type: "string" },
187+
nested2: { type: "number" },
188+
nested3: { type: "boolean" },
189+
},
190+
},
191+
},
192+
}
193+
194+
// Should simplify to just "object" when more than 2 properties
195+
expect(jsonSchemaToXml(schema)).toBe("<schema>config?:object</schema>")
112196
})
113197

114198
it("should handle date and url formats", () => {
@@ -132,6 +216,105 @@ describe("schemaCompressor", () => {
132216

133217
expect(jsonSchemaToXml(schema)).toBe("<schema>created_date?:date, website?:url, email?:email</schema>")
134218
})
219+
220+
it("should handle additional string formats", () => {
221+
const schema = {
222+
type: "object",
223+
properties: {
224+
id: {
225+
type: "string",
226+
format: "uuid",
227+
},
228+
timestamp: {
229+
type: "string",
230+
format: "date-time",
231+
},
232+
ipv4: {
233+
type: "string",
234+
format: "ipv4",
235+
},
236+
ipv6: {
237+
type: "string",
238+
format: "ipv6",
239+
},
240+
},
241+
}
242+
243+
expect(jsonSchemaToXml(schema)).toBe(
244+
"<schema>id?:uuid, timestamp?:datetime, ipv4?:ipv4, ipv6?:ipv6</schema>",
245+
)
246+
})
247+
248+
it("should handle null type", () => {
249+
const schema = {
250+
type: "object",
251+
properties: {
252+
nullable_field: {
253+
type: "null",
254+
},
255+
},
256+
}
257+
258+
expect(jsonSchemaToXml(schema)).toBe("<schema>nullable_field?:null</schema>")
259+
})
260+
261+
it("should handle oneOf/anyOf union types", () => {
262+
const schema = {
263+
type: "object",
264+
properties: {
265+
union_field: {
266+
oneOf: [{ type: "string" }, { type: "number" }],
267+
},
268+
any_field: {
269+
anyOf: [{ type: "boolean" }, { type: "null" }],
270+
},
271+
},
272+
}
273+
274+
expect(jsonSchemaToXml(schema)).toBe("<schema>union_field?:string|number, any_field?:boolean|null</schema>")
275+
})
276+
277+
it("should handle malformed schemas gracefully", () => {
278+
const malformedSchemas = [
279+
{ properties: { field: null } },
280+
{ properties: { field: "not an object" } },
281+
{ properties: { field: [] } },
282+
{ properties: { field: { type: [] } } },
283+
]
284+
285+
malformedSchemas.forEach((schema) => {
286+
expect(() => jsonSchemaToXml(schema as any)).not.toThrow()
287+
})
288+
})
289+
290+
it("should handle arrays without items", () => {
291+
const schema = {
292+
type: "object",
293+
properties: {
294+
empty_array: {
295+
type: "array",
296+
},
297+
},
298+
}
299+
300+
expect(jsonSchemaToXml(schema)).toBe("<schema>empty_array?:array[any]</schema>")
301+
})
302+
303+
it("should handle arrays with enum items", () => {
304+
const schema = {
305+
type: "object",
306+
properties: {
307+
roles: {
308+
type: "array",
309+
items: {
310+
enum: ["admin", "user", "guest"],
311+
},
312+
},
313+
},
314+
}
315+
316+
expect(jsonSchemaToXml(schema)).toBe("<schema>roles?:array[enum(admin|user|guest)]</schema>")
317+
})
135318
})
136319

137320
describe("compressSchemaWithMetrics", () => {
@@ -153,6 +336,48 @@ describe("schemaCompressor", () => {
153336
expect(result.originalTokens).toBeGreaterThan(result.compressedTokens)
154337
expect(result.reduction).toBeGreaterThan(50) // Should be significant reduction
155338
})
339+
340+
it("should handle null/undefined schemas", () => {
341+
const resultNull = compressSchemaWithMetrics(null)
342+
const resultUndefined = compressSchemaWithMetrics(undefined)
343+
344+
expect(resultNull.compressed).toBe("<schema></schema>")
345+
expect(resultUndefined.compressed).toBe("<schema></schema>")
346+
expect(resultNull.reduction).toBe(0)
347+
expect(resultUndefined.reduction).toBe(0)
348+
})
349+
350+
it("should provide accurate token estimation for various text lengths", () => {
351+
const shortSchema = { type: "string" }
352+
const mediumSchema = {
353+
type: "object",
354+
properties: {
355+
field1: { type: "string" },
356+
field2: { type: "number" },
357+
},
358+
}
359+
const longSchema = {
360+
type: "object",
361+
properties: {
362+
field1: { type: "string", description: "A very long description that adds many tokens" },
363+
field2: { type: "array", items: { type: "object", properties: { nested: { type: "boolean" } } } },
364+
field3: { type: "string", enum: ["option1", "option2", "option3", "option4"] },
365+
},
366+
}
367+
368+
const shortResult = compressSchemaWithMetrics(shortSchema)
369+
const mediumResult = compressSchemaWithMetrics(mediumSchema)
370+
const longResult = compressSchemaWithMetrics(longSchema)
371+
372+
// Token counts should increase with complexity
373+
expect(shortResult.originalTokens).toBeLessThan(mediumResult.originalTokens)
374+
expect(mediumResult.originalTokens).toBeLessThan(longResult.originalTokens)
375+
376+
// All should show reduction
377+
expect(shortResult.reduction).toBeGreaterThan(0)
378+
expect(mediumResult.reduction).toBeGreaterThan(0)
379+
expect(longResult.reduction).toBeGreaterThan(0)
380+
})
156381
})
157382

158383
describe("compressToolSchemas", () => {
@@ -161,28 +386,28 @@ describe("schemaCompressor", () => {
161386
{
162387
name: "search",
163388
inputSchema: {
164-
type: "object",
389+
type: "object" as const,
165390
properties: {
166-
query: { type: "string" },
391+
query: { type: "string" as const },
167392
},
168393
required: ["query"],
169394
},
170395
},
171396
{
172397
name: "translate",
173398
inputSchema: {
174-
type: "object",
399+
type: "object" as const,
175400
properties: {
176-
text: { type: "string" },
177-
target_lang: { type: "string" },
178-
source_lang: { type: "string" },
401+
text: { type: "string" as const },
402+
target_lang: { type: "string" as const },
403+
source_lang: { type: "string" as const },
179404
},
180405
required: ["text", "target_lang"],
181406
},
182407
},
183408
]
184409

185-
const result = compressToolSchemas(tools)
410+
const result = compressToolSchemas(tools as Array<{ name: string; inputSchema?: any }>)
186411

187412
expect(result.compressedTools).toHaveLength(2)
188413
expect(result.compressedTools[0].compressedSchema).toBe("<schema>query*:string</schema>")
@@ -192,6 +417,31 @@ describe("schemaCompressor", () => {
192417
expect(result.totalReduction).toBeGreaterThan(0)
193418
expect(result.originalTokens).toBeGreaterThan(result.compressedTokens)
194419
})
420+
421+
it("should handle tools without schemas", () => {
422+
const tools = [
423+
{ name: "tool1", inputSchema: undefined },
424+
{ name: "tool2" },
425+
{ name: "tool3", inputSchema: null },
426+
]
427+
428+
const result = compressToolSchemas(tools as Array<{ name: string; inputSchema?: any }>)
429+
430+
expect(result.compressedTools).toHaveLength(3)
431+
result.compressedTools.forEach((tool) => {
432+
expect(tool.compressedSchema).toBe("<schema></schema>")
433+
})
434+
expect(result.totalReduction).toBe(0)
435+
})
436+
437+
it("should handle empty tools array", () => {
438+
const result = compressToolSchemas([])
439+
440+
expect(result.compressedTools).toHaveLength(0)
441+
expect(result.totalReduction).toBe(0)
442+
expect(result.originalTokens).toBe(0)
443+
expect(result.compressedTokens).toBe(0)
444+
})
195445
})
196446

197447
describe("real MCP server examples", () => {
@@ -243,5 +493,39 @@ describe("schemaCompressor", () => {
243493
"<schema>libraryName*:string, sort?:enum(trust_score|last_updated|snippets), minTrustScore?:number, maxTrustScore?:number</schema>",
244494
)
245495
})
496+
497+
it("should handle complex real-world schema with nested structures", () => {
498+
const schema = {
499+
type: "object",
500+
properties: {
501+
method: {
502+
type: "string",
503+
enum: ["GET", "POST", "PUT", "DELETE"],
504+
},
505+
headers: {
506+
type: "object",
507+
properties: {
508+
"Content-Type": { type: "string" },
509+
Authorization: { type: "string" },
510+
},
511+
},
512+
body: {
513+
oneOf: [{ type: "string" }, { type: "object" }],
514+
},
515+
timeout: {
516+
type: "integer",
517+
description: "Request timeout in milliseconds",
518+
},
519+
},
520+
required: ["method"],
521+
}
522+
523+
const result = jsonSchemaToXml(schema)
524+
// Enum with 4 values should be simplified to just "enum" based on ENUM_DISPLAY_THRESHOLD = 3
525+
expect(result).toContain("method*:enum")
526+
expect(result).toContain("headers?:object{Content-Type:string,Authorization:string}")
527+
expect(result).toContain("body?:string|object")
528+
expect(result).toContain("timeout?:number")
529+
})
246530
})
247531
})

0 commit comments

Comments
 (0)