Skip to content

Commit 88170f5

Browse files
committed
refactor(ajv): rewrite validateSchema to leverage internal ajv cache
This change slightly improves memory performance by generating fewer AJV objects and using smarter logic to funnel repeated schema to the same pre-compiled definition. Signed-off-by: Jeremy Ho <jujaga@gmail.com>
1 parent 15356a6 commit 88170f5

File tree

2 files changed

+129
-75
lines changed

2 files changed

+129
-75
lines changed

src/validators/validator.ts

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
1-
import { Ajv } from 'ajv';
2-
31
import { integrityValidators } from './integrity/index.ts';
4-
import { createAjvInstance, getPiesSchemaUri, loadSchema, pies } from './schema/index.ts';
2+
import { createAjvInstance, ensureSchemaId, getPiesSchemaUri, loadSchema, pies } from './schema/index.ts';
53
import { getLogger } from '../utils/index.ts';
64

75
import type { AnySchemaObject, AnyValidateFunction, ErrorObject } from 'ajv/dist/core.js';
86
import type { IntegrityDictionary, IntegrityResult } from '../types/index.d.ts';
97

8+
const ajv = createAjvInstance();
9+
1010
const log = getLogger(import.meta.filename);
11-
const stringSchemaCache: Record<string, Ajv> = {};
12-
const objectSchemaCache = new WeakMap<AnySchemaObject, Promise<AnyValidateFunction<unknown>>>();
11+
const inFlightCompilations = new Map<string, Promise<AnyValidateFunction<unknown>>>();
1312

1413
// Only pre-cache schemas in production to avoid bombarding Github API in development
1514
if (process.env.NODE_ENV === 'production') await preCachePiesSchema();
@@ -47,46 +46,51 @@ export function validateIntegrity<K extends keyof IntegrityDictionary>(
4746
}
4847

4948
/**
50-
* Validates data against a JSON schema using asynchronous compilation and caching.
49+
* Validates data against a JSON schema using async compilation, deterministic hashing, and request deduplication.
5150
* @remarks
52-
* This function optimizes performance by:
53-
* 1. Caching Ajv instances for string-identified schemas.
54-
* 2. Caching compilation promises for schema objects to prevent redundant processing.
51+
* This function optimizes performance and reliability through three layers:
52+
* 1. **Persistent Cache**: Uses Ajv's internal registry for schemas with a known `$id` or URI.
53+
* 2. **Deterministic Hashing**: Anonymous schema objects are automatically assigned a
54+
* stable `$id` based on their content, preventing redundant compilations of identical structures.
55+
* 3. **Concurrency Lock**: Uses an `in-flight` promise map to ensure that multiple
56+
* simultaneous requests for the same new schema trigger only one compilation/load task.
5557
* @param schema - The validation blueprint. Can be a string identifier (URI/ID) or a schema object.
56-
* @param data - The payload to validate. Treated as `unknown` to ensure type-safe handling.
58+
* Objects without an `$id` will have one auto-generated.
59+
* @param data - The payload to validate. Treated as `unknown` for type safety.
5760
* @returns A promise resolving to a validation result:
5861
* - `valid`: true if the data satisfies the schema.
5962
* - `errors`: An array of `ErrorObject` if validation fails, otherwise undefined.
60-
* @throws {Error} Will throw if the schema cannot be loaded or if compilation fails.
63+
* @throws {Error} Will throw if `loadSchema` fails to fetch a remote definition or
64+
* if the schema contains syntax errors.
6165
*/
6266
export async function validateSchema(
6367
schema: AnySchemaObject | string,
6468
data: unknown
6569
): Promise<{ valid: boolean; errors?: ErrorObject[] }> {
66-
let validate: AnyValidateFunction<unknown>;
67-
if (typeof schema === 'string') {
68-
const cached = schema in stringSchemaCache;
69-
log.verbose('validateSchema', { cached, schema });
70+
const isString = typeof schema === 'string';
71+
const definition = isString ? null : ensureSchemaId(schema);
72+
const schemaId = isString ? schema : definition!.$id!;
7073

71-
if (cached) {
72-
validate = stringSchemaCache[schema].getSchema(schema)!;
73-
} else {
74-
const ajv = createAjvInstance();
74+
let validate = ajv.getSchema(schemaId);
7575

76-
const def = await loadSchema(schema);
77-
validate = await ajv.compileAsync(def);
78-
stringSchemaCache[schema] = ajv;
76+
if (!validate) {
77+
// Check if another request is already compiling this
78+
let promise = inFlightCompilations.get(schemaId);
79+
if (!promise) {
80+
promise = (async (): Promise<AnyValidateFunction<unknown>> => {
81+
try {
82+
const finalDef = definition ?? (await loadSchema(schemaId));
83+
return await ajv.compileAsync(finalDef);
84+
} finally {
85+
inFlightCompilations.delete(schemaId);
86+
}
87+
})();
88+
inFlightCompilations.set(schemaId, promise);
7989
}
80-
} else {
81-
let validatePromise = objectSchemaCache.get(schema);
82-
if (!validatePromise) {
83-
const ajv = createAjvInstance();
84-
validatePromise = ajv.compileAsync(schema);
85-
objectSchemaCache.set(schema, validatePromise);
86-
}
87-
validate = await validatePromise;
90+
91+
validate = await promise;
8892
}
8993

9094
const valid = !!validate(data);
91-
return { valid: valid, errors: validate.errors ?? undefined };
95+
return { valid: valid, errors: validate.errors?.slice() };
9296
}
Lines changed: 94 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,36 @@
1-
import { createAjvInstance, loadSchema, getPiesSchemaUri } from '../../../src/validators/schema/index.ts';
21
import * as integrity from '../../../src/validators/integrity/index.ts';
3-
import { validateSchema, validateIntegrity, preCachePiesSchema } from '../../../src/validators/validator.ts';
2+
import {
3+
createAjvInstance,
4+
ensureSchemaId,
5+
getPiesSchemaUri,
6+
loadSchema
7+
} from '../../../src/validators/schema/index.ts';
8+
import * as validator from '../../../src/validators/validator.ts';
49

510
import type { Mock } from 'vitest';
611

12+
const { mockAjv, mockValidate } = vi.hoisted(() => {
13+
const mv = vi.fn();
14+
return {
15+
mockAjv: {
16+
compileAsync: vi.fn().mockResolvedValue(mv),
17+
getSchema: vi.fn()
18+
},
19+
mockValidate: mv
20+
};
21+
});
22+
723
vi.mock('../../../src/validators/schema/index.ts', () => ({
8-
createAjvInstance: vi.fn(),
9-
loadSchema: vi.fn(),
24+
createAjvInstance: vi.fn().mockReturnValue(mockAjv),
25+
ensureSchemaId: vi.fn(
26+
<T>(schema: T extends object ? T : never) =>
27+
({ ...schema, $id: `test-id-${Object.keys(schema).join('-')}` }) as T & { $id: string }
28+
),
1029
getPiesSchemaUri: vi.fn(),
30+
loadSchema: vi.fn(),
1131
pies: {
1232
spec: {
13-
message: {
14-
A: 'schema-a',
15-
B: 'schema-b'
16-
}
33+
message: { A: 'schema-a', B: 'schema-b' }
1734
}
1835
}
1936
}));
@@ -25,20 +42,21 @@ vi.mock('../../../src/validators/integrity/index.ts', () => ({
2542
}));
2643

2744
describe('preCachePiesSchema', () => {
28-
it('should iterate through all pies message schemas and trigger validation', async () => {
45+
it('should iterate through all pies message schemas and trigger compilation', async () => {
46+
(loadSchema as Mock).mockResolvedValue({ type: 'object' });
47+
(mockAjv.getSchema as Mock).mockReturnValue(undefined); // Force compilation path
48+
(mockAjv.compileAsync as Mock).mockResolvedValue(mockValidate.mockReturnValue(true));
2949
(getPiesSchemaUri as Mock).mockImplementation((val) => `uri-${val}`);
3050

31-
const mockAjv = {
32-
compileAsync: vi.fn().mockResolvedValue(vi.fn().mockReturnValue(true))
33-
};
34-
(createAjvInstance as Mock).mockReturnValue(mockAjv);
35-
(loadSchema as Mock).mockResolvedValue({});
36-
37-
await preCachePiesSchema();
51+
await validator.preCachePiesSchema();
3852

3953
expect(getPiesSchemaUri).toHaveBeenCalledWith('schema-a');
4054
expect(getPiesSchemaUri).toHaveBeenCalledWith('schema-b');
41-
expect(loadSchema).toHaveBeenCalledTimes(2);
55+
56+
// Verify that the underlying compilation mechanism was triggered for each schema.
57+
expect(loadSchema).toHaveBeenCalledWith('uri-schema-a');
58+
expect(loadSchema).toHaveBeenCalledWith('uri-schema-b');
59+
expect(mockAjv.compileAsync).toHaveBeenCalledTimes(2);
4260
});
4361
});
4462

@@ -48,7 +66,7 @@ describe('validateIntegrity', () => {
4866
(integrity.integrityValidators.recordLinkage as Mock).mockReturnValue(mockResult);
4967

5068
const data = { some: 'data' };
51-
const result = validateIntegrity('recordLinkage', data);
69+
const result = validator.validateIntegrity('recordLinkage', data);
5270

5371
expect(integrity.integrityValidators.recordLinkage).toHaveBeenCalledWith(data);
5472
expect(result).toBe(mockResult);
@@ -59,65 +77,97 @@ describe('validateIntegrity', () => {
5977
const data = {};
6078

6179
expect(() => {
62-
validateIntegrity(invalidType, data);
80+
validator.validateIntegrity(invalidType, data);
6381
}).toThrow();
6482
});
6583
});
6684

6785
describe('validateSchema', () => {
86+
beforeEach(() => {
87+
(createAjvInstance as Mock).mockReturnValue(mockAjv);
88+
(mockAjv.compileAsync as Mock).mockResolvedValue(mockValidate);
89+
});
90+
6891
it('should compile and cache a string-based schema', async () => {
6992
const mockUri = 'https://schema.com/test.json';
7093
const mockSchemaDef = { type: 'object' };
71-
const mockValidate = vi.fn().mockReturnValue(true);
72-
const mockAjv = {
73-
compileAsync: vi.fn().mockResolvedValue(mockValidate),
74-
getSchema: vi.fn().mockReturnValue(mockValidate)
75-
};
7694

7795
(loadSchema as Mock).mockResolvedValue(mockSchemaDef);
78-
(createAjvInstance as Mock).mockReturnValue(mockAjv);
96+
(mockAjv.getSchema as Mock).mockReturnValueOnce(undefined).mockReturnValue(mockValidate);
97+
mockValidate.mockReturnValue(true);
7998

8099
// First call: should compile
81-
const result1 = await validateSchema(mockUri, { foo: 'bar' });
82-
100+
const result1 = await validator.validateSchema(mockUri, { foo: 'bar' });
83101
expect(loadSchema).toHaveBeenCalledWith(mockUri);
84102
expect(mockAjv.compileAsync).toHaveBeenCalledWith(mockSchemaDef);
85103
expect(result1.valid).toBe(true);
86104

87105
// Second call: should use cache (mockAjv.getSchema)
88-
await validateSchema(mockUri, { foo: 'bar' });
106+
const result2 = await validator.validateSchema(mockUri, { foo: 'bar' });
89107
expect(mockAjv.getSchema).toHaveBeenCalledWith(mockUri);
108+
expect(result2.valid).toBe(true);
109+
expect(mockAjv.compileAsync).toHaveBeenCalledTimes(1);
90110
});
91111

92112
it('should handle validation errors correctly', async () => {
93113
const mockSchemaObj = { type: 'number' };
94114
const mockErrors = [{ message: 'should be number' }];
95-
const mockValidate = Object.assign(vi.fn().mockReturnValue(false), { errors: mockErrors });
115+
const failingValidator = Object.assign(vi.fn().mockReturnValue(false), { errors: mockErrors });
96116

97-
const mockAjv = {
98-
compileAsync: vi.fn().mockResolvedValue(mockValidate)
99-
};
100-
(createAjvInstance as Mock).mockReturnValue(mockAjv);
117+
(mockAjv.getSchema as Mock).mockReturnValue(undefined);
118+
(mockAjv.compileAsync as Mock).mockResolvedValue(failingValidator);
101119

102-
const result = await validateSchema(mockSchemaObj, 'not-a-number');
120+
const result = await validator.validateSchema(mockSchemaObj, 'not-a-number');
103121

104122
expect(result.valid).toBe(false);
105123
expect(result.errors).toEqual(mockErrors);
106124
});
107125

108-
it('should use WeakMap cache for object-based schemas', async () => {
126+
it('should use cache for object-based schemas', async () => {
109127
const mockSchemaObj = { type: 'string' };
110-
const mockValidate = vi.fn().mockReturnValue(true);
111-
const mockAjv = {
112-
compileAsync: vi.fn().mockResolvedValue(mockValidate)
113-
};
114-
(createAjvInstance as Mock).mockReturnValue(mockAjv);
128+
(mockAjv.getSchema as Mock).mockReturnValueOnce(undefined).mockReturnValue(mockValidate);
129+
mockValidate.mockReturnValue(true);
115130

116131
// Call twice with the same object reference
117-
await validateSchema(mockSchemaObj, 'test');
118-
await validateSchema(mockSchemaObj, 'test');
132+
await validator.validateSchema(mockSchemaObj, 'test');
133+
await validator.validateSchema(mockSchemaObj, 'test');
134+
135+
// compileAsync should only be called once for this object
136+
expect(mockAjv.compileAsync).toHaveBeenCalledTimes(1);
137+
expect(ensureSchemaId).toHaveBeenCalledTimes(2);
138+
});
139+
140+
it('should handle concurrent requests for the same schema with a single compilation', async () => {
141+
const mockUri = 'https://schema.com/concurrent.json';
142+
const mockSchemaDef = { type: 'string' };
143+
(loadSchema as Mock).mockResolvedValue(mockSchemaDef);
144+
(mockAjv.getSchema as Mock).mockReturnValue(undefined);
145+
mockValidate.mockReturnValue(true);
146+
147+
// Call validateSchema multiple times without awaiting
148+
const [result1, result2] = await Promise.all([
149+
validator.validateSchema(mockUri, 'test'),
150+
validator.validateSchema(mockUri, 'test')
151+
]);
152+
expect(result1.valid).toBe(true);
153+
expect(result2.valid).toBe(true);
154+
155+
// Check that loadSchema and compileAsync were only called once
156+
expect(loadSchema).toHaveBeenCalledTimes(1);
157+
expect(mockAjv.compileAsync).toHaveBeenCalledTimes(1);
158+
});
119159

120-
// Ajv instance should only be created once for this object
121-
expect(createAjvInstance).toHaveBeenCalledTimes(1);
160+
it('should propagate errors from schema loading and cleanup in-flight map', async () => {
161+
const mockUri = 'https://schema.com/failing.json';
162+
const error = new Error('Failed to load schema');
163+
(loadSchema as Mock).mockRejectedValue(error);
164+
(mockAjv.getSchema as Mock).mockReturnValue(undefined);
165+
166+
await expect(validator.validateSchema(mockUri, 'test')).rejects.toThrow(error);
167+
expect(loadSchema).toHaveBeenCalledTimes(1);
168+
169+
// Try again, it should try to load again since the in-flight promise was removed
170+
await expect(validator.validateSchema(mockUri, 'test')).rejects.toThrow(error);
171+
expect(loadSchema).toHaveBeenCalledTimes(2);
122172
});
123173
});

0 commit comments

Comments
 (0)