-
-
Notifications
You must be signed in to change notification settings - Fork 252
Add normalized hash-based test IDs to draft2020-12/enum.json (POC for #698) #796
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
44fc7b2
d2980e4
8b990ad
7a2270b
6cc0f8f
7b86302
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,12 +1,21 @@ | ||
| { | ||
| "name": "json-schema-test-suite", | ||
| "version": "0.1.0", | ||
| "type": "module", | ||
| "description": "A language agnostic test suite for the JSON Schema specifications", | ||
| "repository": "github:json-schema-org/JSON-Schema-Test-Suite", | ||
| "keywords": [ | ||
| "json-schema", | ||
| "tests" | ||
| ], | ||
| "author": "http://json-schema.org", | ||
| "license": "MIT" | ||
| "license": "MIT", | ||
| "dependencies": { | ||
| "@hyperjump/browser": "^1.3.1", | ||
| "@hyperjump/json-pointer": "^1.1.1", | ||
| "@hyperjump/json-schema": "^1.17.2", | ||
| "@hyperjump/pact": "^1.4.0", | ||
| "@hyperjump/uri": "^1.3.2", | ||
| "json-stringify-deterministic": "^1.0.12" | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,74 @@ | ||||||
| import * as fs from "node:fs"; | ||||||
| import * as crypto from "node:crypto"; | ||||||
| import jsonStringify from "json-stringify-deterministic"; | ||||||
| import { normalize } from "./normalize.js"; | ||||||
| import { loadRemotes } from "./load-remotes.js"; | ||||||
|
|
||||||
| const DIALECT_MAP = { | ||||||
| "https://json-schema.org/draft/2020-12/schema": "https://json-schema.org/draft/2020-12/schema", | ||||||
| "https://json-schema.org/draft/2019-09/schema": "https://json-schema.org/draft/2019-09/schema", | ||||||
| "http://json-schema.org/draft-07/schema#": "http://json-schema.org/draft-07/schema#", | ||||||
| "http://json-schema.org/draft-06/schema#": "http://json-schema.org/draft-06/schema#", | ||||||
| "http://json-schema.org/draft-04/schema#": "http://json-schema.org/draft-04/schema#" | ||||||
| }; | ||||||
|
|
||||||
| function getDialectUri(schema) { | ||||||
| if (schema.$schema && DIALECT_MAP[schema.$schema]) { | ||||||
| return DIALECT_MAP[schema.$schema]; | ||||||
| } | ||||||
| return "https://json-schema.org/draft/2020-12/schema"; | ||||||
| } | ||||||
|
|
||||||
| function generateTestId(normalizedSchema, testData, testValid) { | ||||||
| return crypto | ||||||
| .createHash("md5") | ||||||
| .update(jsonStringify(normalizedSchema) + jsonStringify(testData) + testValid) | ||||||
| .digest("hex"); | ||||||
| } | ||||||
|
|
||||||
| async function addIdsToFile(filePath) { | ||||||
|
||||||
| console.log("Reading:", filePath); | ||||||
| const tests = JSON.parse(fs.readFileSync(filePath, "utf8")); | ||||||
| let changed = false; | ||||||
| let added = 0; | ||||||
|
|
||||||
| if (!Array.isArray(tests)) { | ||||||
| console.log("Expected an array at top level, got:", typeof tests); | ||||||
| return; | ||||||
| } | ||||||
|
||||||
|
|
||||||
| for (const testCase of tests) { | ||||||
| if (!Array.isArray(testCase.tests)) continue; | ||||||
|
||||||
|
|
||||||
| const dialectUri = getDialectUri(testCase.schema || {}); | ||||||
|
||||||
| const dialectUri = getDialectUri(testCase.schema || {}); | |
| const dialectUri = getDialectUri(testCase.schema); |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unnecessary defensive programming.
| const normalizedSchema = await normalize(testCase.schema || true, dialectUri); | |
| const normalizedSchema = await normalize(testCase.schema, dialectUri); |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you really need a changed flag? Can't you just check if added is greater than 0?
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're not going to be able to add remotes for all dialects at the same time. You're going to have to run this script separately for each dialect. Most remotes don't have $schema and are expected to run as the dialect of test case that's being tested. So, you can't load them at the same time because you can't have two schemas with the same id and different dialects. Your edits to loadRemotes are a result of this misunderstanding. I don't think you should need to make changes to that function.
I suggest making the first argument of the script the local draft identifier, then convert that to the right dialect URI to pass to addIdsToFile. Then the second argument can be the optional filePath, where all files are processed if the none is given.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The catch is unnecessary here. Just let it throw if there's an error.
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,143 @@ | ||||||
| import * as fs from "node:fs"; | ||||||
| import * as path from "node:path"; | ||||||
| import * as crypto from "node:crypto"; | ||||||
| import jsonStringify from "json-stringify-deterministic"; | ||||||
| import { normalize } from "./normalize.js"; | ||||||
| import { loadRemotes } from "./load-remotes.js"; | ||||||
|
|
||||||
| const DIALECT_MAP = { | ||||||
| "https://json-schema.org/draft/2020-12/schema": "https://json-schema.org/draft/2020-12/schema", | ||||||
| "https://json-schema.org/draft/2019-09/schema": "https://json-schema.org/draft/2019-09/schema", | ||||||
| "http://json-schema.org/draft-07/schema#": "http://json-schema.org/draft-07/schema#", | ||||||
| "http://json-schema.org/draft-06/schema#": "http://json-schema.org/draft-06/schema#", | ||||||
| "http://json-schema.org/draft-04/schema#": "http://json-schema.org/draft-04/schema#" | ||||||
| }; | ||||||
|
||||||
|
|
||||||
| function* jsonFiles(dir) { | ||||||
| for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { | ||||||
| const full = path.join(dir, entry.name); | ||||||
| if (entry.isDirectory()) { | ||||||
| yield* jsonFiles(full); | ||||||
| } else if (entry.isFile() && entry.name.endsWith(".json")) { | ||||||
| yield full; | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| function getDialectUri(schema) { | ||||||
| if (schema.$schema && DIALECT_MAP[schema.$schema]) { | ||||||
| return DIALECT_MAP[schema.$schema]; | ||||||
| } | ||||||
| return "https://json-schema.org/draft/2020-12/schema"; | ||||||
| } | ||||||
|
|
||||||
| function generateTestId(normalizedSchema, testData, testValid) { | ||||||
| return crypto | ||||||
| .createHash("md5") | ||||||
| .update(jsonStringify(normalizedSchema) + jsonStringify(testData) + testValid) | ||||||
| .digest("hex"); | ||||||
| } | ||||||
|
|
||||||
| async function checkVersion(dir) { | ||||||
| const missingIdFiles = new Set(); | ||||||
| const duplicateIdFiles = new Set(); | ||||||
| const mismatchedIdFiles = new Set(); | ||||||
| const idMap = new Map(); | ||||||
|
|
||||||
| console.log(`Checking tests in ${dir}...`); | ||||||
|
|
||||||
| for (const file of jsonFiles(dir)) { | ||||||
| const tests = JSON.parse(fs.readFileSync(file, "utf8")); | ||||||
|
|
||||||
| for (let i = 0; i < tests.length; i++) { | ||||||
| const testCase = tests[i]; | ||||||
| if (!Array.isArray(testCase.tests)) continue; | ||||||
|
|
||||||
| const dialectUri = getDialectUri(testCase.schema || {}); | ||||||
| const normalizedSchema = await normalize(testCase.schema || true, dialectUri); | ||||||
|
||||||
|
|
||||||
| for (let j = 0; j < testCase.tests.length; j++) { | ||||||
|
||||||
| for (let j = 0; j < testCase.tests.length; j++) { | |
| for (const test of testCase.tests) { |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm thinking that we probably don't need to check for duplicate IDs. Checking for duplicates would be necessary if people were assigning IDs, but these ids are generated based on the content of the test, so there should never be duplicates. If someone uses the ID from another test by copy-paste mistake, we should get a Mismatched ID error, so we don't need a duplicate check as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While making this, I briefly thought about whether two different tests could somehow end up with the same ID, but now I realize that it doesn’t really make sense for two distinct tests to share the same identifier.( still a little fearful that maybe or somehow it does end up being same , i know it will not happen but just cant really remove the doubt myself)
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You don't need to catch if you're not doing to something meaningful to recover from the error. You can just let if throw.
| checkVersion(dir).catch(console.error); | |
| await checkVersion(dir); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, agreed!
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| // scripts/load-remotes.js | ||
| import * as fs from "node:fs"; | ||
| import { toAbsoluteIri } from "@hyperjump/uri"; | ||
| import { registerSchema } from "@hyperjump/json-schema/draft-2020-12"; | ||
|
|
||
| // Keep track of which remote URLs we've already registered | ||
| const loadedRemotes = new Set(); | ||
|
|
||
| export const loadRemotes = (dialectId, filePath, url = "") => { | ||
| if (!fs.existsSync(filePath)) { | ||
| console.warn(`Warning: Remotes path not found: ${filePath}`); | ||
| return; | ||
| } | ||
|
|
||
| fs.readdirSync(filePath, { withFileTypes: true }).forEach((entry) => { | ||
| if (entry.isFile() && entry.name.endsWith(".json")) { | ||
| const remotePath = `${filePath}/${entry.name}`; | ||
| const remoteUrl = `http://localhost:1234${url}/${entry.name}`; | ||
|
|
||
| // If we've already registered this URL once, skip it | ||
| if (loadedRemotes.has(remoteUrl)) { | ||
| return; | ||
| } | ||
|
|
||
| const remote = JSON.parse(fs.readFileSync(remotePath, "utf8")); | ||
|
|
||
| // Only register if $schema matches dialect OR there's no $schema | ||
| if (!remote.$schema || toAbsoluteIri(remote.$schema) === dialectId) { | ||
| registerSchema(remote, remoteUrl, dialectId); | ||
| loadedRemotes.add(remoteUrl); // ✅ Remember we've registered it | ||
| } | ||
| } else if (entry.isDirectory()) { | ||
| loadRemotes(dialectId, `${filePath}/${entry.name}`, `${url}/${entry.name}`); | ||
| } | ||
| }); | ||
| }; |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are a couple of issues here.
First, you can't use 2020-12 as the default dialect. The default needs to be the dialect associated with the directory the test file is in. For example, the convention to use
$schemain every test case schema was only adopted starting in 2019-09. So, tests for draft-04/6/7 don't have$schema. That means this script will treat all of those tests as 2020-12 schemas, which would be very wrong.This whole function should be unnecessary anyway. This is already handled internally by
@hyperjump/json-schema. The result of this function is only used to pass to theregisterSchemafunction, which should be the dialect from the directory the test file is in, not from the schema.