From 475e0a0cacb58453816848eac658fc1316f20f43 Mon Sep 17 00:00:00 2001 From: Matthias Schoettle Date: Fri, 11 Apr 2025 09:48:14 +0000 Subject: [PATCH 1/2] Add force-filetype option to force a filetype for a file --- src/check_jsonschema/cli/main_command.py | 8 ++++ src/check_jsonschema/cli/parse_result.py | 1 + src/check_jsonschema/instance_loader.py | 4 +- src/check_jsonschema/parsers/__init__.py | 24 +++++++++--- src/check_jsonschema/schema_loader/readers.py | 9 ++++- .../schema_loader/resolver.py | 8 ++-- tests/unit/cli/test_annotations.py | 1 + tests/unit/test_instance_loader.py | 39 ++++++++++++++++++- 8 files changed, 82 insertions(+), 12 deletions(-) diff --git a/src/check_jsonschema/cli/main_command.py b/src/check_jsonschema/cli/main_command.py index 9e93ff1ff..cfd54d141 100644 --- a/src/check_jsonschema/cli/main_command.py +++ b/src/check_jsonschema/cli/main_command.py @@ -161,6 +161,11 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: show_default=True, type=click.Choice(SUPPORTED_FILE_FORMATS, case_sensitive=True), ) +@click.option( + "--force-filetype", + help="Force a file typr to use for the file", + type=click.Choice(SUPPORTED_FILE_FORMATS, case_sensitive=True), +) @click.option( "--traceback-mode", help=( @@ -242,6 +247,7 @@ def main( format_regex: t.Literal["python", "nonunicode", "default"] | None, regex_variant: t.Literal["python", "nonunicode", "default"] | None, default_filetype: t.Literal["json", "yaml", "toml", "json5"], + force_filetype: t.Literal["json", "yaml", "toml", "json5"] | None, traceback_mode: t.Literal["full", "short"], data_transform: t.Literal["azure-pipelines", "gitlab-ci"] | None, fill_defaults: bool, @@ -271,6 +277,7 @@ def main( args.disable_cache = no_cache args.default_filetype = default_filetype + args.force_filetype = force_filetype args.fill_defaults = fill_defaults if data_transform is not None: args.data_transform = TRANSFORM_LIBRARY[data_transform] @@ -311,6 +318,7 @@ def build_instance_loader(args: ParseResult) -> InstanceLoader: return InstanceLoader( args.instancefiles, default_filetype=args.default_filetype, + force_filetype=args.force_filetype, data_transform=args.data_transform, ) diff --git a/src/check_jsonschema/cli/parse_result.py b/src/check_jsonschema/cli/parse_result.py index bfd9065b1..fd925118c 100644 --- a/src/check_jsonschema/cli/parse_result.py +++ b/src/check_jsonschema/cli/parse_result.py @@ -29,6 +29,7 @@ def __init__(self) -> None: self.cache_filename: str | None = None # filetype detection (JSON, YAML, TOML, etc) self.default_filetype: str = "json" + self.force_filetype: str | None = None # data-transform (for Azure Pipelines and potentially future transforms) self.data_transform: Transform | None = None # validation behavioral controls diff --git a/src/check_jsonschema/instance_loader.py b/src/check_jsonschema/instance_loader.py index 2d0651c4f..5d76bbfe7 100644 --- a/src/check_jsonschema/instance_loader.py +++ b/src/check_jsonschema/instance_loader.py @@ -14,10 +14,12 @@ def __init__( self, files: t.Sequence[t.IO[bytes] | CustomLazyFile], default_filetype: str = "json", + force_filetype: str | None = None, data_transform: Transform | None = None, ) -> None: self._files = files self._default_filetype = default_filetype + self._force_filetype = force_filetype self._data_transform = ( data_transform if data_transform is not None else Transform() ) @@ -46,7 +48,7 @@ def iter_files(self) -> t.Iterator[tuple[str, ParseError | t.Any]]: try: data: t.Any = self._parsers.parse_data_with_path( - stream, name, self._default_filetype + stream, name, self._default_filetype, self._force_filetype ) except ParseError as err: data = err diff --git a/src/check_jsonschema/parsers/__init__.py b/src/check_jsonschema/parsers/__init__.py index 6db5e95bf..d4c284ab5 100644 --- a/src/check_jsonschema/parsers/__init__.py +++ b/src/check_jsonschema/parsers/__init__.py @@ -65,11 +65,16 @@ def __init__( } def get( - self, path: pathlib.Path | str, default_filetype: str + self, + path: pathlib.Path | str, + default_filetype: str, + force_filetype: str | None, ) -> t.Callable[[t.IO[bytes]], t.Any]: filetype = path_to_type(path, default_type=default_filetype) if filetype in self._by_tag: + filetype = force_filetype or filetype + return self._by_tag[filetype] if filetype in MISSING_SUPPORT_MESSAGES: @@ -83,9 +88,13 @@ def get( ) def parse_data_with_path( - self, data: t.IO[bytes] | bytes, path: pathlib.Path | str, default_filetype: str + self, + data: t.IO[bytes] | bytes, + path: pathlib.Path | str, + default_filetype: str, + force_filetype: str | None, ) -> t.Any: - loadfunc = self.get(path, default_filetype) + loadfunc = self.get(path, default_filetype, force_filetype) try: if isinstance(data, bytes): data = io.BytesIO(data) @@ -93,6 +102,11 @@ def parse_data_with_path( except LOADING_FAILURE_ERROR_TYPES as e: raise FailedFileLoadError(f"Failed to parse {path}") from e - def parse_file(self, path: pathlib.Path | str, default_filetype: str) -> t.Any: + def parse_file( + self, + path: pathlib.Path | str, + default_filetype: str, + force_filetype: str | None, + ) -> t.Any: with open(path, "rb") as fp: - return self.parse_data_with_path(fp, path, default_filetype) + return self.parse_data_with_path(fp, path, default_filetype, force_filetype) diff --git a/src/check_jsonschema/schema_loader/readers.py b/src/check_jsonschema/schema_loader/readers.py index 61299350a..8722ea9d3 100644 --- a/src/check_jsonschema/schema_loader/readers.py +++ b/src/check_jsonschema/schema_loader/readers.py @@ -44,7 +44,9 @@ def get_retrieval_uri(self) -> str | None: return self.path.as_uri() def _read_impl(self) -> t.Any: - return self.parsers.parse_file(self.path, default_filetype="json") + return self.parsers.parse_file( + self.path, default_filetype="json", force_filetype=None + ) def read_schema(self) -> dict: if self._parsed_schema is _UNSET: @@ -84,7 +86,10 @@ def __init__( def _parse(self, schema_bytes: bytes) -> t.Any: return self.parsers.parse_data_with_path( - io.BytesIO(schema_bytes), self.url, default_filetype="json" + io.BytesIO(schema_bytes), + self.url, + default_filetype="json", + force_filetype=None, ) def get_retrieval_uri(self) -> str | None: diff --git a/src/check_jsonschema/schema_loader/resolver.py b/src/check_jsonschema/schema_loader/resolver.py index 15344d6bd..a7819ed25 100644 --- a/src/check_jsonschema/schema_loader/resolver.py +++ b/src/check_jsonschema/schema_loader/resolver.py @@ -54,7 +54,7 @@ def create_retrieve_callable( def get_local_file(uri: str) -> t.Any: path = filename2path(uri) - return parser_set.parse_file(path, "json") + return parser_set.parse_file(path, "json", None) def retrieve_reference(uri: str) -> referencing.Resource[Schema]: scheme = urllib.parse.urlsplit(uri).scheme @@ -70,7 +70,7 @@ def retrieve_reference(uri: str) -> referencing.Resource[Schema]: if full_uri_scheme in ("http", "https"): def validation_callback(content: bytes) -> None: - parser_set.parse_data_with_path(content, full_uri, "json") + parser_set.parse_data_with_path(content, full_uri, "json", None) bound_downloader = downloader.bind( full_uri, validation_callback=validation_callback @@ -78,7 +78,9 @@ def validation_callback(content: bytes) -> None: with bound_downloader.open() as fp: data = fp.read() - parsed_object = parser_set.parse_data_with_path(data, full_uri, "json") + parsed_object = parser_set.parse_data_with_path( + data, full_uri, "json", None + ) else: parsed_object = get_local_file(full_uri) diff --git a/tests/unit/cli/test_annotations.py b/tests/unit/cli/test_annotations.py index 1ab41aa51..24cab0152 100644 --- a/tests/unit/cli/test_annotations.py +++ b/tests/unit/cli/test_annotations.py @@ -18,5 +18,6 @@ def test_annotations_match_click_params(): # force default_filetype to be a Literal including `json5`, which is only # included in the choices if a parser is installed "default_filetype": t.Literal["json", "yaml", "toml", "json5"], + "force_filetype": t.Literal["json", "yaml", "toml", "json5"] | None, }, ) diff --git a/tests/unit/test_instance_loader.py b/tests/unit/test_instance_loader.py index fa9c3e91c..ae09fb46e 100644 --- a/tests/unit/test_instance_loader.py +++ b/tests/unit/test_instance_loader.py @@ -79,13 +79,50 @@ def test_instanceloader_yaml_data(tmp_path, filename, default_filetype, open_wid ], ) def test_instanceloader_toml_data(tmp_path, filename, default_filetype, open_wide): - f = tmp_path / "foo.toml" + f = tmp_path / filename f.write_text('[foo]\nbar = "baz"\n') loader = InstanceLoader(open_wide(f), default_filetype=default_filetype) data = list(loader.iter_files()) assert data == [(str(f), {"foo": {"bar": "baz"}})] +@pytest.mark.parametrize( + "filename, force_filetype", + [ + ("foo.test", "toml"), + ("foo", "toml"), + ], +) +def test_instanceloader_force_filetype_toml( + tmp_path, filename, force_filetype, open_wide +): + f = tmp_path / filename + f.write_text('[foo]\nbar = "baz"\n') + loader = InstanceLoader(open_wide(f), force_filetype=force_filetype) + data = list(loader.iter_files()) + assert data == [(str(f), {"foo": {"bar": "baz"}})] + + +@pytest.mark.parametrize( + "filename, force_filetype", + [ + ("foo.test", "json5"), + ("foo.json", "json5"), + ], +) +def test_instanceloader_force_filetype_json( + tmp_path, filename, force_filetype, open_wide +): + if not JSON5_ENABLED: + pytest.skip("test requires json5") + f = tmp_path / filename + f.write_text("// a comment\n{}") + loader = InstanceLoader(open_wide(f), force_filetype=force_filetype) + data = list(loader.iter_files()) + print(data) + assert data == [(str(f), {})] + + def test_instanceloader_unknown_type_nonjson_content(tmp_path, open_wide): f = tmp_path / "foo" # no extension here f.write_text("a:b") # non-json data (cannot be detected as JSON) From b0c97c9fb3f6e63ef64fd7f093aa159afac4df5b Mon Sep 17 00:00:00 2001 From: Matthias Schoettle Date: Fri, 11 Apr 2025 14:55:32 +0000 Subject: [PATCH 2/2] Address feedback --- src/check_jsonschema/cli/main_command.py | 2 +- src/check_jsonschema/parsers/__init__.py | 13 +++++++------ src/check_jsonschema/schema_loader/readers.py | 9 ++------- src/check_jsonschema/schema_loader/resolver.py | 8 +++----- tests/unit/test_instance_loader.py | 3 +-- 5 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/check_jsonschema/cli/main_command.py b/src/check_jsonschema/cli/main_command.py index cfd54d141..62d79bb35 100644 --- a/src/check_jsonschema/cli/main_command.py +++ b/src/check_jsonschema/cli/main_command.py @@ -163,7 +163,7 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: ) @click.option( "--force-filetype", - help="Force a file typr to use for the file", + help="Force a file type to use when parsing instance files", type=click.Choice(SUPPORTED_FILE_FORMATS, case_sensitive=True), ) @click.option( diff --git a/src/check_jsonschema/parsers/__init__.py b/src/check_jsonschema/parsers/__init__.py index d4c284ab5..5938ce5d7 100644 --- a/src/check_jsonschema/parsers/__init__.py +++ b/src/check_jsonschema/parsers/__init__.py @@ -68,13 +68,14 @@ def get( self, path: pathlib.Path | str, default_filetype: str, - force_filetype: str | None, + force_filetype: str | None = None, ) -> t.Callable[[t.IO[bytes]], t.Any]: - filetype = path_to_type(path, default_type=default_filetype) + if force_filetype: + filetype = force_filetype + else: + filetype = path_to_type(path, default_type=default_filetype) if filetype in self._by_tag: - filetype = force_filetype or filetype - return self._by_tag[filetype] if filetype in MISSING_SUPPORT_MESSAGES: @@ -92,7 +93,7 @@ def parse_data_with_path( data: t.IO[bytes] | bytes, path: pathlib.Path | str, default_filetype: str, - force_filetype: str | None, + force_filetype: str | None = None, ) -> t.Any: loadfunc = self.get(path, default_filetype, force_filetype) try: @@ -106,7 +107,7 @@ def parse_file( self, path: pathlib.Path | str, default_filetype: str, - force_filetype: str | None, + force_filetype: str | None = None, ) -> t.Any: with open(path, "rb") as fp: return self.parse_data_with_path(fp, path, default_filetype, force_filetype) diff --git a/src/check_jsonschema/schema_loader/readers.py b/src/check_jsonschema/schema_loader/readers.py index 8722ea9d3..61299350a 100644 --- a/src/check_jsonschema/schema_loader/readers.py +++ b/src/check_jsonschema/schema_loader/readers.py @@ -44,9 +44,7 @@ def get_retrieval_uri(self) -> str | None: return self.path.as_uri() def _read_impl(self) -> t.Any: - return self.parsers.parse_file( - self.path, default_filetype="json", force_filetype=None - ) + return self.parsers.parse_file(self.path, default_filetype="json") def read_schema(self) -> dict: if self._parsed_schema is _UNSET: @@ -86,10 +84,7 @@ def __init__( def _parse(self, schema_bytes: bytes) -> t.Any: return self.parsers.parse_data_with_path( - io.BytesIO(schema_bytes), - self.url, - default_filetype="json", - force_filetype=None, + io.BytesIO(schema_bytes), self.url, default_filetype="json" ) def get_retrieval_uri(self) -> str | None: diff --git a/src/check_jsonschema/schema_loader/resolver.py b/src/check_jsonschema/schema_loader/resolver.py index a7819ed25..15344d6bd 100644 --- a/src/check_jsonschema/schema_loader/resolver.py +++ b/src/check_jsonschema/schema_loader/resolver.py @@ -54,7 +54,7 @@ def create_retrieve_callable( def get_local_file(uri: str) -> t.Any: path = filename2path(uri) - return parser_set.parse_file(path, "json", None) + return parser_set.parse_file(path, "json") def retrieve_reference(uri: str) -> referencing.Resource[Schema]: scheme = urllib.parse.urlsplit(uri).scheme @@ -70,7 +70,7 @@ def retrieve_reference(uri: str) -> referencing.Resource[Schema]: if full_uri_scheme in ("http", "https"): def validation_callback(content: bytes) -> None: - parser_set.parse_data_with_path(content, full_uri, "json", None) + parser_set.parse_data_with_path(content, full_uri, "json") bound_downloader = downloader.bind( full_uri, validation_callback=validation_callback @@ -78,9 +78,7 @@ def validation_callback(content: bytes) -> None: with bound_downloader.open() as fp: data = fp.read() - parsed_object = parser_set.parse_data_with_path( - data, full_uri, "json", None - ) + parsed_object = parser_set.parse_data_with_path(data, full_uri, "json") else: parsed_object = get_local_file(full_uri) diff --git a/tests/unit/test_instance_loader.py b/tests/unit/test_instance_loader.py index ae09fb46e..b7dc25667 100644 --- a/tests/unit/test_instance_loader.py +++ b/tests/unit/test_instance_loader.py @@ -103,6 +103,7 @@ def test_instanceloader_force_filetype_toml( assert data == [(str(f), {"foo": {"bar": "baz"}})] +@pytest.mark.skipif(not JSON5_ENABLED, reason="test requires json5") @pytest.mark.parametrize( "filename, force_filetype", [ @@ -113,8 +114,6 @@ def test_instanceloader_force_filetype_toml( def test_instanceloader_force_filetype_json( tmp_path, filename, force_filetype, open_wide ): - if not JSON5_ENABLED: - pytest.skip("test requires json5") f = tmp_path / filename f.write_text("// a comment\n{}") loader = InstanceLoader(open_wide(f), force_filetype=force_filetype)