diff --git a/src/check_jsonschema/cli/main_command.py b/src/check_jsonschema/cli/main_command.py index 9e93ff1ff..62d79bb35 100644 --- a/src/check_jsonschema/cli/main_command.py +++ b/src/check_jsonschema/cli/main_command.py @@ -161,6 +161,11 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: show_default=True, type=click.Choice(SUPPORTED_FILE_FORMATS, case_sensitive=True), ) +@click.option( + "--force-filetype", + help="Force a file type to use when parsing instance files", + type=click.Choice(SUPPORTED_FILE_FORMATS, case_sensitive=True), +) @click.option( "--traceback-mode", help=( @@ -242,6 +247,7 @@ def main( format_regex: t.Literal["python", "nonunicode", "default"] | None, regex_variant: t.Literal["python", "nonunicode", "default"] | None, default_filetype: t.Literal["json", "yaml", "toml", "json5"], + force_filetype: t.Literal["json", "yaml", "toml", "json5"] | None, traceback_mode: t.Literal["full", "short"], data_transform: t.Literal["azure-pipelines", "gitlab-ci"] | None, fill_defaults: bool, @@ -271,6 +277,7 @@ def main( args.disable_cache = no_cache args.default_filetype = default_filetype + args.force_filetype = force_filetype args.fill_defaults = fill_defaults if data_transform is not None: args.data_transform = TRANSFORM_LIBRARY[data_transform] @@ -311,6 +318,7 @@ def build_instance_loader(args: ParseResult) -> InstanceLoader: return InstanceLoader( args.instancefiles, default_filetype=args.default_filetype, + force_filetype=args.force_filetype, data_transform=args.data_transform, ) diff --git a/src/check_jsonschema/cli/parse_result.py b/src/check_jsonschema/cli/parse_result.py index bfd9065b1..fd925118c 100644 --- a/src/check_jsonschema/cli/parse_result.py +++ b/src/check_jsonschema/cli/parse_result.py @@ -29,6 +29,7 @@ def __init__(self) -> None: self.cache_filename: str | None = None # filetype detection (JSON, YAML, TOML, etc) self.default_filetype: str = "json" + self.force_filetype: str | None = None # data-transform (for Azure Pipelines and potentially future transforms) self.data_transform: Transform | None = None # validation behavioral controls diff --git a/src/check_jsonschema/instance_loader.py b/src/check_jsonschema/instance_loader.py index 2d0651c4f..5d76bbfe7 100644 --- a/src/check_jsonschema/instance_loader.py +++ b/src/check_jsonschema/instance_loader.py @@ -14,10 +14,12 @@ def __init__( self, files: t.Sequence[t.IO[bytes] | CustomLazyFile], default_filetype: str = "json", + force_filetype: str | None = None, data_transform: Transform | None = None, ) -> None: self._files = files self._default_filetype = default_filetype + self._force_filetype = force_filetype self._data_transform = ( data_transform if data_transform is not None else Transform() ) @@ -46,7 +48,7 @@ def iter_files(self) -> t.Iterator[tuple[str, ParseError | t.Any]]: try: data: t.Any = self._parsers.parse_data_with_path( - stream, name, self._default_filetype + stream, name, self._default_filetype, self._force_filetype ) except ParseError as err: data = err diff --git a/src/check_jsonschema/parsers/__init__.py b/src/check_jsonschema/parsers/__init__.py index 6db5e95bf..5938ce5d7 100644 --- a/src/check_jsonschema/parsers/__init__.py +++ b/src/check_jsonschema/parsers/__init__.py @@ -65,9 +65,15 @@ def __init__( } def get( - self, path: pathlib.Path | str, default_filetype: str + self, + path: pathlib.Path | str, + default_filetype: str, + force_filetype: str | None = None, ) -> t.Callable[[t.IO[bytes]], t.Any]: - filetype = path_to_type(path, default_type=default_filetype) + if force_filetype: + filetype = force_filetype + else: + filetype = path_to_type(path, default_type=default_filetype) if filetype in self._by_tag: return self._by_tag[filetype] @@ -83,9 +89,13 @@ def get( ) def parse_data_with_path( - self, data: t.IO[bytes] | bytes, path: pathlib.Path | str, default_filetype: str + self, + data: t.IO[bytes] | bytes, + path: pathlib.Path | str, + default_filetype: str, + force_filetype: str | None = None, ) -> t.Any: - loadfunc = self.get(path, default_filetype) + loadfunc = self.get(path, default_filetype, force_filetype) try: if isinstance(data, bytes): data = io.BytesIO(data) @@ -93,6 +103,11 @@ def parse_data_with_path( except LOADING_FAILURE_ERROR_TYPES as e: raise FailedFileLoadError(f"Failed to parse {path}") from e - def parse_file(self, path: pathlib.Path | str, default_filetype: str) -> t.Any: + def parse_file( + self, + path: pathlib.Path | str, + default_filetype: str, + force_filetype: str | None = None, + ) -> t.Any: with open(path, "rb") as fp: - return self.parse_data_with_path(fp, path, default_filetype) + return self.parse_data_with_path(fp, path, default_filetype, force_filetype) diff --git a/tests/unit/cli/test_annotations.py b/tests/unit/cli/test_annotations.py index 1ab41aa51..24cab0152 100644 --- a/tests/unit/cli/test_annotations.py +++ b/tests/unit/cli/test_annotations.py @@ -18,5 +18,6 @@ def test_annotations_match_click_params(): # force default_filetype to be a Literal including `json5`, which is only # included in the choices if a parser is installed "default_filetype": t.Literal["json", "yaml", "toml", "json5"], + "force_filetype": t.Literal["json", "yaml", "toml", "json5"] | None, }, ) diff --git a/tests/unit/test_instance_loader.py b/tests/unit/test_instance_loader.py index fa9c3e91c..b7dc25667 100644 --- a/tests/unit/test_instance_loader.py +++ b/tests/unit/test_instance_loader.py @@ -79,13 +79,49 @@ def test_instanceloader_yaml_data(tmp_path, filename, default_filetype, open_wid ], ) def test_instanceloader_toml_data(tmp_path, filename, default_filetype, open_wide): - f = tmp_path / "foo.toml" + f = tmp_path / filename f.write_text('[foo]\nbar = "baz"\n') loader = InstanceLoader(open_wide(f), default_filetype=default_filetype) data = list(loader.iter_files()) assert data == [(str(f), {"foo": {"bar": "baz"}})] +@pytest.mark.parametrize( + "filename, force_filetype", + [ + ("foo.test", "toml"), + ("foo", "toml"), + ], +) +def test_instanceloader_force_filetype_toml( + tmp_path, filename, force_filetype, open_wide +): + f = tmp_path / filename + f.write_text('[foo]\nbar = "baz"\n') + loader = InstanceLoader(open_wide(f), force_filetype=force_filetype) + data = list(loader.iter_files()) + assert data == [(str(f), {"foo": {"bar": "baz"}})] + + +@pytest.mark.skipif(not JSON5_ENABLED, reason="test requires json5") +@pytest.mark.parametrize( + "filename, force_filetype", + [ + ("foo.test", "json5"), + ("foo.json", "json5"), + ], +) +def test_instanceloader_force_filetype_json( + tmp_path, filename, force_filetype, open_wide +): + f = tmp_path / filename + f.write_text("// a comment\n{}") + loader = InstanceLoader(open_wide(f), force_filetype=force_filetype) + data = list(loader.iter_files()) + print(data) + assert data == [(str(f), {})] + + def test_instanceloader_unknown_type_nonjson_content(tmp_path, open_wide): f = tmp_path / "foo" # no extension here f.write_text("a:b") # non-json data (cannot be detected as JSON)