Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/check_jsonschema/cli/main_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str:
show_default=True,
type=click.Choice(SUPPORTED_FILE_FORMATS, case_sensitive=True),
)
@click.option(
"--force-filetype",
help="Force a file typr to use for the file",
type=click.Choice(SUPPORTED_FILE_FORMATS, case_sensitive=True),
)
@click.option(
"--traceback-mode",
help=(
Expand Down Expand Up @@ -242,6 +247,7 @@ def main(
format_regex: t.Literal["python", "nonunicode", "default"] | None,
regex_variant: t.Literal["python", "nonunicode", "default"] | None,
default_filetype: t.Literal["json", "yaml", "toml", "json5"],
force_filetype: t.Literal["json", "yaml", "toml", "json5"] | None,
traceback_mode: t.Literal["full", "short"],
data_transform: t.Literal["azure-pipelines", "gitlab-ci"] | None,
fill_defaults: bool,
Expand Down Expand Up @@ -271,6 +277,7 @@ def main(

args.disable_cache = no_cache
args.default_filetype = default_filetype
args.force_filetype = force_filetype
args.fill_defaults = fill_defaults
if data_transform is not None:
args.data_transform = TRANSFORM_LIBRARY[data_transform]
Expand Down Expand Up @@ -311,6 +318,7 @@ def build_instance_loader(args: ParseResult) -> InstanceLoader:
return InstanceLoader(
args.instancefiles,
default_filetype=args.default_filetype,
force_filetype=args.force_filetype,
data_transform=args.data_transform,
)

Expand Down
1 change: 1 addition & 0 deletions src/check_jsonschema/cli/parse_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(self) -> None:
self.cache_filename: str | None = None
# filetype detection (JSON, YAML, TOML, etc)
self.default_filetype: str = "json"
self.force_filetype: str | None = None
# data-transform (for Azure Pipelines and potentially future transforms)
self.data_transform: Transform | None = None
# validation behavioral controls
Expand Down
4 changes: 3 additions & 1 deletion src/check_jsonschema/instance_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ def __init__(
self,
files: t.Sequence[t.IO[bytes] | CustomLazyFile],
default_filetype: str = "json",
force_filetype: str | None = None,
data_transform: Transform | None = None,
) -> None:
self._files = files
self._default_filetype = default_filetype
self._force_filetype = force_filetype
self._data_transform = (
data_transform if data_transform is not None else Transform()
)
Expand Down Expand Up @@ -46,7 +48,7 @@ def iter_files(self) -> t.Iterator[tuple[str, ParseError | t.Any]]:

try:
data: t.Any = self._parsers.parse_data_with_path(
stream, name, self._default_filetype
stream, name, self._default_filetype, self._force_filetype
)
except ParseError as err:
data = err
Expand Down
24 changes: 19 additions & 5 deletions src/check_jsonschema/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,16 @@ def __init__(
}

def get(
self, path: pathlib.Path | str, default_filetype: str
self,
path: pathlib.Path | str,
default_filetype: str,
force_filetype: str | None,
) -> t.Callable[[t.IO[bytes]], t.Any]:
filetype = path_to_type(path, default_type=default_filetype)

if filetype in self._by_tag:
filetype = force_filetype or filetype
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it might make more sense to use force_filetype as an alternative to calling path_to_type().
e.g.,

if force_filetype is None:
    filetype = path_to_type(path, default_type=default_filetype)
else:
    filetype = force_filetype

I'm pretty sure it doesn't matter in practice -- outside of unit tests, the missing support block is hard to reach -- but IMO it would read cleaner because we take care of the assignment before we start making any control-flow decisions based on the value.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point! This actually caught a potential issue when force_filetype is not in self._by_tag.


return self._by_tag[filetype]

if filetype in MISSING_SUPPORT_MESSAGES:
Expand All @@ -83,16 +88,25 @@ def get(
)

def parse_data_with_path(
self, data: t.IO[bytes] | bytes, path: pathlib.Path | str, default_filetype: str
self,
data: t.IO[bytes] | bytes,
path: pathlib.Path | str,
default_filetype: str,
force_filetype: str | None,
) -> t.Any:
loadfunc = self.get(path, default_filetype)
loadfunc = self.get(path, default_filetype, force_filetype)
try:
if isinstance(data, bytes):
data = io.BytesIO(data)
return loadfunc(data)
except LOADING_FAILURE_ERROR_TYPES as e:
raise FailedFileLoadError(f"Failed to parse {path}") from e

def parse_file(self, path: pathlib.Path | str, default_filetype: str) -> t.Any:
def parse_file(
self,
path: pathlib.Path | str,
default_filetype: str,
force_filetype: str | None,
) -> t.Any:
with open(path, "rb") as fp:
return self.parse_data_with_path(fp, path, default_filetype)
return self.parse_data_with_path(fp, path, default_filetype, force_filetype)
9 changes: 7 additions & 2 deletions src/check_jsonschema/schema_loader/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ def get_retrieval_uri(self) -> str | None:
return self.path.as_uri()

def _read_impl(self) -> t.Any:
return self.parsers.parse_file(self.path, default_filetype="json")
return self.parsers.parse_file(
self.path, default_filetype="json", force_filetype=None
)

def read_schema(self) -> dict:
if self._parsed_schema is _UNSET:
Expand Down Expand Up @@ -84,7 +86,10 @@ def __init__(

def _parse(self, schema_bytes: bytes) -> t.Any:
return self.parsers.parse_data_with_path(
io.BytesIO(schema_bytes), self.url, default_filetype="json"
io.BytesIO(schema_bytes),
self.url,
default_filetype="json",
force_filetype=None,
)

def get_retrieval_uri(self) -> str | None:
Expand Down
8 changes: 5 additions & 3 deletions src/check_jsonschema/schema_loader/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def create_retrieve_callable(

def get_local_file(uri: str) -> t.Any:
path = filename2path(uri)
return parser_set.parse_file(path, "json")
return parser_set.parse_file(path, "json", None)

def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
scheme = urllib.parse.urlsplit(uri).scheme
Expand All @@ -70,15 +70,17 @@ def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
if full_uri_scheme in ("http", "https"):

def validation_callback(content: bytes) -> None:
parser_set.parse_data_with_path(content, full_uri, "json")
parser_set.parse_data_with_path(content, full_uri, "json", None)

bound_downloader = downloader.bind(
full_uri, validation_callback=validation_callback
)
with bound_downloader.open() as fp:
data = fp.read()

parsed_object = parser_set.parse_data_with_path(data, full_uri, "json")
parsed_object = parser_set.parse_data_with_path(
data, full_uri, "json", None
)
else:
parsed_object = get_local_file(full_uri)

Expand Down
1 change: 1 addition & 0 deletions tests/unit/cli/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ def test_annotations_match_click_params():
# force default_filetype to be a Literal including `json5`, which is only
# included in the choices if a parser is installed
"default_filetype": t.Literal["json", "yaml", "toml", "json5"],
"force_filetype": t.Literal["json", "yaml", "toml", "json5"] | None,
},
)
39 changes: 38 additions & 1 deletion tests/unit/test_instance_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,50 @@ def test_instanceloader_yaml_data(tmp_path, filename, default_filetype, open_wid
],
)
def test_instanceloader_toml_data(tmp_path, filename, default_filetype, open_wide):
f = tmp_path / "foo.toml"
f = tmp_path / filename
f.write_text('[foo]\nbar = "baz"\n')
loader = InstanceLoader(open_wide(f), default_filetype=default_filetype)
data = list(loader.iter_files())
assert data == [(str(f), {"foo": {"bar": "baz"}})]


@pytest.mark.parametrize(
"filename, force_filetype",
[
("foo.test", "toml"),
("foo", "toml"),
],
)
def test_instanceloader_force_filetype_toml(
tmp_path, filename, force_filetype, open_wide
):
f = tmp_path / filename
f.write_text('[foo]\nbar = "baz"\n')
loader = InstanceLoader(open_wide(f), force_filetype=force_filetype)
data = list(loader.iter_files())
assert data == [(str(f), {"foo": {"bar": "baz"}})]


@pytest.mark.parametrize(
"filename, force_filetype",
[
("foo.test", "json5"),
("foo.json", "json5"),
],
)
def test_instanceloader_force_filetype_json(
tmp_path, filename, force_filetype, open_wide
):
if not JSON5_ENABLED:
pytest.skip("test requires json5")
f = tmp_path / filename
f.write_text("// a comment\n{}")
loader = InstanceLoader(open_wide(f), force_filetype=force_filetype)
data = list(loader.iter_files())
print(data)
assert data == [(str(f), {})]


def test_instanceloader_unknown_type_nonjson_content(tmp_path, open_wide):
f = tmp_path / "foo" # no extension here
f.write_text("a:b") # non-json data (cannot be detected as JSON)
Expand Down