Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/check_jsonschema/cli/main_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str:
show_default=True,
type=click.Choice(SUPPORTED_FILE_FORMATS, case_sensitive=True),
)
@click.option(
"--force-filetype",
help="Force a file typr to use for the file",
type=click.Choice(SUPPORTED_FILE_FORMATS, case_sensitive=True),
)
@click.option(
"--traceback-mode",
help=(
Expand Down Expand Up @@ -242,6 +247,7 @@ def main(
format_regex: t.Literal["python", "nonunicode", "default"] | None,
regex_variant: t.Literal["python", "nonunicode", "default"] | None,
default_filetype: t.Literal["json", "yaml", "toml", "json5"],
force_filetype: t.Literal["json", "yaml", "toml", "json5"] | None,
traceback_mode: t.Literal["full", "short"],
data_transform: t.Literal["azure-pipelines", "gitlab-ci"] | None,
fill_defaults: bool,
Expand Down Expand Up @@ -271,6 +277,7 @@ def main(

args.disable_cache = no_cache
args.default_filetype = default_filetype
args.force_filetype = force_filetype
args.fill_defaults = fill_defaults
if data_transform is not None:
args.data_transform = TRANSFORM_LIBRARY[data_transform]
Expand Down Expand Up @@ -311,6 +318,7 @@ def build_instance_loader(args: ParseResult) -> InstanceLoader:
return InstanceLoader(
args.instancefiles,
default_filetype=args.default_filetype,
force_filetype=args.force_filetype,
data_transform=args.data_transform,
)

Expand Down
1 change: 1 addition & 0 deletions src/check_jsonschema/cli/parse_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(self) -> None:
self.cache_filename: str | None = None
# filetype detection (JSON, YAML, TOML, etc)
self.default_filetype: str = "json"
self.force_filetype: str | None = None
# data-transform (for Azure Pipelines and potentially future transforms)
self.data_transform: Transform | None = None
# validation behavioral controls
Expand Down
4 changes: 3 additions & 1 deletion src/check_jsonschema/instance_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ def __init__(
self,
files: t.Sequence[t.IO[bytes] | CustomLazyFile],
default_filetype: str = "json",
force_filetype: str | None = None,
data_transform: Transform | None = None,
) -> None:
self._files = files
self._default_filetype = default_filetype
self._force_filetype = force_filetype
self._data_transform = (
data_transform if data_transform is not None else Transform()
)
Expand Down Expand Up @@ -46,7 +48,7 @@ def iter_files(self) -> t.Iterator[tuple[str, ParseError | t.Any]]:

try:
data: t.Any = self._parsers.parse_data_with_path(
stream, name, self._default_filetype
stream, name, self._default_filetype, self._force_filetype
)
except ParseError as err:
data = err
Expand Down
24 changes: 19 additions & 5 deletions src/check_jsonschema/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,16 @@ def __init__(
}

def get(
self, path: pathlib.Path | str, default_filetype: str
self,
path: pathlib.Path | str,
default_filetype: str,
force_filetype: str | None,
) -> t.Callable[[t.IO[bytes]], t.Any]:
filetype = path_to_type(path, default_type=default_filetype)

if filetype in self._by_tag:
filetype = force_filetype or filetype
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it might make more sense to use force_filetype as an alternative to calling path_to_type().
e.g.,

if force_filetype is None:
    filetype = path_to_type(path, default_type=default_filetype)
else:
    filetype = force_filetype

I'm pretty sure it doesn't matter in practice -- outside of unit tests, the missing support block is hard to reach -- but IMO it would read cleaner because we take care of the assignment before we start making any control-flow decisions based on the value.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point! This actually caught a potential issue when force_filetype is not in self._by_tag.


return self._by_tag[filetype]

if filetype in MISSING_SUPPORT_MESSAGES:
Expand All @@ -83,16 +88,25 @@ def get(
)

def parse_data_with_path(
self, data: t.IO[bytes] | bytes, path: pathlib.Path | str, default_filetype: str
self,
data: t.IO[bytes] | bytes,
path: pathlib.Path | str,
default_filetype: str,
force_filetype: str | None,
) -> t.Any:
loadfunc = self.get(path, default_filetype)
loadfunc = self.get(path, default_filetype, force_filetype)
try:
if isinstance(data, bytes):
data = io.BytesIO(data)
return loadfunc(data)
except LOADING_FAILURE_ERROR_TYPES as e:
raise FailedFileLoadError(f"Failed to parse {path}") from e

def parse_file(self, path: pathlib.Path | str, default_filetype: str) -> t.Any:
def parse_file(
self,
path: pathlib.Path | str,
default_filetype: str,
force_filetype: str | None,
) -> t.Any:
with open(path, "rb") as fp:
return self.parse_data_with_path(fp, path, default_filetype)
return self.parse_data_with_path(fp, path, default_filetype, force_filetype)
9 changes: 7 additions & 2 deletions src/check_jsonschema/schema_loader/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ def get_retrieval_uri(self) -> str | None:
return self.path.as_uri()

def _read_impl(self) -> t.Any:
return self.parsers.parse_file(self.path, default_filetype="json")
return self.parsers.parse_file(
self.path, default_filetype="json", force_filetype=None
)

def read_schema(self) -> dict:
if self._parsed_schema is _UNSET:
Expand Down Expand Up @@ -84,7 +86,10 @@ def __init__(

def _parse(self, schema_bytes: bytes) -> t.Any:
return self.parsers.parse_data_with_path(
io.BytesIO(schema_bytes), self.url, default_filetype="json"
io.BytesIO(schema_bytes),
self.url,
default_filetype="json",
force_filetype=None,
)

def get_retrieval_uri(self) -> str | None:
Expand Down
8 changes: 5 additions & 3 deletions src/check_jsonschema/schema_loader/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def create_retrieve_callable(

def get_local_file(uri: str) -> t.Any:
path = filename2path(uri)
return parser_set.parse_file(path, "json")
return parser_set.parse_file(path, "json", None)

def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
scheme = urllib.parse.urlsplit(uri).scheme
Expand All @@ -70,15 +70,17 @@ def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
if full_uri_scheme in ("http", "https"):

def validation_callback(content: bytes) -> None:
parser_set.parse_data_with_path(content, full_uri, "json")
parser_set.parse_data_with_path(content, full_uri, "json", None)

bound_downloader = downloader.bind(
full_uri, validation_callback=validation_callback
)
with bound_downloader.open() as fp:
data = fp.read()

parsed_object = parser_set.parse_data_with_path(data, full_uri, "json")
parsed_object = parser_set.parse_data_with_path(
data, full_uri, "json", None
)
else:
parsed_object = get_local_file(full_uri)

Expand Down
1 change: 1 addition & 0 deletions tests/unit/cli/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ def test_annotations_match_click_params():
# force default_filetype to be a Literal including `json5`, which is only
# included in the choices if a parser is installed
"default_filetype": t.Literal["json", "yaml", "toml", "json5"],
"force_filetype": t.Literal["json", "yaml", "toml", "json5"] | None,
},
)
39 changes: 38 additions & 1 deletion tests/unit/test_instance_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,50 @@ def test_instanceloader_yaml_data(tmp_path, filename, default_filetype, open_wid
],
)
def test_instanceloader_toml_data(tmp_path, filename, default_filetype, open_wide):
f = tmp_path / "foo.toml"
f = tmp_path / filename
f.write_text('[foo]\nbar = "baz"\n')
loader = InstanceLoader(open_wide(f), default_filetype=default_filetype)
data = list(loader.iter_files())
assert data == [(str(f), {"foo": {"bar": "baz"}})]


@pytest.mark.parametrize(
"filename, force_filetype",
[
("foo.test", "toml"),
("foo", "toml"),
],
)
def test_instanceloader_force_filetype_toml(
tmp_path, filename, force_filetype, open_wide
):
f = tmp_path / filename
f.write_text('[foo]\nbar = "baz"\n')
loader = InstanceLoader(open_wide(f), force_filetype=force_filetype)
data = list(loader.iter_files())
assert data == [(str(f), {"foo": {"bar": "baz"}})]


@pytest.mark.parametrize(
"filename, force_filetype",
[
("foo.test", "json5"),
("foo.json", "json5"),
],
)
def test_instanceloader_force_filetype_json(
tmp_path, filename, force_filetype, open_wide
):
if not JSON5_ENABLED:
pytest.skip("test requires json5")
f = tmp_path / filename
f.write_text("// a comment\n{}")
loader = InstanceLoader(open_wide(f), force_filetype=force_filetype)
data = list(loader.iter_files())
print(data)
assert data == [(str(f), {})]


def test_instanceloader_unknown_type_nonjson_content(tmp_path, open_wide):
f = tmp_path / "foo" # no extension here
f.write_text("a:b") # non-json data (cannot be detected as JSON)
Expand Down