Skip to content
This repository was archived by the owner on Sep 9, 2025. It is now read-only.

Commit b903539

Browse files
committed
Add taxonomy qna.yaml parsing API
The parse method will yamllint and jsonschema validate a qna.yaml file. It will return an object holding the parsed yaml. Signed-off-by: BJ Hargrave <hargrave@us.ibm.com>
1 parent 238ec56 commit b903539

File tree

22 files changed

+1195
-105
lines changed

22 files changed

+1195
-105
lines changed

.github/dependabot.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,10 @@ updates:
1313
directory: "/.github/workflows"
1414
schedule:
1515
interval: "daily"
16+
17+
# Maintain dependencies for Python code
18+
- package-ecosystem: "pip"
19+
directory: "/"
20+
versioning-strategy: "increase-if-necessary"
21+
schedule:
22+
interval: "daily"

.github/workflows/lint.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ jobs:
4848
tox -e jsonschema
4949
- name: "ruff"
5050
commands: |
51-
tox -e ruff -- check
51+
tox -e ruffcheck
5252
- name: "pylint"
5353
commands: |
5454
echo "::add-matcher::.github/workflows/matchers/pylint.json"

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Taxonomy Schema
22

3-
This Python package defines the JSON schema for the InstructLab [Taxonomy](https://github.com/instructlab/taxonomy) YAML.
3+
This Python package defines the JSON schema and a parser for the InstructLab [Taxonomy](https://github.com/instructlab/taxonomy) YAML.
44

5-
Consumers of this schema can `pip install instructlab-schema`, and access the schema files using `importlib.resources` on the `instructlab.schema` package.
5+
Consumers of this schema can `pip install instructlab-schema`, and use the `instructlab.schema.taxonomy.TaxonomyParser` class to parse and validate `qna.yaml` taxonomy files.
6+
Schema files can be accessed directly using the `instructlab.schema.schema_base()` function, which returns the base of the schema resources.

pyproject.toml

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
[build-system]
4-
requires = ["setuptools>=64", "setuptools_scm>=8"]
4+
requires = ["setuptools>=70.1.0", "setuptools_scm>=8"]
55
build-backend = "setuptools.build_meta"
66

77
[project]
@@ -21,7 +21,15 @@ classifiers = [
2121
"Programming Language :: Python :: 3.11",
2222
"Programming Language :: Python :: 3.12",
2323
]
24-
dynamic = ["dependencies", "optional-dependencies", "version"]
24+
dependencies = [
25+
"typing_extensions",
26+
"jsonschema>=4.22.0",
27+
"PyYAML>=6.0.0",
28+
# The below library should NOT be imported into any python files
29+
# We only use the command via subprocess
30+
"yamllint>=1.35.1",
31+
]
32+
dynamic = ["version"]
2533

2634
[project.urls]
2735
homepage = "https://instructlab.ai"
@@ -40,6 +48,7 @@ exclude = ["^src/instructlab/schema/_version\\.py$"]
4048
target-version = "py310"
4149
src = ["src", "tests"]
4250
extend-exclude = ["src/instructlab/schema/_version.py"]
51+
line-length = 180
4352

4453
[tool.ruff.lint]
4554
select = [
@@ -53,11 +62,23 @@ select = [
5362
"TID", # flake8-tidy-imports
5463
]
5564

65+
[tool.ruff.lint.flake8-tidy-imports.banned-api]
66+
"yamllint".msg = "yamllint is for use as a command via subprocess."
67+
5668
[tool.pylint.main]
5769
py-version = "3.10"
5870
source-roots = ["src", "tests"]
5971
ignore = ["_version.py"]
6072

73+
[tool.pylint.design]
74+
max-branches = 20
75+
max-line-length = 180
76+
max-locals = 20
77+
min-public-methods = 1
78+
79+
[tool.pylint.format]
80+
max-args = 8
81+
6182
[tool.pylint."messages control"]
6283
disable = [
6384
"missing-class-docstring",

scripts/ruff.sh

Lines changed: 0 additions & 50 deletions
This file was deleted.

src/instructlab/schema/__init__.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,20 @@
33
"""InstructLab Taxonomy Schema"""
44

55
# Standard
6-
from importlib import resources
6+
import importlib.resources
7+
from importlib.abc import Traversable
78

8-
try:
9-
from importlib.resources.abc import Traversable # type: ignore[import-not-found]
10-
except ImportError: # python<3.11
11-
from importlib.abc import Traversable
9+
__all__ = ["schema_base", "schema_versions"]
1210

13-
__all__ = ["schema_versions"]
11+
12+
def schema_base() -> Traversable:
13+
"""Return the schema base.
14+
15+
Returns:
16+
Traversable: The base for the schema versions.
17+
"""
18+
base = importlib.resources.files(__name__)
19+
return base
1420

1521

1622
def schema_versions() -> list[Traversable]:
@@ -19,9 +25,8 @@ def schema_versions() -> list[Traversable]:
1925
Returns:
2026
list[Traversable]: A sorted list of schema versions.
2127
"""
22-
schema_base = resources.files(__package__)
2328
versions = sorted(
24-
(v for v in schema_base.iterdir() if v.name[0] == "v" and v.name[1:].isdigit()),
29+
(v for v in schema_base().iterdir() if v.name[0] == "v" and v.name[1:].isdigit()),
2530
key=lambda k: int(k.name[1:]),
2631
)
2732
return versions

0 commit comments

Comments
 (0)