Skip to content

Commit 04ee985

Browse files
authored
Merge branch 'main' into update_valid_emsl_data
2 parents 9773d13 + cc0d251 commit 04ee985

File tree

7 files changed

+229
-32
lines changed

7 files changed

+229
-32
lines changed

.github/workflows/run_tests.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,22 +37,28 @@ jobs:
3737
make validate-examples
3838
continue-on-error: true
3939

40+
- name: Test Python code
41+
id: test_python_code
42+
run: |
43+
make test-python
44+
continue-on-error: true
45+
4046
- name: Test documentation generation
4147
id: test_docgen
4248
run: |
4349
make gendoc
4450
continue-on-error: true
4551

4652
- name: outcome failure
47-
if: steps.lint_linkml.outcome != 'success' || steps.validate_sample_data.outcome != 'success' || steps.test_docgen.outcome != 'success'
53+
if: steps.lint_linkml.outcome != 'success' || steps.validate_sample_data.outcome != 'success' || steps.test_python_code.outcome != 'success' || steps.test_docgen.outcome != 'success'
4854
run: |
4955
echo "linkml linting: ${{ steps.lint_linkml.outcome }}"
5056
echo "sample data validation: ${{ steps.validate_sample_data.outcome }}"
5157
echo "documentation generator: ${{ steps.test_docgen.outcome }}"
5258
exit 1
5359
5460
- name: outcome success
55-
if: steps.lint_linkml.outcome == 'success' && steps.validate_sample_data.outcome == 'success' && steps.test_docgen.outcome == 'success'
61+
if: steps.lint_linkml.outcome == 'success' && steps.validate_sample_data.outcome == 'success' && steps.test_python_code.outcome == 'success' && steps.test_docgen.outcome == 'success'
5662
run: |
5763
echo All tests passed successfully!
5864
exit 0

.gitignore

Lines changed: 93 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,93 @@
1-
# Ignore files that are generated whenever a developer `pip install`s this package
2-
# from a local clone of this repository (e.g. `pip install /path/to/bertron-schema`),
3-
# rather than via its Git URL or—once it gets published to PyPI—via PyPI.
4-
/src/*.egg-info/
5-
/build/
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[codz]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py.cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Sphinx documentation
55+
docs/_build/
56+
57+
# Jupyter Notebook
58+
.ipynb_checkpoints
59+
60+
# IPython
61+
profile_default/
62+
ipython_config.py
63+
64+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
65+
__pypackages__/
66+
67+
# Environments
68+
.env
69+
.envrc
70+
.venv
71+
env/
72+
venv/
73+
ENV/
74+
env.bak/
75+
venv.bak/
76+
77+
# mypy
78+
.mypy_cache/
79+
.dmypy.json
80+
dmypy.json
81+
82+
# Pyre type checker
83+
.pyre/
84+
85+
# pytype static type analyzer
86+
.pytype/
87+
88+
89+
# Ruff stuff:
90+
.ruff_cache/
91+
92+
# PyPI configuration file
93+
.pypirc

src/sample_data/__init__.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,24 @@
33
import json
44
from importlib import resources
55
from importlib.abc import Traversable
6+
from pathlib import Path
67
from typing import Any
78

89
import yaml
910

11+
# Define the path someone could use to `import` the Python package _containing_ the
12+
# `invalid/` and `valid/` directories (e.g. `import {something}`); which, currently,
13+
# happens to be the directory containing this `__init__.py` file.
14+
PACKAGE_IMPORT_PATH = "sample_data"
15+
16+
# Define a mapping from file extension to a function that can be used to parse the
17+
# content of a file having that extension.
18+
PARSERS_BY_FILE_EXTENSION = {
19+
"yaml": yaml.safe_load,
20+
"yml": yaml.safe_load,
21+
"json": json.loads,
22+
}
23+
1024

1125
def _get_traversable() -> Traversable:
1226
"""Get a `Traversable` object for the `sample_data/` package.
@@ -23,13 +37,8 @@ def _get_traversable() -> Traversable:
2337
- https://docs.python.org/3/library/importlib.resources.abc.html#importlib.resources.abc.Traversable
2438
2539
"""
26-
# Define the path someone could use to `import` the Python package _containing_ the
27-
# `invalid/` and `valid/` directories (e.g. `import {something}`); which, currently,
28-
# happens to be the directory containing this `__init__.py` file.
29-
package_import_path = "sample_data"
30-
3140
# Create a `Traversable` object that can be passed to the `resources.as_file()` function.
32-
return resources.files(package_import_path)
41+
return resources.files(PACKAGE_IMPORT_PATH)
3342

3443

3544
def get_sample_data_file_paths() -> list[str]:
@@ -44,12 +53,14 @@ def get_sample_data_file_paths() -> list[str]:
4453
4554
"""
4655
traversable = _get_traversable()
56+
file_extension_patterns = [f"**/*.{ext}" for ext in PARSERS_BY_FILE_EXTENSION]
4757
with resources.as_file(traversable) as path:
48-
return [
58+
paths = [
4959
str(p.relative_to(path))
50-
for pattern in ["**/*.yaml", "**/*.yml", "**/*.json"]
60+
for pattern in file_extension_patterns
5161
for p in path.glob(pattern)
5262
]
63+
return sorted(paths)
5364

5465

5566
def get_sample_data_text(file_path: str, encoding: str = "utf-8") -> str:
@@ -84,15 +95,15 @@ def get_sample_data(file_path: str, encoding: str = "utf-8") -> Any: # noqa: AN
8495
8596
"""
8697
# Determine which parsing function we will use, based upon the file's extension.
87-
if file_path.endswith((".yaml", ".yml")):
88-
parse = yaml.safe_load
89-
elif file_path.endswith(".json"):
90-
parse = json.loads
98+
path = Path(file_path)
99+
file_extension = path.suffix.lstrip(".") # ".yaml" -> "yaml"
100+
if file_extension in PARSERS_BY_FILE_EXTENSION:
101+
parse = PARSERS_BY_FILE_EXTENSION[file_extension]
91102
else:
92103
# Raise an error indicating that we don't support files having that extension.
93104
# Note: The `!r` after the in-string variable below calls `repr()` on the value.
94105
# Since the value is a string, the string will appear wrapped in quotes.
95-
msg = f"File extension suggest an unsupported file type: {file_path!r}"
106+
msg = f"Filename extension suggests an unsupported file type: {file_path!r}"
96107
raise ValueError(msg)
97108

98109
text = get_sample_data_text(file_path, encoding=encoding)

src/sample_data/valid/Entity-ess-dive-example-00001.yaml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,11 @@ coordinates:
33
latitude: 65.162309
44
longitude: -164.819851
55
entity_type:
6-
- unspecified
7-
description: Maps of land surface phenology derived from PlanetScope data,
6+
- site
7+
name: Maps of land surface phenology derived from PlanetScope data,
88
2018-2022, Teller, Kougarok, and Council, Seward Peninsula
99
id: doi:10.15485/2441497
10-
name: NGEE Arctic Kougarok Site, Mile Marker 64, Alaska
10+
description: NGEE Arctic Kougarok Site, Mile Marker 64, Alaska
1111
alt_ids:
1212
- NGA547
13-
part_of_collection: []
1413
uri: https://data.ess-dive.lbl.gov/view/doi:10.15485/2441497

src/sample_data/valid/Entity-ess-dive-example-00002.yaml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,11 @@ coordinates:
33
latitude: 64.735492
44
longitude: -165.95039
55
entity_type:
6-
- unspecified
7-
description: Maps of land surface phenology derived from PlanetScope data,
6+
- site
7+
name: Maps of land surface phenology derived from PlanetScope data,
88
2018-2022, Teller, Kougarok, and Council, Seward Peninsula
99
id: doi:10.15485/2441497
10-
name: NGEE Arctic Teller Site, Mile Marker 27, Alaska
10+
description: NGEE Arctic Teller Site, Mile Marker 27, Alaska
1111
alt_ids:
1212
- NGA547
13-
part_of_collection: []
1413
uri: https://data.ess-dive.lbl.gov/view/doi:10.15485/2441497

src/sample_data/valid/Entity-ess-dive-example-00003.yaml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,11 @@ coordinates:
33
latitude: 64.847286
44
longitude: -163.71993600000002
55
entity_type:
6-
- unspecified
7-
description: Maps of land surface phenology derived from PlanetScope data,
6+
- site
7+
name: Maps of land surface phenology derived from PlanetScope data,
88
2018-2022, Teller, Kougarok, and Council, Seward Peninsula
99
id: doi:10.15485/2441497
10-
name: NGEE Arctic Council Site, Mile Marker 71, Alaska
10+
description: NGEE Arctic Council Site, Mile Marker 71, Alaska
1111
alt_ids:
1212
- NGA547
13-
part_of_collection: []
1413
uri: https://data.ess-dive.lbl.gov/view/doi:10.15485/2441497

tests/test_sample_data.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""Tests targeting functions that expose sample data."""
2+
3+
import json
4+
from collections.abc import Generator
5+
from pathlib import Path
6+
from tempfile import TemporaryDirectory
7+
8+
import pytest
9+
import yaml
10+
11+
from sample_data import get_sample_data, get_sample_data_file_paths, get_sample_data_text
12+
13+
14+
@pytest.fixture
15+
def sample_json_content() -> str:
16+
"""Fixture that returns the text content of a sample JSON file."""
17+
return r"""
18+
{
19+
"id": "001",
20+
"name": "foo bar",
21+
"primary email": "foo.bar@example.com",
22+
"age_in_years": 33
23+
}
24+
"""
25+
26+
@pytest.fixture
27+
def sample_yaml_content() -> str:
28+
"""Fixture that returns the text content of a sample YAML file."""
29+
return r"""
30+
# Some YAML documents begin with "front matter".
31+
---
32+
id: "001"
33+
name: foo bar
34+
primary email: foo.bar@example.com
35+
age_in_years: 33
36+
"""
37+
38+
@pytest.fixture(autouse=True)
39+
def mock__get_traversable(
40+
monkeypatch: Generator[pytest.MonkeyPatch, None, None],
41+
sample_yaml_content: str,
42+
sample_json_content: str,
43+
) -> Generator[None, None, None]:
44+
"""Fixture that mocks the `sample_data._get_traversable` helper function.
45+
46+
This fixture (a) creates a temporary directory, (b) populates it with sample data files,
47+
and (c) patches the `_get_traversable` function so it returns a `Path` object pointing
48+
to that temporary directory. This decouples the tests from the contents of the real
49+
`sample_data/` directory that the module-under-test accesses in production.
50+
51+
Note: All `Path` objects are also `Traversable` objects.
52+
"""
53+
with TemporaryDirectory() as temp_dir:
54+
temp_dir_path = Path(temp_dir)
55+
(temp_dir_path / "data.json").write_text(sample_json_content)
56+
(temp_dir_path / "data.yaml").write_text(sample_yaml_content)
57+
(temp_dir_path / "data.yml").write_text(sample_yaml_content)
58+
(temp_dir_path / "data.txt").write_text("some text") # unsupported file suffix
59+
monkeypatch.setattr("sample_data._get_traversable", lambda: temp_dir_path)
60+
yield None
61+
62+
63+
def test_get_sample_data_file_paths_returns_list_of_file_paths_supported() -> None:
64+
"""Test that `get_sample_data_file_paths` returns a list of the file paths we support."""
65+
assert get_sample_data_file_paths() == ["data.json", "data.yaml", "data.yml"]
66+
67+
68+
def test_get_sample_data_text_returns_expected_sample_data_as_string(
69+
sample_json_content: str,
70+
sample_yaml_content: str,
71+
) -> None:
72+
"""Test that `get_sample_data_text` returns the sample data we expect, as a string."""
73+
for path in get_sample_data_file_paths():
74+
if path == "data.json":
75+
assert sample_json_content == get_sample_data_text(path)
76+
if path in ("data.yaml", "data.yml"):
77+
assert sample_yaml_content == get_sample_data_text(path)
78+
79+
80+
def test_get_sample_data_returns_sample_data_as_python_object(
81+
sample_json_content: str,
82+
sample_yaml_content: str,
83+
) -> None:
84+
"""Test that `get_sample_data` returns sample data as a Python object."""
85+
for path in get_sample_data_file_paths():
86+
if path == "data.json":
87+
assert json.loads(sample_json_content) == get_sample_data(path)
88+
if path in ("data.yaml", "data.yml"):
89+
assert yaml.safe_load(sample_yaml_content) == get_sample_data(path)
90+
91+
92+
def test_get_sample_data_rejects_unsupported_filename_extensions() -> None:
93+
"""Test that `get_sample_data` raises an exception for an unsupported filename extension."""
94+
with pytest.raises(ValueError, match=r"^Filename extension"):
95+
get_sample_data("my_file.txt")

0 commit comments

Comments
 (0)