Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Change log

## Release 1.0.2 (2026-02-09)

Fixes:

- Allow untracked .ttl files in `--prov-from-git`. Skip them with an info log instead of raising an error. [#339](https://github.com/nfdi4cat/voc4cat-tool/issues/339)

## Release 1.0.1 (2026-01-06)

Fixes:
Expand Down
23 changes: 6 additions & 17 deletions src/voc4cat/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,7 @@ def add_prov_from_git(
source_dir: Directory to look up git history from (if different from vocab_dir).
Used when files have been copied to a new location.

Raises:
Voc4catError: If any .ttl file is not tracked in git.
Untracked .ttl files are skipped with an informational log message.
"""
repo_dir = Path(repo_dir) if repo_dir else Path.cwd()
vocab_dir = Path(vocab_dir)
Expand All @@ -182,21 +181,6 @@ def add_prov_from_git(
# Get git info from the source directory (or vocab_dir if no source specified)
git_info = get_directory_git_info(git_lookup_dir, repo_dir)

# Check that all .ttl files are tracked in git (by relative path)
for ttl_file in ttl_files:
# Preserve subdirectory structure when looking up in source directory
rel_to_vocab = ttl_file.relative_to(vocab_dir)
source_file = git_lookup_dir / rel_to_vocab
try:
rel_path = source_file.relative_to(repo_dir)
except ValueError:
rel_path = source_file
# Normalize path separators (git uses forward slashes)
rel_path_str = str(rel_path).replace("\\", "/")
if rel_path_str not in git_info:
msg = f'File "{ttl_file}" is not tracked in git. Cannot add provenance.'
raise Voc4catError(msg)

# Process each .ttl file
for ttl_file in ttl_files:
# Preserve subdirectory structure when looking up in source directory
Expand All @@ -207,6 +191,11 @@ def add_prov_from_git(
except ValueError:
rel_path = source_file
rel_path_str = str(rel_path).replace("\\", "/")
if rel_path_str not in git_info:
logger.info(
'File "%s" is not tracked in git. Skipping provenance.', ttl_file
)
continue
info = git_info[rel_path_str]

# Parse the RDF graph
Expand Down
20 changes: 14 additions & 6 deletions tests/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,8 @@ def test_prov_from_git_adds_dates(git_repo_with_split_files, monkeypatch, caplog
assert len(modified_values) == 1


def test_prov_from_git_error_untracked(tmp_path, datadir, monkeypatch, caplog):
"""Test that --prov-from-git fails if files are not tracked in git."""
def test_prov_from_git_skips_untracked(tmp_path, datadir, monkeypatch, caplog):
"""Test that --prov-from-git skips untracked files with an info log message."""
# Initialize git repo but don't add files
_run_git(["git", "init"], tmp_path)
_run_git(["git", "config", "user.email", "test@example.com"], tmp_path)
Expand All @@ -236,12 +236,20 @@ def test_prov_from_git_error_untracked(tmp_path, datadir, monkeypatch, caplog):

monkeypatch.chdir(tmp_path)

with (
caplog.at_level(logging.ERROR),
pytest.raises(Voc4catError, match="is not tracked in git"),
):
contents_before = {f: f.read_bytes() for f in vocdir.rglob("*.ttl")}

with caplog.at_level(logging.INFO):
main_cli(["transform", "--prov-from-git", "--inplace", str(vocdir)])

assert "is not tracked in git" in caplog.text
assert "Skipping provenance" in caplog.text

# Verify that untracked files were not modified (no provenance added)
ttl_files = list(vocdir.rglob("*.ttl"))
assert len(ttl_files) > 0
contents_after = {f: f.read_bytes() for f in ttl_files}
assert contents_after == contents_before


def test_prov_from_git_error_not_directory(tmp_path, datadir, monkeypatch, caplog):
"""Test that --prov-from-git fails if input is not a directory with .ttl files."""
Expand Down