Merge pull request #1 from mideind/separate-torch-dep

HaukurPall · web-flow · commit 45cccb185c90 · 2025-07-08T12:00:54.000Z
Make torch an optional dependency
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -35,7 +35,7 @@ jobs:
       - name: Run tests
         run: |
           # --locked ensures we use exact versions from uv.lock without updating
-          uv run --locked pytest tests/ -v
+          uv run --locked --extra torch pytest tests/ -v
         env:
           HF_HUB_CACHE: ~/.cache/huggingface
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
diff --git a/README.md b/README.md
@@ -10,10 +10,15 @@ A high-level Python interface for PoS tagging Icelandic text using the [IceBERT-
 
 ```bash
 # This package is currently not available on PyPI, so you need to install it directly from the source repository.
+
+# Without PyTorch (lighter, but model inference won't work)
 pip install git+ssh://git@github.com/mideind/IceBERT-PoS.git
+
+# With PyTorch support (required for model inference) - RECOMMENDED
+pip install "icebert-pos[torch] @ git+ssh://git@github.com/mideind/IceBERT-PoS.git"
 ```
 
-This will install the package with PyTorch.
+> **Note**: The `[torch]` extra is required for model inference. The default installation omits PyTorch, so it is only useful for development work that does not load or run the models.
 
 ## Features
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,23 +1,23 @@
 [project]
 name = "icebert-pos"
-dynamic = ["version"]  # managed by setuptools-scm
+dynamic = ["version"] # managed by setuptools-scm
 description = "A package for interacting with the IceBERT PoS model(s)."
 readme = "README.md"
 requires-python = ">=3.10,<4.0"
 dependencies = [
     "tokenizer>=3.4.4,<4.0",
-    "transformers[torch]>=4.46.3,<5.0",
+    "transformers>=4.46.3,<5.0",
     "rich>=13.0.0,<14.0",
 ]
 
+[project.optional-dependencies]
+torch = ["transformers[torch]>=4.46.3,<5.0"]
+
 [project.scripts]
 icebert-pos = "icebert_pos.cli:main"
 
 [dependency-groups]
-dev = [
-    "pytest",
-    "ruff",
-]
+dev = ["pytest", "ruff"]
 
 [tool.setuptools_scm]
 # Use git tags for versioning
diff --git a/src/icebert_pos/interface.py b/src/icebert_pos/interface.py
@@ -1,12 +1,16 @@
 # Copyright (C) Miðeind ehf.
 # Simple POS tagging interface with classical tokenization
 
+from __future__ import annotations
+
 import logging
 from dataclasses import dataclass
+from typing import TYPE_CHECKING
 
 import tokenizer
-import torch
-from torch.nn.utils.rnn import pad_sequence
+
+if TYPE_CHECKING:
+    import torch
 
 logger = logging.getLogger(__name__)
 
@@ -154,6 +158,14 @@ def batch_sentences(
     Returns:
         Batched input tensors
     """
+    try:
+        from torch.nn.utils.rnn import pad_sequence
+    except ModuleNotFoundError as e:
+        raise ImportError(
+            "The 'torch' library is required for this function. Please install it using "
+            "'pip install icebert-pos[torch]'."
+        ) from e
+
     # Unzip the list of tuples into separate lists
     input_ids, attention_mask, word_mask = zip(*sentence_tensors, strict=True)
 
diff --git a/uv.lock b/uv.lock