
Commit 01b9ba8

Authored by jjmachan, shahules786, and Jithin James
chore: adding a CI/CD pipeline (#11)
* add max_length
* black fixes
* added benchmarks
* pretty print benchmarks
* added makefile for all CI/CD
* fix Makefiles
* new CI workflow
* unit-test workflow
* add github token
* permissions
* install library
* add code style checks
* fix linting and formatting
* fix type annotation errors
* fix check for types
* fix lint
* fix old state storage

---------

Co-authored-by: Shahules786 <[email protected]>
Co-authored-by: Jithin James <[email protected]>
1 parent b8970bc commit 01b9ba8

16 files changed: +403 additions, -161 deletions


.github/workflows/ci.yaml

Lines changed: 147 additions & 0 deletions

@@ -0,0 +1,147 @@
name: CI

on:
  pull_request:

permissions:
  contents: read

env:
  LINES: 120
  COLUMNS: 120

# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#defaultsrun
defaults:
  run:
    shell: bash --noprofile --norc -exo pipefail {0}

jobs:
  diff:
    runs-on: ubuntu-latest
    outputs:
      related: ${{ steps.filter.outputs.related }}
      belar: ${{ steps.filter.outputs.belar }}
      docs: ${{ steps.filter.outputs.docs }}
    steps:
      - uses: actions/checkout@v3
      - uses: dorny/paths-filter@v2
        id: filter
        with:
          base: "main"
          token: ${{ github.token }}
          filters: |
            related: &related
              - .github/workflows/ci.yml
              - codecov.yml
              - pyproject.toml
              - requirements/test.txt
            belar:
              - "belar/**"
              - "tests/**"
              - "examples/**"
            docs:
              - *related
              - requirements/docs-requirements.txt
              - "docs/**"

  unit_tests:
    needs:
      - diff

    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.7", "3.8", "3.9", "3.10"]

    if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.belar == 'true') || github.event_name == 'push' }}
    name: python${{ matrix.python-version }}_unit_tests (${{ matrix.os }})
    runs-on: ${{ matrix.os }}

    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0  # fetch all tags and branches

      - name: Setup python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64

      - name: Get pip cache dir
        id: cache-dir
        run: |
          echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT

      - name: Cache pip dependencies
        uses: actions/cache@v3
        id: cache-pip
        with:
          path: ${{ steps.cache-dir.outputs.dir }}
          key: ${{ runner.os }}-tests-${{ hashFiles('requirements/test.txt') }}

      - name: Install dependencies
        run: |
          pip install "."
          pip install -r requirements/test.txt

      - name: Run unit tests
        run: |
          # OPTS=(--cov-config pyproject.toml --cov=src/bentoml --cov-append)
          if [ "${{ matrix.os }}" != 'windows-latest' ]; then
            # we will use pytest-xdist to improve tests run-time.
            OPTS=(--dist loadfile -n auto)
          fi
          # Now run the unit tests
          pytest tests/unit "${OPTS[@]}"

  codestyle_check:
    runs-on: ubuntu-latest
    needs:
      - diff

    if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.belar == 'true') || github.event_name == 'push' }}

    steps:
      - uses: actions/checkout@v3

      - name: Setup python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10.6"
          architecture: x64

      - name: Get pip cache dir
        id: cache-dir
        run: |
          echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT

      - name: Fetch origin
        run: git fetch origin "$GITHUB_BASE_REF"

      - name: Setup node
        uses: actions/setup-node@v3
        with:
          node-version: "17"

      - name: Cache pip dependencies
        uses: actions/cache@v3
        id: cache-pip
        with:
          path: ${{ steps.cache-dir.outputs.dir }}
          key: codestyle-${{ hashFiles('requirements/dev.txt') }}

      - name: Install dependencies
        run: |
          pip install .
          pip install -r requirements/dev.txt

      - name: Format check
        run: |
          make format
      - name: Lint check
        run: make lint
      - name: Type check
        if: ${{ github.event_name == 'pull_request' }}
        run: git diff --name-only --diff-filter=AM "origin/$GITHUB_BASE_REF" -z -- '**/*.py' '**/*.pyi' | xargs -0 --no-run-if-empty pyright

Makefile

Lines changed: 30 additions & 0 deletions

@@ -0,0 +1,30 @@
GIT_ROOT ?= $(shell git rev-parse --show-toplevel)

help: ## Show all Makefile targets
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'

.PHONY: format lint type style clean run-benchmarks
format: ## Running code formatter: black and isort
	@echo "(black) Formatting codebase..."
	@black --config pyproject.toml belar tests examples
	@echo "(black) Formatting stubs..."
	@find belar -name "*.pyi" ! -name "*_pb2*" -exec black --pyi --config pyproject.toml {} \;
	@echo "(isort) Reordering imports..."
	@isort .
	@echo "(ruff) Running fix only..."
	@ruff check belar examples tests --fix-only
lint: ## Running lint checker: ruff
	@echo "(ruff) Linting development project..."
	@ruff check belar examples tests
type: ## Running type checker: pyright
	@echo "(pyright) Typechecking codebase..."
	@pyright -p belar
clean: ## Clean all generated files
	@echo "Cleaning all generated files..."
	@cd $(GIT_ROOT)/docs && make clean
	@cd $(GIT_ROOT) || exit 1
	@find . -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete
run-ci: format lint type ## Running all CI checks
run-benchmarks: ## Run benchmarks
	@echo "Running benchmarks..."
	@cd $(GIT_ROOT)/tests/benchmarks && python benchmark.py

belar/metrics/__init__.py

Lines changed: 15 additions & 1 deletion

@@ -1,4 +1,18 @@
 from belar.metrics.base import Evaluation, Metric
 from belar.metrics.factual import EntailmentScore
 from belar.metrics.similarity import SBERTScore
-from belar.metrics.simple import *
+from belar.metrics.simple import (BLUE, EditDistance, EditRatio, Rouge1,
+                                  Rouge2, RougeL)
+
+__all__ = [
+    "Evaluation",
+    "Metric",
+    "EntailmentScore",
+    "SBERTScore",
+    "BLUE",
+    "EditDistance",
+    "EditRatio",
+    "RougeL",
+    "Rouge1",
+    "Rouge2",
+]
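
With the star import replaced by explicit names and an `__all__` list, the public surface of `belar.metrics` is now spelled out. A hedged usage sketch built only from the names re-exported above (the example strings are made up; `score` follows the `Metric` interface in `base.py`):

    # Hedged sketch: Rouge1, EditRatio and BLUE are the module-level metric
    # instances re-exported by belar.metrics.
    from belar.metrics import BLUE, EditRatio, Rouge1

    ground_truth = ["the cat sat on the mat"]
    generated_text = ["a cat sat on the mat"]

    for metric in (Rouge1, EditRatio, BLUE):
        # name is the abstract property; score() returns one float per pair.
        print(metric.name, metric.score(ground_truth, generated_text))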

belar/metrics/base.py

Lines changed: 10 additions & 5 deletions

@@ -2,7 +2,6 @@
 
 import typing as t
 from abc import ABC, abstractmethod
-from collections import namedtuple
 from dataclasses import dataclass
 
 import numpy as np
@@ -13,16 +12,18 @@
 class Metric(ABC):
     @property
     @abstractmethod
-    def name(self) -> str:
+    def name(self: t.Self) -> str:
         ...
 
     @property
     @abstractmethod
-    def is_batchable(self) -> bool:
+    def is_batchable(self: t.Self) -> bool:
         ...
 
     @abstractmethod
-    def score(self, ground_truth: list[str], generated_text: list[str]) -> list[float]:
+    def score(
+        self: t.Self, ground_truth: list[str], generated_text: list[str]
+    ) -> list[float]:
         ...
 
 
@@ -68,7 +69,11 @@ def _get_score(self, row: dict[str, list[t.Any]] | dict[str, t.Any]):
         else:  # not batched
             split_indices = len(row["ground_truth"])
             ground_truths = row["ground_truth"]
-            generated_texts = [row["generated_text"]] * split_indices
+            generated_text = row["generated_text"]
+            assert isinstance(
+                generated_text, str
+            ), f"generated_text should be str but got {type(generated_text)}"
+            generated_texts = [generated_text] * split_indices
         scores = metric.score(ground_truths, generated_texts)
         score = np.max(scores)
 
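
For reference, a minimal sketch of what the updated abstract interface asks of a concrete metric. The `ExactMatch` class below is a hypothetical illustration, not part of this commit:

    from dataclasses import dataclass

    from belar.metrics.base import Metric


    @dataclass
    class ExactMatch(Metric):
        """Hypothetical example of the Metric interface, not in the commit."""

        @property
        def name(self) -> str:
            return "exact_match"

        @property
        def is_batchable(self) -> bool:
            return True

        def score(
            self, ground_truth: list[str], generated_text: list[str]
        ) -> list[float]:
            # 1.0 where the generated text matches its reference exactly, else 0.0.
            return [float(gt == gen) for gt, gen in zip(ground_truth, generated_text)]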

belar/metrics/factual.py

Lines changed: 4 additions & 1 deletion

@@ -8,6 +8,9 @@
 from belar.metrics import Metric
 from belar.utils import device_check
 
+if t.TYPE_CHECKING:
+    from torch import device as Device
+
 
 @dataclass
 class EntailmentScore(Metric):
@@ -18,7 +21,7 @@ class EntailmentScore(Metric):
     model_name: str = "typeform/distilbert-base-uncased-mnli"
     max_length: int = 512
     batch_size: int = 4
-    device: t.Literal["cpu", "cuda"] = "cpu"
+    device: t.Literal["cpu", "cuda"] | Device = "cpu"
 
     def __post_init__(self):
         self.device = device_check(self.device)
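
The widened annotation means `device` now accepts either a literal string or an existing `torch.device`, which `__post_init__` normalizes through `device_check`. A hedged usage sketch, assuming the class's remaining fields carry defaults like the ones shown above:

    import torch

    from belar.metrics.factual import EntailmentScore

    # String literals are still accepted and validated by device_check ...
    metric_cpu = EntailmentScore(device="cpu")

    # ... and an already-constructed torch.device is now a valid value too.
    metric_dev = EntailmentScore(device=torch.device("cpu"))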

belar/metrics/similarity.py

Lines changed: 7 additions & 0 deletions

@@ -9,6 +9,9 @@
 
 from belar.metrics.base import Metric
 
+if t.TYPE_CHECKING:
+    from torch import Tensor
+
 SBERT_METRIC = t.Literal["cosine", "euclidean"]
 
 
@@ -42,6 +45,10 @@ def score(
         gentext_emb = self.model.encode(
             generated_text, batch_size=self.batch_size, convert_to_numpy=True
         )
+        assert isinstance(gentext_emb, Tensor) and isinstance(gndtruth_emb, Tensor), (
+            f"Both gndtruth_emb[{type(gentext_emb)}], gentext_emb[{type(gentext_emb)}]"
+            " should be Tensor."
+        )
 
         if self.similarity_metric == "cosine":
             score = np.dot(gndtruth_emb, gentext_emb.T) / (
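
The new assertion narrows the encoder output before the similarity computation in the context lines. For orientation, the cosine branch is a normalized dot product; a standalone numpy sketch of that formula (the variable names are illustrative, and the denominator here is the standard norm product, which is truncated in the hunk above):

    import numpy as np

    # Toy 1-D embeddings standing in for gndtruth_emb and gentext_emb.
    gndtruth_emb = np.array([0.1, 0.3, 0.5])
    gentext_emb = np.array([0.2, 0.2, 0.6])

    # Cosine similarity: dot product scaled by the product of the vector norms.
    score = np.dot(gndtruth_emb, gentext_emb.T) / (
        np.linalg.norm(gndtruth_emb) * np.linalg.norm(gentext_emb)
    )
    print(score)  # a value in [-1, 1]; roughly 0.97 for these toy vectors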

belar/metrics/simple.py

Lines changed: 2 additions & 4 deletions

@@ -14,7 +14,7 @@
 
 
 @dataclass
-class BLEU(Metric):
+class BLEUScore(Metric):
     weights: list[float] = field(default_factory=lambda: [0.25, 0.25, 0.25, 0.25])
     smoothing_function = None
 
@@ -94,8 +94,6 @@ def score(self, ground_truth: t.List[str], generated_text: t.List[str]):
 Rouge1 = ROUGE("rouge1")
 Rouge2 = ROUGE("rouge2")
 RougeL = ROUGE("rougeL")
-BLUE = BLEU()
+BLUE = BLEUScore()
 EditDistance = EditScore("distance")
 EditRatio = EditScore("ratio")
-
-__all__ = ["Rouge1", "Rouge2", "RougeL", "BLEU", "EditDistance", "EditRatio"]

belar/utils.py

Lines changed: 13 additions & 11 deletions

@@ -1,19 +1,21 @@
-import torch
 import typing as t
 from warnings import warn
 
+import torch
+
+if t.TYPE_CHECKING:
+    from torch import device as Device
+
 DEVICES = ["cpu", "cuda"]
 
 
-def device_check(device: t.Literal[DEVICES]):
-    if device == "cuda":
-        if torch.cuda.is_available():
-            device = torch.device("cuda")
-        else:
-            warn("cuda not available, using cpu")
-    elif device == "cpu":
-        device = torch.device("cpu")
-    else:
+def device_check(device: t.Literal["cpu", "cuda"] | Device) -> torch.device:
+    if isinstance(device, Device):
+        return device
+    if device not in DEVICES:
         raise ValueError(f"Invalid device {device}")
+    if device == "cuda" and not torch.cuda.is_available():
+        warn("cuda not available, using cpu")
+        device = "cpu"
 
-    return device
+    return torch.device(device)
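
After the refactor, every branch yields a `torch.device`: an existing device object passes straight through, unknown strings raise `ValueError`, and `"cuda"` degrades to CPU with a warning when CUDA is unavailable. A hedged sketch of the intended behaviour (note that, as committed, `Device` is imported only under `t.TYPE_CHECKING`, so this assumes the name resolves at runtime):

    import torch

    from belar.utils import device_check

    print(device_check("cpu"))                # expected: device(type='cpu')
    print(device_check(torch.device("cpu")))  # an existing torch.device is returned as-is
    print(device_check("cuda"))               # warns and falls back to CPU if CUDA is missing

    try:
        device_check("tpu")
    except ValueError as err:
        print(err)                            # expected: Invalid device tpu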
