diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..ddc62d4 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,30 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + +jobs: + tests: + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: 'pip' + - name: Install the project dependencies + run: | + pip install --upgrade pip + make install-dev + - name: Run lint + run: | + make lint-check + - name: Run tests + run: | + make tests \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..53f4169 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,34 @@ +name: Release + +on: + release: + types: [released] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: 'pip' + - name: Install pypa/build + run: pip install build --user + - name: Install the project dependencies + run: make install + - name: Build wheel and tarball + run: python -m build . --outdir dist/ + + pypi-publish: + name: Publish to PyPI + needs: + - build + runs-on: ubuntu-latest + permissions: + # IMPORTANT: this permission is mandatory for trusted publishing + id-token: write + steps: + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/Makefile b/Makefile index efebc18..b1aced4 100644 --- a/Makefile +++ b/Makefile @@ -12,5 +12,5 @@ lint-check: ruff format . 
--check ruff check **/*.py -test: - pytest -n auto \ No newline at end of file +tests: + pytest -n 5 \ No newline at end of file diff --git a/chromacache/__init__.py b/chromacache/__init__.py index 128c0bf..90e0536 100644 --- a/chromacache/__init__.py +++ b/chromacache/__init__.py @@ -1,2 +1,2 @@ -from .chromacache import ChromaCache +from .chromacache import * from .embedding_functions import * diff --git a/chromacache/chromacache.py b/chromacache/chromacache.py index ba4cf28..36e73e8 100644 --- a/chromacache/chromacache.py +++ b/chromacache/chromacache.py @@ -1,14 +1,5 @@ -try: - import chromadb - from chromadb import Documents, EmbeddingFunction, Embeddings -except Exception: - __import__("pysqlite3") - import sys - - sys.modules["sqlite3"] = sys.modules.pop("pysqlite3") - - import chromadb - from chromadb import EmbeddingFunction +import chromadb +from chromadb import Documents, EmbeddingFunction, Embeddings class ChromaCache: diff --git a/chromacache/embedding_functions/AbstractEmbeddingFunction.py b/chromacache/embedding_functions/AbstractEmbeddingFunction.py index a01ff07..3652b52 100644 --- a/chromacache/embedding_functions/AbstractEmbeddingFunction.py +++ b/chromacache/embedding_functions/AbstractEmbeddingFunction.py @@ -1,11 +1,3 @@ -try: - __import__("pysqlite3") - import sys - - sys.modules["sqlite3"] = sys.modules.pop("pysqlite3") -except Exception: - pass - from abc import ABC, abstractmethod import tiktoken @@ -70,7 +62,7 @@ def __call__(self, input: Documents) -> Embeddings: """Wrapper that truncates the documents, encodes them Args: - documents (Documents): List of documents + input (Documents): List of documents Returns: Embeddings: the encoded sentences diff --git a/chromacache/embedding_functions/__init__.py b/chromacache/embedding_functions/__init__.py index b7a8116..6862128 100644 --- a/chromacache/embedding_functions/__init__.py +++ b/chromacache/embedding_functions/__init__.py @@ -5,3 +5,13 @@ from .OpenAIEmbeddingFunction import 
OpenAIEmbeddingFunction from .SentenceTransformerEmbeddingFunction import SentenceTransformerEmbeddingFunction from .VoyageAIEmbeddingFunction import VoyageAIEmbeddingFunction + +__all__ = [ + "AbstractEmbeddingFunction", + "CohereEmbeddingFunction", + "LaserEmbeddingFunction", + "MistralAIEmbeddingFunction", + "OpenAIEmbeddingFunction", + "SentenceTransformerEmbeddingFunction", + "VoyageAIEmbeddingFunction", +] diff --git a/pyproject.toml b/pyproject.toml index e90731f..a8c7096 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,9 @@ requires = ["setuptools>=69.0", "wheel"] build-backend = "setuptools.build_meta" +[tool.setuptools.packages.find] +exclude = ["tests"] + [project] name = "chromacache" version = "0.0.1" @@ -22,12 +25,15 @@ dependencies = [ "voyageai>=0.1.6", "tiktoken>=0.5.2", "openai>=1.6.1", - "chromadb>=0.4.21", + "chromadb>=0.5.3", "python-dotenv>=1.0.1", - "cohere==4.41", - "laser_encoders==0.0.1", - "sentence-transformers==2.2.2", - "mistralai==0.0.12" + "cohere>=4.41", + "laser_encoders>=0.0.1", + "sentence-transformers>=2.2.2", + "mistralai>=0.0.12", + "fairseq>=0.12", + "hydra-core==1.3.2", + "omegaconf==2.3.0" ] [project.urls] @@ -36,9 +42,6 @@ Homepage = "https://github.com/Lyon-NLP/chroma_cache" [project.optional-dependencies] dev = ["ruff>=0.0.254", "pytest", "pytest-xdist"] -[tool.setuptools.packages.find] -exclude = ["tests", "results"] - [tool.ruff] target-version = "py38" diff --git a/tests/embedding_functions/test_AbstractEmbeddingFunction.py b/tests/embedding_functions/test_AbstractEmbeddingFunction.py index 578f1c0..6b2e76d 100644 --- a/tests/embedding_functions/test_AbstractEmbeddingFunction.py +++ b/tests/embedding_functions/test_AbstractEmbeddingFunction.py @@ -29,7 +29,7 @@ def test_truncate_documents(tokenizer, sentences): ) assert len(truncated_sentences) == len(sentences) - + tokenized_truncated_sentences = [tokenizer.encode(s) for s in truncated_sentences] for truncated_sentence in tokenized_truncated_sentences: 
assert len(truncated_sentence) <= max_token_length