diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..303f47b8 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,74 @@ +default_language_version: + python: python3.9 + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + - id: end-of-file-fixer + - id: mixed-line-ending + args: [--fix=auto] + - id: check-merge-conflict + - id: check-json + - id: check-yaml + - id: check-toml + - id: check-ast + - id: debug-statements + - id: detect-private-key + - id: pretty-format-json + args: [--autofix, --no-ensure-ascii, --indent=2] + - id: check-illegal-windows-names + - id: check-case-conflict + - id: check-symlinks + - id: check-executables-have-shebangs + - id: check-shebang-scripts-are-executable + - id: requirements-txt-fixer + - id: no-commit-to-branch + args: [--branch, main] + + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + name: Sort imports with isort + files: ^(sygra|tests)/.*\.py$ + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.2.1 + hooks: + - id: ruff + name: Lint with Ruff + args: [--fix, --unsafe-fixes, --exit-non-zero-on-fix, --show-fixes] + files: ^(sygra|tests)/ + + - repo: local + hooks: + - id: mypy-poetry + name: Type check with mypy + entry: poetry run mypy sygra + language: system + pass_filenames: false + stages: [ pre-commit ] + + - repo: https://github.com/psf/black + rev: 24.8.0 + hooks: + - id: black + name: Format code with Black + files: ^(sygra|tests)/.*\.py$ + + - repo: https://github.com/python-poetry/poetry + rev: 1.7.0 + hooks: + - id: poetry-check + + - repo: local + hooks: + - id: pytest + name: Run tests with pytest + entry: poetry run pytest -q tests + language: system + pass_filenames: false + stages: [ pre-commit ] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 46416cbb..2d953b4c 100644 --- a/CONTRIBUTING.md +++ 
b/CONTRIBUTING.md @@ -23,6 +23,30 @@ When everything is set up, you can refer to the sections to setup a synthetic data generation pipeline. +## Pre-commit hooks + +We use [pre-commit](https://pre-commit.com/) to keep the codebase consistent. Hooks are run automatically on `git commit` (and some on `pre-push`). + +### One-time setup + +```bash +# Install dependencies (includes pre-commit as a dev dep) +poetry install + +# Install Git hooks for this repo +poetry run pre-commit install +poetry run pre-commit install -t pre-push + +# (optional) Warm the caches so your first commit is fast +poetry run pre-commit run --all-files +``` + +### Why pre-commit? + +- Fast feedback on style/format issues +- Consistent code across contributors +- Fewer “nit” comments in PR reviews + ## How to submit a bug report [Open an issue on Github](https://github.com/ServiceNow/sygra/issues/new/choose) and select "Bug report". If you are not sure whether it is a bug or not, submit an issue and we will be able to help you. diff --git a/pyproject.toml b/pyproject.toml index 23b1add0..a071eef6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ dependencies = [ "sentence-transformers>=5.1,<6.0", "soundfile>=0.13,<1.0", "types-pyyaml>=6.0,<7.0", + "fasttext-wheel (>=0.9.2,<0.10.0)", ] [project.optional-dependencies] diff --git a/sygra/models/__init__.py b/sygra/models/__init__.py index de37e781..11c5011a 100644 --- a/sygra/models/__init__.py +++ b/sygra/models/__init__.py @@ -37,18 +37,7 @@ def from_name(model_name: str, **kwargs) -> Optional[dict[str, Any]]: base_config["parameters"] = {} params: dict[str, Any] = cast(dict[str, Any], base_config["parameters"]) - params.update( - { - "temperature": kwargs.get( - "temperature", - params.get("temperature", 0.7), - ), - "max_tokens": kwargs.get( - "max_tokens", - params.get("max_tokens", 1000), - ), - } - ) + params.update(kwargs) return base_config else: